From 3acd1a7d623a0a1f4776bcb7f2bc3cb9b65610ee Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Fri, 4 Jan 2008 13:33:58 +0000 Subject: [PATCH] fix JIT x86 FP output svn: r8201 --- collects/tests/mzscheme/optimize.ss | 8 +- src/mzscheme/src/jit.c | 129 ++++++++++++++++++-------- src/mzscheme/src/lightning/i386/asm.h | 2 + src/mzscheme/src/lightning/i386/fp.h | 69 +++++++++++++- 4 files changed, 168 insertions(+), 40 deletions(-) diff --git a/collects/tests/mzscheme/optimize.ss b/collects/tests/mzscheme/optimize.ss index 09fd0b5327..85b534f3ee 100644 --- a/collects/tests/mzscheme/optimize.ss +++ b/collects/tests/mzscheme/optimize.ss @@ -81,7 +81,12 @@ v (/ v 33333))) v)]) - (bin0 iv op (/ arg1 33333) (/ arg2 33333))))] + (bin0 iv op (/ arg1 33333) (/ arg2 33333))) + (let ([iv (if (number? v) +nan.0 #f)]) + (bin0 iv op (exact->inexact arg1) +nan.0) + (bin0 iv op +nan.0 (exact->inexact arg2)) + (unless (eq? op 'eq?) + (bin0 iv op +nan.0 +nan.0))))] [tri0 (lambda (v op get-arg1 arg2 arg3 check-effect) ;; (printf "Trying ~a ~a ~a\n" op (get-arg1) arg2 arg3); (let ([name `(,op ,get-arg1 ,arg2, arg3)]) @@ -203,6 +208,7 @@ (bin #f '= 200 100) (bin #t '= 100 100) (bin #f '= -200 100) + (bin #f '= +nan.0 +nan.0) (un 3 'add1 2) (un -3 'add1 -4) diff --git a/src/mzscheme/src/jit.c b/src/mzscheme/src/jit.c index 30f3e3039c..d4704aff87 100644 --- a/src/mzscheme/src/jit.c +++ b/src/mzscheme/src/jit.c @@ -60,7 +60,7 @@ END_XFORM_ARITH; #endif #ifdef MZ_USE_JIT_I386 -/* # define JIT_USE_FP_OPS */ +# define JIT_USE_FP_OPS #endif #ifdef MZ_USE_JIT_X86_64 @@ -1017,6 +1017,10 @@ static void *retry_alloc_code, *retry_alloc_code_keep_r0_r1; static void *retry_alloc_r1; /* set by prepare_retry_alloc() */ +#ifdef JIT_USE_FP_OPS +static double save_fp; +#endif + static void *prepare_retry_alloc(void *p, void *p2) { /* Alocate enough to trigger a new page */ @@ -1059,7 +1063,8 @@ static long initial_tag_word(Scheme_Type tag) static int inline_alloc(mz_jit_state *jitter, int amt, Scheme_Type 
ty, int keep_r0_r1) /* Puts allocated result at JIT_V1; first word is GC tag. - Uses JIT_R2 as temporary. The allocated memory is "dirty" (i.e., not 0ed). */ + Uses JIT_R2 as temporary. The allocated memory is "dirty" (i.e., not 0ed). + Save FP0 when FP ops are enabled. */ { GC_CAN_IGNORE jit_insn *ref, *reffail; long a_word, sz, algn; @@ -2436,6 +2441,41 @@ static int can_fast_double(int arith, int cmp, int two_args) return 0; } +/* The following FP-generation code is written to work both with a FP + stack (i387) and normal FP registers (everything else), through the + double-agent operations that end in _fppop() and _fppush(). In + FP-stack mode, the register names don't actually matter, but the + pushes and pops must balance. The popping branch operations pop + both arguments before branching. */ + +#if !defined(MZ_USE_JIT_I386) || defined(JIT_X86_64) +/* Not FP stack, so use normal variants. */ +#define jit_movi_d_fppush(rd,immd) jit_movi_d(rd,immd) +#define jit_ldi_d_fppush(rd, is) jit_ldi_d(rd, is) +#define jit_ldr_d_fppush(rd, rs) jit_ldr_d(rd, rs) +#define jit_ldxi_d_fppush(rd, rs, is) jit_ldxi_d(rd, rs, is) +#define jit_addr_d_fppop(rd,s1,s2) jit_addr_d(rd,s1,s2) +#define jit_subr_d_fppop(rd,s1,s2) jit_subr_d(rd,s1,s2) +#define jit_subrr_d_fppop(rd,s1,s2) jit_subrr_d(rd,s1,s2) +#define jit_mulr_d_fppop(rd,s1,s2) jit_mulr_d(rd,s1,s2) +#define jit_divr_d_fppop(rd,s1,s2) jit_divr_d(rd,s1,s2) +#define jit_divrr_d_fppop(rd,s1,s2) jit_divrr_d(rd,s1,s2) +#define jit_negr_d_fppop(rd,rs) jit_negr_d(rd,rs) +#define jit_abs_d_fppop(rd,rs) jit_abs_d(rd,rs) +#define jit_sti_d_fppop(id, rs) jit_sti_d(id, rs) +#define jit_stxi_d_fppop(id, rd, rs) jit_stxi_d(id, rd, rs) +#define jit_bger_d_fppop(d, s1, s2) jit_bger_d(d, s1, s2) +#define jit_bantiger_d_fppop(d, s1, s2) jit_bantiger_d(d, s1, s2) +#define jit_bler_d_fppop(d, s1, s2) jit_bler_d(d, s1, s2) +#define jit_bantiler_d_fppop(d, s1, s2) jit_bantiler_d(d, s1, s2) +#define jit_bgtr_d_fppop(d, s1, s2) jit_bgtr_d(d, s1,
s2) +#define jit_bantigtr_d_fppop(d, s1, s2) jit_bantigtr_d(d, s1, s2) +#define jit_bltr_d_fppop(d, s1, s2) jit_bltr_d(d, s1, s2) +#define jit_bantiltr_d_fppop(d, s1, s2) jit_bantiltr_d(d, s1, s2) +#define jit_beqr_d_fppop(d, s1, s2) jit_beqr_d(d, s1, s2) +#define jit_bner_d_fppop(d, s1, s2) jit_bner_d(d, s1, s2) +#endif + static int generate_double_arith(mz_jit_state *jitter, int arith, int cmp, int reversed, int two_args, int second_const, jit_insn **_refd, jit_insn **_refdt, int branch_short) @@ -2467,51 +2507,57 @@ static int generate_double_arith(mz_jit_state *jitter, int arith, int cmp, int r __END_SHORT_JUMPS__(1); /* Yes, they're doubles. */ - jit_ldxi_d(JIT_FPR0, JIT_R0, &((Scheme_Double *)0x0)->double_val); + jit_ldxi_d_fppush(JIT_FPR1, JIT_R0, &((Scheme_Double *)0x0)->double_val); if (two_args) { - jit_ldxi_d(JIT_FPR1, JIT_R1, &((Scheme_Double *)0x0)->double_val); + jit_ldxi_d_fppush(JIT_FPR0, JIT_R1, &((Scheme_Double *)0x0)->double_val); } else if ((arith == -1) && !second_const && reversed) { reversed = 0; + } else if (arith == 11) { + /* abs needs no extra number */ } else { double d = second_const; - jit_movi_d(JIT_FPR1, d); + jit_movi_d_fppush(JIT_FPR0, d); reversed = !reversed; cmp = -cmp; } if (arith) { switch (arith) { case 1: - jit_addr_d(JIT_FPR0, JIT_FPR0, JIT_FPR1); + jit_addr_d_fppop(JIT_FPR1, JIT_FPR0, JIT_FPR1); break; case 2: - jit_mulr_d(JIT_FPR0, JIT_FPR0, JIT_FPR1); + jit_mulr_d_fppop(JIT_FPR1, JIT_FPR0, JIT_FPR1); break; case -2: if (reversed) - jit_divr_d(JIT_FPR0, JIT_FPR0, JIT_FPR1); + jit_divrr_d_fppop(JIT_FPR1, JIT_FPR0, JIT_FPR1); else - jit_divr_d(JIT_FPR0, JIT_FPR1, JIT_FPR0); + jit_divr_d_fppop(JIT_FPR1, JIT_FPR0, JIT_FPR1); break; case -1: { if (!two_args && !second_const && !reversed) { /* Need a special case to make sure that (- 0.0) => -0.0 */ - jit_negr_d(JIT_FPR0, JIT_FPR0); + jit_negr_d_fppop(JIT_FPR1, JIT_FPR1); } else if (reversed) - jit_subr_d(JIT_FPR0, JIT_FPR0, JIT_FPR1); + jit_subrr_d_fppop(JIT_FPR1, JIT_FPR0, 
JIT_FPR1); else - jit_subr_d(JIT_FPR0, JIT_FPR1, JIT_FPR0); + jit_subr_d_fppop(JIT_FPR1, JIT_FPR0, JIT_FPR1); } break; case 9: /* min */ case 10: /* max */ { - GC_CAN_IGNORE jit_insn *refc; + GC_CAN_IGNORE jit_insn *refc, *refn; __START_SHORT_JUMPS__(1); + /* If R0 is nan, then copy to R1, ensuring nan result */ + refn = jit_beqr_d(jit_forward(), JIT_FPR1, JIT_FPR1); + jit_movr_p(JIT_R1, JIT_R0); + mz_patch_branch(refn); if (arith == 9) { - refc = jit_bler_d(jit_forward(), JIT_FPR0, JIT_FPR1); + refc = jit_bger_d_fppop(jit_forward(), JIT_FPR0, JIT_FPR1); } else { - refc = jit_bger_d(jit_forward(), JIT_FPR0, JIT_FPR1); + refc = jit_bltr_d_fppop(jit_forward(), JIT_FPR0, JIT_FPR1); } jit_movr_p(JIT_R0, JIT_R1); mz_patch_branch(refc); @@ -2520,10 +2566,7 @@ static int generate_double_arith(mz_jit_state *jitter, int arith, int cmp, int r } break; case 11: /* abs */ - __START_SHORT_JUMPS__(1); - refskip = jit_bger_d(jit_forward(), JIT_FPR0, JIT_FPR1); - jit_subr_d(JIT_FPR0, JIT_FPR1, JIT_FPR0); - __END_SHORT_JUMPS__(1); + jit_abs_d_fppop(JIT_FPR1, JIT_FPR1); break; default: break; @@ -2536,8 +2579,9 @@ static int generate_double_arith(mz_jit_state *jitter, int arith, int cmp, int r inline_alloc(jitter, sizeof(Scheme_Double), scheme_double_type, 0); CHECK_LIMIT(); jit_addi_p(JIT_R0, JIT_V1, sizeof(long)); + (void)jit_stxi_d_fppop(&((Scheme_Double *)0x0)->double_val, JIT_R0, JIT_FPR1); # else - (void)jit_sti_d(&double_result, JIT_FPR0); + (void)jit_sti_d_fppop(&double_result, JIT_FPR1); JIT_UPDATE_THREAD_RSPTR_IF_NEEDED(); mz_prepare(0); (void)mz_finish(malloc_double); @@ -2546,46 +2590,47 @@ static int generate_double_arith(mz_jit_state *jitter, int arith, int cmp, int r #endif CHECK_LIMIT(); - (void)jit_stxi_d(&((Scheme_Double *)0x0)->double_val, JIT_R0, JIT_FPR0); - if (refskip) { __START_SHORT_JUMPS__(1); mz_patch_branch(refskip); __END_SHORT_JUMPS__(1); } } + } else { + /* The "anti" variants below invert the branch. 
Unlike the "un" + variants, the "anti" variants invert the comparison result + after the layer where +nan.0 always generates false. */ __START_SHORT_JUMPS__(branch_short); switch (cmp) { case -2: - refdt = jit_bltr_d(jit_forward(), JIT_FPR1, JIT_FPR0); + refd = jit_bantiltr_d_fppop(jit_forward(), JIT_FPR0, JIT_FPR1); break; case -1: - refdt = jit_bler_d(jit_forward(), JIT_FPR1, JIT_FPR0); + refd = jit_bantiler_d_fppop(jit_forward(), JIT_FPR0, JIT_FPR1); break; case 0: - refdt = jit_beqr_d(jit_forward(), JIT_FPR1, JIT_FPR0); + refd = jit_bantieqr_d_fppop(jit_forward(), JIT_FPR0, JIT_FPR1); break; case 1: - refdt = jit_bger_d(jit_forward(), JIT_FPR1, JIT_FPR0); + refd = jit_bantiger_d_fppop(jit_forward(), JIT_FPR0, JIT_FPR1); break; case 2: - refdt = jit_bgtr_d(jit_forward(), JIT_FPR1, JIT_FPR0); + refd = jit_bantigtr_d_fppop(jit_forward(), JIT_FPR0, JIT_FPR1); break; default: - refdt = NULL; + refd = NULL; break; } __END_SHORT_JUMPS__(branch_short); - *_refdt = refdt; + *_refd = refd; } - __START_SHORT_JUMPS__(1); } - /* Jump to return result or false branch: */ - refd = jit_jmpi(jit_forward()); - *_refd = refd; + /* Jump to return result or true branch: */ + refdt = jit_jmpi(jit_forward()); + *_refdt = refdt; /* No, they're not both doubles. */ if (two_args) { @@ -3002,8 +3047,8 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj } } } - if (refd) - mz_patch_ucbranch(refd); + if (refdt) + mz_patch_ucbranch(refdt); jit_patch_movi(ref, (_jit.x.pc)); } else { /* If second is constant, first arg is in JIT_R0. 
*/ @@ -3060,7 +3105,7 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj ref2 = jit_jmpi(jit_forward()); mz_patch_branch(ref3); if (refd) - mz_patch_ucbranch(refd); + mz_patch_branch(refd); (void)jit_movi_p(JIT_R0, scheme_false); mz_patch_ucbranch(ref2); jit_patch_movi(ref, (_jit.x.pc)); @@ -4940,7 +4985,7 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int m case scheme_branch_type: { Scheme_Branch_Rec *branch = (Scheme_Branch_Rec *)obj; - jit_insn *refs[5], *ref2; + jit_insn *refs[6], *ref2; int nsrs, nsrs1, g1, g2, amt; #ifdef NEED_LONG_JUMPS int then_short_ok, else_short_ok; @@ -4966,6 +5011,7 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int m refs[2] = NULL; /* a movi patch, instead of a branch */ refs[3] = NULL; refs[4] = NULL; + refs[5] = NULL; /* a jmpi instead of a test branch */ if (!generate_inlined_test(jitter, branch->test, then_short_ok, refs)) { CHECK_LIMIT(); @@ -5018,6 +5064,9 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int m if (refs[4]) { mz_patch_branch(refs[4]); } + if (refs[5]) { + mz_patch_ucbranch(refs[5]); + } __END_SHORT_JUMPS__(then_short_ok); PAUSE_JIT_DATA(); LOG_IT(("...else\n")); @@ -6347,6 +6396,9 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) retry_alloc_code = jit_get_ip().ptr; mz_prolog(JIT_V1); +#ifdef JIT_USE_FP_OPS + (void)jit_sti_d_fppop(&save_fp, JIT_FPR1); +#endif JIT_UPDATE_THREAD_RSPTR(); jit_prepare(2); CHECK_LIMIT(); @@ -6363,6 +6415,9 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) if (i) { jit_ldi_l(JIT_R1, &retry_alloc_r1); } +#ifdef JIT_USE_FP_OPS + (void)jit_ldi_d_fppush(JIT_FPR1, &save_fp); +#endif mz_epilog(JIT_V1); CHECK_LIMIT(); } diff --git a/src/mzscheme/src/lightning/i386/asm.h b/src/mzscheme/src/lightning/i386/asm.h index cd5784faec..9f8c3efca2 100644 --- a/src/mzscheme/src/lightning/i386/asm.h +++ b/src/mzscheme/src/lightning/i386/asm.h @@ 
-1141,10 +1141,12 @@ typedef _uc jit_insn; #define FSTPr(RD) ESCri(RD,053) #define FCOMr(RD) ESCri(RD,002) #define FCOMPr(RD) ESCri(RD,003) +#define FCOMPPr(RD) ESCri(RD,073) #define FCOMIr(RD) ESCri(RD,036) #define FCOMIPr(RD) ESCri(RD,076) #define FUCOMr(RD) ESCri(RD,054) #define FUCOMPr(RD) ESCri(RD,055) +#define FUCOMPPr(RD) ESCri(RD,025) #define FUCOMIr(RD) ESCri(RD,035) #define FUCOMIPr(RD) ESCri(RD,075) #define FADDPr(RD) ESCri(RD,060) diff --git a/src/mzscheme/src/lightning/i386/fp.h b/src/mzscheme/src/lightning/i386/fp.h index 9394d5dead..79db017114 100644 --- a/src/mzscheme/src/lightning/i386/fp.h +++ b/src/mzscheme/src/lightning/i386/fp.h @@ -67,13 +67,24 @@ #define jit_addr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FADDrr,FADDrr) #define jit_subr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FSUBrr,FSUBRrr) +#define jit_subrr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FSUBRrr,FSUBrr) #define jit_mulr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FMULrr,FMULrr) #define jit_divr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FDIVrr,FDIVRrr) +#define jit_divrr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FDIVRrr,FDIVrr) #define jit_abs_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9e1)) #define jit_negr_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9e0)) #define jit_sqrt_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9fa)) +#define jit_addr_d_fppop(rd,s1,s2) (FADDPr(1)) +#define jit_subr_d_fppop(rd,s1,s2) (FSUBPr(1)) +#define jit_subrr_d_fppop(rd,s1,s2) (FSUBRPr(1)) +#define jit_mulr_d_fppop(rd,s1,s2) (FMULPr(1)) +#define jit_divr_d_fppop(rd,s1,s2) (FDIVPr(1)) +#define jit_divrr_d_fppop(rd,s1,s2) (FDIVRPr(1)) +#define jit_negr_d_fppop(rd,rs) ( _OO (0xd9e0)) +#define jit_abs_d_fppop(rd,rs) ( _OO (0xd9e1)) + /* - moves: move FPR0 to FPR3 @@ -127,6 +138,15 @@ union jit_double_imm { jit_ldr_d((rd), _ESP), \ ADDLir(8, _ESP)) +#define jit_movi_d_fppush(rd,immd) \ + (_O (0x68), \ + _jit.x.uc_pc[4] = 0x68, \ + ((union jit_double_imm *) (_jit.x.uc_pc + 5))->d = (double) immd, \ + *((int *) 
_jit.x.uc_pc) = ((union jit_double_imm *) (_jit.x.uc_pc + 5))->i[1], \ + _jit.x.uc_pc += 9, \ + jit_ldr_d_fppush((rd), _ESP), \ + ADDLir(8, _ESP)) + #define jit_ldi_f(rd, is) \ ((rd) == 0 ? (FSTPr (0), FLDSm((is), 0, 0, 0)) \ : (FLDSm((is), 0, 0, 0), FSTPr ((rd) + 1))) @@ -135,6 +155,8 @@ union jit_double_imm { ((rd) == 0 ? (FSTPr (0), FLDLm((is), 0, 0, 0)) \ : (FLDLm((is), 0, 0, 0), FSTPr ((rd) + 1))) +#define jit_ldi_d_fppush(rd, is) FLDLm((is), 0, 0, 0) + #define jit_ldr_f(rd, rs) \ ((rd) == 0 ? (FSTPr (0), FLDSm(0, (rs), 0, 0)) \ : (FLDSm(0, (rs), 0, 0), FSTPr ((rd) + 1))) @@ -143,6 +165,8 @@ union jit_double_imm { ((rd) == 0 ? (FSTPr (0), FLDLm(0, (rs), 0, 0)) \ : (FLDLm(0, (rs), 0, 0), FSTPr ((rd) + 1))) +#define jit_ldr_d_fppush(rd, rs) FLDLm(0, (rs), 0, 0) + #define jit_ldxi_f(rd, rs, is) \ ((rd) == 0 ? (FSTPr (0), FLDSm((is), (rs), 0, 0)) \ : (FLDSm((is), (rs), 0, 0), FSTPr ((rd) + 1))) @@ -151,6 +175,8 @@ union jit_double_imm { ((rd) == 0 ? (FSTPr (0), FLDLm((is), (rs), 0, 0)) \ : (FLDLm((is), (rs), 0, 0), FSTPr ((rd) + 1))) +#define jit_ldxi_d_fppush(rd, rs, is) FLDLm((is), (rs), 0, 0) + #define jit_ldxr_f(rd, s1, s2) \ ((rd) == 0 ? (FSTPr (0), FLDSm(0, (s1), (s2), 1)) \ : (FLDSm(0, (s1), (s2), 1), FSTPr ((rd) + 1))) @@ -173,6 +199,9 @@ union jit_double_imm { #define jit_sti_d(id, rs) jit_fxch ((rs), FSTLm((id), 0, 0, 0)) #define jit_str_d(rd, rs) jit_fxch ((rs), FSTLm(0, (rd), 0, 0)) +#define jit_sti_d_fppop(id, rs) FSTPLm((id), 0, 0, 0) +#define jit_stxi_d_fppop(id, rd, rs) FSTPLm((id), (rd), 0, 0) + /* Assume round to near mode */ #define jit_floorr_d_i(rd, rs) \ (FLDr (rs), jit_floor2((rd), ((rd) == _EDX ? _EAX : _EDX))) @@ -248,7 +277,26 @@ union jit_double_imm { ((d) != _EAX ? _O (0x90 + ((d) & 7)) : 0)) /* xchg */ #define jit_fp_btest(d, s1, s2, n, _and, cmp, res) \ - (((s1) == 0 ? FUCOMr((s2)) : (FLDr((s1)), FUCOMPr((s2) + 1))), \ + (((s1) == 0 ? 
FCOMr((s2)) : (FLDr((s1)), FUCOMPr((s2) + 1))), \ + PUSHLr(_EAX), \ + FNSTSWr(_EAX), \ + SHRLir(n, _EAX), \ + ((_and) ? ANDLir ((_and), _EAX) : 0), \ + ((cmp) ? CMPLir ((cmp), _AL) : 0), \ + POPLr(_EAX), \ + res ((d), 0, 0, 0), _jit.x.pc) + +#define jit_fp_test_fppop(d, n, _and, res) \ + (FUCOMPPr(1), \ + ((d) != _EAX ? MOVLrr(_EAX, (d)) : 0), \ + FNSTSWr(_EAX), \ + SHRLir(n, _EAX), \ + ((_and) ? ANDLir((_and), _EAX) : MOVLir(0, _EAX)), \ + res, \ + ((d) != _EAX ? _O (0x90 + ((d) & 7)) : 0)) /* xchg */ + +#define jit_fp_btest_fppop(d, n, _and, cmp, res) \ + (FUCOMPPr(1), \ PUSHLr(_EAX), \ FNSTSWr(_EAX), \ SHRLir(n, _EAX), \ @@ -298,22 +346,39 @@ union jit_double_imm { #define jit_uneqr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 15, 0, ADCBir (0, _AL)) #define jit_ordr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 11, 0, SBBBir (-1, _AL)) #define jit_unordr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 11, 0, ADCBir (0, _AL)) - #define jit_bgtr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0, JZm) #define jit_bger_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 9, 0, 0, JNCm) +#define jit_bantigtr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0, JNZm) +#define jit_bantiger_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 9, 0, 0, JCm) #define jit_bunler_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0, JNZm) #define jit_bunltr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 9, 0, 0, JCm) #define jit_bltr_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 8, 0x45, 0, JZm) #define jit_bler_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 9, 0, 0, JNCm) +#define jit_bantiltr_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 8, 0x45, 0, JNZm) +#define jit_bantiler_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 9, 0, 0, JCm) #define jit_bunger_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 8, 0x45, 0, JNZm) #define jit_bungtr_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 9, 0, 0, JCm) #define jit_beqr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0x40, JZm) +#define jit_bantieqr_d(d, s1, s2) 
jit_fp_btest((d), (s1), (s2), 8, 0x45, 0x40, JNZm) #define jit_bner_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0x40, JNZm) #define jit_bltgtr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 15, 0, 0, JNCm) #define jit_buneqr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 15, 0, 0, JCm) #define jit_bordr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 11, 0, 0, JNCm) #define jit_bunordr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 11, 0, 0, JCm) +#define jit_bger_d_fppop(d, s1, s2) jit_fp_btest_fppop((d), 9, 0, 0, JNCm) +#define jit_bantiger_d_fppop(d, s1, s2) jit_fp_btest_fppop((d), 9, 0, 0, JCm) +#define jit_bler_d_fppop(d, s1, s2) (FXCHr(1), jit_bger_d_fppop(d, s1, s2)) +#define jit_bantiler_d_fppop(d, s1, s2) (FXCHr(1), jit_bantiger_d_fppop(d, s1, s2)) + +#define jit_bgtr_d_fppop(d, s1, s2) jit_fp_btest_fppop((d), 8, 0x45, 0, JZm) +#define jit_bantigtr_d_fppop(d, s1, s2) jit_fp_btest_fppop((d), 8, 0x45, 0, JNZm) +#define jit_bltr_d_fppop(d, s1, s2) (FXCHr(1), jit_bgtr_d_fppop(d, s1, s2)) +#define jit_bantiltr_d_fppop(d, s1, s2) (FXCHr(1), jit_bantigtr_d_fppop(d, s1, s2)) + +#define jit_beqr_d_fppop(d, s1, s2) jit_fp_btest_fppop((d), 8, 0x45, 0x40, JZm) +#define jit_bantieqr_d_fppop(d, s1, s2) jit_fp_btest_fppop((d), 8, 0x45, 0x40, JNZm) + #define jit_getarg_f(rd, ofs) jit_ldxi_f((rd), JIT_FP,(ofs)) #define jit_getarg_d(rd, ofs) jit_ldxi_d((rd), JIT_FP,(ofs)) #define jit_pusharg_d(rs) (jit_subi_i(JIT_SP,JIT_SP,sizeof(double)), jit_str_d(JIT_SP,(rs)))