fix JIT x86 FP output

svn: r8201
This commit is contained in:
Matthew Flatt 2008-01-04 13:33:58 +00:00
parent 9b928ba025
commit 3acd1a7d62
4 changed files with 168 additions and 40 deletions

View File

@ -81,7 +81,12 @@
v v
(/ v 33333))) (/ v 33333)))
v)]) v)])
(bin0 iv op (/ arg1 33333) (/ arg2 33333))))] (bin0 iv op (/ arg1 33333) (/ arg2 33333)))
(let ([iv (if (number? v) +nan.0 #f)])
(bin0 iv op (exact->inexact arg1) +nan.0)
(bin0 iv op +nan.0 (exact->inexact arg2))
(unless (eq? op 'eq?)
(bin0 iv op +nan.0 +nan.0))))]
[tri0 (lambda (v op get-arg1 arg2 arg3 check-effect) [tri0 (lambda (v op get-arg1 arg2 arg3 check-effect)
;; (printf "Trying ~a ~a ~a\n" op (get-arg1) arg2 arg3); ;; (printf "Trying ~a ~a ~a\n" op (get-arg1) arg2 arg3);
(let ([name `(,op ,get-arg1 ,arg2, arg3)]) (let ([name `(,op ,get-arg1 ,arg2, arg3)])
@ -203,6 +208,7 @@
(bin #f '= 200 100) (bin #f '= 200 100)
(bin #t '= 100 100) (bin #t '= 100 100)
(bin #f '= -200 100) (bin #f '= -200 100)
(bin #f '= +nan.0 +nan.0)
(un 3 'add1 2) (un 3 'add1 2)
(un -3 'add1 -4) (un -3 'add1 -4)

View File

@ -60,7 +60,7 @@ END_XFORM_ARITH;
#endif #endif
#ifdef MZ_USE_JIT_I386 #ifdef MZ_USE_JIT_I386
/* # define JIT_USE_FP_OPS */ # define JIT_USE_FP_OPS
#endif #endif
#ifdef MZ_USE_JIT_X86_64 #ifdef MZ_USE_JIT_X86_64
@ -1017,6 +1017,10 @@ static void *retry_alloc_code, *retry_alloc_code_keep_r0_r1;
static void *retry_alloc_r1; /* set by prepare_retry_alloc() */ static void *retry_alloc_r1; /* set by prepare_retry_alloc() */
#ifdef JIT_USE_FP_OPS
/* Scratch slot for one double. The retry-allocation stub emitted by
   do_generate_common() stores the FP value into it (jit_sti_d_fppop)
   before making its call and reloads it (jit_ldi_d_fppush) before
   returning. Being a single static, it is presumably not safe to use
   from more than one thread at a time -- TODO confirm. */
static double save_fp;
#endif
static void *prepare_retry_alloc(void *p, void *p2) static void *prepare_retry_alloc(void *p, void *p2)
{ {
/* Allocate enough to trigger a new page */ /* Allocate enough to trigger a new page */
@ -1059,7 +1063,8 @@ static long initial_tag_word(Scheme_Type tag)
static int inline_alloc(mz_jit_state *jitter, int amt, Scheme_Type ty, int keep_r0_r1) static int inline_alloc(mz_jit_state *jitter, int amt, Scheme_Type ty, int keep_r0_r1)
/* Puts allocated result at JIT_V1; first word is GC tag. /* Puts allocated result at JIT_V1; first word is GC tag.
Uses JIT_R2 as temporary. The allocated memory is "dirty" (i.e., not 0ed). */ Uses JIT_R2 as temporary. The allocated memory is "dirty" (i.e., not 0ed).
Save FP0 when FP ops are enabled. */
{ {
GC_CAN_IGNORE jit_insn *ref, *reffail; GC_CAN_IGNORE jit_insn *ref, *reffail;
long a_word, sz, algn; long a_word, sz, algn;
@ -2436,6 +2441,41 @@ static int can_fast_double(int arith, int cmp, int two_args)
return 0; return 0;
} }
/* The following FP-generation code is written to work both with a FP
stack (i387) and normal FP registers (everything else), through the
double-agent operations that end in _fppop() and _fppush(). In
FP-stack mode, the register names don't actually matter, but the
pushes and pops must balance. The popping branch operations pop
both arguments before branching. */
#if !defined(MZ_USE_JIT_I386) || defined(JIT_X86_64)
/* Not FP stack, so use normal variants.
   Each _fppush()/_fppop() name forwards its arguments unchanged to the
   plain register-based operation of the same base name. Every _fppop()
   or _fppush() operation used by the code generator must have an entry
   here; otherwise non-FP-stack builds fail on an undefined name. */
#define jit_movi_d_fppush(rd,immd) jit_movi_d(rd,immd)
#define jit_ldi_d_fppush(rd, is) jit_ldi_d(rd, is)
#define jit_ldr_d_fppush(rd, rs) jit_ldr_d(rd, rs)
#define jit_ldxi_d_fppush(rd, rs, is) jit_ldxi_d(rd, rs, is)
#define jit_addr_d_fppop(rd,s1,s2) jit_addr_d(rd,s1,s2)
#define jit_subr_d_fppop(rd,s1,s2) jit_subr_d(rd,s1,s2)
#define jit_subrr_d_fppop(rd,s1,s2) jit_subrr_d(rd,s1,s2)
#define jit_mulr_d_fppop(rd,s1,s2) jit_mulr_d(rd,s1,s2)
#define jit_divr_d_fppop(rd,s1,s2) jit_divr_d(rd,s1,s2)
#define jit_divrr_d_fppop(rd,s1,s2) jit_divrr_d(rd,s1,s2)
#define jit_negr_d_fppop(rd,rs) jit_negr_d(rd,rs)
#define jit_abs_d_fppop(rd,rs) jit_abs_d(rd,rs)
#define jit_sti_d_fppop(id, rs) jit_sti_d(id, rs)
#define jit_stxi_d_fppop(id, rd, rs) jit_stxi_d(id, rd, rs)
#define jit_bger_d_fppop(d, s1, s2) jit_bger_d(d, s1, s2)
#define jit_bantiger_d_fppop(d, s1, s2) jit_bantiger_d(d, s1, s2)
#define jit_bler_d_fppop(d, s1, s2) jit_bler_d(d, s1, s2)
#define jit_bantiler_d_fppop(d, s1, s2) jit_bantiler_d(d, s1, s2)
#define jit_bgtr_d_fppop(d, s1, s2) jit_bgtr_d(d, s1, s2)
#define jit_bantigtr_d_fppop(d, s1, s2) jit_bantigtr_d(d, s1, s2)
#define jit_bltr_d_fppop(d, s1, s2) jit_bltr_d(d, s1, s2)
#define jit_bantiltr_d_fppop(d, s1, s2) jit_bantiltr_d(d, s1, s2)
#define jit_beqr_d_fppop(d, s1, s2) jit_beqr_d(d, s1, s2)
/* Used by the cmp == 0 case of generate_double_arith(). */
#define jit_bantieqr_d_fppop(d, s1, s2) jit_bantieqr_d(d, s1, s2)
#define jit_bner_d_fppop(d, s1, s2) jit_bner_d(d, s1, s2)
#endif
static int generate_double_arith(mz_jit_state *jitter, int arith, int cmp, int reversed, int two_args, int second_const, static int generate_double_arith(mz_jit_state *jitter, int arith, int cmp, int reversed, int two_args, int second_const,
jit_insn **_refd, jit_insn **_refdt, jit_insn **_refd, jit_insn **_refdt,
int branch_short) int branch_short)
@ -2467,51 +2507,57 @@ static int generate_double_arith(mz_jit_state *jitter, int arith, int cmp, int r
__END_SHORT_JUMPS__(1); __END_SHORT_JUMPS__(1);
/* Yes, they're doubles. */ /* Yes, they're doubles. */
jit_ldxi_d(JIT_FPR0, JIT_R0, &((Scheme_Double *)0x0)->double_val); jit_ldxi_d_fppush(JIT_FPR1, JIT_R0, &((Scheme_Double *)0x0)->double_val);
if (two_args) { if (two_args) {
jit_ldxi_d(JIT_FPR1, JIT_R1, &((Scheme_Double *)0x0)->double_val); jit_ldxi_d_fppush(JIT_FPR0, JIT_R1, &((Scheme_Double *)0x0)->double_val);
} else if ((arith == -1) && !second_const && reversed) { } else if ((arith == -1) && !second_const && reversed) {
reversed = 0; reversed = 0;
} else if (arith == 11) {
/* abs needs no extra number */
} else { } else {
double d = second_const; double d = second_const;
jit_movi_d(JIT_FPR1, d); jit_movi_d_fppush(JIT_FPR0, d);
reversed = !reversed; reversed = !reversed;
cmp = -cmp; cmp = -cmp;
} }
if (arith) { if (arith) {
switch (arith) { switch (arith) {
case 1: case 1:
jit_addr_d(JIT_FPR0, JIT_FPR0, JIT_FPR1); jit_addr_d_fppop(JIT_FPR1, JIT_FPR0, JIT_FPR1);
break; break;
case 2: case 2:
jit_mulr_d(JIT_FPR0, JIT_FPR0, JIT_FPR1); jit_mulr_d_fppop(JIT_FPR1, JIT_FPR0, JIT_FPR1);
break; break;
case -2: case -2:
if (reversed) if (reversed)
jit_divr_d(JIT_FPR0, JIT_FPR0, JIT_FPR1); jit_divrr_d_fppop(JIT_FPR1, JIT_FPR0, JIT_FPR1);
else else
jit_divr_d(JIT_FPR0, JIT_FPR1, JIT_FPR0); jit_divr_d_fppop(JIT_FPR1, JIT_FPR0, JIT_FPR1);
break; break;
case -1: case -1:
{ {
if (!two_args && !second_const && !reversed) { if (!two_args && !second_const && !reversed) {
/* Need a special case to make sure that (- 0.0) => -0.0 */ /* Need a special case to make sure that (- 0.0) => -0.0 */
jit_negr_d(JIT_FPR0, JIT_FPR0); jit_negr_d_fppop(JIT_FPR1, JIT_FPR1);
} else if (reversed) } else if (reversed)
jit_subr_d(JIT_FPR0, JIT_FPR0, JIT_FPR1); jit_subrr_d_fppop(JIT_FPR1, JIT_FPR0, JIT_FPR1);
else else
jit_subr_d(JIT_FPR0, JIT_FPR1, JIT_FPR0); jit_subr_d_fppop(JIT_FPR1, JIT_FPR0, JIT_FPR1);
} }
break; break;
case 9: /* min */ case 9: /* min */
case 10: /* max */ case 10: /* max */
{ {
GC_CAN_IGNORE jit_insn *refc; GC_CAN_IGNORE jit_insn *refc, *refn;
__START_SHORT_JUMPS__(1); __START_SHORT_JUMPS__(1);
/* If R0 is nan, then copy to R1, ensuring nan result */
refn = jit_beqr_d(jit_forward(), JIT_FPR1, JIT_FPR1);
jit_movr_p(JIT_R1, JIT_R0);
mz_patch_branch(refn);
if (arith == 9) { if (arith == 9) {
refc = jit_bler_d(jit_forward(), JIT_FPR0, JIT_FPR1); refc = jit_bger_d_fppop(jit_forward(), JIT_FPR0, JIT_FPR1);
} else { } else {
refc = jit_bger_d(jit_forward(), JIT_FPR0, JIT_FPR1); refc = jit_bltr_d_fppop(jit_forward(), JIT_FPR0, JIT_FPR1);
} }
jit_movr_p(JIT_R0, JIT_R1); jit_movr_p(JIT_R0, JIT_R1);
mz_patch_branch(refc); mz_patch_branch(refc);
@ -2520,10 +2566,7 @@ static int generate_double_arith(mz_jit_state *jitter, int arith, int cmp, int r
} }
break; break;
case 11: /* abs */ case 11: /* abs */
__START_SHORT_JUMPS__(1); jit_abs_d_fppop(JIT_FPR1, JIT_FPR1);
refskip = jit_bger_d(jit_forward(), JIT_FPR0, JIT_FPR1);
jit_subr_d(JIT_FPR0, JIT_FPR1, JIT_FPR0);
__END_SHORT_JUMPS__(1);
break; break;
default: default:
break; break;
@ -2536,8 +2579,9 @@ static int generate_double_arith(mz_jit_state *jitter, int arith, int cmp, int r
inline_alloc(jitter, sizeof(Scheme_Double), scheme_double_type, 0); inline_alloc(jitter, sizeof(Scheme_Double), scheme_double_type, 0);
CHECK_LIMIT(); CHECK_LIMIT();
jit_addi_p(JIT_R0, JIT_V1, sizeof(long)); jit_addi_p(JIT_R0, JIT_V1, sizeof(long));
(void)jit_stxi_d_fppop(&((Scheme_Double *)0x0)->double_val, JIT_R0, JIT_FPR1);
# else # else
(void)jit_sti_d(&double_result, JIT_FPR0); (void)jit_sti_d_fppop(&double_result, JIT_FPR1);
JIT_UPDATE_THREAD_RSPTR_IF_NEEDED(); JIT_UPDATE_THREAD_RSPTR_IF_NEEDED();
mz_prepare(0); mz_prepare(0);
(void)mz_finish(malloc_double); (void)mz_finish(malloc_double);
@ -2546,46 +2590,47 @@ static int generate_double_arith(mz_jit_state *jitter, int arith, int cmp, int r
#endif #endif
CHECK_LIMIT(); CHECK_LIMIT();
(void)jit_stxi_d(&((Scheme_Double *)0x0)->double_val, JIT_R0, JIT_FPR0);
if (refskip) { if (refskip) {
__START_SHORT_JUMPS__(1); __START_SHORT_JUMPS__(1);
mz_patch_branch(refskip); mz_patch_branch(refskip);
__END_SHORT_JUMPS__(1); __END_SHORT_JUMPS__(1);
} }
} }
} else { } else {
/* The "anti" variants below invert the branch. Unlike the "un"
variants, the "anti" variants invert the comparison result
after the layer where +nan.0 always generates false. */
__START_SHORT_JUMPS__(branch_short); __START_SHORT_JUMPS__(branch_short);
switch (cmp) { switch (cmp) {
case -2: case -2:
refdt = jit_bltr_d(jit_forward(), JIT_FPR1, JIT_FPR0); refd = jit_bantiltr_d_fppop(jit_forward(), JIT_FPR0, JIT_FPR1);
break; break;
case -1: case -1:
refdt = jit_bler_d(jit_forward(), JIT_FPR1, JIT_FPR0); refd = jit_bantiler_d_fppop(jit_forward(), JIT_FPR0, JIT_FPR1);
break; break;
case 0: case 0:
refdt = jit_beqr_d(jit_forward(), JIT_FPR1, JIT_FPR0); refd = jit_bantieqr_d_fppop(jit_forward(), JIT_FPR0, JIT_FPR1);
break; break;
case 1: case 1:
refdt = jit_bger_d(jit_forward(), JIT_FPR1, JIT_FPR0); refd = jit_bantiger_d_fppop(jit_forward(), JIT_FPR0, JIT_FPR1);
break; break;
case 2: case 2:
refdt = jit_bgtr_d(jit_forward(), JIT_FPR1, JIT_FPR0); refd = jit_bantigtr_d_fppop(jit_forward(), JIT_FPR0, JIT_FPR1);
break; break;
default: default:
refdt = NULL; refd = NULL;
break; break;
} }
__END_SHORT_JUMPS__(branch_short); __END_SHORT_JUMPS__(branch_short);
*_refdt = refdt; *_refd = refd;
} }
__START_SHORT_JUMPS__(1); __START_SHORT_JUMPS__(1);
} }
/* Jump to return result or false branch: */ /* Jump to return result or true branch: */
refd = jit_jmpi(jit_forward()); refdt = jit_jmpi(jit_forward());
*_refd = refd; *_refdt = refdt;
/* No, they're not both doubles. */ /* No, they're not both doubles. */
if (two_args) { if (two_args) {
@ -3002,8 +3047,8 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj
} }
} }
} }
if (refd) if (refdt)
mz_patch_ucbranch(refd); mz_patch_ucbranch(refdt);
jit_patch_movi(ref, (_jit.x.pc)); jit_patch_movi(ref, (_jit.x.pc));
} else { } else {
/* If second is constant, first arg is in JIT_R0. */ /* If second is constant, first arg is in JIT_R0. */
@ -3060,7 +3105,7 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj
ref2 = jit_jmpi(jit_forward()); ref2 = jit_jmpi(jit_forward());
mz_patch_branch(ref3); mz_patch_branch(ref3);
if (refd) if (refd)
mz_patch_ucbranch(refd); mz_patch_branch(refd);
(void)jit_movi_p(JIT_R0, scheme_false); (void)jit_movi_p(JIT_R0, scheme_false);
mz_patch_ucbranch(ref2); mz_patch_ucbranch(ref2);
jit_patch_movi(ref, (_jit.x.pc)); jit_patch_movi(ref, (_jit.x.pc));
@ -4940,7 +4985,7 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int m
case scheme_branch_type: case scheme_branch_type:
{ {
Scheme_Branch_Rec *branch = (Scheme_Branch_Rec *)obj; Scheme_Branch_Rec *branch = (Scheme_Branch_Rec *)obj;
jit_insn *refs[5], *ref2; jit_insn *refs[6], *ref2;
int nsrs, nsrs1, g1, g2, amt; int nsrs, nsrs1, g1, g2, amt;
#ifdef NEED_LONG_JUMPS #ifdef NEED_LONG_JUMPS
int then_short_ok, else_short_ok; int then_short_ok, else_short_ok;
@ -4966,6 +5011,7 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int m
refs[2] = NULL; /* a movi patch, instead of a branch */ refs[2] = NULL; /* a movi patch, instead of a branch */
refs[3] = NULL; refs[3] = NULL;
refs[4] = NULL; refs[4] = NULL;
refs[5] = NULL; /* a jmpi instead of a test branch */
if (!generate_inlined_test(jitter, branch->test, then_short_ok, refs)) { if (!generate_inlined_test(jitter, branch->test, then_short_ok, refs)) {
CHECK_LIMIT(); CHECK_LIMIT();
@ -5018,6 +5064,9 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int m
if (refs[4]) { if (refs[4]) {
mz_patch_branch(refs[4]); mz_patch_branch(refs[4]);
} }
if (refs[5]) {
mz_patch_ucbranch(refs[5]);
}
__END_SHORT_JUMPS__(then_short_ok); __END_SHORT_JUMPS__(then_short_ok);
PAUSE_JIT_DATA(); PAUSE_JIT_DATA();
LOG_IT(("...else\n")); LOG_IT(("...else\n"));
@ -6347,6 +6396,9 @@ static int do_generate_common(mz_jit_state *jitter, void *_data)
retry_alloc_code = jit_get_ip().ptr; retry_alloc_code = jit_get_ip().ptr;
mz_prolog(JIT_V1); mz_prolog(JIT_V1);
#ifdef JIT_USE_FP_OPS
(void)jit_sti_d_fppop(&save_fp, JIT_FPR1);
#endif
JIT_UPDATE_THREAD_RSPTR(); JIT_UPDATE_THREAD_RSPTR();
jit_prepare(2); jit_prepare(2);
CHECK_LIMIT(); CHECK_LIMIT();
@ -6363,6 +6415,9 @@ static int do_generate_common(mz_jit_state *jitter, void *_data)
if (i) { if (i) {
jit_ldi_l(JIT_R1, &retry_alloc_r1); jit_ldi_l(JIT_R1, &retry_alloc_r1);
} }
#ifdef JIT_USE_FP_OPS
(void)jit_ldi_d_fppush(JIT_FPR1, &save_fp);
#endif
mz_epilog(JIT_V1); mz_epilog(JIT_V1);
CHECK_LIMIT(); CHECK_LIMIT();
} }

View File

@ -1141,10 +1141,12 @@ typedef _uc jit_insn;
#define FSTPr(RD) ESCri(RD,053) #define FSTPr(RD) ESCri(RD,053)
#define FCOMr(RD) ESCri(RD,002) #define FCOMr(RD) ESCri(RD,002)
#define FCOMPr(RD) ESCri(RD,003) #define FCOMPr(RD) ESCri(RD,003)
/* x87 FCOMPP (compare ST(0) with ST(1), then pop both) is encoded DE D9:
   escape opcode 6 with /3, i.e. octal 063 in the (escape, reg) scheme the
   neighbouring ESCri users follow (FCOMPr = 003, FADDPr = 060). The
   previous value 073 selected escape 7 (DF D9), which is not FCOMPP. */
#define FCOMPPr(RD) ESCri(RD,063)
#define FCOMIr(RD) ESCri(RD,036) #define FCOMIr(RD) ESCri(RD,036)
#define FCOMIPr(RD) ESCri(RD,076) #define FCOMIPr(RD) ESCri(RD,076)
#define FUCOMr(RD) ESCri(RD,054) #define FUCOMr(RD) ESCri(RD,054)
#define FUCOMPr(RD) ESCri(RD,055) #define FUCOMPr(RD) ESCri(RD,055)
#define FUCOMPPr(RD) ESCri(RD,025)
#define FUCOMIr(RD) ESCri(RD,035) #define FUCOMIr(RD) ESCri(RD,035)
#define FUCOMIPr(RD) ESCri(RD,075) #define FUCOMIPr(RD) ESCri(RD,075)
#define FADDPr(RD) ESCri(RD,060) #define FADDPr(RD) ESCri(RD,060)

View File

@ -67,13 +67,24 @@
#define jit_addr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FADDrr,FADDrr) #define jit_addr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FADDrr,FADDrr)
#define jit_subr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FSUBrr,FSUBRrr) #define jit_subr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FSUBrr,FSUBRrr)
#define jit_subrr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FSUBRrr,FSUBrr)
#define jit_mulr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FMULrr,FMULrr) #define jit_mulr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FMULrr,FMULrr)
#define jit_divr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FDIVrr,FDIVRrr) #define jit_divr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FDIVrr,FDIVRrr)
#define jit_divrr_d(rd,s1,s2) jit_fp_binary((rd),(s1),(s2),FDIVRrr,FDIVrr)
#define jit_abs_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9e1)) #define jit_abs_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9e1))
#define jit_negr_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9e0)) #define jit_negr_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9e0))
#define jit_sqrt_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9fa)) #define jit_sqrt_d(rd,rs) jit_fp_unary ((rd), (rs), _OO (0xd9fa))
/* FP-stack ("_fppop") forms of the double arithmetic operations.
   The register arguments (rd, s1, s2, rs) are accepted only for interface
   compatibility and are ignored: the operands are implicitly the top
   entries of the x87 stack. The binary forms emit the popping instruction
   with operand ST(1) (presumably combining ST(0) into ST(1) and popping,
   leaving the result on top -- confirm against the F*Pr definitions).
   The unary forms emit the two-byte opcodes D9 E0 (FCHS) and D9 E1 (FABS),
   which modify ST(0) in place and do not change the stack depth. */
#define jit_addr_d_fppop(rd,s1,s2) (FADDPr(1))
#define jit_subr_d_fppop(rd,s1,s2) (FSUBPr(1))
#define jit_subrr_d_fppop(rd,s1,s2) (FSUBRPr(1))
#define jit_mulr_d_fppop(rd,s1,s2) (FMULPr(1))
#define jit_divr_d_fppop(rd,s1,s2) (FDIVPr(1))
#define jit_divrr_d_fppop(rd,s1,s2) (FDIVRPr(1))
#define jit_negr_d_fppop(rd,rs) ( _OO (0xd9e0))
#define jit_abs_d_fppop(rd,rs) ( _OO (0xd9e1))
/* - moves: /* - moves:
move FPR0 to FPR3 move FPR0 to FPR3
@ -127,6 +138,15 @@ union jit_double_imm {
jit_ldr_d((rd), _ESP), \ jit_ldr_d((rd), _ESP), \
ADDLir(8, _ESP)) ADDLir(8, _ESP))
/* Push the double constant `immd` onto the FP stack by way of the machine
   stack. Two "push imm32" instructions (opcode 0x68) are assembled by
   hand, 10 bytes in total:
     - _O(0x68) emits the first opcode (presumably advancing the pc by one
       byte, which is what the offsets below rely on);
     - pc[4] receives the second opcode, and the whole double is written
       at pc+5 so that its i[0] word becomes the second immediate;
     - the i[1] word is then copied to pc+0 as the first immediate.
   Writing the 8-byte double at pc+5 also touches the 4 bytes past the
   9 bytes the pc is advanced by; those are presumably overwritten by the
   instructions emitted next -- NOTE(review): confirm the code buffer always
   has that slack.
   Finally the value is loaded from [ESP] with jit_ldr_d_fppush and ESP is
   restored by adding 8. `rd` is only forwarded to jit_ldr_d_fppush. */
#define jit_movi_d_fppush(rd,immd) \
(_O (0x68), \
_jit.x.uc_pc[4] = 0x68, \
((union jit_double_imm *) (_jit.x.uc_pc + 5))->d = (double) immd, \
*((int *) _jit.x.uc_pc) = ((union jit_double_imm *) (_jit.x.uc_pc + 5))->i[1], \
_jit.x.uc_pc += 9, \
jit_ldr_d_fppush((rd), _ESP), \
ADDLir(8, _ESP))
#define jit_ldi_f(rd, is) \ #define jit_ldi_f(rd, is) \
((rd) == 0 ? (FSTPr (0), FLDSm((is), 0, 0, 0)) \ ((rd) == 0 ? (FSTPr (0), FLDSm((is), 0, 0, 0)) \
: (FLDSm((is), 0, 0, 0), FSTPr ((rd) + 1))) : (FLDSm((is), 0, 0, 0), FSTPr ((rd) + 1)))
@ -135,6 +155,8 @@ union jit_double_imm {
((rd) == 0 ? (FSTPr (0), FLDLm((is), 0, 0, 0)) \ ((rd) == 0 ? (FSTPr (0), FLDLm((is), 0, 0, 0)) \
: (FLDLm((is), 0, 0, 0), FSTPr ((rd) + 1))) : (FLDLm((is), 0, 0, 0), FSTPr ((rd) + 1)))
/* FP-stack load from the absolute address `is`: emits only the FLDLm load
   (presumably pushing the double onto the x87 stack); `rd` is ignored. */
#define jit_ldi_d_fppush(rd, is) FLDLm((is), 0, 0, 0)
#define jit_ldr_f(rd, rs) \ #define jit_ldr_f(rd, rs) \
((rd) == 0 ? (FSTPr (0), FLDSm(0, (rs), 0, 0)) \ ((rd) == 0 ? (FSTPr (0), FLDSm(0, (rs), 0, 0)) \
: (FLDSm(0, (rs), 0, 0), FSTPr ((rd) + 1))) : (FLDSm(0, (rs), 0, 0), FSTPr ((rd) + 1)))
@ -143,6 +165,8 @@ union jit_double_imm {
((rd) == 0 ? (FSTPr (0), FLDLm(0, (rs), 0, 0)) \ ((rd) == 0 ? (FSTPr (0), FLDLm(0, (rs), 0, 0)) \
: (FLDLm(0, (rs), 0, 0), FSTPr ((rd) + 1))) : (FLDLm(0, (rs), 0, 0), FSTPr ((rd) + 1)))
/* FP-stack load from the address held in register `rs`: emits only the
   FLDLm load (presumably pushing the double onto the x87 stack); `rd` is
   ignored. */
#define jit_ldr_d_fppush(rd, rs) FLDLm(0, (rs), 0, 0)
#define jit_ldxi_f(rd, rs, is) \ #define jit_ldxi_f(rd, rs, is) \
((rd) == 0 ? (FSTPr (0), FLDSm((is), (rs), 0, 0)) \ ((rd) == 0 ? (FSTPr (0), FLDSm((is), (rs), 0, 0)) \
: (FLDSm((is), (rs), 0, 0), FSTPr ((rd) + 1))) : (FLDSm((is), (rs), 0, 0), FSTPr ((rd) + 1)))
@ -151,6 +175,8 @@ union jit_double_imm {
((rd) == 0 ? (FSTPr (0), FLDLm((is), (rs), 0, 0)) \ ((rd) == 0 ? (FSTPr (0), FLDLm((is), (rs), 0, 0)) \
: (FLDLm((is), (rs), 0, 0), FSTPr ((rd) + 1))) : (FLDLm((is), (rs), 0, 0), FSTPr ((rd) + 1)))
/* FP-stack load from base register `rs` plus displacement `is`: emits only
   the FLDLm load (presumably pushing the double onto the x87 stack); `rd`
   is ignored. */
#define jit_ldxi_d_fppush(rd, rs, is) FLDLm((is), (rs), 0, 0)
#define jit_ldxr_f(rd, s1, s2) \ #define jit_ldxr_f(rd, s1, s2) \
((rd) == 0 ? (FSTPr (0), FLDSm(0, (s1), (s2), 1)) \ ((rd) == 0 ? (FSTPr (0), FLDSm(0, (s1), (s2), 1)) \
: (FLDSm(0, (s1), (s2), 1), FSTPr ((rd) + 1))) : (FLDSm(0, (s1), (s2), 1), FSTPr ((rd) + 1)))
@ -173,6 +199,9 @@ union jit_double_imm {
#define jit_sti_d(id, rs) jit_fxch ((rs), FSTLm((id), 0, 0, 0)) #define jit_sti_d(id, rs) jit_fxch ((rs), FSTLm((id), 0, 0, 0))
#define jit_str_d(rd, rs) jit_fxch ((rs), FSTLm(0, (rd), 0, 0)) #define jit_str_d(rd, rs) jit_fxch ((rs), FSTLm(0, (rd), 0, 0))
/* FP-stack stores: emit FSTPLm (presumably x87 "store double and pop") to
   the absolute address `id`, or to base register `rd` plus displacement
   `id`. The FP register argument `rs` is ignored; the value stored is
   whatever is on top of the FP stack. */
#define jit_sti_d_fppop(id, rs) FSTPLm((id), 0, 0, 0)
#define jit_stxi_d_fppop(id, rd, rs) FSTPLm((id), (rd), 0, 0)
/* Assume round to near mode */ /* Assume round to near mode */
#define jit_floorr_d_i(rd, rs) \ #define jit_floorr_d_i(rd, rs) \
(FLDr (rs), jit_floor2((rd), ((rd) == _EDX ? _EAX : _EDX))) (FLDr (rs), jit_floor2((rd), ((rd) == _EDX ? _EAX : _EDX)))
@ -248,7 +277,26 @@ union jit_double_imm {
((d) != _EAX ? _O (0x90 + ((d) & 7)) : 0)) /* xchg */ ((d) != _EAX ? _O (0x90 + ((d) & 7)) : 0)) /* xchg */
#define jit_fp_btest(d, s1, s2, n, _and, cmp, res) \ #define jit_fp_btest(d, s1, s2, n, _and, cmp, res) \
(((s1) == 0 ? FUCOMr((s2)) : (FLDr((s1)), FUCOMPr((s2) + 1))), \ (((s1) == 0 ? FCOMr((s2)) : (FLDr((s1)), FUCOMPr((s2) + 1))), \
PUSHLr(_EAX), \
FNSTSWr(_EAX), \
SHRLir(n, _EAX), \
((_and) ? ANDLir ((_and), _EAX) : 0), \
((cmp) ? CMPLir ((cmp), _AL) : 0), \
POPLr(_EAX), \
res ((d), 0, 0, 0), _jit.x.pc)
/* FP-stack form of an FP comparison that produces a value in integer
   register `d`; it takes no FP register operands.
   Sequence emitted:
     - FUCOMPPr(1): the comparison itself (presumably x87 FUCOMPP, which
       compares ST(0) with ST(1) and pops both);
     - when `d` is not EAX, EAX is first copied into `d`, and the final
       one-byte xchg (0x90 + register number) swaps them back, so the
       computed value ends up in `d` and EAX keeps its old contents;
     - FNSTSWr fetches the FP status word into EAX, which is shifted right
       by `n` and masked with `_and` (or cleared to 0 when `_and` is 0);
     - `res` is the caller-supplied instruction that turns the resulting
       flags into the final value. */
#define jit_fp_test_fppop(d, n, _and, res) \
(FUCOMPPr(1), \
((d) != _EAX ? MOVLrr(_EAX, (d)) : 0), \
FNSTSWr(_EAX), \
SHRLir(n, _EAX), \
((_and) ? ANDLir((_and), _EAX) : MOVLir(0, _EAX)), \
res, \
((d) != _EAX ? _O (0x90 + ((d) & 7)) : 0)) /* xchg */
#define jit_fp_btest_fppop(d, n, _and, cmp, res) \
(FUCOMPPr(1), \
PUSHLr(_EAX), \ PUSHLr(_EAX), \
FNSTSWr(_EAX), \ FNSTSWr(_EAX), \
SHRLir(n, _EAX), \ SHRLir(n, _EAX), \
@ -298,22 +346,39 @@ union jit_double_imm {
#define jit_uneqr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 15, 0, ADCBir (0, _AL)) #define jit_uneqr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 15, 0, ADCBir (0, _AL))
#define jit_ordr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 11, 0, SBBBir (-1, _AL)) #define jit_ordr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 11, 0, SBBBir (-1, _AL))
#define jit_unordr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 11, 0, ADCBir (0, _AL)) #define jit_unordr_d(d, s1, s2) jit_fp_test((d), (s1), (s2), 11, 0, ADCBir (0, _AL))
#define jit_bgtr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0, JZm) #define jit_bgtr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0, JZm)
#define jit_bger_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 9, 0, 0, JNCm) #define jit_bger_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 9, 0, 0, JNCm)
#define jit_bantigtr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0, JNZm)
#define jit_bantiger_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 9, 0, 0, JCm)
#define jit_bunler_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0, JNZm) #define jit_bunler_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0, JNZm)
#define jit_bunltr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 9, 0, 0, JCm) #define jit_bunltr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 9, 0, 0, JCm)
#define jit_bltr_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 8, 0x45, 0, JZm) #define jit_bltr_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 8, 0x45, 0, JZm)
#define jit_bler_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 9, 0, 0, JNCm) #define jit_bler_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 9, 0, 0, JNCm)
#define jit_bantiltr_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 8, 0x45, 0, JNZm)
#define jit_bantiler_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 9, 0, 0, JCm)
#define jit_bunger_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 8, 0x45, 0, JNZm) #define jit_bunger_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 8, 0x45, 0, JNZm)
#define jit_bungtr_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 9, 0, 0, JCm) #define jit_bungtr_d(d, s1, s2) jit_fp_btest((d), (s2), (s1), 9, 0, 0, JCm)
#define jit_beqr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0x40, JZm) #define jit_beqr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0x40, JZm)
#define jit_bantieqr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0x40, JNZm)
#define jit_bner_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0x40, JNZm) #define jit_bner_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 8, 0x45, 0x40, JNZm)
#define jit_bltgtr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 15, 0, 0, JNCm) #define jit_bltgtr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 15, 0, 0, JNCm)
#define jit_buneqr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 15, 0, 0, JCm) #define jit_buneqr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 15, 0, 0, JCm)
#define jit_bordr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 11, 0, 0, JNCm) #define jit_bordr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 11, 0, 0, JNCm)
#define jit_bunordr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 11, 0, 0, JCm) #define jit_bunordr_d(d, s1, s2) jit_fp_btest((d), (s1), (s2), 11, 0, 0, JCm)
/* FP-stack ("_fppop") compare-and-branch operations. The FP register
   arguments s1 and s2 are ignored: the operands are the two top entries
   of the FP stack, and jit_fp_btest_fppop compares them with FUCOMPPr(1)
   before branching to `d`. The "le"/"lt" forms reuse the "ge"/"gt" tests
   after exchanging the two stack entries with FXCHr(1). The "anti" forms
   use the opposite conditional jump on the same test. jit_bner_d_fppop
   mirrors jit_bner_d (same test and jump as the "antieq" form) so that
   every name mapped by the non-FP-stack layer also exists here. */
#define jit_bger_d_fppop(d, s1, s2) jit_fp_btest_fppop((d), 9, 0, 0, JNCm)
#define jit_bantiger_d_fppop(d, s1, s2) jit_fp_btest_fppop((d), 9, 0, 0, JCm)
#define jit_bler_d_fppop(d, s1, s2) (FXCHr(1), jit_bger_d_fppop(d, s1, s2))
#define jit_bantiler_d_fppop(d, s1, s2) (FXCHr(1), jit_bantiger_d_fppop(d, s1, s2))
#define jit_bgtr_d_fppop(d, s1, s2) jit_fp_btest_fppop((d), 8, 0x45, 0, JZm)
#define jit_bantigtr_d_fppop(d, s1, s2) jit_fp_btest_fppop((d), 8, 0x45, 0, JNZm)
#define jit_bltr_d_fppop(d, s1, s2) (FXCHr(1), jit_bgtr_d_fppop(d, s1, s2))
#define jit_bantiltr_d_fppop(d, s1, s2) (FXCHr(1), jit_bantigtr_d_fppop(d, s1, s2))
#define jit_beqr_d_fppop(d, s1, s2) jit_fp_btest_fppop((d), 8, 0x45, 0x40, JZm)
#define jit_bantieqr_d_fppop(d, s1, s2) jit_fp_btest_fppop((d), 8, 0x45, 0x40, JNZm)
#define jit_bner_d_fppop(d, s1, s2) jit_fp_btest_fppop((d), 8, 0x45, 0x40, JNZm)
#define jit_getarg_f(rd, ofs) jit_ldxi_f((rd), JIT_FP,(ofs)) #define jit_getarg_f(rd, ofs) jit_ldxi_f((rd), JIT_FP,(ofs))
#define jit_getarg_d(rd, ofs) jit_ldxi_d((rd), JIT_FP,(ofs)) #define jit_getarg_d(rd, ofs) jit_ldxi_d((rd), JIT_FP,(ofs))
#define jit_pusharg_d(rs) (jit_subi_i(JIT_SP,JIT_SP,sizeof(double)), jit_str_d(JIT_SP,(rs))) #define jit_pusharg_d(rs) (jit_subi_i(JIT_SP,JIT_SP,sizeof(double)), jit_str_d(JIT_SP,(rs)))