From ffb0dd52c5428f379318b7e9470530e235059d38 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Sat, 26 Apr 2014 11:11:31 -0600 Subject: [PATCH] ARM JIT: fix software floating-point I broke uses of LDRD and STRD when compacting the set of registers used by the JIT. The LDRD and STRD instructions are given one register explicitly, but they implicitly use the next register, too, and the specified register must be even-numbered. Lining up a pair of registers requires a little shuffling before and after the operation. Also, the LDRDI and STRD encodings were broken, and the inlined fl->fx conversion was not right. Closes PR 14470 --- racket/src/racket/src/lightning/arm/asm.h | 13 +++- racket/src/racket/src/lightning/arm/fp-swf.h | 66 +++++++++++++------- 2 files changed, 53 insertions(+), 26 deletions(-) diff --git a/racket/src/racket/src/lightning/arm/asm.h b/racket/src/racket/src/lightning/arm/asm.h index 8964eb1bf5..600eabb00c 100644 --- a/racket/src/racket/src/lightning/arm/asm.h +++ b/racket/src/racket/src/lightning/arm/asm.h @@ -60,6 +60,15 @@ typedef enum { #define JIT_TMP2 _R7 #define JIT_FTMP JIT_TMP2 +/* Software FP without thumb needs 2 consecutive registers, + so JIT_DTMP must be an even-numbered register. It conceptually + overlaps with JIT_TMP and JIT_FTMP, but shuffle registers + locally to make those two consecutive. */ +#define JIT_DTMP _R2 +#define push_DTMP() jit_movr_p(JIT_FTMP, JIT_DTMP) +#define pop_DTMP() jit_movr_p(JIT_DTMP, JIT_FTMP) +#define alt_DTMP(r) ((r == JIT_DTMP) ? JIT_FTMP : r) + /* must use blx to call functions or jit instruction set matches runtime? 
*/ #define jit_exchange_p() 1 @@ -1763,7 +1772,7 @@ _arm_cc_pkh(jit_state_t _jitp, int cc, int o, int rn, int rd, int rm, int im) #define _CC_LDRDN(cc,rt,rn,rm) corrr(cc,ARM_LDRD,rn,rt,rm) #define _LDRDN(rd,rn,rm) _CC_LDRDN(ARM_CC_AL,rn,rt,rm) #define _CC_LDRDI(cc,rt,rn,im) corri8(cc,ARM_LDRDI|ARM_P,rn,rt,im) -#define _LDRDI(rt,rn,im) _CC_LDRDI(ARM_CC_AL,rn,rt,im) +#define _LDRDI(rt,rn,im) _CC_LDRDI(ARM_CC_AL,rt,rn,im) #define T2_LDRDI(rt,rt2,rn,im) torrri8(THUMB2_LDRDI|ARM_P,rn,rt,rt2,im) #define _CC_LDRDIN(cc,rt,rn,im) corri8(cc,ARM_LDRDI,rn,rt,im) #define _LDRDIN(rt,rn,im) _CC_LDRDIN(ARM_CC_AL,rt,rn,im) @@ -1811,7 +1820,7 @@ _arm_cc_pkh(jit_state_t _jitp, int cc, int o, int rn, int rd, int rm, int im) #define _CC_STRIN(cc,rt,rn,im) corri(cc,ARM_STRI,rn,rt,im) #define _STRIN(rt,rn,im) _CC_STRIN(ARM_CC_AL,rt,rn,im) #define T2_STRIN(rt,rn,im) torri8(THUMB2_STRI,rn,rt,im) -#define _CC_STRD(cc,rt,rn,rm) corrr(cc,ARM_STRD|ARM_P,rt,rn,rm) +#define _CC_STRD(cc,rt,rn,rm) corrr(cc,ARM_STRD|ARM_P,rn,rt,rm) #define _STRD(rt,rn,rm) _CC_STRD(ARM_CC_AL,rt,rn,rm) #define _CC_STRDN(cc,rt,rn,rm) corrr(cc,ARM_STRD,rn,rt,rm) #define _STRDN(rt,rn,rm) _CC_STRDN(ARM_CC_AL,rt,rn,rm) diff --git a/racket/src/racket/src/lightning/arm/fp-swf.h b/racket/src/racket/src/lightning/arm/fp-swf.h index 35c342fa4d..8402493b99 100644 --- a/racket/src/racket/src/lightning/arm/fp-swf.h +++ b/racket/src/racket/src/lightning/arm/fp-swf.h @@ -80,10 +80,10 @@ static float jit_swf_fp(__aeabi_fdiv)(float a, float b) { return a / b; } static double jit_swf_fp(__aeabi_ddiv)(double a, double b) { return a / b; } static float jit_swf_fp(__aeabi_i2f)(int a) { return (float)a; } static double jit_swf_fp(__aeabi_i2d)(int a) { return (double)a; } -static float jit_swf_fp(__aeabi_d2f)(double a) { return (float)a; } static double jit_swf_fp(__aeabi_f2d)(float a) { return (double)a; } -static int jit_swf_fp(__aeabi_f2iz)(double a) { return (int)a; } -static int jit_swf_fp(__aeabi_d2iz)(float a) { return (float)a; } 
+static float jit_swf_fp(__aeabi_d2f)(double a) { return (float)a; } +static int jit_swf_fp(__aeabi_f2iz)(float a) { return (int)a; } +static int jit_swf_fp(__aeabi_d2iz)(double a) { return (int)a; } static int jit_swf_fp(__aeabi_fcmplt)(float a, float b) { return a < b; } static int jit_swf_fp(__aeabi_dcmplt)(double a, double b) { return a < b; } static int jit_swf_fp(__aeabi_fcmple)(float a, float b) { return a <= b; } @@ -175,8 +175,10 @@ swf_movr_d(jit_state_t _jitp, jit_fpr_t r0, jit_fpr_t r1) /* jit_ret() must follow! */ _LDRDIN(_R0, JIT_FP, swf_off(r1) + 8); else { - _LDRDIN(JIT_TMP, JIT_FP, swf_off(r1) + 8); - _STRDIN(JIT_TMP, JIT_FP, swf_off(r0) + 8); + push_DTMP(); + _LDRDIN(JIT_DTMP, JIT_FP, swf_off(r1) + 8); + _STRDIN(JIT_DTMP, JIT_FP, swf_off(r0) + 8); + pop_DTMP(); } } else { @@ -974,8 +976,10 @@ __jit_inline void swf_ldr_d(jit_state_t _jitp, jit_fpr_t r0, jit_gpr_t r1) { if (!jit_thumb_p() && jit_armv5e_p()) { - _LDRDI(JIT_TMP, r1, 0); - _STRDIN(JIT_TMP, JIT_FP, swf_off(r0) + 8); + push_DTMP(); + _LDRDI(JIT_DTMP, r1, 0); + _STRDIN(JIT_DTMP, JIT_FP, swf_off(r0) + 8); + pop_DTMP(); } else { jit_ldxi_i(JIT_TMP, r1, 0); @@ -997,8 +1001,10 @@ swf_ldi_d(jit_state_t _jitp, jit_fpr_t r0, void *i0) { jit_movi_i(JIT_TMP, (int)i0); if (!jit_thumb_p() && jit_armv5e_p()) { - _LDRDI(JIT_TMP, JIT_TMP, 0); - _STRDIN(JIT_TMP, JIT_FP, swf_off(r0) + 8); + push_DTMP(); + _LDRDI(JIT_DTMP, JIT_TMP, 0); + _STRDIN(JIT_DTMP, JIT_FP, swf_off(r0) + 8); + pop_DTMP(); } else { jit_ldxi_i(JIT_FTMP, JIT_TMP, 4); @@ -1019,8 +1025,10 @@ __jit_inline void swf_ldxr_d(jit_state_t _jitp, jit_fpr_t r0, jit_gpr_t r1, jit_gpr_t r2) { if (!jit_thumb_p() && jit_armv5e_p()) { - _LDRD(JIT_TMP, r1, r2); - _STRDIN(JIT_TMP, JIT_FP, swf_off(r0) + 8); + push_DTMP(); + _LDRD(JIT_DTMP, r1, r2); + _STRDIN(JIT_DTMP, JIT_FP, swf_off(r0) + 8); + pop_DTMP(); } else { jit_addr_i(JIT_TMP, r1, r2); @@ -1042,15 +1050,17 @@ __jit_inline void swf_ldxi_d(jit_state_t _jitp, jit_fpr_t r0, jit_gpr_t r1, int i0) { 
if (!jit_thumb_p() && jit_armv5e_p()) { + push_DTMP(); if (i0 >= 0 && i0 <= 255) - _LDRDI(JIT_TMP, r1, i0); + _LDRDI(JIT_DTMP, r1, i0); else if (i0 < 0 && i0 >= -255) - _LDRDIN(JIT_TMP, r1, -i0); + _LDRDIN(JIT_DTMP, r1, -i0); else { - jit_addi_i(JIT_FTMP, r1, i0); - _LDRDI(JIT_TMP, JIT_FTMP, 0); + jit_addi_i(JIT_TMP, r1, i0); + _LDRDI(JIT_DTMP, JIT_TMP, 0); } - _STRDIN(JIT_TMP, JIT_FP, swf_off(r0) + 8); + _STRDIN(JIT_DTMP, JIT_FP, swf_off(r0) + 8); + pop_DTMP(); } else { if (((jit_thumb_p() && i0 >= -255) || @@ -1086,8 +1096,10 @@ __jit_inline void swf_str_d(jit_state_t _jitp, jit_gpr_t r0, jit_fpr_t r1) { if (!jit_thumb_p() && jit_armv5e_p()) { - _LDRDIN(JIT_TMP, JIT_FP, swf_off(r1) + 8); - _STRDI(JIT_TMP, r0, 0); + push_DTMP(); + _LDRDIN(JIT_DTMP, JIT_FP, swf_off(r1) + 8); + _STRDI(JIT_DTMP, alt_DTMP(r0), 0); + pop_DTMP(); } else { swf_ldrin(JIT_TMP, JIT_FP, swf_off(r1) + 8); @@ -1126,8 +1138,10 @@ __jit_inline void swf_stxr_d(jit_state_t _jitp, jit_gpr_t r0, jit_gpr_t r1, jit_fpr_t r2) { if (!jit_thumb_p() && jit_armv5e_p()) { - _LDRDIN(JIT_TMP, JIT_FP, swf_off(r2) + 8); - _STRD(JIT_TMP, r0, r1); + push_DTMP(); + _LDRDIN(JIT_DTMP, JIT_FP, swf_off(r2) + 8); + _STRD(JIT_DTMP, alt_DTMP(r0), alt_DTMP(r1)); + pop_DTMP(); } else { jit_addr_i(JIT_TMP, r0, r1); @@ -1150,12 +1164,16 @@ swf_stxi_d(jit_state_t _jitp, int i0, jit_gpr_t r0, jit_fpr_t r1) { if (!jit_thumb_p() && jit_armv5e_p()) { if (i0 >= 0 && i0 <= 255) { - _LDRDIN(JIT_TMP, JIT_FP, swf_off(r1) + 8); - _STRDI(JIT_TMP, r0, i0); + push_DTMP(); + _LDRDIN(JIT_DTMP, JIT_FP, swf_off(r1) + 8); + _STRDI(JIT_DTMP, alt_DTMP(r0), i0); + pop_DTMP(); } else if (i0 < 0 && i0 >= -255) { - _LDRDIN(JIT_TMP, JIT_FP, swf_off(r1) + 8); - _STRDIN(JIT_TMP, r0, -i0); + push_DTMP(); + _LDRDIN(JIT_DTMP, JIT_FP, swf_off(r1) + 8); + _STRDIN(JIT_DTMP, alt_DTMP(r0), -i0); + pop_DTMP(); } else { jit_addi_i(JIT_FTMP, r0, i0);