ARM JIT: fix software floating-point

I broke uses of LDRD and STRD when compacting the set of registers
used by the JIT. The LDRD and STRD instructions are given one
register explicitly, but they implicitly use the next register, too,
and the specified register must be even-numbered. Lining up a pair of
registers requires a little shuffling before and after the operation.

Also, the LDRDI and STRD encodings were broken, and the inlined
fl->fx conversion was not right.

Closes PR 14470
This commit is contained in:
Matthew Flatt 2014-04-26 11:11:31 -06:00
parent b88f391c1c
commit ffb0dd52c5
2 changed files with 53 additions and 26 deletions

View File

@ -60,6 +60,15 @@ typedef enum {
#define JIT_TMP2 _R7 #define JIT_TMP2 _R7
#define JIT_FTMP JIT_TMP2 #define JIT_FTMP JIT_TMP2
/* Software FP without thumb needs 2 consecutive registers,
so JIT_DTMP must be an even-numbered register. It conceptually
overlaps with JIT_TMP and JIT_FTMP, but shuffle registers
locally to make those two consecutive. */
#define JIT_DTMP _R2
#define push_DTMP() jit_movr_p(JIT_FTMP, JIT_DTMP)
#define pop_DTMP() jit_movr_p(JIT_DTMP, JIT_FTMP)
#define alt_DTMP(r) ((r == JIT_DTMP) ? JIT_FTMP : r)
/* must use blx to call functions or jit instruction set matches runtime? */ /* must use blx to call functions or jit instruction set matches runtime? */
#define jit_exchange_p() 1 #define jit_exchange_p() 1
@ -1763,7 +1772,7 @@ _arm_cc_pkh(jit_state_t _jitp, int cc, int o, int rn, int rd, int rm, int im)
#define _CC_LDRDN(cc,rt,rn,rm) corrr(cc,ARM_LDRD,rn,rt,rm) #define _CC_LDRDN(cc,rt,rn,rm) corrr(cc,ARM_LDRD,rn,rt,rm)
#define _LDRDN(rd,rn,rm) _CC_LDRDN(ARM_CC_AL,rn,rt,rm) #define _LDRDN(rd,rn,rm) _CC_LDRDN(ARM_CC_AL,rn,rt,rm)
#define _CC_LDRDI(cc,rt,rn,im) corri8(cc,ARM_LDRDI|ARM_P,rn,rt,im) #define _CC_LDRDI(cc,rt,rn,im) corri8(cc,ARM_LDRDI|ARM_P,rn,rt,im)
#define _LDRDI(rt,rn,im) _CC_LDRDI(ARM_CC_AL,rn,rt,im) #define _LDRDI(rt,rn,im) _CC_LDRDI(ARM_CC_AL,rt,rn,im)
#define T2_LDRDI(rt,rt2,rn,im) torrri8(THUMB2_LDRDI|ARM_P,rn,rt,rt2,im) #define T2_LDRDI(rt,rt2,rn,im) torrri8(THUMB2_LDRDI|ARM_P,rn,rt,rt2,im)
#define _CC_LDRDIN(cc,rt,rn,im) corri8(cc,ARM_LDRDI,rn,rt,im) #define _CC_LDRDIN(cc,rt,rn,im) corri8(cc,ARM_LDRDI,rn,rt,im)
#define _LDRDIN(rt,rn,im) _CC_LDRDIN(ARM_CC_AL,rt,rn,im) #define _LDRDIN(rt,rn,im) _CC_LDRDIN(ARM_CC_AL,rt,rn,im)
@ -1811,7 +1820,7 @@ _arm_cc_pkh(jit_state_t _jitp, int cc, int o, int rn, int rd, int rm, int im)
#define _CC_STRIN(cc,rt,rn,im) corri(cc,ARM_STRI,rn,rt,im) #define _CC_STRIN(cc,rt,rn,im) corri(cc,ARM_STRI,rn,rt,im)
#define _STRIN(rt,rn,im) _CC_STRIN(ARM_CC_AL,rt,rn,im) #define _STRIN(rt,rn,im) _CC_STRIN(ARM_CC_AL,rt,rn,im)
#define T2_STRIN(rt,rn,im) torri8(THUMB2_STRI,rn,rt,im) #define T2_STRIN(rt,rn,im) torri8(THUMB2_STRI,rn,rt,im)
#define _CC_STRD(cc,rt,rn,rm) corrr(cc,ARM_STRD|ARM_P,rt,rn,rm) #define _CC_STRD(cc,rt,rn,rm) corrr(cc,ARM_STRD|ARM_P,rn,rt,rm)
#define _STRD(rt,rn,rm) _CC_STRD(ARM_CC_AL,rt,rn,rm) #define _STRD(rt,rn,rm) _CC_STRD(ARM_CC_AL,rt,rn,rm)
#define _CC_STRDN(cc,rt,rn,rm) corrr(cc,ARM_STRD,rn,rt,rm) #define _CC_STRDN(cc,rt,rn,rm) corrr(cc,ARM_STRD,rn,rt,rm)
#define _STRDN(rt,rn,rm) _CC_STRDN(ARM_CC_AL,rt,rn,rm) #define _STRDN(rt,rn,rm) _CC_STRDN(ARM_CC_AL,rt,rn,rm)

View File

@ -80,10 +80,10 @@ static float jit_swf_fp(__aeabi_fdiv)(float a, float b) { return a / b; }
static double jit_swf_fp(__aeabi_ddiv)(double a, double b) { return a / b; } static double jit_swf_fp(__aeabi_ddiv)(double a, double b) { return a / b; }
static float jit_swf_fp(__aeabi_i2f)(int a) { return (float)a; } static float jit_swf_fp(__aeabi_i2f)(int a) { return (float)a; }
static double jit_swf_fp(__aeabi_i2d)(int a) { return (double)a; } static double jit_swf_fp(__aeabi_i2d)(int a) { return (double)a; }
static float jit_swf_fp(__aeabi_d2f)(double a) { return (float)a; }
static double jit_swf_fp(__aeabi_f2d)(float a) { return (double)a; } static double jit_swf_fp(__aeabi_f2d)(float a) { return (double)a; }
static int jit_swf_fp(__aeabi_f2iz)(double a) { return (int)a; } static float jit_swf_fp(__aeabi_d2f)(double a) { return (float)a; }
static int jit_swf_fp(__aeabi_d2iz)(float a) { return (float)a; } static int jit_swf_fp(__aeabi_f2iz)(float a) { return (int)a; }
static int jit_swf_fp(__aeabi_d2iz)(double a) { return (int)a; }
static int jit_swf_fp(__aeabi_fcmplt)(float a, float b) { return a < b; } static int jit_swf_fp(__aeabi_fcmplt)(float a, float b) { return a < b; }
static int jit_swf_fp(__aeabi_dcmplt)(double a, double b) { return a < b; } static int jit_swf_fp(__aeabi_dcmplt)(double a, double b) { return a < b; }
static int jit_swf_fp(__aeabi_fcmple)(float a, float b) { return a <= b; } static int jit_swf_fp(__aeabi_fcmple)(float a, float b) { return a <= b; }
@ -175,8 +175,10 @@ swf_movr_d(jit_state_t _jitp, jit_fpr_t r0, jit_fpr_t r1)
/* jit_ret() must follow! */ /* jit_ret() must follow! */
_LDRDIN(_R0, JIT_FP, swf_off(r1) + 8); _LDRDIN(_R0, JIT_FP, swf_off(r1) + 8);
else { else {
_LDRDIN(JIT_TMP, JIT_FP, swf_off(r1) + 8); push_DTMP();
_STRDIN(JIT_TMP, JIT_FP, swf_off(r0) + 8); _LDRDIN(JIT_DTMP, JIT_FP, swf_off(r1) + 8);
_STRDIN(JIT_DTMP, JIT_FP, swf_off(r0) + 8);
pop_DTMP();
} }
} }
else { else {
@ -974,8 +976,10 @@ __jit_inline void
swf_ldr_d(jit_state_t _jitp, jit_fpr_t r0, jit_gpr_t r1) swf_ldr_d(jit_state_t _jitp, jit_fpr_t r0, jit_gpr_t r1)
{ {
if (!jit_thumb_p() && jit_armv5e_p()) { if (!jit_thumb_p() && jit_armv5e_p()) {
_LDRDI(JIT_TMP, r1, 0); push_DTMP();
_STRDIN(JIT_TMP, JIT_FP, swf_off(r0) + 8); _LDRDI(JIT_DTMP, r1, 0);
_STRDIN(JIT_DTMP, JIT_FP, swf_off(r0) + 8);
pop_DTMP();
} }
else { else {
jit_ldxi_i(JIT_TMP, r1, 0); jit_ldxi_i(JIT_TMP, r1, 0);
@ -997,8 +1001,10 @@ swf_ldi_d(jit_state_t _jitp, jit_fpr_t r0, void *i0)
{ {
jit_movi_i(JIT_TMP, (int)i0); jit_movi_i(JIT_TMP, (int)i0);
if (!jit_thumb_p() && jit_armv5e_p()) { if (!jit_thumb_p() && jit_armv5e_p()) {
_LDRDI(JIT_TMP, JIT_TMP, 0); push_DTMP();
_STRDIN(JIT_TMP, JIT_FP, swf_off(r0) + 8); _LDRDI(JIT_DTMP, JIT_TMP, 0);
_STRDIN(JIT_DTMP, JIT_FP, swf_off(r0) + 8);
pop_DTMP();
} }
else { else {
jit_ldxi_i(JIT_FTMP, JIT_TMP, 4); jit_ldxi_i(JIT_FTMP, JIT_TMP, 4);
@ -1019,8 +1025,10 @@ __jit_inline void
swf_ldxr_d(jit_state_t _jitp, jit_fpr_t r0, jit_gpr_t r1, jit_gpr_t r2) swf_ldxr_d(jit_state_t _jitp, jit_fpr_t r0, jit_gpr_t r1, jit_gpr_t r2)
{ {
if (!jit_thumb_p() && jit_armv5e_p()) { if (!jit_thumb_p() && jit_armv5e_p()) {
_LDRD(JIT_TMP, r1, r2); push_DTMP();
_STRDIN(JIT_TMP, JIT_FP, swf_off(r0) + 8); _LDRD(JIT_DTMP, r1, r2);
_STRDIN(JIT_DTMP, JIT_FP, swf_off(r0) + 8);
pop_DTMP();
} }
else { else {
jit_addr_i(JIT_TMP, r1, r2); jit_addr_i(JIT_TMP, r1, r2);
@ -1042,15 +1050,17 @@ __jit_inline void
swf_ldxi_d(jit_state_t _jitp, jit_fpr_t r0, jit_gpr_t r1, int i0) swf_ldxi_d(jit_state_t _jitp, jit_fpr_t r0, jit_gpr_t r1, int i0)
{ {
if (!jit_thumb_p() && jit_armv5e_p()) { if (!jit_thumb_p() && jit_armv5e_p()) {
push_DTMP();
if (i0 >= 0 && i0 <= 255) if (i0 >= 0 && i0 <= 255)
_LDRDI(JIT_TMP, r1, i0); _LDRDI(JIT_DTMP, r1, i0);
else if (i0 < 0 && i0 >= -255) else if (i0 < 0 && i0 >= -255)
_LDRDIN(JIT_TMP, r1, -i0); _LDRDIN(JIT_DTMP, r1, -i0);
else { else {
jit_addi_i(JIT_FTMP, r1, i0); jit_addi_i(JIT_TMP, r1, i0);
_LDRDI(JIT_TMP, JIT_FTMP, 0); _LDRDI(JIT_DTMP, JIT_TMP, 0);
} }
_STRDIN(JIT_TMP, JIT_FP, swf_off(r0) + 8); _STRDIN(JIT_DTMP, JIT_FP, swf_off(r0) + 8);
pop_DTMP();
} }
else { else {
if (((jit_thumb_p() && i0 >= -255) || if (((jit_thumb_p() && i0 >= -255) ||
@ -1086,8 +1096,10 @@ __jit_inline void
swf_str_d(jit_state_t _jitp, jit_gpr_t r0, jit_fpr_t r1) swf_str_d(jit_state_t _jitp, jit_gpr_t r0, jit_fpr_t r1)
{ {
if (!jit_thumb_p() && jit_armv5e_p()) { if (!jit_thumb_p() && jit_armv5e_p()) {
_LDRDIN(JIT_TMP, JIT_FP, swf_off(r1) + 8); push_DTMP();
_STRDI(JIT_TMP, r0, 0); _LDRDIN(JIT_DTMP, JIT_FP, swf_off(r1) + 8);
_STRDI(JIT_DTMP, alt_DTMP(r0), 0);
pop_DTMP();
} }
else { else {
swf_ldrin(JIT_TMP, JIT_FP, swf_off(r1) + 8); swf_ldrin(JIT_TMP, JIT_FP, swf_off(r1) + 8);
@ -1126,8 +1138,10 @@ __jit_inline void
swf_stxr_d(jit_state_t _jitp, jit_gpr_t r0, jit_gpr_t r1, jit_fpr_t r2) swf_stxr_d(jit_state_t _jitp, jit_gpr_t r0, jit_gpr_t r1, jit_fpr_t r2)
{ {
if (!jit_thumb_p() && jit_armv5e_p()) { if (!jit_thumb_p() && jit_armv5e_p()) {
_LDRDIN(JIT_TMP, JIT_FP, swf_off(r2) + 8); push_DTMP();
_STRD(JIT_TMP, r0, r1); _LDRDIN(JIT_DTMP, JIT_FP, swf_off(r2) + 8);
_STRD(JIT_DTMP, alt_DTMP(r0), alt_DTMP(r1));
pop_DTMP();
} }
else { else {
jit_addr_i(JIT_TMP, r0, r1); jit_addr_i(JIT_TMP, r0, r1);
@ -1150,12 +1164,16 @@ swf_stxi_d(jit_state_t _jitp, int i0, jit_gpr_t r0, jit_fpr_t r1)
{ {
if (!jit_thumb_p() && jit_armv5e_p()) { if (!jit_thumb_p() && jit_armv5e_p()) {
if (i0 >= 0 && i0 <= 255) { if (i0 >= 0 && i0 <= 255) {
_LDRDIN(JIT_TMP, JIT_FP, swf_off(r1) + 8); push_DTMP();
_STRDI(JIT_TMP, r0, i0); _LDRDIN(JIT_DTMP, JIT_FP, swf_off(r1) + 8);
_STRDI(JIT_DTMP, alt_DTMP(r0), i0);
pop_DTMP();
} }
else if (i0 < 0 && i0 >= -255) { else if (i0 < 0 && i0 >= -255) {
_LDRDIN(JIT_TMP, JIT_FP, swf_off(r1) + 8); push_DTMP();
_STRDIN(JIT_TMP, r0, -i0); _LDRDIN(JIT_DTMP, JIT_FP, swf_off(r1) + 8);
_STRDIN(JIT_DTMP, alt_DTMP(r0), -i0);
pop_DTMP();
} }
else { else {
jit_addi_i(JIT_FTMP, r0, i0); jit_addi_i(JIT_FTMP, r0, i0);