setup for x86_64 JIT FP

svn: r8487
This commit is contained in:
Matthew Flatt 2008-01-31 16:22:47 +00:00
parent 6a8a1557d3
commit aadb3ec7f0
3 changed files with 318 additions and 2 deletions

View File

@ -1,6 +1,5 @@
This variant of GNU Lightning version 1.2 is slightly modified for
MzScheme:
This variant of GNU Lightning version 1.2 is modified for MzScheme:
* For PowerPC, branches can be generated in long-jump mode, so that
patched addresses are not limited to a 2^16 or 2^26 difference from
@ -27,6 +26,13 @@ MzScheme:
(thanks to Ray Racine), and to call mprotect() based on whether
`MZ_JIT_USE_MPROTECT' (instead of `linux') is #defined.
* The FP operations use unordered comparison, and an "anti" set of
operations inverts the jump (without inverting the arguments).
On x86, there are also _fppush variants that ignore the provided
registers and work in terms of the FP stack, instead.
* The x86_64 FP support is based on lightning 1.3.
----------------------------------------
GNU Lightning authors:

View File

@ -0,0 +1,304 @@
/******************************** -*- C -*- ****************************
*
* Support macros for SSE floating-point math
*
***********************************************************************/
/***********************************************************************
*
* Copyright 2006 Free Software Foundation, Inc.
* Written by Paolo Bonzini.
*
* This file is part of GNU lightning.
*
* GNU lightning is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNU lightning is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with GNU lightning; see the file COPYING.LESSER; if not, write to the
* Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*
***********************************************************************/
#ifndef __lightning_fp_h
#define __lightning_fp_h
#include <float.h>
/* SSE register assignment used by the floating-point macros in this file.  */
#define JIT_FPRET _XMM0            /* presumably the FP return-value register */
#define JIT_FPTMP _XMM6            /* scratch register used by the helper macros below */
#define JIT_FPR_NUM 9              /* how many registers JIT_FPR() may index */
#define JIT_FPR(i) ((i) + _XMM7)   /* i-th FP register, counted upward from _XMM7 */
/* Expand a three-argument helper OP(rd, cnst, rs) that first builds a constant
   in CNST and then combines a source with RD.
   Either use a temporary register that is finally AND/OR/XORed with RS = RD,
   or use RD as the temporary register and do the AND/OR/XOR with RS.  */
#define jit_unop_tmp(rd, rs, op) \
  ( (rs) == (rd) \
    ? op((rd), JIT_FPTMP, JIT_FPTMP) \
    : op((rd), (rd), (rs)))
/* One-operand operation applied in place on RD.  When the source is a
   different register it is first copied into RD (single- or double-precision
   move respectively), then OP is applied to RD.  */
#define jit_unop_f(rd, rs, op) \
  ((rd) != (rs) \
     ? (MOVSSrr ((rs), (rd)), op((rd))) \
     : op((rd)))
#define jit_unop_d(rd, rs, op) \
  ((rd) != (rs) \
     ? (MOVSDrr ((rs), (rd)), op((rd))) \
     : op((rd)))
/* Three-address form of a commutative two-address operation OP(src, dst).
   If RD already holds one of the sources, the other source is applied to it
   directly; otherwise S1 is copied into RD and S2 is applied.  */
#define jit_3opc_f(rd, s1, s2, op) \
  ((rd) == (s1) \
     ? op((s2), (rd)) \
     : (rd) == (s2) \
         ? op((s1), (rd)) \
         : (MOVSSrr ((s1), (rd)), op((s2), (rd))))
#define jit_3opc_d(rd, s1, s2, op) \
  ((rd) == (s1) \
     ? op((s2), (rd)) \
     : (rd) == (s2) \
         ? op((s1), (rd)) \
         : (MOVSDrr ((s1), (rd)), op((s2), (rd))))
/* Three-address form of a non-commutative two-address operation OP(src, dst).
   - RD is S1: apply S2 to RD directly.
   - RD is S2: save RD in JIT_FPTMP, copy S1 into RD, then apply the saved
     value, so the operand order is preserved.
   - otherwise: copy S1 into RD and apply S2.  */
#define jit_3op_f(rd, s1, s2, op) \
  ((rd) == (s1) \
     ? op((s2), (rd)) \
     : (rd) != (s2) \
         ? (MOVSSrr ((s1), (rd)), op((s2), (rd))) \
         : (MOVSSrr ((rd), JIT_FPTMP), MOVSSrr ((s1), (rd)), op(JIT_FPTMP, (rd))))
#define jit_3op_d(rd, s1, s2, op) \
  ((rd) == (s1) \
     ? op((s2), (rd)) \
     : (rd) != (s2) \
         ? (MOVSDrr ((s1), (rd)), op((s2), (rd))) \
         : (MOVSDrr ((rd), JIT_FPTMP), MOVSDrr ((s1), (rd)), op(JIT_FPTMP, (rd))))
/* Register-to-register arithmetic with separate destination and sources.
   Addition and multiplication go through the commutative helper
   (jit_3opc_*); subtraction and division go through jit_3op_*.  */

/* single precision */
#define jit_addr_f(rd, s1, s2) jit_3opc_f((rd), (s1), (s2), ADDSSrr)
#define jit_mulr_f(rd, s1, s2) jit_3opc_f((rd), (s1), (s2), MULSSrr)
#define jit_subr_f(rd, s1, s2) jit_3op_f((rd), (s1), (s2), SUBSSrr)
#define jit_divr_f(rd, s1, s2) jit_3op_f((rd), (s1), (s2), DIVSSrr)

/* double precision */
#define jit_addr_d(rd, s1, s2) jit_3opc_d((rd), (s1), (s2), ADDSDrr)
#define jit_mulr_d(rd, s1, s2) jit_3opc_d((rd), (s1), (s2), MULSDrr)
#define jit_subr_d(rd, s1, s2) jit_3op_d((rd), (s1), (s2), SUBSDrr)
#define jit_divr_d(rd, s1, s2) jit_3op_d((rd), (s1), (s2), DIVSDrr)

/* plain register copies; the underlying move takes (source, destination) */
#define jit_movr_f(rd, rs) MOVSSrr((rs), (rd))
#define jit_movr_d(rd, rs) MOVSDrr((rs), (rd))
/* Absolute value and negation via a sign-bit mask built in a register:
     pcmpeqd MASK, MASK   -- make MASK all ones
     shift MASK           -- right by 1 for abs, left by 31/63 for neg
     and/xor RS, RD       -- combine the source with RD
   jit_unop_tmp selects the registers: when RS = RD the mask is built in
   JIT_FPTMP and then applied to RD; when RS != RD the mask is built in RD
   itself and RS is applied to it.  */

/* single precision (32-bit lane shifts) */
#define _jit_abs_f(rd, mask, rs) \
  (PCMPEQDrr((mask), (mask)), PSRLDir (1, (mask)), ANDPSrr ((rs), (rd)))
#define _jit_neg_f(rd, mask, rs) \
  (PCMPEQDrr((mask), (mask)), PSLLDir (31, (mask)), XORPSrr ((rs), (rd)))
#define jit_abs_f(rd, rs) jit_unop_tmp ((rd), (rs), _jit_abs_f)
#define jit_neg_f(rd, rs) jit_unop_tmp ((rd), (rs), _jit_neg_f)

/* double precision (64-bit lane shifts) */
#define _jit_abs_d(rd, mask, rs) \
  (PCMPEQDrr((mask), (mask)), PSRLQir (1, (mask)), ANDPDrr ((rs), (rd)))
#define _jit_neg_d(rd, mask, rs) \
  (PCMPEQDrr((mask), (mask)), PSLLQir (63, (mask)), XORPDrr ((rs), (rd)))
#define jit_abs_d(rd, rs) jit_unop_tmp ((rd), (rs), _jit_abs_d)
#define jit_neg_d(rd, rs) jit_unop_tmp ((rd), (rs), _jit_neg_d)
/* Square root of RS into RD.  The double-precision form uses the scalar
   double instruction (SQRTSD) and the single-precision form the scalar
   single instruction (SQRTSS); the underlying macros take (source, dest).  */
#define jit_sqrt_d(rd,rs) SQRTSDrr((rs), (rd))
#define jit_sqrt_f(rd,rs) SQRTSSrr((rs), (rd))
/* Single-precision loads and stores.
   The MOVSSmr/MOVSSrm argument lists appear to be (displacement, base, index,
   scale) for the memory operand -- TODO confirm against the assembler macros.
   The underscore-prefixed forms put the immediate straight into the memory
   operand.  */

/* loads */
#define _jit_ldi_f(d, is)        MOVSSmr((is), 0, 0, 0, (d))
#define _jit_ldxi_f(d, rs, is)   MOVSSmr((is), (rs), 0, 0, (d))
#define jit_ldr_f(d, rs)         MOVSSmr(0, (rs), 0, 0, (d))
#define jit_ldxr_f(d, s1, s2)    MOVSSmr(0, (s1), (s2), 1, (d))

/* stores */
#define _jit_sti_f(id, rs)       MOVSSrm((rs), (id), 0, 0, 0)
#define _jit_stxi_f(id, rd, rs)  MOVSSrm((rs), (id), (rd), 0, 0)
#define jit_str_f(rd, rs)        MOVSSrm((rs), 0, (rd), 0, 0)
#define jit_stxr_f(d1, d2, rs)   MOVSSrm((rs), 0, (d1), (d2), 1)

/* Immediate forms: use the immediate directly when _u32P accepts it,
   otherwise materialize it in JIT_REXTMP and use the register form.  */
#define jit_ldi_f(d, is) \
  (_u32P((long)(is)) \
     ? _jit_ldi_f((d), (is)) \
     : (jit_movi_l(JIT_REXTMP, (is)), jit_ldr_f((d), JIT_REXTMP)))
#define jit_sti_f(id, rs) \
  (_u32P((long)(id)) \
     ? _jit_sti_f((id), (rs)) \
     : (jit_movi_l(JIT_REXTMP, (id)), jit_str_f (JIT_REXTMP, (rs))))
#define jit_ldxi_f(d, rs, is) \
  (_u32P((long)(is)) \
     ? _jit_ldxi_f((d), (rs), (is)) \
     : (jit_movi_l(JIT_REXTMP, (is)), jit_ldxr_f((d), (rs), JIT_REXTMP)))
#define jit_stxi_f(id, rd, rs) \
  (_u32P((long)(id)) \
     ? _jit_stxi_f((id), (rd), (rs)) \
     : (jit_movi_l(JIT_REXTMP, (id)), jit_stxr_f (JIT_REXTMP, (rd), (rs))))
/* Double-precision loads and stores.
   The MOVSDmr/MOVSDrm argument lists appear to be (displacement, base, index,
   scale) for the memory operand -- TODO confirm against the assembler macros.
   The underscore-prefixed forms put the immediate straight into the memory
   operand.  */

/* loads */
#define _jit_ldi_d(d, is)        MOVSDmr((is), 0, 0, 0, (d))
#define _jit_ldxi_d(d, rs, is)   MOVSDmr((is), (rs), 0, 0, (d))
#define jit_ldr_d(d, rs)         MOVSDmr(0, (rs), 0, 0, (d))
#define jit_ldxr_d(d, s1, s2)    MOVSDmr(0, (s1), (s2), 1, (d))

/* stores */
#define _jit_sti_d(id, rs)       MOVSDrm((rs), (id), 0, 0, 0)
#define _jit_stxi_d(id, rd, rs)  MOVSDrm((rs), (id), (rd), 0, 0)
#define jit_str_d(rd, rs)        MOVSDrm((rs), 0, (rd), 0, 0)
#define jit_stxr_d(d1, d2, rs)   MOVSDrm((rs), 0, (d1), (d2), 1)

/* Immediate forms: use the immediate directly when _u32P accepts it,
   otherwise materialize it in JIT_REXTMP and use the register form.  */
#define jit_ldi_d(d, is) \
  (_u32P((long)(is)) \
     ? _jit_ldi_d((d), (is)) \
     : (jit_movi_l(JIT_REXTMP, (is)), jit_ldr_d((d), JIT_REXTMP)))
#define jit_sti_d(id, rs) \
  (_u32P((long)(id)) \
     ? _jit_sti_d((id), (rs)) \
     : (jit_movi_l(JIT_REXTMP, (id)), jit_str_d (JIT_REXTMP, (rs))))
#define jit_ldxi_d(d, rs, is) \
  (_u32P((long)(is)) \
     ? _jit_ldxi_d((d), (rs), (is)) \
     : (jit_movi_l(JIT_REXTMP, (is)), jit_ldxr_d((d), (rs), JIT_REXTMP)))
#define jit_stxi_d(id, rd, rs) \
  (_u32P((long)(id)) \
     ? _jit_stxi_d((id), (rd), (rs)) \
     : (jit_movi_l(JIT_REXTMP, (id)), jit_stxr_d (JIT_REXTMP, (rd), (rs))))
/* Load the single-precision constant IMMF into RD.
   A zero constant is produced by XORing RD with itself.  Any other value goes
   through the stack:
     - the raw byte 0x50 is emitted (the x86 opcode for pushing the
       accumulator), which makes room on the stack;
     - a store of a placeholder 32-bit immediate to the slot at _ESP is
       emitted, and the last four emitted bytes (at _jit.x.uc_pc - 4) are then
       overwritten with the bit pattern of (float) IMMF;
     - RD is loaded from the slot and _ESP is adjusted by 4.
   NOTE(review): on x86_64 a push moves the stack pointer by 8 bytes while the
   cleanup here adds 4 with a 32-bit ADDL -- confirm the stack stays balanced
   before this path is enabled.
   NOTE(review): IMMF is evaluated more than once and is not parenthesized in
   the cast; pass a simple constant expression.  */
#define jit_movi_f(rd,immf) \
((immf) == 0.0 ? XORSSrr ((rd), (rd)) : \
(_O (0x50), \
MOVLim (0x12345678L, 0, _ESP, 0, 0), \
*((float *) (_jit.x.uc_pc - 4)) = (float) immf, \
jit_ldr_f((rd), _ESP), \
ADDLir(4, _ESP)))
/* Overlay of a double with a long.  jit_movi_d uses the `d' member to write a
   double's bits over an already-emitted 8-byte immediate.
   NOTE(review): this assumes `long' is as wide as `double' (8 bytes) -- true
   on LP64 targets, confirm for others.  */
union jit_double_imm {
double d;
long l;
};
/* Load the double-precision constant IMMD into RD.
   A zero constant is produced by XORing RD with itself.  Any other value goes
   through the accumulator and the stack:
     - raw byte 0x50 (push accumulator) saves the accumulator's current value;
     - a 64-bit move of a placeholder immediate into _EAX is emitted, and the
       last eight emitted bytes (at _jit.x.uc_pc - 8) are overwritten with the
       bit pattern of (double) IMMD;
     - a second 0x50 pushes that value and RD is loaded from the slot at _ESP;
     - two raw 0x58 bytes (pop accumulator) discard the slot and then restore
       the saved accumulator value.
   NOTE(review): IMMD is evaluated more than once and is not parenthesized in
   the cast; pass a simple constant expression.  */
#define jit_movi_d(rd,immd) \
((immd) == 0.0 ? XORSDrr ((rd), (rd)) : \
(_O (0x50), \
MOVQir (0x123456789abcdef0L, _EAX), \
((union jit_double_imm *) (_jit.x.uc_pc - 8))->d = (double) immd, \
_O (0x50), jit_ldr_d((rd), _ESP), \
_O (0x58), _O (0x58)))
/* Conversions between integer and floating-point registers.  In the macro
   names, _i/_l select the 32-/64-bit integer form (L/Q instruction suffix) and
   _f/_d select single/double precision (SS/SD).  Every underlying macro is
   called as (source, destination).  */

/* integer -> floating point */
#define jit_extr_i_f(rd, rs)    CVTSI2SSLrr((rs), (rd))
#define jit_extr_i_d(rd, rs)    CVTSI2SDLrr((rs), (rd))
#define jit_extr_l_f(rd, rs)    CVTSI2SSQrr((rs), (rd))
#define jit_extr_l_d(rd, rs)    CVTSI2SDQrr((rs), (rd))

/* floating point -> integer via the CVTSx2SI forms */
#define jit_roundr_f_i(rd, rs)  CVTSS2SILrr((rs), (rd))
#define jit_roundr_d_i(rd, rs)  CVTSD2SILrr((rs), (rd))
#define jit_roundr_f_l(rd, rs)  CVTSS2SIQrr((rs), (rd))
#define jit_roundr_d_l(rd, rs)  CVTSD2SIQrr((rs), (rd))

/* floating point -> integer via the truncating CVTTSx2SI forms */
#define jit_truncr_f_i(rd, rs)  CVTTSS2SILrr((rs), (rd))
#define jit_truncr_d_i(rd, rs)  CVTTSD2SILrr((rs), (rd))
#define jit_truncr_f_l(rd, rs)  CVTTSS2SIQrr((rs), (rd))
#define jit_truncr_d_l(rd, rs)  CVTTSD2SIQrr((rs), (rd))
/* Ceiling of the FP register RS into the integer register RD.
   Each variant emits the same four steps:
     1. convert RS to an integer in RD with jit_roundr_*;
     2. convert that integer back to floating point in JIT_FPTMP;
     3. compare RS with JIT_FPTMP (UCOMISS/UCOMISD);
     4. add the carry flag from that compare to RD (ADC with immediate 0).
   Presumably the carry is set when the rounded value is below RS, so the
   result is bumped up by one in exactly that case -- TODO confirm the operand
   order of the UCOMIS* macros.
   These are statement macros (do/while), unlike the expression macros used in
   most of this file.
   NOTE(review): the _l variants produce a 64-bit integer but still adjust it
   with the 32-bit ADCL form -- confirm whether a 64-bit ADC is required.  */
#define jit_ceilr_f_i(rd, rs) do { \
jit_roundr_f_i ((rd), (rs)); \
jit_extr_i_f (JIT_FPTMP, (rd)); \
UCOMISSrr ((rs), JIT_FPTMP); \
ADCLir (0, (rd)); \
} while (0)
#define jit_ceilr_d_i(rd, rs) do { \
jit_roundr_d_i ((rd), (rs)); \
jit_extr_i_d (JIT_FPTMP, (rd)); \
UCOMISDrr ((rs), JIT_FPTMP); \
ADCLir (0, (rd)); \
} while (0)
#define jit_ceilr_f_l(rd, rs) do { \
jit_roundr_f_l ((rd), (rs)); \
jit_extr_l_f (JIT_FPTMP, (rd)); \
UCOMISSrr ((rs), JIT_FPTMP); \
ADCLir (0, (rd)); \
} while (0)
#define jit_ceilr_d_l(rd, rs) do { \
jit_roundr_d_l ((rd), (rs)); \
jit_extr_l_d (JIT_FPTMP, (rd)); \
UCOMISDrr ((rs), JIT_FPTMP); \
ADCLir (0, (rd)); \
} while (0)
/* Floor of the FP register RS into the integer register RD.
   Each variant emits the same four steps:
     1. convert RS to an integer in RD with jit_roundr_*;
     2. convert that integer back to floating point in JIT_FPTMP;
     3. compare JIT_FPTMP with RS (UCOMISS/UCOMISD; note the operands are in
        the opposite order from the ceiling macros);
     4. subtract the carry flag from that compare from RD (SBB with
        immediate 0).
   Presumably the carry is set when the rounded value is above RS, so the
   result is lowered by one in exactly that case -- TODO confirm the operand
   order of the UCOMIS* macros.
   These are statement macros (do/while), unlike the expression macros used in
   most of this file.
   NOTE(review): the _l variants produce a 64-bit integer but still adjust it
   with the 32-bit SBBL form -- confirm whether a 64-bit SBB is required.  */
#define jit_floorr_f_i(rd, rs) do { \
jit_roundr_f_i ((rd), (rs)); \
jit_extr_i_f (JIT_FPTMP, (rd)); \
UCOMISSrr (JIT_FPTMP, (rs)); \
SBBLir (0, (rd)); \
} while (0)
#define jit_floorr_d_i(rd, rs) do { \
jit_roundr_d_i ((rd), (rs)); \
jit_extr_i_d (JIT_FPTMP, (rd)); \
UCOMISDrr (JIT_FPTMP, (rs)); \
SBBLir (0, (rd)); \
} while (0)
#define jit_floorr_f_l(rd, rs) do { \
jit_roundr_f_l ((rd), (rs)); \
jit_extr_l_f (JIT_FPTMP, (rd)); \
UCOMISSrr (JIT_FPTMP, (rs)); \
SBBLir (0, (rd)); \
} while (0)
#define jit_floorr_d_l(rd, rs) do { \
jit_roundr_d_l ((rd), (rs)); \
jit_extr_l_d (JIT_FPTMP, (rd)); \
UCOMISDrr (JIT_FPTMP, (rs)); \
SBBLir (0, (rd)); \
} while (0)
/* Single-precision compare-and-branch: UCOMISS on the two sources followed
   by a conditional jump to D.
   The ordered forms (blt/ble/bge/bgt) pick the operand order so that the
   "above" / "above or equal" conditions can be used; the "un" forms use the
   negated conditions (JNA/JNAE), and bord/bunord test the parity flag.
   beq and bne need explicit handling of the parity flag and hand-encode short
   jumps with _OO:
     - beq: JP +6 skips the JE that follows (assumed to be 6 bytes long);
     - bne: JP +2 skips the 2-byte JZ and lands on the JMP, while JZ +5 skips
       the JMP (assumed to be 5 bytes long).  */
#define jit_bltr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JAm ((d)))
#define jit_bler_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JAEm ((d)))
#define jit_beqr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), _OO (0x7a06), JEm ((d)))
#define jit_bner_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), _OO (0x7a02), _OO (0x7405), JMPm (((d)))) /* JP to JMP, JZ past JMP */
#define jit_bger_f(d, s1, s2) (UCOMISSrr ((s2), (s1)), JAEm ((d)))
#define jit_bgtr_f(d, s1, s2) (UCOMISSrr ((s2), (s1)), JAm ((d)))
#define jit_bunltr_f(d, s1, s2) (UCOMISSrr ((s2), (s1)), JNAEm ((d)))
#define jit_bunler_f(d, s1, s2) (UCOMISSrr ((s2), (s1)), JNAm ((d)))
#define jit_buneqr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JEm ((d)))
#define jit_bltgtr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JNEm ((d)))
#define jit_bunger_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JNAm ((d)))
#define jit_bungtr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JNAEm ((d)))
#define jit_bordr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JNPm ((d)))
#define jit_bunordr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JPm ((d)))
/* Double-precision compare-and-branch: UCOMISD on the two sources followed
   by a conditional jump to D.
   The ordered forms (blt/ble/bge/bgt) pick the operand order so that the
   "above" / "above or equal" conditions can be used; the "un" forms use the
   negated conditions (JNA/JNAE), and bord/bunord test the parity flag.
   beq and bne need explicit handling of the parity flag and hand-encode short
   jumps with _OO:
     - beq: JP +6 skips the JE that follows (assumed to be 6 bytes long);
     - bne: JP +2 skips the 2-byte JZ and lands on the JMP, while JZ +5 skips
       the JMP (assumed to be 5 bytes long).  */
#define jit_bltr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JAm ((d)))
#define jit_bler_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JAEm ((d)))
#define jit_beqr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), _OO (0x7a06), JEm ((d)))
#define jit_bner_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), _OO (0x7a02), _OO (0x7405), JMPm (((d)))) /* JP to JMP, JZ past JMP */
#define jit_bger_d(d, s1, s2) (UCOMISDrr ((s2), (s1)), JAEm ((d)))
#define jit_bgtr_d(d, s1, s2) (UCOMISDrr ((s2), (s1)), JAm ((d)))
#define jit_bunltr_d(d, s1, s2) (UCOMISDrr ((s2), (s1)), JNAEm ((d)))
#define jit_bunler_d(d, s1, s2) (UCOMISDrr ((s2), (s1)), JNAm ((d)))
#define jit_buneqr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JEm ((d)))
#define jit_bltgtr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JNEm ((d)))
#define jit_bunger_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JNAm ((d)))
#define jit_bungtr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JNAEm ((d)))
#define jit_bordr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JNPm ((d)))
#define jit_bunordr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JPm ((d)))
/* Single-precision comparisons that leave a 0/1 result in the integer
   register D.  The common shape is: clear D with XOR, compare with UCOMISS,
   then SETcc on the 8-bit part of D (jit_reg8).  The XOR is emitted before the
   compare, so it cannot disturb the flags that SETcc reads.
   Two forms handle the parity flag explicitly with a hand-encoded JP +3
   (_OO(0x7a03)) that skips the SETcc:
     - eq: D was cleared, so skipping SETE leaves 0;
     - ne: D is loaded with 1 by a MOV placed after the compare, so skipping
       SETNE leaves 1.
   NOTE(review): the 3-byte skip assumes the SETcc encodes in exactly three
   bytes -- confirm for registers whose 8-bit form needs a REX prefix.  */
#define jit_ltr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETAr (jit_reg8((d))))
#define jit_ler_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETAEr (jit_reg8((d))))
#define jit_eqr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), _OO(0x7a03), SETEr (jit_reg8((d))))
#define jit_ner_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), MOVLir (1, (d)), _OO(0x7a03), SETNEr (jit_reg8((d))))
#define jit_ger_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s2), (s1)), SETAEr (jit_reg8((d))))
#define jit_gtr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s2), (s1)), SETAr (jit_reg8((d))))
#define jit_unltr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s2), (s1)), SETNAEr (jit_reg8((d))))
#define jit_unler_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s2), (s1)), SETNAr (jit_reg8((d))))
#define jit_uneqr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETEr (jit_reg8((d))))
#define jit_ltgtr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETNEr (jit_reg8((d))))
#define jit_unger_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETNAr (jit_reg8((d))))
#define jit_ungtr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETNAEr (jit_reg8((d))))
#define jit_ordr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETNPr (jit_reg8((d))))
#define jit_unordr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETPr (jit_reg8((d))))
/* Double-precision comparisons that leave a 0/1 result in the integer
   register D.  The common shape is: clear D with XOR, compare with UCOMISD,
   then SETcc on the 8-bit part of D (jit_reg8).  The XOR is emitted before the
   compare, so it cannot disturb the flags that SETcc reads.
   Two forms handle the parity flag explicitly with a hand-encoded JP +3
   (_OO(0x7a03)) that skips the SETcc:
     - eq: D was cleared, so skipping SETE leaves 0;
     - ne: D is loaded with 1 by a MOV placed after the compare, so skipping
       SETNE leaves 1.
   NOTE(review): the 3-byte skip assumes the SETcc encodes in exactly three
   bytes -- confirm for registers whose 8-bit form needs a REX prefix.  */
#define jit_ltr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETAr (jit_reg8((d))))
#define jit_ler_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETAEr (jit_reg8((d))))
#define jit_eqr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), _OO(0x7a03), SETEr (jit_reg8((d))))
#define jit_ner_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), MOVLir (1, (d)), _OO(0x7a03), SETNEr (jit_reg8((d))))
#define jit_ger_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s2), (s1)), SETAEr (jit_reg8((d))))
#define jit_gtr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s2), (s1)), SETAr (jit_reg8((d))))
#define jit_unltr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s2), (s1)), SETNAEr (jit_reg8((d))))
#define jit_unler_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s2), (s1)), SETNAr (jit_reg8((d))))
#define jit_uneqr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETEr (jit_reg8((d))))
#define jit_ltgtr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNEr (jit_reg8((d))))
#define jit_unger_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNAr (jit_reg8((d))))
#define jit_ungtr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNAEr (jit_reg8((d))))
#define jit_ordr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNPr (jit_reg8((d))))
#define jit_unordr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETPr (jit_reg8((d))))
/* Floating-point argument passing; arguments live in registers numbered
   upward from _XMM0.  */

/* Caller side: jit_prepare_* records the register one past the last of NUM
   outgoing arguments; each jit_pusharg_* then steps that cursor down by one
   and copies RS into the register it now names.  */
#define jit_prepare_f(num) \
  (_jitl.nextarg_putfp = _XMM0 + (num))
#define jit_prepare_d(num) \
  (_jitl.nextarg_putfp = _XMM0 + (num))
#define jit_pusharg_f(rs) \
  (_jitl.nextarg_putfp -= 1, jit_movr_f (_jitl.nextarg_putfp, (rs)))
#define jit_pusharg_d(rs) \
  (_jitl.nextarg_putfp -= 1, jit_movr_d (_jitl.nextarg_putfp, (rs)))

/* Callee side: jit_arg_* yields the register of the next incoming argument
   and advances the cursor; jit_getarg_* copies that register (OFS) into RD.  */
#define jit_arg_f() \
  (_jitl.nextarg_getfp++ + _XMM0)
#define jit_arg_d() \
  (_jitl.nextarg_getfp++ + _XMM0)
#define jit_getarg_f(rd, ofs) \
  (jit_movr_f ((rd), (ofs)))
#define jit_getarg_d(rd, ofs) \
  (jit_movr_d ((rd), (ofs)))
#endif /* __lightning_fp_h */

View File

@ -33,6 +33,10 @@
#ifndef __lightning_asm_fp_h
#define __lightning_asm_fp_h
#ifdef JIT_X86_64___
# include "fp-64.h"
#else
/* We really must map the x87 stack onto a flat register file. In practice,
we can provide something sensible and make it work on the x86 using the
stack like a file of eight registers.
@ -409,4 +413,6 @@ union jit_double_imm {
_OO(0xd9f1)) /* fyl2x */
#endif
#endif
#endif /* __lightning_asm_fp_h */