implement JITted x86 floating-point operations with SSE

Currently, the choice of SSE vs. x87 is made when Racket is compiled.
SSE is always used for x86_64 and on Mac OS X, and x87 is used
for other platforms.
This commit is contained in:
Matthew Flatt 2011-11-23 16:51:23 -07:00
parent b7d2d5da62
commit e438793d43
9 changed files with 819 additions and 24 deletions

View File

@ -162,7 +162,9 @@
# if defined(i386)
# define SCHEME_PLATFORM_LIBRARY_SUBPATH "i386-linux"
# define REGISTER_POOR_MACHINE
# define ASM_DBLPREC_CONTROL_87
# ifndef MZ_USE_JIT_SSE
# define ASM_DBLPREC_CONTROL_87
# endif
# endif
# if defined(powerpc)
# define SCHEME_PLATFORM_LIBRARY_SUBPATH "ppc-linux"
@ -186,7 +188,9 @@
# if defined(__x86_64__)
# define SCHEME_PLATFORM_LIBRARY_SUBPATH "x86_64-linux"
# define REGISTER_POOR_MACHINE
# define ASM_DBLPREC_CONTROL_87
# ifdef MZ_NO_JIT_SSE
# define ASM_DBLPREC_CONTROL_87
# endif
# endif
# ifndef SCHEME_PLATFORM_LIBRARY_SUBPATH
# define SCHEME_PLATFORM_LIBRARY_SUBPATH "unknown-linux"
@ -336,17 +340,21 @@
# define SCHEME_PLATFORM_LIBRARY_SUBPATH "i386-freebsd"
# define REGISTER_POOR_MACHINE
# define MZ_USE_JIT_I386
# if defined(__FreeBSD_kernel__)
# define ASM_DBLPREC_CONTROL_87
# else
# define FREEBSD_CONTROL_387
# ifndef MZ_JIT_X86_SSE
# if defined(__FreeBSD_kernel__)
# define ASM_DBLPREC_CONTROL_87
# else
# define FREEBSD_CONTROL_387
# endif
# endif
# elif defined(__amd64__)
# define SCHEME_PLATFORM_LIBRARY_SUBPATH "amd64-freebsd"
# define REGISTER_POOR_MACHINE
# define MZ_USE_JIT_X86_64
# if defined(__FreeBSD_kernel__)
# define ASM_DBLPREC_CONTROL_87
# ifdef MZ_NO_JIT_SSE
# if defined(__FreeBSD_kernel__)
# define ASM_DBLPREC_CONTROL_87
# endif
# endif
# elif defined(__sparc64__)
# define SCHEME_PLATFORM_LIBRARY_SUBPATH "sparc64-freebsd"
@ -751,7 +759,10 @@
# define MZ_USE_JIT_X86_64
#else
# define MZ_USE_JIT_I386
# define ASM_DBLPREC_CONTROL_87
# ifndef MZ_NO_JIT_SSE
# define MZ_USE_JIT_SSE
# define ASM_DBLPREC_CONTROL_87
# endif
#endif
# define MZ_JIT_USE_MPROTECT

View File

@ -298,7 +298,7 @@ JIT_HEADERS = $(srcdir)/../src/jit.h \
$(srcdir)/lightning/i386/core.h $(srcdir)/lightning/i386/core-common.h \
$(srcdir)/lightning/i386/asm.h $(srcdir)/lightning/i386/asm-common.h \
$(srcdir)/lightning/i386/funcs.h $(srcdir)/lightning/i386/funcs-common.h \
$(srcdir)/lightning/i386/fp.h $(srcdir)/lightning/i386/fp-common.h \
$(srcdir)/lightning/i386/fp.h $(srcdir)/lightning/i386/fp-sse.h $(srcdir)/lightning/i386/fp-common.h \
$(srcdir)/lightning/ppc/core.h $(srcdir)/lightning/ppc/core-common.h \
$(srcdir)/lightning/ppc/asm.h $(srcdir)/lightning/ppc/asm-common.h \
$(srcdir)/lightning/ppc/funcs.h $(srcdir)/lightning/ppc/funcs-common.h \

View File

@ -43,6 +43,9 @@ END_XFORM_ARITH;
#ifdef MZ_USE_JIT_X86_64
# define MZ_USE_JIT_I386
# define JIT_X86_64
# ifndef MZ_NO_JIT_SSE
# define JIT_X86_SSE
# endif
#endif
#ifdef MZ_USE_JIT_I386
@ -51,6 +54,12 @@ END_XFORM_ARITH;
# endif
#endif
#ifdef MZ_USE_JIT_SSE
# ifndef JIT_X86_SSE
# define JIT_X86_SSE
# endif
#endif
#ifdef MZ_USE_JIT_PPC
# ifndef DEFINE_LIGHTNING_FUNCS
# define SUPPRESS_LIGHTNING_FUNCS
@ -275,7 +284,7 @@ extern struct scheme_jit_common_record scheme_jit_common;
#define sjc scheme_jit_common
typedef struct {
typedef struct mz_jit_state {
MZTAG_IF_REQUIRED
GC_CAN_IGNORE jit_state js;
char *limit;
@ -835,7 +844,7 @@ void scheme_jit_prolog_again(mz_jit_state *jitter, int n, int ret_addr_reg)
#define __START_TINY_OR_SHORT_JUMPS__(tcond, cond) if (tcond) { __START_TINY_JUMPS__(1); } else { __START_SHORT_JUMPS__(cond); }
#define __END_TINY_OR_SHORT_JUMPS__(tcond, cond) if (tcond) { __END_TINY_JUMPS__(1); } else { __END_SHORT_JUMPS__(cond); }
#ifdef JIT_X86_64
#if defined(JIT_X86_64) || defined(JIT_X86_SSE)
# define __START_TINY_JUMPS_IF_COMPACT__(cond) /* empty */
# define __END_TINY_JUMPS_IF_COMPACT__(cond) /* empty */
#else
@ -941,7 +950,7 @@ static void emit_indentation(mz_jit_state *jitter)
pushes and pops much balance. The popping branch operations pop
both arguments before branching. */
#if !defined(MZ_USE_JIT_I386)
#if !defined(MZ_USE_JIT_I386) || defined(JIT_X86_SSE)
/* Not FP stack, so use normal variants. */
#define DIRECT_FPR_ACCESS
#define jit_movi_d_fppush(rd,immd) jit_movi_d(rd,immd)
@ -959,7 +968,7 @@ static void emit_indentation(mz_jit_state *jitter)
#define jit_abs_d_fppop(rd,rs) jit_abs_d(rd,rs)
#define jit_sqrt_d_fppop(rd,rs) jit_sqrt_d(rd,rs)
#define jit_sti_d_fppop(id, rs) jit_sti_d(id, rs)
#define jit_str_d_fppop(id, rd, rs) jit_str_d(id, rd, rs)
#define jit_str_d_fppop(id, rd) jit_str_d(id, rd)
#define jit_stxi_d_fppop(id, rd, rs) jit_stxi_d(id, rd, rs)
#define jit_stxr_d_fppop(id, rd, rs) jit_stxr_d(id, rd, rs)
#define jit_bger_d_fppop(d, s1, s2) jit_bger_d(d, s1, s2)

View File

@ -278,7 +278,7 @@ int scheme_generate_alloc_retry(mz_jit_state *jitter, int i)
#ifdef JIT_USE_FP_OPS
if (i == 2) {
(void)mz_tl_sti_d_fppop(tl_scheme_jit_save_fp, JIT_FPR1, JIT_R2);
(void)mz_tl_sti_d_fppop(tl_scheme_jit_save_fp, JIT_FPR0, JIT_R2);
}
#endif
JIT_UPDATE_THREAD_RSPTR();
@ -299,7 +299,7 @@ int scheme_generate_alloc_retry(mz_jit_state *jitter, int i)
}
#ifdef JIT_USE_FP_OPS
if (i == 2) {
(void)mz_tl_ldi_d_fppush(JIT_FPR1, tl_scheme_jit_save_fp, JIT_R2);
(void)mz_tl_ldi_d_fppush(JIT_FPR0, tl_scheme_jit_save_fp, JIT_R2);
}
#endif
return 1;

View File

@ -1220,6 +1220,562 @@ typedef _uc jit_insn;
( ((N)&7) == 0) ? 0 : \
JITFAIL(".align argument too large")))
/* --- Media 128-bit instructions ------------------------------------------ */
typedef enum {
X86_SSE_MOV = 0x10,
X86_SSE_MOVLP = 0x12,
X86_SSE_MOVHP = 0x16,
X86_SSE_MOVA = 0x28,
X86_SSE_CVTIS = 0x2a,
X86_SSE_CVTTSI = 0x2c,
X86_SSE_CVTSI = 0x2d,
X86_SSE_UCOMI = 0x2e,
X86_SSE_COMI = 0x2f,
X86_SSE_ROUND = 0x3a,
X86_SSE_SQRT = 0x51,
X86_SSE_RSQRT = 0x52,
X86_SSE_RCP = 0x53,
X86_SSE_AND = 0x54,
X86_SSE_ANDN = 0x55,
X86_SSE_OR = 0x56,
X86_SSE_XOR = 0x57,
X86_SSE_ADD = 0x58,
X86_SSE_MUL = 0x59,
X86_SSE_CVTSD = 0x5a,
X86_SSE_CVTDT = 0x5b,
X86_SSE_SUB = 0x5c,
X86_SSE_MIN = 0x5d,
X86_SSE_DIV = 0x5e,
X86_SSE_MAX = 0x5f,
X86_SSE_X2G = 0x6e,
X86_SSE_EQB = 0x74,
X86_SSE_EQW = 0x75,
X86_SSE_EQD = 0x76,
X86_SSE_G2X = 0x7e,
X86_SSE_MOV2 = 0xd6
} x86_sse_t;
#define _BIT(X) (!!(X))
#define _rR(R) ((R) & 0x0f)
#define _rX(R) _rN(R)
#define _rXP(R) ((R) > 0 && _rR(R) > 7)
#define _SCL1 _b00
#define _rA(R) _r4(R)
#define _RSP 0x54
#define _i_X(op, md, rb, ri, ms) _r_X(op, md, rb, ri, ms)
#define _f_X(rd, md, rb, ri, ms) _i_X((int)_rX(rd), md, rb, ri, ms)
#ifdef JIT_X86_64
# define x86_REXwrxb(l, w, r, x, b) \
(((l) || (((int)(w) << 3) | (((int)(r)) << 2) | (((int)(x)) << 1) | ((int)(b)))) \
? _jit_B(0x40 | (((int)(w) << 3) | (((int)(r)) << 2) | (((int)(x)) << 1) | ((int)(b)))) \
: (void)0)
#else
# define x86_REXwrxb(l, w, r, x, b) (void)0
#endif
#define x86_REXwrx_(l, w, r, x, mr) x86_REXwrxb(l, w, r, x, _BIT(_rXP(mr)))
#define x86_REXw_x_(l, w, r, x, mr) x86_REXwrx_(l, w, _BIT(_rXP(r)), x, mr)
#define x86_rex_l_rr(rr, mr) x86_REXw_x_(0, 0, rr, 0, mr)
#define x86_rex_l_mr(rb, ri, rd) x86_REXw_x_(0, 0, rd, _BIT(_rXP(ri)), rb)
#define x86_rex_l_rm(rs, rb, ri) x86_rex_l_mr(rb, ri, rs)
#define _rex_ff_rr(rr, mr) x86_rex_l_rr(rr, mr)
#define _rex_if_rr(rr, mr) x86_rex_l_rr(rr, mr)
#define _rex_fi_rr(rr, mr) x86_rex_l_rr(rr, mr)
#define _rex_if_mr(rb, ri, rd) x86_rex_l_mr(rb, ri, rd)
#define _rex_fi_rm(rs, rb, ri) x86_rex_l_rm(rs, rb, ri)
#define __sse_ff_rr(op, rs, rd) (_rex_ff_rr(rd, rs), _O(0x0f), _O(op), _Mrm(_b11, _rX(rd), _rX(rs)))
#define __sse_id_rr(op, rs, rd) __sse_if_rr(op, rs, rd)
#define __sse_if_rr(op, rs, rd) (_rex_if_rr(rd, rs), _O(0x0f), _O(op), _Mrm(_b11, _rA(rd), _rX(rs)))
#define __sse_di_rr(op, rs, rd) __sse_fi_rr(op, rs, rd)
#define __sse_fi_rr(op, rs, rd) (_rex_fi_rr(rd, rs), _O(0x0f), _O(op), _Mrm(_b11, _rX(rd), _rA(rs)))
#define __sse_id_mr(op, md, rb, mi, ms, rd) __sse_if_mr(op, md, rb, mi, ms, rd)
#define __sse_if_mr(op, md, rb, ri, ms, rd) (_rex_if_mr(rb, ri, rd), _O(0x0f), _O(op), _f_X(rd, md, rb, ri, ms))
#define __sse_di_rm(op, rs, md, rb, mi, ms) __sse_fi_rm(op, rs, md, rb, mi, ms)
#define __sse_fi_rm(op, rs, md, rb, ri, ms) (_rex_fi_rm(rs, rb, ri), _O(0x0f), _O(op), _f_X(rs, md, rb, ri, ms))
#define __sse1_di_rm(op, rs, md, mb, mi, ms) __sse1_fi_rm(op, rs, md, mb, mi, ms)
#define __sse1_fi_rm(op, rs, md, rb, ri, ms) (_rex_fi_rm(rs, rb, ri), _O(0x0f), _O(0x01 | op), _f_X(rs, md, rb, ri, ms))
#define _sse_ff_rr(px, op, rs, rd) (_jit_B(px), __sse_ff_rr(op, rs, rd))
#define _sse_id_rr(px, op, rs, rd) _sse_if_rr(px, op, rs, rd)
#define _sse_if_rr(px, op, rs, rd) (_jit_B(px), __sse_if_rr(op, rs, rd))
#define _sse_di_rr(px, op, rs, rd) _sse_fi_rr(px, op, rs, rd)
#define _sse_fi_rr(px, op, rs, rd) (_jit_B(px), __sse_fi_rr(op, rs, rd))
#define _sse_id_mr(px, op, md, rb, mi, ms, rd) _sse_if_mr(px, op, md, rb, mi, ms, rd)
#define _sse_if_mr(px, op, md, rb, ri, ms, rd) (_jit_B(px), __sse_if_mr(op, md, rb, ri, ms, rd))
#define _sse_di_rm(px, op, rs, md, rb, mi, ms) _sse_fi_rm(px, op, rs, md, rb, mi, ms)
#define _sse_fi_rm(px, op, rs, md, rb, ri, ms) (_jit_B(px), __sse_fi_rm(op, rs, md, rb, ri, ms))
#define _sse1_di_rm(px, op, rs, md, mb, mi, ms) _sse1_fi_rm(px, op, rs, md, mb, mi, ms)
#define _sse1_fi_rm(px, op, rs, md, rb, ri, ms) (_jit_B(px), __sse1_fi_rm(op, rs, md, rb, ri, ms))
#define _SSEPSrr(OP,RS,RD) __sse_ff_rr ( OP, RS, RD)
#define _SSEPSmr(OP,MD,MB,MI,MS,RD) __sse_if_mr ( OP, MD, MB, MI, MS, RD)
#define _SSEPSrm(OP,RS,MD,MB,MI,MS) __sse_fi_rm ( OP, RS, MD, MB, MI, MS)
#define _SSEPS1rm(OP,RS,MD,MB,MI,MS) __sse1_fi_rm( OP, RS, MD, MB, MI, MS)
#define _SSEPDrr(OP,RS,RD) _sse_ff_rr (0x66, OP, RS, RD)
#define _SSEPDmr(OP,MD,MB,MI,MS,RD) _sse_if_mr (0x66, OP, MD, MB, MI, MS, RD)
#define _SSEPDrm(OP,RS,MD,MB,MI,MS) _sse_fi_rm (0x66, OP, RS, MD, MB, MI, MS)
#define _SSEPD1rm(OP,RS,MD,MB,MI,MS) _sse1_fi_rm(0x66, OP, RS, MD, MB, MI, MS)
#define _SSESSrr(OP,RS,RD) _sse_ff_rr (0xf3, OP, RS, RD)
#define _SSESSmr(OP,MD,MB,MI,MS,RD) _sse_if_mr (0xf3, OP, MD, MB, MI, MS, RD)
#define _SSESSrm(OP,RS,MD,MB,MI,MS) _sse_fi_rm (0xf3, OP, RS, MD, MB, MI, MS)
#define _SSESS1rm(OP,RS,MD,MB,MI,MS) _sse1_fi_rm(0xf3, OP, RS, MD, MB, MI, MS)
#define _SSESDrr(OP,RS,RD) _sse_ff_rr (0xf2, OP, RS, RD)
#define _SSESDmr(OP,MD,MB,MI,MS,RD) _sse_if_mr (0xf2, OP, MD, MB, MI, MS, RD)
#define _SSESDrm(OP,RS,MD,MB,MI,MS) _sse_fi_rm (0xf2, OP, RS, MD, MB, MI, MS)
#define _SSESD1rm(OP,RS,MD,MB,MI,MS) _sse1_fi_rm(0xf2, OP, RS, MD, MB, MI, MS)
#define _NOREG 0
/* SSE */
#define LDMXCSRmr(MD, MB, MI, MS) \
(_REXLmr(MB, MI, _NOREG), \
_O(0x0f), \
_O(0xae), \
_i_X(_b10, MD, MB, MI, MS))
#define STMXCSRrm(MD, MB, MI, MS) \
(_REXLrm(_NOREG, MI, MB), \
_O(0x0f), \
_O(0xae), \
_i_X(_b11, MD, MB, MI, MS))
/* SSE2 */
#define ADDPSrr(RS, RD) _SSEPSrr(X86_SSE_ADD, RS, RD)
#define ADDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
#define ADDPDrr(RS, RD) _SSEPDrr(X86_SSE_ADD, RS, RD)
#define ADDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
/* SSE */
#define ADDSSrr(RS, RD) _SSESSrr(X86_SSE_ADD, RS, RD)
#define ADDSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
/* SSE2 */
#define ADDSDrr(RS, RD) _SSESDrr(X86_SSE_ADD, RS, RD)
#define ADDSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
/* SSE */
#define ANDNPSrr(RS, RD) _SSEPSrr(X86_SSE_ANDN, RS, RD)
#define ANDNPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ANDN, MD, MB, MI, MS, RD)
/* SSE2 */
#define ANDNPDrr(RS, RD) _SSEPDrr(X86_SSE_ANDN, RS, RD)
#define ANDNPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ANDN, MD, MB, MI, MS, RD)
/* SSE -- scalar "andn" aliases; bitwise ops have no scalar encoding, so
   the packed instructions are reused. The mr aliases previously pointed
   at the rr form, dropping the memory operand. */
#define ANDNSSrr ANDNPSrr
#define ANDNSSmr ANDNPSmr
/* SSE2 */
#define ANDNSDrr ANDNPDrr
#define ANDNSDmr ANDNPDmr
/* SSE */
#define ANDPSrr(RS, RD) _SSEPSrr(X86_SSE_AND, RS, RD)
#define ANDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_AND, MD, MB, MI, MS, RD)
/* SSE2 */
#define ANDPDrr(RS, RD) _SSEPDrr(X86_SSE_AND, RS, RD)
#define ANDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_AND, MD, MB, MI, MS, RD)
/* SSE -- scalar "and" aliases for the packed instructions. The mr
   aliases previously pointed at the rr form (copy-paste error). */
#define ANDSSrr ANDPSrr
#define ANDSSmr ANDPSmr
/* SSE2 */
#define ANDSDrr ANDPDrr
#define ANDSDmr ANDPDmr
/* SSE */
#define DIVPSrr(RS, RD) _SSEPSrr(X86_SSE_DIV, RS, RD)
#define DIVPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
/* SSE2 */
#define DIVPDrr(RS, RD) _SSEPDrr(X86_SSE_DIV, RS, RD)
#define DIVPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
/* SSE */
#define DIVSSrr(RS, RD) _SSESSrr(X86_SSE_DIV, RS, RD)
#define DIVSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
/* SSE2 */
#define DIVSDrr(RS, RD) _SSESDrr(X86_SSE_DIV, RS, RD)
#define DIVSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
/* SSE */
#define MAXPSrr(RS, RD) _SSEPSrr(X86_SSE_MAX, RS, RD)
#define MAXPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
/* SSE2 */
#define MAXPDrr(RS, RD) _SSEPDrr(X86_SSE_MAX, RS, RD)
#define MAXPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
/* SSE */
#define MAXSSrr(RS, RD) _SSESSrr(X86_SSE_MAX, RS, RD)
#define MAXSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
/* SSE2 */
#define MAXSDrr(RS, RD) _SSESDrr(X86_SSE_MAX, RS, RD)
#define MAXSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
/* SSE */
#define MINPSrr(RS, RD) _SSEPSrr(X86_SSE_MIN, RS, RD)
#define MINPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
/* SSE2 */
#define MINPDrr(RS, RD) _SSEPDrr(X86_SSE_MIN, RS, RD)
#define MINPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
/* SSE */
#define MINSSrr(RS, RD) _SSESSrr(X86_SSE_MIN, RS, RD)
#define MINSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
/* SSE2 */
#define MINSDrr(RS, RD) _SSESDrr(X86_SSE_MIN, RS, RD)
#define MINSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
/* SSE */
#define MULPSrr(RS, RD) _SSEPSrr(X86_SSE_MUL, RS, RD)
#define MULPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
/* SSE2 */
#define MULPDrr(RS, RD) _SSEPDrr(X86_SSE_MUL, RS, RD)
#define MULPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
/* SSE */
#define MULSSrr(RS, RD) _SSESSrr(X86_SSE_MUL, RS, RD)
#define MULSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
/* SSE2 */
#define MULSDrr(RS, RD) _SSESDrr(X86_SSE_MUL, RS, RD)
#define MULSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
/* SSE */
#define ORPSrr(RS, RD) _SSEPSrr(X86_SSE_OR, RS, RD)
#define ORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_OR, MD, MB, MI, MS, RD)
/* SSE2 */
#define ORPDrr(RS, RD) _SSEPDrr(X86_SSE_OR, RS, RD)
#define ORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_OR, MD, MB, MI, MS, RD)
/* SSE -- scalar "or" aliases for the packed instructions. The mr
   aliases previously pointed at the rr form (copy-paste error). */
#define ORSSrr ORPSrr
#define ORSSmr ORPSmr
/* SSE2 */
#define ORSDrr ORPDrr
#define ORSDmr ORPDmr
/* SSE */
#define RCPPSrr(RS, RD) _SSEPSrr(X86_SSE_RCP, RS, RD)
#define RCPPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RCP, MD, MB, MI, MS, RD)
#define RCPSSrr(RS, RD) _SSESSrr(X86_SSE_RCP, RS, RD)
#define RCPSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RCP, MD, MB, MI, MS, RD)
/* SSE */
#define RSQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_RSQRT, RS, RD)
#define RSQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD)
#define RSQRTSSrr(RS, RD) _SSESSrr(X86_SSE_RSQRT, RS, RD)
#define RSQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD)
/* SSE */
#define SQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_SQRT, RS, RD)
#define SQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
/* SSE2 */
#define SQRTPDrr(RS, RD) _SSEPDrr(X86_SSE_SQRT, RS, RD)
#define SQRTPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
/* SSE */
#define SQRTSSrr(RS, RD) _SSESSrr(X86_SSE_SQRT, RS, RD)
#define SQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
/* SSE2 */
#define SQRTSDrr(RS, RD) _SSESDrr(X86_SSE_SQRT, RS, RD)
#define SQRTSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
/* SSE */
#define SUBPSrr(RS, RD) _SSEPSrr(X86_SSE_SUB, RS, RD)
#define SUBPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
/* SSE2 */
#define SUBPDrr(RS, RD) _SSEPDrr(X86_SSE_SUB, RS, RD)
#define SUBPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
/* SSE */
#define SUBSSrr(RS, RD) _SSESSrr(X86_SSE_SUB, RS, RD)
#define SUBSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
/* SSE2 */
#define SUBSDrr(RS, RD) _SSESDrr(X86_SSE_SUB, RS, RD)
#define SUBSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
/* SSE */
#define XORPSrr(RS, RD) _SSEPSrr(X86_SSE_XOR, RS, RD)
#define XORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_XOR, MD, MB, MI, MS, RD)
/* SSE2 */
#define XORPDrr(RS, RD) _SSEPDrr(X86_SSE_XOR, RS, RD)
#define XORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_XOR, MD, MB, MI, MS, RD)
/* SSE -- scalar "xor" aliases for the packed instructions. The mr
   aliases previously pointed at the rr form (copy-paste error). */
#define XORSSrr XORPSrr
#define XORSSmr XORPSmr
/* SSE2 */
#define XORSDrr XORPDrr
#define XORSDmr XORPDmr
/* No prefixes here. */
/* SSE */
#define COMISSrr(RS, RD) _SSEPSrr(X86_SSE_COMI, RS, RD)
#define COMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_COMI, MD, MB, MI, MS, RD)
/* SSE2 */
#define COMISDrr(RS, RD) _SSEPDrr(X86_SSE_COMI, RS, RD)
#define COMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_COMI, MD, MB, MI, MS, RD)
/* No prefixes here. */
/* SSE */
#define UCOMISSrr(RS, RD) _SSEPSrr(X86_SSE_UCOMI, RS, RD)
#define UCOMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD)
/* SSE2 */
#define UCOMISDrr(RS, RD) _SSEPDrr(X86_SSE_UCOMI, RS, RD)
#define UCOMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD)
/* SSE */
#define MOVSSrr(RS, RD) _SSESSrr (X86_SSE_MOV, RS, RD)
#define MOVSSmr(MD, MB, MI, MS, RD) _SSESSmr (X86_SSE_MOV, MD, MB, MI, MS, RD)
#define MOVSSrm(RS, MD, MB, MI, MS) _SSESS1rm(X86_SSE_MOV, RS, MD, MB, MI, MS)
/* SSE2 */
#define MOVSDrr(RS, RD) _SSESDrr (X86_SSE_MOV, RS, RD)
#define MOVSDmr(MD, MB, MI, MS, RD) _SSESDmr (X86_SSE_MOV, MD, MB, MI, MS, RD)
#define MOVSDrm(RS, MD, MB, MI, MS) _SSESD1rm(X86_SSE_MOV, RS, MD, MB, MI, MS)
/* SSE */
#define MOVAPSrr(RS, RD) _SSEPSrr (X86_SSE_MOVA, RS, RD)
#define MOVAPSmr(MD, MB, MI, MS, RD) _SSEPSmr (X86_SSE_MOVA, MD, MB, MI, MS, RD)
#define MOVAPSrm(RS, MD, MB, MI, MS) _SSEPS1rm(X86_SSE_MOVA, RS, MD, MB, MI, MS)
/* SSE2 */
#define MOVAPDrr(RS, RD) _SSEPDrr (X86_SSE_MOVA, RS, RD)
#define MOVAPDmr(MD, MB, MI, MS, RD) _SSEPDmr (X86_SSE_MOVA, MD, MB, MI, MS, RD)
#define MOVAPDrm(RS, MD, MB, MI, MS) _SSEPD1rm(X86_SSE_MOVA, RS, MD, MB, MI, MS)
/* SSE */
#define CVTPS2PIrr(RS, RD) __sse_ff_rr( X86_SSE_CVTSI, RS, RD)
#define CVTPS2PImr(MD, MB, MI, MS, RD) __sse_if_mr( X86_SSE_CVTSI, MD, MB, MI, MS, RD)
/* SSE2 */
#define CVTPD2PIrr(RS, RD) _sse_ff_rr(0x66, X86_SSE_CVTSI, RS, RD)
#define CVTPD2PImr(MD, MB, MI, MS, RD) _sse_id_mr(0x66, X86_SSE_CVTSI, MD, MB, MI, MS, RD)
/* SSE */
#define CVTPI2PSrr(RS, RD) __sse_ff_rr( X86_SSE_CVTIS, RS, RD)
#define CVTPI2PSmr(MD, MB, MI, MS, RD) __sse_if_mr( X86_SSE_CVTIS, MD, MB, MI, MS, RD)
/* SSE2 */
#define CVTPI2PDrr(RS, RD) _sse_ff_rr(0x66, X86_SSE_CVTIS, RS, RD)
#define CVTPI2PDmr(MD, MB, MI, MS, RD) _sse_id_mr(0x66, X86_SSE_CVTIS, MD, MB, MI, MS, RD)
/* SSE2 */
#define CVTPS2PDrr(RS, RD) __sse_ff_rr( X86_SSE_CVTSD, RS, RD)
#define CVTPS2PDmr(MD, MB, MI, MS, RD) __sse_if_mr( X86_SSE_CVTSD, MD, MB, MI, MS, RD)
#define CVTPD2PSrr(RS, RD) _sse_ff_rr(0x66, X86_SSE_CVTSD, RS, RD)
#define CVTPD2PSmr(MD, MB, MI, MS, RD) _sse_id_mr(0x66, X86_SSE_CVTSD, MD, MB, MI, MS, RD)
/* SSE2 */
#define CVTSS2SDrr(RS, RD) _sse_ff_rr(0xf3, X86_SSE_CVTSD, RS, RD)
#define CVTSS2SDmr(MD, MB, MI, MS, RD) _sse_id_mr(0xf3, X86_SSE_CVTSD, MD, MB, MI, MS, RD)
#define CVTSD2SSrr(RS, RD) _sse_ff_rr(0xf2, X86_SSE_CVTSD, RS, RD)
#define CVTSD2SSmr(MD, MB, MI, MS, RD) _sse_id_mr(0xf2, X86_SSE_CVTSD, MD, MB, MI, MS, RD)
/* SSE */
#define CVTTSS2SILrr(RS, RD) _sse_id_rr(0xf3, X86_SSE_CVTTSI, RS, RD)
#define CVTTSS2SILmr(MD, MB, MI, MS, RD) _sse_id_mr(0xf3, X86_SSE_CVTTSI, MD, MB, MI, MS, RD)
/* SSE2 */
#define CVTTSD2SILrr(RS, RD) _sse_id_rr(0xf2, X86_SSE_CVTTSI, RS, RD)
#define CVTTSD2SILmr(MD, MB, MI, MS, RD) _sse_id_mr(0xf2, X86_SSE_CVTTSI, MD, MB, MI, MS, RD)
/* SSE */
#define CVTSS2SILrr(RS, RD) _sse_if_rr(0xf3, X86_SSE_CVTSI, RS, RD)
#define CVTSS2SILmr(MD, MB, MI, MS, RD) _sse_if_mr(0xf3, X86_SSE_CVTSI, MD, MB, MI, MS, RD)
/* SSE2 */
#define CVTSD2SILrr(RS, RD) _sse_id_rr(0xf2, X86_SSE_CVTSI, RS, RD)
#define CVTSD2SILmr(MD, MB, MI, MS, RD) _sse_id_mr(0xf2, X86_SSE_CVTSI, MD, MB, MI, MS, RD)
/* SSE */
#define CVTSI2SSLrr(RS, RD) _sse_fi_rr(0xf3, X86_SSE_CVTIS, RS, RD)
#define CVTSI2SSLmr(MD, MB, MI, MS, RD) _sse_if_mr(0xf3, X86_SSE_CVTIS, MD, MB, MI, MS, RD)
/* SSE2 */
#define CVTSI2SDLrr(RS, RD) _sse_di_rr(0xf2, X86_SSE_CVTIS, RS, RD)
#define CVTSI2SDLmr(MD, MB, MI, MS, RD) _sse_id_mr(0xf2, X86_SSE_CVTIS, MD, MB, MI, MS, RD)
/* SSE2 */
#define MOVDLXrr(RS, RD) _sse_di_rr(0x66, X86_SSE_X2G, RS, RD)
#define MOVDLXmr(MD, MB, MI, MS, RD) _sse_id_mr(0x66, X86_SSE_X2G, MD, MB, MI, MS, RD)
/* SSE2 */
#define MOVDXLrr(RS, RD) _sse_ff_rr(0x66, X86_SSE_G2X, RS, RD)
#define MOVDXLrm(RS, MD, MB, MI, MS) _sse_di_rm(0x66, X86_SSE_G2X, RS, MD, MB, MI, MS)
/* SSE */
#define MOVDLMrr(RS, RD) __sse_ff_rr( X86_SSE_X2G, RS, RD)
#define MOVDLMmr(MD, MB, MI, MS, RD) __sse_id_mr( X86_SSE_X2G, MD, MB, MI, MS, RD)
/* SSE */
#define MOVDMLrr(RS, RD) __sse_ff_rr( X86_SSE_G2X, RS, RD)
#define MOVDMLrm(RS, MD, MB, MI, MS) __sse_fi_rm( X86_SSE_G2X, RS, MD, MB, MI, MS)
/* SSE3 */
#define MOVDQ2Qrr(RS, RD) _sse_ff_rr(0xf2, X86_SSE_MOV2, RS, RD)
#define MOVQ2DQrr(RS, RD) _sse_ff_rr(0xf3, X86_SSE_MOV2, RS, RD)
/* SSE */
#define MOVHLPSrr(RS, RD) __sse_ff_rr( X86_SSE_MOVLP, RS, RD)
#define MOVLHPSrr(RS, RD) __sse_ff_rr( X86_SSE_MOVHP, RS, RD)
/* SSE2 */
#define MOVDQArr(RS, RD) _sse_ff_rr(0x66, 0x6f, RS, RD)
#define MOVDQAmr(MD, MB, MI, MS, RD) _sse_id_mr(0x66, 0x6f, MD, MB, MI, MS, RD)
#define MOVDQArm(RS, MD, MB, MI, MS) _sse_di_rm(0x66, 0x7f, RS, MD, MB, MI, MS)
/* SSE2 */
#define MOVDQUrr(RS, RD) _sse_ff_rr(0xf3, 0x6f, RS, RD)
#define MOVDQUmr(MD, MB, MI, MS, RD) _sse_id_mr(0xf3, 0x6f, MD, MB, MI, MS, RD)
#define MOVDQUrm(RS, MD, MB, MI, MS) _sse_di_rm(0xf3, 0x7f, RS, MD, MB, MI, MS)
/* SSE2 */
#define MOVHPDmr(MD, MB, MI, MS, RD) _sse_id_mr (0x66, X86_SSE_MOVHP, MD, MB, MI, MS, RD)
#define MOVHPDrm(RS, MD, MB, MI, MS) _sse1_di_rm(0x66, X86_SSE_MOVHP, RS, MD, MB, MI, MS)
/* SSE */
#define MOVHPSmr(MD, MB, MI, MS, RD) __sse_if_mr ( X86_SSE_MOVHP, MD, MB, MI, MS, RD)
#define MOVHPSrm(RS, MD, MB, MI, MS) __sse1_fi_rm( X86_SSE_MOVHP, RS, MD, MB, MI, MS)
/* SSE2 */
#define MOVLPDmr(MD, MB, MI, MS, RD) _sse_id_mr (0x66, X86_SSE_MOVLP, MD, MB, MI, MS, RD)
#define MOVLPDrm(RS, MD, MB, MI, MS) _sse1_di_rm(0x66, X86_SSE_MOVLP, RS, MD, MB, MI, MS)
/* SSE */
#define MOVLPSmr(MD, MB, MI, MS, RD) __sse_if_mr ( X86_SSE_MOVLP, MD, MB, MI, MS, RD)
#define MOVLPSrm(RS, MD, MB, MI, MS) __sse1_fi_rm( X86_SSE_MOVLP, RS, MD, MB, MI, MS)
/* FIXME 0x66 prefix actually required to modify 128 bits register */
/* SSE or SSE2 with 0x66 prefix */
/* Packed integer compare-equal. Note: despite the `rm' suffix, the
   memory operand is the source and RS is the destination register
   (PCMPEQx has no store form); the expansions previously referenced an
   undefined `RD', which failed to compile when used. */
#define PCMPEQBrr(RS, RD) \
  _sse_ff_rr(0x66, X86_SSE_EQB, RS, RD)
#define PCMPEQBrm(RS, MD, MB, MI, MS) \
  _sse_if_mr(0x66, X86_SSE_EQB, MD, MB, MI, MS, RS)
#define PCMPEQWrr(RS, RD) \
  _sse_ff_rr(0x66, X86_SSE_EQW, RS, RD)
#define PCMPEQWrm(RS, MD, MB, MI, MS) \
  _sse_if_mr(0x66, X86_SSE_EQW, MD, MB, MI, MS, RS)
#define PCMPEQLrr(RS, RD) \
  _sse_ff_rr(0x66, X86_SSE_EQD, RS, RD)
#define PCMPEQLrm(RS, MD, MB, MI, MS) \
  _sse_if_mr(0x66, X86_SSE_EQD, MD, MB, MI, MS, RS)
/* SSE2 with 0x66 prefix, SSE otherwise */
/* Shift words right logical; the memory form's expansion previously
   referenced an undefined `RD' -- the register operand is RS. */
#define PSRLWrr(RS, RD) \
  _sse_ff_rr(0x66, 0xd1, RS, RD)
#define PSRLWrm(RS, MD, MB, MI, MS) \
  _sse_if_mr(0x66, 0xd1, MD, MB, MI, MS, RS)
#define PSRLWir(IM, RD) \
(_O(0x66), \
_REXLrr(_NOREG, RD), \
_O(0x0f), \
_O(0x71), \
_Mrm(_b11, _b10, _rX(RD)), \
_O(IM))
/* SSE2 with 0x66 prefix, SSE otherwise */
/* Shift dwords right logical; the memory form's expansion previously
   referenced an undefined `RD' -- the register operand is RS. */
#define PSRLLrr(RS, RD) \
  _sse_ff_rr(0x66, 0xd2, RS, RD)
#define PSRLLrm(RS, MD, MB, MI, MS) \
  _sse_id_mr(0x66, 0xd2, MD, MB, MI, MS, RS)
#define PSRLLir(IM, RD) \
(_O(0x66), \
_rex_if_rr(_NOREG, RD), \
_O(0x0f), \
_O(0x72), \
_Mrm(_b11, _b10, _rX(RD)), \
_O(IM))
/* SSE2 */
/* Shift qwords right logical; the memory form's expansion previously
   referenced an undefined `RD' -- the register operand is RS. */
#define PSRLQrr(RS, RD) \
  _sse_ff_rr(0x66, 0xd3, RS, RD)
#define PSRLQrm(RS, MD, MB, MI, MS) \
  _sse_id_mr(0x66, 0xd3, MD, MB, MI, MS, RS)
#define PSRLQir(IM, RD) \
(_O(0x66), \
_rex_if_rr(_NOREG, RD), \
_O(0x0f), \
_O(0x73), \
_Mrm(_b11, _b10, _rX(RD)), \
_O(IM))
/* SSE4.1 */
#define ROUNDSSrri(RS, RD, IM) \
(_O(0x66), _rex_ff_rr(RD, RS), _OO(0xf00|X86_SSE_ROUND), _O(0x0a), \
_Mrm(_b11, _rX(RD), _rX(RS)), _O(IM))
#define ROUNDSDrri(RS, RD, IM) \
(_O(0x66), _rex_ff_rr(RD, RS), _OO(0xf00|X86_SSE_ROUND), _O(0x0b), \
_Mrm(_b11, _rX(RD), _rX(RS)), _O(IM))
#define PCMPEQQrr(RS, RD) \
(_O(0x66), _rex_ff_rr(RD, RS), _OO(0x0f38), _O(0x29), \
_Mrm(_b11, _rX(RD), _rX(RS)))
#ifdef JIT_X86_64
#define _rex_q_rr(rr, mr) x86_REXw_x_(0, 1, rr, 0, mr)
#define _rex_dl_rr(rr, mr) _rex_q_rr(rr, mr)
#define _rex_ld_rr(rr, mr) _rex_q_rr(rr, mr)
#define __sse_lf_rr(op, rs, rd) __sse_ld_rr(op, rs, rd)
#define __sse_ld_rr(op, rs, rd) (_rex_ld_rr(rd, rs), _O(0x0f), _O(op), _Mrm(_b11, _rA(rd), _rX(rs)))
#define __sse_fl_rr(op, rs, rd) __sse_dl_rr(op, rs, rd)
#define __sse_dl_rr(op, rs, rd) (_rex_dl_rr(rd, rs), _O(0x0f), _O(op), _Mrm(_b11, _rX(rd), _rA(rs)))
#define _sse_lf_rr(px, op, rs, rd) _sse_ld_rr(px, op, rs, rd)
#define _sse_ld_rr(px, op, rs, rd) (_jit_B(px), __sse_ld_rr(op, rs, rd))
#define _sse_fl_rr(px, op, rs, rd) _sse_dl_rr(px, op, rs, rd)
#define _sse_dl_rr(px, op, rs, rd) (_jit_B(px), __sse_dl_rr(op, rs, rd))
#define CVTTSD2SIQrr(RS, RD) _sse_lf_rr(0xf2, X86_SSE_CVTTSI, RS, RD)
#define CVTSI2SDQrr(RS, RD) _sse_dl_rr(0xf2, X86_SSE_CVTIS, RS, RD)
#define MOVDQXrr(RS, RD) _sse_dl_rr(0x66, X86_SSE_X2G, RS, RD)
#endif
/*** References: */
/* */

View File

@ -65,6 +65,14 @@ struct jit_local_state {
#ifdef JIT_X86_64
int argpushes;
#endif
#ifdef JIT_X86_SSE
union {
int i[2];
long l;
double d;
} d_data;
jit_insn *tmp_label;
#endif
};
/* 3-parameter operation */
@ -345,6 +353,7 @@ struct jit_local_state {
#define jit_rshr_ul(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_qop_ ((d), (r1), SHRQrr(_CL, (d)) ))
/* Stack */
#define jit_pushi_i(is) PUSHLi(is)
#define jit_pushr_i(rs) PUSHLr(rs)
#define jit_popr_i(rs) POPLr(rs)
#define jit_pushr_l(rs) jit_pushr_i(rs)
@ -598,14 +607,16 @@ static const int const jit_arg_reg_order[] = { _EDI, _ESI, _EDX, _ECX };
#define jit_bxeqi_s(label, rs, is) (CMPWim(is, 0, rs, 0, 0), JEm(label,0,0,0), _jit.x.pc)
#define jit_bxnei_s(label, rs, is) (CMPWim(is, 0, rs, 0, 0), JNEm(label,0,0,0), _jit.x.pc)
#if 0
XFORM_NONGCING static intptr_t _CHECK_TINY(intptr_t diff) { if ((diff < -128) || (diff > 127)) *(intptr_t *)0x0 = 1; return diff; }
#else
# define _CHECK_TINY(x) x
#endif
#define jit_patch_tiny_at(jump_pc,v) (*_PSC((jump_pc) - sizeof(char)) = _jit_SC(_CHECK_TINY((jit_insn *)(v) - (jump_pc))))
#ifdef SUPPORT_TINY_JUMPS
# if 0
static intptr_t _CHECK_TINY(intptr_t diff) { if ((diff < -128) || (diff > 127)) *(intptr_t *)0x0 = 1; return diff; }
# else
# define _CHECK_TINY(x) x
# endif
# define jit_patch_normal_at(jump_pc,v) (_jitl.tiny_jumps \
? (*_PSC((jump_pc) - sizeof(char)) = _jit_SC(_CHECK_TINY((jit_insn *)(v) - (jump_pc)))) \
? jit_patch_tiny_at(jump_pc, v) \
: (*_PSI((jump_pc) - sizeof(int)) = _jit_SI((jit_insn *)(v) - (jump_pc))))
#else
# define jit_patch_normal_at(jump_pc,v) (*_PSI((jump_pc) - sizeof(int)) = _jit_SI((jit_insn *)(v) - (jump_pc)))

View File

@ -61,8 +61,6 @@
#define jit_abs_f(rd,rs) jit_abs_d(rd,rs)
#define jit_negr_f(rd,rs) jit_negr_d(rd,rs)
#define jit_sqrt_f(rd,rs) jit_sqrt_d(rd,rs)
#define jit_extr_f_d(rs, rd)
#define jit_extr_d_f(rs, rd)
#define jit_extr_i_f(rd, rs) jit_extr_i_d(rd, rs)
#define jit_roundr_f_i(rd, rs) jit_roundr_d_i(rd, rs)
#define jit_floorr_f_i(rd, rs) jit_floorr_d_i(rd, rs)

View File

@ -0,0 +1,202 @@
/******************************** -*- C -*- ****************************
*
* Support macros for SSE floating-point math
*
***********************************************************************/
/***********************************************************************
*
* Copyright 2006,2010 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
* GNU lightning is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNU lightning is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with GNU lightning; see the file COPYING.LESSER; if not, write to the
* Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*
* Authors:
* Paolo Bonzini
* Paulo Cesar Pereira de Andrade
***********************************************************************/
#ifndef __lightning_fp_sse_h
#define __lightning_fp_sse_h
#define JIT_FPR_NUM 6
#define _XMM0 0x60
#ifdef JIT_X86_64
# define JIT_FPR(i) (_XMM0 + 8 + (i))
#else
# define JIT_FPR(i) (_XMM0 + (i))
#endif
#define JIT_FPTMP0 JIT_FPR(6)
#define jit_addr_d(f0, f1, f2) \
((f0 == f1) \
? ADDSDrr(f2, f0) \
: ((f0 == f2) \
? ADDSDrr(f1, f0) \
: (MOVSDrr(f1, f0), ADDSDrr(f2, f0))))
#define jit_subr_d(f0, f1, f2) \
((f0 == f1) \
? SUBSDrr(f2, f0) \
: ((f0 == f2) \
? (MOVSDrr(f0, JIT_FPTMP0), MOVSDrr(f1, f0), SUBSDrr(JIT_FPTMP0, f0)) \
: (MOVSDrr(f1, f0), SUBSDrr(f2, f0))))
#define jit_subrr_d(f0, f1, f2) jit_subr_d(f0, f2, f1)
#define jit_mulr_d(f0, f1, f2) \
((f0 == f1) \
? MULSDrr(f2, f0) \
: ((f0 == f2) \
? MULSDrr(f1, f0) \
: (MOVSDrr(f1, f0), MULSDrr(f2, f0))))
#define jit_divr_d(f0, f1, f2) \
((f0 == f1) \
? DIVSDrr(f2, f0) \
: ((f0 == f2) \
? (MOVSDrr(f0, JIT_FPTMP0), MOVSDrr(f1, f0), DIVSDrr(JIT_FPTMP0, f0)) \
: (MOVSDrr(f1, f0), DIVSDrr(f2, f0))))
#define jit_divrr_d(f0, f1, f2) jit_divr_d(f0, f2, f1)
#define jit_ldr_f(f0, r0) MOVSSmr(0, r0, _NOREG, _SCL1, f0)
#define jit_ldr_d(f0, r0) MOVSDmr(0, r0, _NOREG, _SCL1, f0)
#define _jit_ldi_d(f0, i0) MOVSDmr((long)i0, _NOREG, _NOREG, _SCL1, f0)
#ifdef JIT_X86_64
# define jit_ldi_d(f0, i0) \
(_u32P((intptr_t)(i0)) \
? _jit_ldi_d(f0, i0) \
: (jit_movi_l(JIT_REXTMP, i0), jit_ldr_d(f0, JIT_REXTMP)))
#else
# define jit_ldi_d(f0, i0) _jit_ldi_d(f0, i0)
#endif
#define jit_ldxr_d(f0, r0, r1) MOVSDmr(0, r0, r1, _SCL1, f0)
/* Load double from [r0 + i0] into f0. The definition previously ended
   in a stray semicolon, which breaks expression-position uses such as
   comma chains: (jit_ldxi_d(...), other). */
#define jit_ldxi_d(f0, r0, i0) MOVSDmr(i0, r0, _NOREG, _SCL1, f0)
#define jit_str_d(r0, f0) MOVSDrm(f0, 0, r0, _NOREG, _SCL1)
#define _jit_sti_d(i0, f0) MOVSDrm(f0, (long)i0, _NOREG, _NOREG, _SCL1)
#ifdef JIT_X86_64
# define jit_sti_d(i0, f0) \
(_u32P((intptr_t)(i0)) \
? _jit_sti_d(i0, f0) \
: (jit_movi_l(JIT_REXTMP, i0), jit_str_d(JIT_REXTMP, f0)))
#else
# define jit_sti_d(i0, f0) _jit_sti_d(i0, f0)
#endif
#define jit_stxr_d(r0, r1, f0) MOVSDrm(f0, 0, r0, r1, _SCL1)
#define jit_stxi_d(i0, r1, f0) MOVSDrm(f0, i0, r1, _NOREG, _SCL1)
#define jit_movi_d(f0, i0) \
(_jitl.d_data.d = i0, \
((_jitl.d_data.d == 0.0 && !(_jitl.d_data.i[1] & 0x80000000)) \
? XORPDrr(f0, f0) \
: finish_movi_d(f0, i0)))
#ifdef JIT_X86_64
# define finish_movi_d(f0, i0) (jit_movi_l(JIT_REXTMP, _jitl.d_data.l), MOVDQXrr(JIT_REXTMP, f0))
#else
# define finish_movi_d(f0, i0) \
(jit_pushi_i(_jitl.d_data.i[1]), jit_pushi_i(_jitl.d_data.i[0]), \
jit_ldr_d(f0, JIT_SP), \
jit_addi_l(JIT_SP, JIT_SP, sizeof(double)))
#endif
# define jit_movr_d(f0, f1) ((f0 != f1) ? MOVSDrr(f1, f0) : (void)0)
# define jit_extr_i_d(f0, r0) CVTSI2SDLrr(r0, f0)
#ifdef JIT_X86_64
# define jit_extr_l_d(f0, r0) CVTSI2SDQrr(r0, f0)
#else
# define jit_extr_l_d(f0, r0) jit_extr_i_d(f0, r0)
#endif
# define jit_extr_d_f(f0, f1) CVTSD2SSrr(f1, f0)
#define jit_abs_d(f0, f1) \
((f0 == f1) \
? (PCMPEQLrr(JIT_FPTMP0, JIT_FPTMP0), PSRLQir(1, JIT_FPTMP0), ANDPDrr(JIT_FPTMP0, f0)) \
: (PCMPEQLrr(f0, f0), PSRLQir(1, f0), ANDPDrr(f1, f0)))
#define jit_sqrt_d(f0, f1) SQRTSDrr(f1, f0)
#ifdef JIT_X86_64
# define jit_negr_d(f0, f1) \
(jit_movi_l(JIT_REXTMP, 0x8000000000000000), \
((f0 == f1) \
? (MOVDQXrr(JIT_REXTMP, JIT_FPTMP0), \
XORPDrr(JIT_FPTMP0, f0)) \
: (MOVDQXrr(JIT_REXTMP, f0), \
XORPDrr(f1, f0))))
#else
# define jit_negr_d(f0, f1) \
(jit_pushi_i(0x80000000), \
jit_pushi_i(0), \
((f0 == f1) \
? (jit_ldr_d(JIT_FPTMP0, JIT_SP), \
XORPDrr(JIT_FPTMP0, f0)) \
: (jit_ldr_d(f0, JIT_SP), \
XORPDrr(f1, f0))), \
jit_addi_l(JIT_SP, JIT_SP, sizeof(int) << 1))
#endif
/* Racket uses jit_roundr_l only for inexact->exact of fixnums,
so a truncate is good enough. */
#define jit_roundr_d_i(r0, f0) jit_truncr_d_i(r0, f0)
#define jit_roundr_d_l(r0, f0) jit_truncr_d_l(r0, f0)
#define jit_truncr_d_i(r0, f0) CVTTSD2SILrr(f0, r0)
#ifdef JIT_X86_64
# define jit_truncr_d_l(r0, f0) CVTTSD2SIQrr(f0, r0)
#else
# define jit_truncr_d_l(r0, f0) jit_truncr_d_i(r0, f0)
#endif
/* Floating-point compare-and-branch macros. UCOMISD sets ZF/PF/CF from
   the comparison; on an unordered (NaN) operand it sets all three, so
   PF=1 distinguishes NaN. Each macro evaluates to the post-jump pc so
   the caller can patch the branch target.
   NOTE(review): the operand order passed to UCOMISDrr and the chosen
   Jcc are assumed to mirror the x87 fp.h branch semantics -- confirm
   against fp.h. Also note JBE/JB are taken when CF is set, which
   includes the unordered case; callers appear to rely on the banti*
   forms for NaN-sensitive paths -- confirm with the JIT call sites. */
#define jit_bltr_d(label, f0, f1) (UCOMISDrr(f0, f1), JAEm(label,0,0,0), (_jit.x.pc))
#define jit_bler_d(label, f0, f1) (UCOMISDrr(f0, f1), JBEm(label,0,0,0), (_jit.x.pc))
#define jit_bgtr_d(label, f0, f1) (UCOMISDrr(f1, f0), JAm(label,0,0,0), (_jit.x.pc))
#define jit_bger_d(label, f0, f1) (UCOMISDrr(f1, f0), JAEm(label,0,0,0), (_jit.x.pc))
/* Equality must exclude NaN: a short JP (parity = unordered) skips the
   JE, so NaN compares unequal; the tiny jump is patched to land just
   past the JE. */
#define jit_beqr_d(label, f0, f1) \
(UCOMISDrr(f0, f1), \
_O_D8(0x70|(0xa), 0), /*JP */ \
_jitl.tmp_label = _jit.x.pc, \
JEm(label,0,0,0), \
jit_patch_tiny_at(_jitl.tmp_label, _jit.x.pc), \
_jit.x.pc)
#define jit_bantiltr_d(label, f0, f1) (UCOMISDrr(f0, f1), JBEm(label,0,0,0), (_jit.x.pc))
#define jit_bantiler_d(label, f0, f1) (UCOMISDrr(f0, f1), JBm(label,0,0,0), (_jit.x.pc))
#define jit_bantigtr_d(label, f0, f1) (UCOMISDrr(f1, f0), JBEm(label,0,0,0), (_jit.x.pc))
#define jit_bantiger_d(label, f0, f1) (UCOMISDrr(f1, f0), JBm(label,0,0,0), (_jit.x.pc))
/* "anti-equal" must branch on NaN: a short JNP skips the CMP when the
   result is ordered; when unordered, CMPLir(0, JIT_SP) clears ZF (the
   stack pointer is nonzero at runtime) so the JNE is taken. */
#define jit_bantieqr_d(label, f0, f1) \
(UCOMISDrr(f0, f1), \
_O_D8(0x70|(0xb), 0), /*JNP */ \
_jitl.tmp_label = _jit.x.pc, \
CMPLir(0, JIT_SP), \
jit_patch_tiny_at(_jitl.tmp_label, _jit.x.pc), \
JNEm(label,0,0,0), \
_jit.x.pc)
#endif /* __lightning_fp_sse_h */

View File

@ -33,6 +33,12 @@
#ifndef __lightning_asm_fp_h
#define __lightning_asm_fp_h
#ifdef JIT_X86_SSE
# include "fp-sse.h"
#else
/* We really must map the x87 stack onto a flat register file. In practice,
we can provide something sensible and make it work on the x86 using the
stack like a file of eight registers.
@ -478,4 +484,6 @@ union jit_double_imm {
_OO(0xd9f1)) /* fyl2x */
#endif
#endif
#endif /* __lightning_asm_h */