implement JITted x86 floating-point operations with SSE

Currently, the choice of SSE vs. x87 is made when Racket is compiled.
SSE is always used for x86_64 and on Mac OS X, and x87 is used
for other platforms.
This commit is contained in:
Matthew Flatt 2011-11-23 16:51:23 -07:00
parent b7d2d5da62
commit e438793d43
9 changed files with 819 additions and 24 deletions

View File

@ -162,8 +162,10 @@
# if defined(i386) # if defined(i386)
# define SCHEME_PLATFORM_LIBRARY_SUBPATH "i386-linux" # define SCHEME_PLATFORM_LIBRARY_SUBPATH "i386-linux"
# define REGISTER_POOR_MACHINE # define REGISTER_POOR_MACHINE
# ifndef MZ_USE_JIT_SSE
# define ASM_DBLPREC_CONTROL_87 # define ASM_DBLPREC_CONTROL_87
# endif # endif
# endif
# if defined(powerpc) # if defined(powerpc)
# define SCHEME_PLATFORM_LIBRARY_SUBPATH "ppc-linux" # define SCHEME_PLATFORM_LIBRARY_SUBPATH "ppc-linux"
# endif # endif
@ -186,8 +188,10 @@
# if defined(__x86_64__) # if defined(__x86_64__)
# define SCHEME_PLATFORM_LIBRARY_SUBPATH "x86_64-linux" # define SCHEME_PLATFORM_LIBRARY_SUBPATH "x86_64-linux"
# define REGISTER_POOR_MACHINE # define REGISTER_POOR_MACHINE
# ifdef MZ_NO_JIT_SSE
# define ASM_DBLPREC_CONTROL_87 # define ASM_DBLPREC_CONTROL_87
# endif # endif
# endif
# ifndef SCHEME_PLATFORM_LIBRARY_SUBPATH # ifndef SCHEME_PLATFORM_LIBRARY_SUBPATH
# define SCHEME_PLATFORM_LIBRARY_SUBPATH "unknown-linux" # define SCHEME_PLATFORM_LIBRARY_SUBPATH "unknown-linux"
# endif # endif
@ -336,18 +340,22 @@
# define SCHEME_PLATFORM_LIBRARY_SUBPATH "i386-freebsd" # define SCHEME_PLATFORM_LIBRARY_SUBPATH "i386-freebsd"
# define REGISTER_POOR_MACHINE # define REGISTER_POOR_MACHINE
# define MZ_USE_JIT_I386 # define MZ_USE_JIT_I386
# ifndef MZ_JIT_X86_SSE
# if defined(__FreeBSD_kernel__) # if defined(__FreeBSD_kernel__)
# define ASM_DBLPREC_CONTROL_87 # define ASM_DBLPREC_CONTROL_87
# else # else
# define FREEBSD_CONTROL_387 # define FREEBSD_CONTROL_387
# endif # endif
# endif
# elif defined(__amd64__) # elif defined(__amd64__)
# define SCHEME_PLATFORM_LIBRARY_SUBPATH "amd64-freebsd" # define SCHEME_PLATFORM_LIBRARY_SUBPATH "amd64-freebsd"
# define REGISTER_POOR_MACHINE # define REGISTER_POOR_MACHINE
# define MZ_USE_JIT_X86_64 # define MZ_USE_JIT_X86_64
# ifdef MZ_NO_JIT_SSE
# if defined(__FreeBSD_kernel__) # if defined(__FreeBSD_kernel__)
# define ASM_DBLPREC_CONTROL_87 # define ASM_DBLPREC_CONTROL_87
# endif # endif
# endif
# elif defined(__sparc64__) # elif defined(__sparc64__)
# define SCHEME_PLATFORM_LIBRARY_SUBPATH "sparc64-freebsd" # define SCHEME_PLATFORM_LIBRARY_SUBPATH "sparc64-freebsd"
# define FLUSH_SPARC_REGISTER_WINDOWS # define FLUSH_SPARC_REGISTER_WINDOWS
@ -751,8 +759,11 @@
# define MZ_USE_JIT_X86_64 # define MZ_USE_JIT_X86_64
#else #else
# define MZ_USE_JIT_I386 # define MZ_USE_JIT_I386
# ifndef MZ_NO_JIT_SSE
# define MZ_USE_JIT_SSE
# define ASM_DBLPREC_CONTROL_87 # define ASM_DBLPREC_CONTROL_87
# endif # endif
#endif
# define MZ_JIT_USE_MPROTECT # define MZ_JIT_USE_MPROTECT
# define FLAGS_ALREADY_SET # define FLAGS_ALREADY_SET

View File

@ -298,7 +298,7 @@ JIT_HEADERS = $(srcdir)/../src/jit.h \
$(srcdir)/lightning/i386/core.h $(srcdir)/lightning/i386/core-common.h \ $(srcdir)/lightning/i386/core.h $(srcdir)/lightning/i386/core-common.h \
$(srcdir)/lightning/i386/asm.h $(srcdir)/lightning/i386/asm-common.h \ $(srcdir)/lightning/i386/asm.h $(srcdir)/lightning/i386/asm-common.h \
$(srcdir)/lightning/i386/funcs.h $(srcdir)/lightning/i386/funcs-common.h \ $(srcdir)/lightning/i386/funcs.h $(srcdir)/lightning/i386/funcs-common.h \
$(srcdir)/lightning/i386/fp.h $(srcdir)/lightning/i386/fp-common.h \ $(srcdir)/lightning/i386/fp.h $(srcdir)/lightning/i386/fp-sse.h $(srcdir)/lightning/i386/fp-common.h \
$(srcdir)/lightning/ppc/core.h $(srcdir)/lightning/ppc/core-common.h \ $(srcdir)/lightning/ppc/core.h $(srcdir)/lightning/ppc/core-common.h \
$(srcdir)/lightning/ppc/asm.h $(srcdir)/lightning/ppc/asm-common.h \ $(srcdir)/lightning/ppc/asm.h $(srcdir)/lightning/ppc/asm-common.h \
$(srcdir)/lightning/ppc/funcs.h $(srcdir)/lightning/ppc/funcs-common.h \ $(srcdir)/lightning/ppc/funcs.h $(srcdir)/lightning/ppc/funcs-common.h \

View File

@ -43,6 +43,9 @@ END_XFORM_ARITH;
#ifdef MZ_USE_JIT_X86_64 #ifdef MZ_USE_JIT_X86_64
# define MZ_USE_JIT_I386 # define MZ_USE_JIT_I386
# define JIT_X86_64 # define JIT_X86_64
# ifndef MZ_NO_JIT_SSE
# define JIT_X86_SSE
# endif
#endif #endif
#ifdef MZ_USE_JIT_I386 #ifdef MZ_USE_JIT_I386
@ -51,6 +54,12 @@ END_XFORM_ARITH;
# endif # endif
#endif #endif
#ifdef MZ_USE_JIT_SSE
# ifndef JIT_X86_SSE
# define JIT_X86_SSE
# endif
#endif
#ifdef MZ_USE_JIT_PPC #ifdef MZ_USE_JIT_PPC
# ifndef DEFINE_LIGHTNING_FUNCS # ifndef DEFINE_LIGHTNING_FUNCS
# define SUPPRESS_LIGHTNING_FUNCS # define SUPPRESS_LIGHTNING_FUNCS
@ -275,7 +284,7 @@ extern struct scheme_jit_common_record scheme_jit_common;
#define sjc scheme_jit_common #define sjc scheme_jit_common
typedef struct { typedef struct mz_jit_state {
MZTAG_IF_REQUIRED MZTAG_IF_REQUIRED
GC_CAN_IGNORE jit_state js; GC_CAN_IGNORE jit_state js;
char *limit; char *limit;
@ -835,7 +844,7 @@ void scheme_jit_prolog_again(mz_jit_state *jitter, int n, int ret_addr_reg)
#define __START_TINY_OR_SHORT_JUMPS__(tcond, cond) if (tcond) { __START_TINY_JUMPS__(1); } else { __START_SHORT_JUMPS__(cond); } #define __START_TINY_OR_SHORT_JUMPS__(tcond, cond) if (tcond) { __START_TINY_JUMPS__(1); } else { __START_SHORT_JUMPS__(cond); }
#define __END_TINY_OR_SHORT_JUMPS__(tcond, cond) if (tcond) { __END_TINY_JUMPS__(1); } else { __END_SHORT_JUMPS__(cond); } #define __END_TINY_OR_SHORT_JUMPS__(tcond, cond) if (tcond) { __END_TINY_JUMPS__(1); } else { __END_SHORT_JUMPS__(cond); }
#ifdef JIT_X86_64 #if defined(JIT_X86_64) || defined(JIT_X86_SSE)
# define __START_TINY_JUMPS_IF_COMPACT__(cond) /* empty */ # define __START_TINY_JUMPS_IF_COMPACT__(cond) /* empty */
# define __END_TINY_JUMPS_IF_COMPACT__(cond) /* empty */ # define __END_TINY_JUMPS_IF_COMPACT__(cond) /* empty */
#else #else
@ -941,7 +950,7 @@ static void emit_indentation(mz_jit_state *jitter)
pushes and pops much balance. The popping branch operations pop pushes and pops much balance. The popping branch operations pop
both arguments before branching. */ both arguments before branching. */
#if !defined(MZ_USE_JIT_I386) #if !defined(MZ_USE_JIT_I386) || defined(JIT_X86_SSE)
/* Not FP stack, so use normal variants. */ /* Not FP stack, so use normal variants. */
#define DIRECT_FPR_ACCESS #define DIRECT_FPR_ACCESS
#define jit_movi_d_fppush(rd,immd) jit_movi_d(rd,immd) #define jit_movi_d_fppush(rd,immd) jit_movi_d(rd,immd)
@ -959,7 +968,7 @@ static void emit_indentation(mz_jit_state *jitter)
#define jit_abs_d_fppop(rd,rs) jit_abs_d(rd,rs) #define jit_abs_d_fppop(rd,rs) jit_abs_d(rd,rs)
#define jit_sqrt_d_fppop(rd,rs) jit_sqrt_d(rd,rs) #define jit_sqrt_d_fppop(rd,rs) jit_sqrt_d(rd,rs)
#define jit_sti_d_fppop(id, rs) jit_sti_d(id, rs) #define jit_sti_d_fppop(id, rs) jit_sti_d(id, rs)
#define jit_str_d_fppop(id, rd, rs) jit_str_d(id, rd, rs) #define jit_str_d_fppop(id, rd) jit_str_d(id, rd)
#define jit_stxi_d_fppop(id, rd, rs) jit_stxi_d(id, rd, rs) #define jit_stxi_d_fppop(id, rd, rs) jit_stxi_d(id, rd, rs)
#define jit_stxr_d_fppop(id, rd, rs) jit_stxr_d(id, rd, rs) #define jit_stxr_d_fppop(id, rd, rs) jit_stxr_d(id, rd, rs)
#define jit_bger_d_fppop(d, s1, s2) jit_bger_d(d, s1, s2) #define jit_bger_d_fppop(d, s1, s2) jit_bger_d(d, s1, s2)

View File

@ -278,7 +278,7 @@ int scheme_generate_alloc_retry(mz_jit_state *jitter, int i)
#ifdef JIT_USE_FP_OPS #ifdef JIT_USE_FP_OPS
if (i == 2) { if (i == 2) {
(void)mz_tl_sti_d_fppop(tl_scheme_jit_save_fp, JIT_FPR1, JIT_R2); (void)mz_tl_sti_d_fppop(tl_scheme_jit_save_fp, JIT_FPR0, JIT_R2);
} }
#endif #endif
JIT_UPDATE_THREAD_RSPTR(); JIT_UPDATE_THREAD_RSPTR();
@ -299,7 +299,7 @@ int scheme_generate_alloc_retry(mz_jit_state *jitter, int i)
} }
#ifdef JIT_USE_FP_OPS #ifdef JIT_USE_FP_OPS
if (i == 2) { if (i == 2) {
(void)mz_tl_ldi_d_fppush(JIT_FPR1, tl_scheme_jit_save_fp, JIT_R2); (void)mz_tl_ldi_d_fppush(JIT_FPR0, tl_scheme_jit_save_fp, JIT_R2);
} }
#endif #endif
return 1; return 1;

View File

@ -1220,6 +1220,562 @@ typedef _uc jit_insn;
( ((N)&7) == 0) ? 0 : \ ( ((N)&7) == 0) ? 0 : \
JITFAIL(".align argument too large"))) JITFAIL(".align argument too large")))
/* --- Media 128-bit instructions ------------------------------------------ */
/* Second opcode byte (after the 0x0f escape) for the SSE/SSE2
   instructions emitted by the macros below.  The mandatory prefix
   (none, 0x66, 0xf2, 0xf3) selects the packed/scalar and
   single/double variant of each opcode. */
typedef enum {
X86_SSE_MOV = 0x10,
X86_SSE_MOVLP = 0x12,
X86_SSE_MOVHP = 0x16,
X86_SSE_MOVA = 0x28,
X86_SSE_CVTIS = 0x2a,
X86_SSE_CVTTSI = 0x2c,
X86_SSE_CVTSI = 0x2d,
X86_SSE_UCOMI = 0x2e,
X86_SSE_COMI = 0x2f,
X86_SSE_ROUND = 0x3a,
X86_SSE_SQRT = 0x51,
X86_SSE_RSQRT = 0x52,
X86_SSE_RCP = 0x53,
X86_SSE_AND = 0x54,
X86_SSE_ANDN = 0x55,
X86_SSE_OR = 0x56,
X86_SSE_XOR = 0x57,
X86_SSE_ADD = 0x58,
X86_SSE_MUL = 0x59,
X86_SSE_CVTSD = 0x5a,
X86_SSE_CVTDT = 0x5b,
X86_SSE_SUB = 0x5c,
X86_SSE_MIN = 0x5d,
X86_SSE_DIV = 0x5e,
X86_SSE_MAX = 0x5f,
X86_SSE_X2G = 0x6e,
X86_SSE_EQB = 0x74,
X86_SSE_EQW = 0x75,
X86_SSE_EQD = 0x76,
X86_SSE_G2X = 0x7e,
X86_SSE_MOV2 = 0xd6
} x86_sse_t;
/* Small helpers over the register-encoding macros defined earlier in
   this header (_rN, _r4, _r_X, ...). */
#define _BIT(X) (!!(X)) /* normalize any nonzero value to 1 */
#define _rR(R) ((R) & 0x0f) /* low 4 bits: hardware register number */
#define _rX(R) _rN(R) /* XMM register field encoding */
#define _rXP(R) ((R) > 0 && _rR(R) > 7) /* true for xmm8..xmm15 (needs REX bit) */
#define _SCL1 _b00
#define _rA(R) _r4(R) /* general-purpose register field encoding */
#define _RSP 0x54
#define _i_X(op, md, rb, ri, ms) _r_X(op, md, rb, ri, ms)
#define _f_X(rd, md, rb, ri, ms) _i_X((int)_rX(rd), md, rb, ri, ms)
/* REX-prefix emission for SSE operands.  On x86-64 a 0x40|wrxb byte is
   emitted whenever any of the w/r/x/b bits (or the force flag `l`) is
   set; on 32-bit x86 these macros emit nothing. */
#ifdef JIT_X86_64
# define x86_REXwrxb(l, w, r, x, b) \
(((l) || (((int)(w) << 3) | (((int)(r)) << 2) | (((int)(x)) << 1) | ((int)(b)))) \
? _jit_B(0x40 | (((int)(w) << 3) | (((int)(r)) << 2) | (((int)(x)) << 1) | ((int)(b)))) \
: (void)0)
#else
# define x86_REXwrxb(l, w, r, x, b) (void)0
#endif
#define x86_REXwrx_(l, w, r, x, mr) x86_REXwrxb(l, w, r, x, _BIT(_rXP(mr)))
#define x86_REXw_x_(l, w, r, x, mr) x86_REXwrx_(l, w, _BIT(_rXP(r)), x, mr)
#define x86_rex_l_rr(rr, mr) x86_REXw_x_(0, 0, rr, 0, mr)
#define x86_rex_l_mr(rb, ri, rd) x86_REXw_x_(0, 0, rd, _BIT(_rXP(ri)), rb)
#define x86_rex_l_rm(rs, rb, ri) x86_rex_l_mr(rb, ri, rs)
/* Operand-kind-specific REX aliases: f = XMM, i = 32-bit GPR. */
#define _rex_ff_rr(rr, mr) x86_rex_l_rr(rr, mr)
#define _rex_if_rr(rr, mr) x86_rex_l_rr(rr, mr)
#define _rex_fi_rr(rr, mr) x86_rex_l_rr(rr, mr)
#define _rex_if_mr(rb, ri, rd) x86_rex_l_mr(rb, ri, rd)
#define _rex_fi_rm(rs, rb, ri) x86_rex_l_rm(rs, rb, ri)
/* Core emitters: [REX] 0x0f <op> ModRM.  Suffixes describe operand
   kinds/direction: ff = xmm,xmm; if = mem/gpr -> xmm; fi = xmm -> mem/gpr.
   The __sse1_* variants emit op|1 (the store direction of a load/store
   opcode pair). */
#define __sse_ff_rr(op, rs, rd) (_rex_ff_rr(rd, rs), _O(0x0f), _O(op), _Mrm(_b11, _rX(rd), _rX(rs)))
#define __sse_id_rr(op, rs, rd) __sse_if_rr(op, rs, rd)
#define __sse_if_rr(op, rs, rd) (_rex_if_rr(rd, rs), _O(0x0f), _O(op), _Mrm(_b11, _rA(rd), _rX(rs)))
#define __sse_di_rr(op, rs, rd) __sse_fi_rr(op, rs, rd)
#define __sse_fi_rr(op, rs, rd) (_rex_fi_rr(rd, rs), _O(0x0f), _O(op), _Mrm(_b11, _rX(rd), _rA(rs)))
#define __sse_id_mr(op, md, rb, mi, ms, rd) __sse_if_mr(op, md, rb, mi, ms, rd)
#define __sse_if_mr(op, md, rb, ri, ms, rd) (_rex_if_mr(rb, ri, rd), _O(0x0f), _O(op), _f_X(rd, md, rb, ri, ms))
#define __sse_di_rm(op, rs, md, rb, mi, ms) __sse_fi_rm(op, rs, md, rb, mi, ms)
#define __sse_fi_rm(op, rs, md, rb, ri, ms) (_rex_fi_rm(rs, rb, ri), _O(0x0f), _O(op), _f_X(rs, md, rb, ri, ms))
#define __sse1_di_rm(op, rs, md, mb, mi, ms) __sse1_fi_rm(op, rs, md, mb, mi, ms)
#define __sse1_fi_rm(op, rs, md, rb, ri, ms) (_rex_fi_rm(rs, rb, ri), _O(0x0f), _O(0x01 | op), _f_X(rs, md, rb, ri, ms))
/* Same emitters with a mandatory prefix byte `px` in front. */
#define _sse_ff_rr(px, op, rs, rd) (_jit_B(px), __sse_ff_rr(op, rs, rd))
#define _sse_id_rr(px, op, rs, rd) _sse_if_rr(px, op, rs, rd)
#define _sse_if_rr(px, op, rs, rd) (_jit_B(px), __sse_if_rr(op, rs, rd))
#define _sse_di_rr(px, op, rs, rd) _sse_fi_rr(px, op, rs, rd)
#define _sse_fi_rr(px, op, rs, rd) (_jit_B(px), __sse_fi_rr(op, rs, rd))
#define _sse_id_mr(px, op, md, rb, mi, ms, rd) _sse_if_mr(px, op, md, rb, mi, ms, rd)
#define _sse_if_mr(px, op, md, rb, ri, ms, rd) (_jit_B(px), __sse_if_mr(op, md, rb, ri, ms, rd))
#define _sse_di_rm(px, op, rs, md, rb, mi, ms) _sse_fi_rm(px, op, rs, md, rb, mi, ms)
#define _sse_fi_rm(px, op, rs, md, rb, ri, ms) (_jit_B(px), __sse_fi_rm(op, rs, md, rb, ri, ms))
#define _sse1_di_rm(px, op, rs, md, mb, mi, ms) _sse1_fi_rm(px, op, rs, md, mb, mi, ms)
#define _sse1_fi_rm(px, op, rs, md, rb, ri, ms) (_jit_B(px), __sse1_fi_rm(op, rs, md, rb, ri, ms))
/* Variant selectors: PS = packed single (no prefix), PD = packed double
   (0x66), SS = scalar single (0xf3), SD = scalar double (0xf2). */
#define _SSEPSrr(OP,RS,RD) __sse_ff_rr ( OP, RS, RD)
#define _SSEPSmr(OP,MD,MB,MI,MS,RD) __sse_if_mr ( OP, MD, MB, MI, MS, RD)
#define _SSEPSrm(OP,RS,MD,MB,MI,MS) __sse_fi_rm ( OP, RS, MD, MB, MI, MS)
#define _SSEPS1rm(OP,RS,MD,MB,MI,MS) __sse1_fi_rm( OP, RS, MD, MB, MI, MS)
#define _SSEPDrr(OP,RS,RD) _sse_ff_rr (0x66, OP, RS, RD)
#define _SSEPDmr(OP,MD,MB,MI,MS,RD) _sse_if_mr (0x66, OP, MD, MB, MI, MS, RD)
#define _SSEPDrm(OP,RS,MD,MB,MI,MS) _sse_fi_rm (0x66, OP, RS, MD, MB, MI, MS)
#define _SSEPD1rm(OP,RS,MD,MB,MI,MS) _sse1_fi_rm(0x66, OP, RS, MD, MB, MI, MS)
#define _SSESSrr(OP,RS,RD) _sse_ff_rr (0xf3, OP, RS, RD)
#define _SSESSmr(OP,MD,MB,MI,MS,RD) _sse_if_mr (0xf3, OP, MD, MB, MI, MS, RD)
#define _SSESSrm(OP,RS,MD,MB,MI,MS) _sse_fi_rm (0xf3, OP, RS, MD, MB, MI, MS)
#define _SSESS1rm(OP,RS,MD,MB,MI,MS) _sse1_fi_rm(0xf3, OP, RS, MD, MB, MI, MS)
#define _SSESDrr(OP,RS,RD) _sse_ff_rr (0xf2, OP, RS, RD)
#define _SSESDmr(OP,MD,MB,MI,MS,RD) _sse_if_mr (0xf2, OP, MD, MB, MI, MS, RD)
#define _SSESDrm(OP,RS,MD,MB,MI,MS) _sse_fi_rm (0xf2, OP, RS, MD, MB, MI, MS)
#define _SSESD1rm(OP,RS,MD,MB,MI,MS) _sse1_fi_rm(0xf2, OP, RS, MD, MB, MI, MS)
#define _NOREG 0 /* "no index/base register" marker for addressing modes */
/* SSE: load/store the MXCSR control/status register (0x0f 0xae /2, /3). */
#define LDMXCSRmr(MD, MB, MI, MS) \
(_REXLmr(MB, MI, _NOREG), \
_O(0x0f), \
_O(0xae), \
_i_X(_b10, MD, MB, MI, MS))
#define STMXCSRrm(MD, MB, MI, MS) \
(_REXLrm(_NOREG, MI, MB), \
_O(0x0f), \
_O(0xae), \
_i_X(_b11, MD, MB, MI, MS))
/* ADD: SSE packed-single, SSE2 packed-double */
#define ADDPSrr(RS, RD) _SSEPSrr(X86_SSE_ADD, RS, RD)
#define ADDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
#define ADDPDrr(RS, RD) _SSEPDrr(X86_SSE_ADD, RS, RD)
#define ADDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
/* SSE */
#define ADDSSrr(RS, RD) _SSESSrr(X86_SSE_ADD, RS, RD)
#define ADDSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
/* SSE2 */
#define ADDSDrr(RS, RD) _SSESDrr(X86_SSE_ADD, RS, RD)
#define ADDSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
/* ANDN (bitwise and-not).  Bitwise ops have no scalar encoding, so the
   scalar names alias the packed forms. */
/* SSE */
#define ANDNPSrr(RS, RD) _SSEPSrr(X86_SSE_ANDN, RS, RD)
#define ANDNPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ANDN, MD, MB, MI, MS, RD)
/* SSE2 */
#define ANDNPDrr(RS, RD) _SSEPDrr(X86_SSE_ANDN, RS, RD)
#define ANDNPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ANDN, MD, MB, MI, MS, RD)
/* The *mr aliases must map to the packed *mr macros; the originals
   aliased the *rr macros, whose argument lists differ (2 vs. 5 args),
   so any use would fail to expand correctly. */
/* SSE */
#define ANDNSSrr ANDNPSrr
#define ANDNSSmr ANDNPSmr
/* SSE2 */
#define ANDNSDrr ANDNPDrr
#define ANDNSDmr ANDNPDmr
/* AND (bitwise and).  Scalar names alias the packed forms, since
   bitwise ops have no scalar encoding. */
/* SSE */
#define ANDPSrr(RS, RD) _SSEPSrr(X86_SSE_AND, RS, RD)
#define ANDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_AND, MD, MB, MI, MS, RD)
/* SSE2 */
#define ANDPDrr(RS, RD) _SSEPDrr(X86_SSE_AND, RS, RD)
#define ANDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_AND, MD, MB, MI, MS, RD)
/* Fixed: *mr aliases previously pointed at the *rr macros, which take
   a different argument list. */
/* SSE */
#define ANDSSrr ANDPSrr
#define ANDSSmr ANDPSmr
/* SSE2 */
#define ANDSDrr ANDPDrr
#define ANDSDmr ANDPDmr
/* DIV */
/* SSE */
#define DIVPSrr(RS, RD) _SSEPSrr(X86_SSE_DIV, RS, RD)
#define DIVPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
/* SSE2 */
#define DIVPDrr(RS, RD) _SSEPDrr(X86_SSE_DIV, RS, RD)
#define DIVPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
/* SSE */
#define DIVSSrr(RS, RD) _SSESSrr(X86_SSE_DIV, RS, RD)
#define DIVSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
/* SSE2 */
#define DIVSDrr(RS, RD) _SSESDrr(X86_SSE_DIV, RS, RD)
#define DIVSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
/* MAX */
/* SSE */
#define MAXPSrr(RS, RD) _SSEPSrr(X86_SSE_MAX, RS, RD)
#define MAXPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
/* SSE2 */
#define MAXPDrr(RS, RD) _SSEPDrr(X86_SSE_MAX, RS, RD)
#define MAXPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
/* SSE */
#define MAXSSrr(RS, RD) _SSESSrr(X86_SSE_MAX, RS, RD)
#define MAXSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
/* SSE2 */
#define MAXSDrr(RS, RD) _SSESDrr(X86_SSE_MAX, RS, RD)
#define MAXSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
/* MIN */
/* SSE */
#define MINPSrr(RS, RD) _SSEPSrr(X86_SSE_MIN, RS, RD)
#define MINPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
/* SSE2 */
#define MINPDrr(RS, RD) _SSEPDrr(X86_SSE_MIN, RS, RD)
#define MINPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
/* SSE */
#define MINSSrr(RS, RD) _SSESSrr(X86_SSE_MIN, RS, RD)
#define MINSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
/* SSE2 */
#define MINSDrr(RS, RD) _SSESDrr(X86_SSE_MIN, RS, RD)
#define MINSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
/* MUL */
/* SSE */
#define MULPSrr(RS, RD) _SSEPSrr(X86_SSE_MUL, RS, RD)
#define MULPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
/* SSE2 */
#define MULPDrr(RS, RD) _SSEPDrr(X86_SSE_MUL, RS, RD)
#define MULPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
/* SSE */
#define MULSSrr(RS, RD) _SSESSrr(X86_SSE_MUL, RS, RD)
#define MULSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
/* SSE2 */
#define MULSDrr(RS, RD) _SSESDrr(X86_SSE_MUL, RS, RD)
#define MULSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
/* OR (bitwise or).  Scalar names alias the packed forms. */
/* SSE */
#define ORPSrr(RS, RD) _SSEPSrr(X86_SSE_OR, RS, RD)
#define ORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_OR, MD, MB, MI, MS, RD)
/* SSE2 */
#define ORPDrr(RS, RD) _SSEPDrr(X86_SSE_OR, RS, RD)
#define ORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_OR, MD, MB, MI, MS, RD)
/* Fixed: *mr aliases previously pointed at the *rr macros, which take
   a different argument list. */
/* SSE */
#define ORSSrr ORPSrr
#define ORSSmr ORPSmr
/* SSE2 */
#define ORSDrr ORPDrr
#define ORSDmr ORPDmr
/* RCP (approximate reciprocal) -- SSE only, single precision */
#define RCPPSrr(RS, RD) _SSEPSrr(X86_SSE_RCP, RS, RD)
#define RCPPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RCP, MD, MB, MI, MS, RD)
#define RCPSSrr(RS, RD) _SSESSrr(X86_SSE_RCP, RS, RD)
#define RCPSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RCP, MD, MB, MI, MS, RD)
/* RSQRT (approximate reciprocal square root) -- SSE only */
#define RSQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_RSQRT, RS, RD)
#define RSQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD)
#define RSQRTSSrr(RS, RD) _SSESSrr(X86_SSE_RSQRT, RS, RD)
#define RSQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD)
/* SQRT */
/* SSE */
#define SQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_SQRT, RS, RD)
#define SQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
/* SSE2 */
#define SQRTPDrr(RS, RD) _SSEPDrr(X86_SSE_SQRT, RS, RD)
#define SQRTPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
/* SSE */
#define SQRTSSrr(RS, RD) _SSESSrr(X86_SSE_SQRT, RS, RD)
#define SQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
/* SSE2 */
#define SQRTSDrr(RS, RD) _SSESDrr(X86_SSE_SQRT, RS, RD)
#define SQRTSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
/* SUB: note operand order is RD = RD - RS */
/* SSE */
#define SUBPSrr(RS, RD) _SSEPSrr(X86_SSE_SUB, RS, RD)
#define SUBPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
/* SSE2 */
#define SUBPDrr(RS, RD) _SSEPDrr(X86_SSE_SUB, RS, RD)
#define SUBPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
/* SSE */
#define SUBSSrr(RS, RD) _SSESSrr(X86_SSE_SUB, RS, RD)
#define SUBSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
/* SSE2 */
#define SUBSDrr(RS, RD) _SSESDrr(X86_SSE_SUB, RS, RD)
#define SUBSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
/* XOR (bitwise exclusive or).  Scalar names alias the packed forms. */
/* SSE */
#define XORPSrr(RS, RD) _SSEPSrr(X86_SSE_XOR, RS, RD)
#define XORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_XOR, MD, MB, MI, MS, RD)
/* SSE2 */
#define XORPDrr(RS, RD) _SSEPDrr(X86_SSE_XOR, RS, RD)
#define XORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_XOR, MD, MB, MI, MS, RD)
/* Fixed: *mr aliases previously pointed at the *rr macros, which take
   a different argument list. */
/* SSE */
#define XORSSrr XORPSrr
#define XORSSmr XORPSmr
/* SSE2 */
#define XORSDrr XORPDrr
#define XORSDmr XORPDmr
/* COMIS{S,D}: ordered compare, sets EFLAGS.  "No prefixes here" means the
   PS/PD selectors are reused because COMISS takes no mandatory prefix and
   COMISD takes 0x66 -- exactly what _SSEPSrr/_SSEPDrr emit. */
/* SSE */
#define COMISSrr(RS, RD) _SSEPSrr(X86_SSE_COMI, RS, RD)
#define COMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_COMI, MD, MB, MI, MS, RD)
/* SSE2 */
#define COMISDrr(RS, RD) _SSEPDrr(X86_SSE_COMI, RS, RD)
#define COMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_COMI, MD, MB, MI, MS, RD)
/* UCOMIS{S,D}: unordered compare (quiet on QNaN), sets EFLAGS. */
/* SSE */
#define UCOMISSrr(RS, RD) _SSEPSrr(X86_SSE_UCOMI, RS, RD)
#define UCOMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD)
/* SSE2 */
#define UCOMISDrr(RS, RD) _SSEPDrr(X86_SSE_UCOMI, RS, RD)
#define UCOMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD)
/* Scalar moves; the *rm (store) forms use the op|1 store opcode. */
/* SSE */
#define MOVSSrr(RS, RD) _SSESSrr (X86_SSE_MOV, RS, RD)
#define MOVSSmr(MD, MB, MI, MS, RD) _SSESSmr (X86_SSE_MOV, MD, MB, MI, MS, RD)
#define MOVSSrm(RS, MD, MB, MI, MS) _SSESS1rm(X86_SSE_MOV, RS, MD, MB, MI, MS)
/* SSE2 */
#define MOVSDrr(RS, RD) _SSESDrr (X86_SSE_MOV, RS, RD)
#define MOVSDmr(MD, MB, MI, MS, RD) _SSESDmr (X86_SSE_MOV, MD, MB, MI, MS, RD)
#define MOVSDrm(RS, MD, MB, MI, MS) _SSESD1rm(X86_SSE_MOV, RS, MD, MB, MI, MS)
/* Aligned 128-bit moves */
/* SSE */
#define MOVAPSrr(RS, RD) _SSEPSrr (X86_SSE_MOVA, RS, RD)
#define MOVAPSmr(MD, MB, MI, MS, RD) _SSEPSmr (X86_SSE_MOVA, MD, MB, MI, MS, RD)
#define MOVAPSrm(RS, MD, MB, MI, MS) _SSEPS1rm(X86_SSE_MOVA, RS, MD, MB, MI, MS)
/* SSE2 */
#define MOVAPDrr(RS, RD) _SSEPDrr (X86_SSE_MOVA, RS, RD)
#define MOVAPDmr(MD, MB, MI, MS, RD) _SSEPDmr (X86_SSE_MOVA, MD, MB, MI, MS, RD)
#define MOVAPDrm(RS, MD, MB, MI, MS) _SSEPD1rm(X86_SSE_MOVA, RS, MD, MB, MI, MS)
/* Packed float <-> MMX-integer conversions */
/* SSE */
#define CVTPS2PIrr(RS, RD) __sse_ff_rr( X86_SSE_CVTSI, RS, RD)
#define CVTPS2PImr(MD, MB, MI, MS, RD) __sse_if_mr( X86_SSE_CVTSI, MD, MB, MI, MS, RD)
/* SSE2 */
#define CVTPD2PIrr(RS, RD) _sse_ff_rr(0x66, X86_SSE_CVTSI, RS, RD)
#define CVTPD2PImr(MD, MB, MI, MS, RD) _sse_id_mr(0x66, X86_SSE_CVTSI, MD, MB, MI, MS, RD)
/* SSE */
#define CVTPI2PSrr(RS, RD) __sse_ff_rr( X86_SSE_CVTIS, RS, RD)
#define CVTPI2PSmr(MD, MB, MI, MS, RD) __sse_if_mr( X86_SSE_CVTIS, MD, MB, MI, MS, RD)
/* SSE2 */
#define CVTPI2PDrr(RS, RD) _sse_ff_rr(0x66, X86_SSE_CVTIS, RS, RD)
#define CVTPI2PDmr(MD, MB, MI, MS, RD) _sse_id_mr(0x66, X86_SSE_CVTIS, MD, MB, MI, MS, RD)
/* Packed/scalar precision conversions (single <-> double) */
/* SSE2 */
#define CVTPS2PDrr(RS, RD) __sse_ff_rr( X86_SSE_CVTSD, RS, RD)
#define CVTPS2PDmr(MD, MB, MI, MS, RD) __sse_if_mr( X86_SSE_CVTSD, MD, MB, MI, MS, RD)
#define CVTPD2PSrr(RS, RD) _sse_ff_rr(0x66, X86_SSE_CVTSD, RS, RD)
#define CVTPD2PSmr(MD, MB, MI, MS, RD) _sse_id_mr(0x66, X86_SSE_CVTSD, MD, MB, MI, MS, RD)
/* SSE2 */
#define CVTSS2SDrr(RS, RD) _sse_ff_rr(0xf3, X86_SSE_CVTSD, RS, RD)
#define CVTSS2SDmr(MD, MB, MI, MS, RD) _sse_id_mr(0xf3, X86_SSE_CVTSD, MD, MB, MI, MS, RD)
#define CVTSD2SSrr(RS, RD) _sse_ff_rr(0xf2, X86_SSE_CVTSD, RS, RD)
#define CVTSD2SSmr(MD, MB, MI, MS, RD) _sse_id_mr(0xf2, X86_SSE_CVTSD, MD, MB, MI, MS, RD)
/* Scalar float -> 32-bit integer; the TT forms truncate toward zero,
   the plain forms round per MXCSR. */
/* SSE */
#define CVTTSS2SILrr(RS, RD) _sse_id_rr(0xf3, X86_SSE_CVTTSI, RS, RD)
#define CVTTSS2SILmr(MD, MB, MI, MS, RD) _sse_id_mr(0xf3, X86_SSE_CVTTSI, MD, MB, MI, MS, RD)
/* SSE2 */
#define CVTTSD2SILrr(RS, RD) _sse_id_rr(0xf2, X86_SSE_CVTTSI, RS, RD)
#define CVTTSD2SILmr(MD, MB, MI, MS, RD) _sse_id_mr(0xf2, X86_SSE_CVTTSI, MD, MB, MI, MS, RD)
/* SSE */
#define CVTSS2SILrr(RS, RD) _sse_if_rr(0xf3, X86_SSE_CVTSI, RS, RD)
#define CVTSS2SILmr(MD, MB, MI, MS, RD) _sse_if_mr(0xf3, X86_SSE_CVTSI, MD, MB, MI, MS, RD)
/* SSE2 */
#define CVTSD2SILrr(RS, RD) _sse_id_rr(0xf2, X86_SSE_CVTSI, RS, RD)
#define CVTSD2SILmr(MD, MB, MI, MS, RD) _sse_id_mr(0xf2, X86_SSE_CVTSI, MD, MB, MI, MS, RD)
/* 32-bit integer -> scalar float */
/* SSE */
#define CVTSI2SSLrr(RS, RD) _sse_fi_rr(0xf3, X86_SSE_CVTIS, RS, RD)
#define CVTSI2SSLmr(MD, MB, MI, MS, RD) _sse_if_mr(0xf3, X86_SSE_CVTIS, MD, MB, MI, MS, RD)
/* SSE2 */
#define CVTSI2SDLrr(RS, RD) _sse_di_rr(0xf2, X86_SSE_CVTIS, RS, RD)
#define CVTSI2SDLmr(MD, MB, MI, MS, RD) _sse_id_mr(0xf2, X86_SSE_CVTIS, MD, MB, MI, MS, RD)
/* MOVD: 32-bit moves between GPR and XMM (X2G/G2X opcodes) */
/* SSE2 */
#define MOVDLXrr(RS, RD) _sse_di_rr(0x66, X86_SSE_X2G, RS, RD)
#define MOVDLXmr(MD, MB, MI, MS, RD) _sse_id_mr(0x66, X86_SSE_X2G, MD, MB, MI, MS, RD)
/* SSE2 */
#define MOVDXLrr(RS, RD) _sse_ff_rr(0x66, X86_SSE_G2X, RS, RD)
#define MOVDXLrm(RS, MD, MB, MI, MS) _sse_di_rm(0x66, X86_SSE_G2X, RS, MD, MB, MI, MS)
/* MOVD variants without the 0x66 prefix (MMX-register forms) */
/* SSE */
#define MOVDLMrr(RS, RD) __sse_ff_rr( X86_SSE_X2G, RS, RD)
#define MOVDLMmr(MD, MB, MI, MS, RD) __sse_id_mr( X86_SSE_X2G, MD, MB, MI, MS, RD)
/* SSE */
#define MOVDMLrr(RS, RD) __sse_ff_rr( X86_SSE_G2X, RS, RD)
#define MOVDMLrm(RS, MD, MB, MI, MS) __sse_fi_rm( X86_SSE_G2X, RS, MD, MB, MI, MS)
/* SSE3 */
#define MOVDQ2Qrr(RS, RD) _sse_ff_rr(0xf2, X86_SSE_MOV2, RS, RD)
#define MOVQ2DQrr(RS, RD) _sse_ff_rr(0xf3, X86_SSE_MOV2, RS, RD)
/* SSE */
#define MOVHLPSrr(RS, RD) __sse_ff_rr( X86_SSE_MOVLP, RS, RD)
#define MOVLHPSrr(RS, RD) __sse_ff_rr( X86_SSE_MOVHP, RS, RD)
/* Full 128-bit integer moves (aligned / unaligned) */
/* SSE2 */
#define MOVDQArr(RS, RD) _sse_ff_rr(0x66, 0x6f, RS, RD)
#define MOVDQAmr(MD, MB, MI, MS, RD) _sse_id_mr(0x66, 0x6f, MD, MB, MI, MS, RD)
#define MOVDQArm(RS, MD, MB, MI, MS) _sse_di_rm(0x66, 0x7f, RS, MD, MB, MI, MS)
/* SSE2 */
#define MOVDQUrr(RS, RD) _sse_ff_rr(0xf3, 0x6f, RS, RD)
#define MOVDQUmr(MD, MB, MI, MS, RD) _sse_id_mr(0xf3, 0x6f, MD, MB, MI, MS, RD)
#define MOVDQUrm(RS, MD, MB, MI, MS) _sse_di_rm(0xf3, 0x7f, RS, MD, MB, MI, MS)
/* High/low 64-bit halves of an XMM register to/from memory */
/* SSE2 */
#define MOVHPDmr(MD, MB, MI, MS, RD) _sse_id_mr (0x66, X86_SSE_MOVHP, MD, MB, MI, MS, RD)
#define MOVHPDrm(RS, MD, MB, MI, MS) _sse1_di_rm(0x66, X86_SSE_MOVHP, RS, MD, MB, MI, MS)
/* SSE */
#define MOVHPSmr(MD, MB, MI, MS, RD) __sse_if_mr ( X86_SSE_MOVHP, MD, MB, MI, MS, RD)
#define MOVHPSrm(RS, MD, MB, MI, MS) __sse1_fi_rm( X86_SSE_MOVHP, RS, MD, MB, MI, MS)
/* SSE2 */
#define MOVLPDmr(MD, MB, MI, MS, RD) _sse_id_mr (0x66, X86_SSE_MOVLP, MD, MB, MI, MS, RD)
#define MOVLPDrm(RS, MD, MB, MI, MS) _sse1_di_rm(0x66, X86_SSE_MOVLP, RS, MD, MB, MI, MS)
/* SSE */
#define MOVLPSmr(MD, MB, MI, MS, RD) __sse_if_mr ( X86_SSE_MOVLP, MD, MB, MI, MS, RD)
#define MOVLPSrm(RS, MD, MB, MI, MS) __sse1_fi_rm( X86_SSE_MOVLP, RS, MD, MB, MI, MS)
/* FIXME 0x66 prefix actually required to modify 128 bits register */
/* SSE or SSE2 with 0x66 prefix */
/* Packed integer compare-for-equality on bytes/words/dwords.  The rm
   forms load the second operand from memory into register RS; the
   original bodies referenced RD, which is not a parameter of the rm
   macros and would not compile when expanded. */
#define PCMPEQBrr(RS, RD) \
_sse_ff_rr(0x66, X86_SSE_EQB, RS, RD)
#define PCMPEQBrm(RS, MD, MB, MI, MS) \
_sse_if_mr(0x66, X86_SSE_EQB, MD, MB, MI, MS, RS)
#define PCMPEQWrr(RS, RD) \
_sse_ff_rr(0x66, X86_SSE_EQW, RS, RD)
#define PCMPEQWrm(RS, MD, MB, MI, MS) \
_sse_if_mr(0x66, X86_SSE_EQW, MD, MB, MI, MS, RS)
#define PCMPEQLrr(RS, RD) \
_sse_ff_rr(0x66, X86_SSE_EQD, RS, RD)
#define PCMPEQLrm(RS, MD, MB, MI, MS) \
_sse_if_mr(0x66, X86_SSE_EQD, MD, MB, MI, MS, RS)
/* PSRL{W,L,Q}: packed logical right shift by register, memory operand,
   or immediate.  The rm forms previously referenced an undeclared RD
   parameter; the destination register of the rm macros is RS. */
/* SSE2 with 0x66 prefix, SSE otherwise */
#define PSRLWrr(RS, RD) \
_sse_ff_rr(0x66, 0xd1, RS, RD)
#define PSRLWrm(RS, MD, MB, MI, MS) \
_sse_if_mr(0x66, 0xd1, MD, MB, MI, MS, RS)
#define PSRLWir(IM, RD) \
(_O(0x66), \
_REXLrr(_NOREG, RD), \
_O(0x0f), \
_O(0x71), \
_Mrm(_b11, _b10, _rX(RD)), \
_O(IM))
/* SSE2 with 0x66 prefix, SSE otherwise */
#define PSRLLrr(RS, RD) \
_sse_ff_rr(0x66, 0xd2, RS, RD)
#define PSRLLrm(RS, MD, MB, MI, MS) \
_sse_id_mr(0x66, 0xd2, MD, MB, MI, MS, RS)
#define PSRLLir(IM, RD) \
(_O(0x66), \
_rex_if_rr(_NOREG, RD), \
_O(0x0f), \
_O(0x72), \
_Mrm(_b11, _b10, _rX(RD)), \
_O(IM))
/* SSE2 */
#define PSRLQrr(RS, RD) \
_sse_ff_rr(0x66, 0xd3, RS, RD)
#define PSRLQrm(RS, MD, MB, MI, MS) \
_sse_id_mr(0x66, 0xd3, MD, MB, MI, MS, RS)
#define PSRLQir(IM, RD) \
(_O(0x66), \
_rex_if_rr(_NOREG, RD), \
_O(0x0f), \
_O(0x73), \
_Mrm(_b11, _b10, _rX(RD)), \
_O(IM))
/* SSE4.1: three-byte-opcode (0x0f 0x3a / 0x0f 0x38) instructions.
   ROUNDSS/ROUNDSD round with an explicit mode in the immediate byte
   IM; PCMPEQQ compares packed 64-bit integers for equality. */
#define ROUNDSSrri(RS, RD, IM) \
(_O(0x66), _rex_ff_rr(RD, RS), _OO(0xf00|X86_SSE_ROUND), _O(0x0a), \
_Mrm(_b11, _rX(RD), _rX(RS)), _O(IM))
#define ROUNDSDrri(RS, RD, IM) \
(_O(0x66), _rex_ff_rr(RD, RS), _OO(0xf00|X86_SSE_ROUND), _O(0x0b), \
_Mrm(_b11, _rX(RD), _rX(RS)), _O(IM))
#define PCMPEQQrr(RS, RD) \
(_O(0x66), _rex_ff_rr(RD, RS), _OO(0x0f38), _O(0x29), \
_Mrm(_b11, _rX(RD), _rX(RS)))
/* 64-bit-only variants: same emitters but with REX.W set so the GPR
   operand is 64 bits (l = long/quadword GPR, f/d = XMM). */
#ifdef JIT_X86_64
#define _rex_q_rr(rr, mr) x86_REXw_x_(0, 1, rr, 0, mr)
#define _rex_dl_rr(rr, mr) _rex_q_rr(rr, mr)
#define _rex_ld_rr(rr, mr) _rex_q_rr(rr, mr)
#define __sse_lf_rr(op, rs, rd) __sse_ld_rr(op, rs, rd)
#define __sse_ld_rr(op, rs, rd) (_rex_ld_rr(rd, rs), _O(0x0f), _O(op), _Mrm(_b11, _rA(rd), _rX(rs)))
#define __sse_fl_rr(op, rs, rd) __sse_dl_rr(op, rs, rd)
#define __sse_dl_rr(op, rs, rd) (_rex_dl_rr(rd, rs), _O(0x0f), _O(op), _Mrm(_b11, _rX(rd), _rA(rs)))
#define _sse_lf_rr(px, op, rs, rd) _sse_ld_rr(px, op, rs, rd)
#define _sse_ld_rr(px, op, rs, rd) (_jit_B(px), __sse_ld_rr(op, rs, rd))
#define _sse_fl_rr(px, op, rs, rd) _sse_dl_rr(px, op, rs, rd)
#define _sse_dl_rr(px, op, rs, rd) (_jit_B(px), __sse_dl_rr(op, rs, rd))
#define CVTTSD2SIQrr(RS, RD) _sse_lf_rr(0xf2, X86_SSE_CVTTSI, RS, RD) /* double -> 64-bit int, truncating */
#define CVTSI2SDQrr(RS, RD) _sse_dl_rr(0xf2, X86_SSE_CVTIS, RS, RD) /* 64-bit int -> double */
#define MOVDQXrr(RS, RD) _sse_dl_rr(0x66, X86_SSE_X2G, RS, RD) /* 64-bit GPR -> XMM */
#endif
/*** References: */ /*** References: */
/* */ /* */

View File

@ -65,6 +65,14 @@ struct jit_local_state {
#ifdef JIT_X86_64 #ifdef JIT_X86_64
int argpushes; int argpushes;
#endif #endif
#ifdef JIT_X86_SSE
union {
int i[2];
long l;
double d;
} d_data;
jit_insn *tmp_label;
#endif
}; };
/* 3-parameter operation */ /* 3-parameter operation */
@ -345,6 +353,7 @@ struct jit_local_state {
#define jit_rshr_ul(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_qop_ ((d), (r1), SHRQrr(_CL, (d)) )) #define jit_rshr_ul(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_qop_ ((d), (r1), SHRQrr(_CL, (d)) ))
/* Stack */ /* Stack */
#define jit_pushi_i(is) PUSHLi(is)
#define jit_pushr_i(rs) PUSHLr(rs) #define jit_pushr_i(rs) PUSHLr(rs)
#define jit_popr_i(rs) POPLr(rs) #define jit_popr_i(rs) POPLr(rs)
#define jit_pushr_l(rs) jit_pushr_i(rs) #define jit_pushr_l(rs) jit_pushr_i(rs)
@ -598,14 +607,16 @@ static const int const jit_arg_reg_order[] = { _EDI, _ESI, _EDX, _ECX };
#define jit_bxeqi_s(label, rs, is) (CMPWim(is, 0, rs, 0, 0), JEm(label,0,0,0), _jit.x.pc) #define jit_bxeqi_s(label, rs, is) (CMPWim(is, 0, rs, 0, 0), JEm(label,0,0,0), _jit.x.pc)
#define jit_bxnei_s(label, rs, is) (CMPWim(is, 0, rs, 0, 0), JNEm(label,0,0,0), _jit.x.pc) #define jit_bxnei_s(label, rs, is) (CMPWim(is, 0, rs, 0, 0), JNEm(label,0,0,0), _jit.x.pc)
#ifdef SUPPORT_TINY_JUMPS
#if 0 #if 0
static intptr_t _CHECK_TINY(intptr_t diff) { if ((diff < -128) || (diff > 127)) *(intptr_t *)0x0 = 1; return diff; } XFORM_NONGCING static intptr_t _CHECK_TINY(intptr_t diff) { if ((diff < -128) || (diff > 127)) *(intptr_t *)0x0 = 1; return diff; }
#else #else
# define _CHECK_TINY(x) x # define _CHECK_TINY(x) x
#endif #endif
#define jit_patch_tiny_at(jump_pc,v) (*_PSC((jump_pc) - sizeof(char)) = _jit_SC(_CHECK_TINY((jit_insn *)(v) - (jump_pc))))
#ifdef SUPPORT_TINY_JUMPS
# define jit_patch_normal_at(jump_pc,v) (_jitl.tiny_jumps \ # define jit_patch_normal_at(jump_pc,v) (_jitl.tiny_jumps \
? (*_PSC((jump_pc) - sizeof(char)) = _jit_SC(_CHECK_TINY((jit_insn *)(v) - (jump_pc)))) \ ? jit_patch_tiny_at(jump_pc, v) \
: (*_PSI((jump_pc) - sizeof(int)) = _jit_SI((jit_insn *)(v) - (jump_pc)))) : (*_PSI((jump_pc) - sizeof(int)) = _jit_SI((jit_insn *)(v) - (jump_pc))))
#else #else
# define jit_patch_normal_at(jump_pc,v) (*_PSI((jump_pc) - sizeof(int)) = _jit_SI((jit_insn *)(v) - (jump_pc))) # define jit_patch_normal_at(jump_pc,v) (*_PSI((jump_pc) - sizeof(int)) = _jit_SI((jit_insn *)(v) - (jump_pc)))

View File

@ -61,8 +61,6 @@
#define jit_abs_f(rd,rs) jit_abs_d(rd,rs) #define jit_abs_f(rd,rs) jit_abs_d(rd,rs)
#define jit_negr_f(rd,rs) jit_negr_d(rd,rs) #define jit_negr_f(rd,rs) jit_negr_d(rd,rs)
#define jit_sqrt_f(rd,rs) jit_sqrt_d(rd,rs) #define jit_sqrt_f(rd,rs) jit_sqrt_d(rd,rs)
#define jit_extr_f_d(rs, rd)
#define jit_extr_d_f(rs, rd)
#define jit_extr_i_f(rd, rs) jit_extr_i_d(rd, rs) #define jit_extr_i_f(rd, rs) jit_extr_i_d(rd, rs)
#define jit_roundr_f_i(rd, rs) jit_roundr_d_i(rd, rs) #define jit_roundr_f_i(rd, rs) jit_roundr_d_i(rd, rs)
#define jit_floorr_f_i(rd, rs) jit_floorr_d_i(rd, rs) #define jit_floorr_f_i(rd, rs) jit_floorr_d_i(rd, rs)

View File

@ -0,0 +1,202 @@
/******************************** -*- C -*- ****************************
*
* Support macros for SSE floating-point math
*
***********************************************************************/
/***********************************************************************
*
* Copyright 2006,2010 Free Software Foundation, Inc.
*
* This file is part of GNU lightning.
*
* GNU lightning is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published
* by the Free Software Foundation; either version 3, or (at your option)
* any later version.
*
* GNU lightning is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with GNU lightning; see the file COPYING.LESSER; if not, write to the
* Free Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301, USA.
*
* Authors:
* Paolo Bonzini
* Paulo Cesar Pereira de Andrade
***********************************************************************/
#ifndef __lightning_fp_sse_h
#define __lightning_fp_sse_h
/* SSE2 double-precision float support for the GNU lightning x86 back end.
   These macros emit scalar SSE instructions (ADDSD, MOVSD, etc.) instead
   of x87 stack operations.  Register arguments f0/f1/f2 are XMM register
   encodings produced by JIT_FPR(); r0/r1 are integer registers. */
/* Number of XMM registers exposed to the register allocator. */
#define JIT_FPR_NUM 6
/* Base encoding for XMM0 in the instruction-emitter macros. */
#define _XMM0 0x60
#ifdef JIT_X86_64
/* On x86_64, allocatable FP registers start at XMM8; NOTE(review):
   presumably this leaves XMM0-XMM7 free for the C calling convention --
   confirm against the surrounding JIT's register usage. */
# define JIT_FPR(i) (_XMM0 + 8 + (i))
#else
# define JIT_FPR(i) (_XMM0 + (i))
#endif
/* Scratch XMM register, one past the JIT_FPR_NUM allocatable registers. */
#define JIT_FPTMP0 JIT_FPR(6)
/* Three-operand f0 = f1 + f2 on two-operand SSE: exploit commutativity
   when f0 aliases an input, otherwise copy f1 into f0 first. */
#define jit_addr_d(f0, f1, f2) \
((f0 == f1) \
? ADDSDrr(f2, f0) \
: ((f0 == f2) \
? ADDSDrr(f1, f0) \
: (MOVSDrr(f1, f0), ADDSDrr(f2, f0))))
/* f0 = f1 - f2.  Subtraction is not commutative, so when f0 aliases the
   subtrahend f2 its value is saved in JIT_FPTMP0 before f1 overwrites f0. */
#define jit_subr_d(f0, f1, f2) \
((f0 == f1) \
? SUBSDrr(f2, f0) \
: ((f0 == f2) \
? (MOVSDrr(f0, JIT_FPTMP0), MOVSDrr(f1, f0), SUBSDrr(JIT_FPTMP0, f0)) \
: (MOVSDrr(f1, f0), SUBSDrr(f2, f0))))
/* Reverse subtract: f0 = f2 - f1. */
#define jit_subrr_d(f0, f1, f2) jit_subr_d(f0, f2, f1)
/* f0 = f1 * f2; commutative, handled like jit_addr_d. */
#define jit_mulr_d(f0, f1, f2) \
((f0 == f1) \
? MULSDrr(f2, f0) \
: ((f0 == f2) \
? MULSDrr(f1, f0) \
: (MOVSDrr(f1, f0), MULSDrr(f2, f0))))
/* f0 = f1 / f2; non-commutative, handled like jit_subr_d. */
#define jit_divr_d(f0, f1, f2) \
((f0 == f1) \
? DIVSDrr(f2, f0) \
: ((f0 == f2) \
? (MOVSDrr(f0, JIT_FPTMP0), MOVSDrr(f1, f0), DIVSDrr(JIT_FPTMP0, f0)) \
: (MOVSDrr(f1, f0), DIVSDrr(f2, f0))))
/* Reverse divide: f0 = f2 / f1. */
#define jit_divrr_d(f0, f1, f2) jit_divr_d(f0, f2, f1)
/* Load single-precision float at [r0] into f0 (MOVSS). */
#define jit_ldr_f(f0, r0) MOVSSmr(0, r0, _NOREG, _SCL1, f0)
/* Load double at [r0] into f0. */
#define jit_ldr_d(f0, r0) MOVSDmr(0, r0, _NOREG, _SCL1, f0)
/* Load double from absolute address i0 (only valid when the address fits
   the instruction's 32-bit displacement). */
#define _jit_ldi_d(f0, i0) MOVSDmr((long)i0, _NOREG, _NOREG, _SCL1, f0)
#ifdef JIT_X86_64
/* On x86_64 an absolute address may not fit in 32 bits; fall back to
   materializing it in JIT_REXTMP and loading indirectly. */
# define jit_ldi_d(f0, i0) \
(_u32P((intptr_t)(i0)) \
? _jit_ldi_d(f0, i0) \
: (jit_movi_l(JIT_REXTMP, i0), jit_ldr_d(f0, JIT_REXTMP)))
#else
# define jit_ldi_d(f0, i0) _jit_ldi_d(f0, i0)
#endif
/* Load double at [r0 + r1] into f0. */
#define jit_ldxr_d(f0, r0, r1) MOVSDmr(0, r0, r1, _SCL1, f0)
/* Load double at [r0 + i0] into f0.  (Fixed: the original macro ended in a
   stray semicolon, unlike every sibling load/store macro here; since these
   macros are composed inside parenthesized comma expressions, the trailing
   ';' would be a syntax error at such use sites.) */
#define jit_ldxi_d(f0, r0, i0) MOVSDmr(i0, r0, _NOREG, _SCL1, f0)
/* Store double f0 to [r0]. */
#define jit_str_d(r0, f0) MOVSDrm(f0, 0, r0, _NOREG, _SCL1)
/* Store double f0 to absolute address i0 (32-bit-displacement form). */
#define _jit_sti_d(i0, f0) MOVSDrm(f0, (long)i0, _NOREG, _NOREG, _SCL1)
#ifdef JIT_X86_64
/* Mirror of jit_ldi_d: go through JIT_REXTMP when the address does not
   fit in 32 bits. */
# define jit_sti_d(i0, f0) \
(_u32P((intptr_t)(i0)) \
? _jit_sti_d(i0, f0) \
: (jit_movi_l(JIT_REXTMP, i0), jit_str_d(JIT_REXTMP, f0)))
#else
# define jit_sti_d(i0, f0) _jit_sti_d(i0, f0)
#endif
/* Store double f0 to [r0 + r1]. */
#define jit_stxr_d(r0, r1, f0) MOVSDrm(f0, 0, r0, r1, _SCL1)
/* Store double f0 to [r1 + i0]. */
#define jit_stxi_d(i0, r1, f0) MOVSDrm(f0, i0, r1, _NOREG, _SCL1)
/* Load immediate double i0 into f0.  Positive zero gets the short
   XORPD f0,f0 encoding; the i[1] sign-bit test excludes -0.0 (i[1] is
   the high word of the double on this little-endian x86 target), which
   XORPD would wrongly turn into +0.0. */
#define jit_movi_d(f0, i0) \
(_jitl.d_data.d = i0, \
((_jitl.d_data.d == 0.0 && !(_jitl.d_data.i[1] & 0x80000000)) \
? XORPDrr(f0, f0) \
: finish_movi_d(f0, i0)))
#ifdef JIT_X86_64
/* 64-bit: materialize the bit pattern in JIT_REXTMP, then move it to the
   XMM register directly (MOVD/MOVQ). */
# define finish_movi_d(f0, i0) (jit_movi_l(JIT_REXTMP, _jitl.d_data.l), MOVDQXrr(JIT_REXTMP, f0))
#else
/* 32-bit: push the two words (high first, so memory order is low/high),
   load through the stack, then pop by adjusting JIT_SP. */
# define finish_movi_d(f0, i0) \
(jit_pushi_i(_jitl.d_data.i[1]), jit_pushi_i(_jitl.d_data.i[0]), \
jit_ldr_d(f0, JIT_SP), \
jit_addi_l(JIT_SP, JIT_SP, sizeof(double)))
#endif
/* Register-to-register move; emits nothing when source and destination
   are the same register. */
# define jit_movr_d(f0, f1) ((f0 != f1) ? MOVSDrr(f1, f0) : (void)0)
/* Convert 32-bit signed integer r0 to double f0. */
# define jit_extr_i_d(f0, r0) CVTSI2SDLrr(r0, f0)
#ifdef JIT_X86_64
# define jit_extr_l_d(f0, r0) CVTSI2SDQrr(r0, f0)
#else
# define jit_extr_l_d(f0, r0) jit_extr_i_d(f0, r0)
#endif
/* Narrow double f1 to single-precision f0. */
# define jit_extr_d_f(f0, f1) CVTSD2SSrr(f1, f0)
/* Absolute value: build an all-ones register with PCMPEQ, shift right by
   one to get the 0x7FF...F mask, and AND away the sign bit.  The aliased
   case builds the mask in JIT_FPTMP0 so f0's value survives. */
#define jit_abs_d(f0, f1) \
((f0 == f1) \
? (PCMPEQLrr(JIT_FPTMP0, JIT_FPTMP0), PSRLQir(1, JIT_FPTMP0), ANDPDrr(JIT_FPTMP0, f0)) \
: (PCMPEQLrr(f0, f0), PSRLQir(1, f0), ANDPDrr(f1, f0)))
#define jit_sqrt_d(f0, f1) SQRTSDrr(f1, f0)
#ifdef JIT_X86_64
/* Negate by XORing the sign-bit mask 0x8000000000000000, staged through
   JIT_REXTMP (and JIT_FPTMP0 when f0 aliases f1). */
# define jit_negr_d(f0, f1) \
(jit_movi_l(JIT_REXTMP, 0x8000000000000000), \
((f0 == f1) \
? (MOVDQXrr(JIT_REXTMP, JIT_FPTMP0), \
XORPDrr(JIT_FPTMP0, f0)) \
: (MOVDQXrr(JIT_REXTMP, f0), \
XORPDrr(f1, f0))))
#else
/* 32-bit variant: push the sign-bit mask as two words (high word
   0x80000000, low word 0), XOR through a load, then pop the 8 bytes. */
# define jit_negr_d(f0, f1) \
(jit_pushi_i(0x80000000), \
jit_pushi_i(0), \
((f0 == f1) \
? (jit_ldr_d(JIT_FPTMP0, JIT_SP), \
XORPDrr(JIT_FPTMP0, f0)) \
: (jit_ldr_d(f0, JIT_SP), \
XORPDrr(f1, f0))), \
jit_addi_l(JIT_SP, JIT_SP, sizeof(int) << 1))
#endif
/* Racket uses jit_roundr_l only for inexact->exact of fixnums,
so a truncate is good enough. */
#define jit_roundr_d_i(r0, f0) jit_truncr_d_i(r0, f0)
#define jit_roundr_d_l(r0, f0) jit_truncr_d_l(r0, f0)
/* Truncate-toward-zero double -> integer conversions. */
#define jit_truncr_d_i(r0, f0) CVTTSD2SILrr(f0, r0)
#ifdef JIT_X86_64
# define jit_truncr_d_l(r0, f0) CVTTSD2SIQrr(f0, r0)
#else
# define jit_truncr_d_l(r0, f0) jit_truncr_d_i(r0, f0)
#endif
/* Ordered FP comparisons: UCOMISD sets CF/ZF/PF, and the unsigned
   condition codes (JA/JAE/JB/JBE) are chosen so that an unordered result
   (NaN, which sets all three flags) does NOT take the "positive" branch.
   Each macro leaves _jit.x.pc as its value for later jump patching. */
#define jit_bltr_d(label, f0, f1) (UCOMISDrr(f0, f1), JAEm(label,0,0,0), (_jit.x.pc))
#define jit_bler_d(label, f0, f1) (UCOMISDrr(f0, f1), JBEm(label,0,0,0), (_jit.x.pc))
#define jit_bgtr_d(label, f0, f1) (UCOMISDrr(f1, f0), JAm(label,0,0,0), (_jit.x.pc))
#define jit_bger_d(label, f0, f1) (UCOMISDrr(f1, f0), JAEm(label,0,0,0), (_jit.x.pc))
/* Branch-if-equal must not branch on NaN: emit a tiny JP over the JE so
   an unordered compare (PF set) skips the equality branch.  The JP's
   1-byte displacement is patched once the JE's end address is known. */
#define jit_beqr_d(label, f0, f1) \
(UCOMISDrr(f0, f1), \
_O_D8(0x70|(0xa), 0), /*JP */ \
_jitl.tmp_label = _jit.x.pc, \
JEm(label,0,0,0), \
jit_patch_tiny_at(_jitl.tmp_label, _jit.x.pc), \
_jit.x.pc)
/* "anti" branches: taken when the ordered relation does NOT hold, so an
   unordered (NaN) result must take the branch; JB/JBE fire on CF, which
   UCOMISD sets for unordered operands. */
#define jit_bantiltr_d(label, f0, f1) (UCOMISDrr(f0, f1), JBEm(label,0,0,0), (_jit.x.pc))
#define jit_bantiler_d(label, f0, f1) (UCOMISDrr(f0, f1), JBm(label,0,0,0), (_jit.x.pc))
#define jit_bantigtr_d(label, f0, f1) (UCOMISDrr(f1, f0), JBEm(label,0,0,0), (_jit.x.pc))
#define jit_bantiger_d(label, f0, f1) (UCOMISDrr(f1, f0), JBm(label,0,0,0), (_jit.x.pc))
/* Branch-if-not-equal, NaN counts as not-equal: a tiny JNP skips the
   CMP when the compare was ordered (JNE then uses UCOMISD's ZF);
   NOTE(review): on the unordered path, CMPLir(0, JIT_SP) appears to be
   there only to force ZF clear (the stack pointer is nonzero) so the
   following JNE is always taken -- confirm against the emitter. */
#define jit_bantieqr_d(label, f0, f1) \
(UCOMISDrr(f0, f1), \
_O_D8(0x70|(0xb), 0), /*JNP */ \
_jitl.tmp_label = _jit.x.pc, \
CMPLir(0, JIT_SP), \
jit_patch_tiny_at(_jitl.tmp_label, _jit.x.pc), \
JNEm(label,0,0,0), \
_jit.x.pc)
#endif /* __lightning_fp_sse_h */

View File

@ -33,6 +33,12 @@
#ifndef __lightning_asm_fp_h #ifndef __lightning_asm_fp_h
#define __lightning_asm_fp_h #define __lightning_asm_fp_h
#ifdef JIT_X86_SSE
# include "fp-sse.h"
#else
/* We really must map the x87 stack onto a flat register file. In practice, /* We really must map the x87 stack onto a flat register file. In practice,
we can provide something sensible and make it work on the x86 using the we can provide something sensible and make it work on the x86 using the
stack like a file of eight registers. stack like a file of eight registers.
@ -478,4 +484,6 @@ union jit_double_imm {
_OO(0xd9f1)) /* fyl2x */ _OO(0xd9f1)) /* fyl2x */
#endif #endif
#endif
#endif /* __lightning_asm_h */ #endif /* __lightning_asm_h */