From d99888d700ea1e98c478cf7e1f27ca8a7181d711 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Mon, 12 Nov 2012 12:35:24 -0700 Subject: [PATCH] x86_64: more direct thread-local access in JIT Use one instruction instead of two. --- /* Review note: the removed x86_64 mz_tl_addr formed the slot address with LEAQmQr from JIT_R14 plus addr, and a separate register-indirect load or store then used that address. With this patch the x86_64 mz_tl_addr, mz_tl_addr_tmp, mz_tl_addr_tmp_i and mz_tl_addr_untmp_i macros expand to (void)0, and each access is a single jit_ldxi_{p,l,i} or jit_stxi_{p,l,i} (or the _d_fppop and _d_fppush variants) with JIT_R14 as the base and addr as the displacement. The new mz_tl_ldr_{p,l,i}, mz_tl_str_d_fppop and mz_tl_ldr_d_fppush wrappers let the shared mz_tl_ldi_{p,l,i}, mz_tl_sti_d_fppop and mz_tl_ldi_d_fppush definitions serve both branches; in the #else branch they still expand to the register-indirect jit_ldr_{p,l,i}, jit_str_d_fppop and jit_ldr_d_fppush forms. jit_ldxi_i and jit_stxi_i now use the new MOVLmQr and MOVLQrm macros, which keep opcodes 0x8b and 0x89 but encode through _qOd_r_X with _r8 instead of _O_r_X with _r4. */ src/racket/src/jit.h | 34 ++++++++++++++++++---------- src/racket/src/lightning/i386/asm.h | 2 ++ src/racket/src/lightning/i386/core.h | 4 ++-- 3 files changed, 26 insertions(+), 14 deletions(-) diff --git a/src/racket/src/jit.h b/src/racket/src/jit.h index 79437890db..28b7737d81 100644 --- a/src/racket/src/jit.h +++ b/src/racket/src/jit.h @@ -427,15 +427,20 @@ void *scheme_jit_get_threadlocal_table(); # ifdef JIT_X86_64 # define JIT_R10 JIT_R(10) # define JIT_R14 JIT_R(14) -# define mz_tl_addr(reg, addr) LEAQmQr((addr), (JIT_R14), 0, 0, (reg)) -# define mz_tl_addr_tmp(tmp_reg, addr) (mz_tl_addr(JIT_R10, addr)) +# define mz_tl_addr(reg, addr) (void)0 +# define mz_tl_addr_tmp(tmp_reg, addr) (void)0 # define mz_tl_addr_untmp(tmp_reg) (void)0 # define mz_tl_tmp_reg(tmp_reg) JIT_R10 -# define _mz_tl_str_p(addr, tmp_reg, reg) jit_str_p(tmp_reg, reg) -# define _mz_tl_str_l(addr, tmp_reg, reg) jit_str_l(tmp_reg, reg) -# define _mz_tl_str_i(addr, tmp_reg, reg) jit_str_i(tmp_reg, reg) -# define mz_tl_addr_tmp_i(tmp_reg, addr) (jit_movr_l(JIT_R10, tmp_reg), mz_tl_addr(tmp_reg, addr)) -# define mz_tl_addr_untmp_i(tmp_reg) jit_movr_l(tmp_reg, JIT_R10) +# define _mz_tl_str_p(addr, tmp_reg, reg) jit_stxi_p(addr, JIT_R14, reg) +# define _mz_tl_str_l(addr, tmp_reg, reg) jit_stxi_l(addr, JIT_R14, reg) +# define _mz_tl_str_i(addr, tmp_reg, reg) jit_stxi_i(addr, JIT_R14, reg) +# define mz_tl_ldr_p(reg, addr) jit_ldxi_p(reg, JIT_R14, addr) +# define mz_tl_ldr_l(reg, addr) jit_ldxi_l(reg, JIT_R14, addr) +# define mz_tl_ldr_i(reg, addr) jit_ldxi_i(reg, JIT_R14, addr) +# define mz_tl_str_d_fppop(tmp_reg, reg, addr) jit_stxi_d_fppop(addr, JIT_R14, reg) +# define mz_tl_ldr_d_fppush(reg, tmp_reg, addr) jit_ldxi_d_fppush(reg, JIT_R14, addr) +# define 
mz_tl_addr_tmp_i(tmp_reg, addr) (void)0 +# define mz_tl_addr_untmp_i(tmp_reg) (void)0 # define mz_tl_tmp_reg_i(tmp_reg) tmp_reg # else # define THREAD_LOCAL_USES_JIT_V2 @@ -459,17 +464,22 @@ void *scheme_jit_get_threadlocal_table(); # define mz_tl_addr_tmp_i(tmp_reg, addr) mz_tl_addr_tmp(tmp_reg, addr) # define mz_tl_addr_untmp_i(tmp_reg) mz_tl_addr_untmp(tmp_reg) # define mz_tl_tmp_reg_i(tmp_reg) mz_tl_tmp_reg(tmp_reg) +# define mz_tl_ldr_p(reg, addr) jit_ldr_p(reg, reg) +# define mz_tl_ldr_l(reg, addr) jit_ldr_l(reg, reg) +# define mz_tl_ldr_i(reg, addr) jit_ldr_i(reg, reg) +# define mz_tl_str_d_fppop(tmp_reg, reg, addr) jit_str_d_fppop(tmp_reg, reg) +# define mz_tl_ldr_d_fppush(reg, tmp_reg, addr) jit_ldr_d_fppush(reg, tmp_reg) # endif /* A given tmp_reg doesn't have to be unused; it just has to be distinct from other arguments. */ # define mz_tl_sti_p(addr, reg, tmp_reg) (mz_tl_addr_tmp(tmp_reg, addr), _mz_tl_str_p(addr, mz_tl_tmp_reg(tmp_reg), reg), mz_tl_addr_untmp(tmp_reg)) # define mz_tl_sti_l(addr, reg, tmp_reg) (mz_tl_addr_tmp(tmp_reg, addr), _mz_tl_str_l(addr, mz_tl_tmp_reg(tmp_reg), reg), mz_tl_addr_untmp(tmp_reg)) # define mz_tl_sti_i(addr, reg, tmp_reg) (mz_tl_addr_tmp_i(tmp_reg, addr), _mz_tl_str_i(addr, mz_tl_tmp_reg_i(tmp_reg), reg), mz_tl_addr_untmp_i(tmp_reg)) -# define mz_tl_ldi_p(reg, addr) (mz_tl_addr(reg, addr), jit_ldr_p(reg, reg)) -# define mz_tl_ldi_l(reg, addr) (mz_tl_addr(reg, addr), jit_ldr_l(reg, reg)) -# define mz_tl_ldi_i(reg, addr) (mz_tl_addr(reg, addr), jit_ldr_i(reg, reg)) -# define mz_tl_sti_d_fppop(addr, reg, tmp_reg) (mz_tl_addr(tmp_reg, addr), jit_str_d_fppop(tmp_reg, reg)) -# define mz_tl_ldi_d_fppush(reg, addr, tmp_reg) (mz_tl_addr(tmp_reg, addr), jit_ldr_d_fppush(reg, tmp_reg)) +# define mz_tl_ldi_p(reg, addr) (mz_tl_addr(reg, addr), mz_tl_ldr_p(reg, addr)) +# define mz_tl_ldi_l(reg, addr) (mz_tl_addr(reg, addr), mz_tl_ldr_l(reg, addr)) +# define mz_tl_ldi_i(reg, addr) (mz_tl_addr(reg, addr), mz_tl_ldr_i(reg, addr)) +# 
define mz_tl_sti_d_fppop(addr, reg, tmp_reg) (mz_tl_addr(tmp_reg, addr), mz_tl_str_d_fppop(tmp_reg, reg, addr)) +# define mz_tl_ldi_d_fppush(reg, addr, tmp_reg) (mz_tl_addr(tmp_reg, addr), mz_tl_ldr_d_fppush(reg, tmp_reg, addr)) #else # define mz_tl_sti_p(addr, reg, tmp_reg) jit_sti_p(addr, reg) # define mz_tl_sti_l(addr, reg, tmp_reg) jit_sti_l(addr, reg) diff --git a/src/racket/src/lightning/i386/asm.h b/src/racket/src/lightning/i386/asm.h index d391fc9fe5..6cc2b2de73 100644 --- a/src/racket/src/lightning/i386/asm.h +++ b/src/racket/src/lightning/i386/asm.h @@ -696,7 +696,9 @@ typedef _uc jit_insn; #define MOVLrr(RS, RD) _O_Mrm (0x89 ,_b11,_r4(RS),_r4(RD) ) #define MOVLmr(MD, MB, MI, MS, RD) _O_r_X (0x8b ,_r4(RD) ,MD,MB,MI,MS ) +#define MOVLmQr(MD, MB, MI, MS, RD) _qOd_r_X (0x8b ,_r8(RD) ,MD,MB,MI,MS ) #define MOVLrm(RS, MD, MB, MI, MS) _O_r_X (0x89 ,_r4(RS) ,MD,MB,MI,MS ) +#define MOVLQrm(RS, MD, MB, MI, MS) _qOd_r_X (0x89 ,_r8(RS) ,MD,MB,MI,MS ) #define MOVLir(IM, R) _Or_L (0xb8,_r4(R) ,IM ) #define MOVLim(IM, MD, MB, MI, MS) _O_X_L (0xc7 ,MD,MB,MI,MS ,IM ) diff --git a/src/racket/src/lightning/i386/core.h b/src/racket/src/lightning/i386/core.h index 37748a918b..9003a505b2 100644 --- a/src/racket/src/lightning/i386/core.h +++ b/src/racket/src/lightning/i386/core.h @@ -682,12 +682,12 @@ XFORM_NONGCING static intptr_t _CHECK_TINY(intptr_t diff) { if ((diff < -128) || #define _jit_ldi_i(d, is) MOVLmr((is), 0, 0, 0, (d)) #define jit_ldr_i(d, rs) MOVLmr(0, (rs), 0, 0, (d)) #define jit_ldxr_i(d, s1, s2) MOVLmr(0, (s1), (s2), 1, (d)) -#define jit_ldxi_i(d, rs, is) MOVLmr((is), (rs), 0, 0, (d)) +#define jit_ldxi_i(d, rs, is) MOVLmQr((is), (rs), 0, 0, (d)) #define _jit_sti_i(id, rs) MOVLrm((rs), (id), 0, 0, 0) #define jit_str_i(rd, rs) MOVLrm((rs), 0, (rd), 0, 0) #define jit_stxr_i(d1, d2, rs) MOVLrm((rs), 0, (d1), (d2), 1) -#define jit_stxi_i(id, rd, rs) MOVLrm((rs), (id), (rd), 0, 0) +#define jit_stxi_i(id, rd, rs) MOVLQrm((rs), (id), (rd), 0, 0) #define _jit_ldi_l(d, 
is) MOVQmQr((is), 0, 0, 0, (d)) #define jit_ldr_l(d, rs) MOVQmQr(0, (rs), 0, 0, (d))