diff --git a/src/racket/src/eval.c b/src/racket/src/eval.c index 1c7b38359c..f7fe67757b 100644 --- a/src/racket/src/eval.c +++ b/src/racket/src/eval.c @@ -2783,7 +2783,7 @@ scheme_do_eval(Scheme_Object *obj, int num_rands, Scheme_Object **rands, tmpv = obj; obj = NULL; /* save for space, since tmpv is ignored by the GC */ - v = data->start_code(tmpv, num_rands, rands); + v = data->start_code(tmpv, num_rands, rands EXTRA_NATIVE_ARGUMENT); if (v == SCHEME_TAIL_CALL_WAITING) { /* [TC-SFS]; see schnapp.inc */ diff --git a/src/racket/src/fun.c b/src/racket/src/fun.c index a50d848b8e..5a96275446 100644 --- a/src/racket/src/fun.c +++ b/src/racket/src/fun.c @@ -3124,7 +3124,7 @@ static Scheme_Object *_apply_native(Scheme_Object *obj, int num_rands, Scheme_Ob MZ_CONT_MARK_POS += 2; old_cont_mark_stack = MZ_CONT_MARK_STACK; - obj = data->start_code(obj, num_rands, rands); + obj = data->start_code(obj, num_rands, rands EXTRA_NATIVE_ARGUMENT); if (obj == SCHEME_TAIL_CALL_WAITING) return force_values(obj, 1); diff --git a/src/racket/src/future.c b/src/racket/src/future.c index 9beb2b0dc1..a3dbf4e4cc 100644 --- a/src/racket/src/future.c +++ b/src/racket/src/future.c @@ -1974,7 +1974,7 @@ void *worker_thread_future_loop(void *arg) Scheme_Future_Thread_State *fts = params->fts; Scheme_Future_State *fs = params->fs; Scheme_Object *v; - Scheme_Closed_Prim *jitcode; + Scheme_Native_Proc *jitcode; future_t *ft; mz_jmp_buf newbuf; int fid; diff --git a/src/racket/src/jit.c b/src/racket/src/jit.c index 3f60580637..0f618610a0 100644 --- a/src/racket/src/jit.c +++ b/src/racket/src/jit.c @@ -120,10 +120,6 @@ static Scheme_Object *clear_rs_arguments(Scheme_Object *v, int size, int delta) return v; } -#ifdef JIT_THREAD_LOCAL -void *scheme_jit_get_threadlocal_table() XFORM_SKIP_PROC { return &BOTTOM_VARIABLE; } -#endif - #define JIT_TS_PROCS #define JIT_BOX_TS_PROCS #include "jit_ts.c" @@ -134,14 +130,20 @@ void *scheme_jit_get_threadlocal_table() XFORM_SKIP_PROC { return &BOTTOM_VARIAB 
THREAD_LOCAL_DECL(Scheme_Current_LWC *scheme_current_lwc); -Scheme_Object *scheme_call_as_lightweight_continuation(Scheme_Closed_Prim *code, +Scheme_Object *scheme_call_as_lightweight_continuation(Scheme_Native_Proc *code, void *data, int argc, Scheme_Object **argv) { +#ifdef JIT_THREAD_LOCAL +# define THDLOC &BOTTOM_VARIABLE +#else +# define THDLOC NULL +#endif scheme_current_lwc->runstack_start = MZ_RUNSTACK; scheme_current_lwc->cont_mark_stack_start = MZ_CONT_MARK_STACK; - return sjc.native_starter_code(data, argc, argv, code, (void **)&scheme_current_lwc->stack_start); + return sjc.native_starter_code(data, argc, argv, THDLOC, code, (void **)&scheme_current_lwc->stack_start); +#undef THDLOC } void scheme_fill_stack_lwc_end(void) XFORM_SKIP_PROC @@ -2974,6 +2976,8 @@ static void generate_function_prolog(mz_jit_state *jitter, void *code, int max_l jit_prolog(NATIVE_ARG_COUNT); + mz_push_threadlocal_early(); + in = jit_arg_p(); jit_getarg_p(JIT_R0, in); /* closure */ in = jit_arg_i(); @@ -2982,7 +2986,7 @@ static void generate_function_prolog(mz_jit_state *jitter, void *code, int max_l jit_getarg_p(JIT_R2, in); /* argv */ mz_push_locals(); - mz_push_threadlocal(); + mz_push_threadlocal(in); mz_tl_ldi_p(JIT_RUNSTACK, tl_MZ_RUNSTACK); @@ -3835,7 +3839,7 @@ int scheme_native_arity_check(Scheme_Object *closure, int argc) return 1; } - return sjc.check_arity_code(closure, argc + 1, 0); + return sjc.check_arity_code(closure, argc + 1, 0 EXTRA_NATIVE_ARGUMENT); } Scheme_Object *scheme_get_native_arity(Scheme_Object *closure) @@ -3878,7 +3882,7 @@ Scheme_Object *scheme_get_native_arity(Scheme_Object *closure) return a; } - return sjc.get_arity_code(closure, 0, 0); + return sjc.get_arity_code(closure, 0, 0 EXTRA_NATIVE_ARGUMENT); } /**********************************************************************/ diff --git a/src/racket/src/jit.h b/src/racket/src/jit.h index cbf61a2677..c00faec873 100644 --- a/src/racket/src/jit.h +++ b/src/racket/src/jit.h @@ -14,9 +14,14 @@ is 
related to the way the x86_64 port shuffles arguments into temporary registers. - 5) On x86_64, arguments are delivered in JIT_V2, JIT_V3, and JIT_R2, - in that order. So don't set JIT_R2 before getting the third - argument, etc. + 5) On non-Win64 x86_64, arguments are delivered in JIT_V2, JIT_V3, + JIT_R2, and JIT_R1 in that order. So don't set JIT_R2 before + getting the third argument, etc. + + On Win64 x86_64, arguments are delivered in JIT_R1, JIT_R2, + and other registers. So don't set JIT_R2 before getting the + second argument, etc. + */ #ifdef __APPLE__ @@ -83,7 +88,11 @@ END_XFORM_ARITH; #define WORDS_TO_BYTES(x) ((x) << JIT_LOG_WORD_SIZE) #define MAX_TRY_SHIFT 30 -#define NATIVE_ARG_COUNT 3 +#ifdef USE_THREAD_LOCAL +# define NATIVE_ARG_COUNT 4 +#else +# define NATIVE_ARG_COUNT 3 +#endif #define JIT_LOG_DOUBLE_SIZE 3 #define JIT_DOUBLE_SIZE (1 << JIT_LOG_DOUBLE_SIZE) @@ -166,12 +175,13 @@ extern int scheme_jit_malloced; THREAD_LOCAL_DECL(extern double scheme_jit_save_fp); #endif -typedef int (*Native_Check_Arity_Proc)(Scheme_Object *o, int argc, int dummy); -typedef Scheme_Object *(*Native_Get_Arity_Proc)(Scheme_Object *o, int dumm1, int dummy2); +typedef int (*Native_Check_Arity_Proc)(Scheme_Object *o, int argc, int dummy EXTRA_NATIVE_ARGUMENT_TYPE); +typedef Scheme_Object *(*Native_Get_Arity_Proc)(Scheme_Object *o, int dumm1, int dummy2 EXTRA_NATIVE_ARGUMENT_TYPE); typedef Scheme_Object *(*LWC_Native_Starter)(void *data, int argc, Scheme_Object **argv, - Scheme_Closed_Prim *chain_to, + void *thdloc, + Scheme_Native_Proc *chain_to, void **save_pos); typedef struct Apply_LWC_Args { @@ -374,7 +384,6 @@ typedef struct { #endif #ifdef JIT_THREAD_LOCAL -# define BOTTOM_VARIABLE GC_variable_stack # define tl_delta(id) ((uintptr_t)&(id) - (uintptr_t)&BOTTOM_VARIABLE) # define tl_MZ_RUNSTACK tl_delta(MZ_RUNSTACK) # define tl_MZ_RUNSTACK_START tl_delta(MZ_RUNSTACK_START) @@ -787,19 +796,16 @@ void scheme_jit_prolog_again(mz_jit_state *jitter, int n, int 
ret_addr_reg) #endif #ifdef JIT_THREAD_LOCAL -# define mz_get_threadlocal() (mz_prepare(0), (void)mz_finish(scheme_jit_get_threadlocal_table), jit_retval(JIT_R0)) # ifdef JIT_X86_64 # define mz_pop_threadlocal() mz_get_local_p(JIT_R14, JIT_LOCAL4) -# define mz_push_threadlocal() (mz_set_local_p(JIT_R14, JIT_LOCAL4), \ - PUSHQr(JIT_R0), PUSHQr(JIT_R1), PUSHQr(JIT_R2), PUSHQr(JIT_R2), \ - mz_get_threadlocal(), jit_retval(JIT_R0), jit_movr_p(JIT_R14, JIT_R0), \ - POPQr(JIT_R2), POPQr(JIT_R2), POPQr(JIT_R1), POPQr(JIT_R0)) +# define mz_push_threadlocal(in) /* empty */ +# define mz_push_threadlocal_early() (mz_set_local_p(JIT_R14, JIT_LOCAL4), jit_movr_p(JIT_R14, JIT_R1)) # define mz_repush_threadlocal() mz_set_local_p(JIT_R14, JIT_LOCAL4) # else # define mz_pop_threadlocal() /* empty */ # ifdef THREAD_LOCAL_USES_JIT_V2 # define _mz_install_threadlocal(reg) jit_movr_p(JIT_V2, reg) -# define mz_repush_threadlocal() /* empty */ +# define mz_repush_threadlocal(in) /* empty */ # else # define _mz_install_threadlocal(reg) mz_set_local_p(reg, JIT_LOCAL4) # define mz_repush_threadlocal() (PUSHQr(JIT_R0), jit_ldr_p(JIT_R0, _EBP), \ @@ -807,13 +813,13 @@ void scheme_jit_prolog_again(mz_jit_state *jitter, int n, int ret_addr_reg) jit_stxi_p(JIT_LOCAL4, _EBP, JIT_R0), \ POPQr(JIT_R0)) # endif -# define mz_push_threadlocal() (PUSHQr(JIT_R0), PUSHQr(JIT_R1), PUSHQr(JIT_R2), PUSHQr(JIT_R2), \ - mz_get_threadlocal(), jit_retval(JIT_R0), _mz_install_threadlocal(JIT_R0), \ - POPQr(JIT_R2), POPQr(JIT_R2), POPQr(JIT_R1), POPQr(JIT_R0)) +# define mz_push_threadlocal(in) (in = jit_arg_p(), jit_getarg_p(JIT_V2, in), _mz_install_threadlocal(JIT_V2)) +# define mz_push_threadlocal_early() /* empty */ # endif #else # define mz_pop_threadlocal() /* empty */ -# define mz_push_threadlocal() /* empty */ +# define mz_push_threadlocal(in) /* empty */ +# define mz_push_threadlocal_early() /* empty */ # define mz_repush_threadlocal() /* empty */ #endif diff --git a/src/racket/src/jitcall.c 
b/src/racket/src/jitcall.c index ed80267f2a..c55d72b723 100644 --- a/src/racket/src/jitcall.c +++ b/src/racket/src/jitcall.c @@ -678,7 +678,7 @@ int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc jit_base_prolog(); #else refr = jit_patchable_movi_p(JIT_R1, jit_forward()); - _jit_prolog_again(jitter, 3, JIT_R1); /* saves V registers (or copied V registers) */ + _jit_prolog_again(jitter, NATIVE_ARG_COUNT, JIT_R1); /* saves V registers (or copied V registers) */ #endif if (num_rands >= 0) { if (nontail_self) { jit_movr_p(JIT_R1, JIT_R0); } diff --git a/src/racket/src/jitcommon.c b/src/racket/src/jitcommon.c index b91a41f8d5..cc4e1a4421 100644 --- a/src/racket/src/jitcommon.c +++ b/src/racket/src/jitcommon.c @@ -184,12 +184,15 @@ static int common0(mz_jit_state *jitter, void *_data) /* Called as a function: */ sjc.check_arity_code = (Native_Check_Arity_Proc)jit_get_ip().ptr; jit_prolog(NATIVE_ARG_COUNT); /* only need 2 arguments, but return path overlaps with proc conventions */ + mz_push_threadlocal_early(); in = jit_arg_p(); jit_getarg_p(JIT_R0, in); /* closure */ in = jit_arg_p(); jit_getarg_i(JIT_R2, in); /* argc */ + in = jit_arg_p(); + jit_getarg_i(JIT_R1, in); /* ignored */ mz_push_locals(); - mz_push_threadlocal(); + mz_push_threadlocal(in); jit_movi_i(JIT_R1, -1); jit_ldxi_p(JIT_V1, JIT_R0, &((Scheme_Native_Closure *)0x0)->code); jit_ldxi_p(JIT_V1, JIT_V1, &((Scheme_Native_Closure_Data *)0x0)->arity_code); @@ -200,10 +203,15 @@ static int common0(mz_jit_state *jitter, void *_data) /* Called as a function: */ sjc.get_arity_code = (Native_Get_Arity_Proc)jit_get_ip().ptr; jit_prolog(NATIVE_ARG_COUNT); /* only need 1 argument, but return path overlaps with proc conventions */ + mz_push_threadlocal_early(); in = jit_arg_p(); jit_getarg_p(JIT_R0, in); /* closure */ + in = jit_arg_p(); + jit_getarg_p(JIT_R1, in); /* ignored */ + in = jit_arg_p(); + jit_getarg_i(JIT_R1, in); /* ignored */ mz_push_locals(); - mz_push_threadlocal(); + 
mz_push_threadlocal(in); jit_movi_i(JIT_R1, -1); (void)jit_movi_p(JIT_R2, 0x0); jit_ldxi_p(JIT_V1, JIT_R0, &((Scheme_Native_Closure *)0x0)->code); @@ -658,6 +666,7 @@ static int common2(mz_jit_state *jitter, void *_data) for the state of registers on entry */ scheme_on_demand_jit_code = jit_get_ip().ptr; jit_prolog(NATIVE_ARG_COUNT); + mz_push_threadlocal_early(); in = jit_arg_p(); jit_getarg_p(JIT_R0, in); /* closure */ in = jit_arg_i(); @@ -666,7 +675,7 @@ static int common2(mz_jit_state *jitter, void *_data) jit_getarg_p(JIT_R2, in); /* argv */ CHECK_LIMIT(); mz_push_locals(); - mz_push_threadlocal(); + mz_push_threadlocal(in); mz_tl_ldi_p(JIT_RUNSTACK, tl_MZ_RUNSTACK); sjc.on_demand_jit_arity_code = jit_get_ip().ptr; /* <<<- arity variant starts here */ jit_subi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(2)); @@ -3198,9 +3207,9 @@ static int more_common1(mz_jit_state *jitter, void *_data) /* store stack pointer in address given by 5th argument, then jump to the address given by the 4th argument */ - jit_getprearg_pipp_p(JIT_PREARG); + jit_getprearg_pippp_p(JIT_PREARG); jit_str_p(JIT_PREARG, JIT_SP); - jit_getprearg_pip_p(JIT_PREARG); + jit_getprearg_pipp_p(JIT_PREARG); jit_jmpr(JIT_PREARG); CHECK_LIMIT(); diff --git a/src/racket/src/lightning/i386/core-common.h b/src/racket/src/lightning/i386/core-common.h index 7048af403a..39870bedea 100644 --- a/src/racket/src/lightning/i386/core-common.h +++ b/src/racket/src/lightning/i386/core-common.h @@ -454,15 +454,15 @@ static jit_state _jit; #define jit_getarg_ul(reg, ofs) jit_extr_uc_ul((reg), jit_arg_reg(ofs)) #define jit_getarg_us(reg, ofs) jit_extr_us_ul((reg), jit_arg_reg(ofs)) #else -#define jit_getarg_c(reg, ofs) jit_ldxi_c((reg), JIT_FP, (ofs)); -#define jit_getarg_uc(reg, ofs) jit_ldxi_uc((reg), JIT_FP, (ofs)); -#define jit_getarg_s(reg, ofs) jit_ldxi_s((reg), JIT_FP, (ofs)); -#define jit_getarg_us(reg, ofs) jit_ldxi_us((reg), JIT_FP, (ofs)); -#define jit_getarg_i(reg, ofs) jit_ldxi_i((reg), JIT_FP, (ofs)); 
-#define jit_getarg_ui(reg, ofs) jit_ldxi_ui((reg), JIT_FP, (ofs)); -#define jit_getarg_l(reg, ofs) jit_ldxi_l((reg), JIT_FP, (ofs)); -#define jit_getarg_ul(reg, ofs) jit_ldxi_ul((reg), JIT_FP, (ofs)); -#define jit_getarg_p(reg, ofs) jit_ldxi_p((reg), JIT_FP, (ofs)); +#define jit_getarg_c(reg, ofs) jit_ldxi_c((reg), JIT_FP, (ofs)) +#define jit_getarg_uc(reg, ofs) jit_ldxi_uc((reg), JIT_FP, (ofs)) +#define jit_getarg_s(reg, ofs) jit_ldxi_s((reg), JIT_FP, (ofs)) +#define jit_getarg_us(reg, ofs) jit_ldxi_us((reg), JIT_FP, (ofs)) +#define jit_getarg_i(reg, ofs) jit_ldxi_i((reg), JIT_FP, (ofs)) +#define jit_getarg_ui(reg, ofs) jit_ldxi_ui((reg), JIT_FP, (ofs)) +#define jit_getarg_l(reg, ofs) jit_ldxi_l((reg), JIT_FP, (ofs)) +#define jit_getarg_ul(reg, ofs) jit_ldxi_ul((reg), JIT_FP, (ofs)) +#define jit_getarg_p(reg, ofs) jit_ldxi_p((reg), JIT_FP, (ofs)) #endif #endif diff --git a/src/racket/src/lightning/i386/core.h b/src/racket/src/lightning/i386/core.h index 99cb6132d5..1d1adbca37 100644 --- a/src/racket/src/lightning/i386/core.h +++ b/src/racket/src/lightning/i386/core.h @@ -367,15 +367,18 @@ struct jit_local_state { # define jit_getprearg__p(r) (MOVQrr(_ECX, r)) # define jit_getprearg_pip_p(r) (MOVQrr(JIT_R(9), r)) # define jit_getprearg_pipp_p(r) (jit_ldxi_p(r, JIT_SP, 40)) +# define jit_getprearg_pippp_p(r) (jit_ldxi_p(r, JIT_SP, 48)) # else # define jit_getprearg__p(r) (MOVQrr(_EDI, r)) # define jit_getprearg_pip_p(r) (MOVQrr(_ECX, r)) # define jit_getprearg_pipp_p(r) (MOVQrr(JIT_R(8), r)) +# define jit_getprearg_pippp_p(r) (MOVQrr(JIT_R(9), r)) # endif #else # define jit_getprearg__p(r) (jit_ldxi_p(r, JIT_SP, 4)) # define jit_getprearg_pip_p(r) (jit_ldxi_p(r, JIT_SP, 16)) # define jit_getprearg_pipp_p(r) (jit_ldxi_p(r, JIT_SP, 20)) +# define jit_getprearg_pippp_p(r) (jit_ldxi_p(r, JIT_SP, 24)) #endif #ifdef JIT_X86_64 diff --git a/src/racket/src/schpriv.h b/src/racket/src/schpriv.h index f9556d0d8b..2fdfab4bf3 100644 --- a/src/racket/src/schpriv.h +++ 
b/src/racket/src/schpriv.h @@ -1337,6 +1337,18 @@ void scheme_clean_cust_box_list(void); void scheme_notify_code_gc(void); #endif +#ifdef USE_THREAD_LOCAL +# define BOTTOM_VARIABLE GC_variable_stack +# define EXTRA_NATIVE_ARGUMENT , &BOTTOM_VARIABLE +# define EXTRA_NATIVE_ARGUMENT_TYPE , void* thdloc +#else +# define EXTRA_NATIVE_ARGUMENT /* empty */ +# define EXTRA_NATIVE_ARGUMENT_TYPE /* empty */ +#endif + +typedef struct Scheme_Object *(Scheme_Native_Proc)(void *d, int argc, struct Scheme_Object *argv[] + EXTRA_NATIVE_ARGUMENT_TYPE); + /*========================================================================*/ /* control flow */ /*========================================================================*/ @@ -2321,7 +2333,7 @@ typedef struct { typedef struct Scheme_Native_Closure_Data { Scheme_Inclhash_Object iso; /* type tag only set when needed, but flags always needed */ - Scheme_Closed_Prim *start_code; /* When not yet JITted, this is = to + Scheme_Native_Proc *start_code; /* When not yet JITted, this is = to scheme_on_demand_jit_code */ union { void *tail_code; /* For non-case-lambda */ @@ -2379,7 +2391,7 @@ void scheme_clear_lwc(void); THREAD_LOCAL_DECL(MZ_EXTERN Scheme_Current_LWC *scheme_current_lwc); -Scheme_Object *scheme_call_as_lightweight_continuation(Scheme_Closed_Prim *code, +Scheme_Object *scheme_call_as_lightweight_continuation(Scheme_Native_Proc *code, void *data, int argc, Scheme_Object **argv);