streamline thread-local setup in JIT-generated code
Pass a pointer to the thread-local table on entry to JIT-generated code, instead of having the JIT-generated code call a C function to get the table. This doesn't seem to improve performance on my machine, but it generates less code and is probably faster in some cases.
This commit is contained in:
parent
761a40d483
commit
558b03034a
|
@ -2783,7 +2783,7 @@ scheme_do_eval(Scheme_Object *obj, int num_rands, Scheme_Object **rands,
|
|||
|
||||
tmpv = obj;
|
||||
obj = NULL; /* safe for space, since tmpv is ignored by the GC */
|
||||
v = data->start_code(tmpv, num_rands, rands);
|
||||
v = data->start_code(tmpv, num_rands, rands EXTRA_NATIVE_ARGUMENT);
|
||||
|
||||
if (v == SCHEME_TAIL_CALL_WAITING) {
|
||||
/* [TC-SFS]; see schnapp.inc */
|
||||
|
|
|
@ -3124,7 +3124,7 @@ static Scheme_Object *_apply_native(Scheme_Object *obj, int num_rands, Scheme_Ob
|
|||
MZ_CONT_MARK_POS += 2;
|
||||
old_cont_mark_stack = MZ_CONT_MARK_STACK;
|
||||
|
||||
obj = data->start_code(obj, num_rands, rands);
|
||||
obj = data->start_code(obj, num_rands, rands EXTRA_NATIVE_ARGUMENT);
|
||||
|
||||
if (obj == SCHEME_TAIL_CALL_WAITING)
|
||||
return force_values(obj, 1);
|
||||
|
|
|
@ -1974,7 +1974,7 @@ void *worker_thread_future_loop(void *arg)
|
|||
Scheme_Future_Thread_State *fts = params->fts;
|
||||
Scheme_Future_State *fs = params->fs;
|
||||
Scheme_Object *v;
|
||||
Scheme_Closed_Prim *jitcode;
|
||||
Scheme_Native_Proc *jitcode;
|
||||
future_t *ft;
|
||||
mz_jmp_buf newbuf;
|
||||
int fid;
|
||||
|
|
|
@ -120,10 +120,6 @@ static Scheme_Object *clear_rs_arguments(Scheme_Object *v, int size, int delta)
|
|||
return v;
|
||||
}
|
||||
|
||||
#ifdef JIT_THREAD_LOCAL
|
||||
void *scheme_jit_get_threadlocal_table() XFORM_SKIP_PROC { return &BOTTOM_VARIABLE; }
|
||||
#endif
|
||||
|
||||
#define JIT_TS_PROCS
|
||||
#define JIT_BOX_TS_PROCS
|
||||
#include "jit_ts.c"
|
||||
|
@ -134,14 +130,20 @@ void *scheme_jit_get_threadlocal_table() XFORM_SKIP_PROC { return &BOTTOM_VARIAB
|
|||
|
||||
THREAD_LOCAL_DECL(Scheme_Current_LWC *scheme_current_lwc);
|
||||
|
||||
Scheme_Object *scheme_call_as_lightweight_continuation(Scheme_Closed_Prim *code,
|
||||
Scheme_Object *scheme_call_as_lightweight_continuation(Scheme_Native_Proc *code,
|
||||
void *data,
|
||||
int argc,
|
||||
Scheme_Object **argv)
|
||||
{
|
||||
#ifdef JIT_THREAD_LOCAL
|
||||
# define THDLOC &BOTTOM_VARIABLE
|
||||
#else
|
||||
# define THDLOC NULL
|
||||
#endif
|
||||
scheme_current_lwc->runstack_start = MZ_RUNSTACK;
|
||||
scheme_current_lwc->cont_mark_stack_start = MZ_CONT_MARK_STACK;
|
||||
return sjc.native_starter_code(data, argc, argv, code, (void **)&scheme_current_lwc->stack_start);
|
||||
return sjc.native_starter_code(data, argc, argv, THDLOC, code, (void **)&scheme_current_lwc->stack_start);
|
||||
#undef THDLOC
|
||||
}
|
||||
|
||||
void scheme_fill_stack_lwc_end(void) XFORM_SKIP_PROC
|
||||
|
@ -2974,6 +2976,8 @@ static void generate_function_prolog(mz_jit_state *jitter, void *code, int max_l
|
|||
|
||||
jit_prolog(NATIVE_ARG_COUNT);
|
||||
|
||||
mz_push_threadlocal_early();
|
||||
|
||||
in = jit_arg_p();
|
||||
jit_getarg_p(JIT_R0, in); /* closure */
|
||||
in = jit_arg_i();
|
||||
|
@ -2982,7 +2986,7 @@ static void generate_function_prolog(mz_jit_state *jitter, void *code, int max_l
|
|||
jit_getarg_p(JIT_R2, in); /* argv */
|
||||
|
||||
mz_push_locals();
|
||||
mz_push_threadlocal();
|
||||
mz_push_threadlocal(in);
|
||||
|
||||
mz_tl_ldi_p(JIT_RUNSTACK, tl_MZ_RUNSTACK);
|
||||
|
||||
|
@ -3835,7 +3839,7 @@ int scheme_native_arity_check(Scheme_Object *closure, int argc)
|
|||
return 1;
|
||||
}
|
||||
|
||||
return sjc.check_arity_code(closure, argc + 1, 0);
|
||||
return sjc.check_arity_code(closure, argc + 1, 0 EXTRA_NATIVE_ARGUMENT);
|
||||
}
|
||||
|
||||
Scheme_Object *scheme_get_native_arity(Scheme_Object *closure)
|
||||
|
@ -3878,7 +3882,7 @@ Scheme_Object *scheme_get_native_arity(Scheme_Object *closure)
|
|||
return a;
|
||||
}
|
||||
|
||||
return sjc.get_arity_code(closure, 0, 0);
|
||||
return sjc.get_arity_code(closure, 0, 0 EXTRA_NATIVE_ARGUMENT);
|
||||
}
|
||||
|
||||
/**********************************************************************/
|
||||
|
|
|
@ -14,9 +14,14 @@
|
|||
is related to the way the x86_64 port shuffles arguments into
|
||||
temporary registers.
|
||||
|
||||
5) On x86_64, arguments are delivered in JIT_V2, JIT_V3, and JIT_R2,
|
||||
in that order. So don't set JIT_R2 before getting the third
|
||||
argument, etc.
|
||||
5) On non-Win64 x86_64, arguments are delivered in JIT_V2, JIT_V3,
|
||||
JIT_R2, and JIT_R1 in that order. So don't set JIT_R2 before
|
||||
getting the third argument, etc.
|
||||
|
||||
On Win64 x86_64, arguments are delivered in JIT_R1, JIT_R2,
|
||||
and other registers. So don't set JIT_R2 before getting the
|
||||
second argument, etc.
|
||||
|
||||
*/
|
||||
|
||||
#ifdef __APPLE__
|
||||
|
@ -83,7 +88,11 @@ END_XFORM_ARITH;
|
|||
#define WORDS_TO_BYTES(x) ((x) << JIT_LOG_WORD_SIZE)
|
||||
#define MAX_TRY_SHIFT 30
|
||||
|
||||
#define NATIVE_ARG_COUNT 3
|
||||
#ifdef USE_THREAD_LOCAL
|
||||
# define NATIVE_ARG_COUNT 4
|
||||
#else
|
||||
# define NATIVE_ARG_COUNT 3
|
||||
#endif
|
||||
|
||||
#define JIT_LOG_DOUBLE_SIZE 3
|
||||
#define JIT_DOUBLE_SIZE (1 << JIT_LOG_DOUBLE_SIZE)
|
||||
|
@ -166,12 +175,13 @@ extern int scheme_jit_malloced;
|
|||
THREAD_LOCAL_DECL(extern double scheme_jit_save_fp);
|
||||
#endif
|
||||
|
||||
typedef int (*Native_Check_Arity_Proc)(Scheme_Object *o, int argc, int dummy);
|
||||
typedef Scheme_Object *(*Native_Get_Arity_Proc)(Scheme_Object *o, int dumm1, int dummy2);
|
||||
typedef int (*Native_Check_Arity_Proc)(Scheme_Object *o, int argc, int dummy EXTRA_NATIVE_ARGUMENT_TYPE);
|
||||
typedef Scheme_Object *(*Native_Get_Arity_Proc)(Scheme_Object *o, int dumm1, int dummy2 EXTRA_NATIVE_ARGUMENT_TYPE);
|
||||
typedef Scheme_Object *(*LWC_Native_Starter)(void *data,
|
||||
int argc,
|
||||
Scheme_Object **argv,
|
||||
Scheme_Closed_Prim *chain_to,
|
||||
void *thdloc,
|
||||
Scheme_Native_Proc *chain_to,
|
||||
void **save_pos);
|
||||
|
||||
typedef struct Apply_LWC_Args {
|
||||
|
@ -374,7 +384,6 @@ typedef struct {
|
|||
#endif
|
||||
|
||||
#ifdef JIT_THREAD_LOCAL
|
||||
# define BOTTOM_VARIABLE GC_variable_stack
|
||||
# define tl_delta(id) ((uintptr_t)&(id) - (uintptr_t)&BOTTOM_VARIABLE)
|
||||
# define tl_MZ_RUNSTACK tl_delta(MZ_RUNSTACK)
|
||||
# define tl_MZ_RUNSTACK_START tl_delta(MZ_RUNSTACK_START)
|
||||
|
@ -787,19 +796,16 @@ void scheme_jit_prolog_again(mz_jit_state *jitter, int n, int ret_addr_reg)
|
|||
#endif
|
||||
|
||||
#ifdef JIT_THREAD_LOCAL
|
||||
# define mz_get_threadlocal() (mz_prepare(0), (void)mz_finish(scheme_jit_get_threadlocal_table), jit_retval(JIT_R0))
|
||||
# ifdef JIT_X86_64
|
||||
# define mz_pop_threadlocal() mz_get_local_p(JIT_R14, JIT_LOCAL4)
|
||||
# define mz_push_threadlocal() (mz_set_local_p(JIT_R14, JIT_LOCAL4), \
|
||||
PUSHQr(JIT_R0), PUSHQr(JIT_R1), PUSHQr(JIT_R2), PUSHQr(JIT_R2), \
|
||||
mz_get_threadlocal(), jit_retval(JIT_R0), jit_movr_p(JIT_R14, JIT_R0), \
|
||||
POPQr(JIT_R2), POPQr(JIT_R2), POPQr(JIT_R1), POPQr(JIT_R0))
|
||||
# define mz_push_threadlocal(in) /* empty */
|
||||
# define mz_push_threadlocal_early() (mz_set_local_p(JIT_R14, JIT_LOCAL4), jit_movr_p(JIT_R14, JIT_R1))
|
||||
# define mz_repush_threadlocal() mz_set_local_p(JIT_R14, JIT_LOCAL4)
|
||||
# else
|
||||
# define mz_pop_threadlocal() /* empty */
|
||||
# ifdef THREAD_LOCAL_USES_JIT_V2
|
||||
# define _mz_install_threadlocal(reg) jit_movr_p(JIT_V2, reg)
|
||||
# define mz_repush_threadlocal() /* empty */
|
||||
# define mz_repush_threadlocal(in) /* empty */
|
||||
# else
|
||||
# define _mz_install_threadlocal(reg) mz_set_local_p(reg, JIT_LOCAL4)
|
||||
# define mz_repush_threadlocal() (PUSHQr(JIT_R0), jit_ldr_p(JIT_R0, _EBP), \
|
||||
|
@ -807,13 +813,13 @@ void scheme_jit_prolog_again(mz_jit_state *jitter, int n, int ret_addr_reg)
|
|||
jit_stxi_p(JIT_LOCAL4, _EBP, JIT_R0), \
|
||||
POPQr(JIT_R0))
|
||||
# endif
|
||||
# define mz_push_threadlocal() (PUSHQr(JIT_R0), PUSHQr(JIT_R1), PUSHQr(JIT_R2), PUSHQr(JIT_R2), \
|
||||
mz_get_threadlocal(), jit_retval(JIT_R0), _mz_install_threadlocal(JIT_R0), \
|
||||
POPQr(JIT_R2), POPQr(JIT_R2), POPQr(JIT_R1), POPQr(JIT_R0))
|
||||
# define mz_push_threadlocal(in) (in = jit_arg_p(), jit_getarg_p(JIT_V2, in), _mz_install_threadlocal(JIT_V2))
|
||||
# define mz_push_threadlocal_early() /* empty */
|
||||
# endif
|
||||
#else
|
||||
# define mz_pop_threadlocal() /* empty */
|
||||
# define mz_push_threadlocal() /* empty */
|
||||
# define mz_push_threadlocal(in) /* empty */
|
||||
# define mz_push_threadlocal_early() /* empty */
|
||||
# define mz_repush_threadlocal() /* empty */
|
||||
#endif
|
||||
|
||||
|
|
|
@ -678,7 +678,7 @@ int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc
|
|||
jit_base_prolog();
|
||||
#else
|
||||
refr = jit_patchable_movi_p(JIT_R1, jit_forward());
|
||||
_jit_prolog_again(jitter, 3, JIT_R1); /* saves V registers (or copied V registers) */
|
||||
_jit_prolog_again(jitter, NATIVE_ARG_COUNT, JIT_R1); /* saves V registers (or copied V registers) */
|
||||
#endif
|
||||
if (num_rands >= 0) {
|
||||
if (nontail_self) { jit_movr_p(JIT_R1, JIT_R0); }
|
||||
|
|
|
@ -184,12 +184,15 @@ static int common0(mz_jit_state *jitter, void *_data)
|
|||
/* Called as a function: */
|
||||
sjc.check_arity_code = (Native_Check_Arity_Proc)jit_get_ip().ptr;
|
||||
jit_prolog(NATIVE_ARG_COUNT); /* only need 2 arguments, but return path overlaps with proc conventions */
|
||||
mz_push_threadlocal_early();
|
||||
in = jit_arg_p();
|
||||
jit_getarg_p(JIT_R0, in); /* closure */
|
||||
in = jit_arg_p();
|
||||
jit_getarg_i(JIT_R2, in); /* argc */
|
||||
in = jit_arg_p();
|
||||
jit_getarg_i(JIT_R1, in); /* ignored */
|
||||
mz_push_locals();
|
||||
mz_push_threadlocal();
|
||||
mz_push_threadlocal(in);
|
||||
jit_movi_i(JIT_R1, -1);
|
||||
jit_ldxi_p(JIT_V1, JIT_R0, &((Scheme_Native_Closure *)0x0)->code);
|
||||
jit_ldxi_p(JIT_V1, JIT_V1, &((Scheme_Native_Closure_Data *)0x0)->arity_code);
|
||||
|
@ -200,10 +203,15 @@ static int common0(mz_jit_state *jitter, void *_data)
|
|||
/* Called as a function: */
|
||||
sjc.get_arity_code = (Native_Get_Arity_Proc)jit_get_ip().ptr;
|
||||
jit_prolog(NATIVE_ARG_COUNT); /* only need 1 argument, but return path overlaps with proc conventions */
|
||||
mz_push_threadlocal_early();
|
||||
in = jit_arg_p();
|
||||
jit_getarg_p(JIT_R0, in); /* closure */
|
||||
in = jit_arg_p();
|
||||
jit_getarg_p(JIT_R1, in); /* ignored */
|
||||
in = jit_arg_p();
|
||||
jit_getarg_i(JIT_R1, in); /* ignored */
|
||||
mz_push_locals();
|
||||
mz_push_threadlocal();
|
||||
mz_push_threadlocal(in);
|
||||
jit_movi_i(JIT_R1, -1);
|
||||
(void)jit_movi_p(JIT_R2, 0x0);
|
||||
jit_ldxi_p(JIT_V1, JIT_R0, &((Scheme_Native_Closure *)0x0)->code);
|
||||
|
@ -658,6 +666,7 @@ static int common2(mz_jit_state *jitter, void *_data)
|
|||
for the state of registers on entry */
|
||||
scheme_on_demand_jit_code = jit_get_ip().ptr;
|
||||
jit_prolog(NATIVE_ARG_COUNT);
|
||||
mz_push_threadlocal_early();
|
||||
in = jit_arg_p();
|
||||
jit_getarg_p(JIT_R0, in); /* closure */
|
||||
in = jit_arg_i();
|
||||
|
@ -666,7 +675,7 @@ static int common2(mz_jit_state *jitter, void *_data)
|
|||
jit_getarg_p(JIT_R2, in); /* argv */
|
||||
CHECK_LIMIT();
|
||||
mz_push_locals();
|
||||
mz_push_threadlocal();
|
||||
mz_push_threadlocal(in);
|
||||
mz_tl_ldi_p(JIT_RUNSTACK, tl_MZ_RUNSTACK);
|
||||
sjc.on_demand_jit_arity_code = jit_get_ip().ptr; /* <<<- arity variant starts here */
|
||||
jit_subi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(2));
|
||||
|
@ -3198,9 +3207,9 @@ static int more_common1(mz_jit_state *jitter, void *_data)
|
|||
|
||||
/* store stack pointer in address given by 6th argument, then jump to
|
||||
the address given by the 5th argument */
|
||||
jit_getprearg_pipp_p(JIT_PREARG);
|
||||
jit_getprearg_pippp_p(JIT_PREARG);
|
||||
jit_str_p(JIT_PREARG, JIT_SP);
|
||||
jit_getprearg_pip_p(JIT_PREARG);
|
||||
jit_getprearg_pipp_p(JIT_PREARG);
|
||||
jit_jmpr(JIT_PREARG);
|
||||
|
||||
CHECK_LIMIT();
|
||||
|
|
|
@ -454,15 +454,15 @@ static jit_state _jit;
|
|||
#define jit_getarg_ul(reg, ofs) jit_extr_uc_ul((reg), jit_arg_reg(ofs))
|
||||
#define jit_getarg_us(reg, ofs) jit_extr_us_ul((reg), jit_arg_reg(ofs))
|
||||
#else
|
||||
#define jit_getarg_c(reg, ofs) jit_ldxi_c((reg), JIT_FP, (ofs));
|
||||
#define jit_getarg_uc(reg, ofs) jit_ldxi_uc((reg), JIT_FP, (ofs));
|
||||
#define jit_getarg_s(reg, ofs) jit_ldxi_s((reg), JIT_FP, (ofs));
|
||||
#define jit_getarg_us(reg, ofs) jit_ldxi_us((reg), JIT_FP, (ofs));
|
||||
#define jit_getarg_i(reg, ofs) jit_ldxi_i((reg), JIT_FP, (ofs));
|
||||
#define jit_getarg_ui(reg, ofs) jit_ldxi_ui((reg), JIT_FP, (ofs));
|
||||
#define jit_getarg_l(reg, ofs) jit_ldxi_l((reg), JIT_FP, (ofs));
|
||||
#define jit_getarg_ul(reg, ofs) jit_ldxi_ul((reg), JIT_FP, (ofs));
|
||||
#define jit_getarg_p(reg, ofs) jit_ldxi_p((reg), JIT_FP, (ofs));
|
||||
#define jit_getarg_c(reg, ofs) jit_ldxi_c((reg), JIT_FP, (ofs))
|
||||
#define jit_getarg_uc(reg, ofs) jit_ldxi_uc((reg), JIT_FP, (ofs))
|
||||
#define jit_getarg_s(reg, ofs) jit_ldxi_s((reg), JIT_FP, (ofs))
|
||||
#define jit_getarg_us(reg, ofs) jit_ldxi_us((reg), JIT_FP, (ofs))
|
||||
#define jit_getarg_i(reg, ofs) jit_ldxi_i((reg), JIT_FP, (ofs))
|
||||
#define jit_getarg_ui(reg, ofs) jit_ldxi_ui((reg), JIT_FP, (ofs))
|
||||
#define jit_getarg_l(reg, ofs) jit_ldxi_l((reg), JIT_FP, (ofs))
|
||||
#define jit_getarg_ul(reg, ofs) jit_ldxi_ul((reg), JIT_FP, (ofs))
|
||||
#define jit_getarg_p(reg, ofs) jit_ldxi_p((reg), JIT_FP, (ofs))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
|
|
@ -367,15 +367,18 @@ struct jit_local_state {
|
|||
# define jit_getprearg__p(r) (MOVQrr(_ECX, r))
|
||||
# define jit_getprearg_pip_p(r) (MOVQrr(JIT_R(9), r))
|
||||
# define jit_getprearg_pipp_p(r) (jit_ldxi_p(r, JIT_SP, 40))
|
||||
# define jit_getprearg_pippp_p(r) (jit_ldxi_p(r, JIT_SP, 48))
|
||||
# else
|
||||
# define jit_getprearg__p(r) (MOVQrr(_EDI, r))
|
||||
# define jit_getprearg_pip_p(r) (MOVQrr(_ECX, r))
|
||||
# define jit_getprearg_pipp_p(r) (MOVQrr(JIT_R(8), r))
|
||||
# define jit_getprearg_pippp_p(r) (MOVQrr(JIT_R(9), r))
|
||||
# endif
|
||||
#else
|
||||
# define jit_getprearg__p(r) (jit_ldxi_p(r, JIT_SP, 4))
|
||||
# define jit_getprearg_pip_p(r) (jit_ldxi_p(r, JIT_SP, 16))
|
||||
# define jit_getprearg_pipp_p(r) (jit_ldxi_p(r, JIT_SP, 20))
|
||||
# define jit_getprearg_pippp_p(r) (jit_ldxi_p(r, JIT_SP, 24))
|
||||
#endif
|
||||
|
||||
#ifdef JIT_X86_64
|
||||
|
|
|
@ -1337,6 +1337,18 @@ void scheme_clean_cust_box_list(void);
|
|||
void scheme_notify_code_gc(void);
|
||||
#endif
|
||||
|
||||
#ifdef USE_THREAD_LOCAL
|
||||
# define BOTTOM_VARIABLE GC_variable_stack
|
||||
# define EXTRA_NATIVE_ARGUMENT , &BOTTOM_VARIABLE
|
||||
# define EXTRA_NATIVE_ARGUMENT_TYPE , void* thdloc
|
||||
#else
|
||||
# define EXTRA_NATIVE_ARGUMENT /* empty */
|
||||
# define EXTRA_NATIVE_ARGUMENT_TYPE /* empty */
|
||||
#endif
|
||||
|
||||
typedef struct Scheme_Object *(Scheme_Native_Proc)(void *d, int argc, struct Scheme_Object *argv[]
|
||||
EXTRA_NATIVE_ARGUMENT_TYPE);
|
||||
|
||||
/*========================================================================*/
|
||||
/* control flow */
|
||||
/*========================================================================*/
|
||||
|
@ -2321,7 +2333,7 @@ typedef struct {
|
|||
typedef struct Scheme_Native_Closure_Data {
|
||||
Scheme_Inclhash_Object iso; /* type tag only set when needed, but
|
||||
flags always needed */
|
||||
Scheme_Closed_Prim *start_code; /* When not yet JITted, this is = to
|
||||
Scheme_Native_Proc *start_code; /* When not yet JITted, this is = to
|
||||
scheme_on_demand_jit_code */
|
||||
union {
|
||||
void *tail_code; /* For non-case-lambda */
|
||||
|
@ -2379,7 +2391,7 @@ void scheme_clear_lwc(void);
|
|||
|
||||
THREAD_LOCAL_DECL(MZ_EXTERN Scheme_Current_LWC *scheme_current_lwc);
|
||||
|
||||
Scheme_Object *scheme_call_as_lightweight_continuation(Scheme_Closed_Prim *code,
|
||||
Scheme_Object *scheme_call_as_lightweight_continuation(Scheme_Native_Proc *code,
|
||||
void *data,
|
||||
int argc,
|
||||
Scheme_Object **argv);
|
||||
|
|
Loading…
Reference in New Issue
Block a user