From ad890077d0250a4e03d897e6bbb02140bfddf51a Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Tue, 21 Dec 2010 11:05:27 -0600 Subject: [PATCH] JIT-inline `values' and make it synced (not "unsafe") for futures --- src/racket/src/fun.c | 3 + src/racket/src/future.c | 33 +++++++++++ src/racket/src/future.h | 5 +- src/racket/src/jit.c | 122 +++++++++++++++++++++++++++++++++++++++- 4 files changed, 160 insertions(+), 3 deletions(-) diff --git a/src/racket/src/fun.c b/src/racket/src/fun.c index c16b7451d3..319b9e4d81 100644 --- a/src/racket/src/fun.c +++ b/src/racket/src/fun.c @@ -281,6 +281,9 @@ scheme_init_fun (Scheme_Env *env) "values", 0, -1, 0, -1); + SCHEME_PRIM_PROC_FLAGS(scheme_values_func) |= (SCHEME_PRIM_IS_UNARY_INLINED + | SCHEME_PRIM_IS_BINARY_INLINED + | SCHEME_PRIM_IS_NARY_INLINED); scheme_add_global_constant("values", scheme_values_func, env); diff --git a/src/racket/src/future.c b/src/racket/src/future.c index 1e00147f8b..427d2e6a10 100644 --- a/src/racket/src/future.c +++ b/src/racket/src/future.c @@ -1346,6 +1346,28 @@ void scheme_rtcall_void_void_3args(const char *who, int src_type, prim_void_void future->arg_S0 = NULL; } +void scheme_rtcall_allocate_values(const char *who, int src_type, int count, Scheme_Thread *t, + prim_allocate_values_t f) + XFORM_SKIP_PROC +/* Called in future thread */ +{ + Scheme_Future_Thread_State *fts = scheme_future_thread_state; + future_t *future = fts->thread->current_ft; + + future->prim_protocol = SIG_ALLOC_VALUES; + + future->arg_i0 = count; + future->arg_s0 = (Scheme_Object *)t; + + future->time_of_request = scheme_get_inexact_milliseconds(); + future->source_of_request = who; + future->source_type = src_type; + + future_do_runtimecall(fts, (void*)f, 1); + + future->arg_s0 = NULL; +} + #ifdef MZ_PRECISE_GC uintptr_t scheme_rtcall_alloc(const char *who, int src_type) @@ -1568,6 +1590,17 @@ static void do_invoke_rtcall(Scheme_Future_State *fs, future_t *future) p_seg = (Scheme_Thread *)future->arg_s0; future->arg_s0 = NULL; scheme_new_mark_segment(p_seg); + break; + } + case SIG_ALLOC_VALUES: + { + prim_allocate_values_t func = (prim_allocate_values_t)future->prim_func; + GC_CAN_IGNORE Scheme_Object *arg_s0 = future->arg_s0; + + future->arg_s0 = NULL; + + func(future->arg_i0, (Scheme_Thread *)arg_s0); + break; } # define JIT_TS_LOCALIZE(t, f) GC_CAN_IGNORE t f = future->f diff --git a/src/racket/src/future.h b/src/racket/src/future.h index 8ae59db19e..8b42feb2a9 100644 --- a/src/racket/src/future.h +++ b/src/racket/src/future.h @@ -29,6 +29,7 @@ typedef Scheme_Object* (*prim_obj_int_pobj_obj_t)(Scheme_Object*, int, Scheme_Ob typedef Scheme_Object* (*prim_int_pobj_obj_t)(int, Scheme_Object**); typedef Scheme_Object* (*prim_int_pobj_obj_obj_t)(int, Scheme_Object**, Scheme_Object*); typedef void* (*prim_pvoid_pvoid_pvoid_t)(void*, void*); +typedef void (*prim_allocate_values_t)(int, Scheme_Thread *); #define PENDING 0 #define RUNNING 1 @@ -121,6 +122,7 @@ typedef struct future_t { #define SIG_VOID_VOID_3ARGS 1 #define SIG_ALLOC 2 #define SIG_ALLOC_MARK_SEGMENT 3 +#define SIG_ALLOC_VALUES 4 # include "jit_ts_protos.h" @@ -139,7 +141,8 @@ extern Scheme_Object *scheme_ts_scheme_force_value_same_mark(Scheme_Object *v); extern void scheme_rtcall_void_void_3args(const char *who, int src_type, prim_void_void_3args_t f); extern uintptr_t scheme_rtcall_alloc(const char *who, int src_type); extern void scheme_rtcall_new_mark_segment(Scheme_Thread *p); - +extern void scheme_rtcall_allocate_values(const char *who, int src_type, int count, Scheme_Thread *t, + prim_allocate_values_t f); #else #define IS_WORKER_THREAD 0 diff --git a/src/racket/src/jit.c b/src/racket/src/jit.c index c4e9660ddc..faabfd4fdf 100644 --- a/src/racket/src/jit.c +++ b/src/racket/src/jit.c @@ -179,6 +179,7 @@ SHARED_OK static void *struct_set_code, *struct_set_multi_code; SHARED_OK static void *struct_proc_extract_code; SHARED_OK static void *bad_app_vals_target; SHARED_OK static void *app_values_slow_code, *app_values_multi_slow_code, *app_values_tail_slow_code; +SHARED_OK static void *values_code; SHARED_OK static void *finish_tail_call_code, *finish_tail_call_fixup_code; SHARED_OK static void *module_run_start_code, *module_exprun_start_code, *module_start_start_code; SHARED_OK static void *box_flonum_from_stack_code; @@ -3056,6 +3057,29 @@ static int generate_pause_for_gc_and_retry(mz_jit_state *jitter, #endif } +static void allocate_values(int count, Scheme_Thread *p) +{ + Scheme_Object **a; + + a = MALLOC_N(Scheme_Object *, count); + + p->values_buffer = a; + p->values_buffer_size = count; +} + +#ifdef MZ_USE_FUTURES +static void ts_allocate_values(int count, Scheme_Thread *p) XFORM_SKIP_PROC +{ + if (scheme_use_rtcall) { + scheme_rtcall_allocate_values("[allocate_values]", FSRC_OTHER, count, p, allocate_values); + } else + allocate_values(count, p); +} +#else +# define ts_allocate_values allocate_values +#endif + + static int generate_direct_prim_tail_call(mz_jit_state *jitter, int num_rands) { /* JIT_V1 must have the target function pointer. @@ -7416,8 +7440,9 @@ static int generate_inlined_unary(mz_jit_state *jitter, Scheme_App2_Rec *app, in } else if (IS_NAMED_PRIM(rator, "vector-immutable") || IS_NAMED_PRIM(rator, "vector")) { return generate_vector_alloc(jitter, rator, NULL, app, NULL); - } else if (IS_NAMED_PRIM(rator, "list*")) { - /* on a single argument, `list*' is identity */ + } else if (IS_NAMED_PRIM(rator, "list*") + || IS_NAMED_PRIM(rator, "values")) { + /* on a single argument, `list*' or `values' is identity */ mz_runstack_skipped(jitter, 1); generate_non_tail(app->rand, jitter, 0, 1, 0); CHECK_LIMIT(); @@ -8708,6 +8733,25 @@ static int generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i allocate_rectangular(jitter); + return 1; + } else if (IS_NAMED_PRIM(rator, "values")) { + Scheme_Object *args[3]; + + args[0] = rator; + args[1] = app->rand1; + args[2] = app->rand2; + + generate_app(NULL, args, 2, jitter, 0, 0, 2); + + CHECK_LIMIT(); + mz_rs_sync(); + + jit_movi_l(JIT_V1, 2); + (void)jit_calli(values_code); + + mz_rs_inc(2); /* no sync */ + mz_runstack_popped(jitter, 2); + return 1; } } @@ -9166,6 +9210,28 @@ static int generate_inlined_nary(mz_jit_state *jitter, Scheme_App_Rec *app, int mz_runstack_popped(jitter, c); } + return 1; + } else if (IS_NAMED_PRIM(rator, "values")) { + int c = app->num_args; + + if (c) { + generate_app(app, NULL, c, jitter, 0, 0, 2); + CHECK_LIMIT(); + mz_rs_sync(); + + jit_movi_l(JIT_V1, c); + (void)jit_calli(values_code); + + mz_rs_inc(c); /* no sync */ + mz_runstack_popped(jitter, c); + } else { + mz_tl_ldi_p(JIT_R2, tl_scheme_current_thread); + jit_movi_l(JIT_R0, 0); + jit_stxi_l(((int)&((Scheme_Thread *)0x0)->ku.multiple.count), JIT_R2, JIT_R0); + jit_stxi_p(((int)&((Scheme_Thread *)0x0)->ku.multiple.array), JIT_R2, JIT_R0); + jit_movi_p(JIT_R0, SCHEME_MULTIPLE_VALUES); + } + return 1; } else if (IS_NAMED_PRIM(rator, "+")) { return generate_nary_arith(jitter, app, 1, 0, NULL, 1); @@ -11958,6 +12024,58 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) CHECK_LIMIT(); } + /*** values_code ***/ + /* Arguments on runstack, V1 has count */ + { + GC_CAN_IGNORE jit_insn *refslow, *ref1, *refloop, *ref2; + + values_code = jit_get_ip().ptr; + mz_prolog(JIT_R1); + mz_tl_ldi_p(JIT_R2, tl_scheme_current_thread); + jit_ldxi_p(JIT_R1, JIT_R2, &((Scheme_Thread *)0x0)->values_buffer); + ref1 = jit_bnei_p(jit_forward(), JIT_R1, NULL); + CHECK_LIMIT(); + + /* Allocate new array: */ + refslow = _jit.x.pc; + JIT_UPDATE_THREAD_RSPTR(); + mz_prepare(2); + jit_pusharg_p(JIT_R2); + jit_pusharg_i(JIT_V1); + (void)mz_finish_lwe(ts_allocate_values, ref2); + CHECK_LIMIT(); + + /* Try again... */ + mz_tl_ldi_p(JIT_R2, tl_scheme_current_thread); + jit_ldxi_p(JIT_R1, JIT_R2, &((Scheme_Thread *)0x0)->values_buffer); + + /* Buffer is non-NULL... big enough? */ + mz_patch_branch(ref1); + jit_ldxi_i(JIT_R0, JIT_R2, &((Scheme_Thread *)0x0)->values_buffer_size); + jit_bltr_i(refslow, JIT_R0, JIT_V1); + + /* Buffer is ready */ + jit_stxi_p(&((Scheme_Thread *)0x0)->ku.multiple.array, JIT_R2, JIT_R1); + jit_stxi_i(&((Scheme_Thread *)0x0)->ku.multiple.count, JIT_R2, JIT_V1); + CHECK_LIMIT(); + + /* Copy values over: */ + jit_movr_p(JIT_R0, JIT_RUNSTACK); + refloop = _jit.x.pc; + jit_ldr_p(JIT_R2, JIT_R0); + jit_str_p(JIT_R1, JIT_R2); + jit_subi_l(JIT_V1, JIT_V1, 1); + jit_addi_p(JIT_R0, JIT_R0, JIT_WORD_SIZE); + jit_addi_p(JIT_R1, JIT_R1, JIT_WORD_SIZE); + jit_bnei_l(refloop, JIT_V1, 0); + CHECK_LIMIT(); + + jit_movi_p(JIT_R0, SCHEME_MULTIPLE_VALUES); + + mz_epilog(JIT_R1); + CHECK_LIMIT(); + } + /* *** {vector,string,bytes}_{ref,set}_[check_index_]code *** */ /* R0 is vector/string/bytes, R1 is index (Scheme number in check-index mode), V1 is vector/string/bytes offset in non-check-index mode (and for