From bdca9c1e8fe206717d367fc18532ff31b51f9781 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Wed, 18 Nov 2009 19:10:25 +0000 Subject: [PATCH] closer to thread-local allocation in futures svn: r16876 --- src/mzscheme/gc2/newgc.c | 27 ++++++++------ src/mzscheme/include/schthread.h | 10 ++++++ src/mzscheme/src/future.c | 49 +++++++++++++++++++++++--- src/mzscheme/src/future.h | 12 +++++-- src/mzscheme/src/jit.c | 60 ++++++++++++++++++++++---------- 5 files changed, 121 insertions(+), 37 deletions(-) diff --git a/src/mzscheme/gc2/newgc.c b/src/mzscheme/gc2/newgc.c index c4431478b2..0b43220677 100644 --- a/src/mzscheme/gc2/newgc.c +++ b/src/mzscheme/gc2/newgc.c @@ -721,24 +721,28 @@ inline static void gen0_free_nursery_mpage(NewGC *gc, mpage *page, size_t page_s free_mpage(page); } +/* Needs to be consistent with GC_alloc_alignment(): */ +#define THREAD_LOCAL_PAGE_SIZE APAGE_SIZE + void *GC_make_jit_nursery_page() { NewGC *gc = GC_get_GC(); mpage *new_mpage; { - new_mpage = gen0_create_new_nursery_mpage(gc, APAGE_SIZE); + new_mpage = gen0_create_new_nursery_mpage(gc, THREAD_LOCAL_PAGE_SIZE); /* push page */ new_mpage->next = gc->thread_local_pages; - new_mpage->next->prev = new_mpage; + if (new_mpage->next) + new_mpage->next->prev = new_mpage; gc->thread_local_pages = new_mpage; } - return new_mpage->addr; + return (void *)(NUM(new_mpage->addr) + new_mpage->size); } inline static void gen0_free_jit_nursery_page(NewGC *gc, mpage *page) { - gen0_free_nursery_mpage(gc, page, APAGE_SIZE); + gen0_free_nursery_mpage(gc, page, THREAD_LOCAL_PAGE_SIZE); } inline static mpage *gen0_create_new_mpage(NewGC *gc) { @@ -800,15 +804,15 @@ inline static void *allocate(const size_t request_size, const int type) GC_gen0_alloc_page_end = NUM(new_mpage->addr) + GEN0_PAGE_SIZE; } else { - #ifdef INSTRUMENT_PRIMITIVES - LOG_PRIM_START(((void*)garbage_collect)); - #endif - +#ifdef INSTRUMENT_PRIMITIVES + LOG_PRIM_START(((void*)garbage_collect)); +#endif + garbage_collect(gc, 0); - #ifdef 
INSTRUMENT_PRIMITIVES - LOG_PRIM_END(((void*)garbage_collect)); - #endif +#ifdef INSTRUMENT_PRIMITIVES + LOG_PRIM_END(((void*)garbage_collect)); +#endif } newptr = GC_gen0_alloc_page_ptr + allocate_size; ASSERT_VALID_OBJPTR(newptr); @@ -1044,6 +1048,7 @@ inline static void resize_gen0(NewGC *gc, unsigned long new_size) mpage *work = gc->thread_local_pages; while(work) { gen0_free_jit_nursery_page(gc, work); + work = work->next; } gc->thread_local_pages = NULL; diff --git a/src/mzscheme/include/schthread.h b/src/mzscheme/include/schthread.h index a6c96508c7..d8c1d0ed43 100644 --- a/src/mzscheme/include/schthread.h +++ b/src/mzscheme/include/schthread.h @@ -75,6 +75,10 @@ typedef long objhead; /* **************************************** */ +#if FUTURES_ENABLED +# include <pthread.h> +#endif + typedef struct Thread_Local_Variables { void **GC_variable_stack_; objhead GC_objhead_template_; @@ -208,6 +212,10 @@ typedef struct Thread_Local_Variables { unsigned long current_total_allocation_; struct gmp_tmp_stack gmp_tmp_xxx_; struct gmp_tmp_stack *gmp_tmp_current_; +#if FUTURES_ENABLED + pthread_cond_t worker_can_continue_cv_; + void *jit_future_storage_[2]; +#endif } Thread_Local_Variables; #if defined(IMPLEMENT_THREAD_LOCAL_VIA_PTHREADS) @@ -367,6 +375,8 @@ XFORM_GC_VARIABLE_STACK_THROUGH_THREAD_LOCAL; #define current_total_allocation XOA (scheme_get_thread_local_variables()->current_total_allocation_) #define gmp_tmp_xxx XOA (scheme_get_thread_local_variables()->gmp_tmp_xxx_) #define gmp_tmp_current XOA (scheme_get_thread_local_variables()->gmp_tmp_current_) +#define worker_can_continue_cv XOA (scheme_get_thread_local_variables()->worker_can_continue_cv_) +#define jit_future_storage XOA (scheme_get_thread_local_variables()->jit_future_storage_) /* **************************************** */ diff --git a/src/mzscheme/src/future.c b/src/mzscheme/src/future.c index cf2c4c58c7..050b50a980 100644 --- a/src/mzscheme/src/future.c +++ b/src/mzscheme/src/future.c @@ -66,6 +66,8 @@ 
pthread_t g_rt_threadid = 0; static pthread_mutex_t g_future_queue_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t g_future_pending_cv = PTHREAD_COND_INITIALIZER; +THREAD_LOCAL_DECL(static pthread_cond_t worker_can_continue_cv); + static pthread_mutex_t gc_ok_m = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t gc_ok_c = PTHREAD_COND_INITIALIZER; static int gc_not_ok; @@ -76,6 +78,7 @@ THREAD_LOCAL_DECL(extern unsigned long GC_gen0_alloc_page_ptr); future_t **g_current_ft; Scheme_Object ***g_scheme_current_runstack; Scheme_Object ***g_scheme_current_runstack_start; +void **g_jit_future_storage; static void register_traversers(void); extern void scheme_on_demand_generate_lambda(Scheme_Native_Closure *nc, int argc, Scheme_Object **argv); @@ -310,6 +313,7 @@ void futures_init(void) scheme_register_static(g_current_ft, sizeof(void*)); scheme_register_static(g_scheme_current_runstack, sizeof(void*)); scheme_register_static(g_scheme_current_runstack_start, sizeof(void*)); + scheme_register_static(g_jit_future_storage, 2 * sizeof(void*)); g_pool_threads[i] = threadid; } @@ -433,7 +437,6 @@ Scheme_Object *future(int argc, Scheme_Object *argv[]) //Create the future descriptor and add to the queue as 'pending' pthread_mutex_lock(&g_future_queue_mutex); ft = enqueue_future(); - pthread_cond_init(&ft->can_continue_cv, NULL); futureid = ++g_next_futureid; ft->id = futureid; ft->orig_lambda = lambda; @@ -564,7 +567,7 @@ Scheme_Object *touch(int argc, Scheme_Object *argv[]) //Signal the waiting worker thread that it //can continue running machine code - pthread_cond_signal(&ft->can_continue_cv); + pthread_cond_signal(ft->can_continue_cv); pthread_mutex_unlock(&g_future_queue_mutex); goto wait_for_rtcall_or_completion; @@ -605,6 +608,8 @@ void *worker_thread_future_loop(void *arg) pthread_mutex_unlock(&g_future_queue_mutex); */ + pthread_cond_init(&worker_can_continue_cv, NULL); + scheme_fuel_counter = 1; scheme_jit_stack_boundary = ((unsigned long)&v) - 
INITIAL_C_STACK_SIZE; @@ -612,12 +617,13 @@ void *worker_thread_future_loop(void *arg) g_stack_boundary_pointers[id] = &scheme_jit_stack_boundary; #ifdef MZ_PRECISE_GC - GC_gen0_alloc_page_ptr = 1; /* weirdly, disables inline allocation */ + GC_gen0_alloc_page_ptr = 0; /* disables inline allocation */ #endif g_current_ft = &current_ft; g_scheme_current_runstack = &scheme_current_runstack; g_scheme_current_runstack_start = &scheme_current_runstack_start; + g_jit_future_storage = &jit_future_storage[0]; sema_signal(&ready_sema); wait_for_work: @@ -726,8 +732,9 @@ int future_do_runtimecall( scheme_signal_received_at(g_signal_handle); //Wait for the signal that the RT call is finished + future->can_continue_cv = &worker_can_continue_cv; end_gc_not_ok(); - pthread_cond_wait(&future->can_continue_cv, &g_future_queue_mutex); + pthread_cond_wait(&worker_can_continue_cv, &g_future_queue_mutex); start_gc_not_ok(); //Fetch the future instance again, in case the GC has moved the pointer @@ -770,6 +777,33 @@ int rtcall_void_void(void (*f)()) } +int rtcall_void_pvoid(void (*f)(), void **retval) +{ + START_XFORM_SKIP; + future_t *future; + prim_data_t data; + memset(&data, 0, sizeof(prim_data_t)); + if (!IS_WORKER_THREAD) + { + return 0; + } + + data.void_pvoid = f; + data.sigtype = SIG_VOID_PVOID; + + future = get_my_future(); + future->rt_prim = (void*)f; + future->prim_data = data; + + future_do_runtimecall((void*)f, NULL); + + *retval = future->prim_data.retval; + + return 1; + END_XFORM_SKIP; +} + + int rtcall_obj_int_pobj_obj( prim_obj_int_pobj_obj_t f, Scheme_Object *rator, @@ -961,6 +995,13 @@ void *invoke_rtcall(future_t *future) pret = &dummy_ret; break; } + case SIG_VOID_PVOID: + { + prim_void_pvoid_t func = pdata->void_pvoid; + ret = func(); + pdata->retval = ret; + break; + } case SIG_OBJ_INT_POBJ_OBJ: { prim_obj_int_pobj_obj_t func = pdata->obj_int_pobj_obj; diff --git a/src/mzscheme/src/future.h b/src/mzscheme/src/future.h index 55a17dbbbc..1438ccc488 100644 --- 
a/src/mzscheme/src/future.h +++ b/src/mzscheme/src/future.h @@ -33,6 +33,7 @@ extern int future_do_runtimecall(void *func, void *retval); extern void futures_init(void); typedef void (*prim_void_void_t)(void); +typedef void *(*prim_void_pvoid_t)(void); typedef Scheme_Object* (*prim_obj_int_pobj_obj_t)(Scheme_Object*, int, Scheme_Object**); typedef Scheme_Object* (*prim_int_pobj_obj_t)(int, Scheme_Object**); typedef Scheme_Object* (*prim_int_pobj_obj_obj_t)(int, Scheme_Object**, Scheme_Object*); @@ -42,6 +43,7 @@ typedef struct { unsigned int sigtype; prim_void_void_t void_void; + prim_void_pvoid_t void_pvoid; prim_obj_int_pobj_obj_t obj_int_pobj_obj; prim_int_pobj_obj_t int_pobj_obj; prim_int_pobj_obj_obj_t int_pobj_obj_obj; @@ -76,7 +78,7 @@ typedef struct future { pthread_t threadid; int status; int work_completed; - pthread_cond_t can_continue_cv; + pthread_cond_t *can_continue_cv; Scheme_Object **runstack; Scheme_Object **runstack_start; @@ -150,8 +152,9 @@ extern void print_ms_and_us(void); //Signature flags for primitive invocations //Here the convention is SIG_[arg1type]_[arg2type]..._[return type] #define SIG_VOID_VOID 1 //void -> void -#define SIG_OBJ_INT_POBJ_OBJ 2 //Scheme_Object* -> int -> Scheme_Object** -> Scheme_Object* -#define SIG_INT_OBJARR_OBJ 3 //int -> Scheme_Object*[] -> Scheme_Object +#define SIG_VOID_PVOID 2 //void -> void* +#define SIG_OBJ_INT_POBJ_OBJ 3 //Scheme_Object* -> int -> Scheme_Object** -> Scheme_Object* +#define SIG_INT_OBJARR_OBJ 4 //int -> Scheme_Object*[] -> Scheme_Object #define SIG_INT_POBJ_OBJ_OBJ 17 //int -> Scheme_Object** -> Scheme_Object* -> Scheme_Object* #define SIG_PVOID_PVOID_PVOID 18 //void* -> void* -> void* @@ -166,6 +169,7 @@ extern void print_ms_and_us(void); } extern int rtcall_void_void(void (*f)()); +extern int rtcall_void_pvoid(void (*f)(), void **retval); extern int rtcall_obj_int_pobj_obj( Scheme_Object* (*f)(Scheme_Object*, int, Scheme_Object**), Scheme_Object *a, @@ -191,6 +195,7 @@ extern int 
rtcall_int_pobj_obj( #define LOG_THISCALL LOG(__FUNCTION__) #define LOG_RTCALL_VOID_VOID(f) LOG("(function=%p)", f) +#define LOG_RTCALL_VOID_PVOID(f) LOG("(function=%p)", f) #define LOG_RTCALL_OBJ_INT_POBJ_OBJ(f,a,b,c) LOG("(function = %p, a=%p, b=%d, c=%p)", f, a, b, c) #define LOG_RTCALL_OBJ_INT_POBJ_VOID(a,b,c) LOG("(%p, %d, %p)", a, b,c) #define LOG_RTCALL_INT_OBJARR_OBJ(a,b) LOG("(%d, %p)", a, b) @@ -213,6 +218,7 @@ extern int rtcall_int_pobj_obj( #define LOG_THISCALL #define LOG_RTCALL_VOID_VOID(f) +#define LOG_RTCALL_VOID_PVOID(f) #define LOG_RTCALL_OBJ_INT_POBJ_OBJ(f,a,b,c) #define LOG_RTCALL_OBJ_INT_POBJ_VOID(a,b,c) #define LOG_RTCALL_INT_OBJARR_OBJ(a,b) diff --git a/src/mzscheme/src/jit.c b/src/mzscheme/src/jit.c index 850ddd55f0..fc850b2d1f 100644 --- a/src/mzscheme/src/jit.c +++ b/src/mzscheme/src/jit.c @@ -271,7 +271,7 @@ void scheme_jit_fill_threadlocal_table(); On x86, the thread-local table pointer is loaded on entry to the JIT world into a C stack slot. On x86_64, it is loaded into the callee-saved R14 (and the old value is saved on the C stack). 
*/ -#if defined(MZ_USE_PLACES) || defined(FUTURES_ENABLED) +#ifdef USE_THREAD_LOCAL # define JIT_THREAD_LOCAL #endif @@ -1436,7 +1436,7 @@ static int inline_alloc(mz_jit_state *jitter, int amt, Scheme_Type ty, int immut __START_TINY_JUMPS__(1); reffail = _jit.x.pc; - jit_ldi_p(JIT_V1, &GC_gen0_alloc_page_ptr); + mz_tl_ldi_p(JIT_V1, tl_GC_gen0_alloc_page_ptr); jit_subi_l(JIT_R2, JIT_V1, 1); jit_andi_l(JIT_R2, JIT_R2, (algn - 1)); ref = jit_blti_l(jit_forward(), JIT_R2, (algn - sz)); @@ -2186,6 +2186,7 @@ static Scheme_Object *prim_indirect(Scheme_Primitive_Closure_Proc proc, int argc static Scheme_Object *ts_scheme_apply_multi_from_native(Scheme_Object *rator, int argc, Scheme_Object **argv) { + START_XFORM_SKIP; Scheme_Object *retptr; if (rtcall_obj_int_pobj_obj(_scheme_apply_multi_from_native, rator, @@ -2196,10 +2197,12 @@ static Scheme_Object *ts_scheme_apply_multi_from_native(Scheme_Object *rator, in } return _scheme_apply_multi_from_native(rator, argc, argv); + END_XFORM_SKIP; } static Scheme_Object *ts_scheme_apply_from_native(Scheme_Object *rator, int argc, Scheme_Object **argv) { + START_XFORM_SKIP; Scheme_Object *retptr; if (rtcall_obj_int_pobj_obj(_scheme_apply_from_native, rator, @@ -2210,10 +2213,12 @@ static Scheme_Object *ts_scheme_apply_from_native(Scheme_Object *rator, int argc } return _scheme_apply_from_native(rator, argc, argv); + END_XFORM_SKIP; } static Scheme_Object *ts_scheme_tail_apply_from_native(Scheme_Object *rator, int argc, Scheme_Object **argv) { + START_XFORM_SKIP; Scheme_Object *retptr; if (rtcall_obj_int_pobj_obj(_scheme_tail_apply_from_native, rator, @@ -2224,35 +2229,52 @@ static Scheme_Object *ts_scheme_tail_apply_from_native(Scheme_Object *rator, int } return _scheme_tail_apply_from_native(rator, argc, argv); + END_XFORM_SKIP; } static void ts_on_demand(void) { + START_XFORM_SKIP; if (rtcall_void_void(on_demand)) { return; } on_demand(); + END_XFORM_SKIP; } - #ifdef MZ_PRECISE_GC - static void *ts_prepare_retry_alloc(void *p, 
void *p2) - { - void *ret; - LOG_PRIM_START(&prepare_retry_alloc); - if (rtcall_pvoid_pvoid_pvoid(prepare_retry_alloc, - p, - p2, - &ret)) - { - return ret; - } +#ifdef MZ_PRECISE_GC +static void *get_new_local_memory(void *p, void *p2) +{ + return GC_make_jit_nursery_page(); +} - ret = prepare_retry_alloc(p, p2); - LOG_PRIM_END(&prepare_retry_alloc); - return ret; - } - #endif +static void *ts_prepare_retry_alloc(void *p, void *p2) +{ + START_XFORM_SKIP; + void *ret; + LOG_PRIM_START(&prepare_retry_alloc); + jit_future_storage[0] = p; + jit_future_storage[1] = p2; + if (rtcall_void_pvoid(GC_make_jit_nursery_page, + &ret)) { + GC_gen0_alloc_page_ptr = ret; + retry_alloc_r1 = jit_future_storage[1]; + p = jit_future_storage[0]; + jit_future_storage[0] = NULL; + jit_future_storage[1] = NULL; + return p; + } + + jit_future_storage[0] = NULL; + jit_future_storage[1] = NULL; + + ret = prepare_retry_alloc(p, p2); + LOG_PRIM_END(&prepare_retry_alloc); + return ret; + END_XFORM_SKIP; +} +#endif #else /* futures not enabled */ # define mz_prepare_direct_prim(n) mz_prepare(n)