diff --git a/src/mzscheme/gc2/Makefile.in b/src/mzscheme/gc2/Makefile.in index 13d638b17b..f82c620491 100644 --- a/src/mzscheme/gc2/Makefile.in +++ b/src/mzscheme/gc2/Makefile.in @@ -332,6 +332,7 @@ gc2.@LTO@: \ $(srcdir)/gc2_obj.h \ $(srcdir)/immobile_boxes.c \ $(srcdir)/../include/scheme.h \ + $(srcdir)/../include/schthread.h \ $(srcdir)/../include/../sconfig.h \ $(srcdir)/../include/../uconfig.h \ $(srcdir)/../include/../src/stypes.h \ diff --git a/src/mzscheme/include/schthread.h b/src/mzscheme/include/schthread.h index 8a481638dc..37fb3b7dba 100644 --- a/src/mzscheme/include/schthread.h +++ b/src/mzscheme/include/schthread.h @@ -91,6 +91,7 @@ typedef struct Thread_Local_Variables { volatile int scheme_fuel_counter_; unsigned long scheme_stack_boundary_; unsigned long volatile scheme_jit_stack_boundary_; + volatile int scheme_future_need_gc_pause_; struct Scheme_Object *quick_stx_; int scheme_continuation_application_count_; int scheme_cont_capture_count_; @@ -253,6 +254,7 @@ XFORM_GC_VARIABLE_STACK_THROUGH_THREAD_LOCAL; #define scheme_fuel_counter XOA (scheme_get_thread_local_variables()->scheme_fuel_counter_) #define scheme_stack_boundary XOA (scheme_get_thread_local_variables()->scheme_stack_boundary_) #define scheme_jit_stack_boundary XOA (scheme_get_thread_local_variables()->scheme_jit_stack_boundary_) +#define scheme_future_need_gc_pause XOA (scheme_get_thread_local_variables()->scheme_future_need_gc_pause_) #define quick_stx XOA (scheme_get_thread_local_variables()->quick_stx_) #define scheme_continuation_application_count XOA (scheme_get_thread_local_variables()->scheme_continuation_application_count_) #define scheme_cont_capture_count XOA (scheme_get_thread_local_variables()->scheme_cont_capture_count_) diff --git a/src/mzscheme/src/future.c b/src/mzscheme/src/future.c index 8799435239..b7fa5e1c85 100644 --- a/src/mzscheme/src/future.c +++ b/src/mzscheme/src/future.c @@ -55,12 +55,14 @@ extern void *on_demand_jit_code; static pthread_t g_pool_threads[THREAD_POOL_SIZE]; static int *g_fuel_pointers[THREAD_POOL_SIZE]; static unsigned long *g_stack_boundary_pointers[THREAD_POOL_SIZE]; +static int *g_need_gc_pointers[THREAD_POOL_SIZE]; static int g_num_avail_threads = 0; static unsigned long g_cur_cpu_mask = 1; static void *g_signal_handle = NULL; static struct NewGC *g_shared_GC; future_t *g_future_queue = NULL; +future_t *g_future_waiting_atomic = NULL; int g_next_futureid = 0; pthread_t g_rt_threadid = 0; @@ -71,7 +73,8 @@ THREAD_LOCAL_DECL(static pthread_cond_t worker_can_continue_cv); static pthread_mutex_t gc_ok_m = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t gc_ok_c = PTHREAD_COND_INITIALIZER; -static int gc_not_ok; +static pthread_cond_t gc_done_c = PTHREAD_COND_INITIALIZER; +static int gc_not_ok, wait_for_gc; #ifdef MZ_PRECISE_GC THREAD_LOCAL_DECL(extern unsigned long GC_gen0_alloc_page_ptr); #endif @@ -86,8 +89,10 @@ THREAD_LOCAL_DECL(static int worker_gc_counter); static void register_traversers(void); extern void scheme_on_demand_generate_lambda(Scheme_Native_Closure *nc, int argc, Scheme_Object **argv); -static void start_gc_not_ok(); -static void end_gc_not_ok(future_t *ft); +static void start_gc_not_ok(int with_lock); +static void end_gc_not_ok(future_t *ft, int with_lock); + +static int future_do_runtimecall(void *func, int is_atomic, void *retval); THREAD_LOCAL_DECL(static future_t *current_ft); @@ -328,11 +333,18 @@ void futures_init(void) g_num_avail_threads = THREAD_POOL_SIZE; } -static void start_gc_not_ok() +static void start_gc_not_ok(int with_lock) { - pthread_mutex_lock(&gc_ok_m); + if (with_lock) + pthread_mutex_lock(&gc_ok_m); + + while (wait_for_gc) { + pthread_cond_wait(&gc_done_c, &gc_ok_m); + } + gc_not_ok++; - pthread_mutex_unlock(&gc_ok_m); + if (with_lock) + pthread_mutex_unlock(&gc_ok_m); #ifdef MZ_PRECISE_GC if (worker_gc_counter != *gc_counter_ptr) { GC_gen0_alloc_page_ptr = 0; /* forces future to ask for memory */ @@ -341,7 +353,7 @@ static void start_gc_not_ok() #endif } -static void end_gc_not_ok(future_t *ft) +static void end_gc_not_ok(future_t *ft, int with_lock) { if (ft) { scheme_set_runstack_limits(ft->runstack_start, @@ -349,19 +361,26 @@ static void end_gc_not_ok(future_t *ft) ft->runstack - ft->runstack_start, ft->runstack_size); } - pthread_mutex_lock(&gc_ok_m); + if (with_lock) + pthread_mutex_lock(&gc_ok_m); --gc_not_ok; pthread_cond_signal(&gc_ok_c); - pthread_mutex_unlock(&gc_ok_m); + if (with_lock) + pthread_mutex_unlock(&gc_ok_m); } void scheme_future_block_until_gc() { int i; + pthread_mutex_lock(&gc_ok_m); + wait_for_gc = 1; + pthread_mutex_unlock(&gc_ok_m); + for (i = 0; i < THREAD_POOL_SIZE; i++) { if (g_fuel_pointers[i] != NULL) { + *(g_need_gc_pointers[i]) = 1; *(g_fuel_pointers[i]) = 0; *(g_stack_boundary_pointers[i]) += INITIAL_C_STACK_SIZE; } @@ -382,11 +401,26 @@ void scheme_future_continue_after_gc() for (i = 0; i < THREAD_POOL_SIZE; i++) { if (g_fuel_pointers[i] != NULL) { + *(g_need_gc_pointers[i]) = 0; *(g_fuel_pointers[i]) = 1; *(g_stack_boundary_pointers[i]) -= INITIAL_C_STACK_SIZE; } } + + pthread_mutex_lock(&gc_ok_m); + wait_for_gc = 0; + pthread_cond_broadcast(&gc_done_c); + pthread_mutex_unlock(&gc_ok_m); +} + +void scheme_future_gc_pause() +/* Called in future thread */ +{ + pthread_mutex_lock(&gc_ok_m); + end_gc_not_ok(current_ft, 0); + start_gc_not_ok(0); /* waits until wait_for_gc is 0 */ + pthread_mutex_unlock(&gc_ok_m); } /**********************************************************************/ @@ -605,14 +639,6 @@ Scheme_Object *touch(int argc, Scheme_Object *argv[]) LOG("Invoking primitive %p on behalf of future %d...", ft->rt_prim, ft->id); invoke_rtcall(ft); LOG("done.\n"); - pthread_mutex_lock(&g_future_queue_mutex); - - ft->rt_prim = NULL; - - //Signal the waiting worker thread that it - //can continue running machine code - pthread_cond_signal(ft->can_continue_cv); - pthread_mutex_unlock(&g_future_queue_mutex); goto wait_for_rtcall_or_completion; } @@ -660,6 +686,7 @@ void *worker_thread_future_loop(void *arg) scheme_fuel_counter = 1; scheme_jit_stack_boundary = ((unsigned long)&v) - INITIAL_C_STACK_SIZE; + g_need_gc_pointers[id] = &scheme_future_need_gc_pause; g_fuel_pointers[id] = &scheme_fuel_counter; g_stack_boundary_pointers[id] = &scheme_jit_stack_boundary; @@ -670,13 +697,13 @@ void *worker_thread_future_loop(void *arg) sema_signal(&ready_sema); wait_for_work: - start_gc_not_ok(); + start_gc_not_ok(1); pthread_mutex_lock(&g_future_queue_mutex); while (!(ft = get_pending_future())) { - end_gc_not_ok(NULL); + end_gc_not_ok(NULL, 1); pthread_cond_wait(&g_future_pending_cv, &g_future_queue_mutex); - start_gc_not_ok(); + start_gc_not_ok(1); } LOG("Got a signal that a future is pending..."); @@ -731,7 +758,7 @@ void *worker_thread_future_loop(void *arg) scheme_signal_received_at(g_signal_handle); pthread_mutex_unlock(&g_future_queue_mutex); - end_gc_not_ok(NULL); + end_gc_not_ok(NULL, 1); goto wait_for_work; @@ -745,16 +772,32 @@ void scheme_check_future_work() /* Check for work that future threads need from the runtime thread and that can be done in any Scheme thread (e.g., get a new page for allocation). */ + future_t *ft; + + while (1) { + /* Try to get a future waiting on a atomic operation */ + pthread_mutex_lock(&g_future_queue_mutex); + ft = g_future_waiting_atomic; + if (ft) { + g_future_waiting_atomic = ft->next_waiting_atomic; + } + pthread_mutex_unlock(&g_future_queue_mutex); + + if (ft) { + invoke_rtcall(ft); + } else + break; + } } - //Returns 0 if the call isn't actually executed by this function, //i.e. if we are already running on the runtime thread. Otherwise returns //1, and 'retval' is set to point to the return value of the runtime //call invocation. int future_do_runtimecall( void *func, + int is_atomic, //int sigtype, //void *args, void *retval) @@ -786,7 +829,13 @@ int future_do_runtimecall( //will use this value to temporarily swap its stack //for the worker thread's future->runstack = MZ_RUNSTACK; - future->rt_prim = func; + future->rt_prim = 1; + future->rt_prim_is_atomic = is_atomic; + + if (is_atomic) { + future->next_waiting_atomic = g_future_waiting_atomic; + g_future_waiting_atomic = future; + } //Update the future's status to waiting future->status = WAITING_FOR_PRIM; @@ -795,16 +844,13 @@ int future_do_runtimecall( //Wait for the signal that the RT call is finished future->can_continue_cv = &worker_can_continue_cv; - end_gc_not_ok(future); + end_gc_not_ok(future, 1); pthread_cond_wait(&worker_can_continue_cv, &g_future_queue_mutex); - start_gc_not_ok(); + start_gc_not_ok(1); //Fetch the future instance again, in case the GC has moved the pointer future = current_ft; - //Clear rt call fields before releasing the lock on the descriptor - future->rt_prim = NULL; - pthread_mutex_unlock(&g_future_queue_mutex); return 1; @@ -834,7 +880,7 @@ int rtcall_void_void_3args(void (*f)()) future = current_ft; future->prim_data = data; - future_do_runtimecall((void*)f, NULL); + future_do_runtimecall((void*)f, 1, NULL); future = current_ft; return 1; @@ -863,7 +909,7 @@ int rtcall_alloc_void_pvoid(void (*f)(), void **retval) future = current_ft; future->prim_data = data; - future_do_runtimecall((void*)f, NULL); + future_do_runtimecall((void*)f, 1, NULL); future = current_ft; *retval = future->alloc_retval; @@ -913,7 +959,7 @@ int rtcall_obj_int_pobj_obj( future = current_ft; future->prim_data = data; - future_do_runtimecall((void*)f, NULL); + future_do_runtimecall((void*)f, 0, NULL); future = current_ft; *retval = future->prim_data.retval; future->prim_data.retval = NULL; @@ -956,7 +1002,7 @@ int rtcall_int_pobj_obj( future = current_ft; future->prim_data = data; - future_do_runtimecall((void*)f, NULL); + future_do_runtimecall((void*)f, 0, NULL); future = current_ft; *retval = future->prim_data.retval; future->prim_data.retval = NULL; @@ -1000,7 +1046,7 @@ int rtcall_pvoid_pvoid_pvoid( future = current_ft; future->prim_data = data; - future_do_runtimecall((void*)f, NULL); + future_do_runtimecall((void*)f, 0, NULL); future = current_ft; *retval = future->prim_data.c; @@ -1045,7 +1091,7 @@ int rtcall_int_pobj_obj_obj( future = current_ft; future->prim_data = data; - future_do_runtimecall((void*)f, NULL); + future_do_runtimecall((void*)f, 0, NULL); future = current_ft; *retval = future->prim_data.retval; future->prim_data.retval = NULL; @@ -1064,6 +1110,8 @@ void invoke_rtcall(future_t *future) g_rtcall_count++; #endif + future->rt_prim = 0; + switch (future->prim_data.sigtype) { case SIG_VOID_VOID_3ARGS: @@ -1149,6 +1197,12 @@ void invoke_rtcall(future_t *future) break; } } + + pthread_mutex_lock(&g_future_queue_mutex); + //Signal the waiting worker thread that it + //can continue running machine code + pthread_cond_signal(future->can_continue_cv); + pthread_mutex_unlock(&g_future_queue_mutex); } diff --git a/src/mzscheme/src/future.h b/src/mzscheme/src/future.h index e9848b5b42..69d03ae6b5 100644 --- a/src/mzscheme/src/future.h +++ b/src/mzscheme/src/future.h @@ -29,7 +29,6 @@ extern Scheme_Object *end_primitive_tracking(int argc, Scheme_Object *argv[]); extern Scheme_Object *future(int argc, Scheme_Object *argv[]); extern Scheme_Object *touch(int argc, Scheme_Object *argv[]); extern Scheme_Object *num_processors(int argc, Scheme_Object *argv[]); -extern int future_do_runtimecall(void *func, void *retval); extern void futures_init(void); typedef void (*prim_void_void_3args_t)(Scheme_Object **); @@ -74,28 +73,30 @@ typedef struct { typedef struct future { Scheme_Object so; - int id; - pthread_t threadid; - int status; - int work_completed; - pthread_cond_t *can_continue_cv; + int id; + pthread_t threadid; + int status; + int work_completed; + pthread_cond_t *can_continue_cv; - long runstack_size; - Scheme_Object **runstack; - Scheme_Object **runstack_start; - Scheme_Object *orig_lambda; - void *code; + long runstack_size; + Scheme_Object **runstack; + Scheme_Object **runstack_start; + Scheme_Object *orig_lambda; + void *code; - //Runtime call stuff - void *rt_prim; + //Runtime call stuff + int rt_prim; /* flag to indicate waiting for a prim call */ + int rt_prim_is_atomic; - prim_data_t prim_data; + prim_data_t prim_data; void *alloc_retval; int alloc_retval_counter; - Scheme_Object *retval; - struct future *prev; - struct future *next; + Scheme_Object *retval; + struct future *prev; + struct future *next; + struct future *next_waiting_atomic; } future_t; #ifdef UNIT_TEST @@ -243,6 +244,7 @@ extern int rtcall_int_pobj_obj( void scheme_future_block_until_gc(); void scheme_future_continue_after_gc(); void scheme_check_future_work(); +void scheme_future_gc_pause(); #ifdef UNIT_TEST //These forwarding decls only need to be here to make diff --git a/src/mzscheme/src/jit.c b/src/mzscheme/src/jit.c index dbf8846dc6..03afa3dea5 100644 --- a/src/mzscheme/src/jit.c +++ b/src/mzscheme/src/jit.c @@ -293,6 +293,8 @@ void scheme_jit_fill_threadlocal_table(); # define tl_save_fp tl_delta(save_fp) # define tl_scheme_fuel_counter tl_delta(scheme_fuel_counter) # define tl_scheme_jit_stack_boundary tl_delta(scheme_jit_stack_boundary) +# define tl_jit_future_storage tl_delta(jit_future_storage) +# define tl_scheme_future_need_gc_pause tl_delta(scheme_future_need_gc_pause) #ifdef MZ_XFORM START_XFORM_SKIP; @@ -2295,6 +2297,60 @@ static void *ts_prepare_retry_alloc(void *p, void *p2) # define ts_prepare_retry_alloc prepare_retry_alloc #endif +static int generate_pause_for_gc_and_retry(mz_jit_state *jitter, + int in_short_jumps, + int gc_reg, /* must not be JIT_R1 */ + GC_CAN_IGNORE jit_insn *refagain) +{ +#ifdef FUTURES_ENABLED + GC_CAN_IGNORE jit_insn *refslow = 0, *refpause; + int i; + + /* expose gc_reg to GC */ + mz_tl_sti_p(tl_jit_future_storage, gc_reg, JIT_R1); + + /* Save non-preserved registers. Use a multiple of 4 to avoid + alignment problems. */ + jit_pushr_l(JIT_R1); + jit_pushr_l(JIT_R2); + jit_pushr_l(JIT_R0); + jit_pushr_l(JIT_R0); + CHECK_LIMIT(); + + mz_tl_ldi_i(JIT_R0, tl_scheme_future_need_gc_pause); + refpause = jit_bgti_i(jit_forward(), JIT_R0, 0); + + for (i = 0; i < 2; i++) { + /* Restore non-preserved registers, and also move the gc-exposed + register back. */ + if (i == 1) { + mz_patch_branch(refpause); + jit_prepare(0); + mz_finish(scheme_future_gc_pause); + } + jit_popr_l(JIT_R0); + jit_popr_l(JIT_R0); + jit_popr_l(JIT_R2); + CHECK_LIMIT(); + mz_tl_ldi_p(gc_reg, tl_jit_future_storage); + jit_movi_p(JIT_R1, NULL); + mz_tl_sti_p(tl_jit_future_storage, JIT_R1, JIT_R2); + jit_popr_l(JIT_R1); + CHECK_LIMIT(); + if (!i) + refslow = jit_jmpi(jit_forward()); + else + (void)jit_jmpi(refagain); + } + + mz_patch_ucbranch(refslow); + + return 1; +#else + return 1; +#endif +} + static int generate_direct_prim_tail_call(mz_jit_state *jitter, int num_rands) { /* JIT_V1 must have the target function pointer. @@ -2433,12 +2489,17 @@ static int generate_tail_call(mz_jit_state *jitter, int num_rands, int direct_na /* The slow way: */ /* V1 and RUNSTACK must be intact! */ + mz_patch_branch(ref5); + generate_pause_for_gc_and_retry(jitter, + num_rands < 100, /* in short jumps */ + JIT_V1, /* expose V1 to GC */ + refagain); /* retry code pointer */ + CHECK_LIMIT(); if (!direct_native) { mz_patch_branch(ref); mz_patch_branch(ref2); } mz_patch_branch(ref4); - mz_patch_branch(ref5); CHECK_LIMIT(); if (need_set_rs) { JIT_UPDATE_THREAD_RSPTR(); @@ -2851,13 +2912,18 @@ static int generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc } /* The slow way: */ + mz_patch_branch(ref9); + generate_pause_for_gc_and_retry(jitter, + 1, /* in short jumps */ + JIT_V1, /* expose V1 to GC */ + refagain); /* retry code pointer */ + CHECK_LIMIT(); if (!direct_native) { mz_patch_branch(ref); mz_patch_branch(ref2); mz_patch_branch(ref7); } - mz_patch_branch(ref4); - mz_patch_branch(ref9); + mz_patch_branch(ref4); #ifndef FUEL_AUTODECEREMENTS mz_patch_branch(ref11); #endif @@ -2907,13 +2973,12 @@ static int generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc static int generate_self_tail_call(Scheme_Object *rator, mz_jit_state *jitter, int num_rands, jit_insn *slow_code, int args_already_in_place) +/* Last argument is in R0 */ { - jit_insn *refslow; + jit_insn *refslow, *refagain; int i, jmp_tiny, jmp_short; int closure_size = jitter->self_closure_size; - /* Last argument is in R0 */ - #ifdef JIT_PRECISE_GC closure_size += 1; /* Skip procedure pointer, too */ #endif @@ -2923,6 +2988,8 @@ static int generate_self_tail_call(Scheme_Object *rator, mz_jit_state *jitter, i __START_TINY_OR_SHORT_JUMPS__(jmp_tiny, jmp_short); + refagain = _jit.x.pc; + /* Check for thread swap: */ (void)mz_tl_ldi_i(JIT_R2, tl_scheme_fuel_counter); refslow = jit_blei_i(jit_forward(), JIT_R2, 0); @@ -2954,6 +3021,11 @@ static int generate_self_tail_call(Scheme_Object *rator, mz_jit_state *jitter, i __START_TINY_OR_SHORT_JUMPS__(jmp_tiny, jmp_short); mz_patch_branch(refslow); __END_TINY_OR_SHORT_JUMPS__(jmp_tiny, jmp_short); + generate_pause_for_gc_and_retry(jitter, + 0, /* in short jumps */ + JIT_R0, /* expose R0 to GC */ + refagain); /* retry code pointer */ + CHECK_LIMIT(); if (args_already_in_place) { jit_movi_l(JIT_R2, args_already_in_place); @@ -8256,7 +8328,7 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) jit_ldxi_i(JIT_V1, JIT_V1, &((Scheme_Native_Closure_Data *)0x0)->max_let_depth); mz_set_local_p(JIT_R2, JIT_LOCAL2); mz_tl_ldi_p(JIT_R2, tl_MZ_RUNSTACK_START); - jit_subr_ul(JIT_R2, JIT_RUNSTACK, JIT_V2); + jit_subr_ul(JIT_R2, JIT_RUNSTACK, JIT_R2); jit_subr_ul(JIT_V1, JIT_R2, JIT_V1); mz_get_local_p(JIT_R2, JIT_LOCAL2); ref2 = jit_blti_l(jit_forward(), JIT_V1, 0); diff --git a/src/mzscheme/src/mzmark.c b/src/mzscheme/src/mzmark.c index 98b95aae41..c2c8dc974d 100644 --- a/src/mzscheme/src/mzmark.c +++ b/src/mzscheme/src/mzmark.c @@ -5429,6 +5429,7 @@ static int future_MARK(void *p) { gcMARK(f->retval); gcMARK(f->prev); gcMARK(f->next); + gcMARK(f->next_waiting_atomic); return gcBYTES_TO_WORDS(sizeof(future_t)); } @@ -5444,6 +5445,7 @@ static int future_FIXUP(void *p) { gcFIXUP(f->retval); gcFIXUP(f->prev); gcFIXUP(f->next); + gcFIXUP(f->next_waiting_atomic); return gcBYTES_TO_WORDS(sizeof(future_t)); } diff --git a/src/mzscheme/src/mzmarksrc.c b/src/mzscheme/src/mzmarksrc.c index 65bbb67180..12599cc9ed 100644 --- a/src/mzscheme/src/mzmarksrc.c +++ b/src/mzscheme/src/mzmarksrc.c @@ -2232,6 +2232,7 @@ future { gcMARK(f->retval); gcMARK(f->prev); gcMARK(f->next); + gcMARK(f->next_waiting_atomic); size: gcBYTES_TO_WORDS(sizeof(future_t)); }