diff --git a/collects/tests/future/future.rkt b/collects/tests/future/future.rkt index dbcc24eb14..34ec6ae2df 100644 --- a/collects/tests/future/future.rkt +++ b/collects/tests/future/future.rkt @@ -114,6 +114,7 @@ We should also test deep continuations. (with-continuation-mark 'x 1 (current-continuation-marks))))]) + (sleep 0.1) (list (continuation-mark-set->list (touch f1) 'x) (continuation-mark-set->list (touch f2) 'x)))) @@ -170,7 +171,7 @@ We should also test deep continuations. ;on a worker thread (let ([f1 (future (λ () (current-future)))] [f2 (future (λ () (current-future)))]) - (sleep 3) + (sleep 0.1) (check-equal? #t (equal? f1 (touch f1))) (check-equal? #f (equal? f2 (touch f1))) (check-equal? #t (equal? f2 (touch f2))) diff --git a/src/racket/gc2/newgc.c b/src/racket/gc2/newgc.c index 4092c9db1d..78015cb3e8 100644 --- a/src/racket/gc2/newgc.c +++ b/src/racket/gc2/newgc.c @@ -769,6 +769,8 @@ static void *allocate_big(const size_t request_size_bytes, int type) size_t allocate_size; void *addr; + if (GC_gen0_alloc_only) return NULL; + #ifdef NEWGC_BTC_ACCOUNT if(GC_out_of_memory) { #ifdef MZ_USE_PLACES @@ -1119,7 +1121,7 @@ inline static void *allocate(const size_t request_size, const int type) unsigned long newptr; if(request_size == 0) return (void *) zero_sized; - + allocate_size = COMPUTE_ALLOC_SIZE_FOR_OBJECT_SIZE(request_size); if(allocate_size > MAX_OBJECT_SIZE) return allocate_big(request_size, type); @@ -1131,6 +1133,8 @@ inline static void *allocate(const size_t request_size, const int type) if(OVERFLOWS_GEN0(newptr)) { NewGC *gc = GC_get_GC(); + if (GC_gen0_alloc_only) return NULL; + #ifdef MZ_USE_PLACES if (postmaster_and_master_gc(gc)) { return allocate_medium(request_size, type); } #endif diff --git a/src/racket/include/schthread.h b/src/racket/include/schthread.h index 5cff0cea4a..95ff33f427 100644 --- a/src/racket/include/schthread.h +++ b/src/racket/include/schthread.h @@ -108,6 +108,7 @@ typedef struct Thread_Local_Variables { struct NewGC *GC_instance_; unsigned long GC_gen0_alloc_page_ptr_; unsigned long GC_gen0_alloc_page_end_; + int GC_gen0_alloc_only_; void *bignum_cache_[BIGNUM_CACHE_SIZE]; int cache_count_; struct Scheme_Hash_Table *toplevels_ht_; @@ -206,6 +207,7 @@ typedef struct Thread_Local_Variables { struct Scheme_Thread *scheme_main_thread_; struct Scheme_Thread *scheme_first_thread_; struct Scheme_Thread_Set *scheme_thread_set_top_; + struct Scheme_Current_LWC *scheme_current_lwc_; int num_running_threads_; int swap_no_setjmp_; int thread_swap_count_; @@ -402,6 +404,7 @@ XFORM_GC_VARIABLE_STACK_THROUGH_THREAD_LOCAL; #define GC_instance XOA (scheme_get_thread_local_variables()->GC_instance_) #define GC_gen0_alloc_page_ptr XOA (scheme_get_thread_local_variables()->GC_gen0_alloc_page_ptr_) #define GC_gen0_alloc_page_end XOA (scheme_get_thread_local_variables()->GC_gen0_alloc_page_end_) +#define GC_gen0_alloc_only XOA (scheme_get_thread_local_variables()->GC_gen0_alloc_only_) #define GC_variable_stack XOA (scheme_get_thread_local_variables()->GC_variable_stack_) #define bignum_cache XOA (scheme_get_thread_local_variables()->bignum_cache_) #define cache_count XOA (scheme_get_thread_local_variables()->cache_count_) @@ -502,6 +505,7 @@ XFORM_GC_VARIABLE_STACK_THROUGH_THREAD_LOCAL; #define scheme_main_thread XOA (scheme_get_thread_local_variables()->scheme_main_thread_) #define scheme_first_thread XOA (scheme_get_thread_local_variables()->scheme_first_thread_) #define scheme_thread_set_top XOA (scheme_get_thread_local_variables()->scheme_thread_set_top_) 
+#define scheme_current_lwc XOA (scheme_get_thread_local_variables()->scheme_current_lwc_) #define num_running_threads XOA (scheme_get_thread_local_variables()->num_running_threads_) #define swap_no_setjmp XOA (scheme_get_thread_local_variables()->swap_no_setjmp_) #define thread_swap_count XOA (scheme_get_thread_local_variables()->thread_swap_count_) diff --git a/src/racket/src/env.c b/src/racket/src/env.c index a46811def1..68fc08596e 100644 --- a/src/racket/src/env.c +++ b/src/racket/src/env.c @@ -475,8 +475,9 @@ static Scheme_Env *place_instance_init(void *stack_base, int initial_main_os_thr scheme_init_stack_check(); scheme_init_overflow(); - init_toplevel_local_offsets_hashtable_caches(); + scheme_init_thread_lwc(); + init_toplevel_local_offsets_hashtable_caches(); #ifdef TIME_STARTUP_PROCESS printf("pre-process @ %ld\n", scheme_get_process_milliseconds()); diff --git a/src/racket/src/fun.c b/src/racket/src/fun.c index f8043f8282..9bae691941 100644 --- a/src/racket/src/fun.c +++ b/src/racket/src/fun.c @@ -8322,6 +8322,276 @@ static Scheme_Object *continuation_prompt_available(int argc, Scheme_Object *arg return scheme_false; } +/*========================================================================*/ +/* lightweight continuations */ +/*========================================================================*/ + +/* A lightweight continuation is one that contains only frames from + JIT-generated code. The code here manages capture and restore for + the runstack and mark stack, while the rest is in the JIT. */ + +struct Scheme_Lightweight_Continuation { + MZTAG_IF_REQUIRED /* scheme_rt_lightweight_cont */ + Scheme_Current_LWC *saved_lwc; + void *stack_slice; + Scheme_Object **runstack_slice; + Scheme_Cont_Mark *cont_mark_stack_slice; +}; + +void scheme_init_thread_lwc(void) XFORM_SKIP_PROC +{ + scheme_current_lwc = (Scheme_Current_LWC *)malloc(sizeof(Scheme_Current_LWC)); +} + +void scheme_fill_lwc_start(void) XFORM_SKIP_PROC +{ + scheme_current_lwc->runstack_start = MZ_RUNSTACK; + scheme_current_lwc->cont_mark_stack_start = MZ_CONT_MARK_STACK; + scheme_current_lwc->cont_mark_pos_start = MZ_CONT_MARK_POS; +} + +void scheme_fill_lwc_end(void) XFORM_SKIP_PROC +{ + scheme_current_lwc->runstack_end = MZ_RUNSTACK; + scheme_current_lwc->cont_mark_stack_end = MZ_CONT_MARK_STACK; + scheme_current_lwc->cont_mark_pos_end = MZ_CONT_MARK_POS; + scheme_fill_stack_lwc_end(); +} + +void scheme_clear_lwc(void) XFORM_SKIP_PROC +{ +} + +Scheme_Lightweight_Continuation *scheme_capture_lightweight_continuation(Scheme_Thread *p, + Scheme_Current_LWC *p_lwc, + void **storage) + XFORM_SKIP_PROC +/* This function explicitly cooperates with the GC by storing the + pointers it needs to save across a collection in `storage'. Also, + if allocation fails, it can abort and return NULL. The combination + allows it to work in a thread for running futures (where allocation + and GC in general are disallowed). 
*/ +{ + long len, i, j, pos; + Scheme_Object **runstack_slice; + Scheme_Cont_Mark *cont_mark_stack_slice; + Scheme_Current_LWC *lwc; + Scheme_Cont_Mark *seg; + Scheme_Lightweight_Continuation *lw; + void *stack; + +#ifndef MZ_PRECISE_GC + return NULL; +#endif + + storage[1] = p; + + lw = MALLOC_ONE_RT(Scheme_Lightweight_Continuation); + if (!lw) return NULL; +#ifdef MZTAG_REQUIRED + lw->type = scheme_rt_lightweight_cont; +#endif + + storage[0] = lw; + + lwc = (Scheme_Current_LWC *)scheme_malloc_atomic(sizeof(Scheme_Current_LWC)); + if (!lwc) return NULL; + + memcpy(lwc, p_lwc, sizeof(Scheme_Current_LWC)); + + lw = (Scheme_Lightweight_Continuation *)storage[0]; + lw->saved_lwc = lwc; + + stack = scheme_save_lightweight_continuation_stack(p_lwc); + if (!stack) return NULL; + + lw = (Scheme_Lightweight_Continuation *)storage[0]; + lw->stack_slice = stack; + + len = lwc->runstack_start - lwc->runstack_end; + runstack_slice = MALLOC_N(Scheme_Object*, len); + if (!runstack_slice) return NULL; + + lw = (Scheme_Lightweight_Continuation *)storage[0]; + lw->runstack_slice = runstack_slice; + memcpy(runstack_slice, lw->saved_lwc->runstack_end, len * sizeof(Scheme_Object *)); + + /* The runstack may contain pointers to itself, but they are just + cleared slots where a register containing the runstack pointer + was handy; zero out such slots to avoid retaining a runstack + unnecessarily: */ + for (i = 0; i < len; i++) { + if (((unsigned long)runstack_slice[i] >= (unsigned long)lwc->runstack_end) + && ((unsigned long)runstack_slice[i] <= (unsigned long)lwc->runstack_start)) + runstack_slice[i] = 0; + } + + len = lwc->cont_mark_stack_end - lwc->cont_mark_stack_start; + + if (len) { + cont_mark_stack_slice = MALLOC_N(Scheme_Cont_Mark, len); + if (!cont_mark_stack_slice) return NULL; + lw = (Scheme_Lightweight_Continuation *)storage[0]; + } else + cont_mark_stack_slice = NULL; + + lw->cont_mark_stack_slice = cont_mark_stack_slice; + + lwc = lw->saved_lwc; + p = (Scheme_Thread *)storage[1]; + + for (j = 0; j < len; j++) { + i = j + lwc->cont_mark_stack_start; + + seg = p->cont_mark_stack_segments[i >> SCHEME_LOG_MARK_SEGMENT_SIZE]; + pos = i & SCHEME_MARK_SEGMENT_MASK; + + memcpy(cont_mark_stack_slice + i, seg + pos, sizeof(Scheme_Cont_Mark)); + } + + return lw; +} + +Scheme_Object **scheme_adjust_runstack_argument(Scheme_Lightweight_Continuation *lw, + Scheme_Object **arg) + XFORM_SKIP_PROC +{ + if (arg == lw->saved_lwc->runstack_end) + return lw->runstack_slice; + else + return arg; +} + +static void *apply_lwc_k() +{ + Scheme_Thread *p = scheme_current_thread; + Scheme_Lightweight_Continuation *lw = (Scheme_Lightweight_Continuation *)p->ku.k.p1; + Scheme_Object *result = (Scheme_Object *)p->ku.k.p2; + + p->ku.k.p1 = NULL; + p->ku.k.p2 = NULL; + + return scheme_apply_lightweight_continuation(lw, result); +} + +Scheme_Object *scheme_apply_lightweight_continuation(Scheme_Lightweight_Continuation *lw, + Scheme_Object *result) XFORM_SKIP_PROC +{ + long len, cm_len, cm_pos_delta, cm_delta, i, cm; + Scheme_Cont_Mark *seg; + Scheme_Object **rs; + + len = lw->saved_lwc->runstack_start - lw->saved_lwc->runstack_end; + + if (!scheme_check_runstack(len)) { + /* This will not happen when restoring a future-thread-captured + continuation in a future thread. 
*/ + scheme_current_thread->ku.k.p1 = lw; + scheme_current_thread->ku.k.p2 = result; + return (Scheme_Object *)scheme_enlarge_runstack(len, apply_lwc_k); + } + + /* FIXME: check whether the C stack is big enough */ + + /* application of a lightweight continuation forms a lightweight continuation: */ + scheme_current_lwc->runstack_start = MZ_RUNSTACK; + scheme_current_lwc->cont_mark_stack_start = MZ_CONT_MARK_STACK; + scheme_current_lwc->cont_mark_pos_start = MZ_CONT_MARK_POS + 2; + + cm_len = lw->saved_lwc->cont_mark_stack_end - lw->saved_lwc->cont_mark_stack_start; + if (cm_len) { + /* install captured continuation marks, adjusting the pos + to match the new context: */ + seg = lw->cont_mark_stack_slice; + cm_pos_delta = MZ_CONT_MARK_POS + 2 - lw->saved_lwc->cont_mark_pos_start; + for (i = 0; i < cm_len; i++) { + MZ_CONT_MARK_POS = seg[i].pos + cm_pos_delta; + scheme_set_cont_mark(seg[i].key, seg[i].val); + } + MZ_CONT_MARK_POS = lw->saved_lwc->cont_mark_pos_end + cm_pos_delta; + } + + cm_delta = (long)MZ_CONT_MARK_STACK - (long)lw->saved_lwc->cont_mark_stack_end; + + rs = MZ_RUNSTACK - len; + MZ_RUNSTACK = rs; + + memcpy(rs, lw->runstack_slice, len * sizeof(Scheme_Object*)); + + /* If SCHEME_EVAL_WAITING appears in the runstack slice, it + indicates that a cm position follows: */ + for (i = 0; i < len; i++) { + if (rs[i] == SCHEME_EVAL_WAITING) { + cm = SCHEME_INT_VAL(rs[i+1]); + cm += cm_delta; + rs[i+1] = scheme_make_integer(cm); + } + } + + return scheme_apply_lightweight_continuation_stack(lw->saved_lwc, lw->stack_slice, result); +} + +int scheme_push_marks_from_lightweight_continuation(Scheme_Lightweight_Continuation *lw, + Scheme_Cont_Frame_Data *d) +{ + Scheme_Thread *p; + long pos, len, delta; + Scheme_Cont_Mark *seg; + + len = (lw->saved_lwc->cont_mark_stack_end + - lw->saved_lwc->cont_mark_stack_start); + + if (len) { + scheme_push_continuation_frame(d); + + p = scheme_current_thread; + seg = lw->cont_mark_stack_slice; + + delta = MZ_CONT_MARK_POS + 2 - lw->saved_lwc->cont_mark_pos_start; + + for (pos = 0; pos < len; pos++) { + MZ_CONT_MARK_POS = seg[pos].pos + delta; + scheme_set_cont_mark(seg[pos].key, seg[pos].val); + } + + MZ_CONT_MARK_POS = lw->saved_lwc->cont_mark_pos_end + delta; + + return 1; + } + + return 0; +} + +int scheme_push_marks_from_thread(Scheme_Thread *p2, Scheme_Cont_Frame_Data *d) +{ + Scheme_Thread *p; + long i, pos, delta; + Scheme_Cont_Mark *seg; + + if (p2->cont_mark_stack) { + scheme_push_continuation_frame(d); + + p = scheme_current_thread; + + delta = MZ_CONT_MARK_POS - p2->cont_mark_pos; + if (delta < 0) delta = 0; + + for (i = 0; i < p2->cont_mark_stack; i++) { + seg = p2->cont_mark_stack_segments[i >> SCHEME_LOG_MARK_SEGMENT_SIZE]; + pos = i & SCHEME_MARK_SEGMENT_MASK; + + MZ_CONT_MARK_POS = seg[pos].pos + delta; + scheme_set_cont_mark(seg[pos].key, seg[pos].val); + } + + MZ_CONT_MARK_POS = p2->cont_mark_pos + delta; + + return 1; + } + + return 0; +} + /*========================================================================*/ /* dynamic-wind */ /*========================================================================*/ @@ -9494,6 +9764,7 @@ static void register_traversers(void) GC_REG_TRAV(scheme_rt_dyn_wind_cell, mark_dyn_wind_cell); GC_REG_TRAV(scheme_rt_dyn_wind_info, mark_dyn_wind_info); GC_REG_TRAV(scheme_cont_mark_chain_type, mark_cont_mark_chain); + GC_REG_TRAV(scheme_rt_lightweight_cont, mark_lightweight_cont); } END_XFORM_SKIP; diff --git a/src/racket/src/future.c b/src/racket/src/future.c index cbbb9a6c13..1f7ba10d53 100644 --- 
a/src/racket/src/future.c +++ b/src/racket/src/future.c @@ -89,7 +89,7 @@ static Scheme_Object *touch(int argc, Scheme_Object *argv[]) mz_jmp_buf newbuf, * volatile savebuf; Scheme_Thread *p = scheme_current_thread; - /* In case another Scheme thread touchs the future. */ + /* In case another Scheme thread touches the future. */ sema = scheme_make_sema(0); ft->running_sema = sema; @@ -218,6 +218,7 @@ typedef struct Scheme_Future_State { future_t *future_queue; future_t *future_queue_end; future_t *future_waiting_atomic; + future_t *future_waiting_lwc; int next_futureid; mzrt_mutex *future_mutex; @@ -253,6 +254,8 @@ THREAD_LOCAL_DECL(void *jit_future_storage[2]); #ifdef MZ_PRECISE_GC THREAD_LOCAL_DECL(extern unsigned long GC_gen0_alloc_page_ptr); +THREAD_LOCAL_DECL(extern unsigned long GC_gen0_alloc_page_end); +THREAD_LOCAL_DECL(extern int GC_gen0_alloc_only); #endif static void start_gc_not_ok(Scheme_Future_State *fs); @@ -293,6 +296,7 @@ typedef struct future_thread_params_t { Scheme_Object ***scheme_current_runstack_start_ptr; Scheme_Thread **current_thread_ptr; void *jit_future_storage_ptr; + Scheme_Current_LWC *lwc; } future_thread_params_t; /**********************************************************************/ @@ -375,6 +379,7 @@ void futures_init(void) REGISTER_SO(fs->future_queue); REGISTER_SO(fs->future_queue_end); REGISTER_SO(fs->future_waiting_atomic); + REGISTER_SO(fs->future_waiting_lwc); REGISTER_SO(jit_future_storage); /* Create a 'dummy' future thread state object for the runtime @@ -725,7 +730,6 @@ Scheme_Object *touch(int argc, Scheme_Object *argv[]) ft->work_completed = 1; ft->retval = retval; ft->status = FINISHED; - dequeue_future(fs, ft); mzrt_mutex_unlock(fs->future_mutex); receive_special_result(ft, retval, 0); @@ -833,7 +837,7 @@ void *worker_thread_future_loop(void *arg) Scheme_Future_Thread_State *fts = params->fts; Scheme_Future_State *fs = params->fs; Scheme_Object *v; - Scheme_Object* (*jitcode)(Scheme_Object*, int, Scheme_Object**); + Scheme_Closed_Prim *jitcode; future_t *ft; mz_jmp_buf newbuf; @@ -843,6 +847,8 @@ void *worker_thread_future_loop(void *arg) GC_instance = params->shared_GC; scheme_current_thread = params->thread_skeleton; + GC_gen0_alloc_only = 1; + /* Set processor affinity */ /*mzrt_mutex_lock(fs->future_mutex); static unsigned long cur_cpu_mask = 1; @@ -876,6 +882,9 @@ void *worker_thread_future_loop(void *arg) params->current_thread_ptr = &scheme_current_thread; params->jit_future_storage_ptr = &jit_future_storage[0]; + scheme_init_thread_lwc(); + params->lwc = scheme_current_lwc; + mzrt_sema_post(params->ready_sema); while (1) { @@ -895,59 +904,89 @@ void *worker_thread_future_loop(void *arg) /* Set up the JIT compiler for this thread */ scheme_jit_fill_threadlocal_table(); - - jitcode = (Scheme_Object* (*)(Scheme_Object*, int, Scheme_Object**))(ft->code); fts->current_ft = ft; - /* Run the code: - The lambda passed to a future will always be a parameterless - function. - From this thread's perspective, this call will never return - until all the work to be done in the future has been completed, - including runtime calls. - If jitcode asks the runrtime thread to do work, then - a GC can occur. 
*/ - LOG("Running JIT code at %p...\n", ft->code); - MZ_RUNSTACK = MZ_RUNSTACK_START + fts->runstack_size; MZ_CONT_MARK_STACK = 0; MZ_CONT_MARK_POS = (MZ_MARK_POS_TYPE)1; - scheme_current_thread->error_buf = &newbuf; - if (scheme_future_setjmp(newbuf)) { - /* failed */ - v = NULL; + + if (ft->suspended_lw) { + /* invoke a lightweight continuation */ + scheme_current_thread->error_buf = &newbuf; + if (scheme_future_setjmp(newbuf)) { + /* failed or suspended */ + v = NULL; + } else { + struct Scheme_Lightweight_Continuation *lw = ft->suspended_lw; + ft->suspended_lw = NULL; + + v = ft->retval_s; + ft->retval_s = NULL; + receive_special_result(ft, v, 1); + + v = scheme_apply_lightweight_continuation(lw, v); + + if (SAME_OBJ(v, SCHEME_TAIL_CALL_WAITING)) { + v = scheme_ts_scheme_force_value_same_mark(v); + } + } } else { - v = jitcode(ft->orig_lambda, 0, NULL); - if (SAME_OBJ(v, SCHEME_TAIL_CALL_WAITING)) { - v = scheme_ts_scheme_force_value_same_mark(v); + jitcode = ft->code; + + /* Run the code: + The lambda passed to a future will always be a parameterless + function. + From this thread's perspective, this call will never return + until all the work to be done in the future has been completed, + including runtime calls. + If jitcode asks the runrtime thread to do work, then + a GC can occur. */ + LOG("Running JIT code at %p...\n", ft->code); + + scheme_current_thread->error_buf = &newbuf; + if (scheme_future_setjmp(newbuf)) { + /* failed or suspended */ + v = NULL; + } else { + scheme_fill_lwc_start(); + v = scheme_call_as_lightweight_continuation(jitcode, ft->orig_lambda, 0, NULL); + if (SAME_OBJ(v, SCHEME_TAIL_CALL_WAITING)) { + v = scheme_ts_scheme_force_value_same_mark(v); + } } + + LOG("Finished running JIT code at %p.\n", ft->code); } - LOG("Finished running JIT code at %p.\n", ft->code); - - /* Get future again, since a GC may have occurred */ + /* Get future again, since a GC may have occurred or + future may have been suspended */ ft = fts->current_ft; - - /* Set the return val in the descriptor */ + mzrt_mutex_lock(fs->future_mutex); - ft->work_completed = 1; - ft->retval = v; - /* In case of multiple values: */ - send_special_result(ft, v); - - /* Update the status */ - ft->status = FINISHED; - dequeue_future(fs, ft); + if (!ft) { + /* continuation of future will be requeued, and this future + thread can do something else */ + } else { + /* Set the return val in the descriptor */ + ft->work_completed = 1; + ft->retval = v; + + /* In case of multiple values: */ + send_special_result(ft, v); + + /* Update the status */ + ft->status = FINISHED; + } /* Clear stacks */ MZ_RUNSTACK = MZ_RUNSTACK_START + fts->runstack_size; MZ_CONT_MARK_STACK = 0; - - scheme_signal_received_at(fs->signal_handle); + if (ft) + scheme_signal_received_at(fs->signal_handle); } end_gc_not_ok(fts, fs, NULL); mzrt_mutex_unlock(fs->future_mutex); @@ -956,6 +995,45 @@ void *worker_thread_future_loop(void *arg) return NULL; } +static int capture_future_continuation(future_t *ft, void **storage) + XFORM_SKIP_PROC +/* This function explicitly coorperates with the GC by storing the + pointers it needs to save across a collection in `storage', so + it can be used in a future thread. If future-thread-local + allocation fails, the result is 0. 
*/ +{ + Scheme_Lightweight_Continuation *lw; + Scheme_Object **arg_S; + + storage[2] = ft; + + lw = scheme_capture_lightweight_continuation(ft->arg_p, ft->lwc, storage); + if (!lw) return 0; + + ft = (future_t *)storage[2]; + + ft->suspended_lw = lw; + ft->status = WAITING_FOR_REQUEUE; + ft->want_lw = 0; + ft->fts->current_ft = NULL; /* tells worker thread that it no longer + needs to handle the future */ + + if (ft->arg_S0) { + arg_S = scheme_adjust_runstack_argument(lw, ft->arg_S0); + ft->arg_S0 = arg_S; + } + if (ft->arg_S1) { + arg_S = scheme_adjust_runstack_argument(lw, ft->arg_S1); + ft->arg_S1 = arg_S; + } + if (ft->arg_S2) { + arg_S = scheme_adjust_runstack_argument(lw, ft->arg_S2); + ft->arg_S2 = arg_S; + } + + return 1; +} + void scheme_check_future_work() /* Called in the runtime thread by the scheduler */ { @@ -974,7 +1052,6 @@ void scheme_check_future_work() if (ft) { fs->future_waiting_atomic = ft->next_waiting_atomic; ft->next_waiting_atomic = NULL; - ft->waiting_atomic = 0; } mzrt_mutex_unlock(fs->future_mutex); @@ -985,6 +1062,33 @@ void scheme_check_future_work() } else break; } + + while (1) { + /* Try to get a future waiting to be suspended */ + mzrt_mutex_lock(fs->future_mutex); + ft = fs->future_waiting_lwc; + if (ft) { + fs->future_waiting_lwc = ft->next_waiting_lwc; + ft->next_waiting_lwc = NULL; + } + mzrt_mutex_unlock(fs->future_mutex); + + if (ft && ft->want_lw) { + void *storage[3]; + + (void)capture_future_continuation(ft, storage); + + /* Signal the waiting worker thread that it + can continue doing other things: */ + mzrt_mutex_lock(fs->future_mutex); + if (ft->can_continue_sema) { + mzrt_sema_post(ft->can_continue_sema); + ft->can_continue_sema = NULL; + } + mzrt_mutex_unlock(fs->future_mutex); + } else + break; + } } static void future_do_runtimecall(Scheme_Future_Thread_State *fts, @@ -995,10 +1099,18 @@ static void future_do_runtimecall(Scheme_Future_Thread_State *fts, { future_t *future; Scheme_Future_State *fs = scheme_future_state; + void *storage[3]; /* Fetch the future descriptor for this thread */ future = fts->current_ft; + if (!is_atomic) { + scheme_fill_lwc_end(); + future->lwc = scheme_current_lwc; + future->fts = fts; + } else + future->lwc = NULL; + /* Set up the arguments for the runtime call to be picked up by the main rt thread */ mzrt_mutex_lock(fs->future_mutex); @@ -1006,37 +1118,52 @@ static void future_do_runtimecall(Scheme_Future_Thread_State *fts, future->prim_func = func; future->rt_prim = 1; future->rt_prim_is_atomic = is_atomic; + future->arg_p = scheme_current_thread; if (is_atomic) { - if (!future->waiting_atomic) { - future->next_waiting_atomic = fs->future_waiting_atomic; - fs->future_waiting_atomic = future; - future->waiting_atomic = 1; - } + future->next_waiting_atomic = fs->future_waiting_atomic; + fs->future_waiting_atomic = future; + future->status = WAITING_FOR_PRIM; + } else if (GC_gen0_alloc_page_ptr + && capture_future_continuation(future, storage)) { + /* this future thread will suspend handling the future + continuation until the result of the blocking call is ready; + fts->current_ft was set to NULL */ + } else { + /* couldn't capture the continuation locally, so ask + the runtime thread to capture it: */ + future->next_waiting_lwc = fs->future_waiting_lwc; + fs->future_waiting_lwc = future; + future->want_lw = 1; + future->status = WAITING_FOR_PRIM; } - /* Update the future's status to waiting */ - future->status = WAITING_FOR_PRIM; - scheme_signal_received_at(fs->signal_handle); - future->arg_p = 
scheme_current_thread; + if (fts->current_ft) { + /* Wait for the signal that the RT call is finished + or a lightweight continuation has been captured: */ + future->can_continue_sema = fts->worker_can_continue_sema; + end_gc_not_ok(fts, fs, MZ_RUNSTACK); /* we rely on this putting MZ_CONT_MARK_STACK into the thread record */ + mzrt_mutex_unlock(fs->future_mutex); - /* Wait for the signal that the RT call is finished */ - future->can_continue_sema = fts->worker_can_continue_sema; - end_gc_not_ok(fts, fs, MZ_RUNSTACK); /* we rely on this putting MZ_CONT_MARK_STACK into the thread record */ + mzrt_sema_wait(fts->worker_can_continue_sema); + + mzrt_mutex_lock(fs->future_mutex); + start_gc_not_ok(fs); + } + mzrt_mutex_unlock(fs->future_mutex); - mzrt_sema_wait(fts->worker_can_continue_sema); - - mzrt_mutex_lock(fs->future_mutex); - start_gc_not_ok(fs); - mzrt_mutex_unlock(fs->future_mutex); - - /* Fetch the future instance again, in case the GC has moved the pointer */ + /* Fetch the future instance again, in case the GC has moved the pointer + or the future has been requeued. */ future = fts->current_ft; - - if (future->no_retval) { + + if (!future) { + /* future continuation was requeued */ + scheme_future_longjmp(*scheme_current_thread->error_buf, 1); + } else if (future->no_retval) { + /* there was an error => abort the future */ future->no_retval = 0; scheme_future_longjmp(*scheme_current_thread->error_buf, 1); } @@ -1119,6 +1246,8 @@ unsigned long scheme_rtcall_alloc(const char *who, int src_type) } } + GC_gen0_alloc_page_end = retval + fts->gen0_size; + return retval; } @@ -1144,32 +1273,10 @@ void scheme_rtcall_new_mark_segment(Scheme_Thread *p) static int push_marks(future_t *f, Scheme_Cont_Frame_Data *d) { - Scheme_Thread *p2, *p; - long i, pos, delta; - Scheme_Cont_Mark *seg; - - if (f->arg_p) { - p2 = f->arg_p; - if (p2->cont_mark_stack) { - scheme_push_continuation_frame(d); - - p = scheme_current_thread; - - delta = MZ_CONT_MARK_POS - p2->cont_mark_pos; - if (delta < 0) delta = 0; - - for (i = p2->cont_mark_stack; i--; ) { - seg = p2->cont_mark_stack_segments[i >> SCHEME_LOG_MARK_SEGMENT_SIZE]; - pos = i & SCHEME_MARK_SEGMENT_MASK; - - MZ_CONT_MARK_POS = seg[pos].pos + delta; - scheme_set_cont_mark(seg[pos].key, seg[pos].val); - } - - MZ_CONT_MARK_POS = p2->cont_mark_pos + delta; - - return 1; - } + if (f->suspended_lw) { + return scheme_push_marks_from_lightweight_continuation(f->suspended_lw, d); + } else if (f->arg_p) { + return scheme_push_marks_from_thread(f->arg_p, d); } return 0; @@ -1243,7 +1350,9 @@ static void do_invoke_rtcall(Scheme_Future_State *fs, future_t *future) #endif future->rt_prim = 0; - + future->want_lw = 0; /* in case we got to the call before we got around + to capturing an LWC */ + if (scheme_log_level_p(scheme_main_logger, SCHEME_LOG_DEBUG)) { const char *src; @@ -1269,7 +1378,8 @@ static void do_invoke_rtcall(Scheme_Future_State *fs, future_t *future) } if ((future->source_type == FSRC_RATOR) - || (future->source_type == FSRC_MARKS)) + || (future->source_type == FSRC_MARKS) + || (future->source_type == FSRC_PRIM)) need_pop = push_marks(future, &mark_d); else need_pop = 0; @@ -1280,8 +1390,11 @@ static void do_invoke_rtcall(Scheme_Future_State *fs, future_t *future) case SIG_VOID_VOID_3ARGS: { prim_void_void_3args_t func = (prim_void_void_3args_t)future->prim_func; + GC_CAN_IGNORE Scheme_Object **arg_S0 = future->arg_S0; - func(future->arg_S0); + future->arg_S0 = NULL; + + func(arg_S0); break; } @@ -1297,12 +1410,13 @@ static void 
do_invoke_rtcall(Scheme_Future_State *fs, future_t *future) #endif case SIG_ALLOC_MARK_SEGMENT: { - Scheme_Thread *p_seg; + GC_CAN_IGNORE Scheme_Thread *p_seg; p_seg = (Scheme_Thread *)future->arg_s0; future->arg_s0 = NULL; scheme_new_mark_segment(p_seg); break; } +# define LOCALIZE(t, f) GC_CAN_IGNORE t f = future->f # include "jit_ts_runtime_glue.c" default: scheme_signal_error("unknown protocol %d", future->prim_protocol); @@ -1313,11 +1427,19 @@ static void do_invoke_rtcall(Scheme_Future_State *fs, future_t *future) pop_marks(&mark_d); mzrt_mutex_lock(fs->future_mutex); - /* Signal the waiting worker thread that it - can continue running machine code */ - if (future->can_continue_sema) { - mzrt_sema_post(future->can_continue_sema); - future->can_continue_sema= NULL; + if (future->suspended_lw) { + /* Re-enqueue the future so that some future thread can continue */ + future->status = PENDING; + enqueue_future(fs, future); + /* Signal that a future is pending */ + mzrt_sema_post(fs->future_pending_sema); + } else { + /* Signal the waiting worker thread that it + can continue running machine code */ + if (future->can_continue_sema) { + mzrt_sema_post(future->can_continue_sema); + future->can_continue_sema = NULL; + } } mzrt_mutex_unlock(fs->future_mutex); } @@ -1346,12 +1468,19 @@ static void invoke_rtcall(Scheme_Future_State * volatile fs, future_t * volatile if (scheme_setjmp(newbuf)) { mzrt_mutex_lock(fs->future_mutex); future->no_retval = 1; - /* Signal the waiting worker thread that it - can continue running machine code */ - mzrt_sema_post(future->can_continue_sema); - future->can_continue_sema = NULL; - mzrt_mutex_unlock(fs->future_mutex); - scheme_longjmp(*savebuf, 1); + if (future->suspended_lw) { + /* Abandon the future */ + future->status = FINISHED; + future->work_completed = 1; + future->retval = 0; + } else { + /* Signal the waiting worker thread that it + can continue running machine code */ + mzrt_sema_post(future->can_continue_sema); + future->can_continue_sema = NULL; + mzrt_mutex_unlock(fs->future_mutex); + scheme_longjmp(*savebuf, 1); + } } else { if (future->rt_prim_is_atomic) { do_invoke_rtcall(fs, future); @@ -1392,12 +1521,11 @@ future_t *get_pending_future(Scheme_Future_State *fs) { future_t *f; - for (f = fs->future_queue; f != NULL; f = f->next) { - if (f->status == PENDING) - return f; - } + f = fs->future_queue; + if (f) + dequeue_future(fs, f); - return NULL; + return f; } #endif diff --git a/src/racket/src/future.h b/src/racket/src/future.h index 03da196abe..3e6fb47749 100644 --- a/src/racket/src/future.h +++ b/src/racket/src/future.h @@ -35,6 +35,7 @@ typedef void* (*prim_pvoid_pvoid_pvoid_t)(void*, void*); #define WAITING_FOR_PRIM 2 #define FINISHED 3 #define PENDING_OVERSIZE 4 +#define WAITING_FOR_REQUEUE 5 #define FSRC_OTHER 0 #define FSRC_RATOR 1 @@ -55,6 +56,7 @@ typedef struct future_t { /* Runtime call stuff */ int rt_prim; /* flag to indicate waiting for a prim call */ + int want_lw; /* flag to indicate waiting for lw capture */ int rt_prim_is_atomic; double time_of_request; const char *source_of_request; @@ -81,7 +83,12 @@ typedef struct future_t { Scheme_Object *arg_s2; Scheme_Object **arg_S2; int arg_i2; + Scheme_Thread *arg_p; + struct Scheme_Current_LWC *lwc; + struct Scheme_Future_Thread_State *fts; + + struct Scheme_Lightweight_Continuation *suspended_lw; Scheme_Object *retval_s; void *retval_p; /* use only with conservative GC */ @@ -99,8 +106,8 @@ typedef struct future_t { struct future_t *prev; struct future_t *next; - int waiting_atomic; 
struct future_t *next_waiting_atomic; + struct future_t *next_waiting_lwc; } future_t; /* Primitive instrumentation stuff */ diff --git a/src/racket/src/gen-jit-ts.rkt b/src/racket/src/gen-jit-ts.rkt index 9e83cd1106..728b002bb3 100644 --- a/src/racket/src/gen-jit-ts.rkt +++ b/src/racket/src/gen-jit-ts.rkt @@ -16,6 +16,21 @@ [(#\v) "void"] [else (error 'char->type "unknown: ~e" c)])) +(define (is-pointer-type? c) + (case c + [(#\s) #t] + [(#\t) #t] + [(#\S) #t] + [(#\b) #t] + [(#\n) #t] + [(#\m) #f] + [(#\p) #t] + [(#\i) #f] + [(#\l) #f] + [(#\z) #f] + [(#\v) #f] + [else (error 'char->type "unknown: ~e" c)])) + (define (type->arg-string t) (let* ([t (symbol->string t)]) (substring t 0 (- (string-length t) 2)))) @@ -105,13 +120,24 @@ case SIG_@|ts|: { prim_@|ts| f = (prim_@|ts|)future->prim_func; - @(if (string=? result-type "void") "" @string-append{@|result-type| retval;}) + @(if (string=? result-type "void") "" @string-append{GC_CAN_IGNORE @|result-type| retval;}) @(if (equal? arg-types '("Scheme_Object*")) @string-append{receive_special_result(future, future->arg_s0, 1);} "") + @(string-join + (for/list ([t (in-string (type->arg-string t))] + [i (in-naturals)]) + @string-append{LOCALIZE(@(char->type t), arg_@|(string t)|@|(number->string i)|);}) + " ") + @(string-join + (for/list ([t (in-string (type->arg-string t))] + [i (in-naturals)] + #:when (is-pointer-type? t)) + @string-append{future->arg_@|(string t)|@|(number->string i)| = NULL;}) + " ") @(if (string=? result-type "void") "" "retval = ") f(@(string-join (for/list ([t (in-string (type->arg-string t))] [i (in-naturals)]) - @string-append{future->arg_@|(string t)|@|(number->string i)|}) + @string-append{arg_@|(string t)|@|(number->string i)|}) ", ")); @(if (string=? result-type "void") "" @string-append{future->retval_@(substring ts (sub1 (string-length ts))) = retval;}) @(if (string=? result-type "Scheme_Object*") @string-append{send_special_result(future, retval);} "") diff --git a/src/racket/src/jit.c b/src/racket/src/jit.c index be94d915db..9a482f8c89 100644 --- a/src/racket/src/jit.c +++ b/src/racket/src/jit.c @@ -75,6 +75,12 @@ END_XFORM_ARITH; # define JIT_X86_64 #endif +#ifdef MZ_USE_JIT_I386 +# ifndef JIT_X86_64 +# define JIT_X86_PLAIN +# endif +#endif + #include "lightning/lightning.h" #ifdef MZ_USE_JIT_X86_64 @@ -104,6 +110,10 @@ END_XFORM_ARITH; # define USE_FLONUM_UNBOXING #endif +#ifdef MZ_USE_FUTURES +# define MZ_USE_LWC +#endif + #define JIT_NOT_RET JIT_R1 #if JIT_NOT_RET == JIT_RET Fix me! See use. 
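/* Aside: a hedged, standalone sketch of the control flow this patch introduces
   around blocking runtime calls in future threads. It is not part of the patch
   and does not use Racket's internal API; ModelFuture, worker_step, and
   runtime_step are illustrative stand-ins. The idea mirrored here: instead of
   blocking, a worker captures the future's remaining work (standing in for the
   lightweight continuation built by scheme_capture_lightweight_continuation),
   marks the future as waiting for requeue, and moves on; the runtime thread
   performs the call and requeues the future (compare do_invoke_rtcall
   re-enqueueing a future whose suspended_lw is set), so any worker can resume it. */

#include <stdio.h>

enum { PENDING, WAITING_FOR_REQUEUE, FINISHED };

typedef struct ModelFuture {
  int status;
  int saved;                  /* stands in for the captured continuation (suspended_lw) */
  int result;
  struct ModelFuture *next;   /* pending-queue link */
} ModelFuture;

static ModelFuture *pending_queue;

static void enqueue(ModelFuture *f) { f->next = pending_queue; pending_queue = f; }

static ModelFuture *dequeue(void) {
  ModelFuture *f = pending_queue;
  if (f) pending_queue = f->next;
  return f;
}

/* Worker step: run the future until it finishes or hits a "runtime call";
   rather than waiting for the call, capture state and suspend. */
static void worker_step(ModelFuture *f) {
  if (f->status == WAITING_FOR_REQUEUE) {
    f->result = f->saved + 1;          /* resume the captured continuation */
    f->status = FINISHED;
  } else {
    f->saved = 41;                     /* partial work completed so far */
    f->status = WAITING_FOR_REQUEUE;   /* suspend instead of blocking the worker */
  }
}

/* Runtime-thread step: perform the blocking call on the suspended future's
   behalf, then put it back on the queue so any worker can continue it. */
static void runtime_step(ModelFuture *f) { enqueue(f); }

int main(void) {
  ModelFuture f = { PENDING, 0, 0, NULL };
  enqueue(&f);

  worker_step(dequeue());  /* hits the blocking call and suspends */
  runtime_step(&f);        /* runtime thread handles the call, requeues */
  worker_step(dequeue());  /* some worker resumes the continuation */

  printf("status=%d result=%d\n", f.status, f.result);  /* prints status=2 result=42 */
  return 0;
}

/* The point of the design sketched above: a blocked future no longer pins its
   OS-level worker thread; the captured state travels with the future descriptor. */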
@@ -199,7 +209,7 @@ typedef struct { int need_set_rs; void **retain_start; double *retain_double_start; - int local1_busy; + int local1_busy, pushed_marks; int log_depth; int self_pos, self_closure_size, self_toplevel_pos; int self_to_closure_delta, closure_to_args_delta; @@ -343,13 +353,13 @@ void scheme_jit_fill_threadlocal_table(); # define tl_retry_alloc_r1 tl_delta(retry_alloc_r1) # define tl_fixup_runstack_base tl_delta(fixup_runstack_base) # define tl_fixup_already_in_place tl_delta(fixup_already_in_place) -# define tl_double_result tl_delta(double_result) # define tl_save_fp tl_delta(save_fp) # define tl_scheme_fuel_counter tl_delta(scheme_fuel_counter) # define tl_scheme_jit_stack_boundary tl_delta(scheme_jit_stack_boundary) # define tl_jit_future_storage tl_delta(jit_future_storage) # define tl_scheme_future_need_gc_pause tl_delta(scheme_future_need_gc_pause) # define tl_scheme_use_rtcall tl_delta(scheme_use_rtcall) +# define tl_scheme_current_lwc tl_delta(scheme_current_lwc) static void *get_threadlocal_table() XFORM_SKIP_PROC { return &BOTTOM_VARIABLE; } @@ -412,7 +422,6 @@ static void *get_threadlocal_table() XFORM_SKIP_PROC { return &BOTTOM_VARIABLE; # define tl_retry_alloc_r1 (&retry_alloc_r1) # define tl_fixup_runstack_base (&fixup_runstack_base) # define tl_fixup_already_in_place (&fixup_already_in_place) -# define tl_double_result (&double_result) # define tl_save_fp (&save_fp) # define tl_scheme_fuel_counter (&scheme_fuel_counter) # define tl_scheme_jit_stack_boundary (&scheme_jit_stack_boundary) @@ -1330,6 +1339,13 @@ int check_location; two locals). */ +/* LOCAL1 is used to save the value current_cont_mark_stack, + at least for the first time it needs to be saved in a + function body. If it needs to be saved again, it is + pushed onto the runstack. (The value of current_cont_mark_stack + is an integer that marks a point in the stack, as opposed + to being an address of a stack position.) */ + /* mz_prolog() and mz_epilog() bracket an internal "function" using a lighter-weight ABI that keeps all Rx and Vx registers as-is on @@ -1356,6 +1372,26 @@ int check_location; LOCAL2. So, LOCAL3 can always be used for temporary storage in such functions (assuming that they're called from a function that pushes locals, and that nothing else is using LOCAL2). 
+ +*/ + +/* x86[_64] frame (counting down from frame pointer marked with <-): + return address + prev frame <- + saved EBX (= JIT_RUNSTACK, when saved from native call) + saved R12/ESI (= JIT_V1, when saved from native call) + saved R13/EDI (= JIT_V2 x86_64: = RUNSTACK_BASE, when saved from native call + x86: = THREAD_LOCAL or RUNSTACK_BASE, when saved from native call + LOCAL1 (which is a cont_mark_stack offset, if anything) + LOCAL2 (some pointer, never to stack or runstack) + LOCAL3 (temp space for misc uses; not saved across calls that might capture LWC) + LOCAL4 (x86_64: = saved R14 otherwise when THREAD_LOCAL + x86: = RUNSTACK_BASE or THREAD_LOCAL) + [some empty slots, maybe, depending on alignment] + [space for local, unboxed flonums] + Registers: JIT_V1 = RUNSTACK, JIT_V2 = x86_64: RUNSTACK_BASE + x86: RUNSTACK_BASE or THREAD_LOCAL + x86_64: JIT_R14 = THREAD_LOCAL */ #ifdef JIT_THREAD_LOCAL @@ -1440,7 +1476,7 @@ static void _jit_prolog_again(mz_jit_state *jitter, int n, int ret_addr_reg) # else # define LOCAL_FRAME_SIZE 7 # endif -# define JIT_LOCAL4 -(JIT_WORD_SIZE * 7) +# define JIT_LOCAL4_OFFSET 7 # else # define LOCAL_FRAME_SIZE 3 # endif @@ -1451,11 +1487,14 @@ static void _jit_prolog_again(mz_jit_state *jitter, int n, int ret_addr_reg) # define JIT_LOCAL3 JIT_LOCAL2 # ifdef NEED_LOCAL4 # define LOCAL_FRAME_SIZE 3 -# define JIT_LOCAL4 -(JIT_WORD_SIZE * 6) +# define JIT_LOCAL4_OFFSET 6 # else # define LOCAL_FRAME_SIZE 2 # endif # endif +# ifdef NEED_LOCAL4 +# define JIT_LOCAL4 -(JIT_WORD_SIZE * JIT_LOCAL4_OFFSET) +# endif # define mz_push_locals() SUBQir((LOCAL_FRAME_SIZE << JIT_LOG_WORD_SIZE), JIT_SP) # define mz_pop_locals() ADDQir((LOCAL_FRAME_SIZE << JIT_LOG_WORD_SIZE), JIT_SP) # define JIT_FRAME_FLONUM_OFFSET (-(JIT_WORD_SIZE * (LOCAL_FRAME_SIZE + 3))) @@ -1725,10 +1764,9 @@ int scheme_can_inline_fp_comp() } #if defined(INLINE_FP_OPS) && !defined(CAN_INLINE_ALLOC) -static double double_result; static void *malloc_double(void) { - return scheme_make_double(double_result); + return scheme_make_double(save_fp); } #endif @@ -1806,6 +1844,256 @@ static Scheme_Object *make_two_element_ivector(Scheme_Object *a, Scheme_Object * } #endif +/*========================================================================*/ +/* lightweight continuations */ +/*========================================================================*/ + +/* A lightweight continuation is one that contains only frames from + JIT-generated code. Use scheme_call_as_lightweight_continuation() + to start such a continuation, and it must be exited from the JIT + world by mz_finish_lwe(). + + Use mz_finish_lwe(addr, tmp) for a call that may capture a lightweight + continuation: + + * JIT_V1 does not contain a value that needs to change if the runstack moves. + (Other JIT constraints imply that it isn't a pointer to GCable memory.) + + * Relevant thread-local state is confined to the C stack, runstack, + mark stack, and tl_save_fp. + + * A pointer to the runstack can be used as a Scheme_Object** argument, but + only when it points to MZ_RUNSTACK. + + The `tmp' is a `jit_insn *' that can be used by the expansion of the + macro. 
+ +*/ + +#ifdef MZ_USE_LWC +# ifdef JIT_RUNSTACK_BASE +# define SAVE_RS_BASE_REG(x) jit_stxi_p((int)&((Scheme_Current_LWC *)0x0)->runstack_base_end, JIT_R0, JIT_RUNSTACK_BASE) +# else +# define SAVE_RS_BASE_REG(x) (void)0 +# endif +# define adjust_lwc_return_address(pc) ((jit_insn *)((char *)(pc) - jit_return_pop_insn_len())) +# define mz_finish_lwe(d, refr) (mz_tl_ldi_p(JIT_R0, tl_scheme_current_lwc), \ + jit_stxi_p((int)&((Scheme_Current_LWC *)0x0)->frame_end, JIT_R0, JIT_FP), \ + jit_stxi_p((int)&((Scheme_Current_LWC *)0x0)->stack_end, JIT_R0, JIT_SP), \ + jit_stxi_p((int)&((Scheme_Current_LWC *)0x0)->saved_v1, JIT_R0, JIT_V1), \ + SAVE_RS_BASE_REG(), \ + refr = jit_patchable_movi_p(JIT_R1, jit_forward()), \ + jit_stxi_p((int)&((Scheme_Current_LWC *)0x0)->original_dest, JIT_R0, JIT_R1), \ + mz_finish(d), \ + jit_patch_movi(refr, adjust_lwc_return_address(_jit.x.pc))) +#else +# define mz_finish_lwe(d, refr) (refr = NULL, mz_finish(d)) +#endif + +#define mz_nonrs_finish_lwe(d, refr) mz_finish_lwe(d, refr) + +THREAD_LOCAL_DECL(Scheme_Current_LWC *scheme_current_lwc); + +typedef Scheme_Object *(*LWC_Native_Starter)(void *data, + int argc, + Scheme_Object **argv, + Scheme_Closed_Prim *chain_to, + void **save_pos); + +static LWC_Native_Starter native_starter_code; + +Scheme_Object *scheme_call_as_lightweight_continuation(Scheme_Closed_Prim *code, + void *data, + int argc, + Scheme_Object **argv) +{ + scheme_current_lwc->runstack_start = MZ_RUNSTACK; + scheme_current_lwc->cont_mark_stack_start = MZ_CONT_MARK_STACK; + return native_starter_code(data, argc, argv, code, (void **)&scheme_current_lwc->stack_start); +} + +void scheme_fill_stack_lwc_end(void) XFORM_SKIP_PROC +{ +#ifdef JIT_THREAD_LOCAL + scheme_current_lwc->saved_save_fp = save_fp; +#endif +} + +typedef Scheme_Object *(*Continuation_Abort_Code)(void *result, void *stack_pos); + +static LWC_Native_Starter native_starter_code; + +void *scheme_save_lightweight_continuation_stack(Scheme_Current_LWC *lwc) + XFORM_SKIP_PROC +/* This function assumes that lwc won't move during an + allocation. Also, if allocation fails, it can abort and return + NULL, so it can work in a thread for running futures (where + allocation and GC in general are disallowed). */ + +{ + /* Copies just the C-stack part for a lightweight continuation. + Copying the runstack, copying the cont-mark stack and saving + arguments is up to the caller. 
*/ + void *p; + long size; + + size = (long)lwc->stack_start - (long)lwc->stack_end; + + p = (void *)scheme_malloc_atomic(size); + if (!p) return NULL; + + memcpy(p, lwc->stack_end, size); + + return p; +} + +typedef struct { + void *dest_stack_pos; /* must be first */ + Scheme_Current_LWC *lwc; + void *copy_to_install; + long full_size, copy_size; +#ifdef JIT_X86_64 + long saved_r14, saved_r15; +#endif + Scheme_Object *result; + void *new_runstack; + void *new_runstack_base; + void *new_threadlocal; +} Apply_LWC_Args; + +typedef Scheme_Object *(*Continuation_Apply_Indirect)(Apply_LWC_Args *, long); +typedef Scheme_Object *(*Continuation_Apply_Finish)(Apply_LWC_Args *args, void *stack, void *frame); + +static Continuation_Apply_Indirect continuation_apply_indirect_code; +#ifdef MZ_USE_LWC +static Continuation_Apply_Finish continuation_apply_finish_code; +#endif + +#ifdef MZ_USE_LWC +static Scheme_Object *continuation_apply_install(Apply_LWC_Args *args) XFORM_SKIP_PROC +{ + long delta, cm_delta; + void **old_fp, **new_fp, **next_old_fp, **next_new_fp; + Scheme_Current_LWC *lwc; + void *new_stack_start; + + /* application of a lightweight continuation forms a lightweight continuation: */ + scheme_current_lwc->stack_start = args->dest_stack_pos; + + /* args->dest_stack_pos has been set, and room has been made on the stack */ + new_stack_start = (char *)args->dest_stack_pos - (long)args->full_size; + memcpy(new_stack_start, args->copy_to_install, args->copy_size); + + lwc = args->lwc; + + args->new_runstack = MZ_RUNSTACK; + args->new_runstack_base = MZ_RUNSTACK + (lwc->runstack_base_end - lwc->runstack_end); +#ifdef USE_THREAD_LOCAL + args->new_threadlocal = &BOTTOM_VARIABLE; + save_fp = lwc->saved_save_fp; +#endif + + delta = (long)new_stack_start - (long)lwc->stack_end; + cm_delta = (long)MZ_CONT_MARK_STACK - (long)lwc->cont_mark_stack_end; + + /* fix frame pointers, etc. 
*/ + old_fp = lwc->frame_end; + new_fp = NULL; + while ((unsigned long)old_fp < (unsigned long)lwc->stack_start) { + new_fp = (void **)((char *)old_fp + delta); + /* we usually do not copy/update the very last frame pointer, so check: */ + if ((unsigned long)old_fp < ((unsigned long)lwc->stack_end + args->copy_size)) { + /* we need to update */ + next_old_fp = *new_fp; + next_new_fp = (void **)((char *)next_old_fp + delta); + old_fp = next_old_fp; + } else { + /* no updates from here on; force old_fp to be past the saved area: */ + old_fp = lwc->stack_start; + next_new_fp = NULL; + } + + /* fixups of local variables in the frame */ +#if defined(JIT_X86_64) || defined(JIT_X86_PLAIN) + new_fp[-4] = (void *)((long)new_fp[-4] + cm_delta); /* LOCAL1 */ +#endif +#ifdef JIT_X86_PLAIN +# ifdef JIT_THREAD_LOCAL +# ifdef THREAD_LOCAL_USES_JIT_V2 + /* LOCAL4 = RUNSTACK_BASE */ + new_fp[-JIT_LOCAL4_OFFSET] = (MZ_RUNSTACK + ((Scheme_Object **)new_fp[-JIT_LOCAL4_OFFSET] + - lwc->runstack_end)); +# else + new_fp[-JIT_LOCAL4_OFFSET] = &BOTTOM_VARIABLE; /* LOCAL4 = THREAD_LOCAL */ +# endif +# endif +#endif + + if ((unsigned long)old_fp < (unsigned long)lwc->stack_start) { + new_fp[0] = next_new_fp; + + /* fixups for saved registers in a call from JIT-generated code */ +#ifdef JIT_X86_64 + new_fp[-1] = MZ_RUNSTACK + ((Scheme_Object **)new_fp[-1] - lwc->runstack_end); /* JIT_RUNSTACK */ + new_fp[-3] = MZ_RUNSTACK + ((Scheme_Object **)new_fp[-3] - lwc->runstack_end); /* JIT_RUNSTACK_BASE */ +# ifdef JIT_THREAD_LOCAL + new_fp[-JIT_LOCAL4_OFFSET] = &BOTTOM_VARIABLE; /* LOCAL4 */ +# endif +#endif +#ifdef JIT_X86_PLAIN + new_fp[-1] = MZ_RUNSTACK + ((Scheme_Object **)new_fp[-1] - lwc->runstack_end); /* JIT_RUNSTACK */ +# ifdef THREAD_LOCAL_USES_JIT_V2 + new_fp[-3] = &BOTTOM_VARIABLE; +# else + new_fp[-3] = MZ_RUNSTACK + ((Scheme_Object **)new_fp[-3] - lwc->runstack_end); /* JIT_RUNSTACK_BASE */ +# endif +#endif + } else { +#ifdef JIT_X86_64 +# ifdef JIT_THREAD_LOCAL + /* topmost frame holds the original R14: */ + new_fp[-JIT_LOCAL4_OFFSET] = (void *)args->saved_r14; /* LOCAL4 */ +# endif +#endif + } + } + + /* jump to the old code */ + new_fp = (void **)((char *)lwc->frame_end + delta); + continuation_apply_finish_code(args, new_stack_start, new_fp); + + return NULL; +} +#endif + +Scheme_Object *scheme_apply_lightweight_continuation_stack(Scheme_Current_LWC *lwc, void *stack, + Scheme_Object *result) XFORM_SKIP_PROC +{ + /* Restores just the C-stack part and uses the given (updated) + arguments. Restoring the runstack and cont-mark stack is up to the + caller. */ + Apply_LWC_Args args; + long size; + + size = (long)lwc->stack_start - (long)lwc->stack_end; + args.full_size = size; + + /* Adjust size to skip stack part that we don't want to overwrite. 
*/ +#ifdef JIT_X86_64 + size -= 4 * sizeof(void*); /* frame pointer and 3 saved registers */ +#endif +#ifdef JIT_X86_PLAIN + size -= 4 * sizeof(void*); /* frame pointer and 3 saved registers */ +#endif + args.copy_size = size; + + args.lwc = lwc; + args.copy_to_install = stack; + args.result = result; + + return continuation_apply_indirect_code(&args, size); +} + /*========================================================================*/ /* bytecode properties */ /*========================================================================*/ @@ -2365,7 +2653,7 @@ static void patch_branch_addr(mz_jit_state *jitter, Branch_Info_Addr *addr, int } } -static void add_branch(Branch_Info *for_branch, jit_insn *ref, int mode, int kind) +static void add_branch(Branch_Info *for_branch, GC_CAN_IGNORE jit_insn *ref, int mode, int kind) { if (ref) { if (for_branch->addrs_count == for_branch->addrs_size) { @@ -2384,7 +2672,7 @@ static void add_branch(Branch_Info *for_branch, jit_insn *ref, int mode, int kin } } -static void add_or_patch_branch_true_uc(mz_jit_state *jitter, Branch_Info *for_branch, jit_insn *ref) +static void add_or_patch_branch_true_uc(mz_jit_state *jitter, Branch_Info *for_branch, GC_CAN_IGNORE jit_insn *ref) /* Short-jump mode for addr branch should be consistent with for_branch->banch_short */ { if (for_branch->true_needs_jump) { @@ -2394,7 +2682,7 @@ static void add_or_patch_branch_true_uc(mz_jit_state *jitter, Branch_Info *for_b } } -static void add_or_patch_branch_true_movi(mz_jit_state *jitter, Branch_Info *for_branch, jit_insn *ref) +static void add_or_patch_branch_true_movi(mz_jit_state *jitter, Branch_Info *for_branch, GC_CAN_IGNORE jit_insn *ref) /* Short-jump mode for addr move should be consistent with for_branch->banch_short */ { if (for_branch->true_needs_jump) { @@ -2404,13 +2692,13 @@ static void add_or_patch_branch_true_movi(mz_jit_state *jitter, Branch_Info *for } } -static void add_branch_false(Branch_Info *for_branch, jit_insn *ref) +static void add_branch_false(Branch_Info *for_branch, GC_CAN_IGNORE jit_insn *ref) /* Short-jump mode for addr branch should be consistent with for_branch->banch_short */ { add_branch(for_branch, ref, BRANCH_ADDR_FALSE, BRANCH_ADDR_BRANCH); } -static void add_branch_false_movi(Branch_Info *for_branch, jit_insn *ref) +static void add_branch_false_movi(Branch_Info *for_branch, GC_CAN_IGNORE jit_insn *ref) /* Short-jump mode for addr move should be consistent with for_branch->branch_short */ { add_branch(for_branch, ref, BRANCH_ADDR_FALSE, BRANCH_ADDR_MOVI); @@ -2581,19 +2869,19 @@ extern int g_print_prims; /* Support for intercepting direct calls to primitives: */ #ifdef MZ_USE_FUTURES # define mz_prepare_direct_prim(n) mz_prepare(n) -# define mz_finishr_direct_prim(reg, proc) (jit_pusharg_p(reg), (void)mz_finish(proc)) +# define mz_finishr_direct_prim(reg, proc, refr) (jit_pusharg_p(reg), (void)mz_finish_lwe(proc, refr)) # define mz_direct_only(p) /* skip this arg, so that total count <= 3 args */ /* Inlines check of scheme_use_rtcall: */ # define mz_generate_direct_prim(direct_only, first_arg, reg, prim_indirect) \ { \ - GC_CAN_IGNORE jit_insn *refdirect, *refcont; \ + GC_CAN_IGNORE jit_insn *refdirect, *refcont, *refitsr; \ int argstate; \ jit_save_argstate(argstate); \ mz_tl_ldi_i(JIT_R0, tl_scheme_use_rtcall); \ __START_TINY_JUMPS__(1); \ refdirect = jit_beqi_i(jit_forward(), JIT_R0, 0); \ first_arg; \ - mz_finishr_direct_prim(reg, prim_indirect); \ + mz_finishr_direct_prim(reg, prim_indirect, refitsr); \ refcont = 
jit_jmpi(jit_forward()); \ CHECK_LIMIT(); \ mz_patch_branch(refdirect); \ @@ -2944,7 +3232,8 @@ static int generate_finish_tail_call(mz_jit_state *jitter, int direct_native) if (direct_native > 1) { /* => some_args_already_in_place */ (void)mz_finish(_scheme_tail_apply_from_native_fixup_args); } else { - (void)mz_finish(ts__scheme_tail_apply_from_native); + GC_CAN_IGNORE jit_insn *refr; + (void)mz_finish_lwe(ts__scheme_tail_apply_from_native, refr); } CHECK_LIMIT(); /* Return: */ @@ -3092,7 +3381,7 @@ static int generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc If num_rands < 0, then argc is in R0, and need to pop runstack before returning. If num_rands == -1, skip prolog. */ GC_CAN_IGNORE jit_insn *ref, *ref2, *ref4, *ref5, *ref6, *ref7, *ref8, *ref9; - GC_CAN_IGNORE jit_insn *ref10, *reftop = NULL, *refagain; + GC_CAN_IGNORE jit_insn *ref10, *reftop = NULL, *refagain, *refrts; #ifndef FUEL_AUTODECEREMENTS GC_CAN_IGNORE jit_insn *ref11; #endif @@ -3144,7 +3433,7 @@ static int generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc /* Fast inlined-native jump ok (proc will check argc, if necessary) */ { - jit_insn *refr; + GC_CAN_IGNORE jit_insn *refr; if (num_rands < 0) { /* We need to save argc to manually pop the runstack. So move V1 to R2 and move R0 to V1: */ @@ -3202,7 +3491,7 @@ static int generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc /* Fast common-case return */ if (pop_and_jump) { - jit_insn *refc; + GC_CAN_IGNORE jit_insn *refc; __START_INNER_TINY__(1); refc = jit_blei_p(jit_forward(), JIT_R0, SCHEME_MULTIPLE_VALUES); __END_INNER_TINY__(1); @@ -3221,7 +3510,7 @@ static int generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc } if (!multi_ok) { - jit_insn *refm; + GC_CAN_IGNORE jit_insn *refm; __END_SHORT_JUMPS__(1); refm = jit_beqi_p(jit_forward(), JIT_R0, SCHEME_MULTIPLE_VALUES); mz_patch_branch_at(refm, bad_result_arity_code); @@ -3245,9 +3534,9 @@ static int generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc mz_prepare(1); jit_pusharg_p(JIT_R0); if (multi_ok) { - (void)mz_finish(ts_scheme_force_value_same_mark); + (void)mz_finish_lwe(ts_scheme_force_value_same_mark, refrts); } else { - (void)mz_finish(ts_scheme_force_one_value_same_mark); + (void)mz_finish_lwe(ts_scheme_force_one_value_same_mark, refrts); } ref5 = jit_jmpi(jit_forward()); CHECK_LIMIT(); @@ -3283,7 +3572,7 @@ static int generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc jit_retval(JIT_R0); VALIDATE_RESULT(JIT_R0); if (!multi_ok) { - jit_insn *refm; + GC_CAN_IGNORE jit_insn *refm; __END_SHORT_JUMPS__(1); refm = jit_beqi_p(jit_forward(), JIT_R0, SCHEME_MULTIPLE_VALUES); mz_patch_branch_at(refm, bad_result_arity_code); @@ -3304,9 +3593,9 @@ static int generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc mz_prepare(1); jit_pusharg_p(JIT_R0); if (multi_ok) { - (void)mz_finish(ts_scheme_force_value_same_mark); + (void)mz_finish_lwe(ts_scheme_force_value_same_mark, refrts); } else { - (void)mz_finish(ts_scheme_force_one_value_same_mark); + (void)mz_finish_lwe(ts_scheme_force_one_value_same_mark, refrts); } CHECK_LIMIT(); ref8 = jit_jmpi(jit_forward()); @@ -3350,9 +3639,9 @@ static int generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc jit_pusharg_p(JIT_V1); if (num_rands < 0) { jit_movr_p(JIT_V1, JIT_R0); } /* save argc to manually pop runstack */ if (multi_ok) { - (void)mz_finish(ts__scheme_apply_multi_from_native); + 
(void)mz_finish_lwe(ts__scheme_apply_multi_from_native, refrts); } else { - (void)mz_finish(ts__scheme_apply_from_native); + (void)mz_finish_lwe(ts__scheme_apply_from_native, refrts); } CHECK_LIMIT(); mz_patch_ucbranch(ref5); @@ -3381,11 +3670,11 @@ static int generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc return 1; } -static int generate_self_tail_call(Scheme_Object *rator, mz_jit_state *jitter, int num_rands, jit_insn *slow_code, +static int generate_self_tail_call(Scheme_Object *rator, mz_jit_state *jitter, int num_rands, GC_CAN_IGNORE jit_insn *slow_code, int args_already_in_place, Scheme_App_Rec *app, Scheme_Object **alt_rands) /* Last argument is in R0 */ { - jit_insn *refslow, *refagain; + GC_CAN_IGNORE jit_insn *refslow, *refagain; int i, jmp_tiny, jmp_short; int closure_size = jitter->self_closure_size; int space, offset, arg_offset, arg_tmp_offset; @@ -3585,6 +3874,9 @@ typedef struct { } Generate_Call_Data; static void register_sub_func(mz_jit_state *jitter, void *code, Scheme_Object *protocol) +/* protocol: #f => normal lightweight call protocol + void => next return address is in LOCAL2 + eof => name to use is in LOCAL2 */ { void *code_end; @@ -4343,7 +4635,7 @@ static jit_insn *generate_arith_slow_path(mz_jit_state *jitter, Scheme_Object *r *_ref is place to set for where to jump for false if for_branch, result if !for_branch; result is place to jump to start slow path if fixnum attempt fails */ { - jit_insn *ref, *ref4, *refslow; + GC_CAN_IGNORE jit_insn *ref, *ref4, *refslow; refslow = _jit.x.pc; @@ -4532,10 +4824,13 @@ static int generate_alloc_double(mz_jit_state *jitter, int inline_retry) jit_addi_p(JIT_R0, JIT_V1, OBJHEAD_SIZE); (void)jit_stxi_d_fppop(&((Scheme_Double *)0x0)->double_val, JIT_R0, JIT_FPR0); # else - (void)mz_tl_sti_d_fppop(tl_double_result, JIT_FPR0, JIT_R0); + (void)mz_tl_sti_d_fppop(tl_save_fp, JIT_FPR0, JIT_R0); JIT_UPDATE_THREAD_RSPTR_IF_NEEDED(); mz_prepare(0); - (void)mz_finish(ts_malloc_double); + { + GC_CAN_IGNORE jit_insn *refr; + (void)mz_finish_lwe(ts_malloc_double, refr); + } jit_retval(JIT_R0); # endif #endif @@ -5524,7 +5819,7 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj arithmetic-shift, then you probably want it. */ int v1 = (reversed ? JIT_R0 : JIT_R1); int v2 = (reversed ? 
JIT_R1 : JIT_R0); - jit_insn *refi, *refc; + GC_CAN_IGNORE jit_insn *refi, *refc; if ((arith != -6) && (!unsafe_fx || overflow_refslow)) refi = jit_bgei_l(jit_forward(), v2, (long)scheme_make_integer(0)); @@ -5594,7 +5889,7 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj mz_patch_ucbranch(refc); } else if (arith == 9) { /* min */ - jit_insn *refc; + GC_CAN_IGNORE jit_insn *refc; __START_INNER_TINY__(branch_short); refc = jit_bltr_l(jit_forward(), JIT_R0, JIT_R1); jit_movr_l(JIT_R0, JIT_R1); @@ -5602,7 +5897,7 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj __END_INNER_TINY__(branch_short); } else if (arith == 10) { /* max */ - jit_insn *refc; + GC_CAN_IGNORE jit_insn *refc; __START_INNER_TINY__(branch_short); refc = jit_bgtr_l(jit_forward(), JIT_R0, JIT_R1); jit_movr_l(JIT_R0, JIT_R1); @@ -5699,7 +5994,7 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj jit_ori_ul(JIT_R0, JIT_R0, 0x1); } else if (arith == 9) { /* min */ - jit_insn *refc; + GC_CAN_IGNORE jit_insn *refc; __START_INNER_TINY__(branch_short); refc = jit_blti_l(jit_forward(), JIT_R0, (long)scheme_make_integer(v)); jit_movi_l(JIT_R0, (long)scheme_make_integer(v)); @@ -5707,7 +6002,7 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj __END_INNER_TINY__(branch_short); } else if (arith == 10) { /* max */ - jit_insn *refc; + GC_CAN_IGNORE jit_insn *refc; __START_INNER_TINY__(branch_short); refc = jit_bgti_l(jit_forward(), JIT_R0, (long)scheme_make_integer(v)); jit_movi_l(JIT_R0, (long)scheme_make_integer(v)); @@ -5715,7 +6010,7 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj __END_INNER_TINY__(branch_short); } else if (arith == 11) { /* abs */ - jit_insn *refc; + GC_CAN_IGNORE jit_insn *refc; __START_INNER_TINY__(branch_short); refc = jit_bgei_l(jit_forward(), JIT_R0, (long)scheme_make_integer(0)); __END_INNER_TINY__(branch_short); @@ -6871,6 +7166,7 @@ static int generate_inlined_unary(mz_jit_state *jitter, Scheme_App2_Rec *app, in jit_ldxi_s(JIT_R1, JIT_R0, &((Scheme_Object *)0x0)->type); ref = jit_bnei_i(jit_forward(), JIT_R1, scheme_chaperone_type); (void)jit_calli(unbox_code); + jit_retval(JIT_R0); ref2 = jit_jmpi(jit_forward()); jit_retval(JIT_R0); mz_patch_branch(ref); @@ -7117,7 +7413,10 @@ static int generate_inlined_unary(mz_jit_state *jitter, Scheme_App2_Rec *app, in JIT_UPDATE_THREAD_RSPTR_IF_NEEDED(); mz_prepare(1); jit_pusharg_p(JIT_R0); - (void)mz_finish(ts_scheme_box); + { + GC_CAN_IGNORE jit_insn *refr; + (void)mz_finish_lwe(ts_scheme_box, refr); + } jit_retval(JIT_R0); #endif @@ -7516,7 +7815,10 @@ static int allocate_rectangular(mz_jit_state *jitter) mz_prepare(2); jit_pusharg_p(JIT_R1); jit_pusharg_p(JIT_R0); - (void)mz_finish(ts_scheme_make_complex); + { + GC_CAN_IGNORE jit_insn *refr; + (void)mz_finish_lwe(ts_scheme_make_complex, refr); + } jit_retval(JIT_R0); #endif @@ -7895,6 +8197,7 @@ static int generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i else if (IS_NAMED_PRIM(rator, "fxvector-ref")) { which = 0; for_fx = 1; + can_chaperone = 0; } else if (IS_NAMED_PRIM(rator, "unsafe-vector-ref")) { which = 0; unsafe = 1; @@ -7915,6 +8218,7 @@ static int generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i scheme_signal_error("internal error: bad depth for flvector-ref"); jitter->unbox = 0; } + can_chaperone = 0; } else if (IS_NAMED_PRIM(rator, "unsafe-struct-ref")) { which = 0; unsafe = 1; @@ -8242,7 
+8546,10 @@ static int generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i mz_prepare(2); jit_pusharg_p(JIT_R1); jit_pusharg_p(JIT_R0); - (void)mz_finish(ts_scheme_make_mutable_pair); + { + GC_CAN_IGNORE jit_insn *refr; + (void)mz_finish_lwe(ts_scheme_make_mutable_pair, refr); + } jit_retval(JIT_R0); #endif @@ -8805,11 +9112,14 @@ static int generate_inlined_nary(mz_jit_state *jitter, Scheme_App_Rec *app, int jit_movi_l(JIT_R0, c); mz_prepare(2); jit_pusharg_l(JIT_R0); - jit_pusharg_l(JIT_RUNSTACK); - if (star) - (void)mz_finish(ts_make_list_star); - else - (void)mz_finish(ts_make_list); + jit_pusharg_p(JIT_RUNSTACK); + { + GC_CAN_IGNORE jit_insn *refr; + if (star) + (void)mz_finish_lwe(ts_make_list_star, refr); + else + (void)mz_finish_lwe(ts_make_list, refr); + } jit_retval(JIT_R0); #endif @@ -8890,7 +9200,10 @@ static int generate_cons_alloc(mz_jit_state *jitter, int rev, int inline_retry) jit_pusharg_p(JIT_R1); jit_pusharg_p(JIT_R0); } - (void)mz_finish(ts_scheme_make_pair); + { + GC_CAN_IGNORE jit_insn *refr; + (void)mz_finish_lwe(ts_scheme_make_pair, refr); + } jit_retval(JIT_R0); #endif @@ -8941,31 +9254,34 @@ static int generate_vector_alloc(mz_jit_state *jitter, Scheme_Object *rator, jit_stxi_i((long)&SCHEME_VEC_SIZE(0x0) + OBJHEAD_SIZE, JIT_V1, JIT_R1); jit_addi_p(JIT_R0, JIT_V1, OBJHEAD_SIZE); #else - /* Non-inlined */ - JIT_UPDATE_THREAD_RSPTR_IF_NEEDED(); - if (c == 1) { - mz_prepare(1); - jit_pusharg_p(JIT_R0); - if (imm) - (void)mz_finish(ts_make_one_element_ivector); - else - (void)mz_finish(ts_make_one_element_vector); - } else if (c == 2) { - mz_prepare(2); - jit_pusharg_p(JIT_R1); - jit_pusharg_p(JIT_R0); - if (imm) - (void)mz_finish(ts_make_two_element_ivector); - else - (void)mz_finish(ts_make_two_element_vector); - } else { - jit_movi_l(JIT_R1, c); - mz_prepare(1); - jit_pusharg_l(JIT_R1); - if (imm) - (void)mz_finish(ts_make_ivector); - else - (void)mz_finish(ts_make_vector); + { + /* Non-inlined */ + GC_CAN_IGNORE jit_insn *refr; + JIT_UPDATE_THREAD_RSPTR_IF_NEEDED(); + if (c == 1) { + mz_prepare(1); + jit_pusharg_p(JIT_R0); + if (imm) + (void)mz_finish_lwe(ts_make_one_element_ivector, refr); + else + (void)mz_finish_lwe(ts_make_one_element_vector, refr); + } else if (c == 2) { + mz_prepare(2); + jit_pusharg_p(JIT_R1); + jit_pusharg_p(JIT_R0); + if (imm) + (void)mz_finish_lwe(ts_make_two_element_ivector, refr); + else + (void)mz_finish_lwe(ts_make_two_element_vector, refr); + } else { + jit_movi_l(JIT_R1, c); + mz_prepare(1); + jit_pusharg_l(JIT_R1); + if (imm) + (void)mz_finish_lwe(ts_make_ivector, refr); + else + (void)mz_finish_lwe(ts_make_vector, refr); + } } jit_retval(JIT_R0); #endif @@ -9109,15 +9425,17 @@ static int generate_closure(Scheme_Closure_Data *data, # endif { /* Non-inlined alloc */ + GC_CAN_IGNORE jit_insn *refr; + JIT_UPDATE_THREAD_RSPTR_IF_NEEDED(); jit_movi_l(JIT_R0, sz); mz_prepare(1); jit_pusharg_l(JIT_R0); if (immediately_filled) { - (void)mz_finish(ts_GC_malloc_one_small_dirty_tagged); + (void)mz_finish_lwe(ts_GC_malloc_one_small_dirty_tagged, refr); } else { - (void)mz_finish(ts_GC_malloc_one_small_tagged); + (void)mz_finish_lwe(ts_GC_malloc_one_small_tagged, refr); } jit_retval(JIT_R0); memcpy(&init_word, &example_so, sizeof(long)); @@ -9142,7 +9460,10 @@ static int generate_closure(Scheme_Closure_Data *data, (void)jit_patchable_movi_p(JIT_R0, code); /* !! 
*/ #endif jit_pusharg_p(JIT_R0); - (void)mz_finish(ts_scheme_make_native_closure); + { + GC_CAN_IGNORE jit_insn *refr; + (void)mz_finish_lwe(ts_scheme_make_native_closure, refr); + } jit_retval(JIT_R0); return 1; @@ -9276,7 +9597,10 @@ static int generate_case_closure(Scheme_Object *obj, mz_jit_state *jitter, int t (void)jit_patchable_movi_p(JIT_R0, ndata); /* !! */ #endif jit_pusharg_p(JIT_R0); - (void)mz_finish(ts_scheme_make_native_case_closure); + { + GC_CAN_IGNORE jit_insn *refr; + (void)mz_finish_lwe(ts_scheme_make_native_case_closure, refr); + } jit_retval(JIT_R1); CHECK_LIMIT(); @@ -9356,9 +9680,11 @@ static int generate_non_tail_with_branch(Scheme_Object *obj, mz_jit_state *jitte } { - int amt, need_ends = 1, using_local1 = 0; + int amt, need_ends = 1, using_local1 = 0, save_pushed_marks; START_JIT_DATA(); + save_pushed_marks = jitter->pushed_marks; + /* Might change the stack or marks: */ if (is_simple(obj, INIT_SIMPLE_DEPTH, 1, jitter, 0)) { need_ends = 0; @@ -9369,13 +9695,23 @@ static int generate_non_tail_with_branch(Scheme_Object *obj, mz_jit_state *jitte mz_tl_ldi_p(JIT_R2, tl_scheme_current_cont_mark_stack); if (!jitter->local1_busy) { using_local1 = 1; - jitter->local1_busy = 1; + jitter->local1_busy = save_pushed_marks + 1; mz_set_local_p(JIT_R2, JIT_LOCAL1); + } else if ((save_pushed_marks + 1) == jitter->local1_busy) { + /* value in LOCAL1 works here, too, because no marks + have been pushed */ + using_local1 = 2; } else { /* mark stack is an integer... turn it into a pointer */ jit_lshi_l(JIT_R2, JIT_R2, 0x1); jit_ori_l(JIT_R2, JIT_R2, 0x1); mz_pushr_p(JIT_R2); /* no sync */ +# ifdef MZ_USE_LWC + /* For lighweight continuations, we need to be able to recognize + and adjust mark-stack depths: */ + jit_movi_l(JIT_R2, SCHEME_EVAL_WAITING); + mz_pushr_p(JIT_R2); /* no sync */ +# endif } CHECK_LIMIT(); } @@ -9412,8 +9748,12 @@ static int generate_non_tail_with_branch(Scheme_Object *obj, mz_jit_state *jitte if (need_ends) { if (using_local1) { mz_get_local_p(JIT_R2, JIT_LOCAL1); - jitter->local1_busy = 0; + if (using_local1 == 1) + jitter->local1_busy = 0; } else { +# ifdef MZ_USE_LWC + mz_popr_p(JIT_R2); /* no sync */ +# endif mz_popr_p(JIT_R2); /* no sync */ jit_rshi_l(JIT_R2, JIT_R2, 0x1); /* pointer back to integer */ } @@ -9423,6 +9763,8 @@ static int generate_non_tail_with_branch(Scheme_Object *obj, mz_jit_state *jitte CHECK_LIMIT(); } + jitter->pushed_marks = save_pushed_marks; + END_JIT_DATA(21); } @@ -9497,7 +9839,8 @@ static int generate_branch(Scheme_Object *obj, mz_jit_state *jitter, int is_tail Scheme_Branch_Rec *branch = (Scheme_Branch_Rec *)obj; Branch_Info for_this_branch; GC_CAN_IGNORE Branch_Info_Addr addrs[NUM_QUICK_INFO_ADDRS]; - jit_insn *ref2; + GC_CAN_IGNORE jit_insn *ref2; + int pushed_marks; int nsrs, nsrs1, g1, g2, amt, need_sync, flostack, flostack_pos; int else_is_empty = 0, i, can_chain_branch, chain_true, chain_false; #ifdef NEED_LONG_JUMPS @@ -9574,6 +9917,7 @@ static int generate_branch(Scheme_Object *obj, mz_jit_state *jitter, int is_tail mz_runstack_saved(jitter); flostack = mz_flostack_save(jitter, &flostack_pos); nsrs = jitter->need_set_rs; + pushed_marks = jitter->pushed_marks; PAUSE_JIT_DATA(); LOG_IT(("...then...\n")); FOR_LOG(++jitter->log_depth); @@ -9629,6 +9973,7 @@ static int generate_branch(Scheme_Object *obj, mz_jit_state *jitter, int is_tail } } jitter->need_set_rs = nsrs; + jitter->pushed_marks = pushed_marks; if (need_sync) mz_rs_sync_0(); /* False branch */ @@ -9988,7 +10333,7 @@ static int generate(Scheme_Object *obj, 
mz_jit_state *jitter, int is_tail, int w jit_pusharg_p(JIT_R0); jit_pusharg_p(JIT_R2); CHECK_LIMIT(); - (void)mz_finish(ts_call_set_global_bucket); + (void)mz_finish_lwe(ts_call_set_global_bucket, ref1); CHECK_LIMIT(); __START_SHORT_JUMPS__(1); @@ -10177,7 +10522,10 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int w jit_ldxi_p(JIT_R2, JIT_RUNSTACK, WORDS_TO_BYTES(pos)); mz_prepare(1); jit_pusharg_p(JIT_R2); - (void)mz_finish(ts_scheme_make_envunbox); + { + GC_CAN_IGNORE jit_insn *refr; + (void)mz_finish_lwe(ts_scheme_make_envunbox, refr); + } jit_retval(JIT_R0); #endif jit_stxi_p(WORDS_TO_BYTES(pos), JIT_RUNSTACK, JIT_R0); @@ -10208,7 +10556,10 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int w JIT_UPDATE_THREAD_RSPTR_IF_NEEDED(); mz_prepare(1); jit_pusharg_p(JIT_R2); - (void)mz_finish(ts_make_global_ref); + { + GC_CAN_IGNORE jit_insn *refr; + (void)mz_finish_lwe(ts_make_global_ref, refr); + } CHECK_LIMIT(); jit_retval(target); VALIDATE_RESULT(target); @@ -10404,7 +10755,7 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int w CHECK_LIMIT(); } else { /* Expect multiple results: */ - jit_insn *ref, *ref2, *ref3; + GC_CAN_IGNORE jit_insn *ref, *ref2, *ref3; generate_non_tail(lv->value, jitter, 1, 1, 0); CHECK_LIMIT(); @@ -10442,7 +10793,7 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int w CHECK_LIMIT(); jit_movi_i(JIT_V1, lv->count); jit_pusharg_i(JIT_V1); - (void)mz_finish(ts_lexical_binding_wrong_return_arity); + (void)mz_finish_lwe(ts_lexical_binding_wrong_return_arity, ref); CHECK_LIMIT(); /* Continue with expected values; R2 has value array: */ @@ -10505,7 +10856,10 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int w (void)jit_movi_p(JIT_R0, scheme_undefined); mz_prepare(1); jit_pusharg_p(JIT_R0); - (void)mz_finish(ts_scheme_make_envunbox); + { + GC_CAN_IGNORE jit_insn *refr; + (void)mz_finish_lwe(ts_scheme_make_envunbox, refr); + } jit_retval(JIT_R0); #endif jit_stxi_p(WORDS_TO_BYTES(i), JIT_RUNSTACK, JIT_R0); @@ -10701,6 +11055,8 @@ static int generate(Scheme_Object *obj, mz_jit_state *jitter, int is_tail, int w END_JIT_DATA(18); LOG_IT(("...in\n")); + + jitter->pushed_marks++; return generate(wcm->body, jitter, is_tail, wcm_may_replace, multi_ok, orig_target, for_branch); } @@ -10806,7 +11162,7 @@ static void generate_function_prolog(mz_jit_state *jitter, void *code, int max_l START_JIT_DATA(); jit_prolog(NATIVE_ARG_COUNT); - + in = jit_arg_p(); jit_getarg_p(JIT_R0, in); /* closure */ in = jit_arg_i(); @@ -10825,7 +11181,7 @@ static void generate_function_prolog(mz_jit_state *jitter, void *code, int max_l static int generate_function_getarg(mz_jit_state *jitter, int has_rest, int num_params) { int i, cnt; - jit_insn *ref; + GC_CAN_IGNORE jit_insn *ref; int set_ref; /* If rands == runstack and there are no rest args, set runstack @@ -10959,7 +11315,7 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) CHECK_LIMIT(); jit_movi_i(JIT_V1, 1); jit_pusharg_i(JIT_V1); - (void)mz_finish(ts_call_wrong_return_arity); + (void)mz_finish_lwe(ts_call_wrong_return_arity, ref); CHECK_LIMIT(); /* *** unbound_global_code *** */ @@ -10967,7 +11323,7 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) JIT_UPDATE_THREAD_RSPTR(); mz_prepare(1); jit_pusharg_p(JIT_R2); - (void)mz_finish(ts_scheme_unbound_global); + (void)mz_finish_lwe(ts_scheme_unbound_global, ref); CHECK_LIMIT(); /* *** quote_syntax_code *** */ @@ 
-11005,7 +11361,7 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) mz_prepare(2); jit_pusharg_l(JIT_R1); jit_pusharg_p(JIT_R0); - (void)mz_finish(ts_scheme_delayed_rename); + (void)mz_finish_lwe(ts_scheme_delayed_rename, ref2); CHECK_LIMIT(); jit_retval(JIT_R0); /* Restore global array into JIT_R1, and put computed element at i+p+1: */ @@ -11080,40 +11436,40 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) jit_pusharg_i(JIT_R1); switch (i) { case 0: - (void)mz_finish(ts_scheme_checked_car); + (void)mz_finish_lwe(ts_scheme_checked_car, ref); break; case 1: - (void)mz_finish(ts_scheme_checked_cdr); + (void)mz_finish_lwe(ts_scheme_checked_cdr, ref); break; case 2: - (void)mz_finish(ts_scheme_checked_caar); + (void)mz_finish_lwe(ts_scheme_checked_caar, ref); break; case 3: - (void)mz_finish(ts_scheme_checked_cadr); + (void)mz_finish_lwe(ts_scheme_checked_cadr, ref); break; case 4: - (void)mz_finish(ts_scheme_checked_cdar); + (void)mz_finish_lwe(ts_scheme_checked_cdar, ref); break; case 5: - (void)mz_finish(ts_scheme_checked_cddr); + (void)mz_finish_lwe(ts_scheme_checked_cddr, ref); break; case 6: - (void)mz_finish(ts_scheme_checked_mcar); + (void)mz_finish_lwe(ts_scheme_checked_mcar, ref); break; case 7: - (void)mz_finish(ts_scheme_checked_mcdr); + (void)mz_finish_lwe(ts_scheme_checked_mcdr, ref); break; case 8: - (void)mz_finish(ts_scheme_checked_real_part); + (void)mz_finish_lwe(ts_scheme_checked_real_part, ref); break; case 9: - (void)mz_finish(ts_scheme_checked_imag_part); + (void)mz_finish_lwe(ts_scheme_checked_imag_part, ref); break; case 10: - (void)mz_finish(ts_scheme_checked_flreal_part); + (void)mz_finish_lwe(ts_scheme_checked_flreal_part, ref); break; case 11: - (void)mz_finish(ts_scheme_checked_flimag_part); + (void)mz_finish_lwe(ts_scheme_checked_flimag_part, ref); break; } CHECK_LIMIT(); @@ -11166,19 +11522,19 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) jit_pusharg_i(JIT_R1); switch (i) { case 0: - (void)mz_finish(ts_scheme_checked_set_mcar); + (void)mz_finish_lwe(ts_scheme_checked_set_mcar, ref); break; case 1: - (void)mz_finish(ts_scheme_checked_set_mcdr); + (void)mz_finish_lwe(ts_scheme_checked_set_mcdr, ref); break; case 2: - (void)mz_finish(ts_scheme_checked_make_rectangular); + (void)mz_finish_lwe(ts_scheme_checked_make_rectangular, ref); jit_retval(JIT_R0); jit_addi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(2)); mz_epilog(JIT_R2); break; case 3: - (void)mz_finish(ts_scheme_checked_make_flrectangular); + (void)mz_finish_lwe(ts_scheme_checked_make_flrectangular, ref); break; } CHECK_LIMIT(); @@ -11192,7 +11548,7 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) JIT_UPDATE_THREAD_RSPTR(); jit_prepare(1); jit_pusharg_p(JIT_R0); - (void)mz_finish(ts_scheme_unbox); + (void)mz_finish_lwe(ts_scheme_unbox, ref); CHECK_LIMIT(); jit_retval(JIT_R0); /* returns if proxied */ mz_epilog(JIT_R1); @@ -11206,7 +11562,7 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) jit_prepare(2); jit_pusharg_p(JIT_R1); jit_pusharg_p(JIT_R0); - (void)mz_finish(ts_scheme_set_box); + (void)mz_finish_lwe(ts_scheme_set_box, ref); CHECK_LIMIT(); /* returns if proxied */ mz_epilog(JIT_R2); @@ -11228,8 +11584,8 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) mz_patch_branch(ref); mz_patch_branch(ref2); jit_prepare(1); - jit_pusharg_i(JIT_R0); - (void)mz_finish(ts_scheme_vector_length); + jit_pusharg_p(JIT_R0); + (void)mz_finish_lwe(ts_scheme_vector_length, ref); CHECK_LIMIT(); 
register_sub_func(jitter, bad_vector_length_code, scheme_false); @@ -11238,8 +11594,8 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) bad_flvector_length_code = jit_get_ip().ptr; mz_prolog(JIT_R1); jit_prepare(1); - jit_pusharg_i(JIT_R0); - (void)mz_finish(ts_scheme_flvector_length); + jit_pusharg_p(JIT_R0); + (void)mz_finish_lwe(ts_scheme_flvector_length, ref); CHECK_LIMIT(); register_sub_func(jitter, bad_flvector_length_code, scheme_false); @@ -11248,8 +11604,8 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) bad_fxvector_length_code = jit_get_ip().ptr; mz_prolog(JIT_R1); jit_prepare(1); - jit_pusharg_i(JIT_R0); - (void)mz_finish(ts_scheme_fxvector_length); + jit_pusharg_p(JIT_R0); + (void)mz_finish_lwe(ts_scheme_fxvector_length, ref); CHECK_LIMIT(); register_sub_func(jitter, bad_fxvector_length_code, scheme_false); @@ -11419,9 +11775,9 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) JIT_UPDATE_THREAD_RSPTR(); mz_prepare(3); jit_pusharg_p(JIT_R2); - jit_pusharg_p(JIT_R1); + jit_pusharg_i(JIT_R1); jit_pusharg_p(JIT_R0); - (void)mz_finish(ts__scheme_apply_multi_from_native); + (void)mz_finish_lwe(ts__scheme_apply_multi_from_native, ref); CHECK_LIMIT(); mz_pop_threadlocal(); mz_pop_locals(); @@ -11436,7 +11792,7 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) JIT_UPDATE_THREAD_RSPTR(); mz_prepare(1); jit_pusharg_p(JIT_V1); - (void)mz_finish(ts_tail_call_with_values_from_multiple_result); + (void)mz_finish_lwe(ts_tail_call_with_values_from_multiple_result, ref); jit_retval(JIT_R0); VALIDATE_RESULT(JIT_R0); /* Return: */ @@ -11511,7 +11867,7 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) JIT_UPDATE_THREAD_RSPTR(); mz_prepare(1); jit_pusharg_p(JIT_R0); - (void)mz_finish(ts_raise_bad_call_with_values); + (void)mz_finish_lwe(ts_raise_bad_call_with_values, ref); /* Doesn't return */ CHECK_LIMIT(); @@ -11527,9 +11883,9 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) mz_prepare(1); jit_pusharg_p(JIT_V1); if (i) { - (void)mz_finish(ts_call_with_values_from_multiple_result_multi); + (void)mz_finish_lwe(ts_call_with_values_from_multiple_result_multi, ref); } else { - (void)mz_finish(ts_call_with_values_from_multiple_result); + (void)mz_finish_lwe(ts_call_with_values_from_multiple_result, ref); } jit_retval(JIT_R0); VALIDATE_RESULT(JIT_R0); @@ -11545,7 +11901,7 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) for (iii = 0; iii < 2; iii++) { /* ref, set */ for (ii = 0; ii < 4; ii++) { /* vector, string, bytes, fx */ for (i = 0; i < 2; i++) { /* check index? 
*/ - jit_insn *ref, *reffail; + GC_CAN_IGNORE jit_insn *ref, *reffail, *refrts; Scheme_Type ty; int offset, count_offset, log_elem_size; void *code; @@ -11632,7 +11988,7 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) break; } - __START_TINY_JUMPS__(1); + __START_SHORT_JUMPS__(1); mz_prolog(JIT_R2); @@ -11662,9 +12018,9 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) switch (ii) { case 0: if (!iii) { - (void)mz_finish(ts_scheme_checked_vector_ref); + (void)mz_finish_lwe(ts_scheme_checked_vector_ref, refrts); } else { - (void)mz_finish(ts_scheme_checked_vector_set); + (void)mz_finish_lwe(ts_scheme_checked_vector_set, refrts); } CHECK_LIMIT(); /* Might return, if arg was chaperone */ @@ -11676,7 +12032,7 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) break; case 1: if (!iii) { - (void)mz_finish(ts_scheme_checked_string_ref); + (void)mz_finish_lwe(ts_scheme_checked_string_ref, refrts); CHECK_LIMIT(); /* might return, if char was outside Latin-1 */ jit_addi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(2)); @@ -11684,21 +12040,21 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) jit_retval(JIT_R0); mz_epilog(JIT_R2); } else { - (void)mz_finish(ts_scheme_checked_string_set); + (void)mz_finish_lwe(ts_scheme_checked_string_set, refrts); } break; case 2: if (!iii) { - (void)mz_finish(ts_scheme_checked_byte_string_ref); + (void)mz_finish_lwe(ts_scheme_checked_byte_string_ref, refrts); } else { - (void)mz_finish(ts_scheme_checked_byte_string_set); + (void)mz_finish_lwe(ts_scheme_checked_byte_string_set, refrts); } break; case 3: if (!iii) { - (void)mz_finish(ts_scheme_checked_fxvector_ref); + (void)mz_finish_lwe(ts_scheme_checked_fxvector_ref, refrts); } else { - (void)mz_finish(ts_scheme_checked_fxvector_set); + (void)mz_finish_lwe(ts_scheme_checked_fxvector_set, refrts); } break; } @@ -11789,7 +12145,7 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) mz_epilog(JIT_R2); CHECK_LIMIT(); - __END_TINY_JUMPS__(1); + __END_SHORT_JUMPS__(1); register_sub_func(jitter, code, scheme_false); } @@ -11835,9 +12191,9 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) jit_pusharg_p(JIT_RUNSTACK); jit_pusharg_i(JIT_R1); if (!i) { - (void)mz_finish(ts_scheme_checked_flvector_ref); + (void)mz_finish_lwe(ts_scheme_checked_flvector_ref, ref); } else { - (void)mz_finish(ts_scheme_checked_flvector_set); + (void)mz_finish_lwe(ts_scheme_checked_flvector_set, ref); } /* does not return */ CHECK_LIMIT(); @@ -11872,10 +12228,10 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) jit_pusharg_p(JIT_R1); jit_pusharg_i(JIT_R0); if (!iii) { - (void)mz_finish(ts_scheme_struct_ref); + (void)mz_finish_lwe(ts_scheme_struct_ref, ref); jit_retval(JIT_R0); } else - (void)mz_finish(ts_scheme_struct_set); + (void)mz_finish_lwe(ts_scheme_struct_set, ref); CHECK_LIMIT(); jit_retval(JIT_R0); mz_epilog(JIT_R2); @@ -11886,7 +12242,7 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) /* *** syntax_ecode *** */ /* R0 is (potential) syntax object */ { - jit_insn *ref, *reffail; + GC_CAN_IGNORE jit_insn *ref, *reffail, *refrts; syntax_e_code = jit_get_ip().ptr; __START_TINY_JUMPS__(1); mz_prolog(JIT_R2); @@ -11903,7 +12259,7 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) jit_prepare(2); jit_pusharg_p(JIT_RUNSTACK); jit_pusharg_i(JIT_R1); - (void)mz_finish(ts_scheme_checked_syntax_e); + (void)mz_finish_lwe(ts_scheme_checked_syntax_e, refrts); jit_retval(JIT_R0); 
jit_addi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(1)); mz_epilog(JIT_R2); @@ -11941,7 +12297,7 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) void *code; int kind, for_branch; GC_CAN_IGNORE jit_insn *ref, *ref2, *ref3, *refslow, *bref1, *bref2, *refretry; - GC_CAN_IGNORE jit_insn *bref3, *bref4, *bref5, *bref6, *bref8, *ref9; + GC_CAN_IGNORE jit_insn *bref3, *bref4, *bref5, *bref6, *bref8, *ref9, *refrts; if ((ii == 1) && (i == 1)) continue; /* no multi variant of pred branch */ @@ -11999,12 +12355,12 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) jit_movi_i(JIT_V1, ((kind == 3) ? 2 : 1)); jit_prepare(3); jit_pusharg_p(JIT_RUNSTACK); - jit_pusharg_p(JIT_V1); + jit_pusharg_i(JIT_V1); jit_pusharg_p(JIT_R0); if (ii == 1) { - (void)mz_finish(ts__scheme_apply_multi_from_native); + (void)mz_finish_lwe(ts__scheme_apply_multi_from_native, refrts); } else { - (void)mz_finish(ts__scheme_apply_from_native); + (void)mz_finish_lwe(ts__scheme_apply_from_native, refrts); } jit_retval(JIT_R0); VALIDATE_RESULT(JIT_R0); @@ -12186,7 +12542,7 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) /* *** make_list_code *** */ /* R2 has length, args are on runstack */ for (i = 0; i < 2; i++) { - jit_insn *ref, *refnext; + GC_CAN_IGNORE jit_insn *ref, *refnext; if (i == 0) make_list_code = jit_get_ip().ptr; @@ -12474,7 +12830,7 @@ static int do_generate_more_common(mz_jit_state *jitter, void *_data) /* *** check_proc_extract_code *** */ /* arguments are on the Scheme stack */ { - GC_CAN_IGNORE jit_insn *ref, *ref2, *ref3, *refslow; + GC_CAN_IGNORE jit_insn *ref, *ref2, *ref3, *refslow, *refrts; struct_proc_extract_code = jit_get_ip().ptr; mz_prolog(JIT_V1); @@ -12492,7 +12848,7 @@ static int do_generate_more_common(mz_jit_state *jitter, void *_data) jit_prepare(2); jit_pusharg_p(JIT_RUNSTACK); jit_pusharg_i(JIT_V1); - (void)mz_finish(ts_scheme_extract_checked_procedure); + (void)mz_finish_lwe(ts_scheme_extract_checked_procedure, refrts); jit_retval(JIT_R0); VALIDATE_RESULT(JIT_R0); mz_epilog(JIT_V1); @@ -12559,7 +12915,7 @@ static int do_generate_more_common(mz_jit_state *jitter, void *_data) JIT_UPDATE_THREAD_RSPTR(); jit_prepare(1); jit_pusharg_p(JIT_RUNSTACK); - (void)mz_finish(ts_apply_checked_fail); + (void)mz_finish_lwe(ts_apply_checked_fail, refrts); CHECK_LIMIT(); jit_retval(JIT_R0); VALIDATE_RESULT(JIT_R0); @@ -12980,12 +13336,121 @@ static int do_generate_more_common(mz_jit_state *jitter, void *_data) } } + /* native_starter_code */ + { + native_starter_code = (LWC_Native_Starter)jit_get_ip().ptr; + + /* store stack pointer in address given by 5th argument, then jump to + the address given by the 4th argument */ + jit_getprearg_pipp_p(JIT_PREARG); + jit_str_p(JIT_PREARG, JIT_SP); + jit_getprearg_pip_p(JIT_PREARG); + jit_jmpr(JIT_PREARG); + + CHECK_LIMIT(); + } + +#ifdef MZ_USE_LWC + /* continuation_apply_indirect_code */ + { + int in; + + continuation_apply_indirect_code = (Continuation_Apply_Indirect)jit_get_ip().ptr; + + /* install stack pointer into first argument before doing anything */ + jit_getprearg__p(JIT_PREARG); + jit_str_p(JIT_PREARG, JIT_SP); + + /* accept the two arguments */ + jit_prolog(2); + in = jit_arg_p(); + jit_getarg_p(JIT_R0, in); + in = jit_arg_p(); + jit_getarg_l(JIT_R1, in); + + /* make room on the stack to copy a continuation in */ + jit_subr_p(JIT_SP, JIT_SP, JIT_R1); + + /* get preserved registers that we otherwise don't use in JIT-generated + code; put them back in place just before we get to the + continuation */ 
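Note on the jit_getprearg_* helpers used by native_starter_code and continuation_apply_indirect_code: they are defined later in this patch (lightning/i386/core.h), and their suffix spells out the argument types that precede the argument being fetched, so jit_getprearg__p reads the 1st argument, jit_getprearg_pip_p the 4th, and jit_getprearg_pipp_p the 5th. On x86-64 those land in %rdi, %rcx, and %r8; on x86 they are read from the stack (SP+4, SP+16, SP+20) before the prolog has built a frame. A minimal sketch of a C-level shape that matches, with assumed names (the real LWC_Native_Starter typedef is declared elsewhere in the patch):

/* Hypothetical prototype, for illustration only: per the comment on
   native_starter_code above, the 4th argument is the address to jump to
   and the 5th receives the current C stack pointer. */
typedef void (*example_starter_t)(void *data,         /* p -> jit_getprearg__p     */
                                  int argc,           /* i                         */
                                  void *argv,         /* p                         */
                                  void *jitted_code,  /* p -> jit_getprearg_pip_p  */
                                  void **stack_addr); /* p -> jit_getprearg_pipp_p */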
+#ifdef JIT_X86_64 + jit_stxi_p((int)&((Apply_LWC_Args *)0x0)->saved_r14, JIT_R0, JIT_R(14)); + jit_stxi_p((int)&((Apply_LWC_Args *)0x0)->saved_r15, JIT_R0, JIT_R(15)); +#endif + + jit_prepare(1); + jit_pusharg_p(JIT_R0); + (void)jit_calli(continuation_apply_install); + + CHECK_LIMIT(); + } +#endif + +#ifdef MZ_USE_LWC + /* continuation_apply_finish_code */ + { + int in; + + continuation_apply_finish_code = (Continuation_Apply_Finish)jit_get_ip().ptr; + + jit_prolog(2); + in = jit_arg_p(); + jit_getarg_p(JIT_R0, in); /* Apply_LWC_Args */ + in = jit_arg_p(); + jit_getarg_p(JIT_R1, in); /* new stack position */ + in = jit_arg_p(); + jit_getarg_p(JIT_R2, in); /* new frame position */ + CHECK_LIMIT(); + + /* Restore old stack and frame pointers: */ + jit_movr_p(JIT_SP, JIT_R1); + jit_movr_p(JIT_FP, JIT_R2); + + /* Restore saved V1: */ + jit_ldxi_p(JIT_R1, JIT_R0, (int)&((Apply_LWC_Args *)0x0)->lwc); + jit_ldxi_l(JIT_V1, JIT_R1, (int)&((Scheme_Current_LWC *)0x0)->saved_v1); + CHECK_LIMIT(); + + /* Restore runstack, runstack_start, and thread-local pointer */ + jit_ldxi_p(JIT_RUNSTACK, JIT_R0, (int)&((Apply_LWC_Args *)0x0)->new_runstack); +# ifdef THREAD_LOCAL_USES_JIT_V2 + jit_ldxi_p(JIT_V2, JIT_R0, (int)&((Apply_LWC_Args *)0x0)->new_threadlocal); +# else + jit_ldxi_p(JIT_RUNSTACK_BASE, JIT_R0, (int)&((Apply_LWC_Args *)0x0)->new_runstack_base); +# endif +# ifdef JIT_X86_64 + jit_ldxi_p(JIT_R14, JIT_R0, (int)&((Apply_LWC_Args *)0x0)->new_threadlocal); +# endif + + /* restore preserved registers that we otherwise don't use */ +# ifdef JIT_X86_64 + /* saved_r14 is installed in the topmost frame already */ + jit_ldxi_p(JIT_R(15), JIT_R0, (int)&((Apply_LWC_Args *)0x0)->saved_r15); +# endif + CHECK_LIMIT(); + + /* Prepare to jump to original return: */ + jit_ldxi_p(JIT_R1, JIT_R0, (int)&((Apply_LWC_Args *)0x0)->lwc); + jit_ldxi_l(JIT_R2, JIT_R1, (int)&((Scheme_Current_LWC *)0x0)->original_dest); + + /* install result value: */ + jit_ldxi_p(JIT_R0, JIT_R0, (int)&((Apply_LWC_Args *)0x0)->result); + + jit_jmpr(JIT_R2); + + CHECK_LIMIT(); + } +#endif + return 1; } #ifdef CAN_INLINE_ALLOC static int generate_alloc_retry(mz_jit_state *jitter, int i) { + GC_CAN_IGNORE jit_insn *refr; + #ifdef JIT_USE_FP_OPS if (i == 2) { (void)mz_tl_sti_d_fppop(tl_save_fp, JIT_FPR1, JIT_R2); @@ -13002,7 +13467,7 @@ static int generate_alloc_retry(mz_jit_state *jitter, int i) jit_pusharg_p(JIT_R0); jit_pusharg_p(JIT_R0); } - (void)mz_finish(ts_prepare_retry_alloc); + (void)mz_finish_lwe(ts_prepare_retry_alloc, refr); jit_retval(JIT_R0); if (i == 1) { mz_tl_ldi_l(JIT_R1, tl_retry_alloc_r1); @@ -13091,7 +13556,7 @@ static int do_generate_closure(mz_jit_state *jitter, void *_data) if (has_rest && data->num_params) { /* If runstack == argv and argc == cnt, then we didn't copy args down, and we need to make room for scheme_null. */ - jit_insn *ref, *ref2, *ref3; + GC_CAN_IGNORE jit_insn *ref, *ref2, *ref3, *refrts; CHECK_LIMIT(); @@ -13129,7 +13594,7 @@ static int do_generate_closure(mz_jit_state *jitter, void *_data) jit_pusharg_p(JIT_R2); jit_pusharg_i(JIT_R1); CHECK_LIMIT(); - (void)mz_finish(ts_scheme_build_list_offset); + (void)mz_finish_lwe(ts_scheme_build_list_offset, refrts); jit_retval(JIT_V1); #ifndef JIT_PRECISE_GC if (data->closure_size) @@ -13486,7 +13951,7 @@ static int generate_simple_arity_check(mz_jit_state *jitter, int num_params, int true dynamically for all jumps to the code. Also, at JIT time, make sure that jitter is initialized with a size-3 prolog. 
*/ - jit_insn *ref, *ref2; + GC_CAN_IGNORE jit_insn *ref, *ref2, *refrts; __START_TINY_JUMPS__(1); @@ -13512,7 +13977,7 @@ static int generate_simple_arity_check(mz_jit_state *jitter, int num_params, int jit_pusharg_i(JIT_R1); jit_pusharg_p(JIT_R0); CHECK_LIMIT(); - (void)mz_nonrs_finish(ts_wrong_argument_count); + (void)mz_nonrs_finish_lwe(ts_wrong_argument_count, refrts); CHECK_LIMIT(); /* Arity check or reporting. If argv is NULL, it's a reporting request */ @@ -13547,7 +14012,7 @@ static int generate_simple_arity_check(mz_jit_state *jitter, int num_params, int if (is_method) { mz_prepare(1); jit_pusharg_p(JIT_R0); - (void)mz_nonrs_finish(ts_scheme_box); + (void)mz_nonrs_finish_lwe(ts_scheme_box, refrts); mz_pop_threadlocal(); mz_pop_locals(); jit_ret(); @@ -13598,7 +14063,7 @@ static int generate_case_lambda_dispatch(mz_jit_state *jitter, Scheme_Case_Lambd Scheme_Closure_Data *data; Scheme_Object *o; int i, cnt, has_rest, offset, num_params; - jit_insn *ref = NULL; + GC_CAN_IGNORE jit_insn *ref = NULL, *refrts; cnt = c->count; for (i = 0; i < cnt; i++) { @@ -13650,7 +14115,7 @@ static int generate_case_lambda_dispatch(mz_jit_state *jitter, Scheme_Case_Lambd jit_pusharg_i(JIT_R1); jit_pusharg_p(JIT_R0); CHECK_LIMIT(); - (void)mz_finish(ts_wrong_argument_count); + (void)mz_finish_lwe(ts_wrong_argument_count, refrts); CHECK_LIMIT(); } diff --git a/src/racket/src/jit_ts_runtime_glue.c b/src/racket/src/jit_ts_runtime_glue.c index cb1d1ecdf7..fb457494a6 100644 --- a/src/racket/src/jit_ts_runtime_glue.c +++ b/src/racket/src/jit_ts_runtime_glue.c @@ -1,10 +1,12 @@ case SIG_siS_s: { prim_siS_s f = (prim_siS_s)future->prim_func; - Scheme_Object* retval; + GC_CAN_IGNORE Scheme_Object* retval; + LOCALIZE(Scheme_Object*, arg_s0); LOCALIZE(int, arg_i1); LOCALIZE(Scheme_Object**, arg_S2); + future->arg_s0 = NULL; future->arg_S2 = NULL; retval = - f(future->arg_s0, future->arg_i1, future->arg_S2); + f(arg_s0, arg_i1, arg_S2); future->retval_s = retval; send_special_result(future, retval); break; @@ -12,10 +14,12 @@ case SIG_siS_s: case SIG_iSs_s: { prim_iSs_s f = (prim_iSs_s)future->prim_func; - Scheme_Object* retval; + GC_CAN_IGNORE Scheme_Object* retval; + LOCALIZE(int, arg_i0); LOCALIZE(Scheme_Object**, arg_S1); LOCALIZE(Scheme_Object*, arg_s2); + future->arg_S1 = NULL; future->arg_s2 = NULL; retval = - f(future->arg_i0, future->arg_S1, future->arg_s2); + f(arg_i0, arg_S1, arg_s2); future->retval_s = retval; send_special_result(future, retval); break; @@ -23,10 +27,12 @@ case SIG_iSs_s: case SIG_s_s: { prim_s_s f = (prim_s_s)future->prim_func; - Scheme_Object* retval; + GC_CAN_IGNORE Scheme_Object* retval; receive_special_result(future, future->arg_s0, 1); + LOCALIZE(Scheme_Object*, arg_s0); + future->arg_s0 = NULL; retval = - f(future->arg_s0); + f(arg_s0); future->retval_s = retval; send_special_result(future, retval); break; @@ -34,10 +40,12 @@ case SIG_s_s: case SIG_n_s: { prim_n_s f = (prim_n_s)future->prim_func; - Scheme_Object* retval; + GC_CAN_IGNORE Scheme_Object* retval; + LOCALIZE(Scheme_Native_Closure_Data*, arg_n0); + future->arg_n0 = NULL; retval = - f(future->arg_n0); + f(arg_n0); future->retval_s = retval; send_special_result(future, retval); break; @@ -45,7 +53,9 @@ case SIG_n_s: case SIG__s: { prim__s f = (prim__s)future->prim_func; - Scheme_Object* retval; + GC_CAN_IGNORE Scheme_Object* retval; + + retval = f(); @@ -56,10 +66,12 @@ case SIG__s: case SIG_ss_s: { prim_ss_s f = (prim_ss_s)future->prim_func; - Scheme_Object* retval; + GC_CAN_IGNORE Scheme_Object* retval; + 
LOCALIZE(Scheme_Object*, arg_s0); LOCALIZE(Scheme_Object*, arg_s1); + future->arg_s0 = NULL; future->arg_s1 = NULL; retval = - f(future->arg_s0, future->arg_s1); + f(arg_s0, arg_s1); future->retval_s = retval; send_special_result(future, retval); break; @@ -67,10 +79,12 @@ case SIG_ss_s: case SIG_tt_s: { prim_tt_s f = (prim_tt_s)future->prim_func; - Scheme_Object* retval; + GC_CAN_IGNORE Scheme_Object* retval; + LOCALIZE(const Scheme_Object*, arg_t0); LOCALIZE(const Scheme_Object*, arg_t1); + future->arg_t0 = NULL; future->arg_t1 = NULL; retval = - f(future->arg_t0, future->arg_t1); + f(arg_t0, arg_t1); future->retval_s = retval; send_special_result(future, retval); break; @@ -78,10 +92,12 @@ case SIG_tt_s: case SIG_ss_m: { prim_ss_m f = (prim_ss_m)future->prim_func; - MZ_MARK_STACK_TYPE retval; + GC_CAN_IGNORE MZ_MARK_STACK_TYPE retval; + LOCALIZE(Scheme_Object*, arg_s0); LOCALIZE(Scheme_Object*, arg_s1); + future->arg_s0 = NULL; future->arg_s1 = NULL; retval = - f(future->arg_s0, future->arg_s1); + f(arg_s0, arg_s1); future->retval_m = retval; break; @@ -89,10 +105,12 @@ case SIG_ss_m: case SIG_Sl_s: { prim_Sl_s f = (prim_Sl_s)future->prim_func; - Scheme_Object* retval; + GC_CAN_IGNORE Scheme_Object* retval; + LOCALIZE(Scheme_Object**, arg_S0); LOCALIZE(long, arg_l1); + future->arg_S0 = NULL; retval = - f(future->arg_S0, future->arg_l1); + f(arg_S0, arg_l1); future->retval_s = retval; send_special_result(future, retval); break; @@ -100,10 +118,12 @@ case SIG_Sl_s: case SIG_l_s: { prim_l_s f = (prim_l_s)future->prim_func; - Scheme_Object* retval; + GC_CAN_IGNORE Scheme_Object* retval; + + LOCALIZE(long, arg_l0); retval = - f(future->arg_l0); + f(arg_l0); future->retval_s = retval; send_special_result(future, retval); break; @@ -113,8 +133,10 @@ case SIG_bsi_v: prim_bsi_v f = (prim_bsi_v)future->prim_func; + LOCALIZE(Scheme_Bucket*, arg_b0); LOCALIZE(Scheme_Object*, arg_s1); LOCALIZE(int, arg_i2); + future->arg_b0 = NULL; future->arg_s1 = NULL; - f(future->arg_b0, future->arg_s1, future->arg_i2); + f(arg_b0, arg_s1, arg_i2); break; @@ -124,8 +146,10 @@ case SIG_iiS_v: prim_iiS_v f = (prim_iiS_v)future->prim_func; + LOCALIZE(int, arg_i0); LOCALIZE(int, arg_i1); LOCALIZE(Scheme_Object**, arg_S2); + future->arg_S2 = NULL; - f(future->arg_i0, future->arg_i1, future->arg_S2); + f(arg_i0, arg_i1, arg_S2); break; @@ -135,8 +159,10 @@ case SIG_ss_v: prim_ss_v f = (prim_ss_v)future->prim_func; + LOCALIZE(Scheme_Object*, arg_s0); LOCALIZE(Scheme_Object*, arg_s1); + future->arg_s0 = NULL; future->arg_s1 = NULL; - f(future->arg_s0, future->arg_s1); + f(arg_s0, arg_s1); break; @@ -146,8 +172,10 @@ case SIG_b_v: prim_b_v f = (prim_b_v)future->prim_func; + LOCALIZE(Scheme_Bucket*, arg_b0); + future->arg_b0 = NULL; - f(future->arg_b0); + f(arg_b0); break; @@ -155,10 +183,12 @@ case SIG_b_v: case SIG_sl_s: { prim_sl_s f = (prim_sl_s)future->prim_func; - Scheme_Object* retval; + GC_CAN_IGNORE Scheme_Object* retval; + LOCALIZE(Scheme_Object*, arg_s0); LOCALIZE(long, arg_l1); + future->arg_s0 = NULL; retval = - f(future->arg_s0, future->arg_l1); + f(arg_s0, arg_l1); future->retval_s = retval; send_special_result(future, retval); break; @@ -166,10 +196,12 @@ case SIG_sl_s: case SIG_iS_s: { prim_iS_s f = (prim_iS_s)future->prim_func; - Scheme_Object* retval; + GC_CAN_IGNORE Scheme_Object* retval; + LOCALIZE(int, arg_i0); LOCALIZE(Scheme_Object**, arg_S1); + future->arg_S1 = NULL; retval = - f(future->arg_i0, future->arg_S1); + f(arg_i0, arg_S1); future->retval_s = retval; send_special_result(future, retval); 
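Each SIG_* case in this glue file gets the same transformation: every argument is copied out of the future record with LOCALIZE, the corresponding future->arg_* field is cleared so the record no longer holds the reference across the call, the primitive is applied to the local copies, and the result locals are marked GC_CAN_IGNORE. LOCALIZE is defined outside this file; a plausible expansion, assumed here only for illustration, is

#define LOCALIZE(t, f) t f = future->f

which makes a representative case read, after this change, roughly as:

case SIG_ss_s:
  {
    prim_ss_s f = (prim_ss_s)future->prim_func;
    GC_CAN_IGNORE Scheme_Object *retval;
    LOCALIZE(Scheme_Object*, arg_s0);   /* local copy of future->arg_s0 */
    LOCALIZE(Scheme_Object*, arg_s1);
    future->arg_s0 = NULL;              /* drop the references held by the future */
    future->arg_s1 = NULL;
    retval = f(arg_s0, arg_s1);
    future->retval_s = retval;
    send_special_result(future, retval);
    break;
  }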
break; @@ -177,10 +209,12 @@ case SIG_iS_s: case SIG_S_s: { prim_S_s f = (prim_S_s)future->prim_func; - Scheme_Object* retval; + GC_CAN_IGNORE Scheme_Object* retval; + LOCALIZE(Scheme_Object**, arg_S0); + future->arg_S0 = NULL; retval = - f(future->arg_S0); + f(arg_S0); future->retval_s = retval; send_special_result(future, retval); break; @@ -190,8 +224,10 @@ case SIG_s_v: prim_s_v f = (prim_s_v)future->prim_func; receive_special_result(future, future->arg_s0, 1); + LOCALIZE(Scheme_Object*, arg_s0); + future->arg_s0 = NULL; - f(future->arg_s0); + f(arg_s0); break; @@ -199,10 +235,12 @@ case SIG_s_v: case SIG_iSi_s: { prim_iSi_s f = (prim_iSi_s)future->prim_func; - Scheme_Object* retval; + GC_CAN_IGNORE Scheme_Object* retval; + LOCALIZE(int, arg_i0); LOCALIZE(Scheme_Object**, arg_S1); LOCALIZE(int, arg_i2); + future->arg_S1 = NULL; retval = - f(future->arg_i0, future->arg_S1, future->arg_i2); + f(arg_i0, arg_S1, arg_i2); future->retval_s = retval; send_special_result(future, retval); break; @@ -212,8 +250,10 @@ case SIG_siS_v: prim_siS_v f = (prim_siS_v)future->prim_func; + LOCALIZE(Scheme_Object*, arg_s0); LOCALIZE(int, arg_i1); LOCALIZE(Scheme_Object**, arg_S2); + future->arg_s0 = NULL; future->arg_S2 = NULL; - f(future->arg_s0, future->arg_i1, future->arg_S2); + f(arg_s0, arg_i1, arg_S2); break; @@ -221,10 +261,12 @@ case SIG_siS_v: case SIG_z_p: { prim_z_p f = (prim_z_p)future->prim_func; - void* retval; + GC_CAN_IGNORE void* retval; + + LOCALIZE(size_t, arg_z0); retval = - f(future->arg_z0); + f(arg_z0); future->retval_p = retval; break; @@ -232,10 +274,12 @@ case SIG_z_p: case SIG_si_s: { prim_si_s f = (prim_si_s)future->prim_func; - Scheme_Object* retval; + GC_CAN_IGNORE Scheme_Object* retval; + LOCALIZE(Scheme_Object*, arg_s0); LOCALIZE(int, arg_i1); + future->arg_s0 = NULL; retval = - f(future->arg_s0, future->arg_i1); + f(arg_s0, arg_i1); future->retval_s = retval; send_special_result(future, retval); break; @@ -245,8 +289,10 @@ case SIG_sis_v: prim_sis_v f = (prim_sis_v)future->prim_func; + LOCALIZE(Scheme_Object*, arg_s0); LOCALIZE(int, arg_i1); LOCALIZE(Scheme_Object*, arg_s2); + future->arg_s0 = NULL; future->arg_s2 = NULL; - f(future->arg_s0, future->arg_i1, future->arg_s2); + f(arg_s0, arg_i1, arg_s2); break; diff --git a/src/racket/src/lightning/i386/core.h b/src/racket/src/lightning/i386/core.h index 72dd4bd2c6..5bd8637d3f 100644 --- a/src/racket/src/lightning/i386/core.h +++ b/src/racket/src/lightning/i386/core.h @@ -62,6 +62,9 @@ struct jit_local_state { #endif int r0_can_be_tmp; int argssize; +#ifdef JIT_X86_64 + int argpushes; +#endif }; /* 3-parameter operation */ @@ -347,6 +350,19 @@ struct jit_local_state { #define jit_pushr_l(rs) jit_pushr_i(rs) #define jit_popr_l(rs) jit_popr_i(rs) +/* For getting certain arguments (e.g., after pointer, int, and pointer) + before we set up the local frame: */ +#define JIT_PREARG JIT_R0 +#ifdef JIT_X86_64 +# define jit_getprearg__p(r) (MOVQrr(_EDI, r)) +# define jit_getprearg_pip_p(r) (MOVQrr(_ECX, r)) +# define jit_getprearg_pipp_p(r) (MOVQrr(JIT_R(8), r)) +#else +# define jit_getprearg__p(r) (jit_ldxi_p(r, JIT_SP, 4)) +# define jit_getprearg_pip_p(r) (jit_ldxi_p(r, JIT_SP, 16)) +# define jit_getprearg_pipp_p(r) (jit_ldxi_p(r, JIT_SP, 20)) +#endif + #ifdef JIT_X86_64 # define jit_base_prolog() (PUSHQr(_EBP), MOVQrr(_ESP, _EBP), PUSHQr(_EBX), PUSHQr(_R12), PUSHQr(_R13)) # define jit_prolog(n) (_jitl.nextarg_geti = 0, jit_base_prolog()) @@ -359,7 +375,7 @@ struct jit_local_state { #ifdef JIT_X86_64 /* Stack isn't used for 
arguments: */ -# define jit_prepare_i(ni) (_jitl.argssize = 0) +# define jit_prepare_i(ni) (_jitl.argssize = (ni), _jitl.argpushes = _jitl.argssize) #else # ifdef _CALL_DARWIN /* Stack must stay 16-byte aligned: */ @@ -375,12 +391,13 @@ struct jit_local_state { #define jit_prepare_f(nf) (_jitl.argssize += (nf)) #define jit_prepare_d(nd) (_jitl.argssize += 2 * (nd)) #ifdef JIT_X86_64 -# define jit_pusharg_i(rs) (_jitl.argssize++, MOVQrr(rs, JIT_CALLTMPSTART + _jitl.argssize - 1)) -# define jit_normal_pushonlyarg_i(rs) (_jitl.argssize++, MOVQrr(rs, _EDI)) -# define jit_save_argstate(curstate) curstate = _jitl.argssize; -# define jit_restore_argstate(curstate) _jitl.argssize = curstate; +# define jit_pusharg_i(rs) (_jitl.argpushes--, MOVQrr(rs, JIT_CALLTMPSTART + _jitl.argpushes)) +# define jit_normal_pushonlyarg_i(rs) (_jitl.argpushes--, MOVQrr(rs, _EDI)) +# define jit_save_argstate(curstate) curstate = _jitl.argpushes; +# define jit_restore_argstate(curstate) _jitl.argpushes = curstate; # define jit_finish(sub) (jit_shift_args(), (void)jit_calli((sub)), jit_restore_locals()) # define jit_normal_finish(sub) jit_calli((sub)) +# define jit_return_pop_insn_len() 0 # define jit_reg_is_arg(reg) ((reg == _EDI) || (reg ==_ESI) || (reg == _EDX)) # define jit_finishr(reg) ((jit_reg_is_arg((reg)) ? MOVQrr(reg, JIT_REXTMP) : (void)0), \ jit_shift_args(), \ @@ -389,12 +406,12 @@ struct jit_local_state { /* R12 and R13 are callee-save, instead of EDI and ESI */ # define jit_shift_args() \ (MOVQrr(_ESI, _R12), MOVQrr(_EDI, _R13), \ - (_jitl.argssize-- \ - ? (MOVQrr(JIT_CALLTMPSTART + _jitl.argssize, jit_arg_reg_order[0]), \ - (_jitl.argssize-- \ - ? (MOVQrr(JIT_CALLTMPSTART + _jitl.argssize, jit_arg_reg_order[1]), \ - (_jitl.argssize-- \ - ? MOVQrr(JIT_CALLTMPSTART, jit_arg_reg_order[2]) \ + (_jitl.argssize \ + ? (MOVQrr(JIT_CALLTMPSTART, jit_arg_reg_order[0]), \ + ((_jitl.argssize > 1) \ + ? (MOVQrr(JIT_CALLTMPSTART + 1, jit_arg_reg_order[1]), \ + ((_jitl.argssize > 2) \ + ? 
MOVQrr(JIT_CALLTMPSTART + 2, jit_arg_reg_order[2]) \ : (void)0)) \ : (void)0)) \ : (void)0)) @@ -407,6 +424,7 @@ struct jit_local_state { # define jit_restore_argstate(curstate) _jitl.argssize = curstate; # define jit_finish(sub) ((void)jit_calli((sub)), ADDLir(sizeof(long) * _jitl.argssize, JIT_SP), _jitl.argssize = 0) # define jit_finishr(reg) (jit_callr((reg)), ADDLir(sizeof(long) * _jitl.argssize, JIT_SP), _jitl.argssize = 0) +# define jit_return_pop_insn_len() 3 /* size of ADDLir() */ # define jit_normal_finish(sub) jit_finish(sub) #endif #define jit_pusharg_l(rs) jit_pusharg_i(rs) @@ -418,7 +436,7 @@ struct jit_local_state { #define jit_arg_l() (_jitl.nextarg_geti++) #define jit_arg_p() (_jitl.nextarg_geti++) #define jit_arg_reg(p) (jit_arg_reg_order[p]) -static int jit_arg_reg_order[] = { _EDI, _ESI, _EDX, _ECX }; +static const int const jit_arg_reg_order[] = { _EDI, _ESI, _EDX, _ECX }; #else #define jit_arg_c() ((_jitl.framesize += sizeof(int)) - sizeof(int)) #define jit_arg_uc() ((_jitl.framesize += sizeof(int)) - sizeof(int)) diff --git a/src/racket/src/mzmark.c b/src/racket/src/mzmark.c index 0eb9d6f139..8a6f1869bf 100644 --- a/src/racket/src/mzmark.c +++ b/src/racket/src/mzmark.c @@ -3315,6 +3315,39 @@ static int mark_cont_mark_chain_FIXUP(void *p, struct NewGC *gc) { #define mark_cont_mark_chain_IS_CONST_SIZE 1 +static int mark_lightweight_cont_SIZE(void *p, struct NewGC *gc) { + return + gcBYTES_TO_WORDS(sizeof(Scheme_Lightweight_Continuation)); +} + +static int mark_lightweight_cont_MARK(void *p, struct NewGC *gc) { + Scheme_Lightweight_Continuation *lw = (Scheme_Lightweight_Continuation *)p; + + gcMARK2(lw->saved_lwc, gc); + gcMARK2(lw->stack_slice, gc); + gcMARK2(lw->runstack_slice, gc); + gcMARK2(lw->cont_mark_stack_slice, gc); + + return + gcBYTES_TO_WORDS(sizeof(Scheme_Lightweight_Continuation)); +} + +static int mark_lightweight_cont_FIXUP(void *p, struct NewGC *gc) { + Scheme_Lightweight_Continuation *lw = (Scheme_Lightweight_Continuation *)p; + + gcFIXUP2(lw->saved_lwc, gc); + gcFIXUP2(lw->stack_slice, gc); + gcFIXUP2(lw->runstack_slice, gc); + gcFIXUP2(lw->cont_mark_stack_slice, gc); + + return + gcBYTES_TO_WORDS(sizeof(Scheme_Lightweight_Continuation)); +} + +#define mark_lightweight_cont_IS_ATOMIC 0 +#define mark_lightweight_cont_IS_CONST_SIZE 1 + + #endif /* FUN */ /**********************************************************************/ @@ -5722,6 +5755,8 @@ static int future_MARK(void *p, struct NewGC *gc) { gcMARK2(f->prev, gc); gcMARK2(f->next, gc); gcMARK2(f->next_waiting_atomic, gc); + gcMARK2(f->next_waiting_lwc, gc); + gcMARK2(f->suspended_lw, gc); return gcBYTES_TO_WORDS(sizeof(future_t)); } @@ -5748,6 +5783,8 @@ static int future_FIXUP(void *p, struct NewGC *gc) { gcFIXUP2(f->prev, gc); gcFIXUP2(f->next, gc); gcFIXUP2(f->next_waiting_atomic, gc); + gcFIXUP2(f->next_waiting_lwc, gc); + gcFIXUP2(f->suspended_lw, gc); return gcBYTES_TO_WORDS(sizeof(future_t)); } diff --git a/src/racket/src/mzmarksrc.c b/src/racket/src/mzmarksrc.c index 27df32d713..9150202fbb 100644 --- a/src/racket/src/mzmarksrc.c +++ b/src/racket/src/mzmarksrc.c @@ -1334,6 +1334,19 @@ mark_cont_mark_chain { gcBYTES_TO_WORDS(sizeof(Scheme_Cont_Mark_Chain)); } +mark_lightweight_cont { + mark: + Scheme_Lightweight_Continuation *lw = (Scheme_Lightweight_Continuation *)p; + + gcMARK2(lw->saved_lwc, gc); + gcMARK2(lw->stack_slice, gc); + gcMARK2(lw->runstack_slice, gc); + gcMARK2(lw->cont_mark_stack_slice, gc); + + size: + gcBYTES_TO_WORDS(sizeof(Scheme_Lightweight_Continuation)); +} + END fun; 
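mzmark.c is generated from mzmarksrc.c, which is why the mark_lightweight_cont traverser and the extra future fields appear in both files in parallel. The new traverser also has to be registered for the scheme_rt_lightweight_cont tag added in stypes.h below; that registration is not part of the hunks shown here, but following the codebase's usual pattern it would look roughly like:

/* Sketch, assuming the standard GC_REG_TRAV registration pattern; the
   actual registration hunk is not shown in this excerpt. */
static void register_traversers(void)
{
#ifdef MZTAG_REQUIRED
  GC_REG_TRAV(scheme_rt_lightweight_cont, mark_lightweight_cont);
#endif
}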
/**********************************************************************/ @@ -2353,6 +2366,8 @@ future { gcMARK2(f->prev, gc); gcMARK2(f->next, gc); gcMARK2(f->next_waiting_atomic, gc); + gcMARK2(f->next_waiting_lwc, gc); + gcMARK2(f->suspended_lw, gc); size: gcBYTES_TO_WORDS(sizeof(future_t)); } diff --git a/src/racket/src/schpriv.h b/src/racket/src/schpriv.h index d81321ddf8..4d5f9358a5 100644 --- a/src/racket/src/schpriv.h +++ b/src/racket/src/schpriv.h @@ -2254,6 +2254,53 @@ typedef struct { Scheme_Native_Closure_Data *scheme_generate_lambda(Scheme_Closure_Data *obj, int drop_code, Scheme_Native_Closure_Data *case_lam); +typedef struct Scheme_Current_LWC { + /* !! All of these fields are treated as atomic by the GC !! */ + Scheme_Object **runstack_start; + MZ_MARK_STACK_TYPE cont_mark_stack_start; + MZ_MARK_POS_TYPE cont_mark_pos_start; + void *stack_start; + Scheme_Object **runstack_end; + Scheme_Object **runstack_base_end; + MZ_MARK_STACK_TYPE cont_mark_stack_end; + MZ_MARK_POS_TYPE cont_mark_pos_end; + void *frame_end; + void *stack_end; + void *original_dest; + void *saved_v1; + double saved_save_fp; +} Scheme_Current_LWC; + +void scheme_init_thread_lwc(void); +void scheme_fill_lwc_start(void); +void scheme_fill_lwc_end(void); +void scheme_fill_stack_lwc_end(void); +void scheme_clear_lwc(void); + +THREAD_LOCAL_DECL(MZ_EXTERN Scheme_Current_LWC *scheme_current_lwc); + +Scheme_Object *scheme_call_as_lightweight_continuation(Scheme_Closed_Prim *code, + void *data, + int argc, + Scheme_Object **argv); +void *scheme_save_lightweight_continuation_stack(Scheme_Current_LWC *lwc); +Scheme_Object *scheme_apply_lightweight_continuation_stack(Scheme_Current_LWC *lwc, void *stack, + Scheme_Object *result); + +struct Scheme_Lightweight_Continuation; +typedef struct Scheme_Lightweight_Continuation Scheme_Lightweight_Continuation; +Scheme_Lightweight_Continuation *scheme_capture_lightweight_continuation(Scheme_Thread *p, + Scheme_Current_LWC *p_lwc, + void **storage); +Scheme_Object *scheme_apply_lightweight_continuation(Scheme_Lightweight_Continuation *captured, + Scheme_Object *result); +Scheme_Object **scheme_adjust_runstack_argument(Scheme_Lightweight_Continuation *captured, + Scheme_Object **arg); + +int scheme_push_marks_from_thread(Scheme_Thread *p2, Scheme_Cont_Frame_Data *d); +int scheme_push_marks_from_lightweight_continuation(Scheme_Lightweight_Continuation *captured, + Scheme_Cont_Frame_Data *d); + #define scheme_new_frame(n) scheme_new_special_frame(n, 0) #define scheme_extend_env(f, e) (f->basic.next = e, f) #define scheme_next_frame(e) ((e)->basic.next) diff --git a/src/racket/src/stypes.h b/src/racket/src/stypes.h index fb8ec69451..950061b7e4 100644 --- a/src/racket/src/stypes.h +++ b/src/racket/src/stypes.h @@ -257,6 +257,7 @@ enum { scheme_rt_validate_clearing, /* 234 */ scheme_rt_rb_node, /* 235 */ scheme_rt_frozen_tramp, /* 236 */ + scheme_rt_lightweight_cont, /* 237 */ #endif
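The schpriv.h block above is the C-level API that the futures runtime builds on: scheme_fill_lwc_start and scheme_fill_lwc_end record the boundaries (runstack, mark stack, C stack) of the region that can be captured, scheme_capture_lightweight_continuation packages that region, along with runstack and mark-stack slices, into a Scheme_Lightweight_Continuation, and scheme_apply_lightweight_continuation re-instates it around a result value. A minimal sketch of how the pieces compose, using only the prototypes declared above; the helper names, the storage size, and the suspend condition are assumptions for illustration, not the actual future.c code:

#include "schpriv.h"

/* On a future (worker) OS thread: run JIT-generated code so that all frames
   between the start/end fill points belong to the lightweight continuation. */
static Scheme_Object *run_as_lwc(Scheme_Closed_Prim *jitted, void *data,
                                 int argc, Scheme_Object **argv)
{
  return scheme_call_as_lightweight_continuation(jitted, data, argc, argv);
}

/* If the future must pause (for example, to hand an operation to the runtime
   thread), capture the frames recorded in scheme_current_lwc; `storage'
   cooperates with the GC across the capture. */
static Scheme_Lightweight_Continuation *suspend_lwc(Scheme_Thread *p)
{
  void *storage[4]; /* size chosen for illustration */
  return scheme_capture_lightweight_continuation(p, scheme_current_lwc, storage);
}

/* Later, on the runtime thread, continue the suspended computation with the
   value computed on the future's behalf. */
static Scheme_Object *resume_lwc(Scheme_Lightweight_Continuation *lw,
                                 Scheme_Object *result)
{
  return scheme_apply_lightweight_continuation(lw, result);
}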