diff --git a/pkgs/racket-test-core/tests/racket/optimize.rktl b/pkgs/racket-test-core/tests/racket/optimize.rktl
index abe0933782..f7a77c3000 100644
--- a/pkgs/racket-test-core/tests/racket/optimize.rktl
+++ b/pkgs/racket-test-core/tests/racket/optimize.rktl
@@ -4492,6 +4492,25 @@
 
 (err/rt-test (f 10))
 
+;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Check that JIT-inlined `apply` doesn't overflow the runstack
+
+(define (f n done? . args)
+  (cond
+   [(positive? n)
+    (or (f (sub1 n) done?) #t)]
+   [done? #t]
+   [(= 50 (length args))
+    100]
+   [(apply f 0 #t 1 2 3 4 5 6 7 8 9 10 args)
+    (apply f 0 #f (cons 1 args))]))
+
+(for/fold ([v 0]) ([i 2])
+  (+ v
+     (for/fold ([v2 0]) ([i (in-range 100 512)])
+       (f i #f))))
+(collect-garbage)
+
 ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (report-errs)
diff --git a/racket/src/racket/src/eval.c b/racket/src/racket/src/eval.c
index 3fa15870fa..8b6e9d7f8a 100644
--- a/racket/src/racket/src/eval.c
+++ b/racket/src/racket/src/eval.c
@@ -796,6 +796,8 @@ void *scheme_enlarge_runstack(intptr_t size, void *(*k)())
   escape = 0;
   p = scheme_current_thread; /* might have changed! */
 
+  scheme_check_runstack_edge(MZ_RUNSTACK_START);
+
   if (cont_count == scheme_cont_capture_count) {
     if (!p->spare_runstack || (p->runstack_size > p->spare_runstack_size)) {
       p->spare_runstack = MZ_RUNSTACK_START;
diff --git a/racket/src/racket/src/jit.c b/racket/src/racket/src/jit.c
index 5c94a8a860..8c4c350007 100644
--- a/racket/src/racket/src/jit.c
+++ b/racket/src/racket/src/jit.c
@@ -3714,9 +3714,9 @@ static void on_demand_generate_lambda(Scheme_Native_Closure *nc, int argc, Schem
 #endif
   }
 
-  /* Add a couple of extra slots to computed let-depth, in case
-     we haven't quite computed right for inlined uses, etc. */
-  max_depth = WORDS_TO_BYTES(data->max_let_depth + gdata.max_extra + 2);
+  /* Add a couple of extra slots to computed let-depth, as needed
+     by various inlined operations. */
+  max_depth = WORDS_TO_BYTES(data->max_let_depth + gdata.max_extra + 4);
   if (gdata.max_tail_depth > max_depth)
     max_depth = gdata.max_tail_depth;
 
diff --git a/racket/src/racket/src/jit.h b/racket/src/racket/src/jit.h
index ad053e820e..31a77ab7ab 100644
--- a/racket/src/racket/src/jit.h
+++ b/racket/src/racket/src/jit.h
@@ -1471,7 +1471,7 @@ int scheme_generate_tail_call(mz_jit_state *jitter, int num_rands, int direct_na
                               Scheme_Closure_Data *direct_data);
 int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direct_native, int need_set_rs,
                                   int multi_ok, int result_ignored, int nontail_self, int pop_and_jump,
-                                  int is_inlined, int unboxed_args);
+                                  int is_inlined, int unboxed_args, jit_insn *reftop);
 int scheme_generate_finish_tail_call(mz_jit_state *jitter, int direct_native);
 int scheme_generate_finish_apply(mz_jit_state *jitter);
 int scheme_generate_finish_multi_apply(mz_jit_state *jitter);
diff --git a/racket/src/racket/src/jitcall.c b/racket/src/racket/src/jitcall.c
index 4ca4e301e9..7ec60a5600 100644
--- a/racket/src/racket/src/jitcall.c
+++ b/racket/src/racket/src/jitcall.c
@@ -642,7 +642,7 @@ static int generate_direct_prim_non_tail_call(mz_jit_state *jitter, int num_rand
 static int generate_retry_call(mz_jit_state *jitter, int num_rands, int multi_ok, int result_ignored,
                                GC_CAN_IGNORE jit_insn *reftop)
 /* If num_rands < 0, original argc is in V1, and we should
-   pop argc arguments off runstack before pushing more.
+   pop argc arguments off runstack before pushing more (unless num_rands == -3).
    This function is called with short jumps enabled. */
 {
   GC_CAN_IGNORE jit_insn *ref, *ref2, *refloop;
@@ -657,15 +657,14 @@ static int generate_retry_call(mz_jit_state *jitter, int num_rands, int multi_ok
   /* Get new argc: */
   (void)mz_tl_ldi_p(JIT_R1, tl_scheme_current_thread);
   jit_ldxi_l(JIT_R2, JIT_R1, &((Scheme_Thread *)0x0)->ku.apply.tail_num_rands);
-  if (num_rands >= 0) {
-    jit_movi_l(JIT_V1, 0);
-  }
-  /* Thread is in R1. New argc is in R2. Old argc to cancel is in V1. */
+  /* Thread is in R1. New argc is in R2. Old argc to cancel may be in V1. */
 
   /* Enough room on runstack? */
   mz_tl_ldi_p(JIT_R0, tl_MZ_RUNSTACK_START);
   jit_subr_ul(JIT_R0, JIT_RUNSTACK, JIT_R0); /* R0 is space left (in bytes) */
-  jit_subr_l(JIT_R2, JIT_R2, JIT_V1);
+  if ((num_rands < 0) && (num_rands != -3)) {
+    jit_subr_l(JIT_R2, JIT_R2, JIT_V1);
+  }
   jit_lshi_l(JIT_R2, JIT_R2, JIT_LOG_WORD_SIZE);
   ref = jit_bltr_ul(jit_forward(), JIT_R0, JIT_R2);
   CHECK_LIMIT();
@@ -753,7 +752,7 @@ static int generate_ignored_result_check(mz_jit_state *jitter)
 
 int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direct_native, int need_set_rs,
                                   int multi_ok, int result_ignored, int nontail_self, int pop_and_jump,
-                                  int is_inlined, int unboxed_args)
+                                  int is_inlined, int unboxed_args, jit_insn *reftop)
 {
   /* Non-tail call.
      Proc is in V1, args are at RUNSTACK.
@@ -762,21 +761,24 @@ int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc
       where R2 is set before jumping to the old FP, and R1 holds
       return address back here, and V1 and R0 must be preserved;
       num_rands >= 0 in this case, and the "slow path" returns NULL.
-     If num_rands < 0, then argc is in R0, and need to pop runstack before returning.
-     If num_rands == -1, skip prolog. */
+     If num_rands < 0, then argc is in R0, and
+      if num_rands != -3, need to pop runstack before returning.
+     If num_rands == -1 or -3, skip prolog. */
   GC_CAN_IGNORE jit_insn *ref, *ref2, *ref4, *ref5, *ref6, *ref7, *ref8, *ref9;
-  GC_CAN_IGNORE jit_insn *ref10, *reftop = NULL, *refagain;
+  GC_CAN_IGNORE jit_insn *ref10, *refagain;
   GC_CAN_IGNORE jit_insn *refrts USED_ONLY_FOR_FUTURES;
 #ifndef FUEL_AUTODECEREMENTS
   GC_CAN_IGNORE jit_insn *ref11;
 #endif
 
+  CHECK_RUNSTACK_OVERFLOW();
+
   __START_SHORT_JUMPS__(1);
 
   if (pop_and_jump) {
-    if (num_rands != -1) {
+    if ((num_rands != -1) && (num_rands != -3)) {
       mz_prolog(JIT_R1);
-    } else {
+    } else if (!reftop) {
       reftop = jit_get_ip();
     }
   }
@@ -827,7 +829,7 @@ int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc
     GC_CAN_IGNORE jit_insn *refxr;
 #endif
     if (num_rands < 0) {
-      /* We need to save argc to manually pop the
+      /* We need to save argc to clear and manually pop the
          runstack. So move V1 to R2 and move R0 to V1: */
       jit_movr_p(JIT_R2, JIT_V1);
       jit_movr_p(JIT_V1, JIT_R0);
@@ -915,7 +917,7 @@ int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc
       __START_INNER_TINY__(1);
       refc = jit_blei_p(jit_forward(), JIT_R0, SCHEME_MULTIPLE_VALUES);
       __END_INNER_TINY__(1);
-      if (num_rands < 0) {
+      if ((num_rands < 0) && (num_rands != -3)) {
         /* At this point, argc must be in V1 */
         jit_lshi_l(JIT_R1, JIT_V1, JIT_LOG_WORD_SIZE);
         jit_addr_p(JIT_RUNSTACK, JIT_RUNSTACK, JIT_R1);
@@ -940,7 +942,7 @@ int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc
       generate_clear_previous_args(jitter, num_rands);
       CHECK_LIMIT();
       if (pop_and_jump) {
-        /* Expects argc in V1 if num_rands < 0: */
+        /* Expects argc in V1 if num_rands < 0 and num_rands != -3: */
         generate_retry_call(jitter, num_rands, multi_ok, result_ignored, reftop);
       }
       CHECK_LIMIT();
@@ -1003,7 +1005,7 @@ int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc
       generate_clear_previous_args(jitter, num_rands);
       CHECK_LIMIT();
       if (pop_and_jump) {
-        /* Expects argc in V1 if num_rands < 0: */
+        /* Expects argc in V1 if num_rands < 0 and num_rands != -3: */
         generate_retry_call(jitter, num_rands, multi_ok, result_ignored, reftop);
       }
       CHECK_LIMIT();
@@ -1094,7 +1096,7 @@ int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc
     }
   }
   /* Note: same return code is above for faster common-case return */
-  if (num_rands < 0) {
+  if ((num_rands < 0) && (num_rands != -3)) {
     /* At this point, argc must be in V1 */
     jit_lshi_l(JIT_R1, JIT_V1, JIT_LOG_WORD_SIZE);
     jit_addr_p(JIT_RUNSTACK, JIT_RUNSTACK, JIT_R1);
@@ -1452,7 +1454,7 @@ static int do_generate_shared_call(mz_jit_state *jitter, void *_data)
     else
       ok = scheme_generate_non_tail_call(jitter, data->num_rands, data->direct_native, 1,
                                          data->multi_ok, data->result_ignored, data->nontail_self,
-                                         1, 0, data->unboxed_args);
+                                         1, 0, data->unboxed_args, NULL);
 
     scheme_jit_register_sub_func(jitter, code, scheme_false);
 
@@ -2157,7 +2159,7 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_
         generate_nontail_self_setup(jitter);
       }
       scheme_generate_non_tail_call(jitter, num_rands, direct_native, jitter->need_set_rs,
-                                    multi_ok, result_ignored, nontail_self, 0, 1, 0);
+                                    multi_ok, result_ignored, nontail_self, 0, 1, 0, NULL);
     }
   }
 } else {
diff --git a/racket/src/racket/src/jitcommon.c b/racket/src/racket/src/jitcommon.c
index b016422b19..48722c4a76 100644
--- a/racket/src/racket/src/jitcommon.c
+++ b/racket/src/racket/src/jitcommon.c
@@ -990,6 +990,7 @@ static int generate_apply_proxy(mz_jit_state *jitter, int setter)
       jit_ldxi_p(JIT_R1, JIT_RUNSTACK, WORDS_TO_BYTES(1)); /* index */
       if (setter) {
         jit_subi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(4));
+        CHECK_RUNSTACK_OVERFLOW();
         jit_stxi_p(WORDS_TO_BYTES(3), JIT_RUNSTACK, JIT_R0); /* save value */
       } else {
         jit_stxi_p(WORDS_TO_BYTES(1), JIT_RUNSTACK, JIT_R0); /* save value */
@@ -1001,7 +1002,7 @@ static int generate_apply_proxy(mz_jit_state *jitter, int setter)
       CHECK_LIMIT();
       JIT_UPDATE_THREAD_RSPTR();
       __END_SHORT_JUMPS__(1);
-      scheme_generate_non_tail_call(jitter, 3, 0, 0, 0, 0, 0, 0, 1, 0);
+      scheme_generate_non_tail_call(jitter, 3, 0, 0, 0, 0, 0, 0, 1, 0, NULL);
       __START_SHORT_JUMPS__(1);
       CHECK_LIMIT();
       if (setter) {
@@ -3394,8 +3395,10 @@ static int more_common0(mz_jit_state *jitter, void *_data)
     CHECK_LIMIT();
     mz_rs_sync();
 
+    CHECK_RUNSTACK_OVERFLOW();
+
     __END_SHORT_JUMPS__(1);
-    scheme_generate_non_tail_call(jitter, 2, 0, 1, 0, 0, 0, 0, 0, 0);
+    scheme_generate_non_tail_call(jitter, 2, 0, 1, 0, 0, 0, 0, 0, 0, NULL);
     CHECK_LIMIT();
     __START_SHORT_JUMPS__(1);
 
@@ -3693,7 +3696,7 @@ static int more_common1(mz_jit_state *jitter, void *_data)
   /* argc is in V1 */
   {
     int multi_ok;
-    GC_CAN_IGNORE jit_insn *ref1, *ref2, *ref3, *ref4, *ref6, *ref7, *refloop;
+    GC_CAN_IGNORE jit_insn *ref1, *ref2, *ref3, *ref4, *ref6, *ref7, *refloop, *reftop;
     void *code;
 
     for (multi_ok = 0; multi_ok < 2; multi_ok++) {
@@ -3798,7 +3801,8 @@ static int more_common1(mz_jit_state *jitter, void *_data)
       jit_movr_p(JIT_RUNSTACK, JIT_R2);
       jit_rshi_ul(JIT_R1, JIT_R1, JIT_LOG_WORD_SIZE);
       jit_movr_i(JIT_R0, JIT_R1);
-      ref6 = jit_jmpi(jit_forward());
+      reftop = jit_get_ip();
+      scheme_generate_non_tail_call(jitter, -1, 0, 1, multi_ok, 0, 0, 1, 0, 0, NULL);
       CHECK_LIMIT();
 
       /***********************************/
@@ -3806,31 +3810,15 @@ static int more_common1(mz_jit_state *jitter, void *_data)
       mz_patch_branch(ref1);
       mz_patch_branch(ref3);
       mz_patch_branch(ref4);
+
+      __END_SHORT_JUMPS__(1);
 
-      /* We have to copy the args, because the generic apply
-         wants to pop N arguments. */
-      jit_lshi_ul(JIT_R0, JIT_V1, JIT_LOG_WORD_SIZE);
-      jit_subr_p(JIT_R2, JIT_RUNSTACK, JIT_R0);
-      refloop = jit_get_ip();
-      jit_subi_l(JIT_R0, JIT_R0, JIT_WORD_SIZE);
-      jit_ldxr_p(JIT_R1, JIT_RUNSTACK, JIT_R0);
-      jit_stxr_p(JIT_R0, JIT_R2, JIT_R1);
-      CHECK_LIMIT();
-      __START_INNER_TINY__(1);
-      (void)jit_bnei_l(refloop, JIT_R0, 0);
-      __END_INNER_TINY__(1);
-
-      jit_movr_p(JIT_RUNSTACK, JIT_R2);
-
-      /* Set V1 and local2 for arguments to generic tail-call handler: */
+      /* Set R0 and V1 for arguments to generic tail-call handler: */
       jit_movr_p(JIT_R0, JIT_V1);
       (void)jit_movi_p(JIT_V1, scheme_apply_proc);
-      mz_patch_ucbranch(ref6);
-
-      __END_SHORT_JUMPS__(1);
-
-      scheme_generate_non_tail_call(jitter, -1, 0, 1, multi_ok, 0, 0, 1, 0, 0);
+      /* -3 here means "don't pop the arguments"; need regular argument
+         handling via `reftop` for tail calls */
+      scheme_generate_non_tail_call(jitter, -3, 0, 1, multi_ok, 0, 0, 1, 0, 0, reftop);
 
       scheme_jit_register_sub_func(jitter, code, scheme_false);
     }
diff --git a/racket/src/racket/src/mzmark_type.inc b/racket/src/racket/src/mzmark_type.inc
index 82574ed78b..f15b940996 100644
--- a/racket/src/racket/src/mzmark_type.inc
+++ b/racket/src/racket/src/mzmark_type.inc
@@ -2023,8 +2023,8 @@ static int runstack_val_SIZE(void *p, struct NewGC *gc) {
 static int runstack_val_MARK(void *p, struct NewGC *gc) {
   intptr_t *s = (intptr_t *)p;
   void **a, **b;
-  a = (void **)s + 4 + s[2];
-  b = (void **)s + 4 + s[3];
+  a = (void **)s + 5 + s[2];
+  b = (void **)s + 5 + s[3];
   while (a < b) {
     gcMARK2(*a, gc);
     a++;
@@ -2036,8 +2036,8 @@ static int runstack_val_MARK(void *p, struct NewGC *gc) {
 static int runstack_val_FIXUP(void *p, struct NewGC *gc) {
   intptr_t *s = (intptr_t *)p;
   void **a, **b;
-  a = (void **)s + 4 + s[2];
-  b = (void **)s + 4 + s[3];
+  a = (void **)s + 5 + s[2];
+  b = (void **)s + 5 + s[3];
   while (a < b) {
     gcFIXUP2(*a, gc);
     a++;
@@ -2045,14 +2045,14 @@ static int runstack_val_FIXUP(void *p, struct NewGC *gc) {
 
   /* Zero out the part that we didn't mark, in case
      it becomes live later. */
-  a = (void **)s + 4;
-  b = (void **)s + 4 + s[2];
+  a = (void **)s + 5;
+  b = (void **)s + 5 + s[2];
   while (a < b) {
     *a = RUNSTACK_ZERO_VAL;
     a++;
   }
-  a = (void **)s + 4 + s[3];
-  b = (void **)s + 4 + (s[1] - 4);
+  a = (void **)s + 5 + s[3];
+  b = (void **)s + 5 + (s[1] - 5);
   while (a < b) {
     *a = RUNSTACK_ZERO_VAL;
     a++;
diff --git a/racket/src/racket/src/mzmarksrc.c b/racket/src/racket/src/mzmarksrc.c
index 72794ce019..4ce76e608c 100644
--- a/racket/src/racket/src/mzmarksrc.c
+++ b/racket/src/racket/src/mzmarksrc.c
@@ -804,8 +804,8 @@ runstack_val {
   intptr_t *s = (intptr_t *)p;
  mark:
   void **a, **b;
-  a = (void **)s + 4 + s[2];
-  b = (void **)s + 4 + s[3];
+  a = (void **)s + 5 + s[2];
+  b = (void **)s + 5 + s[3];
   while (a < b) {
     gcMARK2(*a, gc);
     a++;
@@ -814,14 +814,14 @@ runstack_val {
  fixup:
   /* Zero out the part that we didn't mark, in case
      it becomes live later. */
-  a = (void **)s + 4;
-  b = (void **)s + 4 + s[2];
+  a = (void **)s + 5;
+  b = (void **)s + 5 + s[2];
   while (a < b) {
     *a = RUNSTACK_ZERO_VAL;
     a++;
   }
-  a = (void **)s + 4 + s[3];
-  b = (void **)s + 4 + (s[1] - 4);
+  a = (void **)s + 5 + s[3];
+  b = (void **)s + 5 + (s[1] - 5);
   while (a < b) {
     *a = RUNSTACK_ZERO_VAL;
     a++;
diff --git a/racket/src/racket/src/schpriv.h b/racket/src/racket/src/schpriv.h
index bf503335f6..8c7495f629 100644
--- a/racket/src/racket/src/schpriv.h
+++ b/racket/src/racket/src/schpriv.h
@@ -653,6 +653,7 @@ void scheme_prepare_this_thread_for_GC(Scheme_Thread *t);
 
 Scheme_Object **scheme_alloc_runstack(intptr_t len);
 void scheme_set_runstack_limits(Scheme_Object **rs, intptr_t len, intptr_t start, intptr_t end);
+void scheme_check_runstack_edge(Scheme_Object **rs);
 
 void scheme_alloc_list_stack(Scheme_Thread *p);
 void scheme_clean_list_stack(Scheme_Thread *p);
diff --git a/racket/src/racket/src/thread.c b/racket/src/racket/src/thread.c
index 8dad29dc56..d79964c6b0 100644
--- a/racket/src/racket/src/thread.c
+++ b/racket/src/racket/src/thread.c
@@ -2583,13 +2583,15 @@ Scheme_Object **scheme_alloc_runstack(intptr_t len)
 #ifdef MZ_PRECISE_GC
   intptr_t sz;
   void **p;
-  sz = sizeof(Scheme_Object*) * (len + 4);
+  sz = sizeof(Scheme_Object*) * (len + 5);
   p = (void **)GC_malloc_tagged_allow_interior(sz);
   *(Scheme_Type *)(void *)p = scheme_rt_runstack;
   ((intptr_t *)(void *)p)[1] = gcBYTES_TO_WORDS(sz);
   ((intptr_t *)(void *)p)[2] = 0;
   ((intptr_t *)(void *)p)[3] = len;
-  return (Scheme_Object **)(p + 4);
+# define MZ_RUNSTACK_OVERFLOW_CANARY 0xFF77FF77
+  ((intptr_t *)(void *)p)[4] = MZ_RUNSTACK_OVERFLOW_CANARY;
+  return (Scheme_Object **)(p + 5);
 #else
   return (Scheme_Object **)scheme_malloc_allow_interior(sizeof(Scheme_Object*) * len);
 #endif
@@ -2602,17 +2604,28 @@ void scheme_set_runstack_limits(Scheme_Object **rs, intptr_t len, intptr_t start
      writing and scanning pages that could be skipped for a minor GC.
    For CGC, we have to just clear out the unused part. */
 {
+  scheme_check_runstack_edge(rs);
 #ifdef MZ_PRECISE_GC
-  if (((intptr_t *)(void *)rs)[-2] != start)
-    ((intptr_t *)(void *)rs)[-2] = start;
-  if (((intptr_t *)(void *)rs)[-1] != end)
-    ((intptr_t *)(void *)rs)[-1] = end;
+  if (((intptr_t *)(void *)rs)[-3] != start)
+    ((intptr_t *)(void *)rs)[-3] = start;
+  if (((intptr_t *)(void *)rs)[-2] != end)
+    ((intptr_t *)(void *)rs)[-2] = end;
 #else
   memset(rs, 0, start * sizeof(Scheme_Object *));
   memset(rs + end, 0, (len - end) * sizeof(Scheme_Object *));
 #endif
 }
 
+void scheme_check_runstack_edge(Scheme_Object **rs)
+{
+#ifdef MZ_PRECISE_GC
+  if (((intptr_t *)rs)[-1] != MZ_RUNSTACK_OVERFLOW_CANARY) {
+    scheme_log_abort("internal error: runstack overflow detected");
+    abort();
+  }
+#endif
+}
+
 void *scheme_register_process_global(const char *key, void *val)
 {
   void *old_val = NULL;
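
Note on the mechanism (not part of the patch itself): the runstack block allocated by scheme_alloc_runstack now carries five header words instead of four -- type tag, size, live-range start, live-range end, and a canary word (0xFF77FF77) stored immediately below the first usable slot -- and scheme_check_runstack_edge aborts if that canary has been clobbered, which is how a JIT-inlined `apply` that pushes arguments past the runstack edge is now caught rather than silently corrupting the block's GC header; the mzmark changes are the GC-side counterpart, shifting the header offset from 4 to 5. Below is a minimal standalone C sketch of that layout. The names model_alloc_runstack and model_check_edge are hypothetical, and plain malloc stands in for Racket's GC allocator.

/* Standalone sketch of the 5-word header + canary scheme; hypothetical names. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define MODEL_CANARY 0xFF77FF77

static intptr_t *model_alloc_runstack(intptr_t len)
{
  /* [0] type tag, [1] total words, [2] live start, [3] live end, [4] canary */
  intptr_t *p = malloc(sizeof(intptr_t) * (len + 5));
  p[0] = 0;                  /* stands in for scheme_rt_runstack */
  p[1] = len + 5;
  p[2] = 0;
  p[3] = len;
  p[4] = MODEL_CANARY;
  return p + 5;              /* callers see only the usable area */
}

static void model_check_edge(intptr_t *rs)
{
  /* rs[-1] is the canary just below the usable area */
  if (rs[-1] != MODEL_CANARY) {
    fprintf(stderr, "runstack overflow detected\n");
    abort();
  }
}

int main(void)
{
  intptr_t *rs = model_alloc_runstack(16);
  model_check_edge(rs);      /* passes */
  rs[-1] = 0;                /* simulate a push past the runstack edge */
  model_check_edge(rs);      /* aborts, like the check added in thread.c */
  return 0;
}

Running the sketch succeeds on the first check and aborts on the second, mirroring the "internal error: runstack overflow detected" abort that the patch adds.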