fix potential stack overflow with JIT-inlined apply

If the slow path has to be taken because the number of
list elements is greater than the stack size, then the
old implementation would copy all the arguments --- which
still might be too much for the available stack space.
Avoid that copy.

Also, add a pad word to the end of the stack to help detect
overflow.
This commit is contained in:
Matthew Flatt 2015-01-06 10:55:37 -07:00
parent 2b07cc34f7
commit 3408209f66
10 changed files with 95 additions and 69 deletions

View File

@ -4492,6 +4492,25 @@
(err/rt-test (f 10))
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Check that JIT-inlined `apply` doesn't overflow the runstack
(define (f n done? . args)
(cond
[(positive? n)
(or (f (sub1 n) done?) #t)]
[done? #t]
[(= 50 (length args))
100]
[(apply f 0 #t 1 2 3 4 5 6 7 8 9 10 args)
(apply f 0 #f (cons 1 args))]))
(for/fold ([v 0]) ([i 2])
(+ v
(for/fold ([v2 0]) ([i (in-range 100 512)])
(f i #f))))
(collect-garbage)
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(report-errs)

View File

@ -796,6 +796,8 @@ void *scheme_enlarge_runstack(intptr_t size, void *(*k)())
escape = 0;
p = scheme_current_thread; /* might have changed! */
scheme_check_runstack_edge(MZ_RUNSTACK_START);
if (cont_count == scheme_cont_capture_count) {
if (!p->spare_runstack || (p->runstack_size > p->spare_runstack_size)) {
p->spare_runstack = MZ_RUNSTACK_START;

View File

@ -1334,6 +1334,7 @@ static Scheme_Native_Closure_Data *create_native_case_lambda(Scheme_Case_Lambda
if (data->u.native_code->max_let_depth > max_let_depth)
max_let_depth = data->u.native_code->max_let_depth;
}
printf("%d\n", max_let_depth);
ndata->max_let_depth = max_let_depth;
ndata->closure_size = -(count + 1); /* Indicates case-lambda */
@ -3714,9 +3715,9 @@ static void on_demand_generate_lambda(Scheme_Native_Closure *nc, int argc, Schem
#endif
}
/* Add a couple of extra slots to computed let-depth, in case
we haven't quite computed right for inlined uses, etc. */
max_depth = WORDS_TO_BYTES(data->max_let_depth + gdata.max_extra + 2);
/* Add a couple of extra slots to computed let-depth, as needed
by various inlined operations. */
max_depth = WORDS_TO_BYTES(data->max_let_depth + gdata.max_extra + 4);
if (gdata.max_tail_depth > max_depth)
max_depth = gdata.max_tail_depth;

View File

@ -635,7 +635,7 @@ void *scheme_jit_get_threadlocal_table();
static void *top;
static void *cr_tmp;
# define CHECK_RUNSTACK_OVERFLOW_NOCL() \
jit_sti_l(&cr_tmp, JIT_R0); jit_ldi_l(JIT_R0, &scheme_current_runstack_start); \
jit_sti_l(&cr_tmp, JIT_R0); jit_movi_l(JIT_R0, __LINE__); jit_ldi_l(JIT_R0, &scheme_current_runstack_start); \
top = (_jit.x.pc); (void)jit_bltr_ul(top, JIT_RUNSTACK, JIT_R0); jit_ldi_l(JIT_R0, &cr_tmp)
# define CHECK_RUNSTACK_OVERFLOW() \
CHECK_LIMIT(); CHECK_RUNSTACK_OVERFLOW_NOCL()
@ -1471,7 +1471,7 @@ int scheme_generate_tail_call(mz_jit_state *jitter, int num_rands, int direct_na
Scheme_Closure_Data *direct_data);
int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direct_native, int need_set_rs,
int multi_ok, int result_ignored, int nontail_self, int pop_and_jump,
int is_inlined, int unboxed_args);
int is_inlined, int unboxed_args, jit_insn *reftop);
int scheme_generate_finish_tail_call(mz_jit_state *jitter, int direct_native);
int scheme_generate_finish_apply(mz_jit_state *jitter);
int scheme_generate_finish_multi_apply(mz_jit_state *jitter);

View File

@ -642,7 +642,7 @@ static int generate_direct_prim_non_tail_call(mz_jit_state *jitter, int num_rand
static int generate_retry_call(mz_jit_state *jitter, int num_rands, int multi_ok, int result_ignored,
GC_CAN_IGNORE jit_insn *reftop)
/* If num_rands < 0, original argc is in V1, and we should
pop argc arguments off runstack before pushing more.
pop argc arguments off runstack before pushing more (unless num_rands == -3).
This function is called with short jumps enabled. */
{
GC_CAN_IGNORE jit_insn *ref, *ref2, *refloop;
@ -657,15 +657,14 @@ static int generate_retry_call(mz_jit_state *jitter, int num_rands, int multi_ok
/* Get new argc: */
(void)mz_tl_ldi_p(JIT_R1, tl_scheme_current_thread);
jit_ldxi_l(JIT_R2, JIT_R1, &((Scheme_Thread *)0x0)->ku.apply.tail_num_rands);
if (num_rands >= 0) {
jit_movi_l(JIT_V1, 0);
}
/* Thread is in R1. New argc is in R2. Old argc to cancel is in V1. */
/* Thread is in R1. New argc is in R2. Old argc to cancel may be in V1. */
/* Enough room on runstack? */
mz_tl_ldi_p(JIT_R0, tl_MZ_RUNSTACK_START);
jit_subr_ul(JIT_R0, JIT_RUNSTACK, JIT_R0); /* R0 is space left (in bytes) */
if ((num_rands < 0) && (num_rands != -3)) {
jit_subr_l(JIT_R2, JIT_R2, JIT_V1);
}
jit_lshi_l(JIT_R2, JIT_R2, JIT_LOG_WORD_SIZE);
ref = jit_bltr_ul(jit_forward(), JIT_R0, JIT_R2);
CHECK_LIMIT();
@ -753,7 +752,7 @@ static int generate_ignored_result_check(mz_jit_state *jitter)
int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direct_native, int need_set_rs,
int multi_ok, int result_ignored, int nontail_self, int pop_and_jump,
int is_inlined, int unboxed_args)
int is_inlined, int unboxed_args, jit_insn *reftop)
{
/* Non-tail call.
Proc is in V1, args are at RUNSTACK.
@ -762,21 +761,24 @@ int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc
where R2 is set before jumping to the old FP, and R1 holds
return address back here, and V1 and R0 must be preserved;
num_rands >= 0 in this case, and the "slow path" returns NULL.
If num_rands < 0, then argc is in R0, and need to pop runstack before returning.
If num_rands == -1, skip prolog. */
If num_rands < 0, then argc is in R0, and
if num_rands != -3, need to pop runstack before returning.
If num_rands == -1 or -3, skip prolog. */
GC_CAN_IGNORE jit_insn *ref, *ref2, *ref4, *ref5, *ref6, *ref7, *ref8, *ref9;
GC_CAN_IGNORE jit_insn *ref10, *reftop = NULL, *refagain;
GC_CAN_IGNORE jit_insn *ref10, *refagain;
GC_CAN_IGNORE jit_insn *refrts USED_ONLY_FOR_FUTURES;
#ifndef FUEL_AUTODECEREMENTS
GC_CAN_IGNORE jit_insn *ref11;
#endif
CHECK_RUNSTACK_OVERFLOW();
__START_SHORT_JUMPS__(1);
if (pop_and_jump) {
if (num_rands != -1) {
if ((num_rands != -1) && (num_rands != -3)) {
mz_prolog(JIT_R1);
} else {
} else if (!reftop) {
reftop = jit_get_ip();
}
}
@ -827,7 +829,7 @@ int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc
GC_CAN_IGNORE jit_insn *refxr;
#endif
if (num_rands < 0) {
/* We need to save argc to manually pop the
/* We need to save argc to clear and manually pop the
runstack. So move V1 to R2 and move R0 to V1: */
jit_movr_p(JIT_R2, JIT_V1);
jit_movr_p(JIT_V1, JIT_R0);
@ -915,7 +917,7 @@ int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc
__START_INNER_TINY__(1);
refc = jit_blei_p(jit_forward(), JIT_R0, SCHEME_MULTIPLE_VALUES);
__END_INNER_TINY__(1);
if (num_rands < 0) {
if ((num_rands < 0) && (num_rands != -3)) {
/* At this point, argc must be in V1 */
jit_lshi_l(JIT_R1, JIT_V1, JIT_LOG_WORD_SIZE);
jit_addr_p(JIT_RUNSTACK, JIT_RUNSTACK, JIT_R1);
@ -940,7 +942,7 @@ int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc
generate_clear_previous_args(jitter, num_rands);
CHECK_LIMIT();
if (pop_and_jump) {
/* Expects argc in V1 if num_rands < 0: */
/* Expects argc in V1 if num_rands < 0 and num_rands != -3: */
generate_retry_call(jitter, num_rands, multi_ok, result_ignored, reftop);
}
CHECK_LIMIT();
@ -1003,7 +1005,7 @@ int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc
generate_clear_previous_args(jitter, num_rands);
CHECK_LIMIT();
if (pop_and_jump) {
/* Expects argc in V1 if num_rands < 0: */
/* Expects argc in V1 if num_rands < 0 and num_rands != -3: */
generate_retry_call(jitter, num_rands, multi_ok, result_ignored, reftop);
}
CHECK_LIMIT();
@ -1094,7 +1096,7 @@ int scheme_generate_non_tail_call(mz_jit_state *jitter, int num_rands, int direc
}
}
/* Note: same return code is above for faster common-case return */
if (num_rands < 0) {
if ((num_rands < 0) && (num_rands != -3)) {
/* At this point, argc must be in V1 */
jit_lshi_l(JIT_R1, JIT_V1, JIT_LOG_WORD_SIZE);
jit_addr_p(JIT_RUNSTACK, JIT_RUNSTACK, JIT_R1);
@ -1452,7 +1454,7 @@ static int do_generate_shared_call(mz_jit_state *jitter, void *_data)
else
ok = scheme_generate_non_tail_call(jitter, data->num_rands, data->direct_native, 1,
data->multi_ok, data->result_ignored, data->nontail_self,
1, 0, data->unboxed_args);
1, 0, data->unboxed_args, NULL);
scheme_jit_register_sub_func(jitter, code, scheme_false);
@ -2157,7 +2159,7 @@ int scheme_generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_
generate_nontail_self_setup(jitter);
}
scheme_generate_non_tail_call(jitter, num_rands, direct_native, jitter->need_set_rs,
multi_ok, result_ignored, nontail_self, 0, 1, 0);
multi_ok, result_ignored, nontail_self, 0, 1, 0, NULL);
}
}
} else {

View File

@ -990,6 +990,7 @@ static int generate_apply_proxy(mz_jit_state *jitter, int setter)
jit_ldxi_p(JIT_R1, JIT_RUNSTACK, WORDS_TO_BYTES(1)); /* index */
if (setter) {
jit_subi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(4));
CHECK_RUNSTACK_OVERFLOW();
jit_stxi_p(WORDS_TO_BYTES(3), JIT_RUNSTACK, JIT_R0); /* save value */
} else {
jit_stxi_p(WORDS_TO_BYTES(1), JIT_RUNSTACK, JIT_R0); /* save value */
@ -1001,7 +1002,7 @@ static int generate_apply_proxy(mz_jit_state *jitter, int setter)
CHECK_LIMIT();
JIT_UPDATE_THREAD_RSPTR();
__END_SHORT_JUMPS__(1);
scheme_generate_non_tail_call(jitter, 3, 0, 0, 0, 0, 0, 0, 1, 0);
scheme_generate_non_tail_call(jitter, 3, 0, 0, 0, 0, 0, 0, 1, 0, NULL);
__START_SHORT_JUMPS__(1);
CHECK_LIMIT();
if (setter) {
@ -3394,8 +3395,10 @@ static int more_common0(mz_jit_state *jitter, void *_data)
CHECK_LIMIT();
mz_rs_sync();
CHECK_RUNSTACK_OVERFLOW();
__END_SHORT_JUMPS__(1);
scheme_generate_non_tail_call(jitter, 2, 0, 1, 0, 0, 0, 0, 0, 0);
scheme_generate_non_tail_call(jitter, 2, 0, 1, 0, 0, 0, 0, 0, 0, NULL);
CHECK_LIMIT();
__START_SHORT_JUMPS__(1);
@ -3693,7 +3696,7 @@ static int more_common1(mz_jit_state *jitter, void *_data)
/* argc is in V1 */
{
int multi_ok;
GC_CAN_IGNORE jit_insn *ref1, *ref2, *ref3, *ref4, *ref6, *ref7, *refloop;
GC_CAN_IGNORE jit_insn *ref1, *ref2, *ref3, *ref4, *ref6, *ref7, *refloop, *reftop;
void *code;
for (multi_ok = 0; multi_ok < 2; multi_ok++) {
@ -3798,7 +3801,8 @@ static int more_common1(mz_jit_state *jitter, void *_data)
jit_movr_p(JIT_RUNSTACK, JIT_R2);
jit_rshi_ul(JIT_R1, JIT_R1, JIT_LOG_WORD_SIZE);
jit_movr_i(JIT_R0, JIT_R1);
ref6 = jit_jmpi(jit_forward());
reftop = jit_get_ip();
scheme_generate_non_tail_call(jitter, -1, 0, 1, multi_ok, 0, 0, 1, 0, 0, NULL);
CHECK_LIMIT();
/***********************************/
@ -3806,31 +3810,15 @@ static int more_common1(mz_jit_state *jitter, void *_data)
mz_patch_branch(ref1);
mz_patch_branch(ref3);
mz_patch_branch(ref4);
__END_SHORT_JUMPS__(1);
/* We have to copy the args, because the generic apply
wants to pop N arguments. */
jit_lshi_ul(JIT_R0, JIT_V1, JIT_LOG_WORD_SIZE);
jit_subr_p(JIT_R2, JIT_RUNSTACK, JIT_R0);
refloop = jit_get_ip();
jit_subi_l(JIT_R0, JIT_R0, JIT_WORD_SIZE);
jit_ldxr_p(JIT_R1, JIT_RUNSTACK, JIT_R0);
jit_stxr_p(JIT_R0, JIT_R2, JIT_R1);
CHECK_LIMIT();
__START_INNER_TINY__(1);
(void)jit_bnei_l(refloop, JIT_R0, 0);
__END_INNER_TINY__(1);
jit_movr_p(JIT_RUNSTACK, JIT_R2);
/* Set V1 and local2 for arguments to generic tail-call handler: */
/* Set R0 and V1 for arguments to generic tail-call handler: */
jit_movr_p(JIT_R0, JIT_V1);
(void)jit_movi_p(JIT_V1, scheme_apply_proc);
mz_patch_ucbranch(ref6);
__END_SHORT_JUMPS__(1);
scheme_generate_non_tail_call(jitter, -1, 0, 1, multi_ok, 0, 0, 1, 0, 0);
/* -3 here means "don't pop the arguments"; need regular argument
handling via `reftop` for tail calls */
scheme_generate_non_tail_call(jitter, -3, 0, 1, multi_ok, 0, 0, 1, 0, 0, reftop);
scheme_jit_register_sub_func(jitter, code, scheme_false);
}

View File

@ -2023,8 +2023,8 @@ static int runstack_val_SIZE(void *p, struct NewGC *gc) {
static int runstack_val_MARK(void *p, struct NewGC *gc) {
intptr_t *s = (intptr_t *)p;
void **a, **b;
a = (void **)s + 4 + s[2];
b = (void **)s + 4 + s[3];
a = (void **)s + 5 + s[2];
b = (void **)s + 5 + s[3];
while (a < b) {
gcMARK2(*a, gc);
a++;
@ -2036,8 +2036,8 @@ static int runstack_val_MARK(void *p, struct NewGC *gc) {
static int runstack_val_FIXUP(void *p, struct NewGC *gc) {
intptr_t *s = (intptr_t *)p;
void **a, **b;
a = (void **)s + 4 + s[2];
b = (void **)s + 4 + s[3];
a = (void **)s + 5 + s[2];
b = (void **)s + 5 + s[3];
while (a < b) {
gcFIXUP2(*a, gc);
a++;
@ -2045,14 +2045,14 @@ static int runstack_val_FIXUP(void *p, struct NewGC *gc) {
/* Zero out the part that we didn't mark, in case it becomes
live later. */
a = (void **)s + 4;
b = (void **)s + 4 + s[2];
a = (void **)s + 5;
b = (void **)s + 5 + s[2];
while (a < b) {
*a = RUNSTACK_ZERO_VAL;
a++;
}
a = (void **)s + 4 + s[3];
b = (void **)s + 4 + (s[1] - 4);
a = (void **)s + 5 + s[3];
b = (void **)s + 5 + (s[1] - 5);
while (a < b) {
*a = RUNSTACK_ZERO_VAL;
a++;

View File

@ -804,8 +804,8 @@ runstack_val {
intptr_t *s = (intptr_t *)p;
mark:
void **a, **b;
a = (void **)s + 4 + s[2];
b = (void **)s + 4 + s[3];
a = (void **)s + 5 + s[2];
b = (void **)s + 5 + s[3];
while (a < b) {
gcMARK2(*a, gc);
a++;
@ -814,14 +814,14 @@ runstack_val {
fixup:
/* Zero out the part that we didn't mark, in case it becomes
live later. */
a = (void **)s + 4;
b = (void **)s + 4 + s[2];
a = (void **)s + 5;
b = (void **)s + 5 + s[2];
while (a < b) {
*a = RUNSTACK_ZERO_VAL;
a++;
}
a = (void **)s + 4 + s[3];
b = (void **)s + 4 + (s[1] - 4);
a = (void **)s + 5 + s[3];
b = (void **)s + 5 + (s[1] - 5);
while (a < b) {
*a = RUNSTACK_ZERO_VAL;
a++;

View File

@ -653,6 +653,7 @@ void scheme_prepare_this_thread_for_GC(Scheme_Thread *t);
Scheme_Object **scheme_alloc_runstack(intptr_t len);
void scheme_set_runstack_limits(Scheme_Object **rs, intptr_t len, intptr_t start, intptr_t end);
void scheme_check_runstack_edge(Scheme_Object **rs);
void scheme_alloc_list_stack(Scheme_Thread *p);
void scheme_clean_list_stack(Scheme_Thread *p);

View File

@ -2583,13 +2583,15 @@ Scheme_Object **scheme_alloc_runstack(intptr_t len)
#ifdef MZ_PRECISE_GC
intptr_t sz;
void **p;
sz = sizeof(Scheme_Object*) * (len + 4);
sz = sizeof(Scheme_Object*) * (len + 5);
p = (void **)GC_malloc_tagged_allow_interior(sz);
*(Scheme_Type *)(void *)p = scheme_rt_runstack;
((intptr_t *)(void *)p)[1] = gcBYTES_TO_WORDS(sz);
((intptr_t *)(void *)p)[2] = 0;
((intptr_t *)(void *)p)[3] = len;
return (Scheme_Object **)(p + 4);
# define MZ_RUNSTACK_OVERFLOW_CANARY 0xFF77FF77
((intptr_t *)(void *)p)[4] = MZ_RUNSTACK_OVERFLOW_CANARY;
return (Scheme_Object **)(p + 5);
#else
return (Scheme_Object **)scheme_malloc_allow_interior(sizeof(Scheme_Object*) * len);
#endif
@ -2602,17 +2604,28 @@ void scheme_set_runstack_limits(Scheme_Object **rs, intptr_t len, intptr_t start
writing and scanning pages that could be skipped for a minor
GC. For CGC, we have to just clear out the unused part. */
{
scheme_check_runstack_edge(rs);
#ifdef MZ_PRECISE_GC
if (((intptr_t *)(void *)rs)[-2] != start)
((intptr_t *)(void *)rs)[-2] = start;
if (((intptr_t *)(void *)rs)[-1] != end)
((intptr_t *)(void *)rs)[-1] = end;
if (((intptr_t *)(void *)rs)[-3] != start)
((intptr_t *)(void *)rs)[-3] = start;
if (((intptr_t *)(void *)rs)[-2] != end)
((intptr_t *)(void *)rs)[-2] = end;
#else
memset(rs, 0, start * sizeof(Scheme_Object *));
memset(rs + end, 0, (len - end) * sizeof(Scheme_Object *));
#endif
}
void scheme_check_runstack_edge(Scheme_Object **rs)
{
#ifdef MZ_PRECISE_GC
if (((intptr_t *)rs)[-1] != MZ_RUNSTACK_OVERFLOW_CANARY) {
scheme_log_abort("internal error: runstack overflow detected");
abort();
}
#endif
}
void *scheme_register_process_global(const char *key, void *val)
{
void *old_val = NULL;