improve performance interaction of GC (especially 3m) and lots of threads
svn: r5490
This commit is contained in:
parent
a742c67ab6
commit
60b1287342
|
@ -83,7 +83,8 @@ MzScheme allocates the following kinds of memory objects:
|
|||
|
||||
Note that a two-space copying collector might use only the fixup
|
||||
operation, while a non-moving collector might use only the mark
|
||||
operation.
|
||||
operation. However, MzScheme currently relies on at least fixup
|
||||
calls for scheme_runstack_rt-tagged objects.
|
||||
|
||||
* Atomic - The allocated object contains no pointers to other
|
||||
allocated objects.
|
||||
|
|
|
@ -440,7 +440,7 @@ static int just_checking, the_size;
|
|||
|
||||
#if defined(OS_X)
|
||||
# if GENERATIONS
|
||||
static void designate_modified(void *p);
|
||||
static int designate_modified(void *p);
|
||||
# endif
|
||||
|
||||
# define TEST 0
|
||||
|
@ -3920,6 +3920,11 @@ void *GC_malloc_allow_interior(size_t size_in_bytes)
|
|||
return malloc_bigblock(size_in_bytes, MTYPE_ARRAY, 1);
|
||||
}
|
||||
|
||||
void *GC_malloc_tagged_allow_interior(size_t size_in_bytes)
|
||||
{
|
||||
return malloc_bigblock(size_in_bytes, MTYPE_TAGGED, 1);
|
||||
}
|
||||
|
||||
void *GC_malloc_array_tagged(size_t size_in_bytes)
|
||||
{
|
||||
return malloc_untagged(size_in_bytes, MTYPE_TAGGED_ARRAY, &tagged_array);
|
||||
|
|
|
@ -179,6 +179,10 @@ GC2_EXTERN void *GC_malloc_allow_interior(size_t size_in_bytes);
|
|||
pointers into the middle of the array, or just past the end of the
|
||||
array. */
|
||||
|
||||
GC2_EXTERN void *GC_malloc_tagged_allow_interior(size_t size_in_bytes);
|
||||
/*
|
||||
Like GC_malloc_allow_interior(), but for a tagged object. */
|
||||
|
||||
GC2_EXTERN void *GC_malloc_weak_array(size_t size_in_bytes, void *replace_val);
|
||||
/*
|
||||
Alloc an array of weak pointers, initially zeroed. When a value in
|
||||
|
|
|
@ -510,6 +510,7 @@ void *GC_malloc_array_tagged(size_t s) { return allocate(s, PAGE_TARRAY); }
|
|||
void *GC_malloc_atomic(size_t s) { return allocate(s, PAGE_ATOMIC); }
|
||||
void *GC_malloc_atomic_uncollectable(size_t s) { return malloc(s); }
|
||||
void *GC_malloc_allow_interior(size_t s) {return allocate_big(s, PAGE_ARRAY);}
|
||||
void *GC_malloc_tagged_allow_interior(size_t s) {return allocate_big(s, PAGE_TAGGED);}
|
||||
void GC_free(void *p) {}
|
||||
|
||||
void *GC_malloc_one_small_tagged(size_t sizeb)
|
||||
|
|
|
@ -716,16 +716,16 @@ void *scheme_enlarge_runstack(long size, void *(*k)())
|
|||
size = 1000;
|
||||
}
|
||||
|
||||
p->runstack_saved = saved;
|
||||
if (p->spare_runstack && (size <= p->spare_runstack_size)) {
|
||||
size = p->spare_runstack_size;
|
||||
MZ_RUNSTACK_START = p->spare_runstack;
|
||||
p->spare_runstack = NULL;
|
||||
} else {
|
||||
MZ_RUNSTACK_START = scheme_malloc_allow_interior(sizeof(Scheme_Object*) * size);
|
||||
MZ_RUNSTACK_START = scheme_alloc_runstack(size);
|
||||
}
|
||||
p->runstack_size = size;
|
||||
MZ_RUNSTACK = MZ_RUNSTACK_START + size;
|
||||
p->runstack_saved = saved;
|
||||
|
||||
cont_count = scheme_cont_capture_count;
|
||||
|
||||
|
|
|
@ -4277,7 +4277,7 @@ static void restore_continuation(Scheme_Cont *cont, Scheme_Thread *p, int for_pr
|
|||
MZ_MARK_STACK_TYPE copied_cms = 0;
|
||||
Scheme_Object **mv, *sub_conts = NULL;
|
||||
int mc;
|
||||
|
||||
|
||||
if (SAME_OBJ(result, SCHEME_MULTIPLE_VALUES)) {
|
||||
/* Get values out before GC */
|
||||
mv = p->ku.multiple.array;
|
||||
|
@ -4349,6 +4349,7 @@ static void restore_continuation(Scheme_Cont *cont, Scheme_Thread *p, int for_pr
|
|||
mc = cont->meta_continuation;
|
||||
p->meta_continuation = mc;
|
||||
}
|
||||
|
||||
if (shortcut_prompt) {
|
||||
/* In shortcut mode, we need to preserve saved runstacks
|
||||
that were pruned when capturing the continuation. */
|
||||
|
@ -4370,14 +4371,19 @@ static void restore_continuation(Scheme_Cont *cont, Scheme_Thread *p, int for_pr
|
|||
p->runstack_saved = rs;
|
||||
} else
|
||||
p->runstack_saved = cont->runstack_saved;
|
||||
|
||||
MZ_RUNSTACK_START = cont->runstack_start;
|
||||
p->runstack_size = cont->runstack_size;
|
||||
|
||||
scheme_restore_env_stack_w_thread(cont->ss, p);
|
||||
|
||||
if (p->runstack_owner
|
||||
&& (*p->runstack_owner == p))
|
||||
&& (*p->runstack_owner == p)) {
|
||||
*p->runstack_owner = NULL;
|
||||
}
|
||||
|
||||
if (resume)
|
||||
p->meta_prompt = NULL; /* in case there's a GC before we can set it */
|
||||
|
||||
p->runstack_owner = cont->runstack_owner;
|
||||
if (p->runstack_owner && (*p->runstack_owner != p)) {
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
|
||||
(define re:mark "^ mark:")
|
||||
(define re:size "^ size:")
|
||||
(define re:size-or-more "^ (?:size|more):")
|
||||
(define re:fixup-start "^ fixup:")
|
||||
(define re:close "^}")
|
||||
|
||||
(define re:const-size (regexp "^[ \t]*gcBYTES_TO_WORDS[(]sizeof[(][A-Za-z0-9_]*[)][)];[ \t]*$"))
|
||||
|
@ -24,7 +26,8 @@
|
|||
[(regexp-match re:done l)
|
||||
null]
|
||||
[(or (regexp-match re:mark l)
|
||||
(regexp-match re:size l))
|
||||
(regexp-match re:size-or-more l)
|
||||
(regexp-match re:fixup-start l))
|
||||
(error 'mkmark.ss "unexpected label: ~a at ~a" l
|
||||
(file-position (current-input-port)))]
|
||||
[(regexp-match re:close l)
|
||||
|
@ -36,7 +39,12 @@
|
|||
(printf "~a~n" s))
|
||||
l))])
|
||||
(let ([prefix (read-lines re:mark)]
|
||||
[mark (read-lines re:size)]
|
||||
[mark (read-lines re:size-or-more)]
|
||||
[fixup (if (regexp-match-peek re:fixup-start (current-input-port))
|
||||
(begin
|
||||
(regexp-match re:fixup-start (current-input-port))
|
||||
(read-lines re:size))
|
||||
null)]
|
||||
[size (read-lines re:close)])
|
||||
(printf "static int ~a_SIZE(void *p) {~n" name)
|
||||
(print-lines prefix)
|
||||
|
@ -69,7 +77,9 @@
|
|||
s
|
||||
"FIXUP")
|
||||
"\\1"))
|
||||
mark))
|
||||
(append
|
||||
mark
|
||||
fixup)))
|
||||
(printf " return~n")
|
||||
(print-lines size)
|
||||
(printf "}~n~n")
|
||||
|
|
|
@ -1752,6 +1752,57 @@ static int thread_val_FIXUP(void *p) {
|
|||
#define thread_val_IS_CONST_SIZE 1
|
||||
|
||||
|
||||
static int runstack_val_SIZE(void *p) {
|
||||
long *s = (long *)p;
|
||||
return
|
||||
s[1];
|
||||
}
|
||||
|
||||
static int runstack_val_MARK(void *p) {
|
||||
long *s = (long *)p;
|
||||
void **a, **b;
|
||||
a = (void **)s + 4 + s[2];
|
||||
b = (void **)s + 4 + s[3];
|
||||
while (a < b) {
|
||||
gcMARK(*a);
|
||||
a++;
|
||||
}
|
||||
return
|
||||
s[1];
|
||||
}
|
||||
|
||||
static int runstack_val_FIXUP(void *p) {
|
||||
long *s = (long *)p;
|
||||
void **a, **b;
|
||||
a = (void **)s + 4 + s[2];
|
||||
b = (void **)s + 4 + s[3];
|
||||
while (a < b) {
|
||||
gcFIXUP(*a);
|
||||
a++;
|
||||
}
|
||||
|
||||
/* Zero out the part that we didn't mark, in case it becomes
|
||||
live later. */
|
||||
a = (void **)s + 4;
|
||||
b = (void **)s + 4 + s[2];
|
||||
while (a < b) {
|
||||
*a = NULL;
|
||||
a++;
|
||||
}
|
||||
a = (void **)s + 4 + s[3];
|
||||
b = (void **)s + 4 + (s[1] - 4);
|
||||
while (a < b) {
|
||||
*a = NULL;
|
||||
a++;
|
||||
}
|
||||
return
|
||||
s[1];
|
||||
}
|
||||
|
||||
#define runstack_val_IS_ATOMIC 0
|
||||
#define runstack_val_IS_CONST_SIZE 0
|
||||
|
||||
|
||||
static int prompt_val_SIZE(void *p) {
|
||||
return
|
||||
gcBYTES_TO_WORDS(sizeof(Scheme_Prompt));
|
||||
|
|
|
@ -684,6 +684,36 @@ thread_val {
|
|||
gcBYTES_TO_WORDS(sizeof(Scheme_Thread));
|
||||
}
|
||||
|
||||
runstack_val {
|
||||
long *s = (long *)p;
|
||||
mark:
|
||||
void **a, **b;
|
||||
a = (void **)s + 4 + s[2];
|
||||
b = (void **)s + 4 + s[3];
|
||||
while (a < b) {
|
||||
gcMARK(*a);
|
||||
a++;
|
||||
}
|
||||
more:
|
||||
fixup:
|
||||
/* Zero out the part that we didn't mark, in case it becomes
|
||||
live later. */
|
||||
a = (void **)s + 4;
|
||||
b = (void **)s + 4 + s[2];
|
||||
while (a < b) {
|
||||
*a = NULL;
|
||||
a++;
|
||||
}
|
||||
a = (void **)s + 4 + s[3];
|
||||
b = (void **)s + 4 + (s[1] - 4);
|
||||
while (a < b) {
|
||||
*a = NULL;
|
||||
a++;
|
||||
}
|
||||
size:
|
||||
s[1];
|
||||
}
|
||||
|
||||
prompt_val {
|
||||
mark:
|
||||
Scheme_Prompt *pr = (Scheme_Prompt *)p;
|
||||
|
|
|
@ -324,6 +324,9 @@ void scheme_start_itimer_thread(long usec);
|
|||
void scheme_block_child_signals(int block);
|
||||
#endif
|
||||
|
||||
Scheme_Object **scheme_alloc_runstack(long len);
|
||||
void scheme_set_runstack_limits(Scheme_Object **rs, long len, long start, long end);
|
||||
|
||||
void scheme_alloc_list_stack(Scheme_Thread *p);
|
||||
void scheme_clean_list_stack(Scheme_Thread *p);
|
||||
|
||||
|
|
|
@ -227,6 +227,7 @@ enum {
|
|||
scheme_rt_delay_load_info, /* 205 */
|
||||
scheme_rt_marshal_info, /* 206 */
|
||||
scheme_rt_unmarshal_info, /* 207 */
|
||||
scheme_rt_runstack, /* 208 */
|
||||
#endif
|
||||
|
||||
_scheme_last_type_
|
||||
|
|
|
@ -2037,7 +2037,7 @@ static Scheme_Thread *make_thread(Scheme_Config *config,
|
|||
process->runstack_size = init_stack_size;
|
||||
{
|
||||
Scheme_Object **sa;
|
||||
sa = scheme_malloc_allow_interior(sizeof(Scheme_Object*) * init_stack_size);
|
||||
sa = scheme_alloc_runstack(init_stack_size);
|
||||
process->runstack_start = sa;
|
||||
}
|
||||
process->runstack = process->runstack_start + init_stack_size;
|
||||
|
@ -2162,6 +2162,48 @@ void *scheme_tls_get(int pos)
|
|||
return p->user_tls[pos];
|
||||
}
|
||||
|
||||
#ifdef MZ_XFORM
|
||||
START_XFORM_SKIP;
|
||||
#endif
|
||||
|
||||
Scheme_Object **scheme_alloc_runstack(long len)
|
||||
{
|
||||
#ifdef MZ_PRECISE_GC
|
||||
long sz;
|
||||
void **p;
|
||||
sz = sizeof(Scheme_Object*) * (len + 4);
|
||||
p = (void **)GC_malloc_tagged_allow_interior(sz);
|
||||
*(Scheme_Type *)(void *)p = scheme_rt_runstack;
|
||||
((long *)(void *)p)[1] = gcBYTES_TO_WORDS(sz);
|
||||
((long *)(void *)p)[2] = 0;
|
||||
((long *)(void *)p)[3] = len;
|
||||
return (Scheme_Object **)(p + 4);
|
||||
#else
|
||||
return (Scheme_Object **)scheme_malloc_allow_interior(sizeof(Scheme_Object*) * len);
|
||||
#endif
|
||||
}
|
||||
|
||||
void scheme_set_runstack_limits(Scheme_Object **rs, long len, long start, long end)
|
||||
/* With 3m, we can tell the GC not to scan the unused parts, and we
|
||||
can have the fixup function zero out the unused parts; that avoids
|
||||
writing and scanning pages that could be skipped for a minor
|
||||
GC. For CGC, we have to just clear out the unused part. */
|
||||
{
|
||||
#ifdef MZ_PRECISE_GC
|
||||
if (((long *)(void *)rs)[-2] != start)
|
||||
((long *)(void *)rs)[-2] = start;
|
||||
if (((long *)(void *)rs)[-1] != end)
|
||||
((long *)(void *)rs)[-1] = end;
|
||||
#else
|
||||
memset(rs, 0, start * sizeof(Scheme_Object *));
|
||||
memset(rs + end, 0, (len - end) * sizeof(Scheme_Object *));
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef MZ_XFORM
|
||||
END_XFORM_SKIP;
|
||||
#endif
|
||||
|
||||
/*========================================================================*/
|
||||
/* thread creation and swapping */
|
||||
/*========================================================================*/
|
||||
|
@ -2371,10 +2413,12 @@ static void remove_thread(Scheme_Thread *r)
|
|||
if (r->runstack_owner) {
|
||||
/* Drop ownership, if active, and clear the stack */
|
||||
if (r == *(r->runstack_owner)) {
|
||||
memset(r->runstack_start, 0, r->runstack_size * sizeof(Scheme_Object*));
|
||||
r->runstack_start = NULL;
|
||||
if (r->runstack_start) {
|
||||
scheme_set_runstack_limits(r->runstack_start, r->runstack_size, 0, 0);
|
||||
r->runstack_start = NULL;
|
||||
}
|
||||
for (saved = r->runstack_saved; saved; saved = saved->prev) {
|
||||
memset(saved->runstack_start, 0, saved->runstack_size * sizeof(Scheme_Object*));
|
||||
scheme_set_runstack_limits(saved->runstack_start, saved->runstack_size, 0, 0);
|
||||
}
|
||||
r->runstack_saved = NULL;
|
||||
*(r->runstack_owner) = NULL;
|
||||
|
@ -6596,39 +6640,41 @@ static void prepare_thread_for_GC(Scheme_Object *t)
|
|||
/* zero ununsed part of env stack in each thread */
|
||||
|
||||
if (!p->nestee) {
|
||||
Scheme_Object **o, **e, **e2;
|
||||
Scheme_Saved_Stack *saved;
|
||||
# define RUNSTACK_TUNE(x) /* x - Used for performance tuning */
|
||||
RUNSTACK_TUNE( long size; );
|
||||
|
||||
if (!p->runstack_owner
|
||||
|| (p == *p->runstack_owner)) {
|
||||
o = p->runstack_start;
|
||||
e = p->runstack;
|
||||
e2 = p->runstack_tmp_keep;
|
||||
|
||||
while (o < e && (o != e2)) {
|
||||
*(o++) = NULL;
|
||||
}
|
||||
long rs_end;
|
||||
|
||||
/* If there's a meta-prompt, we can also zero out past the unused part */
|
||||
if (p->meta_prompt && (p->meta_prompt->runstack_boundary_start == p->runstack_start)) {
|
||||
e = p->runstack_start + p->runstack_size;
|
||||
o = p->runstack_start + p->meta_prompt->runstack_boundary_offset;
|
||||
while (o < e) {
|
||||
*(o++) = NULL;
|
||||
}
|
||||
rs_end = p->meta_prompt->runstack_boundary_offset;
|
||||
} else {
|
||||
rs_end = p->runstack_size;
|
||||
}
|
||||
|
||||
scheme_set_runstack_limits(p->runstack_start,
|
||||
p->runstack_size,
|
||||
p->runstack - p->runstack_start,
|
||||
rs_end);
|
||||
|
||||
RUNSTACK_TUNE( size = p->runstack_size - (p->runstack - p->runstack_start); );
|
||||
|
||||
for (saved = p->runstack_saved; saved; saved = saved->prev) {
|
||||
o = saved->runstack_start;
|
||||
e = o + saved->runstack_offset;
|
||||
RUNSTACK_TUNE( size += saved->runstack_size; );
|
||||
while (o < e) {
|
||||
*(o++) = NULL;
|
||||
}
|
||||
|
||||
if (p->meta_prompt && (p->meta_prompt->runstack_boundary_start == saved->runstack_start)) {
|
||||
rs_end = p->meta_prompt->runstack_boundary_offset;
|
||||
} else {
|
||||
rs_end = saved->runstack_size;
|
||||
}
|
||||
|
||||
scheme_set_runstack_limits(saved->runstack_start,
|
||||
saved->runstack_size,
|
||||
saved->runstack_offset,
|
||||
rs_end);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6662,11 +6708,16 @@ static void prepare_thread_for_GC(Scheme_Object *t)
|
|||
|
||||
if (segpos < p->cont_mark_seg_count) {
|
||||
Scheme_Cont_Mark *seg = p->cont_mark_stack_segments[segpos];
|
||||
int stackpos = ((long)p->cont_mark_stack & SCHEME_MARK_SEGMENT_MASK), i;
|
||||
int stackpos = ((long)p->cont_mark_stack & SCHEME_MARK_SEGMENT_MASK);
|
||||
for (i = stackpos; i < SCHEME_MARK_SEGMENT_SIZE; i++) {
|
||||
seg[i].key = NULL;
|
||||
seg[i].val = NULL;
|
||||
seg[i].cache = NULL;
|
||||
if (seg[i].key) {
|
||||
seg[i].key = NULL;
|
||||
seg[i].val = NULL;
|
||||
seg[i].cache = NULL;
|
||||
} else {
|
||||
/* NULL means we already cleared from here on. */
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -6729,9 +6780,7 @@ static void get_ready_for_GC()
|
|||
scheme_current_thread->cont_mark_pos = MZ_CONT_MARK_POS;
|
||||
#endif
|
||||
|
||||
if (scheme_fuel_counter) {
|
||||
for_each_managed(scheme_thread_type, prepare_thread_for_GC);
|
||||
}
|
||||
for_each_managed(scheme_thread_type, prepare_thread_for_GC);
|
||||
|
||||
#ifdef MZ_PRECISE_GC
|
||||
scheme_flush_stack_copy_cache();
|
||||
|
|
|
@ -540,6 +540,8 @@ void scheme_register_traversers(void)
|
|||
GC_REG_TRAV(scheme_global_ref_type, small_object);
|
||||
|
||||
GC_REG_TRAV(scheme_delay_syntax_type, small_object);
|
||||
|
||||
GC_REG_TRAV(scheme_rt_runstack, runstack_val);
|
||||
}
|
||||
|
||||
END_XFORM_SKIP;
|
||||
|
|
Loading…
Reference in New Issue
Block a user