improve performance interaction of GC (especially 3m) and lots of threads

svn: r5490
This commit is contained in:
Matthew Flatt 2007-01-29 01:50:52 +00:00
parent a742c67ab6
commit 60b1287342
13 changed files with 201 additions and 38 deletions

View File

@ -83,7 +83,8 @@ MzScheme allocates the following kinds of memory objects:
Note that a two-space copying collector might use only the fixup
operation, while a non-moving collector might use only the mark
operation.
operation. However, MzScheme currently relies on at least fixup
calls for scheme_runstack_rt-tagged objects.
* Atomic - The allocated object contains no pointers to other
allocated objects.

View File

@ -440,7 +440,7 @@ static int just_checking, the_size;
#if defined(OS_X)
# if GENERATIONS
static void designate_modified(void *p);
static int designate_modified(void *p);
# endif
# define TEST 0
@ -3920,6 +3920,11 @@ void *GC_malloc_allow_interior(size_t size_in_bytes)
return malloc_bigblock(size_in_bytes, MTYPE_ARRAY, 1);
}
void *GC_malloc_tagged_allow_interior(size_t size_in_bytes)
{
return malloc_bigblock(size_in_bytes, MTYPE_TAGGED, 1);
}
void *GC_malloc_array_tagged(size_t size_in_bytes)
{
return malloc_untagged(size_in_bytes, MTYPE_TAGGED_ARRAY, &tagged_array);

View File

@ -179,6 +179,10 @@ GC2_EXTERN void *GC_malloc_allow_interior(size_t size_in_bytes);
pointers into the middle of the array, or just past the end of the
array. */
GC2_EXTERN void *GC_malloc_tagged_allow_interior(size_t size_in_bytes);
/*
Like GC_malloc_allow_interior(), but for a tagged object. */
GC2_EXTERN void *GC_malloc_weak_array(size_t size_in_bytes, void *replace_val);
/*
Alloc an array of weak pointers, initially zeroed. When a value in

View File

@ -510,6 +510,7 @@ void *GC_malloc_array_tagged(size_t s) { return allocate(s, PAGE_TARRAY); }
void *GC_malloc_atomic(size_t s) { return allocate(s, PAGE_ATOMIC); }
void *GC_malloc_atomic_uncollectable(size_t s) { return malloc(s); }
void *GC_malloc_allow_interior(size_t s) {return allocate_big(s, PAGE_ARRAY);}
void *GC_malloc_tagged_allow_interior(size_t s) {return allocate_big(s, PAGE_TAGGED);}
void GC_free(void *p) {}
void *GC_malloc_one_small_tagged(size_t sizeb)

View File

@ -716,16 +716,16 @@ void *scheme_enlarge_runstack(long size, void *(*k)())
size = 1000;
}
p->runstack_saved = saved;
if (p->spare_runstack && (size <= p->spare_runstack_size)) {
size = p->spare_runstack_size;
MZ_RUNSTACK_START = p->spare_runstack;
p->spare_runstack = NULL;
} else {
MZ_RUNSTACK_START = scheme_malloc_allow_interior(sizeof(Scheme_Object*) * size);
MZ_RUNSTACK_START = scheme_alloc_runstack(size);
}
p->runstack_size = size;
MZ_RUNSTACK = MZ_RUNSTACK_START + size;
p->runstack_saved = saved;
cont_count = scheme_cont_capture_count;

View File

@ -4277,7 +4277,7 @@ static void restore_continuation(Scheme_Cont *cont, Scheme_Thread *p, int for_pr
MZ_MARK_STACK_TYPE copied_cms = 0;
Scheme_Object **mv, *sub_conts = NULL;
int mc;
if (SAME_OBJ(result, SCHEME_MULTIPLE_VALUES)) {
/* Get values out before GC */
mv = p->ku.multiple.array;
@ -4349,6 +4349,7 @@ static void restore_continuation(Scheme_Cont *cont, Scheme_Thread *p, int for_pr
mc = cont->meta_continuation;
p->meta_continuation = mc;
}
if (shortcut_prompt) {
/* In shortcut mode, we need to preserve saved runstacks
that were pruned when capturing the continuation. */
@ -4370,14 +4371,19 @@ static void restore_continuation(Scheme_Cont *cont, Scheme_Thread *p, int for_pr
p->runstack_saved = rs;
} else
p->runstack_saved = cont->runstack_saved;
MZ_RUNSTACK_START = cont->runstack_start;
p->runstack_size = cont->runstack_size;
scheme_restore_env_stack_w_thread(cont->ss, p);
if (p->runstack_owner
&& (*p->runstack_owner == p))
&& (*p->runstack_owner == p)) {
*p->runstack_owner = NULL;
}
if (resume)
p->meta_prompt = NULL; /* in case there's a GC before we can set it */
p->runstack_owner = cont->runstack_owner;
if (p->runstack_owner && (*p->runstack_owner != p)) {

View File

@ -6,6 +6,8 @@
(define re:mark "^ mark:")
(define re:size "^ size:")
(define re:size-or-more "^ (?:size|more):")
(define re:fixup-start "^ fixup:")
(define re:close "^}")
(define re:const-size (regexp "^[ \t]*gcBYTES_TO_WORDS[(]sizeof[(][A-Za-z0-9_]*[)][)];[ \t]*$"))
@ -24,7 +26,8 @@
[(regexp-match re:done l)
null]
[(or (regexp-match re:mark l)
(regexp-match re:size l))
(regexp-match re:size-or-more l)
(regexp-match re:fixup-start l))
(error 'mkmark.ss "unexpected label: ~a at ~a" l
(file-position (current-input-port)))]
[(regexp-match re:close l)
@ -36,7 +39,12 @@
(printf "~a~n" s))
l))])
(let ([prefix (read-lines re:mark)]
[mark (read-lines re:size)]
[mark (read-lines re:size-or-more)]
[fixup (if (regexp-match-peek re:fixup-start (current-input-port))
(begin
(regexp-match re:fixup-start (current-input-port))
(read-lines re:size))
null)]
[size (read-lines re:close)])
(printf "static int ~a_SIZE(void *p) {~n" name)
(print-lines prefix)
@ -69,7 +77,9 @@
s
"FIXUP")
"\\1"))
mark))
(append
mark
fixup)))
(printf " return~n")
(print-lines size)
(printf "}~n~n")

View File

@ -1752,6 +1752,57 @@ static int thread_val_FIXUP(void *p) {
#define thread_val_IS_CONST_SIZE 1
static int runstack_val_SIZE(void *p) {
long *s = (long *)p;
return
s[1];
}
static int runstack_val_MARK(void *p) {
long *s = (long *)p;
void **a, **b;
a = (void **)s + 4 + s[2];
b = (void **)s + 4 + s[3];
while (a < b) {
gcMARK(*a);
a++;
}
return
s[1];
}
static int runstack_val_FIXUP(void *p) {
long *s = (long *)p;
void **a, **b;
a = (void **)s + 4 + s[2];
b = (void **)s + 4 + s[3];
while (a < b) {
gcFIXUP(*a);
a++;
}
/* Zero out the part that we didn't mark, in case it becomes
live later. */
a = (void **)s + 4;
b = (void **)s + 4 + s[2];
while (a < b) {
*a = NULL;
a++;
}
a = (void **)s + 4 + s[3];
b = (void **)s + 4 + (s[1] - 4);
while (a < b) {
*a = NULL;
a++;
}
return
s[1];
}
#define runstack_val_IS_ATOMIC 0
#define runstack_val_IS_CONST_SIZE 0
static int prompt_val_SIZE(void *p) {
return
gcBYTES_TO_WORDS(sizeof(Scheme_Prompt));

View File

@ -684,6 +684,36 @@ thread_val {
gcBYTES_TO_WORDS(sizeof(Scheme_Thread));
}
runstack_val {
long *s = (long *)p;
mark:
void **a, **b;
a = (void **)s + 4 + s[2];
b = (void **)s + 4 + s[3];
while (a < b) {
gcMARK(*a);
a++;
}
more:
fixup:
/* Zero out the part that we didn't mark, in case it becomes
live later. */
a = (void **)s + 4;
b = (void **)s + 4 + s[2];
while (a < b) {
*a = NULL;
a++;
}
a = (void **)s + 4 + s[3];
b = (void **)s + 4 + (s[1] - 4);
while (a < b) {
*a = NULL;
a++;
}
size:
s[1];
}
prompt_val {
mark:
Scheme_Prompt *pr = (Scheme_Prompt *)p;

View File

@ -324,6 +324,9 @@ void scheme_start_itimer_thread(long usec);
void scheme_block_child_signals(int block);
#endif
Scheme_Object **scheme_alloc_runstack(long len);
void scheme_set_runstack_limits(Scheme_Object **rs, long len, long start, long end);
void scheme_alloc_list_stack(Scheme_Thread *p);
void scheme_clean_list_stack(Scheme_Thread *p);

View File

@ -227,6 +227,7 @@ enum {
scheme_rt_delay_load_info, /* 205 */
scheme_rt_marshal_info, /* 206 */
scheme_rt_unmarshal_info, /* 207 */
scheme_rt_runstack, /* 208 */
#endif
_scheme_last_type_

View File

@ -2037,7 +2037,7 @@ static Scheme_Thread *make_thread(Scheme_Config *config,
process->runstack_size = init_stack_size;
{
Scheme_Object **sa;
sa = scheme_malloc_allow_interior(sizeof(Scheme_Object*) * init_stack_size);
sa = scheme_alloc_runstack(init_stack_size);
process->runstack_start = sa;
}
process->runstack = process->runstack_start + init_stack_size;
@ -2162,6 +2162,48 @@ void *scheme_tls_get(int pos)
return p->user_tls[pos];
}
#ifdef MZ_XFORM
START_XFORM_SKIP;
#endif
Scheme_Object **scheme_alloc_runstack(long len)
{
#ifdef MZ_PRECISE_GC
long sz;
void **p;
sz = sizeof(Scheme_Object*) * (len + 4);
p = (void **)GC_malloc_tagged_allow_interior(sz);
*(Scheme_Type *)(void *)p = scheme_rt_runstack;
((long *)(void *)p)[1] = gcBYTES_TO_WORDS(sz);
((long *)(void *)p)[2] = 0;
((long *)(void *)p)[3] = len;
return (Scheme_Object **)(p + 4);
#else
return (Scheme_Object **)scheme_malloc_allow_interior(sizeof(Scheme_Object*) * len);
#endif
}
void scheme_set_runstack_limits(Scheme_Object **rs, long len, long start, long end)
/* With 3m, we can tell the GC not to scan the unused parts, and we
can have the fixup function zero out the unused parts; that avoids
writing and scanning pages that could be skipped for a minor
GC. For CGC, we have to just clear out the unused part. */
{
#ifdef MZ_PRECISE_GC
if (((long *)(void *)rs)[-2] != start)
((long *)(void *)rs)[-2] = start;
if (((long *)(void *)rs)[-1] != end)
((long *)(void *)rs)[-1] = end;
#else
memset(rs, 0, start * sizeof(Scheme_Object *));
memset(rs + end, 0, (len - end) * sizeof(Scheme_Object *));
#endif
}
#ifdef MZ_XFORM
END_XFORM_SKIP;
#endif
/*========================================================================*/
/* thread creation and swapping */
/*========================================================================*/
@ -2371,10 +2413,12 @@ static void remove_thread(Scheme_Thread *r)
if (r->runstack_owner) {
/* Drop ownership, if active, and clear the stack */
if (r == *(r->runstack_owner)) {
memset(r->runstack_start, 0, r->runstack_size * sizeof(Scheme_Object*));
r->runstack_start = NULL;
if (r->runstack_start) {
scheme_set_runstack_limits(r->runstack_start, r->runstack_size, 0, 0);
r->runstack_start = NULL;
}
for (saved = r->runstack_saved; saved; saved = saved->prev) {
memset(saved->runstack_start, 0, saved->runstack_size * sizeof(Scheme_Object*));
scheme_set_runstack_limits(saved->runstack_start, saved->runstack_size, 0, 0);
}
r->runstack_saved = NULL;
*(r->runstack_owner) = NULL;
@ -6596,39 +6640,41 @@ static void prepare_thread_for_GC(Scheme_Object *t)
/* zero ununsed part of env stack in each thread */
if (!p->nestee) {
Scheme_Object **o, **e, **e2;
Scheme_Saved_Stack *saved;
# define RUNSTACK_TUNE(x) /* x - Used for performance tuning */
RUNSTACK_TUNE( long size; );
if (!p->runstack_owner
|| (p == *p->runstack_owner)) {
o = p->runstack_start;
e = p->runstack;
e2 = p->runstack_tmp_keep;
while (o < e && (o != e2)) {
*(o++) = NULL;
}
long rs_end;
/* If there's a meta-prompt, we can also zero out past the unused part */
if (p->meta_prompt && (p->meta_prompt->runstack_boundary_start == p->runstack_start)) {
e = p->runstack_start + p->runstack_size;
o = p->runstack_start + p->meta_prompt->runstack_boundary_offset;
while (o < e) {
*(o++) = NULL;
}
rs_end = p->meta_prompt->runstack_boundary_offset;
} else {
rs_end = p->runstack_size;
}
scheme_set_runstack_limits(p->runstack_start,
p->runstack_size,
p->runstack - p->runstack_start,
rs_end);
RUNSTACK_TUNE( size = p->runstack_size - (p->runstack - p->runstack_start); );
for (saved = p->runstack_saved; saved; saved = saved->prev) {
o = saved->runstack_start;
e = o + saved->runstack_offset;
RUNSTACK_TUNE( size += saved->runstack_size; );
while (o < e) {
*(o++) = NULL;
}
if (p->meta_prompt && (p->meta_prompt->runstack_boundary_start == saved->runstack_start)) {
rs_end = p->meta_prompt->runstack_boundary_offset;
} else {
rs_end = saved->runstack_size;
}
scheme_set_runstack_limits(saved->runstack_start,
saved->runstack_size,
saved->runstack_offset,
rs_end);
}
}
@ -6662,11 +6708,16 @@ static void prepare_thread_for_GC(Scheme_Object *t)
if (segpos < p->cont_mark_seg_count) {
Scheme_Cont_Mark *seg = p->cont_mark_stack_segments[segpos];
int stackpos = ((long)p->cont_mark_stack & SCHEME_MARK_SEGMENT_MASK), i;
int stackpos = ((long)p->cont_mark_stack & SCHEME_MARK_SEGMENT_MASK);
for (i = stackpos; i < SCHEME_MARK_SEGMENT_SIZE; i++) {
seg[i].key = NULL;
seg[i].val = NULL;
seg[i].cache = NULL;
if (seg[i].key) {
seg[i].key = NULL;
seg[i].val = NULL;
seg[i].cache = NULL;
} else {
/* NULL means we already cleared from here on. */
break;
}
}
}
@ -6729,9 +6780,7 @@ static void get_ready_for_GC()
scheme_current_thread->cont_mark_pos = MZ_CONT_MARK_POS;
#endif
if (scheme_fuel_counter) {
for_each_managed(scheme_thread_type, prepare_thread_for_GC);
}
for_each_managed(scheme_thread_type, prepare_thread_for_GC);
#ifdef MZ_PRECISE_GC
scheme_flush_stack_copy_cache();

View File

@ -540,6 +540,8 @@ void scheme_register_traversers(void)
GC_REG_TRAV(scheme_global_ref_type, small_object);
GC_REG_TRAV(scheme_delay_syntax_type, small_object);
GC_REG_TRAV(scheme_rt_runstack, runstack_val);
}
END_XFORM_SKIP;