Chez Scheme GC: refactor thread and allocation organization

Move per-thread allocation information to a separate object, so the
needs of the collector are better separated from the thread
representation. This refactoring sets up a change in the collector to
detangle sweeper threads from Scheme threads.
This commit is contained in:
Matthew Flatt 2020-09-26 13:12:54 -06:00
parent c46e4f91c1
commit 16d71ac249
14 changed files with 562 additions and 543 deletions

View File

@ -338,7 +338,7 @@ RACKET_FOR_BOOTFILES = $(RACKET)
RACKET_FOR_BUILD = $(RACKET)
# This branch name changes each time the pb boot files are updated:
PB_BRANCH = circa-7.8.0.10-11
PB_BRANCH = circa-7.8.0.10-12
PB_REPO = https://github.com/racket/pb
# Alternative source for Chez Scheme boot files, normally set by

View File

@ -47,7 +47,7 @@ RACKETCS_SUFFIX =
RACKET =
RACKET_FOR_BOOTFILES = $(RACKET)
RACKET_FOR_BUILD = $(RACKET)
PB_BRANCH = circa-7.8.0.10-11
PB_BRANCH = circa-7.8.0.10-12
PB_REPO = https://github.com/racket/pb
EXTRA_REPOS_BASE =
CS_CROSS_SUFFIX =
@ -306,14 +306,14 @@ maybe-fetch-pb-as-is:
echo done
fetch-pb-from:
mkdir -p racket/src/ChezScheme/boot
if [ ! -d racket/src/ChezScheme/boot/pb ] ; then git clone -q -b circa-7.8.0.10-11 https://github.com/racket/pb racket/src/ChezScheme/boot/pb ; else cd racket/src/ChezScheme/boot/pb && git fetch -q origin circa-7.8.0.10-11:remotes/origin/circa-7.8.0.10-11 ; fi
cd racket/src/ChezScheme/boot/pb && git checkout -q circa-7.8.0.10-11
if [ ! -d racket/src/ChezScheme/boot/pb ] ; then git clone -q -b circa-7.8.0.10-12 https://github.com/racket/pb racket/src/ChezScheme/boot/pb ; else cd racket/src/ChezScheme/boot/pb && git fetch -q origin circa-7.8.0.10-12:remotes/origin/circa-7.8.0.10-12 ; fi
cd racket/src/ChezScheme/boot/pb && git checkout -q circa-7.8.0.10-12
pb-stage:
cd racket/src/ChezScheme/boot/pb && git branch circa-7.8.0.10-11
cd racket/src/ChezScheme/boot/pb && git checkout circa-7.8.0.10-11
cd racket/src/ChezScheme/boot/pb && git branch circa-7.8.0.10-12
cd racket/src/ChezScheme/boot/pb && git checkout circa-7.8.0.10-12
cd racket/src/ChezScheme/boot/pb && git add . && git commit --amend -m "new build"
pb-push:
cd racket/src/ChezScheme/boot/pb && git push -u origin circa-7.8.0.10-11
cd racket/src/ChezScheme/boot/pb && git push -u origin circa-7.8.0.10-12
win-cs-base:
IF "$(RACKET_FOR_BUILD)" == "" $(MAKE) win-bc-then-cs-base SETUP_BOOT_MODE=--boot WIN32_BUILD_LEVEL=bc PLAIN_RACKET=racket\racketbc DISABLE_STATIC_LIBS="$(DISABLE_STATIC_LIBS)" EXTRA_REPOS_BASE="$(EXTRA_REPOS_BASE)" GIT_CLONE_ARGS_qq="$(GIT_CLONE_ARGS_qq)" JOB_OPTIONS="$(JOB_OPTIONS)" PLT_SETUP_OPTIONS="$(PLT_SETUP_OPTIONS)" RACKETBC_SUFFIX="$(RACKETBC_SUFFIX)" RACKETCS_SUFFIX="$(RACKETCS_SUFFIX)"
IF not "$(RACKET_FOR_BUILD)" == "" $(MAKE) win-just-cs-base SETUP_BOOT_MODE=--chain DISABLE_STATIC_LIBS="$(DISABLE_STATIC_LIBS)" EXTRA_REPOS_BASE="$(EXTRA_REPOS_BASE)" GIT_CLONE_ARGS_qq="$(GIT_CLONE_ARGS_qq)" JOB_OPTIONS="$(JOB_OPTIONS)" PLT_SETUP_OPTIONS="$(PLT_SETUP_OPTIONS)" RACKETCS_SUFFIX="$(RACKETCS_SUFFIX)" RACKET_FOR_BUILD="$(RACKET_FOR_BUILD)"

View File

@ -26,13 +26,16 @@ void S_alloc_init() {
if (S_boot_time) {
ptr tc = TO_PTR(S_G.thread_context);
GCDATA(tc) = TO_PTR(&S_G.main_thread_gc);
S_G.main_thread_gc.tc = tc;
/* reset the allocation tables */
for (g = 0; g <= static_generation; g++) {
S_G.bytes_of_generation[g] = 0;
for (s = 0; s <= max_real_space; s++) {
BASELOC_AT(tc, s, g) = FIX(0);
NEXTLOC_AT(tc, s, g) = FIX(0);
BYTESLEFT_AT(tc, s, g) = 0;
S_G.main_thread_gc.base_loc[g][s] = FIX(0);
S_G.main_thread_gc.next_loc[g][s] = FIX(0);
S_G.main_thread_gc.bytes_left[g][s] = 0;
S_G.bytes_of_space[g][s] = 0;
}
}
@ -159,9 +162,9 @@ ptr S_compute_bytes_allocated(xg, xs) ptr xg; ptr xs; {
/* add in bytes previously recorded */
n += S_G.bytes_of_space[g][s];
/* add in bytes in active segments */
next_loc = NEXTLOC_AT(tc, s, g);
next_loc = THREAD_GC(tc)->next_loc[g][s];
if (next_loc != FIX(0))
n += (uptr)next_loc - (uptr)BASELOC_AT(tc, s, g);
n += (uptr)next_loc - (uptr)THREAD_GC(tc)->base_loc[g][s];
if (s == space_data) {
/* don't count space used for bitmasks */
n -= S_G.bitmask_overhead[g];
@ -190,7 +193,7 @@ static void maybe_fire_collector() {
}
/* suitable mutex (either tc_mutex or gc_tc_mutex) must be held */
static void close_off_segment(ptr tc, ptr old, ptr base_loc, ptr sweep_loc, ISPC s, IGEN g)
static void close_off_segment(thread_gc *tgc, ptr old, ptr base_loc, ptr sweep_loc, ISPC s, IGEN g)
{
if (base_loc) {
seginfo *si;
@ -206,12 +209,12 @@ static void close_off_segment(ptr tc, ptr old, ptr base_loc, ptr sweep_loc, ISPC
/* in case this is during a GC, add to sweep list */
si = SegInfo(addr_get_segment(base_loc));
si->sweep_start = sweep_loc;
si->sweep_next = TO_VOIDP(SWEEPNEXT_AT(tc, s, g));
SWEEPNEXT_AT(tc, s, g) = TO_PTR(si);
si->sweep_next = tgc->sweep_next[g][s];
tgc->sweep_next[g][s] = si;
}
}
ptr S_find_more_thread_room(ptr tc, ISPC s, IGEN g, iptr n, ptr old) {
ptr S_find_more_gc_room(thread_gc *tgc, ISPC s, IGEN g, iptr n, ptr old) {
iptr nsegs, seg;
ptr new;
iptr new_bytes;
@ -224,8 +227,8 @@ ptr S_find_more_thread_room(ptr tc, ISPC s, IGEN g, iptr n, ptr old) {
#else
tc_mutex_acquire();
#endif
close_off_segment(tc, old, BASELOC_AT(tc, s, g), SWEEPLOC_AT(tc, s, g), s, g);
close_off_segment(tgc, old, tgc->base_loc[g][s], tgc->sweep_loc[g][s], s, g);
S_pants_down += 1;
@ -234,15 +237,15 @@ ptr S_find_more_thread_room(ptr tc, ISPC s, IGEN g, iptr n, ptr old) {
/* block requests to minimize fragmentation and improve cache locality */
if (s == space_code && nsegs < 16) nsegs = 16;
seg = S_find_segments(tc, s, g, nsegs);
seg = S_find_segments(tgc, s, g, nsegs);
new = build_ptr(seg, 0);
new_bytes = nsegs * bytes_per_segment;
BASELOC_AT(tc, s, g) = new;
SWEEPLOC_AT(tc, s, g) = new;
BYTESLEFT_AT(tc, s, g) = (new_bytes - n) - ptr_bytes;
NEXTLOC_AT(tc, s, g) = (ptr)((uptr)new + n);
tgc->base_loc[g][s] = new;
tgc->sweep_loc[g][s] = new;
tgc->bytes_left[g][s] = (new_bytes - n) - ptr_bytes;
tgc->next_loc[g][s] = (ptr)((uptr)new + n);
if (g == 0 && S_pants_down == 1) maybe_fire_collector();
@ -262,12 +265,14 @@ ptr S_find_more_thread_room(ptr tc, ISPC s, IGEN g, iptr n, ptr old) {
/* tc_mutex must be held */
void S_close_off_thread_local_segment(ptr tc, ISPC s, IGEN g) {
close_off_segment(tc, NEXTLOC_AT(tc, s, g), BASELOC_AT(tc, s, g), SWEEPLOC_AT(tc, s, g), s, g);
thread_gc *tgc = THREAD_GC(tc);
BASELOC_AT(tc, s, g) = (ptr)0;
BYTESLEFT_AT(tc, s, g) = 0;
NEXTLOC_AT(tc, s, g) = (ptr)0;
SWEEPLOC_AT(tc, s, g) = (ptr)0;
close_off_segment(tgc, tgc->next_loc[g][s], tgc->base_loc[g][s], tgc->sweep_loc[g][s], s, g);
tgc->base_loc[g][s] = (ptr)0;
tgc->bytes_left[g][s] = 0;
tgc->next_loc[g][s] = (ptr)0;
tgc->sweep_loc[g][s] = (ptr)0;
}
/* S_reset_allocation_pointer is always called with mutex */
@ -285,14 +290,14 @@ void S_reset_allocation_pointer(tc) ptr tc; {
S_pants_down += 1;
seg = S_find_segments(tc, space_new, 0, 1);
seg = S_find_segments(THREAD_GC(tc), space_new, 0, 1);
/* NB: if allocate_segments didn't already ensure we don't use the last segment
of memory, we'd have to reject it here so cp2-alloc can avoid a carry check for
small allocation requests, using something like this:
if (seg == (((uptr)1 << (ptr_bits - segment_offset_bits)) - 1))
seg = S_find_segments(tc, space_new, 0, 1);
seg = S_find_segments(THREAD_GC(tc), space_new, 0, 1);
*/
S_G.bytes_of_space[0][space_new] += bytes_per_segment;
@ -306,7 +311,7 @@ void S_reset_allocation_pointer(tc) ptr tc; {
S_pants_down -= 1;
}
void S_record_new_dirty_card(ptr tc, ptr *ppp, IGEN to_g) {
void S_record_new_dirty_card(thread_gc *tgc, ptr *ppp, IGEN to_g) {
uptr card = (uptr)TO_PTR(ppp) >> card_offset_bits;
dirtycardinfo *ndc;
@ -316,7 +321,7 @@ void S_record_new_dirty_card(ptr tc, ptr *ppp, IGEN to_g) {
if (to_g < ndc->youngest) ndc->youngest = to_g;
} else {
dirtycardinfo *next = ndc;
find_room_voidp(tc, space_new, 0, ptr_align(sizeof(dirtycardinfo)), ndc);
find_gc_room_voidp(tgc, space_new, 0, ptr_align(sizeof(dirtycardinfo)), ndc);
ndc->card = card;
ndc->youngest = to_g;
ndc->next = next;
@ -353,7 +358,7 @@ void S_dirty_set(ptr *loc, ptr x) {
if (!IMMEDIATE(x)) {
seginfo *t_si = SegInfo(ptr_get_segment(x));
if (t_si->generation < si->generation)
S_record_new_dirty_card(get_thread_context(), loc, t_si->generation);
S_record_new_dirty_card(THREAD_GC(get_thread_context()), loc, t_si->generation);
}
} else {
IGEN from_g = si->generation;

View File

@ -66,8 +66,8 @@ extern void S_reset_allocation_pointer PROTO((ptr tc));
extern ptr S_compute_bytes_allocated PROTO((ptr xg, ptr xs));
extern ptr S_bytes_finalized PROTO(());
extern ptr S_find_more_room PROTO((ISPC s, IGEN g, iptr n, ptr old));
extern void S_record_new_dirty_card PROTO((ptr tc, ptr *ppp, IGEN to_g));
extern ptr S_find_more_thread_room PROTO((ptr tc, IGEN g, ISPC s, iptr n, ptr old));
extern void S_record_new_dirty_card PROTO((thread_gc *tgc, ptr *ppp, IGEN to_g));
extern ptr S_find_more_gc_room PROTO((thread_gc *tgc, IGEN g, ISPC s, iptr n, ptr old));
extern void S_close_off_thread_local_segment PROTO((ptr tc, ISPC s, IGEN g));
extern void S_dirty_set PROTO((ptr *loc, ptr x));
extern void S_mark_card_dirty PROTO((uptr card, IGEN to_g));
@ -382,7 +382,7 @@ extern INT matherr PROTO((struct exception *x));
extern void S_segment_init PROTO((void));
extern void *S_getmem PROTO((iptr bytes, IBOOL zerofill));
extern void S_freemem PROTO((void *addr, iptr bytes));
extern iptr S_find_segments PROTO((ptr tc, ISPC s, IGEN g, iptr n));
extern iptr S_find_segments PROTO((thread_gc *creator, ISPC s, IGEN g, iptr n));
extern void S_free_chunk PROTO((chunkinfo *chunk));
extern void S_free_chunks PROTO((void));
extern uptr S_curmembytes PROTO((void));

File diff suppressed because it is too large Load Diff

View File

@ -573,7 +573,6 @@ static void check_pointer(ptr *pp, IBOOL address_is_meaningful, ptr base, uptr s
if (Scar(l) == p)
printf(" in unlocked\n");
}
abort(); // REMOVEME
}
}
}
@ -591,7 +590,7 @@ static ptr *find_nl(ptr *pp1, ptr *pp2, ISPC s, IGEN g) {
for (ls = S_threads; ls != Snil; ls = Scdr(ls)) {
ptr t_tc = (ptr)THREADTC(Scar(ls));
nl = TO_VOIDP(NEXTLOC_AT(t_tc, s, g));
nl = TO_VOIDP(THREAD_GC(t_tc)->next_loc[g][s]);
if (pp1 <= nl && nl < pp2)
return nl;
}
@ -626,17 +625,18 @@ void S_check_heap(aftergc, mcg) IBOOL aftergc; IGEN mcg; {
ptr t_tc = (ptr)THREADTC(Scar(ls));
for (s = 0; s <= max_real_space; s += 1) {
for (g = 0; g <= static_generation; INCRGEN(g)) {
if ((NEXTLOC_AT(t_tc, s, g) == (ptr)0) != (BASELOC_AT(t_tc, s, g) == (ptr)0)) {
thread_gc *tgc = THREAD_GC(t_tc);
if ((tgc->next_loc[g][s] == (ptr)0) != (tgc->base_loc[g][s] == (ptr)0)) {
S_checkheap_errors += 1;
printf("!!! inconsistent thread NEXT %p and BASE %p\n",
TO_VOIDP(NEXTLOC_AT(t_tc, s, g)), TO_VOIDP(BASELOC_AT(t_tc, s, g)));
TO_VOIDP(tgc->next_loc[g][s]), TO_VOIDP(tgc->base_loc[g][s]));
}
if ((REMOTERANGEEND(t_tc) != (ptr)0)
|| (REMOTERANGESTART(t_tc) != (ptr)(uptr)-1)) {
if ((tgc->remote_range_end != (ptr)0)
|| (tgc->remote_range_start != (ptr)(uptr)-1)) {
S_checkheap_errors += 1;
printf("!!! nonempty thread REMOTERANGE %p-%p\n",
TO_VOIDP(REMOTERANGESTART(t_tc)),
TO_VOIDP(REMOTERANGEEND(t_tc)));
TO_VOIDP(tgc->remote_range_start),
TO_VOIDP(tgc->remote_range_end));
}
}
}
@ -1122,11 +1122,12 @@ ptr S_do_gc(IGEN max_cg, IGEN min_tg, IGEN max_tg, ptr count_roots) {
ptr ls;
for (ls = S_threads; ls != Snil; ls = Scdr(ls)) {
ptr t_tc = (ptr)THREADTC(Scar(ls));
BASELOC_AT(t_tc, s, new_g) = BASELOC_AT(t_tc, s, old_g); BASELOC_AT(t_tc, s, old_g) = (ptr)0;
NEXTLOC_AT(t_tc, s, new_g) = NEXTLOC_AT(t_tc, s, old_g); NEXTLOC_AT(t_tc, s, old_g) = (ptr)0;
BYTESLEFT_AT(t_tc, s, new_g) = BYTESLEFT_AT(t_tc, s, old_g); BYTESLEFT_AT(t_tc, s, old_g) = 0;
SWEEPLOC_AT(t_tc, s, new_g) = SWEEPLOC_AT(t_tc, s, old_g); SWEEPLOC_AT(t_tc, s, old_g) = 0;
SWEEPNEXT_AT(t_tc, s, new_g) = SWEEPNEXT_AT(t_tc, s, old_g); SWEEPNEXT_AT(t_tc, s, old_g) = 0;
thread_gc *tgc = THREAD_GC(t_tc);
tgc->base_loc[new_g][s] = tgc->base_loc[old_g][s]; tgc->base_loc[old_g][s] = (ptr)0;
tgc->next_loc[new_g][s] = tgc->next_loc[old_g][s]; tgc->next_loc[old_g][s] = (ptr)0;
tgc->bytes_left[new_g][s] = tgc->bytes_left[old_g][s]; tgc->bytes_left[old_g][s] = 0;
tgc->sweep_loc[new_g][s] = tgc->sweep_loc[old_g][s]; tgc->sweep_loc[old_g][s] = 0;
tgc->sweep_next[new_g][s] = tgc->sweep_next[old_g][s]; tgc->sweep_next[old_g][s] = NULL;
}
}
}

View File

@ -75,6 +75,7 @@ EXTERN ptr S_foreign_dynamic;
EXTERN struct S_G_struct {
/* scheme.c */
double thread_context[size_tc / sizeof(double)];
thread_gc main_thread_gc;
ptr active_threads_id;
ptr error_invoke_code_object;
ptr invoke_code_object;

View File

@ -434,8 +434,8 @@ static void s_showalloc(IBOOL show_dump, const char *outfn) {
/* add in bytes previously recorded */
bytes[g][s] += S_G.bytes_of_space[g][s];
/* add in bytes in active segments */
if (NEXTLOC_AT(tc, s, g) != FIX(0))
bytes[g][s] += (uptr)NEXTLOC_AT(tc, s, g) - (uptr)BASELOC_AT(tc, s, g);
if (THREAD_GC(tc)->next_loc[g][s] != FIX(0))
bytes[g][s] += (uptr)THREAD_GC(tc)->next_loc[g][s] - (uptr)THREAD_GC(tc)->base_loc[g][s];
}
}

View File

@ -1135,7 +1135,7 @@ extern void Sbuild_heap(kernel, custom_init) const char *kernel; void (*custom_i
* thread context and hence there is no parent thread context. */
tc = (ptr)THREADTC(S_create_thread_object("startup", tc));
#ifdef PTHREADS
s_thread_setspecific(S_tc_key, tc);
s_thread_setspecific(S_tc_key, TO_VOIDP(tc));
#endif
/* #scheme-init enables interrupts */

View File

@ -37,7 +37,7 @@ Low-level Memory management strategy:
#include <sys/types.h>
static void out_of_memory PROTO((void));
static void initialize_seginfo PROTO((seginfo *si, ptr tc, ISPC s, IGEN g));
static void initialize_seginfo PROTO((seginfo *si, thread_gc *creator, ISPC s, IGEN g));
static seginfo *allocate_segments PROTO((uptr nreq));
static void expand_segment_table PROTO((uptr base, uptr end, seginfo *si));
static void contract_segment_table PROTO((uptr base, uptr end));
@ -225,7 +225,7 @@ static INT find_index(iptr n) {
return (index < PARTIAL_CHUNK_POOLS-1) ? index : PARTIAL_CHUNK_POOLS-1;
}
static void initialize_seginfo(seginfo *si, NO_THREADS_UNUSED ptr tc, ISPC s, IGEN g) {
static void initialize_seginfo(seginfo *si, NO_THREADS_UNUSED thread_gc *creator, ISPC s, IGEN g) {
INT d;
si->space = s;
@ -235,7 +235,7 @@ static void initialize_seginfo(seginfo *si, NO_THREADS_UNUSED ptr tc, ISPC s, IG
si->use_marks = 0;
si->must_mark = 0;
#ifdef PTHREADS
si->creator_tc = tc;
si->creator = creator;
#endif
si->list_bits = NULL;
si->min_dirty_byte = 0xff;
@ -256,7 +256,7 @@ static void initialize_seginfo(seginfo *si, NO_THREADS_UNUSED ptr tc, ISPC s, IG
si->sweep_next = NULL;
}
iptr S_find_segments(tc, s, g, n) ptr tc; ISPC s; IGEN g; iptr n; {
iptr S_find_segments(creator, s, g, n) thread_gc *creator; ISPC s; IGEN g; iptr n; {
chunkinfo *chunk, *nextchunk;
seginfo *si, *nextsi, **prevsi;
iptr nunused_segs, j;
@ -280,7 +280,7 @@ iptr S_find_segments(tc, s, g, n) ptr tc; ISPC s; IGEN g; iptr n; {
}
chunk->nused_segs += 1;
initialize_seginfo(si, tc, s, g);
initialize_seginfo(si, creator, s, g);
si->next = S_G.occupied_segments[g][s];
S_G.occupied_segments[g][s] = si;
S_G.number_of_empty_segments -= 1;
@ -318,7 +318,7 @@ iptr S_find_segments(tc, s, g, n) ptr tc; ISPC s; IGEN g; iptr n; {
nextsi->next = S_G.occupied_segments[g][s];
S_G.occupied_segments[g][s] = si;
for (j = n, nextsi = si; j > 0; j -= 1, nextsi = nextsi->next) {
initialize_seginfo(nextsi, tc, s, g);
initialize_seginfo(nextsi, creator, s, g);
}
S_G.number_of_empty_segments -= n;
return si->number;
@ -338,7 +338,7 @@ iptr S_find_segments(tc, s, g, n) ptr tc; ISPC s; IGEN g; iptr n; {
/* we couldn't find space, so ask for more */
si = allocate_segments(n);
for (nextsi = si; n > 0; n -= 1, nextsi += 1) {
initialize_seginfo(nextsi, tc, s, g);
initialize_seginfo(nextsi, creator, s, g);
/* add segment to appropriate list of occupied segments */
nextsi->next = S_G.occupied_segments[g][s];
S_G.occupied_segments[g][s] = nextsi;

View File

@ -16,6 +16,8 @@
#include "system.h"
static thread_gc *free_thread_gcs;
/* locally defined functions */
#ifdef PTHREADS
static s_thread_rv_t start_thread PROTO((void *tc));
@ -55,28 +57,40 @@ void S_thread_init() {
or more places. */
ptr S_create_thread_object(who, p_tc) const char *who; ptr p_tc; {
ptr thread, tc;
thread_gc *tgc;
INT i;
tc_mutex_acquire();
if (S_threads == Snil) {
tc = TO_PTR(S_G.thread_context);
tgc = &S_G.main_thread_gc;
} else { /* clone parent */
ptr p_v = PARAMETERS(p_tc);
iptr i, n = Svector_length(p_v);
ptr v;
tc = TO_PTR(malloc(size_tc));
if (free_thread_gcs) {
tgc = free_thread_gcs;
free_thread_gcs = tgc->next;
} else
tgc = malloc(sizeof(thread_gc));
if (tc == (ptr)0)
S_error(who, "unable to malloc thread data structure");
memcpy(TO_VOIDP(tc), TO_VOIDP(p_tc), size_tc);
for (i = 0; i < num_thread_local_allocation_segments; i++) {
BASELOC(tc, i) = (ptr)0;
NEXTLOC(tc, i) = (ptr)0;
BYTESLEFT(tc, i) = 0;
SWEEPLOC(tc, i) = (ptr)0;
{
IGEN g; ISPC s;
for (g = 0; g <= static_generation; g++) {
for (s = 0; s <= max_real_space; s++) {
tgc->base_loc[g][s] = (ptr)0;
tgc->next_loc[g][s] = (ptr)0;
tgc->bytes_left[g][s] = 0;
tgc->sweep_loc[g][s] = (ptr)0;
}
}
}
v = S_vector_in(tc, space_new, 0, n);
@ -88,6 +102,9 @@ ptr S_create_thread_object(who, p_tc) const char *who; ptr p_tc; {
CODERANGESTOFLUSH(tc) = Snil;
}
GCDATA(tc) = TO_PTR(tgc);
tgc->tc = tc;
/* override nonclonable tc fields */
THREADNO(tc) = S_G.threadno;
S_G.threadno = S_add(S_G.threadno, FIX(1));
@ -141,9 +158,12 @@ ptr S_create_thread_object(who, p_tc) const char *who; ptr p_tc; {
LZ4OUTBUFFER(tc) = 0;
SWEEPER(tc) = -1;
REMOTERANGESTART(tc) = (ptr)(uptr)-1;
REMOTERANGEEND(tc) = (ptr)0;
tgc->sweeper = main_sweeper_index;
tgc->will_be_sweeper = main_sweeper_index;
tgc->remote_range_start = (ptr)(uptr)-1;
tgc->remote_range_end = (ptr)0;
tgc->pending_ephemerons = (ptr)0;
tc_mutex_release();
@ -158,8 +178,8 @@ IBOOL Sactivate_thread() { /* create or reactivate current thread */
ptr thread;
/* borrow base thread for now */
thread = S_create_thread_object("Sactivate_thread", S_G.thread_context);
s_thread_setspecific(S_tc_key, (ptr)THREADTC(thread));
thread = S_create_thread_object("Sactivate_thread", TO_PTR(S_G.thread_context));
s_thread_setspecific(S_tc_key, TO_VOIDP(THREADTC(thread)));
return 1;
} else {
reactivate_thread(tc)
@ -223,12 +243,13 @@ static IBOOL destroy_thread(tc) ptr tc; {
/* process remembered set before dropping allocation area */
S_scan_dirty((ptr *)EAP(tc), (ptr *)REAL_EAP(tc));
/* move thread-local allocation to global space */
/* close off thread-local allocation */
{
ISPC s; IGEN g;
thread_gc *tgc = THREAD_GC(tc);
for (g = 0; g <= static_generation; g++)
for (s = 0; s <= max_real_space; s++)
if (NEXTLOC_AT(tc, s, g))
if (tgc->next_loc[g][s])
S_close_off_thread_local_segment(tc, s, g);
}
@ -258,10 +279,18 @@ static IBOOL destroy_thread(tc) ptr tc; {
}
}
if (LZ4OUTBUFFER(tc) != NULL) free(LZ4OUTBUFFER(tc));
if (SIGNALINTERRUPTQUEUE(tc) != NULL) free(SIGNALINTERRUPTQUEUE(tc));
if (LZ4OUTBUFFER(tc) != (ptr)0) free(TO_VOIDP(LZ4OUTBUFFER(tc)));
if (SIGNALINTERRUPTQUEUE(tc) != (ptr)0) free(TO_VOIDP(SIGNALINTERRUPTQUEUE(tc)));
/* Never free a thread_gc, since it may be recorded in a segment
as the segment's creator. Recycle manually, instead. */
THREAD_GC(tc)->sweeper = main_sweeper_index;
THREAD_GC(tc)->tc = (ptr)0;
THREAD_GC(tc)->next = free_thread_gcs;
free_thread_gcs = THREAD_GC(tc);
free((void *)tc);
THREADTC(thread) = 0; /* mark it dead */
status = 1;
break;
@ -291,7 +320,7 @@ ptr S_fork_thread(thunk) ptr thunk; {
static s_thread_rv_t start_thread(p) void *p; {
ptr tc = (ptr)p; ptr cp;
s_thread_setspecific(S_tc_key, tc);
s_thread_setspecific(S_tc_key, TO_VOIDP(tc));
cp = CP(tc);
CP(tc) = Svoid; /* should hold calling code object, which we don't have */
@ -303,7 +332,7 @@ static s_thread_rv_t start_thread(p) void *p; {
/* find and destroy our thread */
destroy_thread(tc);
s_thread_setspecific(S_tc_key, (ptr)0);
s_thread_setspecific(S_tc_key, NULL);
s_thread_return;
}
@ -335,7 +364,7 @@ void S_mutex_acquire(m) scheme_mutex_t *m; {
if ((count = m->count) > 0 && s_thread_equal(m->owner, self)) {
if (count == most_positive_fixnum)
S_error1("mutex-acquire", "recursion limit exceeded for ~s", m);
S_error1("mutex-acquire", "recursion limit exceeded for ~s", TO_PTR(m));
m->count = count + 1;
return;
}
@ -353,7 +382,7 @@ INT S_mutex_tryacquire(m) scheme_mutex_t *m; {
if ((count = m->count) > 0 && s_thread_equal(m->owner, self)) {
if (count == most_positive_fixnum)
S_error1("mutex-acquire", "recursion limit exceeded for ~s", m);
S_error1("mutex-acquire", "recursion limit exceeded for ~s", TO_PTR(m));
m->count = count + 1;
return 0;
}
@ -374,7 +403,7 @@ void S_mutex_release(m) scheme_mutex_t *m; {
INT status;
if ((count = m->count) == 0 || !s_thread_equal(m->owner, self))
S_error1("mutex-release", "thread does not own mutex ~s", m);
S_error1("mutex-release", "thread does not own mutex ~s", TO_PTR(m));
if ((m->count = count - 1) == 0) {
m->owner = 0; /* needed for a memory model like ARM, for example */
@ -460,10 +489,10 @@ IBOOL S_condition_wait(c, m, t) s_thread_cond_t *c; scheme_mutex_t *m; ptr t; {
iptr collect_index = 0;
if ((count = m->count) == 0 || !s_thread_equal(m->owner, self))
S_error1("condition-wait", "thread does not own mutex ~s", m);
S_error1("condition-wait", "thread does not own mutex ~s", TO_PTR(m));
if (count != 1)
S_error1("condition-wait", "mutex ~s is recursively locked", m);
S_error1("condition-wait", "mutex ~s is recursively locked", TO_PTR(m));
if (t != Sfalse) {
/* Keep in sync with ts record in s/date.ss */

View File

@ -76,30 +76,24 @@ typedef int IFASLCODE; /* fasl type codes */
#define ALREADY_PTR(p) (p)
#define SG_AT_TO_INDEX(s, g) (((g) * (1 + max_real_space)) + (s))
#define BASELOC_AT(tc, s, g) BASELOC(tc, SG_AT_TO_INDEX(s, g))
#define NEXTLOC_AT(tc, s, g) NEXTLOC(tc, SG_AT_TO_INDEX(s, g))
#define BYTESLEFT_AT(tc, s, g) BYTESLEFT(tc, SG_AT_TO_INDEX(s, g))
#define SWEEPLOC_AT(tc, s, g) SWEEPLOC(tc, SG_AT_TO_INDEX(s, g))
#define SWEEPNEXT_AT(tc, s, g) SWEEPNEXT(tc, SG_AT_TO_INDEX(s, g))
/* inline allocation --- no mutex required */
/* find room allocates n bytes in space s and generation g into
* destination x, tagged with ty, punting to find_more_room if
* no space is left in the current segment. n is assumed to be
* an integral multiple of the object alignment. */
#define find_room_T(tc, s, g, t, n, T, x) do { \
iptr L_IDX = SG_AT_TO_INDEX(s, g); \
iptr N_BYTES = n; \
ptr X = NEXTLOC(tc, L_IDX); \
NEXTLOC(tc, L_IDX) = (ptr)((uptr)X + N_BYTES); \
if ((BYTESLEFT(tc, L_IDX) -= (n)) < 0) X = S_find_more_thread_room(tc, s, g, N_BYTES, X); \
(x) = T(TYPE(X, t)); \
#define find_gc_room_T(tgc, s, g, t, n, T, x) do { \
thread_gc *TGC = tgc; \
iptr N_BYTES = n; \
ptr X = TGC->next_loc[g][s]; \
TGC->next_loc[g][s] = (ptr)((uptr)X + N_BYTES); \
if ((TGC->bytes_left[g][s] -= (n)) < 0) X = S_find_more_gc_room(tgc, s, g, N_BYTES, X); \
(x) = T(TYPE(X, t)); \
} while(0)
#define find_room(tc, s, g, t, n, x) find_room_T(tc, s, g, t, n, ALREADY_PTR, x)
#define find_room_voidp(tc, s, g, n, x) find_room_T(tc, s, g, typemod, n, TO_VOIDP, x)
#define find_room(tc, s, g, t, n, x) find_gc_room_T(THREAD_GC(tc), s, g, t, n, ALREADY_PTR, x)
#define find_gc_room(tgc, s, g, t, n, x) find_gc_room_T(tgc, s, g, t, n, ALREADY_PTR, x)
#define find_room_voidp(tc, s, g, n, x) find_gc_room_T(THREAD_GC(tc), s, g, typemod, n, TO_VOIDP, x)
#define find_gc_room_voidp(tgc, s, g, n, x) find_gc_room_T(tgc, s, g, typemod, n, TO_VOIDP, x)
/* new-space inline allocation --- no mutex required */
/* Like `find_room`, but always `space_new` and generation 0,
@ -158,7 +152,7 @@ typedef struct _seginfo {
octet *list_bits; /* for `$list-bits-ref` and `$list-bits-set!` */
uptr number; /* the segment number */
#ifdef PTHREADS
ptr creator_tc; /* for GC parallelism heuristic; might not match an active thread unless old_space */
struct thread_gc *creator; /* for GC parallelism; might not have an active thread, unless old_space */
#endif
struct _chunkinfo *chunk; /* the chunk this segment belongs to */
struct _seginfo *next; /* pointer to the next seginfo (used in occupied_segments and unused_segs) */
@ -452,6 +446,47 @@ typedef struct {
#define AS_IMPLICIT_ATOMIC(T, X) X
#endif
/* A contiguous address range [start, end) in space s / generation g
   that one sweeper found on a segment it does not own, linked so it
   can be handed to the owning sweeper.
   NOTE(review): only the declaration is visible in this chunk; the
   queueing/processing code is elsewhere — confirm against the
   collector before relying on this description. */
typedef struct remote_range {
ISPC s;
IGEN g;
ptr start, end;
struct remote_range *next;         /* next queued range */
} remote_range;
/* Per-thread allocation/collection state, split out of the Scheme
   thread context (tc) so the collector's needs are detangled from the
   thread representation.  Reached from a tc via THREAD_GC(tc); the tgc
   points back via its tc field.  The main thread's instance is the
   statically allocated S_G.main_thread_gc; others come from malloc and
   are recycled through free_thread_gcs (never freed, since a seginfo
   may still record the tgc as its creator).
   All [ ][ ] arrays are indexed [generation][space]. */
typedef struct thread_gc {
ptr tc;                            /* owning thread context; (ptr)0 once the thread is destroyed */
struct thread_gc *next;            /* link in the free_thread_gcs recycle list */
ptr base_loc[static_generation+1][max_real_space+1];      /* start of the current allocation segment, or (ptr)0 */
ptr next_loc[static_generation+1][max_real_space+1];      /* bump-allocation pointer within that segment */
iptr bytes_left[static_generation+1][max_real_space+1];   /* bytes remaining; going negative triggers S_find_more_gc_room */
ptr orig_next_loc[max_real_space+1];
ptr sweep_loc[static_generation+1][max_real_space+1];     /* where sweeping resumes within the segment */
seginfo *sweep_next[static_generation+1][max_real_space+1]; /* chain of closed-off segments still to be swept */
ptr pending_ephemerons;            /* (ptr)0 when none */
ptr sweep_stack;
ptr sweep_stack_start;
ptr sweep_stack_limit;
int sweep_change;                  /* set to SWEEP_CHANGE_PROGRESS when a sweep makes progress */
int sweeper; /* parallel GC: sweeper thread identity; main_sweeper_index when not a parallel sweeper */
int will_be_sweeper;
struct thread_gc *remote_range_tgc;
ptr remote_range_start;            /* (ptr)(uptr)-1 when the remote range is empty */
ptr remote_range_end;              /* (ptr)0 when the remote range is empty */
iptr bitmask_overhead[static_generation+1];
} thread_gc;

/* A tc's gc-data field holds its thread_gc pointer */
#define THREAD_GC(tc) ((thread_gc *)TO_VOIDP(GCDATA(tc)))

/* sweeper slot used by the non-parallel ("main") sweeper, one past the
   parallel sweeper slots */
#define main_sweeper_index maximum_parallel_collect_threads
#ifdef __MINGW32__
/* With MinGW on 64-bit Windows, setjmp/longjmp is not reliable. Using
__builtin_setjmp/__builtin_longjmp is reliable, but

View File

@ -1498,11 +1498,7 @@
(define-constant virtual-register-count 16)
(define-constant static-generation 7)
(define-constant num-generations (fx+ (constant static-generation) 1))
(define-constant num-spaces (fx+ (constant max-real-space) 1))
(define-constant num-thread-local-allocation-segments (fx* (constant num-generations)
(constant num-spaces)))
(define-constant maximum-parallel-collect-threads 8)
(define-constant maximum-parallel-collect-threads 16)
;;; make sure gc sweeps all ptrs
(define-primitive-structure-disps tc typemod
@ -1578,23 +1574,7 @@
[ptr DSTBV]
[ptr SRCBV]
[double fpregs (constant asm-fpreg-max)]
;; thread-local allocation and parallel collection:
[xptr base-loc (constant num-thread-local-allocation-segments)]
[xptr next-loc (constant num-thread-local-allocation-segments)]
[iptr bytes-left (constant num-thread-local-allocation-segments)]
[xptr orig-next-loc (constant num-spaces)]
[xptr sweep-loc (constant num-thread-local-allocation-segments)]
[xptr sweep-next (constant num-thread-local-allocation-segments)]
[xptr pending-ephemerons]
[iptr sweeper]
[xptr sweep-stack]
[xptr sweep-stack-start]
[xptr sweep-stack-limit]
[iptr sweep-change]
[iptr remote-sweeper]
[xptr remote-range-start]
[xptr remote-range-end]
[iptr bitmask-overhead (constant num-generations)]))
[xptr gc-data]))
(define tc-field-list
(let f ([ls (oblist)] [params '()])

View File

@ -243,10 +243,10 @@
;; A stack segment has a single owner, so it's ok for us
;; to sweep the stack content, even though it's on a
;; remote segment relative to the current sweeper.
(RECORD_REMOTE_RANGE _tc_ _ _size_ s_si)]
(RECORD_REMOTE_RANGE _tgc_ _ _size_ s_si)]
[else
(set! (continuation-stack _)
(copy_stack _tc_
(copy_stack _tgc_
(continuation-stack _)
(& (continuation-stack-length _))
(continuation-stack-clength _)))]))]
@ -687,9 +687,9 @@
(define-trace-macro (add-ephemeron-to-pending)
(case-mode
[(sweep mark)
(add_ephemeron_to_pending _tc_ _)]
(add_ephemeron_to_pending _tgc_ _)]
[measure
(add_ephemeron_to_pending_measure _tc_ _)]
(add_ephemeron_to_pending_measure _tgc_ _)]
[else]))
(define-trace-macro (assert-ephemeron-size-ok)
@ -744,7 +744,7 @@
(set! (continuation-stack-length _copy_) (continuation-stack-clength _))
;; May need to recur at end to promote link:
(GC_TC_MUTEX_ACQUIRE)
(set! conts_to_promote (S_cons_in _tc_ space_new 0 _copy_ conts_to_promote))
(set! conts_to_promote (S_cons_in (-> _tgc_ tc) space_new 0 _copy_ conts_to_promote))
(GC_TC_MUTEX_RELEASE)]
[else
(copy continuation-stack-length)])]
@ -794,7 +794,7 @@
(SEGMENT_IS_LOCAL v_si val))
(trace-symcode symbol-pvalue val)]
[else
(RECORD_REMOTE_RANGE _tc_ _ _size_ v_si)])]
(RECORD_REMOTE_RANGE _tgc_ _ _size_ v_si)])]
[off (trace-symcode symbol-pvalue val)])]
[else
(trace-symcode symbol-pvalue val)]))
@ -816,7 +816,7 @@
;; swept already. NB: assuming keyvals are always pairs.
(when (&& (!= next Sfalse) (OLDSPACE keyval))
(GC_TC_MUTEX_ACQUIRE)
(set! tlcs_to_rehash (S_cons_in _tc_ space_new 0 _copy_ tlcs_to_rehash))
(set! tlcs_to_rehash (S_cons_in (-> _tgc_ tc) space_new 0 _copy_ tlcs_to_rehash))
(GC_TC_MUTEX_RELEASE))]
[else
(trace-nonself tlc-keyval)
@ -869,7 +869,7 @@
(trace-record-type-pm num rtd)]
[else
;; Try again in the bignum's sweeper
(RECORD_REMOTE_RANGE _tc_ _ _size_ pm_si)
(RECORD_REMOTE_RANGE _tgc_ _ _size_ pm_si)
(set! num S_G.zero_length_bignum)])]
[off
(trace-record-type-pm num rtd)])]
@ -948,7 +948,7 @@
(let* ([grtd : IGEN (GENERATION c_rtd)])
(set! (array-ref (array-ref S_G.countof grtd) countof_rtd_counts) += 1)
;; Allocate counts struct in same generation as rtd. Initialize timestamp & counts.
(find_room _tc_ space_data grtd type_typed_object size_rtd_counts counts)
(find_gc_room _tgc_ space_data grtd type_typed_object size_rtd_counts counts)
(set! (rtd-counts-type counts) type_rtd_counts)
(set! (rtd-counts-timestamp counts) (array-ref S_G.gctimestamp 0))
(let* ([g : IGEN 0])
@ -961,7 +961,7 @@
;; For max_copied_generation, the list will get copied again in `rtds_with_counts` fixup;
;; meanwhile, allocating in `space_impure` would copy and sweep old list entries causing
;; otherwise inaccessible rtds to be retained
(S_cons_in _tc_
(S_cons_in (-> _tgc_ tc)
(cond [(<= grtd MAX_CG) space_new] [else space_impure])
(cond [(<= grtd MAX_CG) 0] [else grtd])
c_rtd
@ -1012,7 +1012,7 @@
(when (OLDSPACE old_stack)
(let* ([clength : iptr (- (cast uptr (SFP tc)) (cast uptr old_stack))])
;; Include SFP[0], which contains the return address
(set! (tc-scheme-stack tc) (copy_stack _tc_
(set! (tc-scheme-stack tc) (copy_stack _tgc_
old_stack
(& (tc-scheme-stack-size tc))
(+ clength (sizeof ptr))))
@ -1110,7 +1110,7 @@
(trace-pure (* (ENTRYNONCOMPACTLIVEMASKADDR oldret)))
(set! num (ENTRYLIVEMASK oldret))]
[else
(RECORD_REMOTE_RANGE _tc_ _ _size_ n_si)
(RECORD_REMOTE_RANGE _tgc_ _ _size_ n_si)
(set! num S_G.zero_length_bignum)])])
(let* ([index : iptr (BIGLEN num)])
(while
@ -1216,16 +1216,16 @@
[(-> t_si use_marks)
(cond
[(! (marked t_si t))
(mark_typemod_data_object _tc_ t n t_si)])]
(mark_typemod_data_object _tgc_ t n t_si)])]
[else
(let* ([oldt : ptr t])
(find_room _tc_ space_data from_g typemod n t)
(find_gc_room _tgc_ space_data from_g typemod n t)
(memcpy_aligned (TO_VOIDP t) (TO_VOIDP oldt) n))])]
[else
(RECORD_REMOTE_RANGE _tc_ _ _size_ t_si)])))
(RECORD_REMOTE_RANGE _tgc_ _ _size_ t_si)])))
(set! (reloc-table-code t) _)
(set! (code-reloc _) t)])
(S_record_code_mod tc_in (cast uptr (TO_PTR (& (code-data _ 0)))) (cast uptr (code-length _)))]
(S_record_code_mod (-> _tgc_ tc) (cast uptr (TO_PTR (& (code-data _ 0)))) (cast uptr (code-length _)))]
[vfasl-sweep
;; no vfasl_register_pointer, since relink_code can handle it
(set! (reloc-table-code t) (cast ptr (ptr_diff _ (-> vfi base_addr))))
@ -1463,7 +1463,7 @@
[else "void"])
name
(case (lookup 'mode config)
[(copy mark sweep sweep-in-old measure) "ptr tc_in, "]
[(copy mark sweep sweep-in-old measure) "thread_gc *tgc, "]
[(vfasl-copy vfasl-sweep)
"vfasl_info *vfi, "]
[else ""])
@ -1504,12 +1504,12 @@
(case (lookup 'mode config)
[(copy)
(code-block
"check_triggers(tc_in, si);"
"check_triggers(tgc, si);"
(code-block
"ptr new_p;"
"IGEN tg = TARGET_GENERATION(si);"
(body)
"SWEEPCHANGE(tc_in) = SWEEP_CHANGE_PROGRESS;"
"tgc->sweep_change = SWEEP_CHANGE_PROGRESS;"
"FWDMARKER(p) = forward_marker;"
"FWDADDRESS(p) = new_p;"
(and (lookup 'maybe-backreferences? config #f)
@ -1518,10 +1518,10 @@
"return tg;"))]
[(mark)
(code-block
"check_triggers(tc_in, si);"
"check_triggers(tgc, si);"
(ensure-segment-mark-mask "si" "")
(body)
"SWEEPCHANGE(tc_in) = SWEEP_CHANGE_PROGRESS;"
"tgc->sweep_change = SWEEP_CHANGE_PROGRESS;"
"ADD_BACKREFERENCE(p, si->generation);"
"return si->generation;")]
[(sweep)
@ -1703,7 +1703,7 @@
[(and preserve-flonum-eq?
(eq? 'copy (lookup 'mode config)))
(code (copy-statement field config)
"flonum_set_forwarded(tc_in, p, si);"
"flonum_set_forwarded(tgc, p, si);"
"FLONUM_FWDADDRESS(p) = new_p;"
(statements (cdr l) config))]
[else
@ -1853,7 +1853,7 @@
(hashtable-set! (lookup 'used config) 'p_sz #t)
(code (format "~a, ~a, p_sz, new_p);"
(case mode
[(copy) "find_room(tc_in, p_spc, tg"]
[(copy) "find_gc_room(tgc, p_spc, tg"]
[(vfasl-copy) "FIND_ROOM(vfi, p_vspc"])
(as-c 'type (lookup 'basetype config)))
(statements (let ([extra (lookup 'copy-extra config #f)])
@ -1904,7 +1904,7 @@
(unless (null? (cdr l))
(error 'skip-forwarding "not at end"))
(code "*dest = new_p;"
"SWEEPCHANGE(tc_in) = SWEEP_CHANGE_PROGRESS;"
"tgc->sweep_change = SWEEP_CHANGE_PROGRESS;"
"return tg;")]
[else
(statements (cdr l) config)])]
@ -2072,7 +2072,7 @@
[(copy) "tg"]
[(mark) "TARGET_GENERATION(si)"]
[else "target_generation"])]
[`_tc_ "tc_in"]
[`_tgc_ "tgc"]
[`_backreferences?_
(if (lookup 'maybe-backreferences? config #f)
"BACKREFERENCES_ENABLED"
@ -2242,7 +2242,7 @@
"{ /* measure */"
(format " ptr r_p = ~a;" e)
" if (!IMMEDIATE(r_p))"
" push_measure(tc_in, r_p);"
" push_measure(tgc, r_p);"
"}"))
(define (copy-statement field config)
@ -2347,7 +2347,7 @@
" while (seg < end_seg) {"
" mark_si = SegInfo(seg);"
" g = mark_si->generation;"
" if (!fully_marked_mask[g]) init_fully_marked_mask(tc_in, g);"
" if (!fully_marked_mask[g]) init_fully_marked_mask(tgc, g);"
" mark_si->marked_mask = fully_marked_mask[g];"
" mark_si->marked_count = bytes_per_segment;"
" seg++;"
@ -2440,7 +2440,7 @@
(define (ensure-segment-mark-mask si inset)
(code
(format "~aif (!~a->marked_mask) {" inset si)
(format "~a init_mask(tc_in, ~a->marked_mask, ~a->generation, 0);"
(format "~a init_mask(tgc, ~a->marked_mask, ~a->generation, 0);"
inset si si)
(format "~a}" inset)))
@ -2667,7 +2667,7 @@
(parallel? ,parallel?))))
(print-code (generate "object_directly_refers_to_self"
`((mode self-test))))
(print-code (code "static void mark_typemod_data_object(ptr tc_in, ptr p, uptr p_sz, seginfo *si)"
(print-code (code "static void mark_typemod_data_object(thread_gc *tgc, ptr p, uptr p_sz, seginfo *si)"
(code-block
(ensure-segment-mark-mask "si" "")
(mark-statement '(one-bit no-sweep)