From 3f3cf5ab83702b5542c75a7968a34f45c7069cc5 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Fri, 4 Sep 2020 09:03:05 -0600 Subject: [PATCH] cs: more thread-local allocation Improved support for thread-location allocation (and using more fine-grained locks in fasl reading) may provide a small direct benefit, but the change is mainly intended as setup for more parallelism in the collector. --- .makefile | 2 +- Makefile | 12 +- pkgs/base/info.rkt | 2 +- racket/src/ChezScheme/c/alloc.c | 159 ++++++++++----- racket/src/ChezScheme/c/externs.h | 3 + racket/src/ChezScheme/c/fasl.c | 14 +- racket/src/ChezScheme/c/gc.c | 182 +++++++++++++----- racket/src/ChezScheme/c/gcwrapper.c | 18 +- racket/src/ChezScheme/c/globals.h | 3 +- racket/src/ChezScheme/c/prim5.c | 2 - racket/src/ChezScheme/c/schsig.c | 2 - racket/src/ChezScheme/c/segment.c | 1 + racket/src/ChezScheme/c/thread.c | 22 ++- racket/src/ChezScheme/c/types.h | 30 ++- racket/src/ChezScheme/c/vfasl.c | 4 + racket/src/ChezScheme/csug/smgmt.stex | 6 +- racket/src/ChezScheme/makefiles/Mf-install.in | 2 +- racket/src/ChezScheme/mats/7.ms | 36 ++-- .../mats/root-experr-compile-0-f-f-f | 2 +- racket/src/ChezScheme/rktboot/main.rkt | 5 + racket/src/ChezScheme/s/cmacros.ss | 12 +- racket/src/ChezScheme/s/mkgc.ss | 17 +- racket/src/cs/compile-file.ss | 2 +- racket/src/version/racket_version.h | 2 +- 24 files changed, 377 insertions(+), 163 deletions(-) diff --git a/.makefile b/.makefile index 0cc397c467..4fdbd22782 100644 --- a/.makefile +++ b/.makefile @@ -335,7 +335,7 @@ RACKET_FOR_BOOTFILES = $(RACKET) RACKET_FOR_BUILD = $(RACKET) # This branch name changes each time the pb boot files are updated: -PB_BRANCH == circa-7.8.0.8-3 +PB_BRANCH == circa-7.8.0.10-1 PB_REPO == https://github.com/racket/pb # Alternative source for Chez Scheme boot files, normally set by diff --git a/Makefile b/Makefile index 64f6583af7..5f7a2d2ef7 100644 --- a/Makefile +++ b/Makefile @@ -45,7 +45,7 @@ RACKETCS_SUFFIX = RACKET = RACKET_FOR_BOOTFILES = $(RACKET) RACKET_FOR_BUILD = $(RACKET) -PB_BRANCH = circa-7.8.0.8-3 +PB_BRANCH = circa-7.8.0.10-1 PB_REPO = https://github.com/racket/pb EXTRA_REPOS_BASE = CS_CROSS_SUFFIX = @@ -304,14 +304,14 @@ maybe-fetch-pb: if [ "$(RACKET_FOR_BOOTFILES)" = "" ] ; then $(MAKE) fetch-pb EXTRA_REPOS_BASE="$(EXTRA_REPOS_BASE)" ; fi fetch-pb-from: mkdir -p racket/src/ChezScheme/boot - if [ ! -d racket/src/ChezScheme/boot/pb ] ; then git clone -q -b circa-7.8.0.8-3 https://github.com/racket/pb racket/src/ChezScheme/boot/pb ; else cd racket/src/ChezScheme/boot/pb && git fetch -q origin circa-7.8.0.8-3:remotes/origin/circa-7.8.0.8-3 ; fi - cd racket/src/ChezScheme/boot/pb && git checkout -q circa-7.8.0.8-3 + if [ ! -d racket/src/ChezScheme/boot/pb ] ; then git clone -q -b circa-7.8.0.10-1 https://github.com/racket/pb racket/src/ChezScheme/boot/pb ; else cd racket/src/ChezScheme/boot/pb && git fetch -q origin circa-7.8.0.10-1:remotes/origin/circa-7.8.0.10-1 ; fi + cd racket/src/ChezScheme/boot/pb && git checkout -q circa-7.8.0.10-1 pb-stage: - cd racket/src/ChezScheme/boot/pb && git branch circa-7.8.0.8-3 - cd racket/src/ChezScheme/boot/pb && git checkout circa-7.8.0.8-3 + cd racket/src/ChezScheme/boot/pb && git branch circa-7.8.0.10-1 + cd racket/src/ChezScheme/boot/pb && git checkout circa-7.8.0.10-1 cd racket/src/ChezScheme/boot/pb && git add . 
&& git commit --amend -m "new build" pb-push: - cd racket/src/ChezScheme/boot/pb && git push -u origin circa-7.8.0.8-3 + cd racket/src/ChezScheme/boot/pb && git push -u origin circa-7.8.0.10-1 win-cs-base: IF "$(RACKET_FOR_BUILD)" == "" $(MAKE) win-bc-then-cs-base SETUP_BOOT_MODE=--boot WIN32_BUILD_LEVEL=bc PLAIN_RACKET=racket\racketbc DISABLE_STATIC_LIBS="$(DISABLE_STATIC_LIBS)" EXTRA_REPOS_BASE="$(EXTRA_REPOS_BASE)" GIT_CLONE_ARGS_qq="$(GIT_CLONE_ARGS_qq)" JOB_OPTIONS="$(JOB_OPTIONS)" PLT_SETUP_OPTIONS="$(PLT_SETUP_OPTIONS)" RACKETBC_SUFFIX="$(RACKETBC_SUFFIX)" RACKETCS_SUFFIX="$(RACKETCS_SUFFIX)" IF not "$(RACKET_FOR_BUILD)" == "" $(MAKE) win-just-cs-base SETUP_BOOT_MODE=--chain DISABLE_STATIC_LIBS="$(DISABLE_STATIC_LIBS)" EXTRA_REPOS_BASE="$(EXTRA_REPOS_BASE)" GIT_CLONE_ARGS_qq="$(GIT_CLONE_ARGS_qq)" JOB_OPTIONS="$(JOB_OPTIONS)" PLT_SETUP_OPTIONS="$(PLT_SETUP_OPTIONS)" RACKETCS_SUFFIX="$(RACKETCS_SUFFIX)" RACKET_FOR_BUILD="$(RACKET_FOR_BUILD)" diff --git a/pkgs/base/info.rkt b/pkgs/base/info.rkt index 6872b774a5..9aa582818c 100644 --- a/pkgs/base/info.rkt +++ b/pkgs/base/info.rkt @@ -12,7 +12,7 @@ (define collection 'multi) -(define version "7.8.0.9") +(define version "7.8.0.10") (define deps `("racket-lib" ["racket" #:version ,version])) diff --git a/racket/src/ChezScheme/c/alloc.c b/racket/src/ChezScheme/c/alloc.c index 832fa1712d..18f2a3b3fd 100644 --- a/racket/src/ChezScheme/c/alloc.c +++ b/racket/src/ChezScheme/c/alloc.c @@ -29,7 +29,6 @@ void S_alloc_init() { S_G.bytes_of_generation[g] = 0; for (s = 0; s <= max_real_space; s++) { S_G.base_loc[g][s] = FIX(0); - S_G.first_loc[g][s] = FIX(0); S_G.next_loc[g][s] = FIX(0); S_G.bytes_left[g][s] = 0; S_G.bytes_of_space[g][s] = 0; @@ -157,6 +156,9 @@ ptr S_compute_bytes_allocated(xg, xs) ptr xg; ptr xs; { /* add in bytes in active segments */ if (next_loc != FIX(0)) n += (uptr)next_loc - (uptr)S_G.base_loc[g][s]; + next_loc = NEXTLOC_AT(tc, s, g); + if (next_loc != FIX(0)) + n += (uptr)next_loc - (uptr)BASELOC_AT(tc, s, g); if (s == space_data) { /* don't count space used for bitmaks */ n -= S_G.bitmask_overhead[g]; @@ -184,6 +186,54 @@ static void maybe_fire_collector() { S_fire_collector(); } +static ptr more_room_segment(ISPC s, IGEN g, iptr n, iptr *_new_bytes) +{ + iptr nsegs, seg; + ptr new; + + S_pants_down += 1; + + nsegs = (uptr)(n + ptr_bytes + bytes_per_segment - 1) >> segment_offset_bits; + + /* block requests to minimize fragmentation and improve cache locality */ + if (s == space_code && nsegs < 16) nsegs = 16; + + seg = S_find_segments(s, g, nsegs); + new = build_ptr(seg, 0); + + *_new_bytes = nsegs * bytes_per_segment; + + return new; +} + +static void close_off_segment(ptr old, ptr base_loc, ptr sweep_loc, ISPC s, IGEN g) +{ + if (base_loc) { + seginfo *si; + uptr bytes = (uptr)old - (uptr)base_loc; + + /* increment bytes_allocated by the closed-off partial segment */ + S_G.bytes_of_space[g][s] += bytes; + S_G.bytes_of_generation[g] += bytes; + + /* lay down an end-of-segment marker */ + *(ptr*)TO_VOIDP(old) = forward_marker; + + /* add to sweep list */ + si = SegInfo(addr_get_segment(base_loc)); + si->sweep_next = S_G.to_sweep[g][s]; + si->sweep_start = sweep_loc; + S_G.to_sweep[g][s] = si; + } +} + +static void more_room_done(IGEN g) +{ + if (g == 0 && S_pants_down == 1) maybe_fire_collector(); + + S_pants_down -= 1; +} + /* find_more_room * S_find_more_room is called from the macro find_room when * the current segment is too full to fit the allocation. 
@@ -193,49 +243,67 @@ static void maybe_fire_collector() { * gc where the end of this segment is and where the next * segment of this type resides. Allocation occurs from the * beginning of the newly obtained segment. The need for the - * eos marker explains the (2 * ptr_bytes) byte factor in + * eos marker explains the ptr_bytes byte factor in * S_find_more_room. */ /* S_find_more_room is always called with mutex */ ptr S_find_more_room(s, g, n, old) ISPC s; IGEN g; iptr n; ptr old; { - iptr nsegs, seg; ptr new; + iptr new_bytes; + + close_off_segment(old, S_G.base_loc[g][s], S_G.sweep_loc[g][s], s, g); - S_pants_down += 1; + new = more_room_segment(s, g, n, &new_bytes); - nsegs = (uptr)(n + 2 * ptr_bytes + bytes_per_segment - 1) >> segment_offset_bits; - - /* block requests to minimize fragmentation and improve cache locality */ - if (s == space_code && nsegs < 16) nsegs = 16; - - seg = S_find_segments(s, g, nsegs); - new = build_ptr(seg, 0); - - if (old == FIX(0)) { - /* first object of this space */ - S_G.first_loc[g][s] = new; - } else { - uptr bytes = (uptr)old - (uptr)S_G.base_loc[g][s]; - /* increment bytes_allocated by the closed-off partial segment */ - S_G.bytes_of_space[g][s] += bytes; - S_G.bytes_of_generation[g] += bytes; - /* lay down an end-of-segment marker */ - *(ptr*)TO_VOIDP(old) = forward_marker; - *((ptr*)TO_VOIDP(old) + 1) = new; - } - - /* base address of current block of segments to track amount of allocation */ + /* base address of current block of segments to track amount of allocation + and to register a closed-off segment in the sweep list */ S_G.base_loc[g][s] = new; + /* in case a GC has started: */ + S_G.sweep_loc[g][s] = new; + S_G.next_loc[g][s] = (ptr)((uptr)new + n); - S_G.bytes_left[g][s] = (nsegs * bytes_per_segment - n) - 2 * ptr_bytes; + S_G.bytes_left[g][s] = (new_bytes - n) - ptr_bytes; - if (g == 0 && S_pants_down == 1) maybe_fire_collector(); - - S_pants_down -= 1; + more_room_done(g); + return new; } +ptr S_find_more_thread_room(ptr tc, ISPC s, IGEN g, iptr n, ptr old) { + ptr new; + iptr new_bytes; + + tc_mutex_acquire() + + /* closing off segment effectively moves to global space: */ + close_off_segment(old, BASELOC_AT(tc, s, g), SWEEPLOC_AT(tc, s, g), s, g); + + new = more_room_segment(s, g, n, &new_bytes); + + BASELOC_AT(tc, s, g) = new; + SWEEPLOC_AT(tc, s, g) = new; + BYTESLEFT_AT(tc, s, g) = (new_bytes - n) - ptr_bytes; + NEXTLOC_AT(tc, s, g) = (ptr)((uptr)new + n); + + more_room_done(g); + + tc_mutex_release() + + return new; +} + +/* tc_mutex must be held */ +void S_close_off_thread_local_segment(ptr tc, ISPC s, IGEN g) { + /* closing off segment effectively moves to global space: */ + close_off_segment(NEXTLOC_AT(tc, s, g), BASELOC_AT(tc, s, g), SWEEPLOC_AT(tc, s, g), s, g); + + BASELOC_AT(tc, s, g) = (ptr)0; + BYTESLEFT_AT(tc, s, g) = 0; + NEXTLOC_AT(tc, s, g) = (ptr)0; + SWEEPLOC_AT(tc, s, g) = (ptr)0; +} + /* S_reset_allocation_pointer is always called with mutex */ /* We always allocate exactly one segment for the allocation area, since we can get into hot water with formerly locked objects, specifically @@ -508,8 +576,8 @@ void S_list_bits_set(p, bits) ptr p; iptr bits; { si->list_bits[segment_bitmap_byte(p)] |= segment_bitmap_bits(p, bits); } -/* S_cons_in is always called with mutex */ -ptr S_cons_in(s, g, car, cdr) ISPC s; IGEN g; ptr car, cdr; { +/* tc_mutex must be held */ +ptr S_cons_in_global(s, g, car, cdr) ISPC s; IGEN g; ptr car, cdr; { ptr p; find_room(s, g, type_pair, size_pair, p); @@ -518,6 +586,16 @@ ptr 
S_cons_in(s, g, car, cdr) ISPC s; IGEN g; ptr car, cdr; { return p; } +ptr S_cons_in(s, g, car, cdr) ISPC s; IGEN g; ptr car, cdr; { + ptr tc = get_thread_context(); + ptr p; + + thread_find_room_g(tc, s, g, type_pair, size_pair, p); + INITCAR(p) = car; + INITCDR(p) = cdr; + return p; +} + ptr Scons(car, cdr) ptr car, cdr; { ptr tc = get_thread_context(); ptr p; @@ -528,11 +606,11 @@ ptr Scons(car, cdr) ptr car, cdr; { return p; } -/* S_ephemeron_cons_in is always called with mutex */ ptr S_ephemeron_cons_in(gen, car, cdr) IGEN gen; ptr car, cdr; { ptr p; + ptr tc = get_thread_context(); - find_room(space_ephemeron, gen, type_pair, size_ephemeron, p); + thread_find_room_g(tc, space_ephemeron, gen, type_pair, size_ephemeron, p); INITCAR(p) = car; INITCDR(p) = cdr; EPHEMERONPREVREF(p) = 0; @@ -736,13 +814,13 @@ ptr S_closure(cod, n) ptr cod; iptr n; { return p; } -/* S_mkcontinuation is always called with mutex */ ptr S_mkcontinuation(s, g, nuate, stack, length, clength, link, ret, winders, attachments) ISPC s; IGEN g; ptr nuate; ptr stack; iptr length; iptr clength; ptr link; ptr ret; ptr winders; ptr attachments; { ptr p; + ptr tc = get_thread_context(); - find_room(s, g, type_closure, size_continuation, p); + thread_find_room_g(tc, s, g, type_closure, size_continuation, p); CLOSENTRY(p) = nuate; CONTSTACK(p) = stack; CONTLENGTH(p) = length; @@ -968,12 +1046,11 @@ ptr S_bignum(tc, n, sign) ptr tc; iptr n; IBOOL sign; { return p; } -/* S_code is always called with mutex */ ptr S_code(tc, type, n) ptr tc; iptr type, n; { ptr p; iptr d; d = size_code(n); - find_room(space_code, 0, type_typed_object, d, p); + thread_find_room_g(tc, space_code, 0, type_typed_object, d, p); CODETYPE(p) = type; CODELEN(p) = n; /* we record the code modification here, even though we haven't @@ -994,11 +1071,7 @@ ptr S_relocation_table(n) iptr n; { } ptr S_weak_cons(ptr car, ptr cdr) { - ptr p; - tc_mutex_acquire(); - p = S_cons_in(space_weakpair, 0, car, cdr); - tc_mutex_release(); - return p; + return S_cons_in(space_weakpair, 0, car, cdr); } ptr S_phantom_bytevector(sz) uptr sz; { diff --git a/racket/src/ChezScheme/c/externs.h b/racket/src/ChezScheme/c/externs.h index 875b213e35..173995b87a 100644 --- a/racket/src/ChezScheme/c/externs.h +++ b/racket/src/ChezScheme/c/externs.h @@ -67,6 +67,8 @@ extern ptr S_compute_bytes_allocated PROTO((ptr xg, ptr xs)); extern ptr S_bytes_finalized PROTO(()); extern ptr S_find_more_room PROTO((ISPC s, IGEN g, iptr n, ptr old)); extern void S_record_new_dirty_card PROTO((ptr *ppp, IGEN to_g)); +extern ptr S_find_more_thread_room PROTO((ptr tc, IGEN g, ISPC s, iptr n, ptr old)); +extern void S_close_off_thread_local_segment PROTO((ptr tc, ISPC s, IGEN g)); extern void S_dirty_set PROTO((ptr *loc, ptr x)); extern void S_mark_card_dirty PROTO((uptr card, IGEN to_g)); extern void S_scan_dirty PROTO((ptr *p, ptr *endp)); @@ -76,6 +78,7 @@ extern ptr S_get_more_room_help PROTO((ptr tc, uptr ap, uptr type, uptr size)); extern ptr S_list_bits_ref PROTO((ptr p)); extern void S_list_bits_set PROTO((ptr p, iptr bits)); extern ptr S_cons_in PROTO((ISPC s, IGEN g, ptr car, ptr cdr)); +extern ptr S_cons_in_global PROTO((ISPC s, IGEN g, ptr car, ptr cdr)); extern ptr S_ephemeron_cons_in PROTO((IGEN g, ptr car, ptr cdr)); extern ptr S_symbol PROTO((ptr name)); extern ptr S_rational PROTO((ptr n, ptr d)); diff --git a/racket/src/ChezScheme/c/fasl.c b/racket/src/ChezScheme/c/fasl.c index c78412df72..61da3ae21c 100644 --- a/racket/src/ChezScheme/c/fasl.c +++ b/racket/src/ChezScheme/c/fasl.c @@ 
-309,13 +309,10 @@ ptr S_fasl_read(INT fd, IFASLCODE situation, ptr path, ptr externals) { ptr tc = get_thread_context(); ptr x; struct unbufFaslFileObj uffo; - /* acquire mutex in case we modify code pages */ - tc_mutex_acquire() uffo.path = path; uffo.type = UFFO_TYPE_FD; uffo.fd = fd; x = fasl_entry(tc, situation, &uffo, externals); - tc_mutex_release() return x; } @@ -324,11 +321,9 @@ ptr S_bv_fasl_read(ptr bv, int ty, uptr offset, uptr len, ptr path, ptr external ptr x; struct unbufFaslFileObj uffo; /* acquire mutex in case we modify code pages */ - tc_mutex_acquire() uffo.path = path; uffo.type = UFFO_TYPE_BV; x = bv_fasl_entry(tc, bv, ty, offset, len, &uffo, externals); - tc_mutex_release() return x; } @@ -814,6 +809,8 @@ static void faslin(ptr tc, ptr *x, ptr t, ptr *pstrbuf, faslFile f) { faslin(tc, &rtd_uid, t, pstrbuf, f); + tc_mutex_acquire() + /* look for rtd on uid's property list */ plist = SYMSPLIST(rtd_uid); for (ls = plist; ls != Snil; ls = Scdr(Scdr(ls))) { @@ -827,6 +824,7 @@ static void faslin(ptr tc, ptr *x, ptr t, ptr *pstrbuf, faslFile f) { if (!rtd_equiv(tmp, rtd)) S_error2("", "incompatible record type ~s in ~a", RECORDDESCNAME(tmp), f->uf->path); } + tc_mutex_release() return; } } @@ -839,6 +837,9 @@ static void faslin(ptr tc, ptr *x, ptr t, ptr *pstrbuf, faslFile f) { /* register rtd on uid's property list */ SETSYMSPLIST(rtd_uid, Scons(S_G.rtd_key, Scons(rtd, plist))); + + tc_mutex_release() + return; } case fasl_type_record: { @@ -1251,7 +1252,8 @@ static void fasl_record(ptr tc, ptr *x, ptr t, ptr *pstrbuf, faslFile f, uptr si } } -/* Result: 0 => interned; 1 => replaced; -1 => inconsistent */ +/* Call with tc mutex. + Result: 0 => interned; 1 => replaced; -1 => inconsistent */ int S_fasl_intern_rtd(ptr *x) { ptr rtd, rtd_uid, plist, ls; diff --git a/racket/src/ChezScheme/c/gc.c b/racket/src/ChezScheme/c/gc.c index 2349538b7e..09b360058f 100644 --- a/racket/src/ChezScheme/c/gc.c +++ b/racket/src/ChezScheme/c/gc.c @@ -131,7 +131,7 @@ static void sweep_in_old PROTO((ptr p)); static void sweep_object_in_old PROTO((ptr p)); static IBOOL object_directly_refers_to_self PROTO((ptr p)); static ptr copy_stack PROTO((ptr old, iptr *length, iptr clength)); -static void resweep_weak_pairs PROTO((seginfo *oldweakspacesegments)); +static void resweep_weak_pairs PROTO((ptr tc, seginfo *oldweakspacesegments)); static void forward_or_bwp PROTO((ptr *pp, ptr p)); static void sweep_generation PROTO((ptr tc)); static void sweep_from_stack PROTO((ptr tc)); @@ -148,8 +148,8 @@ static IGEN sweep_dirty_port PROTO((ptr x, IGEN youngest)); static IGEN sweep_dirty_symbol PROTO((ptr x, IGEN youngest)); static void sweep_code_object PROTO((ptr tc, ptr co, IGEN from_g)); static void record_dirty_segment PROTO((IGEN from_g, IGEN to_g, seginfo *si)); -static void sweep_dirty PROTO((void)); -static void resweep_dirty_weak_pairs PROTO((void)); +static void sweep_dirty PROTO((ptr tc)); +static void resweep_dirty_weak_pairs PROTO((ptr tc)); static void mark_typemod_data_object PROTO((ptr p, uptr len, seginfo *si)); static void add_pending_guardian PROTO((ptr gdn, ptr tconc)); static void add_trigger_guardians_to_recheck PROTO((ptr ls)); @@ -167,7 +167,7 @@ static void copy_and_clear_list_bits(seginfo *oldspacesegments); static uptr total_size_so_far(); static uptr list_length PROTO((ptr ls)); #endif -static uptr target_generation_space_so_far(); +static uptr target_generation_space_so_far(ptr tc); #ifdef ENABLE_MEASURE static void init_measure(IGEN min_gen, IGEN max_gen); @@ -194,18 +194,17 @@ 
static void check_pending_measure_ephemerons(); /* initialized and used each gc cycle. any others should be defined in globals.h */ static IBOOL change; -static ptr sweep_loc[static_generation+1][max_real_space+1]; -static ptr orig_next_loc[static_generation+1][max_real_space+1]; static ptr tlcs_to_rehash; static ptr conts_to_promote; static ptr recheck_guardians_ls; +static seginfo *resweep_weak_segments; #ifdef ENABLE_OBJECT_COUNTS static int measure_all_enabled; static uptr count_root_bytes; #endif -/* max_cg: maximum copied generation, i.e., maximum generation subject to collection. max_cg >= 0 && max_cg <= 255. +/* max_cg: maximum copied generation, i.e., maximum generation subject to collection. max_cg >= 0 && max_cg <= static_generation. * min_tg: minimum target generation. max_tg == 0 ? min_tg == 0 : min_tg > 0 && min_tg <= max_tg; * max_tg: maximum target generation. max_tg == max_cg || max_tg == max_cg + 1. * Objects in generation g are collected into generation MIN(max_tg, MAX(min_tg, g+1)). @@ -620,8 +619,8 @@ ptr GCENTRY(ptr tc, ptr count_roots_ls) { /* flush instruction cache: effectively clear_code_mod but safer */ for (ls = S_threads; ls != Snil; ls = Scdr(ls)) { - ptr tc = (ptr)THREADTC(Scar(ls)); - S_flush_instruction_cache(tc); + ptr t_tc = (ptr)THREADTC(Scar(ls)); + S_flush_instruction_cache(t_tc); } tlcs_to_rehash = Snil; @@ -632,9 +631,39 @@ ptr GCENTRY(ptr tc, ptr count_roots_ls) { S_G.must_mark_gen0 = 0; for (ls = S_threads; ls != Snil; ls = Scdr(ls)) { - ptr tc = (ptr)THREADTC(Scar(ls)); - S_scan_dirty(TO_VOIDP(EAP(tc)), TO_VOIDP(REAL_EAP(tc))); - EAP(tc) = REAL_EAP(tc) = AP(tc) = (ptr)0; + ptr t_tc = (ptr)THREADTC(Scar(ls)); + S_scan_dirty(TO_VOIDP(EAP(t_tc)), TO_VOIDP(REAL_EAP(t_tc))); + EAP(t_tc) = REAL_EAP(t_tc) = AP(t_tc) = (ptr)0; + + /* clear thread-local allocation: */ + for (g = 0; g <= MAX_CG; g++) { + for (s = 0; s <= max_real_space; s++) { + if (BASELOC_AT(t_tc, s, g)) { + /* We close off, instead of just setting BASELOC to 0, + in case the page ends up getting marked, in which + case a terminator mark needed. 
*/ + S_close_off_thread_local_segment(t_tc, s, g); + } + } + } + + if (t_tc != tc) { + /* close off any current allocation in MAX_TG, and ensure that + end-of-segment markers are otherwise set (in case that's + needed for dirty-byte sweeping) */ + for (s = 0; s <= max_real_space; s++) { + if (BASELOC_AT(t_tc, s, MAX_TG)) + S_close_off_thread_local_segment(t_tc, s, MAX_TG); + for (g = MAX_TG + 1; g <= static_generation; g++) { + ptr old = NEXTLOC_AT(t_tc, s, g); + if (old != (ptr)0) + *(ptr*)TO_VOIDP(old) = forward_marker; + } + } + } else { + for (s = 0; s <= max_real_space; s++) + SWEEPLOC_AT(t_tc, s, MAX_TG) = BASELOC_AT(t_tc, s, MAX_TG); + } } /* perform after ScanDirty */ @@ -645,6 +674,7 @@ ptr GCENTRY(ptr tc, ptr count_roots_ls) { #endif sweep_stack_start = sweep_stack = sweep_stack_limit = NULL; + resweep_weak_segments = NULL; for (g = MIN_TG; g <= MAX_TG; g++) fully_marked_mask[g] = NULL; /* set up generations to be copied */ @@ -652,7 +682,7 @@ ptr GCENTRY(ptr tc, ptr count_roots_ls) { S_G.bytes_of_generation[g] = 0; for (s = 0; s <= max_real_space; s++) { S_G.base_loc[g][s] = FIX(0); - S_G.first_loc[g][s] = FIX(0); + S_G.to_sweep[g][s] = NULL; S_G.next_loc[g][s] = FIX(0); S_G.bytes_left[g][s] = 0; S_G.bytes_of_space[g][s] = 0; @@ -670,12 +700,13 @@ ptr GCENTRY(ptr tc, ptr count_roots_ls) { pre_phantom_bytes += S_G.bytesof[g][countof_phantom]; } - /* set up target generation sweep_loc and orig_next_loc pointers */ + /* set up target generation sweep_loc pointers */ for (g = MIN_TG; g <= MAX_TG; g += 1) { for (s = 0; s <= max_real_space; s++) { /* for all but max_tg (and max_tg as well, if max_tg == max_cg), this - will set orig_net_loc and sweep_loc to 0 */ - orig_next_loc[g][s] = sweep_loc[g][s] = S_G.next_loc[g][s]; + will set sweep_loc to 0 */ + S_G.sweep_loc[g][s] = S_G.next_loc[g][s]; + S_G.to_sweep[g][s] = NULL; } } @@ -697,7 +728,7 @@ ptr GCENTRY(ptr tc, ptr count_roots_ls) { si->next = oldspacesegments; oldspacesegments = si; si->old_space = 1; - /* update generation now, both to computer the target generation, + /* update generation now, both to compute the target generation, and so that any updated dirty references will record the correct new generation; also used for a check in S_dirty_set */ si->generation = compute_target_generation(si->generation); @@ -962,11 +993,14 @@ ptr GCENTRY(ptr tc, ptr count_roots_ls) { } /* sweep areas marked dirty by assignments into older generations */ - sweep_dirty(); + sweep_dirty(tc); sweep_generation(tc); + /* since we will later resweep dirty weak pairs, make sure sweep_generation + ends with a terminator in place for space_weakpair, at least in all threads + other than this one that may have allocated there during sweep_generation */ - pre_finalization_size = target_generation_space_so_far(); + pre_finalization_size = target_generation_space_so_far(tc); /* handle guardians */ { ptr pend_hold_ls, final_ls, pend_final_ls, maybe_final_ordered_ls; @@ -1198,7 +1232,7 @@ ptr GCENTRY(ptr tc, ptr count_roots_ls) { } } - S_G.bytes_finalized = target_generation_space_so_far() - pre_finalization_size; + S_G.bytes_finalized = target_generation_space_so_far(tc) - pre_finalization_size; { iptr post_phantom_bytes = 0; for (g = MIN_TG; g <= MAX_TG; g++) { @@ -1208,8 +1242,8 @@ ptr GCENTRY(ptr tc, ptr count_roots_ls) { } /* handle weak pairs */ - resweep_dirty_weak_pairs(); - resweep_weak_pairs(oldweakspacesegments); + resweep_dirty_weak_pairs(tc); + resweep_weak_pairs(tc, oldweakspacesegments); /* still-pending ephemerons all go to bwp */ 
finish_pending_ephemerons(oldspacesegments); @@ -1436,33 +1470,62 @@ ptr GCENTRY(ptr tc, ptr count_roots_ls) { return Svoid; } -#define sweep_space(s, from_g, body) { \ - slp = &sweep_loc[from_g][s]; \ - nlp = &S_G.next_loc[from_g][s]; \ - if (*slp == 0) *slp = S_G.first_loc[from_g][s]; \ - pp = TO_VOIDP(*slp); \ - while (pp != (nl = (ptr *)TO_VOIDP(*nlp))) { \ - do { \ - if ((p = *pp) == forward_marker) \ - pp = TO_VOIDP(*(pp + 1)); \ - else \ - body \ - } while (pp != nl); \ - } \ - *slp = TO_PTR(pp); \ +#define save_resweep(s, si) do { \ + if (s == space_weakpair) { \ + si->sweep_next = resweep_weak_segments; \ + resweep_weak_segments = si; \ + } \ + } while (0) + +#define sweep_space_range(s, from_g, body) { \ + while ((pp = TO_VOIDP(*slp)) != (nl = TO_VOIDP(*nlp))) { \ + *slp = TO_PTR(nl); \ + while (pp != nl) { \ + p = *pp; \ + body \ + } \ + } \ } -static void resweep_weak_pairs(seginfo *oldweakspacesegments) { +#define sweep_space(s, from_g, body) { \ + while ((si = S_G.to_sweep[from_g][s]) != NULL) { \ + S_G.to_sweep[from_g][s] = si->sweep_next; \ + save_resweep(s, si); \ + pp = TO_VOIDP(si->sweep_start); \ + while ((p = *pp) != forward_marker) \ + body \ + } \ + slp = &S_G.sweep_loc[from_g][s]; \ + nlp = &S_G.next_loc[from_g][s]; \ + sweep_space_range(s, from_g, body) \ + slp = &SWEEPLOC_AT(tc, s, from_g); \ + nlp = &NEXTLOC_AT(tc, s, from_g); \ + sweep_space_range(s, from_g, body) \ + } + +static void resweep_weak_pairs(ptr tc, seginfo *oldweakspacesegments) { IGEN from_g; ptr *slp, *nlp; ptr *pp, p, *nl; seginfo *si; for (from_g = MIN_TG; from_g <= MAX_TG; from_g += 1) { - sweep_loc[from_g][space_weakpair] = orig_next_loc[from_g][space_weakpair]; + /* By starting from `base_loc`, we may needlessly sweep pairs in `MAX_TG` + that were allocated before the GC, but that's ok. */ + S_G.sweep_loc[from_g][space_weakpair] = S_G.base_loc[from_g][space_weakpair]; + SWEEPLOC_AT(tc, space_weakpair, from_g) = BASELOC_AT(tc, space_weakpair, from_g); + S_G.to_sweep[space_weakpair][from_g] = NULL; /* in case there was new allocation */ sweep_space(space_weakpair, from_g, { forward_or_bwp(pp, p); pp += 2; }) + } + + for (si = resweep_weak_segments; si != NULL; si = si->sweep_next) { + pp = TO_VOIDP(build_ptr(si->number, 0)); + while ((p = *pp) != forward_marker) { + forward_or_bwp(pp, p); + pp += 2; + } } for (si = oldweakspacesegments; si != NULL; si = si->next) { @@ -1506,6 +1569,7 @@ static void forward_or_bwp(pp, p) ptr *pp; ptr p; { static void sweep_generation(ptr tc) { ptr *slp, *nlp; ptr *pp, p, *nl; IGEN from_g; + seginfo *si; do { change = 0; @@ -1674,9 +1738,9 @@ static void record_dirty_segment(IGEN from_g, IGEN to_g, seginfo *si) { } } -static void sweep_dirty() { +static void sweep_dirty(ptr tc) { IGEN youngest, min_youngest; - ptr *pp, *ppend, *nl; + ptr *pp, *ppend, *nl, start, next_loc; uptr seg, d; ISPC s; IGEN from_g, to_g; @@ -1712,8 +1776,18 @@ static void sweep_dirty() { } min_youngest = 0xff; - nl = from_g == MAX_TG ? TO_VOIDP(orig_next_loc[from_g][s]) : TO_VOIDP(S_G.next_loc[from_g][s]); - ppend = TO_VOIDP(build_ptr(seg, 0)); + start = build_ptr(seg, 0); + ppend = TO_VOIDP(start); + + /* The current allocation pointer, either global or thread-local, + may be relevant as the ending point. We assume that thread-local + regions for all other threads aer terminated and won't get new + allocations while dirty sweeping runs. 
*/ + next_loc = S_G.next_loc[from_g][s]; + if (((uptr)next_loc < (uptr)start) + || ((uptr)next_loc >= ((uptr)start + bytes_per_segment))) + next_loc = NEXTLOC_AT(tc, s, from_g); + nl = TO_VOIDP(next_loc); if (s == space_weakpair) { weakseginfo *next = weaksegments_to_resweep; @@ -1731,7 +1805,6 @@ static void sweep_dirty() { if (*dp == -1) { pp = ppend; ppend += bytes_per_card; - if (pp <= nl && nl < ppend) ppend = nl; d = dend; } else { while (d < dend) { @@ -1981,16 +2054,26 @@ static void sweep_dirty() { POP_BACKREFERENCE() } -static void resweep_dirty_weak_pairs() { +static void resweep_dirty_weak_pairs(ptr tc) { weakseginfo *ls; - ptr *pp, *ppend, *nl, p; + ptr *pp, *ppend, p; IGEN from_g, min_youngest, youngest; uptr d; + /* Make sure terminator is in place for allocation areas relevant to this thread */ + for (from_g = MIN_TG; from_g <= static_generation; from_g++) { + ptr old; + old = S_G.next_loc[from_g][space_weakpair]; + if (old != (ptr)0) + *(ptr*)TO_VOIDP(old) = forward_marker; + old = NEXTLOC_AT(tc, space_weakpair, from_g); + if (old != (ptr)0) + *(ptr*)TO_VOIDP(old) = forward_marker; + } + for (ls = weaksegments_to_resweep; ls != NULL; ls = ls->next) { seginfo *dirty_si = ls->si; from_g = dirty_si->generation; - nl = from_g == MAX_TG ? TO_VOIDP(orig_next_loc[from_g][space_weakpair]) : TO_VOIDP(S_G.next_loc[from_g][space_weakpair]); ppend = TO_VOIDP(build_ptr(dirty_si->number, 0)); min_youngest = 0xff; d = 0; @@ -2005,10 +2088,11 @@ static void resweep_dirty_weak_pairs() { while (d < dend) { pp = ppend; ppend += bytes_per_card / sizeof(ptr); - if (pp <= nl && nl < ppend) ppend = nl; if (dirty_si->dirty_bytes[d] <= MAX_CG) { youngest = ls->youngest[d]; while (pp < ppend) { + if (!dirty_si->marked_mask && *pp == forward_marker) + break; if (!dirty_si->marked_mask || marked(dirty_si, TO_PTR(pp))) { p = *pp; seginfo *si; @@ -2245,7 +2329,7 @@ static uptr total_size_so_far() { } #endif -static uptr target_generation_space_so_far() { +static uptr target_generation_space_so_far(ptr tc) { IGEN g; ISPC s; uptr sz = 0; @@ -2257,6 +2341,8 @@ static uptr target_generation_space_so_far() { sz += S_G.bytes_of_space[g][s]; if (S_G.next_loc[g][s] != FIX(0)) sz += (uptr)S_G.next_loc[g][s] - (uptr)S_G.base_loc[g][s]; + if (NEXTLOC_AT(tc, s, g) != FIX(0)) + sz += (uptr)NEXTLOC_AT(tc, s, g) - (uptr)BASELOC_AT(tc, s, g); } } diff --git a/racket/src/ChezScheme/c/gcwrapper.c b/racket/src/ChezScheme/c/gcwrapper.c index 6ca07453de..47c9023f3a 100644 --- a/racket/src/ChezScheme/c/gcwrapper.c +++ b/racket/src/ChezScheme/c/gcwrapper.c @@ -654,11 +654,20 @@ void S_check_heap(aftergc, mcg) IBOOL aftergc; IGEN mcg; { || s == space_immobile_impure || s == space_count_pure || s == space_count_impure || s == space_closure) { /* doesn't handle: space_port, space_continuation, space_code, space_pure_typed_object, space_impure_record, or impure_typed_object */ - nl = TO_VOIDP(S_G.next_loc[g][s]); - /* check for dangling references */ pp1 = TO_VOIDP(build_ptr(seg, 0)); pp2 = TO_VOIDP(build_ptr(seg + 1, 0)); + + nl = TO_VOIDP(S_G.next_loc[g][s]); + if (!(pp1 <= nl && nl < pp2)) { + ptr ls; + for (ls = S_threads; ls != Snil; ls = Scdr(ls)) { + ptr t_tc = (ptr)THREADTC(Scar(ls)); + nl = TO_VOIDP(NEXTLOC_AT(t_tc, s, g)); + if (pp1 <= nl && nl < pp2) + break; + } + } if (pp1 <= nl && nl < pp2) pp2 = nl; while (pp1 < pp2) { @@ -670,7 +679,7 @@ void S_check_heap(aftergc, mcg) IBOOL aftergc; IGEN mcg; { /* skip non-pair part of ephemeron */ } else { p = *pp1; - if (p == forward_marker) { + if (!si->marked_mask && (p == 
forward_marker)) { pp1 = pp2; /* break out of outer loop */ break; } else if (!IMMEDIATE(p)) { @@ -969,9 +978,10 @@ ptr S_do_gc(IGEN max_cg, IGEN min_tg, IGEN max_tg, ptr count_roots) { /* now transfer old_g info to new_g, and clear old_g info */ S_G.bytes_of_generation[new_g] = S_G.bytes_of_generation[old_g]; S_G.bytes_of_generation[old_g] = 0; for (s = 0; s <= max_real_space; s += 1) { - S_G.first_loc[new_g][s] = S_G.first_loc[old_g][s]; S_G.first_loc[old_g][s] = FIX(0); + S_G.to_sweep[new_g][s] = S_G.to_sweep[old_g][s]; S_G.to_sweep[old_g][s] = NULL; S_G.base_loc[new_g][s] = S_G.base_loc[old_g][s]; S_G.base_loc[old_g][s] = FIX(0); S_G.next_loc[new_g][s] = S_G.next_loc[old_g][s]; S_G.next_loc[old_g][s] = FIX(0); + S_G.sweep_loc[new_g][s] = S_G.sweep_loc[old_g][s]; S_G.sweep_loc[old_g][s] = FIX(0); S_G.bytes_left[new_g][s] = S_G.bytes_left[old_g][s]; S_G.bytes_left[old_g][s] = 0; S_G.bytes_of_space[new_g][s] = S_G.bytes_of_space[old_g][s]; S_G.bytes_of_space[old_g][s] = 0; S_G.occupied_segments[new_g][s] = S_G.occupied_segments[old_g][s]; S_G.occupied_segments[old_g][s] = NULL; diff --git a/racket/src/ChezScheme/c/globals.h b/racket/src/ChezScheme/c/globals.h index 3db39d046d..e05a1873b8 100644 --- a/racket/src/ChezScheme/c/globals.h +++ b/racket/src/ChezScheme/c/globals.h @@ -96,9 +96,10 @@ EXTERN struct S_G_struct { /* alloc.c */ ptr *protected[max_protected]; uptr protect_next; - ptr first_loc[static_generation+1][max_real_space+1]; + seginfo *to_sweep[static_generation+1][max_real_space+1]; ptr base_loc[static_generation+1][max_real_space+1]; ptr next_loc[static_generation+1][max_real_space+1]; + ptr sweep_loc[static_generation+1][max_real_space+1]; iptr bytes_left[static_generation+1][max_real_space+1]; uptr bytes_of_space[static_generation+1][max_real_space+1]; uptr bytes_of_generation[static_generation+1]; diff --git a/racket/src/ChezScheme/c/prim5.c b/racket/src/ChezScheme/c/prim5.c index 581c72f5c8..4b272788d5 100644 --- a/racket/src/ChezScheme/c/prim5.c +++ b/racket/src/ChezScheme/c/prim5.c @@ -191,9 +191,7 @@ static ptr s_weak_pairp(p) ptr p; { static ptr s_ephemeron_cons(car, cdr) ptr car, cdr; { ptr p; - tc_mutex_acquire() p = S_ephemeron_cons_in(0, car, cdr); - tc_mutex_release() return p; } diff --git a/racket/src/ChezScheme/c/schsig.c b/racket/src/ChezScheme/c/schsig.c index 96518c1e18..540efb2573 100644 --- a/racket/src/ChezScheme/c/schsig.c +++ b/racket/src/ChezScheme/c/schsig.c @@ -51,7 +51,6 @@ static void split(k, s) ptr k; ptr *s; { seginfo *si; ISPC spc; - tc_mutex_acquire() /* set m to size of lower piece, n to size of upper piece */ m = (uptr)TO_PTR(s) - (uptr)CONTSTACK(k); n = CONTCLENGTH(k) - m; @@ -73,7 +72,6 @@ static void split(k, s) ptr k; ptr *s; { CONTLENGTH(k) = CONTCLENGTH(k) = n; CONTSTACK(k) = TO_PTR(s); *s = TO_PTR(DOUNDERFLOW); - tc_mutex_release() } /* We may come in to S_split_and_resize with a multi-shot contination whose diff --git a/racket/src/ChezScheme/c/segment.c b/racket/src/ChezScheme/c/segment.c index 896146ac64..79071a3743 100644 --- a/racket/src/ChezScheme/c/segment.c +++ b/racket/src/ChezScheme/c/segment.c @@ -250,6 +250,7 @@ static void initialize_seginfo(seginfo *si, ISPC s, IGEN g) { #endif si->counting_mask = NULL; si->measured_mask = NULL; + si->sweep_next = NULL; } iptr S_find_segments(s, g, n) ISPC s; IGEN g; iptr n; { diff --git a/racket/src/ChezScheme/c/thread.c b/racket/src/ChezScheme/c/thread.c index 7162cf0849..f142c19b61 100644 --- a/racket/src/ChezScheme/c/thread.c +++ b/racket/src/ChezScheme/c/thread.c @@ -113,8 +113,7 @@ 
ptr S_create_thread_object(who, p_tc) const char *who; ptr p_tc; { /* S_thread had better not do thread-local allocation */ thread = S_thread(tc); - /* use S_cons_in to avoid thread-local allocation */ - S_threads = S_cons_in(space_new, 0, thread, S_threads); + S_threads = S_cons_in_global(space_new, 0, thread, S_threads); S_nthreads += 1; SETSYMVAL(S_G.active_threads_id, FIX(UNFIX(SYMVAL(S_G.active_threads_id)) + 1)); @@ -129,6 +128,13 @@ ptr S_create_thread_object(who, p_tc) const char *who; ptr p_tc; { LZ4OUTBUFFER(tc) = 0; + for (i = 0; i < num_thread_local_allocation_segments; i++) { + BASELOC(tc, i) = (ptr)0; + NEXTLOC(tc, i) = (ptr)0; + BYTESLEFT(tc, i) = 0; + SWEEPLOC(tc, i) = (ptr)0; + } + tc_mutex_release() return thread; @@ -207,6 +213,15 @@ static IBOOL destroy_thread(tc) ptr tc; { /* process remembered set before dropping allocation area */ S_scan_dirty((ptr *)EAP(tc), (ptr *)REAL_EAP(tc)); + /* move thread-local allocation to global space */ + { + ISPC s; IGEN g; + for (g = 0; g <= static_generation; g++) + for (s = 0; s <= max_real_space; s++) + if (NEXTLOC_AT(tc, s, g)) + S_close_off_thread_local_segment(tc, s, g); + } + /* process guardian entries */ { ptr target, ges, obj, next; seginfo *si; @@ -314,7 +329,7 @@ void S_mutex_acquire(m) scheme_mutex_t *m; { m->count = count + 1; return; } - + if ((status = s_thread_mutex_lock(&m->pmutex)) != 0) S_error1("mutex-acquire", "failed: ~a", S_strerror(status)); m->owner = self; @@ -476,4 +491,3 @@ IBOOL S_condition_wait(c, m, t) s_thread_cond_t *c; scheme_mutex_t *m; ptr t; { } } #endif /* PTHREADS */ - diff --git a/racket/src/ChezScheme/c/types.h b/racket/src/ChezScheme/c/types.h index a45a3a8858..65e37e9363 100644 --- a/racket/src/ChezScheme/c/types.h +++ b/racket/src/ChezScheme/c/types.h @@ -91,12 +91,28 @@ typedef int IFASLCODE; /* fasl type codes */ #define find_room(s, g, t, n, x) find_room_T(s, g, t, n, ALREADY_PTR, x) #define find_room_voidp(s, g, n, x) find_room_T(s, g, typemod, n, TO_VOIDP, x) +#define SG_AT_TO_INDEX(s, g) ((g * (1 + max_real_space)) + s) + +#define BASELOC_AT(tc, s, g) BASELOC(tc, SG_AT_TO_INDEX(s, g)) +#define NEXTLOC_AT(tc, s, g) NEXTLOC(tc, SG_AT_TO_INDEX(s, g)) +#define BYTESLEFT_AT(tc, s, g) BYTESLEFT(tc, SG_AT_TO_INDEX(s, g)) +#define SWEEPLOC_AT(tc, s, g) SWEEPLOC(tc, SG_AT_TO_INDEX(s, g)) + +/* inline allocation --- no mutex required */ +/* Like `find_room`, but allocating into thread-local space. */ +#define thread_find_room_g_T(tc, s, g, t, n, T, x) { \ + ptr X = NEXTLOC_AT(tc, s, g); \ + NEXTLOC_AT(tc, s, g) = (ptr)((uptr)X + (n)); \ + if ((BYTESLEFT_AT(tc, s, g) -= (n)) < 0) X = S_find_more_thread_room(tc, s, g, n, X); \ + (x) = T(TYPE(X, t)); \ +} + +#define thread_find_room_g(tc, s, g, t, n, x) thread_find_room_g_T(tc, s, g, t, n, ALREADY_PTR, x) +#define thread_find_room_g_voidp(tc, s, g, n, x) thread_find_room_g_T(tc, s, g, typemod, n, TO_VOIDP, x) + /* thread-local inline allocation --- no mutex required */ -/* thread_find_room allocates n bytes in the local allocation area of - * the thread (hence space new, generation zero) into destination x, tagged - * with type t, punting to find_more_room if no space is left in the current - * allocation area. n is assumed to be an integral multiple of the object - * alignment. 
*/ +/* Like `thread_find_room_g`, but always `space_new` and generation 0, + so using the same bump pointer as most new allocation */ #define thread_find_room_T(tc, t, n, T, x) { \ ptr _tc = tc;\ uptr _ap = (uptr)AP(_tc);\ @@ -151,7 +167,9 @@ typedef struct _seginfo { octet *list_bits; /* for `$list-bits-ref` and `$list-bits-set!` */ uptr number; /* the segment number */ struct _chunkinfo *chunk; /* the chunk this segment belongs to */ - struct _seginfo *next; /* pointer to the next seginfo (used in occupied_segments and unused_segs */ + struct _seginfo *next; /* pointer to the next seginfo (used in occupied_segments and unused_segs) */ + struct _seginfo *sweep_next; /* next in list of segments allocated during GC => need to sweep */ + ptr sweep_start; /* address within segment to start sweep */ struct _seginfo **dirty_prev; /* pointer to the next pointer on the previous seginfo in the DirtySegments list */ struct _seginfo *dirty_next; /* pointer to the next seginfo on the DirtySegments list */ ptr trigger_ephemerons; /* ephemerons to re-check if object in segment is copied out */ diff --git a/racket/src/ChezScheme/c/vfasl.c b/racket/src/ChezScheme/c/vfasl.c index 0d6fbdc502..0137046ff0 100644 --- a/racket/src/ChezScheme/c/vfasl.c +++ b/racket/src/ChezScheme/c/vfasl.c @@ -464,6 +464,8 @@ ptr S_vfasl(ptr bv, void *stream, iptr offset, iptr input_len) RECORDINSTTYPE(rtd) = S_G.base_rtd; RECORDDESCUID(rtd) = S_G.base_rtd; + tc_mutex_acquire() + while (1) { ptr new_rtd, meta_rtd, parent_rtd; @@ -494,6 +496,8 @@ ptr S_vfasl(ptr bv, void *stream, iptr offset, iptr input_len) } } } + + tc_mutex_release() } /* Replace rtd references to interned references */ diff --git a/racket/src/ChezScheme/csug/smgmt.stex b/racket/src/ChezScheme/csug/smgmt.stex index 41d511a33f..747b3419bd 100644 --- a/racket/src/ChezScheme/csug/smgmt.stex +++ b/racket/src/ChezScheme/csug/smgmt.stex @@ -274,11 +274,11 @@ effectively delays collection of older generations indefinitely. This parameter determines the maximum nonstatic generation, hence the total number of generations, currently in use. -Its value is an exact integer in the range 1 through 254. +Its value is an exact integer in the range 1 through 6. When set to 1, only two nonstatic generations are used; when set to 2, three nonstatic generations are used, and so on. -When set to 254, 255 nonstatic generations are used, plus the single -static generation for a total of 256 generations. +When set to 6, 7 nonstatic generations are used, plus the single +static generation for a total of 8 generations. 
Increasing the number of generations effectively decreases how often old objects are collected, potentially decreasing collection overhead but potentially increasing the number of inaccessible objects retained in the diff --git a/racket/src/ChezScheme/makefiles/Mf-install.in b/racket/src/ChezScheme/makefiles/Mf-install.in index 158af7dac8..b00f506c37 100644 --- a/racket/src/ChezScheme/makefiles/Mf-install.in +++ b/racket/src/ChezScheme/makefiles/Mf-install.in @@ -62,7 +62,7 @@ InstallLZ4Target= # no changes should be needed below this point # ############################################################################### -Version=csv9.5.3.37 +Version=csv9.5.3.38 Include=boot/$m PetiteBoot=boot/$m/petite.boot SchemeBoot=boot/$m/scheme.boot diff --git a/racket/src/ChezScheme/mats/7.ms b/racket/src/ChezScheme/mats/7.ms index e785e683af..00de98f451 100644 --- a/racket/src/ChezScheme/mats/7.ms +++ b/racket/src/ChezScheme/mats/7.ms @@ -5705,8 +5705,8 @@ evaluating module init (pair? (with-interrupts-disabled (let ([cmg (collect-maximum-generation)]) - (collect-maximum-generation 4) - (collect 4 4) + (collect-maximum-generation 3) + (collect 3 3) (let () (define (locate type gen ls) (cond @@ -5723,42 +5723,42 @@ evaluating module init (let ([hc (object-counts)]) (assert (locate 'box 0 hc)) (assert (locate (record-type-descriptor flub) 0 hc)) - (collect-maximum-generation 7) + (collect-maximum-generation 6) (let ([hc (object-counts)]) (assert (locate 'box 0 hc)) (assert (locate (record-type-descriptor flub) 0 hc)) - (collect 7 7) + (collect 6 6) (let () (define q1 (make-flub q0)) (define b1 (box b0)) - (collect 6 6) + (collect 5 5) (let () (define q2 (make-flub q1)) (define b2 (box b1)) - (collect 5 5) + (collect 4 4) (let ([hc (object-counts)]) + (assert (locate 'box 4 hc)) (assert (locate 'box 5 hc)) (assert (locate 'box 6 hc)) - (assert (locate 'box 7 hc)) + (assert (locate (record-type-descriptor flub) 4 hc)) (assert (locate (record-type-descriptor flub) 5 hc)) (assert (locate (record-type-descriptor flub) 6 hc)) - (assert (locate (record-type-descriptor flub) 7 hc)) - (collect-maximum-generation 5) + (collect-maximum-generation 4) (let ([hc (object-counts)]) - (assert (locate 'box 5 hc)) + (assert (locate 'box 4 hc)) + (assert (not (locate 'box 5 hc))) (assert (not (locate 'box 6 hc))) - (assert (not (locate 'box 7 hc))) - (assert (locate (record-type-descriptor flub) 5 hc)) + (assert (locate (record-type-descriptor flub) 4 hc)) + (assert (not (locate (record-type-descriptor flub) 5 hc))) (assert (not (locate (record-type-descriptor flub) 6 hc))) - (assert (not (locate (record-type-descriptor flub) 7 hc))) - (collect 5 5) + (collect 4 4) (let ([hc (object-counts)]) - (assert (locate 'box 5 hc)) + (assert (locate 'box 4 hc)) + (assert (not (locate 'box 5 hc))) (assert (not (locate 'box 6 hc))) - (assert (not (locate 'box 7 hc))) - (assert (locate (record-type-descriptor flub) 5 hc)) + (assert (locate (record-type-descriptor flub) 4 hc)) + (assert (not (locate (record-type-descriptor flub) 5 hc))) (assert (not (locate (record-type-descriptor flub) 6 hc))) - (assert (not (locate (record-type-descriptor flub) 7 hc))) (collect-maximum-generation cmg) (collect cmg cmg) (cons q2 b2))))))))))))) diff --git a/racket/src/ChezScheme/mats/root-experr-compile-0-f-f-f b/racket/src/ChezScheme/mats/root-experr-compile-0-f-f-f index e9e384053c..62a1738006 100644 --- a/racket/src/ChezScheme/mats/root-experr-compile-0-f-f-f +++ b/racket/src/ChezScheme/mats/root-experr-compile-0-f-f-f @@ -7568,7 +7568,7 @@ 
format.mo:Expected error in mat format-dollar: "format: expected real number for 7.mo:Expected error in mat exit: "exit: unexpected return from exit handler". 7.mo:Expected error in mat abort: "abort: unexpected return from abort handler". 7.mo:Expected error in mat collect: "collect-maximum-generation: invalid generation -1". -7.mo:Expected error in mat collect: "collect-maximum-generation: 10000 exceeds maximum supported value 254". +7.mo:Expected error in mat collect: "collect-maximum-generation: 10000 exceeds maximum supported value 6". 7.mo:Expected error in mat collect: "collect-maximum-generation: invalid generation static". 7.mo:Expected error in mat collect: "release-minimum-generation: invalid generation -1". 7.mo:Expected error in mat collect: "release-minimum-generation: new release minimum generation must not be be greater than collect-maximum-generation". diff --git a/racket/src/ChezScheme/rktboot/main.rkt b/racket/src/ChezScheme/rktboot/main.rkt index 3dd96f6e9c..8d74119138 100644 --- a/racket/src/ChezScheme/rktboot/main.rkt +++ b/racket/src/ChezScheme/rktboot/main.rkt @@ -6,12 +6,15 @@ ;; with command-line arguments, instead of environment variables. (define scheme-src #f) +(define dest-dir #f) (define mach #f) (command-line #:once-each [("--scheme-src") dir "Select the directory (defaults to current directory)" (set! scheme-src dir)] + [("--dest") dir "Select the destination derectory (defaults to Scheme directory)" + (set! dest-dir dir)] [("--machine") machine "Select the machine type (defaults to inferred)" (set! mach machine)]) @@ -20,6 +23,8 @@ (flush-output)) (void (putenv "SCHEME_SRC" (or scheme-src "."))) +(when dest-dir + (void (putenv "SCHEME_WORKAREA" dest-dir))) (when mach (void (putenv "MACH" mach))) diff --git a/racket/src/ChezScheme/s/cmacros.ss b/racket/src/ChezScheme/s/cmacros.ss index 576f63cf50..31f5f9f13a 100644 --- a/racket/src/ChezScheme/s/cmacros.ss +++ b/racket/src/ChezScheme/s/cmacros.ss @@ -357,7 +357,7 @@ ;; --------------------------------------------------------------------- ;; Version and machine types: -(define-constant scheme-version #x09050325) +(define-constant scheme-version #x09050326) (define-syntax define-machine-types (lambda (x) @@ -1493,6 +1493,9 @@ ([iptr type] [uptr tc])) (define-constant virtual-register-count 16) +(define-constant static-generation 7) +(define-constant num-thread-local-allocation-segments (fx* (fx+ 1 (constant static-generation)) + (fx+ 1 (constant max-real-space)))) ;;; make sure gc sweeps all ptrs (define-primitive-structure-disps tc typemod @@ -1567,7 +1570,11 @@ [ptr parameters] [ptr DSTBV] [ptr SRCBV] - [double fpregs (constant asm-fpreg-max)])) + [double fpregs (constant asm-fpreg-max)] + [xptr base-loc (constant num-thread-local-allocation-segments)] + [xptr next-loc (constant num-thread-local-allocation-segments)] + [iptr bytes-left (constant num-thread-local-allocation-segments)] + [xptr sweep-loc (constant num-thread-local-allocation-segments)])) (define tc-field-list (let f ([ls (oblist)] [params '()]) @@ -2018,7 +2025,6 @@ (define-constant default-collect-trip-bytes (expt 2 (+ 20 (constant log2-ptr-bytes)))) (define-constant default-heap-reserve-ratio 1.0) -(define-constant static-generation 255) (define-constant default-max-nonstatic-generation 4) (constant-case address-bits diff --git a/racket/src/ChezScheme/s/mkgc.ss b/racket/src/ChezScheme/s/mkgc.ss index 2eff7aaefb..e4be33e7b1 100644 --- a/racket/src/ChezScheme/s/mkgc.ss +++ b/racket/src/ChezScheme/s/mkgc.ss @@ -1048,17 +1048,12 @@ (copy-bytes 
code-data len)] [else (define t : ptr (code-reloc _)) - (case-mode - [(sweep sweep-in-old vfasl-sweep) - (define m : iptr (reloc-table-size t)) - (define oldco : ptr (reloc-table-code t))] - [else - (define m : iptr (cond - [t (reloc-table-size t)] - [else 0])) - (define oldco : ptr (cond - [t (reloc-table-code t)] - [else 0]))]) + (define m : iptr (cond + [t (reloc-table-size t)] + [else 0])) + (define oldco : ptr (cond + [t (reloc-table-code t)] + [else 0])) (case-mode [vfasl-sweep (let* ([r_sz : uptr (size_reloc_table m)] diff --git a/racket/src/cs/compile-file.ss b/racket/src/cs/compile-file.ss index 5d7b0cd147..83213d66bc 100644 --- a/racket/src/cs/compile-file.ss +++ b/racket/src/cs/compile-file.ss @@ -2,7 +2,7 @@ ;; Check to make we're using a build of Chez Scheme ;; that has all the features we need. (define-values (need-maj need-min need-sub need-dev) - (values 9 5 3 37)) + (values 9 5 3 38)) (unless (guard (x [else #f]) (eval 'scheme-fork-version-number)) (error 'compile-file diff --git a/racket/src/version/racket_version.h b/racket/src/version/racket_version.h index a87153812c..ab6838348d 100644 --- a/racket/src/version/racket_version.h +++ b/racket/src/version/racket_version.h @@ -16,7 +16,7 @@ #define MZSCHEME_VERSION_X 7 #define MZSCHEME_VERSION_Y 8 #define MZSCHEME_VERSION_Z 0 -#define MZSCHEME_VERSION_W 9 +#define MZSCHEME_VERSION_W 10 /* A level of indirection makes `#` work as needed: */ #define AS_a_STR_HELPER(x) #x