From c46e4f91c1a3693df2ec5d8221d926f89f43c761 Mon Sep 17 00:00:00 2001
From: Matthew Flatt
Date: Tue, 22 Sep 2020 11:47:08 -0600
Subject: [PATCH] Chez Scheme GC: internal parallelism by messages instead of locks

Change the internal parallelism strategy for the GC to record an owner
for each allocated segment of memory, and have the owner be solely
responsible for copying or marking objects of the segment. When
sweeping, a collecting thread handles references to objects that it
owns or that have been copied or marked already, and it asks another
collecting thread to resweep an object that refers to objects owned by
that thread. At worst, an object ends up being swept by all collecting
threads, one at a time, but that's unlikely for a given object. The
approach seems likely to scale better than a lock-based approach, even
the one that used a lightweight, CAS-based lock and retries on lock
failure.

---
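Reviewer notes (illustrative sketches, not part of the patch):

First, a minimal toy model of the ownership rule described above: a
sweeper relocates a reference itself only when it owns the target
segment (or the target is already marked), and otherwise mails the
*referring* range to the owning sweeper. All names here (toy_segment,
range_msg, sweep_ref, inbox) are invented for the sketch; the real
analog in gc.c below is SEGMENT_IS_LOCAL plus RECORD_REMOTE_RANGE and
FLUSH_REMOTE_RANGE.

/* toy_ownership.c: toy model of owner-based sweeping; shares no
   code with gc.c, and compiles standalone. */
#include <stdio.h>

#define N_SWEEPERS 2

typedef struct { int owner; int marked; } toy_segment;

typedef struct range_msg {
  const char *container;       /* object holding the remote reference */
  struct range_msg *next;
} range_msg;

static range_msg *inbox[N_SWEEPERS]; /* per-sweeper re-sweep requests */

/* A sweeper handles a reference itself only if it owns the target
   segment or the target is already marked; otherwise it queues the
   referring object for the owner to re-sweep. */
static void sweep_ref(int me, toy_segment *target, range_msg *m) {
  if (target->owner == me || target->marked) {
    printf("sweeper %d relocates reference in %s\n", me, m->container);
  } else {
    printf("sweeper %d defers %s to sweeper %d\n",
           me, m->container, target->owner);
    m->next = inbox[target->owner];
    inbox[target->owner] = m;
  }
}

int main(void) {
  toy_segment mine = {0, 0}, theirs = {1, 0};
  range_msg a = {"pair A", 0}, b = {"pair B", 0};
  sweep_ref(0, &mine, &a);   /* handled locally by sweeper 0 */
  sweep_ref(0, &theirs, &b); /* queued for sweeper 1 */
  return 0;
}

Note that a deferred object may in turn be deferred again by its new
sweeper, which is where the at-most-N-sweeps-per-object bound in the
gc.c comments comes from.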
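Second, the init_mask change below publishes a freshly zeroed mark
bitmap only after a store fence, so a non-owning sweeper sees either
no bitmap or a fully initialized one. Here is a sketch of that
publication pattern using C11 atomics in place of the GC's
STORE_FENCE macro; toy_seginfo and both function names are invented,
and error handling is omitted.

/* publish_mask.c: fence-then-publish pattern, standalone sketch. */
#include <stdatomic.h>
#include <stdlib.h>
#include <string.h>

typedef struct { _Atomic(unsigned char *) marked_mask; } toy_seginfo;

/* Owner: initialize fully, then publish with release ordering. */
void publish_mask(toy_seginfo *si, size_t nbytes) {
  unsigned char *mask = malloc(nbytes);
  memset(mask, 0, nbytes);  /* all mark bits start clear */
  atomic_store_explicit(&si->marked_mask, mask, memory_order_release);
}

/* Non-owning sweeper: an acquire load pairs with the release store,
   so a non-NULL result is always fully initialized. */
unsigned char *read_mask(toy_seginfo *si) {
  return atomic_load_explicit(&si->marked_mask, memory_order_acquire);
}

Stale reads are tolerated by design: a sweeper that misses a freshly
set mark bit merely sends the referencing object to the owner, which
re-sweeps it.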
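Third, a simplified pthreads rendition of the termination handshake in
sweep_generation_trading_work below: a sweeper that runs out of work
parks on its condition variable, and the last sweeper to go idle with
an empty inbox wakes everyone so all can exit. In this toy, have_work
stands in for a sweeper's ranges_received queue, and, unlike the real
code (where the sender adjusts the running count for the receiver),
the receiver counts itself back in.

/* trade_work.c: standalone sketch of the done/park protocol. */
#include <pthread.h>

#define MAX_SWEEPERS 8

static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t work_cond[MAX_SWEEPERS]; /* initialized elsewhere */
static int num_running, num_sweepers;
static int have_work[MAX_SWEEPERS]; /* stand-in for ranges_received */

void trade_work(int me, void (*sweep_pass)(int)) {
  sweep_pass(me);                /* initial full sweep */
  pthread_mutex_lock(&m);
  --num_running;
  for (;;) {
    if (num_running == 0 && !have_work[me]) {
      /* everyone idle with empty inboxes: collection is finished */
      for (int i = 0; i < num_sweepers; i++)
        pthread_cond_signal(&work_cond[i]);
      pthread_mutex_unlock(&m);
      return;
    }
    if (!have_work[me])          /* park until a message arrives */
      pthread_cond_wait(&work_cond[me], &m);
    if (have_work[me]) {
      have_work[me] = 0;
      ++num_running;             /* active again */
      pthread_mutex_unlock(&m);
      sweep_pass(me);            /* sweep the received ranges */
      pthread_mutex_lock(&m);
      --num_running;
    }
    /* else: spurious wakeup or done-signal; loop re-checks */
  }
}

End of reviewer notes; the diff follows.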
 .makefile                                   |    2 +-
 Makefile                                    |   12 +-
 racket/src/ChezScheme/c/Mf-base             |    4 +-
 racket/src/ChezScheme/c/alloc.c             |    3 +
 racket/src/ChezScheme/c/gc.c                | 1038 +++++++++++--------
 racket/src/ChezScheme/c/gcwrapper.c         |  213 +++-
 racket/src/ChezScheme/c/globals.h           |    1 +
 racket/src/ChezScheme/c/scheme.c            |    7 +
 racket/src/ChezScheme/c/segment.c           |    1 -
 racket/src/ChezScheme/c/thread.c            |    3 +-
 racket/src/ChezScheme/c/types.h             |    3 +-
 racket/src/ChezScheme/c/vfasl.c             |   18 +-
 racket/src/ChezScheme/rktboot/make-boot.rkt |    2 +
 racket/src/ChezScheme/s/Mf-base             |   28 +-
 racket/src/ChezScheme/s/cmacros.ss          |   18 +-
 racket/src/ChezScheme/s/mkgc.ss             |  351 ++++---
 racket/src/ChezScheme/s/mkheader.ss         |    2 +
 racket/src/ChezScheme/workarea              |    2 +
 racket/src/cs/c/check_boot.sh               |    4 +
 racket/src/cs/c/ready_boot.sh               |    2 +
 racket/src/cs/c/reset_boot.sh               |    1 +
 21 files changed, 1089 insertions(+), 626 deletions(-)

diff --git a/.makefile b/.makefile
index 275bcf19a8..294bf03f4b 100644
--- a/.makefile
+++ b/.makefile
@@ -338,7 +338,7 @@ RACKET_FOR_BOOTFILES = $(RACKET)
 RACKET_FOR_BUILD = $(RACKET)
 
 # This branch name changes each time the pb boot files are updated:
-PB_BRANCH = circa-7.8.0.10-7
+PB_BRANCH = circa-7.8.0.10-11
 PB_REPO = https://github.com/racket/pb
 
 # Alternative source for Chez Scheme boot files, normally set by
diff --git a/Makefile b/Makefile
index d5ed400d86..4b52cc66c8 100644
--- a/Makefile
+++ b/Makefile
@@ -47,7 +47,7 @@ RACKETCS_SUFFIX =
 RACKET =
 RACKET_FOR_BOOTFILES = $(RACKET)
 RACKET_FOR_BUILD = $(RACKET)
-PB_BRANCH = circa-7.8.0.10-7
+PB_BRANCH = circa-7.8.0.10-11
 PB_REPO = https://github.com/racket/pb
 EXTRA_REPOS_BASE =
 CS_CROSS_SUFFIX =
@@ -306,14 +306,14 @@ maybe-fetch-pb-as-is:
 	echo done
 fetch-pb-from:
 	mkdir -p racket/src/ChezScheme/boot
-	if [ ! -d racket/src/ChezScheme/boot/pb ] ; then git clone -q -b circa-7.8.0.10-7 https://github.com/racket/pb racket/src/ChezScheme/boot/pb ; else cd racket/src/ChezScheme/boot/pb && git fetch -q origin circa-7.8.0.10-7:remotes/origin/circa-7.8.0.10-7 ; fi
-	cd racket/src/ChezScheme/boot/pb && git checkout -q circa-7.8.0.10-7
+	if [ ! -d racket/src/ChezScheme/boot/pb ] ; then git clone -q -b circa-7.8.0.10-11 https://github.com/racket/pb racket/src/ChezScheme/boot/pb ; else cd racket/src/ChezScheme/boot/pb && git fetch -q origin circa-7.8.0.10-11:remotes/origin/circa-7.8.0.10-11 ; fi
+	cd racket/src/ChezScheme/boot/pb && git checkout -q circa-7.8.0.10-11
 pb-stage:
-	cd racket/src/ChezScheme/boot/pb && git branch circa-7.8.0.10-7
-	cd racket/src/ChezScheme/boot/pb && git checkout circa-7.8.0.10-7
+	cd racket/src/ChezScheme/boot/pb && git branch circa-7.8.0.10-11
+	cd racket/src/ChezScheme/boot/pb && git checkout circa-7.8.0.10-11
 	cd racket/src/ChezScheme/boot/pb && git add . && git commit --amend -m "new build"
 pb-push:
-	cd racket/src/ChezScheme/boot/pb && git push -u origin circa-7.8.0.10-7
+	cd racket/src/ChezScheme/boot/pb && git push -u origin circa-7.8.0.10-11
 win-cs-base:
 	IF "$(RACKET_FOR_BUILD)" == "" $(MAKE) win-bc-then-cs-base SETUP_BOOT_MODE=--boot WIN32_BUILD_LEVEL=bc PLAIN_RACKET=racket\racketbc DISABLE_STATIC_LIBS="$(DISABLE_STATIC_LIBS)" EXTRA_REPOS_BASE="$(EXTRA_REPOS_BASE)" GIT_CLONE_ARGS_qq="$(GIT_CLONE_ARGS_qq)" JOB_OPTIONS="$(JOB_OPTIONS)" PLT_SETUP_OPTIONS="$(PLT_SETUP_OPTIONS)" RACKETBC_SUFFIX="$(RACKETBC_SUFFIX)" RACKETCS_SUFFIX="$(RACKETCS_SUFFIX)"
 	IF not "$(RACKET_FOR_BUILD)" == "" $(MAKE) win-just-cs-base SETUP_BOOT_MODE=--chain DISABLE_STATIC_LIBS="$(DISABLE_STATIC_LIBS)" EXTRA_REPOS_BASE="$(EXTRA_REPOS_BASE)" GIT_CLONE_ARGS_qq="$(GIT_CLONE_ARGS_qq)" JOB_OPTIONS="$(JOB_OPTIONS)" PLT_SETUP_OPTIONS="$(PLT_SETUP_OPTIONS)" RACKETCS_SUFFIX="$(RACKETCS_SUFFIX)" RACKET_FOR_BUILD="$(RACKET_FOR_BUILD)"
diff --git a/racket/src/ChezScheme/c/Mf-base b/racket/src/ChezScheme/c/Mf-base
index c537529a2c..5844703f6c 100644
--- a/racket/src/ChezScheme/c/Mf-base
+++ b/racket/src/ChezScheme/c/Mf-base
@@ -69,9 +69,11 @@ ${kernelobj}: ${Include}/equates.h ${Include}/scheme.h
 ${mainobj}: ${Include}/scheme.h
 ${kernelobj}: ${zlibHeaderDep} ${LZ4HeaderDep}
 gc-011.o gc-par.o gc-ocd.o gc-oce.o: gc.c
-gc-011.o gc-par.o gc-ocd.o: ${Include}/gc-ocd.inc
+gc-011.o gc-ocd.o: ${Include}/gc-ocd.inc
 gc-oce.o: ${Include}/gc-oce.inc
+gc-par.o: ${Include}/gc-par.inc
 vfasl.o: ${Include}/vfasl.inc
+gcwrapper.o: ${Include}/heapcheck.inc
 
 ../zlib/zlib.h ../zlib/zconf.h: ../zlib/configure.log
 
diff --git a/racket/src/ChezScheme/c/alloc.c b/racket/src/ChezScheme/c/alloc.c
index 77fb5f2af4..d361b8c27f 100644
--- a/racket/src/ChezScheme/c/alloc.c
+++ b/racket/src/ChezScheme/c/alloc.c
@@ -80,6 +80,9 @@ void S_alloc_init() {
     S_protect(&S_G.null_immutable_string);
     find_room(tc, space_new, 0, type_typed_object, size_string(0), S_G.null_immutable_string);
     STRTYPE(S_G.null_immutable_string) = (0 << string_length_offset) | type_string | string_immutable_flag;
+
+    S_protect(&S_G.zero_length_bignum);
+    S_G.zero_length_bignum = S_bignum(tc, 0, 0);
   }
 }
 
diff --git a/racket/src/ChezScheme/c/gc.c b/racket/src/ChezScheme/c/gc.c
index 04ebbcaed8..7fd7483d48 100644
--- a/racket/src/ChezScheme/c/gc.c
+++ b/racket/src/ChezScheme/c/gc.c
@@ -29,9 +29,9 @@
    The copying, sweeping, and marking operations that depend on
    object's shape are mostly implemented in "mkgc.ss". That script
    generates "gc-ocd.inc" (for modes where object counting and
-   backpointers are disabled) and "gc-oce.inc". The rest of the
-   implementation here can still depend on representatoin details,
-   though, especially for pairs, weak pairs, and ephemerons.
+   backpointers are disabled), "gc-oce.inc", and "gc-par.inc". The
+   rest of the implementation here can still depend on representation
+   details, though, especially for pairs, weak pairs, and ephemerons.
 
    GC Copying versus Marking
    -------------------------
@@ -124,45 +124,58 @@
    -------------------
 
    Parallel mode runs `sweep_generation` concurrently in multiple
-   threads. It relies on a number of invariants:
+   sweeper threads. It relies on a number of invariants:
 
    * There are no attempts to take tc_mutex during sweeping. To the
      degree that locking is needed (e.g., to allocate new segments),
     `S_use_gc_tc_mutex` redirects to gc_tc_mutex. No other locks
     can be taken while that one is held.
 
-   * To copy from or mark on a segment, a segment-specific lock must
-     be taken.
+   * To copy from or mark on a segment, a sweeper must own the
+     segment. A sweeper during sweeping may encounter a "remote"
+     reference to a segment that it doesn't own; in that case, it
+     registers the object containing the remote reference to be
+     re-swept by the sweeper that owns the target of the reference.
 
-     The lock must be taken before checking anything about objects on
-     the page, including whether the object starts with a forwarding
-     pointer. If a lock acquisition fails, everything must be
-     retryable as the level of the object or segment sweep. For a
-     segment sweep, objects may end up being swept multiple times.
+     A segment is owned by the thread that originally allocated it.
+     When a GC starts, for old-space segments that are owned by
+     threads that do not have a corresponding sweeper, the segment is
+     moved to the main collecting thread's ownership.
 
-     The lock is re-entrant, but re-locking information is held
-     outside the locak in a local variable, instead of being part of
-     the lock state. (That's why an ENABLE_LOCK_ACQUIRE declaration
-     is required in functions that take locks.)
+     Note that copying and marking are constrained so that they don't
+     have to recursively copy or mark. In some cases, this property
+     is achieved by not caring whether a reference goes to an old
+     copy or unmarked object; for example, a record type's size field
+     will be the same in both places, so either copy can be used to
+     determine a record size for copying. A record type's parent field
+     would not be available, however, since it can get overwritten
+     with forwarding information.
 
-   * Lock acquisition must be failable everywhere, with one
-     exception: when an object spans multiple segments, then `mark`
-     may need to set mark bits on multiple segments. In that case, it
-     can wait on locks for the extra pages, because there's an order
-     for the lock-taking: the first segment's lock followed by each
-     later segment.
+   * An object that is marked does not count as "remote".
 
-   * A segment in the target generation is exposed to the pool of
-     collecting threads only after a copy to the target segment is
-     complete. That's a consequence of keeping segments to sweep in a
-     thread-specific list.
+     Sweepers might attempt to access marked-object information at
+     the same time that it is being updated by the owning sweeper.
+     It's ok if the non-owning sweepers get stale information;
+     they'll just send the referencing object to the owning thread
+     for re-sweeping. A write fence ensures that non-owning sweepers
+     do not inspect mark-bitmap bits that have not been initialized.
 
-   * The segment-table lock is required only for writing. When a
-     thread allocates a new segment, that segment becomes relevant
-     only to other threads at the point where an object in the new
-     segment is exposed to the other threads. So, for example, the
-     fence associated with taking a segment lock doubles to ensure
-     that a write has exposed the object.
+   * Normally, a sweeper that encounters a remote reference can
+     continue sweeping and eventually register the remote re-sweep.
+     An object is swept by only one sweeper at a time; if multiple
+     remote references to different sweepers are discovered in an
+     object, it is sent to only one of the remote sweepers, and that
+     sweeper will eventually send on the object to the other sweeper.
+     At worst, each object is swept N times for N sweepers.
+
+     In rare cases, a sweeper cannot fully process an object, because
+     doing so would require inspecting a remote object. For example,
+     a record type's pointer mask or a stack frame's live-pointer
+     mask can be a bignum, and the bignum might be remote. In those
+     cases, the object might have to be sent back to the original
+     sweeper, and so on. In the worst case, the object can be swept
+     more than N times --- but, again, this case rarely happens at
+     all, and sweeping more than N times is very unlikely.
 
    Currently, counting and backreference modes do not support
    parallelism.
@@ -180,7 +193,7 @@
 static ptr copy_stack PROTO((ptr tc_in, ptr old, iptr *length, iptr clength));
 static void resweep_weak_pairs PROTO((ptr tc, seginfo *oldweakspacesegments));
 static void forward_or_bwp PROTO((ptr *pp, ptr p));
 static void sweep_generation PROTO((ptr tc));
-static void sweep_from_stack PROTO((ptr tc));
+static iptr sweep_from_stack PROTO((ptr tc));
 static void enlarge_sweep_stack PROTO((ptr tc));
 static uptr size_object PROTO((ptr p));
 static iptr sweep_typed_object PROTO((ptr tc_in, ptr p, IGEN from_g));
@@ -237,20 +250,33 @@ static void check_pending_measure_ephemerons(ptr tc_in);
 
 #ifdef ENABLE_TIMING
 #include <sys/time.h>
-/* gets milliseconds of real time (not CPU time) */
-static uptr get_time () {
+static uptr get_real_time () {
   struct timeval now;
   gettimeofday(&now, NULL);
   return ((uptr) now.tv_sec) * 1000 + ((uptr) now.tv_usec) / 1000;
 }
-# define GET_TIME(x) uptr x = get_time()
-# define ACCUM_TIME(a, y, x) uptr y = get_time() - x; a += y
+static uptr get_cpu_time () {
+  struct timespec now;
+  clock_gettime(CLOCK_THREAD_CPUTIME_ID, &now);
+  return ((uptr) now.tv_sec) * 1000 + ((uptr) now.tv_nsec) / 1000000;
+}
+# define GET_REAL_TIME(x) uptr x = get_real_time()
+# define GET_CPU_TIME(x) uptr x = get_cpu_time()
+# define ACCUM_REAL_TIME(a, y, x) uptr y = get_real_time() - x; a += y
+# define ACCUM_CPU_TIME(a, y, x) uptr y = get_cpu_time() - x; a += y
 # define REPORT_TIME(e) e
 static uptr collect_accum, all_accum, par_accum;
+static int percentage(iptr n, iptr d) { return (n * 100) / d; }
+# define COUNT_SWEPT_BYTES(start, end) num_swept_bytes += ((uptr)TO_PTR(end) - (uptr)TO_PTR(start))
+# define ADJUST_COUNTER(e) e
 #else
-# define GET_TIME(x) do { } while (0)
-# define ACCUM_TIME(a, y, x) do { } while (0)
+# define GET_REAL_TIME(x) do { } while (0)
+# define GET_CPU_TIME(x) do { } while (0)
+# define ACCUM_REAL_TIME(a, y, x) do { } while (0)
+# define ACCUM_CPU_TIME(a, y, x) do { } while (0)
 # define REPORT_TIME(e) do { } while (0)
+# define COUNT_SWEPT_BYTES(start, end) do { } while (0)
+# define ADJUST_COUNTER(e) do { } while (0)
 #endif
 
 #if defined(MIN_TG) && defined(MAX_TG)
@@ -259,6 +285,12 @@ static uptr collect_accum, all_accum, par_accum;
 # endif
 #endif
 
+#define main_sweeper_index maximum_parallel_collect_threads
+
+/* Use the `REMOTESWEEPER` field to assign the sweeper before sweepers
+   actually start: */
+#define WILL_BE_SWEEPER(tc) 
REMOTESWEEPER(tc) + /* #define DEBUG */ /* initialized and used each gc cycle. any others should be defined in globals.h */ @@ -335,78 +367,94 @@ static ptr sweep_from; # define ADD_BACKREFERENCE(p, from_g) #endif +typedef struct remote_range { + ISPC s; + IGEN g; + ptr start, end; + struct remote_range *next; +} remote_range; + #if !defined(PTHREADS) # undef ENABLE_PARALLEL #endif #ifdef ENABLE_PARALLEL -# define ENABLE_LOCK_ACQUIRE int old_lock_state; -# define SEGMENT_LOCK_ACQUIRE(si) \ - ((si->lock == tc_in) \ - ? (old_lock_state = 0, 1) \ - : (old_lock_state = 1, AS_IMPLICIT_ATOMIC(int, S_cas_load_acquire_ptr(&si->lock, (ptr)0, tc_in)))) -# define SEGMENT_LOCK_RELEASE(si) do { \ - if (old_lock_state) { \ - S_store_release(); \ - BEGIN_IMPLICIT_ATOMIC(); \ - si->lock = (ptr)0; \ - END_IMPLICIT_ATOMIC(); \ - } \ - } while (0) -# define SEGMENT_LOCK_MUST_ACQUIRE(si) do { } while (!SEGMENT_LOCK_ACQUIRE(si)) -# define RECORD_LOCK_FAILED(tc, si) LOCKSTATUS(tc) = Sfalse -# define CLEAR_LOCK_FAILED(tc) LOCKSTATUS(tc) = Strue -# define CHECK_LOCK_FAILED(tc) (LOCKSTATUS(tc) == Sfalse) -# define SAVE_SWEEP_RANGE_FOR_LATER(tc, s, g, slp, sl, nl) save_sweep_range_for_later(tc, s, g, slp, sl, nl) -# define SAVE_SWEEP_SEGMENT_FOR_LATER(tc, si) save_sweep_segment_for_later(tc, si) -# define GC_TC_MUTEX_ACQUIRE() gc_tc_mutex_acquire() +# define GC_TC_MUTEX_ACQUIRE() gc_tc_mutex_acquire() # define GC_TC_MUTEX_RELEASE() gc_tc_mutex_release() +# define SEGMENT_IS_LOCAL(si, p) ((SWEEPER(si->creator_tc) == SWEEPER(tc_in)) || marked(si, p)) +# define RECORD_REMOTE_RANGE_TO(tc, start, size, sweeper) do { \ + ptr START = TO_PTR(UNTYPE_ANY(start)); \ + ptr END = (ptr)((uptr)START + (size)); \ + if ((uptr)START < (uptr)REMOTERANGESTART(tc)) \ + REMOTERANGESTART(tc) = START; \ + if ((uptr)END > (uptr)REMOTERANGEEND(tc)) \ + REMOTERANGEEND(tc) = END; \ + REMOTESWEEPER(tc) = sweeper; \ + } while (0) +# define RECORD_REMOTE_RANGE(tc, start, size, si) RECORD_REMOTE_RANGE_TO(tc, start, size, SWEEPER(si->creator_tc)) +# define FLUSH_REMOTE_RANGE(tc, s, g) do { \ + if (REMOTERANGESTART(tc) != (ptr)(uptr)-1) { \ + flush_remote_range(tc, s, g); \ + } \ + } while (0) + static void gather_active_sweepers(); +static void reassign_segment_creator(ptr tc, seginfo *si); static IBOOL sweeper_started(int i); static void parallel_sweep_dirty_and_generation(ptr tc); -static void save_sweep_range_for_later(ptr tc_in, ISPC s, IGEN g, ptr *slp, ptr *sl, ptr *nl); -static void save_sweep_segment_for_later(ptr tc_in, seginfo *si); -static int gate_postponed(ptr tc, int status); +static iptr sweep_generation_trading_work(ptr tc); -#define SWEEPER_NONE 0 -#define SWEEPER_READY 1 -#define SWEEPER_SWEEPING 2 +static void flush_remote_range(ptr tc, ISPC s, IGEN g); +static remote_range *send_and_receive_remote_ranges(ptr tc); + +#define SWEEPER_NONE 0 +#define SWEEPER_READY 1 +#define SWEEPER_SWEEPING 2 +#define SWEEPER_WAITING_FOR_WORK 3 typedef struct { int status; - s_thread_cond_t done_cond; + s_thread_cond_t done_cond, work_cond; ptr sweep_tc; ptr thread; /* not 0 => thread to sweep on start */ seginfo *dirty_segments[DIRTY_SEGMENT_LISTS]; + /* modified only by owning sweeper: */ + remote_range *ranges_to_send[maximum_parallel_collect_threads+1]; + /* modified with sweeper mutex held: */ + remote_range *ranges_received; +#ifdef ENABLE_TIMING + int remote_ranges_sent, remote_ranges_received; + iptr remote_ranges_bytes_sent, remote_ranges_bytes_received; +#endif } gc_thread_data; -static gc_thread_data 
sweepers[maximum_parallel_collect_threads]; +static gc_thread_data sweepers[maximum_parallel_collect_threads+1]; static int num_sweepers; static seginfo *main_dirty_segments[DIRTY_SEGMENT_LISTS]; #else -# define ENABLE_LOCK_ACQUIRE /* empty */ -# define SEGMENT_LOCK_ACQUIRE(si) 1 -# define SEGMENT_LOCK_MUST_ACQUIRE(si) do { } while (0) -# define SEGMENT_LOCK_RELEASE(si) do { } while (0) -# define RECORD_LOCK_FAILED(tc, si) do { } while (0) -# define CLEAR_LOCK_FAILED(tc) do { } while (0) -# define CHECK_LOCK_FAILED(tc) 0 -# define SAVE_SWEEP_RANGE_FOR_LATER(tc, s, g, slp, sl, nl) do { } while (0) -# define SAVE_SWEEP_SEGMENT_FOR_LATER(tc, si) do { } while (0) + # define GC_TC_MUTEX_ACQUIRE() do { } while (0) # define GC_TC_MUTEX_RELEASE() do { } while (0) + +# define SEGMENT_IS_LOCAL(si, p) 1 +# define RECORD_REMOTE_RANGE_TO(tc, start, size, sweeper) do { } while (0) +# define RECORD_REMOTE_RANGE(tc, start, size, si) do { } while (0) +# define FLUSH_REMOTE_RANGE(tc, s, g) do { } while (0) + # define gather_active_sweepers() do { } while (0) +# define reassign_segment_creator(tc, si) do { } while (0) # define parallel_sweep_dirty_and_generation(tc) do { sweep_dirty(tc); sweep_generation(tc); } while (0) +# define send_and_receive_remote_ranges(tc) NULL static void sweep_dirty PROTO((ptr tc)); + #endif #define SWEEP_NO_CHANGE 0 #define SWEEP_CHANGE_PROGRESS 1 -#define SWEEP_CHANGE_POSTPONED 2 #if ptr_alignment == 2 # define record_full_marked_mask 0x55 @@ -436,11 +484,14 @@ uptr list_length(ptr ls) { } #endif -#define init_mask(tc, dest, tg, init) { \ - find_room_voidp(tc, space_data, tg, ptr_align(segment_bitmap_bytes), dest); \ - memset(dest, init, segment_bitmap_bytes); \ +#define init_mask(tc, dest, tg, init) do { \ + octet *MASK; \ + find_room_voidp(tc, space_data, tg, ptr_align(segment_bitmap_bytes), MASK); \ + memset(MASK, init, segment_bitmap_bytes); \ + STORE_FENCE(); \ + dest = MASK; \ BITMASKOVERHEAD(tc, tg) += ptr_align(segment_bitmap_bytes); \ - } + } while (0) #define marked(si, p) (si->marked_mask && (si->marked_mask[segment_bitmap_byte(p)] & segment_bitmap_bit(p))) @@ -495,41 +546,39 @@ static int flonum_is_forwarded_p(ptr p, seginfo *si) { /* use relocate_pure for newspace fields that can't point to younger objects or where there's no need to track generations */ -#define relocate_pure(ppp) do { \ +#define relocate_pure(ppp, start, size) do { \ ptr* PPP = ppp; ptr PP = *PPP; \ - relocate_pure_help(PPP, PP); \ + relocate_pure_help(PPP, PP, start, size); \ } while (0) -#define relocate_pure_help(ppp, pp) do { \ +#define relocate_pure_help(ppp, pp, start, size) do { \ seginfo *SI; \ if (!IMMEDIATE(pp) && (SI = MaybeSegInfo(ptr_get_segment(pp))) != NULL) { \ if (SI->old_space) \ - relocate_pure_help_help(ppp, pp, SI); \ + relocate_pure_help_help(ppp, pp, SI, start, size); \ ELSE_MEASURE_NONOLDSPACE(pp) \ } \ } while (0) -#define relocate_pure_help_help(ppp, pp, si) do { \ - if (SEGMENT_LOCK_ACQUIRE(si)) { \ +#define relocate_pure_help_help(ppp, pp, si, start, size) do { \ + if (SEGMENT_IS_LOCAL(si, pp)) { \ if (FORWARDEDP(pp, si)) \ *ppp = GET_FWDADDRESS(pp); \ else if (!new_marked(si, pp)) \ mark_or_copy_pure(ppp, pp, si); \ - SEGMENT_LOCK_RELEASE(si); \ } else \ - RECORD_LOCK_FAILED(tc_in, si); \ + RECORD_REMOTE_RANGE(tc_in, start, size, si); \ } while (0) -#define relocate_code(pp, si) do { \ +#define relocate_code(pp, si, start, size) do { \ if (si->old_space) { \ - if (SEGMENT_LOCK_ACQUIRE(si)) { \ + if (SEGMENT_IS_LOCAL(si, pp)) { \ if (FWDMARKER(pp) == forward_marker) \ 
pp = GET_FWDADDRESS(pp); \ else if (!new_marked(si, pp)) \ mark_or_copy_pure(&pp, pp, si); \ - SEGMENT_LOCK_RELEASE(si); \ } else \ - RECORD_LOCK_FAILED(tc_in, si); \ + RECORD_REMOTE_RANGE(tc_in, start, size, si); \ } ELSE_MEASURE_NONOLDSPACE(pp) \ } while (0) @@ -545,39 +594,39 @@ static int flonum_is_forwarded_p(ptr p, seginfo *si) { #ifdef NO_DIRTY_NEWSPACE_POINTERS -# define relocate_impure_help(PPP, PP, FROM_G) do {(void)FROM_G; relocate_pure_help(PPP, PP);} while (0) -# define relocate_impure(PPP, FROM_G) do {(void)FROM_G; relocate_pure(PPP);} while (0) +# define relocate_impure_help(PPP, PP, FROM_G, start, size) do {(void)FROM_G; relocate_pure_help(PPP, PP, start, size);} while (0) +# define relocate_impure(PPP, FROM_G, start, size) do {(void)FROM_G; relocate_pure(PPP, start, size);} while (0) #else /* !NO_DIRTY_NEWSPACE_POINTERS */ -#define relocate_impure(ppp, from_g) do { \ +#define relocate_impure(ppp, from_g, start, size) do { \ ptr* PPP = ppp; ptr PP = *PPP; IGEN FROM_G = from_g; \ - relocate_impure_help(PPP, PP, FROM_G); \ + relocate_impure_help(PPP, PP, FROM_G, start, size); \ } while (0) -#define relocate_impure_help(ppp, pp, from_g) do { \ +#define relocate_impure_help(ppp, pp, from_g, start, size) do { \ seginfo *SI; \ if (!IMMEDIATE(pp) && (SI = MaybeSegInfo(ptr_get_segment(pp))) != NULL) { \ if (SI->old_space) \ - relocate_impure_help_help(ppp, pp, from_g, SI); \ + relocate_impure_help_help(ppp, pp, from_g, SI, start, size); \ ELSE_MEASURE_NONOLDSPACE(pp) \ } \ } while (0) -#define relocate_impure_help_help(ppp, pp, from_g, si) do { \ +#define relocate_impure_help_help(ppp, pp, from_g, si, start, size) do { \ IGEN __to_g; \ - if (SEGMENT_LOCK_ACQUIRE(si)) { \ + if (SEGMENT_IS_LOCAL(si, pp)) { \ if (FORWARDEDP(pp, si)) { \ *ppp = GET_FWDADDRESS(pp); \ __to_g = TARGET_GENERATION(si); \ - if (__to_g < from_g) S_record_new_dirty_card(tc_in, ppp, __to_g); \ } else if (!new_marked(si, pp)) { \ mark_or_copy_impure(__to_g, ppp, pp, from_g, si); \ - if (__to_g < from_g) S_record_new_dirty_card(tc_in, ppp, __to_g); \ + } else { \ + __to_g = TARGET_GENERATION(si); \ } \ - SEGMENT_LOCK_RELEASE(si); \ + if (__to_g < from_g) S_record_new_dirty_card(tc_in, ppp, __to_g); \ } else \ - RECORD_LOCK_FAILED(tc_in, si); \ + RECORD_REMOTE_RANGE(tc_in, start, size, si); \ } while (0) #define mark_or_copy_impure(to_g, dest, p, from_g, si) do { \ @@ -589,13 +638,13 @@ static int flonum_is_forwarded_p(ptr p, seginfo *si) { #endif /* !NO_DIRTY_NEWSPACE_POINTERS */ -#define relocate_dirty(PPP, YOUNGEST) do { \ +#define relocate_dirty(PPP, YOUNGEST, start, size) do { \ seginfo *_si; ptr *_ppp = PPP, _pp = *_ppp; IGEN _pg; \ if (!IMMEDIATE(_pp) && (_si = MaybeSegInfo(ptr_get_segment(_pp))) != NULL) { \ if (!_si->old_space) { \ _pg = _si->generation; \ } else { \ - if (SEGMENT_LOCK_ACQUIRE(_si)) { \ + if (SEGMENT_IS_LOCAL(_si, _pp)) { \ if (FORWARDEDP(_pp, _si)) { \ *_ppp = GET_FWDADDRESS(_pp); \ _pg = TARGET_GENERATION(_si); \ @@ -606,9 +655,8 @@ static int flonum_is_forwarded_p(ptr p, seginfo *si) { } else { \ _pg = copy(tc_in, _pp, _si, _ppp); \ } \ - SEGMENT_LOCK_RELEASE(_si); \ } else { \ - RECORD_LOCK_FAILED(tc_in, _si); \ + RECORD_REMOTE_RANGE(tc_in, start, size, _si); \ _pg = 0xff; \ } \ } \ @@ -620,50 +668,16 @@ static int flonum_is_forwarded_p(ptr p, seginfo *si) { # define is_counting_root(si, p) (si->counting_mask && (si->counting_mask[segment_bitmap_byte(p)] & segment_bitmap_bit(p))) #endif -static void do_relocate_indirect(ptr tc_in, ptr p) { - ENABLE_LOCK_ACQUIRE - relocate_pure(&p); -} 
-#define relocate_indirect(p) do_relocate_indirect(tc_in, p) - #ifdef ENABLE_PARALLEL - -/* The `_now` variants of various functions/macros handle the possibly - of a lock failure by retrying immediately. A lock failure really - shouldn't happend where the `_now` forms are used, but because - locking may be implemented with a CAS that can fail spuriously on - processors like Arm, the lock can fail, anyway. */ - -static void do_relocate_pure_now(ptr tc_in, ptr *pp) { - ENABLE_LOCK_ACQUIRE - relocate_pure(pp); - while (CHECK_LOCK_FAILED(tc_in)) { - CLEAR_LOCK_FAILED(tc_in); - relocate_pure(pp); - } +static void do_relocate_indirect(ptr tc_in, ptr p, ptr* start, uptr len) { + relocate_pure(&p, start, len); } - -static void do_mark_or_copy_pure_now(ptr tc_in, ptr *dest, ptr pp, seginfo *si) { - do { - CLEAR_LOCK_FAILED(tc_in); - mark_or_copy_pure(dest, pp, si); - } while (CHECK_LOCK_FAILED(tc_in)); -} - -# define relocate_pure_now(pp) do_relocate_pure_now(tc_in, pp) -# define mark_or_copy_pure_now(dest, pp, si) do_mark_or_copy_pure_now(tc, dest, pp, si) - -static void sweep_thread_now(ptr tc_in, ptr p) { - do { - CLEAR_LOCK_FAILED(tc_in); - sweep_thread(tc_in, p); - } while (CHECK_LOCK_FAILED(tc_in)); -} - +# define relocate_indirect(p, start, len) do_relocate_indirect(tc_in, p, start, len) #else -# define relocate_pure_now(pp) relocate_pure(pp) -# define mark_or_copy_pure_now(tc, pp, si) mark_or_copy_pure(tc, pp, si) -# define sweep_thread_now(tc, thread) sweep_thread(tc, thread) +static void do_relocate_indirect(ptr tc_in, ptr p) { + relocate_pure(&p, NULL, 0); +} +# define relocate_indirect(p, start, len) do_relocate_indirect(tc_in, p) #endif FORCEINLINE void check_triggers(ptr tc_in, seginfo *si) { @@ -686,7 +700,9 @@ FORCEINLINE void check_triggers(ptr tc_in, seginfo *si) { } } -#ifndef ENABLE_OBJECT_COUNTS +#if defined(ENABLE_PARALLEL) +# include "gc-par.inc" +#elif !defined(ENABLE_OBJECT_COUNTS) # include "gc-ocd.inc" #else # include "gc-oce.inc" @@ -704,7 +720,7 @@ static void sweep_in_old(ptr tc_in, ptr p) { /* Detect all the cases when we need to give up on in-place sweeping: */ if (object_directly_refers_to_self(p)) { - relocate_pure_now(&p); + relocate_pure(&p, NULL, 0); return; } @@ -826,7 +842,7 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { count_root_t *count_roots; #endif - GET_TIME(astart); + GET_REAL_TIME(astart); /* flush instruction cache: effectively clear_code_mod but safer */ for (ls = S_threads; ls != Snil; ls = Scdr(ls)) { @@ -841,7 +857,7 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { #endif /* !NO_DIRTY_NEWSPACE_POINTERS */ S_G.must_mark_gen0 = 0; - /* map `tc`s of rendezvous threads to sweeping threads */ + /* map `tc`s of rendezvous threads to sweeping threads, setting WILL_BE_SWEEPER */ gather_active_sweepers(); for (ls = S_threads; ls != Snil; ls = Scdr(ls)) { @@ -849,14 +865,20 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { S_scan_dirty(TO_VOIDP(EAP(t_tc)), TO_VOIDP(REAL_EAP(t_tc))); EAP(t_tc) = REAL_EAP(t_tc) = AP(t_tc) = (ptr)0; + /* Initially, map every context to the same sweeper, so + we can relocate some initial objects */ + SWEEPER(t_tc) = main_sweeper_index; + + /* If WILL_BE_SWEEPER() is not already set right, set it to + `main_sweeper_index`. 
*/ #ifdef ENABLE_PARALLEL - if (SWEEPER(t_tc) != -1) { - if ((SWEEPER(t_tc) >= num_sweepers) - || (sweepers[SWEEPER(t_tc)].sweep_tc != t_tc)) - SWEEPER(t_tc) = -1; + if (WILL_BE_SWEEPER(t_tc) != main_sweeper_index) { + if ((WILL_BE_SWEEPER(t_tc) >= num_sweepers) + || (sweepers[WILL_BE_SWEEPER(t_tc)].sweep_tc != t_tc)) + WILL_BE_SWEEPER(t_tc) = main_sweeper_index; } #else - SWEEPER(t_tc) = -1; + WILL_BE_SWEEPER(t_tc) = main_sweeper_index; #endif /* clear thread-local allocation: */ @@ -871,7 +893,7 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { } } - if ((t_tc != tc) && (SWEEPER(t_tc) == -1)) { + if ((t_tc != tc) && (WILL_BE_SWEEPER(t_tc) == main_sweeper_index)) { /* close off any current allocation in MAX_TG, and ensure that end-of-segment markers are otherwise set (in case that's needed for dirty-byte sweeping) */ @@ -892,6 +914,9 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { for (g = MIN_TG; g <= MAX_TG; g++) BITMASKOVERHEAD(t_tc, g) = 0; for (s = 0; s <= max_real_space; s++) { + /* need to save `NEXTLOC_AT` to ensure that dirty sweeping + doesn't overshoot into newly allocated objects */ + ORIGNEXTLOC(t_tc, s) = NEXTLOC_AT(t_tc, s, MAX_TG); SWEEPLOC_AT(t_tc, s, MAX_TG) = NEXTLOC_AT(t_tc, s, MAX_TG); for (g = MIN_TG; g <= MAX_TG; g++) SWEEPNEXT_AT(t_tc, s, g) = (ptr)0; @@ -963,6 +988,7 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { si->marked_mask = NULL; /* clear old mark bits, if any */ si->marked_count = 0; si->min_dirty_byte = 0; /* prevent registering as dirty while GCing */ + reassign_segment_creator(tc, si); } S_G.occupied_segments[g][s] = NULL; @@ -1076,7 +1102,7 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { if (!si->old_space || FORWARDEDP(p, si) || marked(si, p) || !count_roots[i].weak) { /* reached or older; sweep transitively */ - relocate_pure_now(&p); + relocate_pure(&p, NULL, 0); sweep(tc, p, TARGET_GENERATION(si)); ADD_BACKREFERENCE(p, si->generation); sweep_generation(tc); @@ -1168,25 +1194,25 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { thread = Scar(ls); #ifdef ENABLE_PARALLEL - t_tc = (ptr)THREADTC(Scar(ls)); - if (SWEEPER(t_tc) != -1) { + t_tc = (ptr)THREADTC(thread); + if (WILL_BE_SWEEPER(t_tc) != main_sweeper_index) { if (!OLDSPACE(thread)) { /* sweep in sweeper thread: */ - sweepers[SWEEPER(t_tc)].thread = thread; + sweepers[WILL_BE_SWEEPER(t_tc)].thread = thread; } else { /* relocate now, so main sweeping will happen in sweeper thread */ ptr tc_in = t_tc; /* shadows enclosing `tc_in` binding */ - relocate_pure_now(&thread); + relocate_pure(&thread, NULL, 0); } } else #endif if (!OLDSPACE(thread)) - sweep_thread_now(tc, thread); + sweep_thread(tc, thread); } - relocate_pure_now(&S_threads); + relocate_pure(&S_threads, NULL, 0); - GET_TIME(start); + GET_REAL_TIME(start); /* relocate nonempty oldspace symbols and set up list of buckets to rebuild later */ buckets_to_rebuild = NULL; @@ -1215,7 +1241,7 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { (SYMVAL(sym) != sunbound || SYMPLIST(sym) != Snil || SYMSPLIST(sym) != Snil)) { seginfo *sym_si = SegInfo(ptr_get_segment(sym)); if (!new_marked(sym_si, sym)) - mark_or_copy_pure_now(&sym, sym, sym_si); + mark_or_copy_pure(&sym, sym, sym_si); } } S_G.buckets_of_generation[g] = NULL; @@ -1224,26 +1250,9 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { /* relocate the protected C pointers */ {uptr i; for (i = 0; i < S_G.protect_next; i++) - relocate_pure_now(S_G.protected[i]); + relocate_pure(S_G.protected[i], NULL, 0); } -#ifdef ENABLE_PARALLEL - /* make sure threads with sweepers have terminated older-generation - 
pages before sweeping dirty objects */ - { int i; - for (i = 0; i < num_sweepers; i++) { - ptr t_tc = sweepers[i].sweep_tc; - for (s = 0; s <= max_real_space; s++) { - for (g = MAX_TG; g <= static_generation; g++) { - ptr old = NEXTLOC_AT(t_tc, s, g); - if (old != (ptr)0) - *(ptr*)TO_VOIDP(old) = forward_marker; - } - } - } - } -#endif - /* sweep older locked and unlocked objects that are on `space_new` segments, because we can't find dirty writes there */ for (g = MAX_CG + 1; g <= static_generation; INCRGEN(g)) { @@ -1359,7 +1368,7 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { /* if tconc was old it's been forwarded */ tconc = GUARDIANTCONC(ls); - WITH_TOP_BACKREFERENCE(tconc, relocate_pure_now(&rep)); + WITH_TOP_BACKREFERENCE(tconc, relocate_pure(&rep, NULL, 0)); old_end = Scdr(tconc); new_end = S_cons_in(tc, space_impure, 0, FIX(0), FIX(0)); @@ -1407,7 +1416,7 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { } rep = GUARDIANREP(ls); - WITH_TOP_BACKREFERENCE(tconc, relocate_pure_now(&rep)); + WITH_TOP_BACKREFERENCE(tconc, relocate_pure(&rep, NULL, 0)); relocate_rep = 1; #ifdef ENABLE_OBJECT_COUNTS @@ -1509,8 +1518,9 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { /* still-pending ephemerons all go to bwp */ finish_pending_ephemerons(tc, oldspacesegments); - ACCUM_TIME(collect_accum, step, start); - REPORT_TIME(fprintf(stderr, "%d col +%ld ms %ld ms\n", MAX_CG, step, collect_accum)); + ACCUM_REAL_TIME(collect_accum, step, start); + REPORT_TIME(fprintf(stderr, "%d coll +%ld ms %ld ms [real time]\n", + MAX_CG, step, collect_accum)); /* post-gc oblist handling. rebuild old buckets in the target generation, pruning unforwarded symbols */ { bucket_list *bl; bucket *b, *bnext; bucket_pointer_list *bpl; bucket **pb; ptr sym; @@ -1729,8 +1739,8 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { for (g = MIN_TG; g <= MAX_TG; g++) S_G.bitmask_overhead[g] += BITMASKOVERHEAD(tc_in, g); - ACCUM_TIME(all_accum, astep, astart); - REPORT_TIME(fprintf(stderr, "%d all +%ld ms %ld ms\n", MAX_CG, astep, all_accum)); + ACCUM_REAL_TIME(all_accum, astep, astart); + REPORT_TIME(fprintf(stderr, "%d all +%ld ms %ld ms [real time]\n", MAX_CG, astep, all_accum)); if (count_roots_ls != Sfalse) { #ifdef ENABLE_OBJECT_COUNTS @@ -1742,41 +1752,71 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { return Svoid; } -#define sweep_space(s, from_g, body) { \ +#ifdef ENABLE_PARALLEL + +static void reassign_segment_creator(ptr tc, seginfo *si) { + int i; + + for (i = 0; i < num_sweepers; i++) + if (sweepers[i].sweep_tc == si->creator_tc) + return; + + si->creator_tc = tc; +} + +static void flush_remote_range(ptr tc, ISPC s, IGEN g) { + remote_range *r; + int me = SWEEPER(tc); + int they = REMOTESWEEPER(tc); + + find_room_voidp(tc, space_data, 0, ptr_align(sizeof(remote_range)), r); + BITMASKOVERHEAD(tc, 0) += ptr_align(sizeof(remote_range)); + r->s = s; + r->g = g; + r->start = REMOTERANGESTART(tc); + r->end = REMOTERANGEEND(tc); + r->next = sweepers[me].ranges_to_send[they]; + sweepers[me].ranges_to_send[they] = r; + + REMOTERANGESTART(tc) = (ptr)(uptr)-1; + REMOTERANGEEND(tc) = (ptr)0; + + SWEEPCHANGE(tc) = SWEEP_CHANGE_PROGRESS; +} + +#endif + +#define sweep_space(s, from_g, body) do { \ + sweep_space_segments(s, from_g, body); \ + sweep_space_bump_range(s, from_g, body); \ + } while (0) + +#define sweep_space_segments(s, from_g, body) do { \ while ((si = (seginfo *)TO_VOIDP(SWEEPNEXT_AT(tc_in, s, from_g))) != NULL) { \ SWEEPNEXT_AT(tc_in, s, from_g) = TO_PTR(si->sweep_next); \ pp = TO_VOIDP(si->sweep_start); \ while ((p = *pp) 
!= forward_marker) \
      body \
-    if (CHECK_LOCK_FAILED(tc_in)) { \
-      SAVE_SWEEP_SEGMENT_FOR_LATER(tc_in, si); \
-      break; \
-    } else { \
-      COUNT_SWEPT_BYTES(si->sweep_start, pp); \
-      save_resweep(s, si); \
-    } \
+    COUNT_SWEPT_BYTES(si->sweep_start, pp); \
+    FLUSH_REMOTE_RANGE(tc_in, s, from_g); \
+    save_resweep(s, si); \
   } \
+ } while (0)
+
+#define sweep_space_bump_range(s, from_g, body) do { \
   slp = &SWEEPLOC_AT(tc_in, s, from_g); \
   nlp = &NEXTLOC_AT(tc_in, s, from_g); \
-  sweep_space_range(s, from_g, body) \
- }
-
-#define sweep_space_range(s, from_g, body) { \
-  while ((sl = TO_VOIDP(*slp)) != (nl = TO_VOIDP(*nlp))) { \
-    *slp = TO_PTR(nl); \
-    pp = sl; \
-    while (pp != nl) { \
-      p = *pp; \
-      body \
-    } \
-    if (CHECK_LOCK_FAILED(tc_in)) { \
-      SAVE_SWEEP_RANGE_FOR_LATER(tc_in, s, from_g, slp, sl, nl); \
-      break; \
-    } else { \
-      COUNT_SWEPT_BYTES(sl, nl); \
-    } \
-  } \
- }
+  while ((sl = TO_VOIDP(*slp)) != (nl = TO_VOIDP(*nlp))) { \
+    *slp = TO_PTR(nl); \
+    pp = sl; \
+    while (pp != nl) { \
+      p = *pp; \
+      body \
+    } \
+    COUNT_SWEPT_BYTES(sl, nl); \
+    FLUSH_REMOTE_RANGE(tc_in, s, from_g); \
+  } \
+ } while (0)
 
 #define save_resweep(s, si) do { \
   if (s == space_weakpair) { \
@@ -1787,46 +1827,6 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) {
   } \
 } while (0)
 
-#ifdef ENABLE_TIMING
-# define COUNT_SWEPT_BYTES(start, end) num_swept_bytes += ((uptr)TO_PTR(end) - (uptr)TO_PTR(start))
-#else
-# define COUNT_SWEPT_BYTES(start, end) do { } while (0)
-#endif
-
-#ifdef ENABLE_PARALLEL
-
-static void save_sweep_segment_for_later(ptr tc_in, seginfo *si) {
-  ISPC s = si->space; IGEN g = si->generation;
-  CLEAR_LOCK_FAILED(tc_in);
-  SWEEPCHANGE(tc_in) = SWEEP_CHANGE_POSTPONED;
-  si->sweep_next = TO_VOIDP(SWEEPNEXT_AT(tc_in, s, g));
-  SWEEPNEXT_AT(tc_in, s, g) = TO_PTR(si);
-}
-
-static void save_sweep_range_for_later(ptr tc_in, ISPC s, IGEN g, ptr *slp, ptr *sl, ptr *nl) {
-  CLEAR_LOCK_FAILED(tc_in);
-  SWEEPCHANGE(tc_in) = SWEEP_CHANGE_POSTPONED;
-  /* check whether this segment is still the thread-local allocation segment: */
-  if (TO_VOIDP(*slp) == nl) {
-    *slp = sl;
-  } else {
-    /* need to set the sweep pointer in the segment, which must be one
-       of the ones queued to sweep */
-    seginfo *si;
-    si = SWEEPNEXT_AT(tc_in, s, g);
-    while (1) {
-      if (si == NULL) S_error_abort("could not find segment for sweep range");
-      if (TO_VOIDP(si->sweep_start) == nl) {
-        si->sweep_start = TO_PTR(sl);
-        return;
-      }
-      si = si->sweep_next;
-    }
-  }
-}
-
-#endif
-
 static void resweep_weak_pairs(ptr tc_in, seginfo *oldweakspacesegments) {
   IGEN from_g;
   ptr *pp, p, *nl;
@@ -1910,100 +1910,194 @@ static void forward_or_bwp(pp, p) ptr *pp; ptr p; {
 }
 
 static iptr sweep_generation_pass(ptr tc_in) {
-  ENABLE_LOCK_ACQUIRE
-  ptr *slp, *nlp; ptr *pp, p, *nl, *sl; IGEN from_g;
+  ptr *slp, *nlp; ptr *pp, *ppn, p, *nl, *sl; IGEN from_g;
   seginfo *si;
   iptr num_swept_bytes = 0;
+  remote_range *received_ranges;
 
   do {
     SWEEPCHANGE(tc_in) = SWEEP_NO_CHANGE;
 
-    sweep_from_stack(tc_in);
+    num_swept_bytes += sweep_from_stack(tc_in);
 
     for (from_g = MIN_TG; from_g <= MAX_TG; from_g += 1) {
-      
+
       sweep_space(space_impure, from_g, {
-        SET_BACKREFERENCE(TYPE(TO_PTR(pp), type_pair)); /* only pairs put here in backreference mode */
-        relocate_impure_help(pp, p, from_g);
-        p = *(pp += 1);
-        relocate_impure_help(pp, p, from_g);
-        pp += 1;
-      })
+        /* only pairs in these spaces in backreference mode */
+        SET_BACKREFERENCE(TYPE(TO_PTR(pp), type_pair));
+        relocate_impure_help(pp, p, from_g, pp, 2 * ptr_bytes);
+        ppn = pp + 1;
+        p = *ppn;
+        relocate_impure_help(ppn, p, 
from_g, pp, 2 * ptr_bytes); + pp = ppn + 1; + }); SET_BACKREFERENCE(Sfalse) sweep_space(space_symbol, from_g, { p = TYPE(TO_PTR(pp), type_symbol); sweep_symbol(tc_in, p, from_g); pp += size_symbol / sizeof(ptr); - }) + }); sweep_space(space_port, from_g, { p = TYPE(TO_PTR(pp), type_typed_object); sweep_port(tc_in, p, from_g); pp += size_port / sizeof(ptr); - }) + }); sweep_space(space_weakpair, from_g, { SET_BACKREFERENCE(TYPE(TO_PTR(pp), type_pair)) - p = *(pp += 1); - relocate_impure_help(pp, p, from_g); - pp += 1; - }) + ppn = pp + 1; + p = *ppn; + relocate_impure_help(ppn, p, from_g, pp, size_pair); + pp = ppn + 1; + }); SET_BACKREFERENCE(Sfalse) sweep_space(space_ephemeron, from_g, { p = TYPE(TO_PTR(pp), type_pair); add_ephemeron_to_pending(tc_in, p); pp += size_ephemeron / sizeof(ptr); - }) + }); sweep_space(space_pure, from_g, { SET_BACKREFERENCE(TYPE(TO_PTR(pp), type_pair)) /* only pairs put here in backreference mode */ - relocate_impure_help(pp, p, from_g); - p = *(pp += 1); - relocate_impure_help(pp, p, from_g); - pp += 1; - }) + relocate_impure_help(pp, p, from_g, pp, 2 * ptr_bytes); + ppn = pp + 1; + p = *ppn; + relocate_impure_help(ppn, p, from_g, pp, 2 * ptr_bytes); + pp = ppn + 1; + }); SET_BACKREFERENCE(Sfalse) sweep_space(space_continuation, from_g, { p = TYPE(TO_PTR(pp), type_closure); sweep_continuation(tc_in, p, from_g); pp += size_continuation / sizeof(ptr); - }) + }); sweep_space(space_pure_typed_object, from_g, { p = TYPE(TO_PTR(pp), type_typed_object); pp = TO_VOIDP(((uptr)TO_PTR(pp) + sweep_typed_object(tc_in, p, from_g))); - }) + }); sweep_space(space_code, from_g, { p = TYPE(TO_PTR(pp), type_typed_object); sweep_code_object(tc_in, p, from_g); pp += size_code(CODELEN(p)) / sizeof(ptr); - }) + }); sweep_space(space_impure_record, from_g, { p = TYPE(TO_PTR(pp), type_typed_object); sweep_record(tc_in, p, from_g); pp = TO_VOIDP((iptr)TO_PTR(pp) + size_record_inst(UNFIX(RECORDDESCSIZE(RECORDINSTTYPE(p))))); - }) + }); - /* space used only as needed for backreferences: */ + /* space used only as needed for backreferences: */ sweep_space(space_impure_typed_object, from_g, { p = TYPE(TO_PTR(pp), type_typed_object); pp = TO_VOIDP((uptr)TO_PTR(pp) + sweep_typed_object(tc_in, p, from_g)); - }) + }); - /* space used only as needed for backreferences: */ + /* space used only as needed for backreferences: */ sweep_space(space_closure, from_g, { p = TYPE(TO_PTR(pp), type_closure); sweep(tc_in, p, from_g); pp = TO_VOIDP((uptr)TO_PTR(pp) + size_object(p)); - }) + }); + } - /* don't sweep from space_count_pure or space_count_impure */ + received_ranges = send_and_receive_remote_ranges(tc_in); + + /* The ranges in `received_ranges` include old-generation objects from + other parallel sweepers, which means they correspond to dirty + sweeps in the originating sweeper. We handle them here like + regular sweeping using `relocate_impure`, which will register a + dirty-card update as needed. 
*/ + while (received_ranges != NULL) { + ISPC s = received_ranges->s; + IGEN from_g = received_ranges->g; + + pp = TO_VOIDP(received_ranges->start); + nl = TO_VOIDP(received_ranges->end); + + if ((s == space_impure) + || (s == space_immobile_impure) + || (s == space_count_impure) + || (s == space_pure) + || (s == space_impure_typed_object)) { + while (pp < nl) { + p = *pp; + relocate_impure_help(pp, p, from_g, pp, 2 * ptr_bytes); + ppn = pp + 1; + p = *ppn; + relocate_impure_help(ppn, p, from_g, pp, 2 * ptr_bytes); + pp = ppn + 1; + } + } else if (s == space_closure) { + while (pp < nl) { + p = TYPE(TO_PTR(pp), type_closure); + sweep(tc_in, p, from_g); + pp = TO_VOIDP((uptr)TO_PTR(pp) + size_object(p)); + } + } else if (s == space_continuation) { + while (pp < nl) { + p = TYPE(TO_PTR(pp), type_closure); + sweep_continuation(tc_in, p, from_g); + pp += size_continuation / sizeof(ptr); + } + } else if (s == space_code) { + while (pp < nl) { + p = TYPE(TO_PTR(pp), type_typed_object); + sweep_code_object(tc_in, p, from_g); + pp += size_code(CODELEN(p)) / sizeof(ptr); + } + } else if ((s == space_pure_typed_object) + || (s == space_count_pure)) { + /* old generation can happen in the special case of a thread object: */ + while (pp < nl) { + p = TYPE(TO_PTR(pp), type_typed_object); + pp = TO_VOIDP(((uptr)TO_PTR(pp) + sweep_typed_object(tc_in, p, from_g))); + } + } else if (s == space_symbol) { + while (pp < nl) { + p = TYPE(TO_PTR(pp), type_symbol); + sweep_symbol(tc_in, p, from_g); + pp += size_symbol / sizeof(ptr); + } + } else if (s == space_port) { + while (pp < nl) { + p = TYPE(TO_PTR(pp), type_typed_object); + sweep_port(tc_in, p, from_g); + pp += size_port / sizeof(ptr); + } + } else if (s == space_weakpair) { + while (pp < nl) { + ppn = pp + 1; + p = *ppn; + relocate_impure_help(ppn, p, from_g, pp, size_pair); + pp = ppn + 1; + } + } else if (s == space_ephemeron) { + while (pp < nl) { + p = TYPE(TO_PTR(pp), type_pair); + add_ephemeron_to_pending(tc_in, p); + pp += size_ephemeron / sizeof(ptr); + } + } else if (s == space_impure_record) { + while (pp < nl) { + p = TYPE(TO_PTR(pp), type_typed_object); + sweep_record(tc_in, p, from_g); + pp = TO_VOIDP((iptr)TO_PTR(pp) + + size_record_inst(UNFIX(RECORDDESCSIZE(RECORDINSTTYPE(p))))); + } + } else { + S_error_abort("dirty range sweep: unexpected space"); + } + FLUSH_REMOTE_RANGE(tc_in, s, from_g); + COUNT_SWEPT_BYTES(received_ranges->start, received_ranges->end); + received_ranges = received_ranges->next; } /* Waiting until sweeping doesn't trigger a change reduces the @@ -2012,15 +2106,14 @@ static iptr sweep_generation_pass(ptr tc_in) { it doesn't change the worst-case complexity. */ if (SWEEPCHANGE(tc_in) == SWEEP_NO_CHANGE) check_pending_ephemerons(tc_in); + } while (SWEEPCHANGE(tc_in) == SWEEP_CHANGE_PROGRESS); return num_swept_bytes; } static void sweep_generation(ptr tc_in) { - do { - sweep_generation_pass(tc_in); - } while (SWEEPCHANGE(tc_in) != SWEEP_NO_CHANGE); + sweep_generation_pass(tc_in); } void enlarge_sweep_stack(ptr tc_in) { @@ -2036,26 +2129,51 @@ void enlarge_sweep_stack(ptr tc_in) { SWEEPSTACK(tc_in) = (ptr)((uptr)new_sweep_stack + sz); } -void sweep_from_stack(ptr tc_in) { +#ifdef ENABLE_PARALLEL +static ISPC infer_space(ptr p, seginfo *si) { + /* Certain kinds of values get allocated to more specific spaces by + parallel mode compared to non-parallel mode. Marking objects from + a previous collection can mean sweeping from the less-specific + space, however. 
We can synthesize an appropriate space here,
+     since it will be used only by the handling of received
+     ranges. */
+
+  if (si->marked_mask) {
+    ITYPE t = TYPEBITS(p);
+    if (t == type_typed_object) {
+      ptr tf = TYPEFIELD(p);
+      if ((iptr)tf == type_ratnum)
+        return space_pure;
+      else if ((iptr)tf == type_exactnum)
+        return space_pure;
+    } else if (t == type_closure)
+      return space_closure;
+  }
+
+  return si->space;
+}
+#endif
+
+iptr sweep_from_stack(ptr tc_in) {
+  iptr num_swept_bytes = 0;
+
   if (SWEEPSTACK(tc_in) > SWEEPSTACKSTART(tc_in)) {
     while (SWEEPSTACK(tc_in) > SWEEPSTACKSTART(tc_in)) {
       ptr p; seginfo *si;
       SWEEPSTACK(tc_in) = (ptr)((uptr)SWEEPSTACK(tc_in) - ptr_bytes);
       p = *(ptr *)TO_VOIDP(SWEEPSTACK(tc_in));
-      /* Room for improvement: `si->generation` is needed only
-         for objects that have impure fields */
+      /* Room for improvement: `si->generation` is needed only for
+         objects that have impure fields, or in parallel mode for
+         remote ranges. */
       si = SegInfo(ptr_get_segment(p));
       sweep(tc_in, p, si->generation);
-      if (CHECK_LOCK_FAILED(tc_in)) {
-        /* try doing something else for now and sweep `p` later */
-        CLEAR_LOCK_FAILED(tc_in);
-        SWEEPCHANGE(tc_in) = SWEEP_CHANGE_POSTPONED;
-        push_sweep(p)
-        break;
-      }
+      COUNT_SWEPT_BYTES(0, size_object(p));
+      FLUSH_REMOTE_RANGE(tc_in, infer_space(p, si), si->generation);
     }
   }
+
+  return num_swept_bytes;
 }
 
 static iptr sweep_typed_object(ptr tc, ptr p, IGEN from_g) {
@@ -2158,9 +2276,8 @@ static void setup_sweep_dirty() {
 }
 
 static uptr sweep_dirty_segments(ptr tc_in, seginfo **dirty_segments) {
-  ENABLE_LOCK_ACQUIRE
   IGEN youngest, min_youngest;
-  ptr *pp, *ppend, *nl, start, next_loc;
+  ptr *pp, *ppn, *ppend, *nl, start;
   uptr seg, d;
   ISPC s;
   IGEN from_g, to_g;
@@ -2204,14 +2321,16 @@ static uptr sweep_dirty_segments(ptr tc_in, seginfo **dirty_segments) {
       start = build_ptr(seg, 0);
       ppend = TO_VOIDP(start);
 
-      /* The current allocation pointer may be relevant as the
+      /* The original allocation pointer may be relevant as the
         ending point. We assume that thread-local regions for all
         threads without a sweeper are terminated and won't get new
         allocations while dirty sweeping runs, while all allocations
         for a thread with a sweeper will be only using
-         that tc. */
-      next_loc = NEXTLOC_AT(tc_in, s, from_g);
-      nl = TO_VOIDP(next_loc);
+         that tc, and no allocation happens for a non-target generation.
*/ + if (from_g == MAX_TG) + nl = TO_VOIDP(ORIGNEXTLOC(tc_in, s)); + else + nl = TO_VOIDP(NEXTLOC_AT(tc_in, s, from_g)); d = 0; while (d < cards_per_segment) { @@ -2228,12 +2347,12 @@ static uptr sweep_dirty_segments(ptr tc_in, seginfo **dirty_segments) { ppend += bytes_per_card / sizeof(ptr); if (pp <= nl && nl < ppend) ppend = nl; - COUNT_SWEPT_BYTES(pp, ppend); - if (dirty_si->dirty_bytes[d] <= MAX_CG) { /* start out with assumption that we won't find any wrong-way pointers */ youngest = 0xff; + COUNT_SWEPT_BYTES(pp, ppend); + if ((s == space_impure) || (s == space_immobile_impure) || (s == space_impure_typed_object) || (s == space_count_impure) || (s == space_closure)) { @@ -2241,21 +2360,25 @@ static uptr sweep_dirty_segments(ptr tc_in, seginfo **dirty_segments) { while (pp < ppend) { /* handle two pointers at a time */ if (marked(dirty_si, TO_PTR(pp))) { - relocate_dirty(pp,youngest); - pp += 1; - relocate_dirty(pp,youngest); - pp += 1; - } else + relocate_dirty(pp, youngest, pp, 2 * ptr_bytes); + ppn = pp + 1; + relocate_dirty(ppn, youngest, pp, 2 * ptr_bytes); + pp = ppn + 1; + } else { + FLUSH_REMOTE_RANGE(tc_in, s, from_g); pp += 2; + } } + FLUSH_REMOTE_RANGE(tc_in, s, from_g); } else { while (pp < ppend && *pp != forward_marker) { /* handle two pointers at a time */ - relocate_dirty(pp,youngest); - pp += 1; - relocate_dirty(pp,youngest); - pp += 1; + relocate_dirty(pp, youngest, pp, 2 * ptr_bytes); + ppn = pp + 1; + relocate_dirty(ppn, youngest, pp, 2 * ptr_bytes); + pp = ppn + 1; } + FLUSH_REMOTE_RANGE(tc_in, s, from_g); } } else if (s == space_symbol) { /* old symbols cannot overlap segment boundaries @@ -2274,9 +2397,13 @@ static uptr sweep_dirty_segments(ptr tc_in, seginfo **dirty_segments) { if (!dirty_si->marked_mask || marked(dirty_si, p)) youngest = sweep_dirty_symbol(tc_in, p, youngest); + else + FLUSH_REMOTE_RANGE(tc_in, s, from_g); pp += size_symbol / sizeof(ptr); } + + FLUSH_REMOTE_RANGE(tc_in, s, from_g); } else if (s == space_port) { /* old ports cannot overlap segment boundaries since any object that spans multiple @@ -2294,9 +2421,13 @@ static uptr sweep_dirty_segments(ptr tc_in, seginfo **dirty_segments) { if (!dirty_si->marked_mask || marked(dirty_si, p)) youngest = sweep_dirty_port(tc_in, p, youngest); + else + FLUSH_REMOTE_RANGE(tc_in, s, from_g); pp += size_port / sizeof(ptr); } + + FLUSH_REMOTE_RANGE(tc_in, s, from_g); } else if (s == space_impure_record) { /* abandon hope all ye who enter here */ ptr p; if (dirty_si->marked_mask) { @@ -2374,6 +2505,7 @@ static uptr sweep_dirty_segments(ptr tc_in, seginfo **dirty_segments) { seginfo *si = SegInfo(ptr_get_segment(p)); if (!marked(si, p)) { /* skip unmarked words */ + FLUSH_REMOTE_RANGE(tc_in, s, from_g); p = (ptr)((uptr)p + byte_alignment); } else { youngest = sweep_dirty_record(tc_in, p, youngest); @@ -2382,6 +2514,8 @@ static uptr sweep_dirty_segments(ptr tc_in, seginfo **dirty_segments) { RECORDINSTTYPE(p))))); } } + + FLUSH_REMOTE_RANGE(tc_in, s, from_g); } else { uptr j; ptr pnext; seginfo *si; @@ -2420,46 +2554,45 @@ static uptr sweep_dirty_segments(ptr tc_in, seginfo **dirty_segments) { /* now sweep */ while ((ptr *)TO_VOIDP(UNTYPE(p, type_typed_object)) < ppend) { /* quit on end of segment */ - if (FWDMARKER(p) == forward_marker) break; + if (FWDMARKER(p) == forward_marker) break; youngest = sweep_dirty_record(tc_in, p, youngest); p = (ptr)((iptr)p + size_record_inst(UNFIX(RECORDDESCSIZE( RECORDINSTTYPE(p))))); } + + FLUSH_REMOTE_RANGE(tc_in, s, from_g); } } else if (s == space_weakpair) { while (pp 
< ppend && (dirty_si->marked_mask || (*pp != forward_marker))) { /* skip car field and handle cdr field */ if (!dirty_si->marked_mask || marked(dirty_si, TO_PTR(pp))) { - pp += 1; - relocate_dirty(pp, youngest); - pp += 1; - } else + ptr *ppn = pp + 1; + relocate_dirty(ppn, youngest, pp, size_pair); + pp = ppn + 1; + } else { + FLUSH_REMOTE_RANGE(tc_in, s, from_g); pp += 2; + } } + + FLUSH_REMOTE_RANGE(tc_in, s, from_g); } else if (s == space_ephemeron) { while (pp < ppend && (dirty_si->marked_mask || (*pp != forward_marker))) { ptr p = TYPE(TO_PTR(pp), type_pair); if (!dirty_si->marked_mask || marked(dirty_si, p)) youngest = check_dirty_ephemeron(tc_in, p, youngest); + else + FLUSH_REMOTE_RANGE(tc_in, s, from_g); pp += size_ephemeron / sizeof(ptr); } + + FLUSH_REMOTE_RANGE(tc_in, s, from_g); } else { S_error_abort("sweep_dirty(gc): unexpected space"); } - if (CHECK_LOCK_FAILED(tc_in)) { - /* give up for now, and remember to restart at this segment */ - DirtySegmentsAt(dirty_segments, from_g, to_g) = dirty_si; - if (s == space_weakpair) - local_weaksegments_to_resweep = local_weaksegments_to_resweep->next; - add_weaksegments_to_resweep(local_weaksegments_to_resweep, last_local_weaksegments_to_resweep); - CLEAR_LOCK_FAILED(tc_in); - SWEEPCHANGE(tc_in) = SWEEP_CHANGE_POSTPONED; - return num_swept_bytes; - } - if (s == space_weakpair) { local_weaksegments_to_resweep->youngest[d] = youngest; } else { @@ -2482,8 +2615,6 @@ static uptr sweep_dirty_segments(ptr tc_in, seginfo **dirty_segments) { add_weaksegments_to_resweep(local_weaksegments_to_resweep, last_local_weaksegments_to_resweep); - SWEEPCHANGE(tc_in) = SWEEP_NO_CHANGE; - POP_BACKREFERENCE() return num_swept_bytes; @@ -2635,7 +2766,6 @@ static void add_trigger_ephemerons_to_pending(ptr tc_in, ptr pe) { } static void check_ephemeron(ptr tc_in, ptr pe) { - ENABLE_LOCK_ACQUIRE ptr p; seginfo *si; IGEN from_g; @@ -2648,36 +2778,34 @@ static void check_ephemeron(ptr tc_in, ptr pe) { p = Scar(pe); if (!IMMEDIATE(p) && (si = MaybeSegInfo(ptr_get_segment(p))) != NULL && si->old_space) { - if (SEGMENT_LOCK_ACQUIRE(si)) { + if (SEGMENT_IS_LOCAL(si, p)) { if (new_marked(si, p)) { - ENABLE_LOCK_ACQUIRE /* for nested relocate */ #ifndef NO_DIRTY_NEWSPACE_POINTERS IGEN tg = TARGET_GENERATION(si); if (tg < from_g) S_record_new_dirty_card(tc_in, &INITCAR(pe), tg); #endif - relocate_impure(&INITCDR(pe), from_g); + relocate_impure(&INITCDR(pe), from_g, pe, size_ephemeron); } else if (FORWARDEDP(p, si)) { - ENABLE_LOCK_ACQUIRE /* for nested relocate */ #ifndef NO_DIRTY_NEWSPACE_POINTERS IGEN tg = TARGET_GENERATION(si); if (tg < from_g) S_record_new_dirty_card(tc_in, &INITCAR(pe), tg); #endif INITCAR(pe) = FWDADDRESS(p); - relocate_impure(&INITCDR(pe), from_g); + relocate_impure(&INITCDR(pe), from_g, pe, size_ephemeron); } else { - /* If we get here, then there's no lock failure: */ /* Not reached, so far; install as trigger */ ephemeron_add(&si->trigger_ephemerons, pe); si->has_triggers = 1; } - SEGMENT_LOCK_RELEASE(si); } else { - RECORD_LOCK_FAILED(tc_in, si); + RECORD_REMOTE_RANGE_TO(tc_in, pe, size_ephemeron, SWEEPER(si->creator_tc)); } } else { - relocate_impure(&INITCDR(pe), from_g); + relocate_impure(&INITCDR(pe), from_g, pe, size_ephemeron); } - + + FLUSH_REMOTE_RANGE(tc_in, space_ephemeron, from_g); + POP_BACKREFERENCE(); } @@ -2690,13 +2818,6 @@ static void check_pending_ephemerons(ptr tc_in) { while (pe != 0) { next_pe = EPHEMERONNEXT(pe); check_ephemeron(tc_in, pe); - if (CHECK_LOCK_FAILED(tc_in)) { - CLEAR_LOCK_FAILED(tc_in); - 
SWEEPCHANGE(tc_in) = SWEEP_CHANGE_POSTPONED; - EPHEMERONNEXT(pe) = next_pe; - ephemeron_add(&PENDINGEPHEMERONS(tc_in), pe); - break; - } pe = next_pe; } } @@ -2706,7 +2827,6 @@ static void check_pending_ephemerons(ptr tc_in) { be less pessimistic than setting `youngest` to the target generation: */ static IGEN check_dirty_ephemeron(ptr tc_in, ptr pe, IGEN youngest) { - ENABLE_LOCK_ACQUIRE ptr p; seginfo *si; IGEN pg; @@ -2715,12 +2835,12 @@ static IGEN check_dirty_ephemeron(ptr tc_in, ptr pe, IGEN youngest) { p = Scar(pe); if (!IMMEDIATE(p) && (si = MaybeSegInfo(ptr_get_segment(p))) != NULL) { if (si->old_space) { - if (SEGMENT_LOCK_ACQUIRE(si)) { + if (SEGMENT_IS_LOCAL(si, p)) { if (new_marked(si, p)) { - relocate_dirty(&INITCDR(pe), youngest); + relocate_dirty(&INITCDR(pe), youngest, pe, size_ephemeron); } else if (FORWARDEDP(p, si)) { INITCAR(pe) = GET_FWDADDRESS(p); - relocate_dirty(&INITCDR(pe), youngest); + relocate_dirty(&INITCDR(pe), youngest, pe, size_ephemeron); } else { /* Not reached, so far; add to pending list */ add_ephemeron_to_pending(tc_in, pe); @@ -2733,20 +2853,19 @@ static IGEN check_dirty_ephemeron(ptr tc_in, ptr pe, IGEN youngest) { if (youngest != MIN_TG && (pg = TARGET_GENERATION(si)) < youngest) youngest = pg; } - SEGMENT_LOCK_RELEASE(si); } else { - RECORD_LOCK_FAILED(tc_in, si); + RECORD_REMOTE_RANGE_TO(tc_in, pe, size_ephemeron, SWEEPER(si->creator_tc)); return youngest; } } else { if (youngest != MIN_TG && (pg = si->generation) < youngest) youngest = pg; - relocate_dirty(&INITCDR(pe), youngest); + relocate_dirty(&INITCDR(pe), youngest, pe, size_ephemeron); } } else { /* Non-collectable key means that the value determines `youngest`: */ - relocate_dirty(&INITCDR(pe), youngest); + relocate_dirty(&INITCDR(pe), youngest, pe, size_ephemeron); } POP_BACKREFERENCE() @@ -2888,7 +3007,6 @@ static s_thread_mutex_t sweep_mutex; static s_thread_cond_t sweep_cond; static int num_running_sweepers; -static s_thread_cond_t postpone_cond; static void gather_active_sweepers() { int i, n; @@ -2899,7 +3017,7 @@ static void gather_active_sweepers() { ptr tc = S_collect_waiting_tcs[i]; if (sweeper_started(n)) { sweepers[n].sweep_tc = tc; - SWEEPER(tc) = n; + WILL_BE_SWEEPER(tc) = n; n++; } else break; @@ -2912,7 +3030,6 @@ static void gather_active_sweepers() { static s_thread_rv_t start_sweeper(void *_data) { gc_thread_data *data = _data; ptr tc; - int status; iptr num_swept_bytes; IGEN g; #ifdef ENABLE_TIMING @@ -2924,8 +3041,7 @@ static s_thread_rv_t start_sweeper(void *_data) { while (data->status != SWEEPER_SWEEPING) { s_thread_cond_wait(&sweep_cond, &sweep_mutex); } - num_running_sweepers++; - GET_TIME(start); + GET_CPU_TIME(start); (void)s_thread_mutex_unlock(&sweep_mutex); tc = data->sweep_tc; @@ -2934,20 +3050,15 @@ static s_thread_rv_t start_sweeper(void *_data) { if (data->thread) { /* sweep tc in this sweeper, so that things it references are more likely handled in this sweeper: */ - sweep_thread_now(tc, data->thread); + seginfo *t_si = SegInfo(ptr_get_segment(data->thread)); + sweep_thread(tc, data->thread); + FLUSH_REMOTE_RANGE(tc, t_si->space, t_si->generation); data->thread = (ptr)0; } - status = 0; num_swept_bytes = 0; - do { - num_swept_bytes += sweep_dirty_segments(tc, data->dirty_segments); - status = gate_postponed(tc, status); - } while (SWEEPCHANGE(tc) != SWEEP_NO_CHANGE); - do { - num_swept_bytes += sweep_generation_pass(tc); - status = gate_postponed(tc, status); - } while (SWEEPCHANGE(tc) != SWEEP_NO_CHANGE); + num_swept_bytes += sweep_dirty_segments(tc, 
data->dirty_segments); + num_swept_bytes += sweep_generation_trading_work(tc); /* ensure terminators on any segment where sweeper may have allocated: */ { @@ -2960,18 +3071,22 @@ static s_thread_rv_t start_sweeper(void *_data) { } } } - + (void)s_thread_mutex_lock(&sweep_mutex); - --num_running_sweepers; - if (!num_running_sweepers) - s_thread_cond_broadcast(&postpone_cond); S_G.bitmask_overhead[0] += BITMASKOVERHEAD(tc, 0); BITMASKOVERHEAD(tc, 0) = 0; for (g = MIN_TG; g <= MAX_TG; g++) S_G.bitmask_overhead[g] += BITMASKOVERHEAD(tc, g); data->status = SWEEPER_READY; - ACCUM_TIME(sweep_accum, step, start); - REPORT_TIME(fprintf(stderr, "%d swp +%ld ms %ld ms %ld bytes [%p]\n", MAX_CG, step, sweep_accum, num_swept_bytes, tc)); + ACCUM_CPU_TIME(sweep_accum, step, start); + REPORT_TIME(fprintf(stderr, "%d swpr +%ld ms %ld ms %ld bytes %d%%/%d sent %d%%/%d received [%p]\n", + MAX_CG, step, sweep_accum, num_swept_bytes, + percentage(sweepers[SWEEPER(tc)].remote_ranges_bytes_sent, num_swept_bytes), + sweepers[SWEEPER(tc)].remote_ranges_sent, + percentage(sweepers[SWEEPER(tc)].remote_ranges_bytes_received, num_swept_bytes), + sweepers[SWEEPER(tc)].remote_ranges_received, + tc)); + SWEEPER(tc) = main_sweeper_index; s_thread_cond_signal(&data->done_cond); } @@ -2983,7 +3098,7 @@ static IBOOL sweeper_started(int i) { if (!sweep_mutex_initialized) { s_thread_mutex_init(&sweep_mutex); s_thread_cond_init(&sweep_cond); - s_thread_cond_init(&postpone_cond); + s_thread_cond_init(&sweepers[main_sweeper_index].work_cond); sweep_mutex_initialized = 1; } @@ -2992,6 +3107,7 @@ static IBOOL sweeper_started(int i) { sweepers[i].status = SWEEPER_READY; s_thread_cond_init(&sweepers[i].done_cond); + s_thread_cond_init(&sweepers[i].work_cond); if ((status = s_thread_create(start_sweeper, &sweepers[i])) != 0) { /* eror creating a thread; just go with as many as we have */ @@ -3005,39 +3121,41 @@ static IBOOL sweeper_started(int i) { } static void parallel_sweep_dirty_and_generation(ptr tc) { - int i, status; + int i; iptr num_swept_bytes; REPORT_TIME(fprintf(stderr, "------\n")); - GET_TIME(start); + GET_CPU_TIME(start); S_use_gc_tc_mutex = 1; /* start other sweepers */ (void)s_thread_mutex_lock(&sweep_mutex); - for (i = 0; i < num_sweepers; i++) + sweepers[main_sweeper_index].status = SWEEPER_SWEEPING; + ADJUST_COUNTER(sweepers[main_sweeper_index].remote_ranges_sent = 0); + ADJUST_COUNTER(sweepers[main_sweeper_index].remote_ranges_bytes_sent = 0); + ADJUST_COUNTER(sweepers[main_sweeper_index].remote_ranges_received = 0); + ADJUST_COUNTER(sweepers[main_sweeper_index].remote_ranges_bytes_received = 0); + for (i = 0; i < num_sweepers; i++) { sweepers[i].status = SWEEPER_SWEEPING; + SWEEPER(sweepers[i].sweep_tc) = i; + ADJUST_COUNTER(sweepers[i].remote_ranges_sent = 0); + ADJUST_COUNTER(sweepers[i].remote_ranges_bytes_sent = 0); + ADJUST_COUNTER(sweepers[i].remote_ranges_received = 0); + ADJUST_COUNTER(sweepers[i].remote_ranges_bytes_received = 0); + num_running_sweepers++; + } s_thread_cond_broadcast(&sweep_cond); num_running_sweepers++; (void)s_thread_mutex_unlock(&sweep_mutex); /* sweep in the main thread */ - status = 0; num_swept_bytes = 0; - do { - num_swept_bytes += sweep_dirty_segments(tc, main_dirty_segments); - status = gate_postponed(tc, status); - } while (SWEEPCHANGE(tc) != SWEEP_NO_CHANGE); - do { - num_swept_bytes += sweep_generation_pass(tc); - status = gate_postponed(tc, status); - } while (SWEEPCHANGE(tc) != SWEEP_NO_CHANGE); + num_swept_bytes += sweep_dirty_segments(tc, main_dirty_segments); + 
num_swept_bytes += sweep_generation_trading_work(tc); /* wait for other sweepers */ (void)s_thread_mutex_lock(&sweep_mutex); - --num_running_sweepers; - if (!num_running_sweepers) - s_thread_cond_broadcast(&postpone_cond); for (i = 0; i < num_sweepers; i++) { while (sweepers[i].status != SWEEPER_READY) { s_thread_cond_wait(&sweepers[i].done_cond, &sweep_mutex); @@ -3046,40 +3164,108 @@ static void parallel_sweep_dirty_and_generation(ptr tc) { } (void)s_thread_mutex_unlock(&sweep_mutex); - ACCUM_TIME(par_accum, step, start); - REPORT_TIME(fprintf(stderr, "%d par +%ld ms %ld ms %ld bytes [%p]\n", MAX_CG, step, par_accum, num_swept_bytes, tc)); + ACCUM_CPU_TIME(par_accum, step, start); + REPORT_TIME(fprintf(stderr, "%d main +%ld ms %ld ms %ld bytes %d%%/%d sent %d%%/%d received [%p]\n", + MAX_CG, + step, par_accum, num_swept_bytes, + percentage(sweepers[main_sweeper_index].remote_ranges_bytes_sent, num_swept_bytes), + sweepers[main_sweeper_index].remote_ranges_sent, + percentage(sweepers[main_sweeper_index].remote_ranges_bytes_received, num_swept_bytes), + sweepers[main_sweeper_index].remote_ranges_received, + tc)); S_use_gc_tc_mutex = 0; } -#define WAIT_AFTER_POSTPONES 10 +static iptr sweep_generation_trading_work(ptr tc) { + iptr num_swept_bytes = 0; -static int gate_postponed(ptr tc, int status) { - if (SWEEPCHANGE(tc) == SWEEP_CHANGE_POSTPONED) { - if (status < WAIT_AFTER_POSTPONES) - return status + 1; - else { - (void)s_thread_mutex_lock(&sweep_mutex); - /* This thread wasn't able to make progress after a lock conflict. - Instead of spinning, which could create livelock, wait until - some thread makes progress. */ - if (num_running_sweepers == 1) { - /* All other threads postponed, so this one should be able to - make progress after all. */ - } else { - --num_running_sweepers; - s_thread_cond_wait(&postpone_cond, &sweep_mutex); - num_running_sweepers++; + num_swept_bytes += sweep_generation_pass(tc); + + (void)s_thread_mutex_lock(&sweep_mutex); + --num_running_sweepers; + while (1) { + int me = SWEEPER(tc); + if ((num_running_sweepers == 0) + && (sweepers[me].ranges_received == NULL)) { + /* everyone is done */ + int i, they = main_sweeper_index; + for (i = -1; i < num_sweepers; i++) { + s_thread_cond_signal(&sweepers[they].work_cond); + they = i + 1; } (void)s_thread_mutex_unlock(&sweep_mutex); + return num_swept_bytes; + } else { + /* wait for work */ + if (sweepers[me].ranges_received != NULL) { + /* some work appeared since we last checked */ + num_running_sweepers++; + } else { + sweepers[me].status = SWEEPER_WAITING_FOR_WORK; + s_thread_cond_wait(&sweepers[me].work_cond, &sweep_mutex); + } + if (sweepers[me].status != SWEEPER_WAITING_FOR_WORK) { + /* got work; num_running_sweepers was incremented, too */ + (void)s_thread_mutex_unlock(&sweep_mutex); + num_swept_bytes += sweep_generation_pass(tc); + (void)s_thread_mutex_lock(&sweep_mutex); + --num_running_sweepers; + } else if (num_running_sweepers == 0) { + /* other sweeper noticed that everyone is done */ + (void)s_thread_mutex_unlock(&sweep_mutex); + return num_swept_bytes; + } else { + /* not clear why we were woken, so just go around again */ + } } - } else { - (void)s_thread_mutex_lock(&sweep_mutex); - s_thread_cond_broadcast(&postpone_cond); - (void)s_thread_mutex_unlock(&sweep_mutex); + } +} + +static remote_range *send_and_receive_remote_ranges(ptr tc) { + int i, me = SWEEPER(tc), they; + remote_range *r, *next, *last; + + (void)s_thread_mutex_lock(&sweep_mutex); + + they = main_sweeper_index; + for (i = -1; i < 
num_sweepers; i++) { + if (sweepers[me].ranges_to_send[they] != NULL) { + SWEEPCHANGE(tc) = SWEEP_CHANGE_PROGRESS; + r = sweepers[me].ranges_to_send[they]; + sweepers[me].ranges_to_send[they] = NULL; + for (next = r, last = r; next != NULL; next = next->next) { + ADJUST_COUNTER(sweepers[me].remote_ranges_sent++); + ADJUST_COUNTER(sweepers[me].remote_ranges_bytes_sent += ((uptr)next->end - (uptr)next->start)); + last = next; + } + last->next = sweepers[they].ranges_received; + sweepers[they].ranges_received = r; + if (sweepers[they].status == SWEEPER_WAITING_FOR_WORK) { + num_running_sweepers++; + sweepers[they].status = SWEEPER_SWEEPING; + s_thread_cond_signal(&sweepers[they].work_cond); + } + } + they = i + 1; } - return 0; + r = sweepers[me].ranges_received; + sweepers[me].ranges_received = NULL; + + (void)s_thread_mutex_unlock(&sweep_mutex); + + if (r != NULL) { + SWEEPCHANGE(tc) = SWEEP_CHANGE_PROGRESS; +#ifdef ENABLE_TIMING + for (next = r; next != NULL; next = next->next) { + ADJUST_COUNTER(sweepers[me].remote_ranges_received++); + ADJUST_COUNTER(sweepers[me].remote_ranges_bytes_received += ((uptr)next->end - (uptr)next->start)); + } +#endif + } + + return r; } #endif @@ -3149,7 +3335,7 @@ static void push_measure(ptr tc_in, ptr p) if (si->old_space) { /* We must be in a GC--measure fusion, so switch back to GC */ - relocate_pure_help_help(&p, p, si); + relocate_pure_help_help(&p, p, si, NULL, 0); return; } diff --git a/racket/src/ChezScheme/c/gcwrapper.c b/racket/src/ChezScheme/c/gcwrapper.c index bf48a423d3..81f1308c51 100644 --- a/racket/src/ChezScheme/c/gcwrapper.c +++ b/racket/src/ChezScheme/c/gcwrapper.c @@ -15,6 +15,7 @@ */ #include "system.h" +#include "popcount.h" /* locally defined functions */ static void segment_tell PROTO((uptr seg)); @@ -545,6 +546,59 @@ void S_addr_tell(ptr p) { segment_tell(addr_get_segment(p)); } +static void check_pointer(ptr *pp, IBOOL address_is_meaningful, ptr base, uptr seg, ISPC s, IBOOL aftergc) { + ptr p = *pp; + if (!IMMEDIATE(p)) { + seginfo *psi = MaybeSegInfo(ptr_get_segment(p)); + if (psi != NULL) { + if ((psi->space == space_empty) + || psi->old_space + || (psi->marked_mask && !(psi->marked_mask[segment_bitmap_byte(p)] & segment_bitmap_bit(p)) + /* corner case: a continuation in space_count_pure can refer to code via CLOSENTRY + where the entry point doesn't have a mark bit: */ + && !((s == space_count_pure) && (psi->space == space_code)))) { + S_checkheap_errors += 1; + printf("!!! dangling reference at %s"PHtx" to "PHtx"%s\n", + (address_is_meaningful ? "" : "insideof "), + (ptrdiff_t)(address_is_meaningful ? pp : TO_VOIDP(base)), + (ptrdiff_t)p, (aftergc ? " after gc" : "")); + printf("from: "); segment_tell(seg); + printf("to: "); segment_tell(ptr_get_segment(p)); + { + ptr l; + for (l = S_G.locked_objects[psi->generation]; l != Snil; l = Scdr(l)) + if (Scar(l) == p) + printf(" in locked\n"); + for (l = S_G.unlocked_objects[psi->generation]; l != Snil; l = Scdr(l)) + if (Scar(l) == p) + printf(" in unlocked\n"); + } + abort(); // REMOVEME + } + } + } +} + +static void check_bignum(ptr p) { + if (!Sbignump(p)) + printf("!!! 
not a bignum %p\n", TO_VOIDP(p)); +} + +#include "heapcheck.inc" + +static ptr *find_nl(ptr *pp1, ptr *pp2, ISPC s, IGEN g) { + ptr *nl, ls; + + for (ls = S_threads; ls != Snil; ls = Scdr(ls)) { + ptr t_tc = (ptr)THREADTC(Scar(ls)); + nl = TO_VOIDP(NEXTLOC_AT(t_tc, s, g)); + if (pp1 <= nl && nl < pp2) + return nl; + } + + return NULL; +} + static void check_heap_dirty_msg(msg, x) char *msg; ptr *x; { INT d; seginfo *si; @@ -577,6 +631,13 @@ void S_check_heap(aftergc, mcg) IBOOL aftergc; IGEN mcg; { printf("!!! inconsistent thread NEXT %p and BASE %p\n", TO_VOIDP(NEXTLOC_AT(t_tc, s, g)), TO_VOIDP(BASELOC_AT(t_tc, s, g))); } + if ((REMOTERANGEEND(t_tc) != (ptr)0) + || (REMOTERANGESTART(t_tc) != (ptr)(uptr)-1)) { + S_checkheap_errors += 1; + printf("!!! nonempty thread REMOTERANGE %p-%p\n", + TO_VOIDP(REMOTERANGESTART(t_tc)), + TO_VOIDP(REMOTERANGEEND(t_tc))); + } } } } @@ -669,67 +730,123 @@ void S_check_heap(aftergc, mcg) IBOOL aftergc; IGEN mcg; { printf("!!! unexpected generation %d segment "PHtx" in space_new\n", g, (ptrdiff_t)seg); } } else if (s == space_impure || s == space_symbol || s == space_pure || s == space_weakpair || s == space_ephemeron - || s == space_immobile_impure || s == space_count_pure || s == space_count_impure || s == space_closure) { - /* doesn't handle: space_port, space_continuation, space_code, space_pure_typed_object, - space_impure_record, or impure_typed_object */ + || s == space_immobile_impure || s == space_count_pure || s == space_count_impure || s == space_closure + || s == space_pure_typed_object || s == space_continuation || s == space_port || s == space_code + || s == space_impure_record || s == space_impure_typed_object) { + ptr start; + /* check for dangling references */ pp1 = TO_VOIDP(build_ptr(seg, 0)); pp2 = TO_VOIDP(build_ptr(seg + 1, 0)); - nl = NULL; - { - ptr ls; - for (ls = S_threads; ls != Snil; ls = Scdr(ls)) { - ptr t_tc = (ptr)THREADTC(Scar(ls)); - nl = TO_VOIDP(NEXTLOC_AT(t_tc, s, g)); - if (pp1 <= nl && nl < pp2) - break; - } - } + nl = find_nl(pp1, pp2, s, g); if (pp1 <= nl && nl < pp2) pp2 = nl; - while (pp1 < pp2) { - if (!si->marked_mask || (si->marked_mask[segment_bitmap_byte(TO_PTR(pp1))] & segment_bitmap_bit(TO_PTR(pp1)))) { - int a; - for (a = 0; (a < ptr_alignment) && (pp1 < pp2); a++) { -#define in_ephemeron_pair_part(pp1, seg) ((((uptr)TO_PTR(pp1) - (uptr)build_ptr(seg, 0)) % size_ephemeron) < size_pair) - if ((s == space_ephemeron) && !in_ephemeron_pair_part(pp1, seg)) { - /* skip non-pair part of ephemeron */ + if (s == space_pure_typed_object || s == space_port || s == space_code + || s == space_impure_record || s == space_impure_typed_object) { + if (si->marked_mask) { + /* not implemented */ + } else { + /* only check this segment for objects that start on it */ + uptr before_seg = seg; + + /* Back up over segments for the same space and generation: */ + while (1) { + seginfo *before_si = MaybeSegInfo(before_seg-1); + if (!before_si + || (before_si->space != si->space) + || (before_si->generation != si->generation) + || ((before_si->marked_mask == NULL) != (si->marked_mask == NULL))) + break; + before_seg--; + } + + /* Move forward to reach `seg` again: */ + start = build_ptr(before_seg, 0); + while (before_seg != seg) { + ptr *before_pp2, *before_nl; + + before_pp2 = TO_VOIDP(build_ptr(before_seg + 1, 0)); + if ((ptr *)TO_VOIDP(start) > before_pp2) { + /* skipped to a further segment */ + before_seg++; } else { - p = *pp1; - if (!si->marked_mask && (p == forward_marker)) { - pp1 = pp2; /* break out of outer loop */ - 
break; - } else if (!IMMEDIATE(p)) { - seginfo *psi = MaybeSegInfo(ptr_get_segment(p)); - if (psi != NULL) { - if ((psi->space == space_empty) - || psi->old_space - || (psi->marked_mask && !(psi->marked_mask[segment_bitmap_byte(p)] & segment_bitmap_bit(p)) - /* corner case: a continuation in space_count_pure can refer to code via CLOSENTRY - where the entry point doesn't have a mark bit: */ - && !((s == space_count_pure) && (psi->space == space_code)))) { - S_checkheap_errors += 1; - printf("!!! dangling reference at "PHtx" to "PHtx"%s\n", (ptrdiff_t)pp1, (ptrdiff_t)p, (aftergc ? " after gc" : "")); - printf("from: "); segment_tell(seg); - printf("to: "); segment_tell(ptr_get_segment(p)); - { - ptr l; - for (l = S_G.locked_objects[psi->generation]; l != Snil; l = Scdr(l)) - if (Scar(l) == p) - printf(" in locked\n"); - for (l = S_G.unlocked_objects[psi->generation]; l != Snil; l = Scdr(l)) - if (Scar(l) == p) - printf(" in unlocked\n"); + before_nl = find_nl(TO_VOIDP(start), before_pp2, s, g); + if (((ptr*)TO_VOIDP(start)) <= before_nl && before_nl < before_pp2) { + /* this segment ends, so move to next segment */ + before_seg++; + if (s == space_code) { + /* in the case of code, it's possible for a whole segment to + go unused if a large code object didn't fit; give up, just in case */ + start = build_ptr(seg+1, 0); + } else { + start = build_ptr(before_seg, 0); + } + } else { + while (((ptr *)TO_VOIDP(start)) < before_pp2) { + if (*(ptr *)TO_VOIDP(start) == forward_marker) { + /* this segment ends, so move to next segment */ + if (s == space_code) { + start = build_ptr(seg+1, 0); + } else { + start = build_ptr(before_seg+1, 0); } + } else { + start = (ptr)((uptr)start + size_object(TYPE(start, type_typed_object))); } } + before_seg++; } } - pp1 += 1; } - } else - pp1 += ptr_alignment; + + if (((ptr *)TO_VOIDP(start)) >= pp2) { + /* previous object extended past the segment */ + } else { + pp1 = TO_VOIDP(start); + while (pp1 < pp2) { + if (*pp1 == forward_marker) + break; + else { + p = TYPE(TO_PTR(pp1), type_typed_object); + check_object(p, seg, s, aftergc); + pp1 = TO_VOIDP((ptr)((uptr)TO_PTR(pp1) + size_object(p))); + } + } + } + } + } else if (s == space_continuation) { + while (pp1 < pp2) { + if (*pp1 == forward_marker) + break; + if (!si->marked_mask || (si->marked_mask[segment_bitmap_byte(TO_PTR(pp1))] & segment_bitmap_bit(TO_PTR(pp1)))) { + p = TYPE(TO_PTR(pp1), type_closure); + check_object(p, seg, s, aftergc); + } + pp1 = TO_VOIDP((ptr)((uptr)TO_PTR(pp1) + size_continuation)); + } + } else { + while (pp1 < pp2) { + if (!si->marked_mask || (si->marked_mask[segment_bitmap_byte(TO_PTR(pp1))] & segment_bitmap_bit(TO_PTR(pp1)))) { + int a; + for (a = 0; (a < ptr_alignment) && (pp1 < pp2); a++) { +#define in_ephemeron_pair_part(pp1, seg) ((((uptr)TO_PTR(pp1) - (uptr)build_ptr(seg, 0)) % size_ephemeron) < size_pair) + if ((s == space_ephemeron) && !in_ephemeron_pair_part(pp1, seg)) { + /* skip non-pair part of ephemeron */ + } else { + p = *pp1; + if (!si->marked_mask && (p == forward_marker)) { + pp1 = pp2; /* break out of outer loop */ + break; + } else { + check_pointer(pp1, 1, (ptr)0, seg, s, aftergc); + } + } + pp1 += 1; + } + } else + pp1 += ptr_alignment; + } } /* verify that dirty bits are set appropriately */ diff --git a/racket/src/ChezScheme/c/globals.h b/racket/src/ChezScheme/c/globals.h index afc31c14e7..4722109a60 100644 --- a/racket/src/ChezScheme/c/globals.h +++ b/racket/src/ChezScheme/c/globals.h @@ -117,6 +117,7 @@ EXTERN struct S_G_struct { ptr null_immutable_vector; 
ptr null_immutable_fxvector;
   ptr null_immutable_bytevector;
+  ptr zero_length_bignum;
 
   seginfo *dirty_segments[DIRTY_SEGMENT_LISTS];
 
 /* schsig.c */
diff --git a/racket/src/ChezScheme/c/scheme.c b/racket/src/ChezScheme/c/scheme.c
index 5b7fda6450..68e7d07f89 100644
--- a/racket/src/ChezScheme/c/scheme.c
+++ b/racket/src/ChezScheme/c/scheme.c
@@ -330,6 +330,13 @@ static void idiot_checks() {
     oops = 1;
   }
 
+  if ((((code_flag_continuation << code_flags_offset) | (code_flag_mutable_closure << code_flags_offset))
+       & (uptr)forward_marker) != 0) {
+    /* parallel GC relies on not confusing a forward marker with code flags */
+    fprintf(stderr, "code flags overlap with forward_marker\n");
+    oops = 1;
+  }
+
   if (oops) S_abnormal_exit();
 }
 
diff --git a/racket/src/ChezScheme/c/segment.c b/racket/src/ChezScheme/c/segment.c
index 92b6fa14ce..24e1bc7323 100644
--- a/racket/src/ChezScheme/c/segment.c
+++ b/racket/src/ChezScheme/c/segment.c
@@ -235,7 +235,6 @@ static void initialize_seginfo(seginfo *si, NO_THREADS_UNUSED ptr tc, ISPC s, IG
   si->use_marks = 0;
   si->must_mark = 0;
 #ifdef PTHREADS
-  si->lock = 0;
   si->creator_tc = tc;
 #endif
   si->list_bits = NULL;
diff --git a/racket/src/ChezScheme/c/thread.c b/racket/src/ChezScheme/c/thread.c
index c799367a3c..269ae7ffc8 100644
--- a/racket/src/ChezScheme/c/thread.c
+++ b/racket/src/ChezScheme/c/thread.c
@@ -142,7 +142,8 @@ ptr S_create_thread_object(who, p_tc) const char *who; ptr p_tc; {
   LZ4OUTBUFFER(tc) = 0;
 
   SWEEPER(tc) = -1;
-  LOCKSTATUS(tc) = Strue;
+  REMOTERANGESTART(tc) = (ptr)(uptr)-1;
+  REMOTERANGEEND(tc) = (ptr)0;
 
   tc_mutex_release();
diff --git a/racket/src/ChezScheme/c/types.h b/racket/src/ChezScheme/c/types.h
index cf7162f38d..7f1517ea94 100644
--- a/racket/src/ChezScheme/c/types.h
+++ b/racket/src/ChezScheme/c/types.h
@@ -158,8 +158,7 @@ typedef struct _seginfo {
   octet *list_bits; /* for `$list-bits-ref` and `$list-bits-set!` */
   uptr number; /* the segment number */
 #ifdef PTHREADS
-  ptr lock; /* for parallel GC */
-  ptr creator_tc; /* for parallelism heuristic; might not match an active thread */
+  ptr creator_tc; /* for GC parallelism heuristic; might not match an active thread unless old_space */
 #endif
   struct _chunkinfo *chunk; /* the chunk this segment belongs to */
   struct _seginfo *next; /* pointer to the next seginfo (used in occupied_segments and unused_segs) */
diff --git a/racket/src/ChezScheme/c/vfasl.c b/racket/src/ChezScheme/c/vfasl.c
index 833ef56865..4e6c29daa8 100644
--- a/racket/src/ChezScheme/c/vfasl.c
+++ b/racket/src/ChezScheme/c/vfasl.c
@@ -1151,10 +1151,20 @@ static void relink_code(ptr co, ptr sym_base, ptr *vspaces, uptr *vspace_offsets
   t = CODERELOC(co);
   t = ptr_add(vspaces[vspace_reloc], (uptr)t - vspace_offsets[vspace_reloc]);
 
-  if (to_static && !S_G.retain_static_relocation
-      && ((CODETYPE(co) & (code_flag_template << code_flags_offset)) == 0))
-    CODERELOC(co) = (ptr)0;
-  else {
+  if (to_static && !S_G.retain_static_relocation) {
+    if ((CODETYPE(co) & (code_flag_template << code_flags_offset)) == 0)
+      CODERELOC(co) = (ptr)0;
+    else {
+      ptr tc = get_thread_context();
+      iptr sz = size_reloc_table(RELOCSIZE(t));
+      ptr new_t;
+      find_room(tc, space_data, static_generation, typemod, ptr_align(sz), new_t);
+      memcpy(TO_VOIDP(new_t), TO_VOIDP(t), sz);
+      t = new_t;
+      CODERELOC(co) = t;
+      RELOCCODE(t) = co;
+    }
+  } else {
     CODERELOC(co) = t;
     RELOCCODE(t) = co;
   }
diff --git a/racket/src/ChezScheme/rktboot/make-boot.rkt b/racket/src/ChezScheme/rktboot/make-boot.rkt
index 9385e734b0..dc5240f296 100644
--- 
a/racket/src/ChezScheme/rktboot/make-boot.rkt +++ b/racket/src/ChezScheme/rktboot/make-boot.rkt @@ -401,7 +401,9 @@ (status "Generate GC") (eval `(mkgc-ocd.inc ,(path->string (build-path out-subdir "gc-ocd.inc")))) (eval `(mkgc-oce.inc ,(path->string (build-path out-subdir "gc-oce.inc")))) + (eval `(mkgc-par.inc ,(path->string (build-path out-subdir "gc-par.inc")))) (eval `(mkvfasl.inc ,(path->string (build-path out-subdir "vfasl.inc")))) + (eval `(mkheapcheck.inc ,(path->string (build-path out-subdir "heapcheck.inc")))) (plumber-flush-all (current-plumber)))) (when (getenv "MAKE_BOOT_FOR_CROSS") diff --git a/racket/src/ChezScheme/s/Mf-base b/racket/src/ChezScheme/s/Mf-base index 7993b53980..dfd08463f5 100644 --- a/racket/src/ChezScheme/s/Mf-base +++ b/racket/src/ChezScheme/s/Mf-base @@ -114,7 +114,9 @@ Cheader = ../boot/$m/scheme.h Cequates = ../boot/$m/equates.h Cgcocd = ../boot/$m/gc-ocd.inc Cgcoce = ../boot/$m/gc-oce.inc +Cgcpar = ../boot/$m/gc-par.inc Cvfasl = ../boot/$m/vfasl.inc +Cheapcheck = ../boot/$m/heapcheck.inc Revision = ../boot/$m/revision # The following controls the patch files loaded before compiling, typically used only @@ -171,11 +173,11 @@ allsrc =\ np-languages.ss fxmap.ss # doit uses a different Scheme process to compile each target -doit: ${PetiteBoot} ${SchemeBoot} ${Cheader} ${Cequates} ${Cgcocd} ${Cgcoce} ${Cvfasl} ${Revision} +doit: ${PetiteBoot} ${SchemeBoot} ${Cheader} ${Cequates} ${Cgcocd} ${Cgcoce} ${Cgcpar} ${Cvfasl} ${Cheapcheck} ${Revision} # all uses a single Scheme process to compile all targets. this is typically # faster when most of the targets need to be recompiled. -all: bootall ${Cheader} ${Cequates} ${Cgcocd} ${Cgcoce} ${Cvfasl} ${Revision} +all: bootall ${Cheader} ${Cequates} ${Cgcocd} ${Cgcoce} ${Cgcpar} ${Cvfasl} ${Cheapcheck} ${Revision} # allx runs all up to three times and checks to see if the new boot file is the # same as the last, i.e., the system is properly bootstrapped. @@ -362,7 +364,7 @@ resetbootlinks: | ${Scheme} -q keepbootfiles: - for x in `echo scheme.boot petite.boot scheme.h equates.h gc-oce.inc gc-ocd.inc vfasl.inc` ; do\ + for x in `echo scheme.boot petite.boot scheme.h equates.h gc-oce.inc gc-ocd.inc gc-par.inc vfasl.inc heapcheck.inc` ; do\ if [ ! -h ../boot/$(m)/$$x ] ; then \ mv -f ../boot/$(m)/$$x ../../boot/$(m)/$$x ;\ elif [ "${upupupbootdir}" != "../../.." 
] ; then \ @@ -633,6 +635,15 @@ ${Cgcoce}: mkgc.so ${macroobj} nanopass.so base-lang.ss expand-lang.ss primref.s then mv -f ${Cgcoce}.bak ${Cgcoce};\ else rm -f ${Cgcoce}.bak; fi) +${Cgcpar}: mkgc.so ${macroobj} nanopass.so base-lang.ss expand-lang.ss primref.ss types.ss io-types.ss fasl-helpers.ss hashtable-types.ss + (if [ -r ${Cgcpar} ]; then mv -f ${Cgcpar} ${Cgcpar}.bak; fi) + echo '(reset-handler abort)'\ + '(mkgc-par.inc "${Cgcpar}")' |\ + ${Scheme} -q ${macroobj} mkheader.so mkgc.so + (if `cmp -s ${Cgcpar} ${Cgcpar}.bak`;\ + then mv -f ${Cgcpar}.bak ${Cgcpar};\ + else rm -f ${Cgcpar}.bak; fi) + ${Cvfasl}: mkgc.so ${macroobj} nanopass.so base-lang.ss expand-lang.ss primref.ss types.ss io-types.ss fasl-helpers.ss hashtable-types.ss (if [ -r ${Cvfasl} ]; then mv -f ${Cvfasl} ${Cvfasl}.bak; fi) echo '(reset-handler abort)'\ @@ -642,6 +653,15 @@ ${Cvfasl}: mkgc.so ${macroobj} nanopass.so base-lang.ss expand-lang.ss primref.s then mv -f ${Cvfasl}.bak ${Cvfasl};\ else rm -f ${Cvfasl}.bak; fi) +${Cheapcheck}: mkgc.so ${macroobj} nanopass.so base-lang.ss expand-lang.ss primref.ss types.ss io-types.ss fasl-helpers.ss hashtable-types.ss + (if [ -r ${Cheapcheck} ]; then mv -f ${Cheapcheck} ${Cheapcheck}.bak; fi) + echo '(reset-handler abort)'\ + '(mkheapcheck.inc "${Cheapcheck}")' |\ + ${Scheme} -q ${macroobj} mkheader.so mkgc.so + (if `cmp -s ${Cheapcheck} ${Cheapcheck}.bak`;\ + then mv -f ${Cheapcheck}.bak ${Cheapcheck};\ + else rm -f ${Cheapcheck}.bak; fi) + .PHONY: ${Revision} ${Revision}: update-revision @./update-revision > ${Revision} @@ -664,7 +684,9 @@ reset: $(MAKE) reset-one FILE=scheme.h $(MAKE) reset-one FILE=gc-oce.inc $(MAKE) reset-one FILE=gc-ocd.inc + $(MAKE) reset-one FILE=gc-par.inc $(MAKE) reset-one FILE=vfasl.inc + $(MAKE) reset-one FILE=heapcheck.inc .PHONY: reset-one reset-one: diff --git a/racket/src/ChezScheme/s/cmacros.ss b/racket/src/ChezScheme/s/cmacros.ss index a52be453c1..0ea135209c 100644 --- a/racket/src/ChezScheme/s/cmacros.ss +++ b/racket/src/ChezScheme/s/cmacros.ss @@ -855,6 +855,8 @@ ;; --------------------------------------------------------------------- ;; Bit and byte offsets for different types of objects: +;; Flags that matter to the GC must apply only to static-generation +;; objects, and they must not overlap with `forward-marker` (define-constant code-flag-system #b0000001) (define-constant code-flag-continuation #b0000010) (define-constant code-flag-template #b0000100) @@ -1389,7 +1391,8 @@ (define-primitive-structure-disps ratnum type-typed-object ([iptr type] [ptr numerator] - [ptr denominator])) + [ptr denominator] + [iptr pad])) ; for alignment (define-primitive-structure-disps vector type-typed-object ([iptr type] @@ -1433,7 +1436,8 @@ (define-primitive-structure-disps exactnum type-typed-object ([iptr type] [ptr real] - [ptr imag])) + [ptr imag] + [iptr pad])) ; for alignment (define-primitive-structure-disps closure type-closure ([ptr code] @@ -1495,8 +1499,9 @@ (define-constant virtual-register-count 16) (define-constant static-generation 7) (define-constant num-generations (fx+ (constant static-generation) 1)) -(define-constant num-thread-local-allocation-segments (fx* (fx+ 1 (constant static-generation)) - (fx+ 1 (constant max-real-space)))) +(define-constant num-spaces (fx+ (constant max-real-space) 1)) +(define-constant num-thread-local-allocation-segments (fx* (constant num-generations) + (constant num-spaces))) (define-constant maximum-parallel-collect-threads 8) ;;; make sure gc sweeps all ptrs @@ -1577,6 +1582,7 @@ [xptr base-loc 
(constant num-thread-local-allocation-segments)]
   [xptr next-loc (constant num-thread-local-allocation-segments)]
   [iptr bytes-left (constant num-thread-local-allocation-segments)]
+  [xptr orig-next-loc (constant num-spaces)]
   [xptr sweep-loc (constant num-thread-local-allocation-segments)]
   [xptr sweep-next (constant num-thread-local-allocation-segments)]
   [xptr pending-ephemerons]
@@ -1585,7 +1591,9 @@
   [xptr sweep-stack-start]
   [xptr sweep-stack-limit]
   [iptr sweep-change]
-  [xptr lock-status]
+  [iptr remote-sweeper]
+  [xptr remote-range-start]
+  [xptr remote-range-end]
   [iptr bitmask-overhead (constant num-generations)]))
 
 (define tc-field-list
diff --git a/racket/src/ChezScheme/s/mkgc.ss b/racket/src/ChezScheme/s/mkgc.ss
index 20d3408da9..9534b50160 100644
--- a/racket/src/ChezScheme/s/mkgc.ss
+++ b/racket/src/ChezScheme/s/mkgc.ss
@@ -13,7 +13,9 @@
 (disable-unbound-warning
  mkgc-ocd.inc
  mkgc-oce.inc
- mkvfasl.inc)
+ mkgc-par.inc
+ mkvfasl.inc
+ mkheapcheck.inc)
 
 ;; Currently supported traversal modes:
 ;;  - copy
@@ -25,6 +27,7 @@
 ;;  - measure : recurs for reachable size
 ;;  - vfasl-copy
 ;;  - vfasl-sweep
+;;  - check
 
 ;; For the specification, there are a few declaration forms described
 ;; below, such as `trace` to declare a pointer-valued field within an
@@ -83,6 +86,7 @@
 ;;  - (trace-early <field>) : relocate for sweep, copy, and mark; recur otherwise; implies pure
 ;;  - (trace-now <field>) : direct recur; implies pure
 ;;  - (trace-early-rtd <field>) : for record types, avoids recur on #!base-rtd; implies pure
+;;  - (trace-pure-code <field>) : like `trace-pure`, but special handling in parallel mode
 ;;  - (trace-ptrs <field> <count>) : trace an array of pointers
 ;;  - (trace-pure-ptrs <field> <count>) : pure analog of `trace-ptrs`
 ;;  - (copy <field>) : copy for copy, ignore otherwise
@@ -96,8 +100,6 @@
 ;;  - (as-mark-end <stmt> ...) : declares that <stmt>s implement counting,
 ;;    which means that it's included for mark mode
 ;;  - (skip-forwarding) : disable forward-pointer installation in copy mode
-;;  - (check-lock-failed) : bail out if a lock aquire failed; use this before dereferencing
-;;    an object reference that might not have been relocated
 ;;  - (assert <expr>) : assertion
 ;;
 ;; In the above declarations, nonterminals like <field> can be
@@ -150,6 +152,7 @@
 ;; Built-in variables:
 ;;  - _ : object being copied, swept, etc.
 ;;  - _copy_ : target in copy or vfasl mode, same as _ otherwise
+;;  - _size_ : size of the current object, but only in parallel mode
 ;;  - _tf_ : type word
 ;;  - _tg_ : target generation
 ;;  - _backreferences?_ : dynamic flag indicating whether backreferences are on
@@ -173,6 +176,9 @@
      [(copy)
       (set! (ephemeron-prev-ref _copy_) 0)
       (set! (ephemeron-next _copy_) 0)]
+     [(check)
+      (trace pair-car)
+      (trace pair-cdr)]
      [else])
     (add-ephemeron-to-pending)
     (mark one-bit no-sweep)
@@ -181,6 +187,9 @@
   [space-weakpair
    (space space-weakpair)
    (vfasl-fail "weakpair")
+   (case-mode
+    [(check) (trace pair-car)]
+    [else])
    (try-double-pair copy pair-car
                     trace pair-cdr
                     countof-weakpair)]
@@ -193,7 +202,10 @@
   [closure
    (define code : ptr (CLOSCODE _))
-   (trace-code-early code)
+   (trace-code-early code) ; not traced in parallel mode
+   ;; In parallel mode, don't use any fields of `code` until the
+   ;; second one after the type, because the type and first field may
+   ;; be overwritten with forwarding information
    (cond
      [(and-not-as-dirty
        (or-assume-continuation
@@ -221,12 +233,23 @@
    (case-mode
     [(sweep)
      (define stk : ptr (continuation-stack _))
-     (when (&& (!= stk (cast ptr 0)) (OLDSPACE stk))
-       (set! 
(continuation-stack _) - (copy_stack _tc_ - (continuation-stack _) - (& (continuation-stack-length _)) - (continuation-stack-clength _))))] + (define s_si : seginfo* NULL) + (when (&& (!= stk (cast ptr 0)) + (begin + (set! s_si (SegInfo (ptr_get_segment stk))) + (-> s_si old_space))) + (cond + [(! (SEGMENT_IS_LOCAL s_si stk)) + ;; A stack segment has a single owner, so it's ok for us + ;; to sweep the stack content, even though it's on a + ;; remote segment relative to the current sweeper. + (RECORD_REMOTE_RANGE _tc_ _ _size_ s_si)] + [else + (set! (continuation-stack _) + (copy_stack _tc_ + (continuation-stack _) + (& (continuation-stack-length _)) + (continuation-stack-clength _)))]))] [else]) (count countof-stack (continuation-stack-length _) 1 [measure]) (trace-pure continuation-link) @@ -250,9 +273,15 @@ [else (cond [(& (code-type code) (<< code-flag-mutable-closure code-flags-offset)) + ;; in parallel mode, assume that code pointer is static and doesn't need to be swept space-impure] [else - space-pure])])) + (case-flag parallel? + [on + ;; use space-closure so code reference (not a regular ptr) is swept correctly + space-closure] + [off + space-pure])])])) (vspace vspace_closure) (when-vfasl (when (& (code-type code) (<< code-flag-mutable-closure code-flags-offset)) @@ -286,7 +315,7 @@ (size size-symbol) (mark one-bit) (trace/define symbol-value val :vfasl-as (FIX (vfasl_symbol_to_index vfi _))) - (trace-symcode symbol-pvalue val) + (trace-local-symcode symbol-pvalue val) (trace-nonself/vfasl-as-nil symbol-plist) (trace-nonself symbol-name) (trace-nonself/vfasl-as-nil symbol-splist) @@ -463,37 +492,33 @@ (count countof-box)] [ratnum - (space space-data) + (space (case-flag parallel? + [on space-pure] + [off space-data])) (vspace vspace_impure) ; would be better if we had pure, but these are rare (size size-ratnum) (copy-type ratnum-type) - (trace-now ratnum-numerator) - (trace-now ratnum-denominator) - (case-mode - [(copy) (when (CHECK_LOCK_FAILED _tc_) - ;; create failed relocates so that the heap checker isn't unhappy - (set! (ratnum-numerator _copy_) (cast ptr 0)) - (set! (ratnum-denominator _copy_) (cast ptr 0)))] - [(mark) (check-lock-failed)] - [else]) + (trace-nonparallel-now ratnum-numerator) + (trace-nonparallel-now ratnum-denominator) + (case-flag parallel? + [on (pad (set! (ratnum-pad _copy_) 0))] + [off]) (mark) (vfasl-pad-word) (count countof-ratnum)] [exactnum - (space space-data) + (space (case-flag parallel? + [on space-pure] + [off space-data])) (vspace vspace_impure) ; same rationale as ratnum (size size-exactnum) (copy-type exactnum-type) - (trace-now exactnum-real) - (trace-now exactnum-imag) - (case-mode - [(copy) (when (CHECK_LOCK_FAILED _tc_) - ;; create failed relocates so that the heap checker isn't unhappy - (set! (exactnum-real _copy_) (cast ptr 0)) - (set! (exactnum-imag _copy_) (cast ptr 0)))] - [(mark) (check-lock-failed)] - [else]) + (trace-nonparallel-now exactnum-real) + (trace-nonparallel-now exactnum-imag) + (case-flag parallel? + [on (pad (set! (exactnum-pad _copy_) 0))] + [off]) (mark) (vfasl-pad-word) (count countof-exactnum)] @@ -613,6 +638,11 @@ [else (trace-nonself field)])) +(define-trace-macro (trace-nonparallel-now field) + (case-flag parallel? + [on (trace-pure field)] + [off (trace-now field)])) + (define-trace-macro (try-double-pair do-car pair-car do-cdr pair-cdr count-pair) @@ -680,7 +710,16 @@ ;; Special relocation handling for code in a closure: (set! 
code (vfasl_relocate_code vfi code))]
     [else
-     (trace-early (just code))])))
+     ;; In parallel mode, the `code` pointer may or may not have been
+     ;; forwarded. If it has been, we may misinterpret the forward marker
+     ;; as a code type with flags, but it's ok, because the flags will
+     ;; only be set for static-generation objects
+     (case-flag parallel?
+      [on (case-mode
+           [(sweep sweep-in-old)
+            (trace-pure-code (just code))]
+           [else])]
+      [off (trace-early (just code))])])))
 
 (define-trace-macro (copy-clos-code code)
   (case-mode
@@ -718,7 +757,6 @@
      (trace ref)]
     [(sweep sweep-in-old)
      (trace ref) ; can't trace `val` directly, because we need an impure relocate
-     (check-lock-failed)
      (define val : ptr (ref _))]
     [vfasl-copy
      (set! (ref _copy_) vfasl-val)]
@@ -733,7 +771,6 @@
      (case-flag as-dirty?
       [on (trace (just code))]
       [off (trace-pure (just code))])
-     (check-lock-failed)
      (INITSYMCODE _ code)]
     [measure]
     [vfasl-copy
@@ -741,6 +778,27 @@
     [else
      (copy symbol-pvalue)]))
 
+(define-trace-macro (trace-local-symcode symbol-pvalue val)
+  (case-mode
+   [(sweep)
+    (case-flag parallel?
+     [on
+      (define v_si : seginfo* (cond
+                                [(Sprocedurep val) (SegInfo (ptr_get_segment val))]
+                                [else NULL]))
+      (cond
+        [(\|\|
+          (\|\|
+           (== v_si NULL)
+           (! (-> v_si old_space)))
+          (SEGMENT_IS_LOCAL v_si val))
+         (trace-symcode symbol-pvalue val)]
+        [else
+         (RECORD_REMOTE_RANGE _tc_ _ _size_ v_si)])]
+     [off (trace-symcode symbol-pvalue val)])]
+   [else
+    (trace-symcode symbol-pvalue val)]))
+
 (define-trace-macro (trace-tlc tlc-next tlc-keyval)
   (case-mode
    [(copy mark)
@@ -800,11 +858,24 @@
     [on]
     [off
      (case-mode
-      [(sweep sweep-in-old self-test)
-       ;; Bignum pointer mask may need forwarding
-       (trace-pure (record-type-pm rtd))
-       (check-lock-failed)
-       (set! num (record-type-pm rtd))]
+      [(sweep)
+       (case-flag parallel?
+        [on
+         (define pm_si : seginfo* (SegInfo (ptr_get_segment num)))
+         (cond
+           [(\|\|
+             (! (-> pm_si old_space))
+             (SEGMENT_IS_LOCAL pm_si num))
+            (trace-record-type-pm num rtd)]
+           [else
+            ;; Try again in the bignum's sweeper
+            (RECORD_REMOTE_RANGE _tc_ _ _size_ pm_si)
+            (set! num S_G.zero_length_bignum)])]
+        [off
+         (trace-record-type-pm num rtd)])]
+      [(sweep-in-old self-test)
+       (trace-record-type-pm num rtd)]
+      [(check) (check-bignum num)]
       [else])])
    (let* ([index : iptr (- (BIGLEN num) 1)]
           ;; Ignore bit for already forwarded rtd
@@ -825,6 +896,11 @@
              (set! mask (bignum-data num index))
              (set! bits bigit_bits)))]))]))
 
+(define-trace-macro (trace-record-type-pm num rtd)
+  ;; Bignum pointer mask may need forwarding
+  (trace-pure (record-type-pm rtd))
+  (set! num (record-type-pm rtd)))
+
 (define-trace-macro (vfasl-check-parent-rtd rtd)
   (case-mode
    [(vfasl-copy)
@@ -917,7 +993,7 @@
                        (cast iptr (port-buffer _))))
        (trace port-buffer)
        (set! (port-last _) (cast ptr (+ (cast iptr (port-buffer _)) n))))]
-    [sweep-in-old
+    [(sweep-in-old check)
      (when (& (cast uptr _tf_) flag)
        (trace port-buffer))]
     [else
@@ -1024,10 +1100,19 @@
             (trace-pure (* pp)))
           (set! mask >>= 1)))]
       [else
-       (trace-pure (* (ENTRYNONCOMPACTLIVEMASKADDR oldret)))
-       (check-lock-failed)
-       (let* ([num : ptr (ENTRYLIVEMASK oldret)]
-              [index : iptr (BIGLEN num)])
+       (case-mode
+        [(check) (check-bignum num)]
+        [else
+         (define n_si : seginfo* (SegInfo (ptr_get_segment num)))
+         (cond
+           [(! (-> n_si old_space))]
+           [(SEGMENT_IS_LOCAL n_si num)
+            (trace-pure (* (ENTRYNONCOMPACTLIVEMASKADDR oldret)))
+            (set! num (ENTRYLIVEMASK oldret))]
+           [else
+            (RECORD_REMOTE_RANGE _tc_ _ _size_ n_si)
+            (set! num S_G.zero_length_bignum)])])
+       (let* ([index : iptr (BIGLEN num)])
          (while
          :? (!= index 0)
          (set! 
index -= 1) @@ -1055,11 +1140,10 @@ [(sweep sweep-in-old) (define x_si : seginfo* (SegInfo (ptr_get_segment c_p))) (when (-> x_si old_space) - (relocate_code c_p x_si) + (relocate_code c_p x_si _ _size_) (case-mode [sweep-in-old] [else - (check-lock-failed) (set! field (cast ptr (+ (cast uptr c_p) co)))]))] [else (trace-pure (just c_p))])) @@ -1116,7 +1200,6 @@ (case-mode [sweep - (check-lock-failed) (cond [(&& (== from_g static_generation) (&& (! S_G.retain_static_relocation) @@ -1126,21 +1209,20 @@ (let* ([t_si : seginfo* (SegInfo (ptr_get_segment t))]) (when (-> t_si old_space) (cond - [(SEGMENT_LOCK_ACQUIRE t_si) + [(SEGMENT_IS_LOCAL t_si t) (set! n (size_reloc_table (reloc-table-size t))) (count countof-relocation-table (just n) 1 sweep) (cond [(-> t_si use_marks) - ;; Assert: (! (marked t_si t)) - (mark_typemod_data_object _tc_ t n t_si)] + (cond + [(! (marked t_si t)) + (mark_typemod_data_object _tc_ t n t_si)])] [else (let* ([oldt : ptr t]) (find_room _tc_ space_data from_g typemod n t) - (memcpy_aligned (TO_VOIDP t) (TO_VOIDP oldt) n))]) - (SEGMENT_LOCK_RELEASE t_si)] + (memcpy_aligned (TO_VOIDP t) (TO_VOIDP oldt) n))])] [else - (RECORD_LOCK_FAILED _tc_ t_si) - (check-lock-failed)]))) + (RECORD_REMOTE_RANGE _tc_ _ _size_ t_si)]))) (set! (reloc-table-code t) _) (set! (code-reloc _) t)]) (S_record_code_mod tc_in (cast uptr (TO_PTR (& (code-data _ 0)))) (cast uptr (code-length _)))] @@ -1150,6 +1232,10 @@ (set! (code-reloc _) (cast ptr (ptr_diff t (-> vfi base_addr))))] [else])])) +(define-trace-macro (check-bignum var) + (trace (just var)) + (check_bignum var)) + (define-trace-macro (unless-code-relocated stmt) (case-flag code-relocated? [on] @@ -1389,6 +1475,7 @@ [(lookup 'as-dirty? config #f) ", IGEN youngest"] [(lookup 'no-from-g? config #f) ""] [else ", IGEN from_g"])] + [(check) ", uptr seg, ISPC s_in, IBOOL aftergc"] [else ""])) (let ([body (lambda () @@ -1417,14 +1504,11 @@ (case (lookup 'mode config) [(copy) (code-block - "ENABLE_LOCK_ACQUIRE" - "if (CHECK_LOCK_FAILED(tc_in)) return 0xff;" "check_triggers(tc_in, si);" (code-block "ptr new_p;" "IGEN tg = TARGET_GENERATION(si);" (body) - "if (CHECK_LOCK_FAILED(tc_in)) return 0xff;" "SWEEPCHANGE(tc_in) = SWEEP_CHANGE_PROGRESS;" "FWDMARKER(p) = forward_marker;" "FWDADDRESS(p) = new_p;" @@ -1434,17 +1518,14 @@ "return tg;"))] [(mark) (code-block - "ENABLE_LOCK_ACQUIRE" - "if (CHECK_LOCK_FAILED(tc_in)) return 0xff;" "check_triggers(tc_in, si);" - (ensure-segment-mark-mask "si" "" '()) + (ensure-segment-mark-mask "si" "") (body) "SWEEPCHANGE(tc_in) = SWEEP_CHANGE_PROGRESS;" "ADD_BACKREFERENCE(p, si->generation);" "return si->generation;")] [(sweep) (code-block - "ENABLE_LOCK_ACQUIRE" (and (lookup 'maybe-backreferences? config #f) "PUSH_BACKREFERENCE(p)") (body) @@ -1453,9 +1534,7 @@ (and (lookup 'as-dirty? config #f) "return youngest;"))] [(sweep-in-old) - (code-block - "ENABLE_LOCK_ACQUIRE" - (body))] + (body)] [(measure) (body)] [(self-test) @@ -1579,8 +1658,8 @@ (code "/* Do not inspect the type or first field of the rtd, because" " it may have been overwritten for forwarding. */")])] - [(measure sweep sweep-in-old) - (statements `((trace-early ,field)) config)] + [(measure sweep sweep-in-old check) + (statements `((trace-early ,field)) (cons `(early-rtd? #t) config))] [else #f]) (statements (cdr l) (cons `(copy-extra-rtd ,field) config)))] [`(trace ,field) @@ -1590,9 +1669,12 @@ (code (and (not (lookup 'as-dirty? 
config #f)) (trace-statement field config #f 'pure)) (statements (cdr l) config))] + [`(trace-pure-code ,field) + (code (and (not (lookup 'as-dirty? config #f)) + (trace-statement field (cons `(early-code? #t) config) #f 'pure)) + (statements (cdr l) config))] [`(trace-early ,field) (code (trace-statement field config #t 'pure) - (check-lock-failure-statement config) (statements (cdr l) (if (symbol? field) (cons `(copy-extra ,field) config) config)))] @@ -1680,7 +1762,7 @@ (statements (cons `(copy-bytes ,offset (* ptr_bytes ,len)) (cdr l)) config)] - [(sweep measure sweep-in-old vfasl-sweep) + [(sweep measure sweep-in-old vfasl-sweep check) (code (loop-over-pointers (field-expression offset config "p" #t) @@ -1855,10 +1937,6 @@ (statements (list count-stmt) config)))] [else (statements (cdr l) config)])] - [`(check-lock-failed) - (code - (check-lock-failure-statement config) - (statements (cdr l) config))] [`(define ,id : ,type ,rhs) (let* ([used (lookup 'used config)] [prev-used? (hashtable-ref used id #f)]) @@ -1981,6 +2059,12 @@ [`_copy_ (case (lookup 'mode config) [(copy vfasl-copy) "new_p"] [else "p"])] + [`_size_ + (cond + [(lookup 'parallel? config #f) + (hashtable-set! (lookup 'used config) 'p_sz #t) + "p_sz"] + [else "SIZE"])] [`_tf_ (lookup 'tf config "TYPEFIELD(p)")] [`_tg_ @@ -2052,6 +2136,8 @@ (expression (car (apply-macro m (list a))) config protect? multiline?))] [else (protect (format "~a(~a)" op (expression a config #t)))])] + [`(begin ,a ,b) + (format "(~a, ~a)" (expression a config #t) (expression b config #t))] [`(,op ,a ,b) (cond [(memq op '(& && \|\| == != + - * < > <= >= << >> ->)) @@ -2118,23 +2204,38 @@ (measure-statement (field-expression field config "p" #f))] [(eq? mode 'self-test) (format "if (p == ~a) return 1;" (field-expression field config "p" #f))] + [(eq? mode 'check) + (format "check_pointer(&(~a), ~a, ~a, seg, s_in, aftergc);" + (field-expression field config "p" #f) + (match field + [`(just ,_) "0"] + [else "1"]) + (expression '_ config))] [else #f])) (define (relocate-statement purity e config) (define mode (lookup 'mode config)) + (define (get-start) (expression '_ config)) + (define (get-size) (cond + [(lookup 'early-rtd? config #f) + (expression '(size_record_inst (UNFIX (record-type-size (record-type _)))) config)] + [(lookup 'early-code? config #f) + (expression '(size_closure (CODEFREE (CLOSCODE _))) config)] + [else + (expression '_size_ config)])) (case mode [(vfasl-sweep) (format "vfasl_relocate(vfi, &~a);" e)] [(sweep-in-old) (if (eq? purity 'pure) - (format "relocate_pure(&~a);" e) - (format "relocate_indirect(~a);" e))] + (format "relocate_pure(&~a, ~a, ~a);" e (get-start) (get-size)) + (format "relocate_indirect(~a, ~a, ~a);" e (get-start) (get-size)))] [else (if (lookup 'as-dirty? config #f) (begin (when (eq? purity 'pure) (error 'relocate-statement "pure as dirty?")) - (format "relocate_dirty(&~a, youngest);" e)) - (format "relocate_~a(&~a~a);" purity e (if (eq? purity 'impure) ", from_g" "")))])) + (format "relocate_dirty(&~a, youngest, ~a, ~a);" e (get-start) (get-size))) + (format "relocate_~a(&~a~a, ~a, ~a);" purity e (if (eq? 
purity 'impure) ", from_g" "") (get-start) (get-size)))])) (define (measure-statement e) (code @@ -2243,26 +2344,19 @@ " seginfo *mark_si; IGEN g;" " si->marked_count += ((uptr)build_ptr(seg+1,0)) - addr;" " seg++;" - " /* Note: taking a sequence of locks for a span of segments */" " while (seg < end_seg) {" - " ENABLE_LOCK_ACQUIRE" " mark_si = SegInfo(seg);" - " SEGMENT_LOCK_MUST_ACQUIRE(mark_si);" " g = mark_si->generation;" " if (!fully_marked_mask[g]) init_fully_marked_mask(tc_in, g);" " mark_si->marked_mask = fully_marked_mask[g];" " mark_si->marked_count = bytes_per_segment;" - " SEGMENT_LOCK_RELEASE(mark_si);" " seg++;" " }" " mark_si = SegInfo(end_seg);" " {" - " ENABLE_LOCK_ACQUIRE" - " SEGMENT_LOCK_MUST_ACQUIRE(mark_si);" - (ensure-segment-mark-mask "mark_si" " " '()) + (ensure-segment-mark-mask "mark_si" " ") " /* no need to set a bit: it's enough to have made `marked_mask` non-NULL */" " mark_si->marked_count += addr + p_sz - (uptr)build_ptr(end_seg,0);" - " SEGMENT_LOCK_RELEASE(mark_si);" " }" "}")]))] [within-segment? @@ -2294,13 +2388,11 @@ "else" (within-loop-statement (code " seginfo *mark_si = SegInfo(ptr_get_segment(mark_p));" - " ENABLE_LOCK_ACQUIRE" - " SEGMENT_LOCK_MUST_ACQUIRE(mark_si);" - (ensure-segment-mark-mask "mark_si" " " '())) + (ensure-segment-mark-mask "mark_si" " ")) "mark_si" step #t - " SEGMENT_LOCK_RELEASE(mark_si);")))]) + #f)))]) (cond [no-sweep? #f] [else @@ -2312,20 +2404,6 @@ (code-block push))] [else push]))])))) - (define (check-lock-failure-statement config) - (let ([mode (lookup 'mode config)]) - (case mode - [(copy mark sweep) - (code - "if (CHECK_LOCK_FAILED(tc_in))" - (case mode - [(copy mark) (code-block "return 0xff;")] - [(sweep sweep-in-old) - (if (lookup 'as-dirty? config #f) - (code-block "return 0xff;") - (code-block "return;"))]))] - [else #f]))) - (define (field-expression field config arg protect?) (if (symbol? field) (cond @@ -2359,15 +2437,11 @@ (error 'field-ref "index not allowed for non-array field ~s" acc-name)) (format "~a(~a)" c-ref obj)]))) - (define (ensure-segment-mark-mask si inset flags) + (define (ensure-segment-mark-mask si inset) (code (format "~aif (!~a->marked_mask) {" inset si) - (format "~a find_room_voidp(tc_in, space_data, ~a->generation, ptr_align(segment_bitmap_bytes), ~a->marked_mask);" + (format "~a init_mask(tc_in, ~a->marked_mask, ~a->generation, 0);" inset si si) - (if (memq 'no-clear flags) - (format "~a /* no clearing needed */" inset) - (format "~a memset(~a->marked_mask, 0, segment_bitmap_bytes);" inset si)) - (format "~a S_G.bitmask_overhead[~a->generation] += ptr_align(segment_bitmap_bytes);" inset si) (format "~a}" inset))) (define (just-mark-bit-space? sp) @@ -2534,25 +2608,29 @@ (loop (cdr l))))] [else (cons (car l) (loop (cdr l)))])))) - (define (gen-gc ofn count? measure?) + (define (gen-gc ofn count? measure? parallel?) (guard (x [#t (raise x)]) (parameterize ([current-output-port (open-output-file ofn 'replace)]) (print-code (generate "copy" `((mode copy) (maybe-backreferences? ,count?) - (counts? ,count?)))) + (counts? ,count?) + (parallel? ,parallel?)))) (print-code (generate "sweep" `((mode sweep) (maybe-backreferences? ,count?) - (counts? ,count?)))) + (counts? ,count?) + (parallel? ,parallel?)))) (print-code (generate "sweep_object_in_old" `((mode sweep-in-old) - (maybe-backreferences? ,count?)))) + (maybe-backreferences? ,count?) + (parallel? ,parallel?)))) (print-code (generate "sweep_dirty_object" `((mode sweep) (maybe-backreferences? ,count?) (counts? ,count?) + (parallel? 
,parallel?) (as-dirty? #t)))) (letrec ([sweep1 (case-lambda @@ -2566,26 +2644,32 @@ (known-types (,type)) (maybe-backreferences? ,count?) (counts? ,count?)))))])]) - (sweep1 'record "sweep_record" '()) - (sweep1 'record "sweep_dirty_record" '((as-dirty? #t))) - (sweep1 'symbol) - (sweep1 'symbol "sweep_dirty_symbol" '((as-dirty? #t))) - (sweep1 'thread "sweep_thread" '((no-from-g? #t))) - (sweep1 'port) - (sweep1 'port "sweep_dirty_port" '((as-dirty? #t))) - (sweep1 'closure "sweep_continuation" '((code-relocated? #t) - (assume-continuation? #t))) - (sweep1 'code "sweep_code_object")) + (sweep1 'record "sweep_record" `((parallel? ,parallel?))) + (sweep1 'record "sweep_dirty_record" `((as-dirty? #t) + (parallel? ,parallel?))) + (sweep1 'symbol "sweep_symbol" `((parallel? ,parallel?))) + (sweep1 'symbol "sweep_dirty_symbol" `((as-dirty? #t) + (parallel? ,parallel?))) + (sweep1 'thread "sweep_thread" `((no-from-g? #t) + (parallel? ,parallel?))) + (sweep1 'port "sweep_port" `((parallel? ,parallel?))) + (sweep1 'port "sweep_dirty_port" `((as-dirty? #t) + (parallel? ,parallel?))) + (sweep1 'closure "sweep_continuation" `((code-relocated? #t) + (assume-continuation? #t) + (parallel? ,parallel?))) + (sweep1 'code "sweep_code_object" `((parallel? ,parallel?)))) (print-code (generate "size_object" `((mode size)))) (print-code (generate "mark_object" `((mode mark) - (counts? ,count?)))) + (counts? ,count?) + (parallel? ,parallel?)))) (print-code (generate "object_directly_refers_to_self" `((mode self-test)))) (print-code (code "static void mark_typemod_data_object(ptr tc_in, ptr p, uptr p_sz, seginfo *si)" (code-block - (ensure-segment-mark-mask "si" "" '()) + (ensure-segment-mark-mask "si" "") (mark-statement '(one-bit no-sweep) (cons (list 'used (make-eq-hashtable)) @@ -2603,11 +2687,22 @@ `((mode vfasl-sweep) (return-size? #t))))))) + (define (gen-heapcheck ofn) + (guard + (x [#t (raise x)]) + (parameterize ([current-output-port (open-output-file ofn 'replace)]) + (print-code (generate "check_object" + `((mode check)))) + (print-code (generate "size_object" + `((mode size))))))) + ;; Render via mkequates to record a mapping from selectors to C ;; macros: (let-values ([(op get) (open-bytevector-output-port (native-transcoder))]) (mkequates.h op)) - (set! mkgc-ocd.inc (lambda (ofn) (gen-gc ofn #f #f))) - (set! mkgc-oce.inc (lambda (ofn) (gen-gc ofn #t #t))) - (set! mkvfasl.inc (lambda (ofn) (gen-vfasl ofn)))) + (set! mkgc-ocd.inc (lambda (ofn) (gen-gc ofn #f #f #f))) + (set! mkgc-oce.inc (lambda (ofn) (gen-gc ofn #t #t #f))) + (set! mkgc-par.inc (lambda (ofn) (gen-gc ofn #f #f #t))) + (set! mkvfasl.inc (lambda (ofn) (gen-vfasl ofn))) + (set! 
mkheapcheck.inc (lambda (ofn) (gen-heapcheck ofn)))) diff --git a/racket/src/ChezScheme/s/mkheader.ss b/racket/src/ChezScheme/s/mkheader.ss index 9d24514db8..2a98aa8e01 100644 --- a/racket/src/ChezScheme/s/mkheader.ss +++ b/racket/src/ChezScheme/s/mkheader.ss @@ -1006,10 +1006,12 @@ (defref EXACTNUM_TYPE exactnum type) (defref EXACTNUM_REAL_PART exactnum real) (defref EXACTNUM_IMAG_PART exactnum imag) + (defref EXACTNUM_PAD exactnum pad) (defref RATTYPE ratnum type) (defref RATNUM ratnum numerator) (defref RATDEN ratnum denominator) + (defref RATPAD ratnum pad) (defref CLOSENTRY closure code) (defref CLOSIT closure data) diff --git a/racket/src/ChezScheme/workarea b/racket/src/ChezScheme/workarea index c06b594db5..6c7cadb02f 100755 --- a/racket/src/ChezScheme/workarea +++ b/racket/src/ChezScheme/workarea @@ -285,7 +285,9 @@ workdir $W/boot/$M (cd $W/boot/$M; workln "$upupupbootdir"/boot/$M/equates.h equates.h) (cd $W/boot/$M; workln "$upupupbootdir"/boot/$M/gc-ocd.inc gc-ocd.inc) (cd $W/boot/$M; workln "$upupupbootdir"/boot/$M/gc-oce.inc gc-oce.inc) +(cd $W/boot/$M; workln "$upupupbootdir"/boot/$M/gc-par.inc gc-par.inc) (cd $W/boot/$M; workln "$upupupbootdir"/boot/$M/vfasl.inc vfasl.inc) +(cd $W/boot/$M; workln "$upupupbootdir"/boot/$M/heapcheck.inc heapcheck.inc) (cd $W/boot/$M; workln "$upupupbootdir"/boot/$M/petite.boot petite.boot) (cd $W/boot/$M; workln "$upupupbootdir"/boot/$M/scheme.boot scheme.boot) (cd $W/boot/$M; workln "$upupupbootdir"/boot/$M/def.so def.so) diff --git a/racket/src/cs/c/check_boot.sh b/racket/src/cs/c/check_boot.sh index 14e7b3e967..4d913f9d4c 100644 --- a/racket/src/cs/c/check_boot.sh +++ b/racket/src/cs/c/check_boot.sh @@ -24,7 +24,9 @@ check_pb scheme.h check_pb equates.h check_pb gc-ocd.inc check_pb gc-oce.inc +check_pb gc-par.inc check_pb vfasl.inc +check_pb heapcheck.inc check_mach() { @@ -43,4 +45,6 @@ check_mach scheme.h check_mach equates.h check_mach gc-ocd.inc check_mach gc-oce.inc +check_mach gc-par.inc check_mach vfasl.inc +check_mach heapcheck.inc diff --git a/racket/src/cs/c/ready_boot.sh b/racket/src/cs/c/ready_boot.sh index 84b5f5a78f..046f2dfd46 100644 --- a/racket/src/cs/c/ready_boot.sh +++ b/racket/src/cs/c/ready_boot.sh @@ -22,6 +22,8 @@ ready_mach scheme.h ready_mach equates.h ready_mach gc-ocd.inc ready_mach gc-oce.inc +ready_mach gc-par.inc ready_mach vfasl.inc +ready_mach heapcheck.inc rm -f boot_pending diff --git a/racket/src/cs/c/reset_boot.sh b/racket/src/cs/c/reset_boot.sh index 5821c8ec4b..bcde08540c 100644 --- a/racket/src/cs/c/reset_boot.sh +++ b/racket/src/cs/c/reset_boot.sh @@ -22,4 +22,5 @@ ready_mach scheme.h ready_mach equates.h ready_mach gc-ocd.inc ready_mach gc-oce.inc +ready_mach gc-par.inc ready_mach vfasl.inc
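--
Reviewer sketches (illustrations only; they follow the diff and are not
part of the patch).

Nearly every relocation site above repeats one dispatch: if the
referenced object's segment is local to this sweeper (or nothing remains
to be done for the object), handle it in place; otherwise record a
remote range addressed to SWEEPER(si->creator_tc). SEGMENT_IS_LOCAL is
not defined in this diff, so the predicate below is a guess
reconstructed from its call sites; `sweeper_of` and `settled` are
hypothetical helpers, not identifiers from the patch.

typedef struct seginfo_sketch {
  void *creator_tc;   /* mirrors seginfo.creator_tc from the types.h hunk */
  int old_space;      /* only old-space segments reach this test */
} seginfo_sketch;

int sweeper_of(void *tc);                  /* stand-in for SWEEPER(tc) */
int settled(seginfo_sketch *si, void *p);  /* already copied or marked? */

static int segment_is_local(seginfo_sketch *si, void *p, void *my_tc) {
  /* Handle `p` here if this sweeper owns the segment or if no
     relocation work remains; otherwise the caller records a remote
     range for the segment's owner to resweep. */
  return sweeper_of(si->creator_tc) == sweeper_of(my_tc) || settled(si, p);
}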
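RECORD_REMOTE_RANGE and FLUSH_REMOTE_RANGE are used above but defined
elsewhere in gc.c. Judging from the call sites and from the
REMOTERANGESTART/REMOTERANGEEND initialization in the thread.c hunk
((uptr)-1 and 0 as the "empty" sentinels), they appear to accumulate one
contiguous range at a time and batch it onto a per-destination list.
A minimal sketch; the list representation and the destination bound are
assumptions:

#include <stdlib.h>
#include <stdint.h>

typedef struct remote_range {
  uintptr_t start, end;              /* span to be reswept by the owner */
  struct remote_range *next;
} remote_range;

typedef struct sweeper_state {
  uintptr_t range_start;             /* (uintptr_t)-1 while empty */
  uintptr_t range_end;               /* 0 while empty */
  int dest;                          /* owning sweeper of the pending range */
  remote_range *to_send[8];          /* one outgoing list per destination */
} sweeper_state;

/* Package the pending range, if any, onto the outgoing list for its
   destination sweeper, then reset to the "empty" sentinels. */
static void flush_remote_range(sweeper_state *me) {
  if (me->range_end == 0)
    return;
  remote_range *r = malloc(sizeof *r);
  r->start = me->range_start;
  r->end = me->range_end;
  r->next = me->to_send[me->dest];
  me->to_send[me->dest] = r;
  me->range_start = (uintptr_t)-1;
  me->range_end = 0;
}

/* Widen the pending range to cover [start, start+size); consecutive
   objects destined for the same sweeper coalesce into one range. */
static void record_remote_range(sweeper_state *me, uintptr_t start,
                                size_t size, int dest) {
  if (me->range_end != 0 && me->dest != dest)
    flush_remote_range(me);          /* new destination: flush the old range */
  if (start < me->range_start)
    me->range_start = start;
  if (start + size > me->range_end)
    me->range_end = start + size;
  me->dest = dest;
}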
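The termination protocol in sweep_generation_trading_work, together
with the wakeup half of send_and_receive_remote_ranges, reduces to a
counter-plus-condition-variable scheme: a sweeper may block only while
num_running_sweepers counts it as idle, and the last sweeper to go idle
with no queued work wakes everyone. A pthreads reduction of that logic,
with the range queue abstracted to a single pointer and `drain` standing
in for a sweep pass:

#include <pthread.h>
#include <stddef.h>

#define N_SWEEPERS 4

typedef struct sweeper {
  pthread_cond_t work_cond;  /* signaled when ranges arrive or all are done */
  void *incoming;            /* remote ranges other sweepers sent to us */
  int waiting;               /* like SWEEPER_WAITING_FOR_WORK */
} sweeper;

static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
static int running = N_SWEEPERS;        /* sweepers not currently blocked */
static sweeper sweepers[N_SWEEPERS];

static void init_sweepers(void) {       /* call once before sweeping starts */
  for (int i = 0; i < N_SWEEPERS; i++)
    pthread_cond_init(&sweepers[i].work_cond, NULL);
}

static void drain(int me) {             /* stub for a sweep pass over `incoming` */
  pthread_mutex_lock(&m);
  sweepers[me].incoming = NULL;
  pthread_mutex_unlock(&m);
}

/* Sender side: hand ranges to their owner and wake it if it is idle. */
void send_ranges(int dest, void *ranges) {
  pthread_mutex_lock(&m);
  sweepers[dest].incoming = ranges;     /* really: append to a list */
  if (sweepers[dest].waiting) {
    running++;                          /* dest is about to sweep again */
    sweepers[dest].waiting = 0;
    pthread_cond_signal(&sweepers[dest].work_cond);
  }
  pthread_mutex_unlock(&m);
}

/* Receiver side: called after a sweeper's first pass; returns only when
   every sweeper is idle and no remote work is queued anywhere. */
void trade_work_until_done(int me) {
  pthread_mutex_lock(&m);
  running--;
  for (;;) {
    if (running == 0 && sweepers[me].incoming == NULL) {
      for (int i = 0; i < N_SWEEPERS; i++)  /* tell the others, too */
        pthread_cond_signal(&sweepers[i].work_cond);
      pthread_mutex_unlock(&m);
      return;
    }
    if (sweepers[me].incoming != NULL) {
      running++;                        /* work arrived since the last check */
    } else {
      sweepers[me].waiting = 1;
      pthread_cond_wait(&sweepers[me].work_cond, &m);
    }
    if (!sweepers[me].waiting) {        /* we have work to sweep */
      pthread_mutex_unlock(&m);
      drain(me);
      pthread_mutex_lock(&m);
      running--;
    } else if (running == 0) {          /* woken by the final broadcast */
      sweepers[me].waiting = 0;
      pthread_mutex_unlock(&m);
      return;
    } else {
      sweepers[me].waiting = 0;         /* spurious wakeup; loop again */
    }
  }
}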
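Finally, the extended S_check_heap in gcwrapper.c now walks segments of
typed objects object-by-object, advancing by size_object until it
reaches the allocation frontier or a forward_marker word. The essential
loop, with stand-ins for the generated check_object, the real
size_object, and the marker constant:

#include <stdint.h>
#include <stddef.h>

#define FORWARD_MARKER ((uintptr_t)-2)   /* placeholder sentinel value */

size_t size_object(uintptr_t *obj);      /* provided by the real GC */
void check_object(uintptr_t *obj);       /* generated into heapcheck.inc */

/* Walk a segment of typed objects from its first object boundary,
   advancing by whole objects; a forward_marker word means the rest of
   the segment is unused. */
static void walk_segment(uintptr_t *pp, uintptr_t *end) {
  while (pp < end) {
    if (*pp == FORWARD_MARKER)
      break;
    check_object(pp);                    /* check each field for dangling refs */
    pp += size_object(pp) / sizeof(uintptr_t);
  }
}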