diff --git a/.makefile b/.makefile index 275bcf19a8..294bf03f4b 100644 --- a/.makefile +++ b/.makefile @@ -338,7 +338,7 @@ RACKET_FOR_BOOTFILES = $(RACKET) RACKET_FOR_BUILD = $(RACKET) # This branch name changes each time the pb boot files are updated: -PB_BRANCH = circa-7.8.0.10-7 +PB_BRANCH = circa-7.8.0.10-11 PB_REPO = https://github.com/racket/pb # Alternative source for Chez Scheme boot files, normally set by diff --git a/Makefile b/Makefile index d5ed400d86..4b52cc66c8 100644 --- a/Makefile +++ b/Makefile @@ -47,7 +47,7 @@ RACKETCS_SUFFIX = RACKET = RACKET_FOR_BOOTFILES = $(RACKET) RACKET_FOR_BUILD = $(RACKET) -PB_BRANCH = circa-7.8.0.10-7 +PB_BRANCH = circa-7.8.0.10-11 PB_REPO = https://github.com/racket/pb EXTRA_REPOS_BASE = CS_CROSS_SUFFIX = @@ -306,14 +306,14 @@ maybe-fetch-pb-as-is: echo done fetch-pb-from: mkdir -p racket/src/ChezScheme/boot - if [ ! -d racket/src/ChezScheme/boot/pb ] ; then git clone -q -b circa-7.8.0.10-7 https://github.com/racket/pb racket/src/ChezScheme/boot/pb ; else cd racket/src/ChezScheme/boot/pb && git fetch -q origin circa-7.8.0.10-7:remotes/origin/circa-7.8.0.10-7 ; fi - cd racket/src/ChezScheme/boot/pb && git checkout -q circa-7.8.0.10-7 + if [ ! -d racket/src/ChezScheme/boot/pb ] ; then git clone -q -b circa-7.8.0.10-11 https://github.com/racket/pb racket/src/ChezScheme/boot/pb ; else cd racket/src/ChezScheme/boot/pb && git fetch -q origin circa-7.8.0.10-11:remotes/origin/circa-7.8.0.10-11 ; fi + cd racket/src/ChezScheme/boot/pb && git checkout -q circa-7.8.0.10-11 pb-stage: - cd racket/src/ChezScheme/boot/pb && git branch circa-7.8.0.10-7 - cd racket/src/ChezScheme/boot/pb && git checkout circa-7.8.0.10-7 + cd racket/src/ChezScheme/boot/pb && git branch circa-7.8.0.10-11 + cd racket/src/ChezScheme/boot/pb && git checkout circa-7.8.0.10-11 cd racket/src/ChezScheme/boot/pb && git add . 
&& git commit --amend -m "new build" pb-push: - cd racket/src/ChezScheme/boot/pb && git push -u origin circa-7.8.0.10-7 + cd racket/src/ChezScheme/boot/pb && git push -u origin circa-7.8.0.10-11 win-cs-base: IF "$(RACKET_FOR_BUILD)" == "" $(MAKE) win-bc-then-cs-base SETUP_BOOT_MODE=--boot WIN32_BUILD_LEVEL=bc PLAIN_RACKET=racket\racketbc DISABLE_STATIC_LIBS="$(DISABLE_STATIC_LIBS)" EXTRA_REPOS_BASE="$(EXTRA_REPOS_BASE)" GIT_CLONE_ARGS_qq="$(GIT_CLONE_ARGS_qq)" JOB_OPTIONS="$(JOB_OPTIONS)" PLT_SETUP_OPTIONS="$(PLT_SETUP_OPTIONS)" RACKETBC_SUFFIX="$(RACKETBC_SUFFIX)" RACKETCS_SUFFIX="$(RACKETCS_SUFFIX)" IF not "$(RACKET_FOR_BUILD)" == "" $(MAKE) win-just-cs-base SETUP_BOOT_MODE=--chain DISABLE_STATIC_LIBS="$(DISABLE_STATIC_LIBS)" EXTRA_REPOS_BASE="$(EXTRA_REPOS_BASE)" GIT_CLONE_ARGS_qq="$(GIT_CLONE_ARGS_qq)" JOB_OPTIONS="$(JOB_OPTIONS)" PLT_SETUP_OPTIONS="$(PLT_SETUP_OPTIONS)" RACKETCS_SUFFIX="$(RACKETCS_SUFFIX)" RACKET_FOR_BUILD="$(RACKET_FOR_BUILD)" diff --git a/racket/src/ChezScheme/c/Mf-base b/racket/src/ChezScheme/c/Mf-base index c537529a2c..5844703f6c 100644 --- a/racket/src/ChezScheme/c/Mf-base +++ b/racket/src/ChezScheme/c/Mf-base @@ -69,9 +69,11 @@ ${kernelobj}: ${Include}/equates.h ${Include}/scheme.h ${mainobj}: ${Include}/scheme.h ${kernelobj}: ${zlibHeaderDep} ${LZ4HeaderDep} gc-011.o gc-par.o gc-ocd.o gc-oce.o: gc.c -gc-011.o gc-par.o gc-ocd.o: ${Include}/gc-ocd.inc +gc-011.o gc-ocd.o: ${Include}/gc-ocd.inc gc-oce.o: ${Include}/gc-oce.inc +gc-par.o: ${Include}/gc-par.inc vfasl.o: ${Include}/vfasl.inc +gcwrapper.o: ${Include}/heapcheck.inc ../zlib/zlib.h ../zlib/zconf.h: ../zlib/configure.log diff --git a/racket/src/ChezScheme/c/alloc.c b/racket/src/ChezScheme/c/alloc.c index 77fb5f2af4..d361b8c27f 100644 --- a/racket/src/ChezScheme/c/alloc.c +++ b/racket/src/ChezScheme/c/alloc.c @@ -80,6 +80,9 @@ void S_alloc_init() { S_protect(&S_G.null_immutable_string); find_room(tc, space_new, 0, type_typed_object, size_string(0), S_G.null_immutable_string); 
STRTYPE(S_G.null_immutable_string) = (0 << string_length_offset) | type_string | string_immutable_flag; + + S_protect(&S_G.zero_length_bignum); + S_G.zero_length_bignum = S_bignum(tc, 0, 0); } } diff --git a/racket/src/ChezScheme/c/gc.c b/racket/src/ChezScheme/c/gc.c index 04ebbcaed8..7fd7483d48 100644 --- a/racket/src/ChezScheme/c/gc.c +++ b/racket/src/ChezScheme/c/gc.c @@ -29,9 +29,9 @@ The copying, sweeping, and marking operations that depend on object's shape are mostly implemented in "mkgc.ss". That script generates "gc-ocd.inc" (for modes where object counting and - backpointers are disabled) and "gc-oce.inc". The rest of the - implementation here can still depend on representatoin - details, though, especially for pairs, weak pairs, and ephemerons. + backpointers are disabled), "gc-oce.inc", and "gc-par.inc". The + rest of the implementation here can still depend on representation + details, though, especially for pairs, weak pairs, and ephemerons. GC Copying versus Marking ------------------------- @@ -124,45 +124,58 @@ ------------------- Parallel mode runs `sweep_generation` concurrently in multiple - threads. It relies on a number of invariants: + sweeper threads. It relies on a number of invariants: * There are no attempts to take tc_mutex suring sweeping. To the degree that locking is needed (e.g., to allocate new segments), `S_use_gc_tc_mutex` redirects to gc_tc_mutex. No other locks can be taken while that one is held. - * To copy from or mark on a segment, a segment-specific lock must - be taken. + * To copy from or mark on a segment, a sweeper must own the + segment. A sweeper during sweeping may encounter a "remote" + reference to a segment that it doesn't own; in that case, it + registers the object containing the remote reference to be + re-swept by the sweeper that owns the target of the reference. 
If a lock acquisition fails, everything must be - retryable as the level of the object or segment sweep. For a - segment sweep, objects may end up being swept multiple times. + A segment is owned by the thread that originally allocated it. + When a GC starts, for old-space segments that are owned by + threads that do not have a corresponding sweeper, the segment is + moved to the main collecting thread's ownership. - The lock is re-entrant, but re-locking information is held - outside the locak in a local variable, instead of being part of - the lock state. (That's why an ENABLE_LOCK_ACQUIRE declaration - is required in functions that take locks.) + Note that copying and marking are constrained so that they don't + have to recursively copy or mark. In some cases, this property + is achieved by not caring whether a reference goes to an old + copy or unmarked object; for example, a record type's size field + will be the same in both places, so either copy can be used to + determine a record size for copying. A record type's parent field + would not be available, however, since it can get overwritten + with forwarding information. - * Lock acquisition must be failable everywhere, with one - exception: when an object spans multiple segments, then `mark` - may need to set mark bits on multiple segments. In that case, it - can wait on locks for the extra pages, because there's an order - for the lock-taking: the first segment's lock followed by each - later segment. + * An object that is marked does not count as "remote". - * A segment in the target generation is exposed to the pool of - collecting threads only after a copy to the target segment is - complete. That's a consequence of keeping segments to sweep in a - thread-specific list. + Sweepers might attempt to access marked-object information at + the same time that it is being updated by the owning sweeper. 
+ It's ok if the non-owning sweepers get stale information; + they'll just send the referencing object to the owning thread + for re-sweeping. A write fence ensures that non-owning sweepers + do not inspect mark-bitmap bits that have not been initialized. - * The segment-table lock is required only for writing. When a - thread allocates a new segment, that segment becomes relevant - only to other threads at the point where an object in the new - segment is exposed to the other threads. So, for example, the - fence associated with taking a segment lock doubles to ensure - that a write has exposed the object. + * Normally, a sweeper that encounters a remote reference can + continue sweeping and eventually register the remote re-sweep. + An object is swept by only one sweeper at a time; if multiple + remote references to different sweepers are discovered in an + object, it is sent to only one of the remote sweepers, and that + sweeper will eventually send on the object to the other sweeper. + At worst, each object is swept N times for N sweepers. + + In rare cases, a sweeper cannot fully process an object, because + doing so would require inspecting a remote object. For example, + a record type's pointer mask or a stack frame's live-pointer + mask can be a bignum, and the bignum might be remote. In those + cases, the object might have to be sent back to the original + sweeper, and so on. In the worst case, the object can be swept + more than N times --- but, again, this case rarely happens at + all, and sweeping more than N times is very unlikely. Currently, counting and backreference modes do not support parallelism. 
@@ -180,7 +193,7 @@ static ptr copy_stack PROTO((ptr tc_in, ptr old, iptr *length, iptr clength)); static void resweep_weak_pairs PROTO((ptr tc, seginfo *oldweakspacesegments)); static void forward_or_bwp PROTO((ptr *pp, ptr p)); static void sweep_generation PROTO((ptr tc)); -static void sweep_from_stack PROTO((ptr tc)); +static iptr sweep_from_stack PROTO((ptr tc)); static void enlarge_sweep_stack PROTO((ptr tc)); static uptr size_object PROTO((ptr p)); static iptr sweep_typed_object PROTO((ptr tc_in, ptr p, IGEN from_g)); @@ -237,20 +250,33 @@ static void check_pending_measure_ephemerons(ptr tc_in); #ifdef ENABLE_TIMING #include -/* gets milliseconds of real time (not CPU time) */ -static uptr get_time () { +static uptr get_real_time () { struct timeval now; gettimeofday(&now, NULL); return ((uptr) now.tv_sec) * 1000 + ((uptr) now.tv_usec) / 1000; } -# define GET_TIME(x) uptr x = get_time() -# define ACCUM_TIME(a, y, x) uptr y = get_time() - x; a += y +static uptr get_cpu_time () { + struct timespec now; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &now); + return ((uptr) now.tv_sec) * 1000 + ((uptr) now.tv_nsec) / 1000000; +} +# define GET_REAL_TIME(x) uptr x = get_real_time() +# define GET_CPU_TIME(x) uptr x = get_cpu_time() +# define ACCUM_REAL_TIME(a, y, x) uptr y = get_real_time() - x; a += y +# define ACCUM_CPU_TIME(a, y, x) uptr y = get_cpu_time() - x; a += y # define REPORT_TIME(e) e static uptr collect_accum, all_accum, par_accum; +static int percentage(iptr n, iptr d) { return (n * 100) / d; } +# define COUNT_SWEPT_BYTES(start, end) num_swept_bytes += ((uptr)TO_PTR(end) - (uptr)TO_PTR(start)) +# define ADJUST_COUNTER(e) e #else -# define GET_TIME(x) do { } while (0) -# define ACCUM_TIME(a, y, x) do { } while (0) +# define GET_REAL_TIME(x) do { } while (0) +# define GET_CPU_TIME(x) do { } while (0) +# define ACCUM_REAL_TIME(a, y, x) do { } while (0) +# define ACCUM_CPU_TIME(a, y, x) do { } while (0) # define REPORT_TIME(e) do { } while (0) +# define 
COUNT_SWEPT_BYTES(start, end) do { } while (0) +# define ADJUST_COUNTER(e) do { } while (0) #endif #if defined(MIN_TG) && defined(MAX_TG) @@ -259,6 +285,12 @@ static uptr collect_accum, all_accum, par_accum; # endif #endif +#define main_sweeper_index maximum_parallel_collect_threads + +/* Use the `REMOTESWEEPER` field to assign the sweeper before sweepers + actually start: */ +#define WILL_BE_SWEEPER(tc) REMOTESWEEPER(tc) + /* #define DEBUG */ /* initialized and used each gc cycle. any others should be defined in globals.h */ @@ -335,78 +367,94 @@ static ptr sweep_from; # define ADD_BACKREFERENCE(p, from_g) #endif +typedef struct remote_range { + ISPC s; + IGEN g; + ptr start, end; + struct remote_range *next; +} remote_range; + #if !defined(PTHREADS) # undef ENABLE_PARALLEL #endif #ifdef ENABLE_PARALLEL -# define ENABLE_LOCK_ACQUIRE int old_lock_state; -# define SEGMENT_LOCK_ACQUIRE(si) \ - ((si->lock == tc_in) \ - ? (old_lock_state = 0, 1) \ - : (old_lock_state = 1, AS_IMPLICIT_ATOMIC(int, S_cas_load_acquire_ptr(&si->lock, (ptr)0, tc_in)))) -# define SEGMENT_LOCK_RELEASE(si) do { \ - if (old_lock_state) { \ - S_store_release(); \ - BEGIN_IMPLICIT_ATOMIC(); \ - si->lock = (ptr)0; \ - END_IMPLICIT_ATOMIC(); \ - } \ - } while (0) -# define SEGMENT_LOCK_MUST_ACQUIRE(si) do { } while (!SEGMENT_LOCK_ACQUIRE(si)) -# define RECORD_LOCK_FAILED(tc, si) LOCKSTATUS(tc) = Sfalse -# define CLEAR_LOCK_FAILED(tc) LOCKSTATUS(tc) = Strue -# define CHECK_LOCK_FAILED(tc) (LOCKSTATUS(tc) == Sfalse) -# define SAVE_SWEEP_RANGE_FOR_LATER(tc, s, g, slp, sl, nl) save_sweep_range_for_later(tc, s, g, slp, sl, nl) -# define SAVE_SWEEP_SEGMENT_FOR_LATER(tc, si) save_sweep_segment_for_later(tc, si) -# define GC_TC_MUTEX_ACQUIRE() gc_tc_mutex_acquire() +# define GC_TC_MUTEX_ACQUIRE() gc_tc_mutex_acquire() # define GC_TC_MUTEX_RELEASE() gc_tc_mutex_release() +# define SEGMENT_IS_LOCAL(si, p) ((SWEEPER(si->creator_tc) == SWEEPER(tc_in)) || marked(si, p)) +# define RECORD_REMOTE_RANGE_TO(tc, 
start, size, sweeper) do { \ + ptr START = TO_PTR(UNTYPE_ANY(start)); \ + ptr END = (ptr)((uptr)START + (size)); \ + if ((uptr)START < (uptr)REMOTERANGESTART(tc)) \ + REMOTERANGESTART(tc) = START; \ + if ((uptr)END > (uptr)REMOTERANGEEND(tc)) \ + REMOTERANGEEND(tc) = END; \ + REMOTESWEEPER(tc) = sweeper; \ + } while (0) +# define RECORD_REMOTE_RANGE(tc, start, size, si) RECORD_REMOTE_RANGE_TO(tc, start, size, SWEEPER(si->creator_tc)) +# define FLUSH_REMOTE_RANGE(tc, s, g) do { \ + if (REMOTERANGESTART(tc) != (ptr)(uptr)-1) { \ + flush_remote_range(tc, s, g); \ + } \ + } while (0) + static void gather_active_sweepers(); +static void reassign_segment_creator(ptr tc, seginfo *si); static IBOOL sweeper_started(int i); static void parallel_sweep_dirty_and_generation(ptr tc); -static void save_sweep_range_for_later(ptr tc_in, ISPC s, IGEN g, ptr *slp, ptr *sl, ptr *nl); -static void save_sweep_segment_for_later(ptr tc_in, seginfo *si); -static int gate_postponed(ptr tc, int status); +static iptr sweep_generation_trading_work(ptr tc); -#define SWEEPER_NONE 0 -#define SWEEPER_READY 1 -#define SWEEPER_SWEEPING 2 +static void flush_remote_range(ptr tc, ISPC s, IGEN g); +static remote_range *send_and_receive_remote_ranges(ptr tc); + +#define SWEEPER_NONE 0 +#define SWEEPER_READY 1 +#define SWEEPER_SWEEPING 2 +#define SWEEPER_WAITING_FOR_WORK 3 typedef struct { int status; - s_thread_cond_t done_cond; + s_thread_cond_t done_cond, work_cond; ptr sweep_tc; ptr thread; /* not 0 => thread to sweep on start */ seginfo *dirty_segments[DIRTY_SEGMENT_LISTS]; + /* modified only by owning sweeper: */ + remote_range *ranges_to_send[maximum_parallel_collect_threads+1]; + /* modified with sweeper mutex held: */ + remote_range *ranges_received; +#ifdef ENABLE_TIMING + int remote_ranges_sent, remote_ranges_received; + iptr remote_ranges_bytes_sent, remote_ranges_bytes_received; +#endif } gc_thread_data; -static gc_thread_data sweepers[maximum_parallel_collect_threads]; +static gc_thread_data 
sweepers[maximum_parallel_collect_threads+1]; static int num_sweepers; static seginfo *main_dirty_segments[DIRTY_SEGMENT_LISTS]; #else -# define ENABLE_LOCK_ACQUIRE /* empty */ -# define SEGMENT_LOCK_ACQUIRE(si) 1 -# define SEGMENT_LOCK_MUST_ACQUIRE(si) do { } while (0) -# define SEGMENT_LOCK_RELEASE(si) do { } while (0) -# define RECORD_LOCK_FAILED(tc, si) do { } while (0) -# define CLEAR_LOCK_FAILED(tc) do { } while (0) -# define CHECK_LOCK_FAILED(tc) 0 -# define SAVE_SWEEP_RANGE_FOR_LATER(tc, s, g, slp, sl, nl) do { } while (0) -# define SAVE_SWEEP_SEGMENT_FOR_LATER(tc, si) do { } while (0) + # define GC_TC_MUTEX_ACQUIRE() do { } while (0) # define GC_TC_MUTEX_RELEASE() do { } while (0) + +# define SEGMENT_IS_LOCAL(si, p) 1 +# define RECORD_REMOTE_RANGE_TO(tc, start, size, sweeper) do { } while (0) +# define RECORD_REMOTE_RANGE(tc, start, size, si) do { } while (0) +# define FLUSH_REMOTE_RANGE(tc, s, g) do { } while (0) + # define gather_active_sweepers() do { } while (0) +# define reassign_segment_creator(tc, si) do { } while (0) # define parallel_sweep_dirty_and_generation(tc) do { sweep_dirty(tc); sweep_generation(tc); } while (0) +# define send_and_receive_remote_ranges(tc) NULL static void sweep_dirty PROTO((ptr tc)); + #endif #define SWEEP_NO_CHANGE 0 #define SWEEP_CHANGE_PROGRESS 1 -#define SWEEP_CHANGE_POSTPONED 2 #if ptr_alignment == 2 # define record_full_marked_mask 0x55 @@ -436,11 +484,14 @@ uptr list_length(ptr ls) { } #endif -#define init_mask(tc, dest, tg, init) { \ - find_room_voidp(tc, space_data, tg, ptr_align(segment_bitmap_bytes), dest); \ - memset(dest, init, segment_bitmap_bytes); \ +#define init_mask(tc, dest, tg, init) do { \ + octet *MASK; \ + find_room_voidp(tc, space_data, tg, ptr_align(segment_bitmap_bytes), MASK); \ + memset(MASK, init, segment_bitmap_bytes); \ + STORE_FENCE(); \ + dest = MASK; \ BITMASKOVERHEAD(tc, tg) += ptr_align(segment_bitmap_bytes); \ - } + } while (0) #define marked(si, p) (si->marked_mask && 
(si->marked_mask[segment_bitmap_byte(p)] & segment_bitmap_bit(p))) @@ -495,41 +546,39 @@ static int flonum_is_forwarded_p(ptr p, seginfo *si) { /* use relocate_pure for newspace fields that can't point to younger objects or where there's no need to track generations */ -#define relocate_pure(ppp) do { \ +#define relocate_pure(ppp, start, size) do { \ ptr* PPP = ppp; ptr PP = *PPP; \ - relocate_pure_help(PPP, PP); \ + relocate_pure_help(PPP, PP, start, size); \ } while (0) -#define relocate_pure_help(ppp, pp) do { \ +#define relocate_pure_help(ppp, pp, start, size) do { \ seginfo *SI; \ if (!IMMEDIATE(pp) && (SI = MaybeSegInfo(ptr_get_segment(pp))) != NULL) { \ if (SI->old_space) \ - relocate_pure_help_help(ppp, pp, SI); \ + relocate_pure_help_help(ppp, pp, SI, start, size); \ ELSE_MEASURE_NONOLDSPACE(pp) \ } \ } while (0) -#define relocate_pure_help_help(ppp, pp, si) do { \ - if (SEGMENT_LOCK_ACQUIRE(si)) { \ +#define relocate_pure_help_help(ppp, pp, si, start, size) do { \ + if (SEGMENT_IS_LOCAL(si, pp)) { \ if (FORWARDEDP(pp, si)) \ *ppp = GET_FWDADDRESS(pp); \ else if (!new_marked(si, pp)) \ mark_or_copy_pure(ppp, pp, si); \ - SEGMENT_LOCK_RELEASE(si); \ } else \ - RECORD_LOCK_FAILED(tc_in, si); \ + RECORD_REMOTE_RANGE(tc_in, start, size, si); \ } while (0) -#define relocate_code(pp, si) do { \ +#define relocate_code(pp, si, start, size) do { \ if (si->old_space) { \ - if (SEGMENT_LOCK_ACQUIRE(si)) { \ + if (SEGMENT_IS_LOCAL(si, pp)) { \ if (FWDMARKER(pp) == forward_marker) \ pp = GET_FWDADDRESS(pp); \ else if (!new_marked(si, pp)) \ mark_or_copy_pure(&pp, pp, si); \ - SEGMENT_LOCK_RELEASE(si); \ } else \ - RECORD_LOCK_FAILED(tc_in, si); \ + RECORD_REMOTE_RANGE(tc_in, start, size, si); \ } ELSE_MEASURE_NONOLDSPACE(pp) \ } while (0) @@ -545,39 +594,39 @@ static int flonum_is_forwarded_p(ptr p, seginfo *si) { #ifdef NO_DIRTY_NEWSPACE_POINTERS -# define relocate_impure_help(PPP, PP, FROM_G) do {(void)FROM_G; relocate_pure_help(PPP, PP);} while (0) -# define 
relocate_impure(PPP, FROM_G) do {(void)FROM_G; relocate_pure(PPP);} while (0) +# define relocate_impure_help(PPP, PP, FROM_G, start, size) do {(void)FROM_G; relocate_pure_help(PPP, PP, start, size);} while (0) +# define relocate_impure(PPP, FROM_G, start, size) do {(void)FROM_G; relocate_pure(PPP, start, size);} while (0) #else /* !NO_DIRTY_NEWSPACE_POINTERS */ -#define relocate_impure(ppp, from_g) do { \ +#define relocate_impure(ppp, from_g, start, size) do { \ ptr* PPP = ppp; ptr PP = *PPP; IGEN FROM_G = from_g; \ - relocate_impure_help(PPP, PP, FROM_G); \ + relocate_impure_help(PPP, PP, FROM_G, start, size); \ } while (0) -#define relocate_impure_help(ppp, pp, from_g) do { \ +#define relocate_impure_help(ppp, pp, from_g, start, size) do { \ seginfo *SI; \ if (!IMMEDIATE(pp) && (SI = MaybeSegInfo(ptr_get_segment(pp))) != NULL) { \ if (SI->old_space) \ - relocate_impure_help_help(ppp, pp, from_g, SI); \ + relocate_impure_help_help(ppp, pp, from_g, SI, start, size); \ ELSE_MEASURE_NONOLDSPACE(pp) \ } \ } while (0) -#define relocate_impure_help_help(ppp, pp, from_g, si) do { \ +#define relocate_impure_help_help(ppp, pp, from_g, si, start, size) do { \ IGEN __to_g; \ - if (SEGMENT_LOCK_ACQUIRE(si)) { \ + if (SEGMENT_IS_LOCAL(si, pp)) { \ if (FORWARDEDP(pp, si)) { \ *ppp = GET_FWDADDRESS(pp); \ __to_g = TARGET_GENERATION(si); \ - if (__to_g < from_g) S_record_new_dirty_card(tc_in, ppp, __to_g); \ } else if (!new_marked(si, pp)) { \ mark_or_copy_impure(__to_g, ppp, pp, from_g, si); \ - if (__to_g < from_g) S_record_new_dirty_card(tc_in, ppp, __to_g); \ + } else { \ + __to_g = TARGET_GENERATION(si); \ } \ - SEGMENT_LOCK_RELEASE(si); \ + if (__to_g < from_g) S_record_new_dirty_card(tc_in, ppp, __to_g); \ } else \ - RECORD_LOCK_FAILED(tc_in, si); \ + RECORD_REMOTE_RANGE(tc_in, start, size, si); \ } while (0) #define mark_or_copy_impure(to_g, dest, p, from_g, si) do { \ @@ -589,13 +638,13 @@ static int flonum_is_forwarded_p(ptr p, seginfo *si) { #endif /* 
!NO_DIRTY_NEWSPACE_POINTERS */ -#define relocate_dirty(PPP, YOUNGEST) do { \ +#define relocate_dirty(PPP, YOUNGEST, start, size) do { \ seginfo *_si; ptr *_ppp = PPP, _pp = *_ppp; IGEN _pg; \ if (!IMMEDIATE(_pp) && (_si = MaybeSegInfo(ptr_get_segment(_pp))) != NULL) { \ if (!_si->old_space) { \ _pg = _si->generation; \ } else { \ - if (SEGMENT_LOCK_ACQUIRE(_si)) { \ + if (SEGMENT_IS_LOCAL(_si, _pp)) { \ if (FORWARDEDP(_pp, _si)) { \ *_ppp = GET_FWDADDRESS(_pp); \ _pg = TARGET_GENERATION(_si); \ @@ -606,9 +655,8 @@ static int flonum_is_forwarded_p(ptr p, seginfo *si) { } else { \ _pg = copy(tc_in, _pp, _si, _ppp); \ } \ - SEGMENT_LOCK_RELEASE(_si); \ } else { \ - RECORD_LOCK_FAILED(tc_in, _si); \ + RECORD_REMOTE_RANGE(tc_in, start, size, _si); \ _pg = 0xff; \ } \ } \ @@ -620,50 +668,16 @@ static int flonum_is_forwarded_p(ptr p, seginfo *si) { # define is_counting_root(si, p) (si->counting_mask && (si->counting_mask[segment_bitmap_byte(p)] & segment_bitmap_bit(p))) #endif -static void do_relocate_indirect(ptr tc_in, ptr p) { - ENABLE_LOCK_ACQUIRE - relocate_pure(&p); -} -#define relocate_indirect(p) do_relocate_indirect(tc_in, p) - #ifdef ENABLE_PARALLEL - -/* The `_now` variants of various functions/macros handle the possibly - of a lock failure by retrying immediately. A lock failure really - shouldn't happend where the `_now` forms are used, but because - locking may be implemented with a CAS that can fail spuriously on - processors like Arm, the lock can fail, anyway. 
*/ - -static void do_relocate_pure_now(ptr tc_in, ptr *pp) { - ENABLE_LOCK_ACQUIRE - relocate_pure(pp); - while (CHECK_LOCK_FAILED(tc_in)) { - CLEAR_LOCK_FAILED(tc_in); - relocate_pure(pp); - } +static void do_relocate_indirect(ptr tc_in, ptr p, ptr* start, uptr len) { + relocate_pure(&p, start, len); } - -static void do_mark_or_copy_pure_now(ptr tc_in, ptr *dest, ptr pp, seginfo *si) { - do { - CLEAR_LOCK_FAILED(tc_in); - mark_or_copy_pure(dest, pp, si); - } while (CHECK_LOCK_FAILED(tc_in)); -} - -# define relocate_pure_now(pp) do_relocate_pure_now(tc_in, pp) -# define mark_or_copy_pure_now(dest, pp, si) do_mark_or_copy_pure_now(tc, dest, pp, si) - -static void sweep_thread_now(ptr tc_in, ptr p) { - do { - CLEAR_LOCK_FAILED(tc_in); - sweep_thread(tc_in, p); - } while (CHECK_LOCK_FAILED(tc_in)); -} - +# define relocate_indirect(p, start, len) do_relocate_indirect(tc_in, p, start, len) #else -# define relocate_pure_now(pp) relocate_pure(pp) -# define mark_or_copy_pure_now(tc, pp, si) mark_or_copy_pure(tc, pp, si) -# define sweep_thread_now(tc, thread) sweep_thread(tc, thread) +static void do_relocate_indirect(ptr tc_in, ptr p) { + relocate_pure(&p, NULL, 0); +} +# define relocate_indirect(p, start, len) do_relocate_indirect(tc_in, p) #endif FORCEINLINE void check_triggers(ptr tc_in, seginfo *si) { @@ -686,7 +700,9 @@ FORCEINLINE void check_triggers(ptr tc_in, seginfo *si) { } } -#ifndef ENABLE_OBJECT_COUNTS +#if defined(ENABLE_PARALLEL) +# include "gc-par.inc" +#elif !defined(ENABLE_OBJECT_COUNTS) # include "gc-ocd.inc" #else # include "gc-oce.inc" @@ -704,7 +720,7 @@ static void sweep_in_old(ptr tc_in, ptr p) { /* Detect all the cases when we need to give up on in-place sweeping: */ if (object_directly_refers_to_self(p)) { - relocate_pure_now(&p); + relocate_pure(&p, NULL, 0); return; } @@ -826,7 +842,7 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { count_root_t *count_roots; #endif - GET_TIME(astart); + GET_REAL_TIME(astart); /* flush instruction cache: 
effectively clear_code_mod but safer */ for (ls = S_threads; ls != Snil; ls = Scdr(ls)) { @@ -841,7 +857,7 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { #endif /* !NO_DIRTY_NEWSPACE_POINTERS */ S_G.must_mark_gen0 = 0; - /* map `tc`s of rendezvous threads to sweeping threads */ + /* map `tc`s of rendezvous threads to sweeping threads, setting WILL_BE_SWEEPER */ gather_active_sweepers(); for (ls = S_threads; ls != Snil; ls = Scdr(ls)) { @@ -849,14 +865,20 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { S_scan_dirty(TO_VOIDP(EAP(t_tc)), TO_VOIDP(REAL_EAP(t_tc))); EAP(t_tc) = REAL_EAP(t_tc) = AP(t_tc) = (ptr)0; + /* Initially, map every context to the same sweeper, so + we can relocate some initial objects */ + SWEEPER(t_tc) = main_sweeper_index; + + /* If WILL_BE_SWEEPER() is not already set right, set it to + `main_sweeper_index`. */ #ifdef ENABLE_PARALLEL - if (SWEEPER(t_tc) != -1) { - if ((SWEEPER(t_tc) >= num_sweepers) - || (sweepers[SWEEPER(t_tc)].sweep_tc != t_tc)) - SWEEPER(t_tc) = -1; + if (WILL_BE_SWEEPER(t_tc) != main_sweeper_index) { + if ((WILL_BE_SWEEPER(t_tc) >= num_sweepers) + || (sweepers[WILL_BE_SWEEPER(t_tc)].sweep_tc != t_tc)) + WILL_BE_SWEEPER(t_tc) = main_sweeper_index; } #else - SWEEPER(t_tc) = -1; + WILL_BE_SWEEPER(t_tc) = main_sweeper_index; #endif /* clear thread-local allocation: */ @@ -871,7 +893,7 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { } } - if ((t_tc != tc) && (SWEEPER(t_tc) == -1)) { + if ((t_tc != tc) && (WILL_BE_SWEEPER(t_tc) == main_sweeper_index)) { /* close off any current allocation in MAX_TG, and ensure that end-of-segment markers are otherwise set (in case that's needed for dirty-byte sweeping) */ @@ -892,6 +914,9 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { for (g = MIN_TG; g <= MAX_TG; g++) BITMASKOVERHEAD(t_tc, g) = 0; for (s = 0; s <= max_real_space; s++) { + /* need to save `NEXTLOC_AT` to ensure that dirty sweeping + doesn't overshoot into newly allocated objects */ + ORIGNEXTLOC(t_tc, s) = NEXTLOC_AT(t_tc, s, 
MAX_TG); SWEEPLOC_AT(t_tc, s, MAX_TG) = NEXTLOC_AT(t_tc, s, MAX_TG); for (g = MIN_TG; g <= MAX_TG; g++) SWEEPNEXT_AT(t_tc, s, g) = (ptr)0; @@ -963,6 +988,7 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { si->marked_mask = NULL; /* clear old mark bits, if any */ si->marked_count = 0; si->min_dirty_byte = 0; /* prevent registering as dirty while GCing */ + reassign_segment_creator(tc, si); } S_G.occupied_segments[g][s] = NULL; @@ -1076,7 +1102,7 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { if (!si->old_space || FORWARDEDP(p, si) || marked(si, p) || !count_roots[i].weak) { /* reached or older; sweep transitively */ - relocate_pure_now(&p); + relocate_pure(&p, NULL, 0); sweep(tc, p, TARGET_GENERATION(si)); ADD_BACKREFERENCE(p, si->generation); sweep_generation(tc); @@ -1168,25 +1194,25 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { thread = Scar(ls); #ifdef ENABLE_PARALLEL - t_tc = (ptr)THREADTC(Scar(ls)); - if (SWEEPER(t_tc) != -1) { + t_tc = (ptr)THREADTC(thread); + if (WILL_BE_SWEEPER(t_tc) != main_sweeper_index) { if (!OLDSPACE(thread)) { /* sweep in sweeper thread: */ - sweepers[SWEEPER(t_tc)].thread = thread; + sweepers[WILL_BE_SWEEPER(t_tc)].thread = thread; } else { /* relocate now, so main sweeping will happen in sweeper thread */ ptr tc_in = t_tc; /* shadows enclosing `tc_in` binding */ - relocate_pure_now(&thread); + relocate_pure(&thread, NULL, 0); } } else #endif if (!OLDSPACE(thread)) - sweep_thread_now(tc, thread); + sweep_thread(tc, thread); } - relocate_pure_now(&S_threads); + relocate_pure(&S_threads, NULL, 0); - GET_TIME(start); + GET_REAL_TIME(start); /* relocate nonempty oldspace symbols and set up list of buckets to rebuild later */ buckets_to_rebuild = NULL; @@ -1215,7 +1241,7 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { (SYMVAL(sym) != sunbound || SYMPLIST(sym) != Snil || SYMSPLIST(sym) != Snil)) { seginfo *sym_si = SegInfo(ptr_get_segment(sym)); if (!new_marked(sym_si, sym)) - mark_or_copy_pure_now(&sym, sym, sym_si); + 
mark_or_copy_pure(&sym, sym, sym_si); } } S_G.buckets_of_generation[g] = NULL; @@ -1224,26 +1250,9 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { /* relocate the protected C pointers */ {uptr i; for (i = 0; i < S_G.protect_next; i++) - relocate_pure_now(S_G.protected[i]); + relocate_pure(S_G.protected[i], NULL, 0); } -#ifdef ENABLE_PARALLEL - /* make sure threads with sweepers have terminated older-generation - pages before sweeping dirty objects */ - { int i; - for (i = 0; i < num_sweepers; i++) { - ptr t_tc = sweepers[i].sweep_tc; - for (s = 0; s <= max_real_space; s++) { - for (g = MAX_TG; g <= static_generation; g++) { - ptr old = NEXTLOC_AT(t_tc, s, g); - if (old != (ptr)0) - *(ptr*)TO_VOIDP(old) = forward_marker; - } - } - } - } -#endif - /* sweep older locked and unlocked objects that are on `space_new` segments, because we can't find dirty writes there */ for (g = MAX_CG + 1; g <= static_generation; INCRGEN(g)) { @@ -1359,7 +1368,7 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { /* if tconc was old it's been forwarded */ tconc = GUARDIANTCONC(ls); - WITH_TOP_BACKREFERENCE(tconc, relocate_pure_now(&rep)); + WITH_TOP_BACKREFERENCE(tconc, relocate_pure(&rep, NULL, 0)); old_end = Scdr(tconc); new_end = S_cons_in(tc, space_impure, 0, FIX(0), FIX(0)); @@ -1407,7 +1416,7 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { } rep = GUARDIANREP(ls); - WITH_TOP_BACKREFERENCE(tconc, relocate_pure_now(&rep)); + WITH_TOP_BACKREFERENCE(tconc, relocate_pure(&rep, NULL, 0)); relocate_rep = 1; #ifdef ENABLE_OBJECT_COUNTS @@ -1509,8 +1518,9 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { /* still-pending ephemerons all go to bwp */ finish_pending_ephemerons(tc, oldspacesegments); - ACCUM_TIME(collect_accum, step, start); - REPORT_TIME(fprintf(stderr, "%d col +%ld ms %ld ms\n", MAX_CG, step, collect_accum)); + ACCUM_REAL_TIME(collect_accum, step, start); + REPORT_TIME(fprintf(stderr, "%d coll +%ld ms %ld ms [real time]\n", + MAX_CG, step, collect_accum)); /* post-gc oblist 
handling. rebuild old buckets in the target generation, pruning unforwarded symbols */ { bucket_list *bl; bucket *b, *bnext; bucket_pointer_list *bpl; bucket **pb; ptr sym; @@ -1729,8 +1739,8 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { for (g = MIN_TG; g <= MAX_TG; g++) S_G.bitmask_overhead[g] += BITMASKOVERHEAD(tc_in, g); - ACCUM_TIME(all_accum, astep, astart); - REPORT_TIME(fprintf(stderr, "%d all +%ld ms %ld ms\n", MAX_CG, astep, all_accum)); + ACCUM_REAL_TIME(all_accum, astep, astart); + REPORT_TIME(fprintf(stderr, "%d all +%ld ms %ld ms [real time]\n", MAX_CG, astep, all_accum)); if (count_roots_ls != Sfalse) { #ifdef ENABLE_OBJECT_COUNTS @@ -1742,41 +1752,71 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { return Svoid; } -#define sweep_space(s, from_g, body) { \ +#ifdef ENABLE_PARALLEL + +static void reassign_segment_creator(ptr tc, seginfo *si) { + int i; + + for (i = 0; i < num_sweepers; i++) + if (sweepers[i].sweep_tc == si->creator_tc) + return; + + si->creator_tc = tc; +} + +static void flush_remote_range(ptr tc, ISPC s, IGEN g) { + remote_range *r; + int me = SWEEPER(tc); + int they = REMOTESWEEPER(tc); + + find_room_voidp(tc, space_data, 0, ptr_align(sizeof(remote_range)), r); + BITMASKOVERHEAD(tc, 0) += ptr_align(sizeof(remote_range)); + r->s = s; + r->g = g; + r->start = REMOTERANGESTART(tc); + r->end = REMOTERANGEEND(tc); + r->next = sweepers[me].ranges_to_send[they]; + sweepers[me].ranges_to_send[they] = r; + + REMOTERANGESTART(tc) = (ptr)(uptr)-1; + REMOTERANGEEND(tc) = (ptr)0; + + SWEEPCHANGE(tc) = SWEEP_CHANGE_PROGRESS; +} + +#endif + +#define sweep_space(s, from_g, body) do { \ + sweep_space_segments(s, from_g, body); \ + sweep_space_bump_range(s, from_g, body); \ + } while (0) + +#define sweep_space_segments(s, from_g, body) do { \ while ((si = (seginfo *)TO_VOIDP(SWEEPNEXT_AT(tc_in, s, from_g))) != NULL) { \ SWEEPNEXT_AT(tc_in, s, from_g) = TO_PTR(si->sweep_next); \ pp = TO_VOIDP(si->sweep_start); \ while ((p = *pp) != forward_marker) \ 
body \ - if (CHECK_LOCK_FAILED(tc_in)) { \ - SAVE_SWEEP_SEGMENT_FOR_LATER(tc_in, si); \ - break; \ - } else { \ - COUNT_SWEPT_BYTES(si->sweep_start, pp); \ - save_resweep(s, si); \ - } \ + COUNT_SWEPT_BYTES(si->sweep_start, pp); \ + FLUSH_REMOTE_RANGE(tc_in, s, from_g); \ + save_resweep(s, si); \ } \ + } while (0) + +#define sweep_space_bump_range(s, from_g, body) do { \ slp = &SWEEPLOC_AT(tc_in, s, from_g); \ nlp = &NEXTLOC_AT(tc_in, s, from_g); \ - sweep_space_range(s, from_g, body) \ - } - -#define sweep_space_range(s, from_g, body) { \ - while ((sl = TO_VOIDP(*slp)) != (nl = TO_VOIDP(*nlp))) { \ - *slp = TO_PTR(nl); \ - pp = sl; \ - while (pp != nl) { \ - p = *pp; \ - body \ - } \ - if (CHECK_LOCK_FAILED(tc_in)) { \ - SAVE_SWEEP_RANGE_FOR_LATER(tc_in, s, from_g, slp, sl, nl); \ - break; \ - } else { \ - COUNT_SWEPT_BYTES(sl, nl); \ - } \ - } \ - } + while ((sl = TO_VOIDP(*slp)) != (nl = TO_VOIDP(*nlp))) { \ + *slp = TO_PTR(nl); \ + pp = sl; \ + while (pp != nl) { \ + p = *pp; \ + body \ + } \ + COUNT_SWEPT_BYTES(sl, nl); \ + FLUSH_REMOTE_RANGE(tc_in, s, from_g); \ + } \ + } while (0) #define save_resweep(s, si) do { \ if (s == space_weakpair) { \ @@ -1787,46 +1827,6 @@ ptr GCENTRY(ptr tc_in, ptr count_roots_ls) { } \ } while (0) -#ifdef ENABLE_TIMING -# define COUNT_SWEPT_BYTES(start, end) num_swept_bytes += ((uptr)TO_PTR(end) - (uptr)TO_PTR(start)) -#else -# define COUNT_SWEPT_BYTES(start, end) do { } while (0) -#endif - -#ifdef ENABLE_PARALLEL - -static void save_sweep_segment_for_later(ptr tc_in, seginfo *si) { - ISPC s = si->space; IGEN g = si->generation; - CLEAR_LOCK_FAILED(tc_in); - SWEEPCHANGE(tc_in) = SWEEP_CHANGE_POSTPONED; - si->sweep_next = TO_VOIDP(SWEEPNEXT_AT(tc_in, s, g)); - SWEEPNEXT_AT(tc_in, s, g) = TO_PTR(si); -} - -static void save_sweep_range_for_later(ptr tc_in, ISPC s, IGEN g, ptr *slp, ptr *sl, ptr *nl) { - CLEAR_LOCK_FAILED(tc_in); - SWEEPCHANGE(tc_in) = SWEEP_CHANGE_POSTPONED; - /* check whether this segment is still the thread-local 
allocation segment: */ - if (TO_VOIDP(*slp) == nl) { - *slp = sl; - } else { - /* need to set the sweep pointer in the segment, which must be one - of the ones queued to sweep */ - seginfo *si; - si = SWEEPNEXT_AT(tc_in, s, g); - while (1) { - if (si == NULL) S_error_abort("could not find segment for sweep range"); - if (TO_VOIDP(si->sweep_start) == nl) { - si->sweep_start = TO_PTR(sl); - return; - } - si = si->sweep_next; - } - } -} - -#endif - static void resweep_weak_pairs(ptr tc_in, seginfo *oldweakspacesegments) { IGEN from_g; ptr *pp, p, *nl; @@ -1910,100 +1910,194 @@ static void forward_or_bwp(pp, p) ptr *pp; ptr p; { } static iptr sweep_generation_pass(ptr tc_in) { - ENABLE_LOCK_ACQUIRE - ptr *slp, *nlp; ptr *pp, p, *nl, *sl; IGEN from_g; + ptr *slp, *nlp; ptr *pp, *ppn, p, *nl, *sl; IGEN from_g; seginfo *si; iptr num_swept_bytes = 0; + remote_range *received_ranges; do { SWEEPCHANGE(tc_in) = SWEEP_NO_CHANGE; - sweep_from_stack(tc_in); + num_swept_bytes += sweep_from_stack(tc_in); for (from_g = MIN_TG; from_g <= MAX_TG; from_g += 1) { - + sweep_space(space_impure, from_g, { - SET_BACKREFERENCE(TYPE(TO_PTR(pp), type_pair)); /* only pairs put here in backreference mode */ - relocate_impure_help(pp, p, from_g); - p = *(pp += 1); - relocate_impure_help(pp, p, from_g); - pp += 1; - }) + /* only pairs in theses spaces in backreference mode */ + SET_BACKREFERENCE(TYPE(TO_PTR(pp), type_pair)); + relocate_impure_help(pp, p, from_g, pp, 2 * ptr_bytes); + ppn = pp + 1; + p = *ppn; + relocate_impure_help(ppn, p, from_g, pp, 2 * ptr_bytes); + pp = ppn + 1; + }); SET_BACKREFERENCE(Sfalse) sweep_space(space_symbol, from_g, { p = TYPE(TO_PTR(pp), type_symbol); sweep_symbol(tc_in, p, from_g); pp += size_symbol / sizeof(ptr); - }) + }); sweep_space(space_port, from_g, { p = TYPE(TO_PTR(pp), type_typed_object); sweep_port(tc_in, p, from_g); pp += size_port / sizeof(ptr); - }) + }); sweep_space(space_weakpair, from_g, { SET_BACKREFERENCE(TYPE(TO_PTR(pp), type_pair)) - p = *(pp 
+= 1); - relocate_impure_help(pp, p, from_g); - pp += 1; - }) + ppn = pp + 1; + p = *ppn; + relocate_impure_help(ppn, p, from_g, pp, size_pair); + pp = ppn + 1; + }); SET_BACKREFERENCE(Sfalse) sweep_space(space_ephemeron, from_g, { p = TYPE(TO_PTR(pp), type_pair); add_ephemeron_to_pending(tc_in, p); pp += size_ephemeron / sizeof(ptr); - }) + }); sweep_space(space_pure, from_g, { SET_BACKREFERENCE(TYPE(TO_PTR(pp), type_pair)) /* only pairs put here in backreference mode */ - relocate_impure_help(pp, p, from_g); - p = *(pp += 1); - relocate_impure_help(pp, p, from_g); - pp += 1; - }) + relocate_impure_help(pp, p, from_g, pp, 2 * ptr_bytes); + ppn = pp + 1; + p = *ppn; + relocate_impure_help(ppn, p, from_g, pp, 2 * ptr_bytes); + pp = ppn + 1; + }); SET_BACKREFERENCE(Sfalse) sweep_space(space_continuation, from_g, { p = TYPE(TO_PTR(pp), type_closure); sweep_continuation(tc_in, p, from_g); pp += size_continuation / sizeof(ptr); - }) + }); sweep_space(space_pure_typed_object, from_g, { p = TYPE(TO_PTR(pp), type_typed_object); pp = TO_VOIDP(((uptr)TO_PTR(pp) + sweep_typed_object(tc_in, p, from_g))); - }) + }); sweep_space(space_code, from_g, { p = TYPE(TO_PTR(pp), type_typed_object); sweep_code_object(tc_in, p, from_g); pp += size_code(CODELEN(p)) / sizeof(ptr); - }) + }); sweep_space(space_impure_record, from_g, { p = TYPE(TO_PTR(pp), type_typed_object); sweep_record(tc_in, p, from_g); pp = TO_VOIDP((iptr)TO_PTR(pp) + size_record_inst(UNFIX(RECORDDESCSIZE(RECORDINSTTYPE(p))))); - }) + }); - /* space used only as needed for backreferences: */ + /* space used only as needed for backreferences: */ sweep_space(space_impure_typed_object, from_g, { p = TYPE(TO_PTR(pp), type_typed_object); pp = TO_VOIDP((uptr)TO_PTR(pp) + sweep_typed_object(tc_in, p, from_g)); - }) + }); - /* space used only as needed for backreferences: */ + /* space used only as needed for backreferences: */ sweep_space(space_closure, from_g, { p = TYPE(TO_PTR(pp), type_closure); sweep(tc_in, p, from_g); pp = 
TO_VOIDP((uptr)TO_PTR(pp) + size_object(p)); - }) + }); + } - /* don't sweep from space_count_pure or space_count_impure */ + received_ranges = send_and_receive_remote_ranges(tc_in); + + /* The ranges in `received_ranges` include old-generation objects from + other parallel sweepers, which means they correspond to dirty + sweeps in the originating sweeper. We handle them here like + regular sweeping using `relocate_impure`, which will register a + dirty-card update as needed. */ + while (received_ranges != NULL) { + ISPC s = received_ranges->s; + IGEN from_g = received_ranges->g; + + pp = TO_VOIDP(received_ranges->start); + nl = TO_VOIDP(received_ranges->end); + + if ((s == space_impure) + || (s == space_immobile_impure) + || (s == space_count_impure) + || (s == space_pure) + || (s == space_impure_typed_object)) { + while (pp < nl) { + p = *pp; + relocate_impure_help(pp, p, from_g, pp, 2 * ptr_bytes); + ppn = pp + 1; + p = *ppn; + relocate_impure_help(ppn, p, from_g, pp, 2 * ptr_bytes); + pp = ppn + 1; + } + } else if (s == space_closure) { + while (pp < nl) { + p = TYPE(TO_PTR(pp), type_closure); + sweep(tc_in, p, from_g); + pp = TO_VOIDP((uptr)TO_PTR(pp) + size_object(p)); + } + } else if (s == space_continuation) { + while (pp < nl) { + p = TYPE(TO_PTR(pp), type_closure); + sweep_continuation(tc_in, p, from_g); + pp += size_continuation / sizeof(ptr); + } + } else if (s == space_code) { + while (pp < nl) { + p = TYPE(TO_PTR(pp), type_typed_object); + sweep_code_object(tc_in, p, from_g); + pp += size_code(CODELEN(p)) / sizeof(ptr); + } + } else if ((s == space_pure_typed_object) + || (s == space_count_pure)) { + /* old generation can happen in the special case of a thread object: */ + while (pp < nl) { + p = TYPE(TO_PTR(pp), type_typed_object); + pp = TO_VOIDP(((uptr)TO_PTR(pp) + sweep_typed_object(tc_in, p, from_g))); + } + } else if (s == space_symbol) { + while (pp < nl) { + p = TYPE(TO_PTR(pp), type_symbol); + sweep_symbol(tc_in, p, from_g); + pp += 
size_symbol / sizeof(ptr); + } + } else if (s == space_port) { + while (pp < nl) { + p = TYPE(TO_PTR(pp), type_typed_object); + sweep_port(tc_in, p, from_g); + pp += size_port / sizeof(ptr); + } + } else if (s == space_weakpair) { + while (pp < nl) { + ppn = pp + 1; + p = *ppn; + relocate_impure_help(ppn, p, from_g, pp, size_pair); + pp = ppn + 1; + } + } else if (s == space_ephemeron) { + while (pp < nl) { + p = TYPE(TO_PTR(pp), type_pair); + add_ephemeron_to_pending(tc_in, p); + pp += size_ephemeron / sizeof(ptr); + } + } else if (s == space_impure_record) { + while (pp < nl) { + p = TYPE(TO_PTR(pp), type_typed_object); + sweep_record(tc_in, p, from_g); + pp = TO_VOIDP((iptr)TO_PTR(pp) + + size_record_inst(UNFIX(RECORDDESCSIZE(RECORDINSTTYPE(p))))); + } + } else { + S_error_abort("dirty range sweep: unexpected space"); + } + FLUSH_REMOTE_RANGE(tc_in, s, from_g); + COUNT_SWEPT_BYTES(received_ranges->start, received_ranges->end); + received_ranges = received_ranges->next; } /* Waiting until sweeping doesn't trigger a change reduces the @@ -2012,15 +2106,14 @@ static iptr sweep_generation_pass(ptr tc_in) { it doesn't change the worst-case complexity. */ if (SWEEPCHANGE(tc_in) == SWEEP_NO_CHANGE) check_pending_ephemerons(tc_in); + } while (SWEEPCHANGE(tc_in) == SWEEP_CHANGE_PROGRESS); return num_swept_bytes; } static void sweep_generation(ptr tc_in) { - do { - sweep_generation_pass(tc_in); - } while (SWEEPCHANGE(tc_in) != SWEEP_NO_CHANGE); + sweep_generation_pass(tc_in); } void enlarge_sweep_stack(ptr tc_in) { @@ -2036,26 +2129,51 @@ void enlarge_sweep_stack(ptr tc_in) { SWEEPSTACK(tc_in) = (ptr)((uptr)new_sweep_stack + sz); } -void sweep_from_stack(ptr tc_in) { +#ifdef ENABLE_PARALLEL +static ISPC infer_space(ptr p, seginfo *si) { + /* Certain kinds of values get allocated to more specific spaces by + parallel mode compared to non-parallel mode. Marking objects from + a previous collection can mean sweeping from the less-specific + space, however. 
We can synthesize an appropropriate space here, + since it will only be used only by the handling of received + ranges. */ + + if (si->marked_mask) { + ITYPE t = TYPEBITS(p); + if (t == type_typed_object) { + ptr tf = TYPEFIELD(p); + if ((iptr)tf == type_ratnum) + return space_pure; + else if ((iptr)tf == type_exactnum) + return space_pure; + } else if (t == type_closure) + return space_closure; + } + + return si->space; +} +#endif + +iptr sweep_from_stack(ptr tc_in) { + iptr num_swept_bytes = 0; + if (SWEEPSTACK(tc_in) > SWEEPSTACKSTART(tc_in)) { while (SWEEPSTACK(tc_in) > SWEEPSTACKSTART(tc_in)) { ptr p; seginfo *si; SWEEPSTACK(tc_in) = (ptr)((uptr)SWEEPSTACK(tc_in) - ptr_bytes); p = *(ptr *)TO_VOIDP(SWEEPSTACK(tc_in)); - /* Room for improvement: `si->generation` is needed only - for objects that have impure fields */ + /* Room for improvement: `si->generation` is needed only for + objects that have impure fields, or in parallel mode for + remote ranges. */ si = SegInfo(ptr_get_segment(p)); sweep(tc_in, p, si->generation); - if (CHECK_LOCK_FAILED(tc_in)) { - /* try doing something else for now and sweep `p` later */ - CLEAR_LOCK_FAILED(tc_in); - SWEEPCHANGE(tc_in) = SWEEP_CHANGE_POSTPONED; - push_sweep(p) - break; - } + COUNT_SWEPT_BYTES(0, size_object(p)); + FLUSH_REMOTE_RANGE(tc_in, infer_space(p, si), si->generation); } } + + return num_swept_bytes; } static iptr sweep_typed_object(ptr tc, ptr p, IGEN from_g) { @@ -2158,9 +2276,8 @@ static void setup_sweep_dirty() { } static uptr sweep_dirty_segments(ptr tc_in, seginfo **dirty_segments) { - ENABLE_LOCK_ACQUIRE IGEN youngest, min_youngest; - ptr *pp, *ppend, *nl, start, next_loc; + ptr *pp, *ppn, *ppend, *nl, start; uptr seg, d; ISPC s; IGEN from_g, to_g; @@ -2204,14 +2321,16 @@ static uptr sweep_dirty_segments(ptr tc_in, seginfo **dirty_segments) { start = build_ptr(seg, 0); ppend = TO_VOIDP(start); - /* The current allocation pointer may be relevant as the + /* The original allocation pointer may be relevant 
as the ending point. We assume that thread-local regions for all threads without a sweeper are terminated and won't get new allocations while dirty sweeping runs, while all allocations for a thread with a sweeper will be only using - that tc. */ - next_loc = NEXTLOC_AT(tc_in, s, from_g); - nl = TO_VOIDP(next_loc); + that tc, and no allocation happens for a non-target generation. */ + if (from_g == MAX_TG) + nl = TO_VOIDP(ORIGNEXTLOC(tc_in, s)); + else + nl = TO_VOIDP(NEXTLOC_AT(tc_in, s, from_g)); d = 0; while (d < cards_per_segment) { @@ -2228,12 +2347,12 @@ static uptr sweep_dirty_segments(ptr tc_in, seginfo **dirty_segments) { ppend += bytes_per_card / sizeof(ptr); if (pp <= nl && nl < ppend) ppend = nl; - COUNT_SWEPT_BYTES(pp, ppend); - if (dirty_si->dirty_bytes[d] <= MAX_CG) { /* start out with assumption that we won't find any wrong-way pointers */ youngest = 0xff; + COUNT_SWEPT_BYTES(pp, ppend); + if ((s == space_impure) || (s == space_immobile_impure) || (s == space_impure_typed_object) || (s == space_count_impure) || (s == space_closure)) { @@ -2241,21 +2360,25 @@ static uptr sweep_dirty_segments(ptr tc_in, seginfo **dirty_segments) { while (pp < ppend) { /* handle two pointers at a time */ if (marked(dirty_si, TO_PTR(pp))) { - relocate_dirty(pp,youngest); - pp += 1; - relocate_dirty(pp,youngest); - pp += 1; - } else + relocate_dirty(pp, youngest, pp, 2 * ptr_bytes); + ppn = pp + 1; + relocate_dirty(ppn, youngest, pp, 2 * ptr_bytes); + pp = ppn + 1; + } else { + FLUSH_REMOTE_RANGE(tc_in, s, from_g); pp += 2; + } } + FLUSH_REMOTE_RANGE(tc_in, s, from_g); } else { while (pp < ppend && *pp != forward_marker) { /* handle two pointers at a time */ - relocate_dirty(pp,youngest); - pp += 1; - relocate_dirty(pp,youngest); - pp += 1; + relocate_dirty(pp, youngest, pp, 2 * ptr_bytes); + ppn = pp + 1; + relocate_dirty(ppn, youngest, pp, 2 * ptr_bytes); + pp = ppn + 1; } + FLUSH_REMOTE_RANGE(tc_in, s, from_g); } } else if (s == space_symbol) { /* old symbols cannot 
overlap segment boundaries @@ -2274,9 +2397,13 @@ static uptr sweep_dirty_segments(ptr tc_in, seginfo **dirty_segments) { if (!dirty_si->marked_mask || marked(dirty_si, p)) youngest = sweep_dirty_symbol(tc_in, p, youngest); + else + FLUSH_REMOTE_RANGE(tc_in, s, from_g); pp += size_symbol / sizeof(ptr); } + + FLUSH_REMOTE_RANGE(tc_in, s, from_g); } else if (s == space_port) { /* old ports cannot overlap segment boundaries since any object that spans multiple @@ -2294,9 +2421,13 @@ static uptr sweep_dirty_segments(ptr tc_in, seginfo **dirty_segments) { if (!dirty_si->marked_mask || marked(dirty_si, p)) youngest = sweep_dirty_port(tc_in, p, youngest); + else + FLUSH_REMOTE_RANGE(tc_in, s, from_g); pp += size_port / sizeof(ptr); } + + FLUSH_REMOTE_RANGE(tc_in, s, from_g); } else if (s == space_impure_record) { /* abandon hope all ye who enter here */ ptr p; if (dirty_si->marked_mask) { @@ -2374,6 +2505,7 @@ static uptr sweep_dirty_segments(ptr tc_in, seginfo **dirty_segments) { seginfo *si = SegInfo(ptr_get_segment(p)); if (!marked(si, p)) { /* skip unmarked words */ + FLUSH_REMOTE_RANGE(tc_in, s, from_g); p = (ptr)((uptr)p + byte_alignment); } else { youngest = sweep_dirty_record(tc_in, p, youngest); @@ -2382,6 +2514,8 @@ static uptr sweep_dirty_segments(ptr tc_in, seginfo **dirty_segments) { RECORDINSTTYPE(p))))); } } + + FLUSH_REMOTE_RANGE(tc_in, s, from_g); } else { uptr j; ptr pnext; seginfo *si; @@ -2420,46 +2554,45 @@ static uptr sweep_dirty_segments(ptr tc_in, seginfo **dirty_segments) { /* now sweep */ while ((ptr *)TO_VOIDP(UNTYPE(p, type_typed_object)) < ppend) { /* quit on end of segment */ - if (FWDMARKER(p) == forward_marker) break; + if (FWDMARKER(p) == forward_marker) break; youngest = sweep_dirty_record(tc_in, p, youngest); p = (ptr)((iptr)p + size_record_inst(UNFIX(RECORDDESCSIZE( RECORDINSTTYPE(p))))); } + + FLUSH_REMOTE_RANGE(tc_in, s, from_g); } } else if (s == space_weakpair) { while (pp < ppend && (dirty_si->marked_mask || (*pp != 
forward_marker))) { /* skip car field and handle cdr field */ if (!dirty_si->marked_mask || marked(dirty_si, TO_PTR(pp))) { - pp += 1; - relocate_dirty(pp, youngest); - pp += 1; - } else + ptr *ppn = pp + 1; + relocate_dirty(ppn, youngest, pp, size_pair); + pp = ppn + 1; + } else { + FLUSH_REMOTE_RANGE(tc_in, s, from_g); pp += 2; + } } + + FLUSH_REMOTE_RANGE(tc_in, s, from_g); } else if (s == space_ephemeron) { while (pp < ppend && (dirty_si->marked_mask || (*pp != forward_marker))) { ptr p = TYPE(TO_PTR(pp), type_pair); if (!dirty_si->marked_mask || marked(dirty_si, p)) youngest = check_dirty_ephemeron(tc_in, p, youngest); + else + FLUSH_REMOTE_RANGE(tc_in, s, from_g); pp += size_ephemeron / sizeof(ptr); } + + FLUSH_REMOTE_RANGE(tc_in, s, from_g); } else { S_error_abort("sweep_dirty(gc): unexpected space"); } - if (CHECK_LOCK_FAILED(tc_in)) { - /* give up for now, and remember to restart at this segment */ - DirtySegmentsAt(dirty_segments, from_g, to_g) = dirty_si; - if (s == space_weakpair) - local_weaksegments_to_resweep = local_weaksegments_to_resweep->next; - add_weaksegments_to_resweep(local_weaksegments_to_resweep, last_local_weaksegments_to_resweep); - CLEAR_LOCK_FAILED(tc_in); - SWEEPCHANGE(tc_in) = SWEEP_CHANGE_POSTPONED; - return num_swept_bytes; - } - if (s == space_weakpair) { local_weaksegments_to_resweep->youngest[d] = youngest; } else { @@ -2482,8 +2615,6 @@ static uptr sweep_dirty_segments(ptr tc_in, seginfo **dirty_segments) { add_weaksegments_to_resweep(local_weaksegments_to_resweep, last_local_weaksegments_to_resweep); - SWEEPCHANGE(tc_in) = SWEEP_NO_CHANGE; - POP_BACKREFERENCE() return num_swept_bytes; @@ -2635,7 +2766,6 @@ static void add_trigger_ephemerons_to_pending(ptr tc_in, ptr pe) { } static void check_ephemeron(ptr tc_in, ptr pe) { - ENABLE_LOCK_ACQUIRE ptr p; seginfo *si; IGEN from_g; @@ -2648,36 +2778,34 @@ static void check_ephemeron(ptr tc_in, ptr pe) { p = Scar(pe); if (!IMMEDIATE(p) && (si = MaybeSegInfo(ptr_get_segment(p))) != 
NULL && si->old_space) { - if (SEGMENT_LOCK_ACQUIRE(si)) { + if (SEGMENT_IS_LOCAL(si, p)) { if (new_marked(si, p)) { - ENABLE_LOCK_ACQUIRE /* for nested relocate */ #ifndef NO_DIRTY_NEWSPACE_POINTERS IGEN tg = TARGET_GENERATION(si); if (tg < from_g) S_record_new_dirty_card(tc_in, &INITCAR(pe), tg); #endif - relocate_impure(&INITCDR(pe), from_g); + relocate_impure(&INITCDR(pe), from_g, pe, size_ephemeron); } else if (FORWARDEDP(p, si)) { - ENABLE_LOCK_ACQUIRE /* for nested relocate */ #ifndef NO_DIRTY_NEWSPACE_POINTERS IGEN tg = TARGET_GENERATION(si); if (tg < from_g) S_record_new_dirty_card(tc_in, &INITCAR(pe), tg); #endif INITCAR(pe) = FWDADDRESS(p); - relocate_impure(&INITCDR(pe), from_g); + relocate_impure(&INITCDR(pe), from_g, pe, size_ephemeron); } else { - /* If we get here, then there's no lock failure: */ /* Not reached, so far; install as trigger */ ephemeron_add(&si->trigger_ephemerons, pe); si->has_triggers = 1; } - SEGMENT_LOCK_RELEASE(si); } else { - RECORD_LOCK_FAILED(tc_in, si); + RECORD_REMOTE_RANGE_TO(tc_in, pe, size_ephemeron, SWEEPER(si->creator_tc)); } } else { - relocate_impure(&INITCDR(pe), from_g); + relocate_impure(&INITCDR(pe), from_g, pe, size_ephemeron); } - + + FLUSH_REMOTE_RANGE(tc_in, space_ephemeron, from_g); + POP_BACKREFERENCE(); } @@ -2690,13 +2818,6 @@ static void check_pending_ephemerons(ptr tc_in) { while (pe != 0) { next_pe = EPHEMERONNEXT(pe); check_ephemeron(tc_in, pe); - if (CHECK_LOCK_FAILED(tc_in)) { - CLEAR_LOCK_FAILED(tc_in); - SWEEPCHANGE(tc_in) = SWEEP_CHANGE_POSTPONED; - EPHEMERONNEXT(pe) = next_pe; - ephemeron_add(&PENDINGEPHEMERONS(tc_in), pe); - break; - } pe = next_pe; } } @@ -2706,7 +2827,6 @@ static void check_pending_ephemerons(ptr tc_in) { be less pessimistic than setting `youngest` to the target generation: */ static IGEN check_dirty_ephemeron(ptr tc_in, ptr pe, IGEN youngest) { - ENABLE_LOCK_ACQUIRE ptr p; seginfo *si; IGEN pg; @@ -2715,12 +2835,12 @@ static IGEN check_dirty_ephemeron(ptr tc_in, ptr pe, IGEN 
youngest) { p = Scar(pe); if (!IMMEDIATE(p) && (si = MaybeSegInfo(ptr_get_segment(p))) != NULL) { if (si->old_space) { - if (SEGMENT_LOCK_ACQUIRE(si)) { + if (SEGMENT_IS_LOCAL(si, p)) { if (new_marked(si, p)) { - relocate_dirty(&INITCDR(pe), youngest); + relocate_dirty(&INITCDR(pe), youngest, pe, size_ephemeron); } else if (FORWARDEDP(p, si)) { INITCAR(pe) = GET_FWDADDRESS(p); - relocate_dirty(&INITCDR(pe), youngest); + relocate_dirty(&INITCDR(pe), youngest, pe, size_ephemeron); } else { /* Not reached, so far; add to pending list */ add_ephemeron_to_pending(tc_in, pe); @@ -2733,20 +2853,19 @@ static IGEN check_dirty_ephemeron(ptr tc_in, ptr pe, IGEN youngest) { if (youngest != MIN_TG && (pg = TARGET_GENERATION(si)) < youngest) youngest = pg; } - SEGMENT_LOCK_RELEASE(si); } else { - RECORD_LOCK_FAILED(tc_in, si); + RECORD_REMOTE_RANGE_TO(tc_in, pe, size_ephemeron, SWEEPER(si->creator_tc)); return youngest; } } else { if (youngest != MIN_TG && (pg = si->generation) < youngest) youngest = pg; - relocate_dirty(&INITCDR(pe), youngest); + relocate_dirty(&INITCDR(pe), youngest, pe, size_ephemeron); } } else { /* Non-collectable key means that the value determines `youngest`: */ - relocate_dirty(&INITCDR(pe), youngest); + relocate_dirty(&INITCDR(pe), youngest, pe, size_ephemeron); } POP_BACKREFERENCE() @@ -2888,7 +3007,6 @@ static s_thread_mutex_t sweep_mutex; static s_thread_cond_t sweep_cond; static int num_running_sweepers; -static s_thread_cond_t postpone_cond; static void gather_active_sweepers() { int i, n; @@ -2899,7 +3017,7 @@ static void gather_active_sweepers() { ptr tc = S_collect_waiting_tcs[i]; if (sweeper_started(n)) { sweepers[n].sweep_tc = tc; - SWEEPER(tc) = n; + WILL_BE_SWEEPER(tc) = n; n++; } else break; @@ -2912,7 +3030,6 @@ static void gather_active_sweepers() { static s_thread_rv_t start_sweeper(void *_data) { gc_thread_data *data = _data; ptr tc; - int status; iptr num_swept_bytes; IGEN g; #ifdef ENABLE_TIMING @@ -2924,8 +3041,7 @@ static 
s_thread_rv_t start_sweeper(void *_data) { while (data->status != SWEEPER_SWEEPING) { s_thread_cond_wait(&sweep_cond, &sweep_mutex); } - num_running_sweepers++; - GET_TIME(start); + GET_CPU_TIME(start); (void)s_thread_mutex_unlock(&sweep_mutex); tc = data->sweep_tc; @@ -2934,20 +3050,15 @@ static s_thread_rv_t start_sweeper(void *_data) { if (data->thread) { /* sweep tc in this sweeper, so that things it references are more likely handled in this sweeper: */ - sweep_thread_now(tc, data->thread); + seginfo *t_si = SegInfo(ptr_get_segment(data->thread)); + sweep_thread(tc, data->thread); + FLUSH_REMOTE_RANGE(tc, t_si->space, t_si->generation); data->thread = (ptr)0; } - status = 0; num_swept_bytes = 0; - do { - num_swept_bytes += sweep_dirty_segments(tc, data->dirty_segments); - status = gate_postponed(tc, status); - } while (SWEEPCHANGE(tc) != SWEEP_NO_CHANGE); - do { - num_swept_bytes += sweep_generation_pass(tc); - status = gate_postponed(tc, status); - } while (SWEEPCHANGE(tc) != SWEEP_NO_CHANGE); + num_swept_bytes += sweep_dirty_segments(tc, data->dirty_segments); + num_swept_bytes += sweep_generation_trading_work(tc); /* ensure terminators on any segment where sweeper may have allocated: */ { @@ -2960,18 +3071,22 @@ static s_thread_rv_t start_sweeper(void *_data) { } } } - + (void)s_thread_mutex_lock(&sweep_mutex); - --num_running_sweepers; - if (!num_running_sweepers) - s_thread_cond_broadcast(&postpone_cond); S_G.bitmask_overhead[0] += BITMASKOVERHEAD(tc, 0); BITMASKOVERHEAD(tc, 0) = 0; for (g = MIN_TG; g <= MAX_TG; g++) S_G.bitmask_overhead[g] += BITMASKOVERHEAD(tc, g); data->status = SWEEPER_READY; - ACCUM_TIME(sweep_accum, step, start); - REPORT_TIME(fprintf(stderr, "%d swp +%ld ms %ld ms %ld bytes [%p]\n", MAX_CG, step, sweep_accum, num_swept_bytes, tc)); + ACCUM_CPU_TIME(sweep_accum, step, start); + REPORT_TIME(fprintf(stderr, "%d swpr +%ld ms %ld ms %ld bytes %d%%/%d sent %d%%/%d received [%p]\n", + MAX_CG, step, sweep_accum, num_swept_bytes, + 
percentage(sweepers[SWEEPER(tc)].remote_ranges_bytes_sent, num_swept_bytes), + sweepers[SWEEPER(tc)].remote_ranges_sent, + percentage(sweepers[SWEEPER(tc)].remote_ranges_bytes_received, num_swept_bytes), + sweepers[SWEEPER(tc)].remote_ranges_received, + tc)); + SWEEPER(tc) = main_sweeper_index; s_thread_cond_signal(&data->done_cond); } @@ -2983,7 +3098,7 @@ static IBOOL sweeper_started(int i) { if (!sweep_mutex_initialized) { s_thread_mutex_init(&sweep_mutex); s_thread_cond_init(&sweep_cond); - s_thread_cond_init(&postpone_cond); + s_thread_cond_init(&sweepers[main_sweeper_index].work_cond); sweep_mutex_initialized = 1; } @@ -2992,6 +3107,7 @@ static IBOOL sweeper_started(int i) { sweepers[i].status = SWEEPER_READY; s_thread_cond_init(&sweepers[i].done_cond); + s_thread_cond_init(&sweepers[i].work_cond); if ((status = s_thread_create(start_sweeper, &sweepers[i])) != 0) { /* eror creating a thread; just go with as many as we have */ @@ -3005,39 +3121,41 @@ static IBOOL sweeper_started(int i) { } static void parallel_sweep_dirty_and_generation(ptr tc) { - int i, status; + int i; iptr num_swept_bytes; REPORT_TIME(fprintf(stderr, "------\n")); - GET_TIME(start); + GET_CPU_TIME(start); S_use_gc_tc_mutex = 1; /* start other sweepers */ (void)s_thread_mutex_lock(&sweep_mutex); - for (i = 0; i < num_sweepers; i++) + sweepers[main_sweeper_index].status = SWEEPER_SWEEPING; + ADJUST_COUNTER(sweepers[main_sweeper_index].remote_ranges_sent = 0); + ADJUST_COUNTER(sweepers[main_sweeper_index].remote_ranges_bytes_sent = 0); + ADJUST_COUNTER(sweepers[main_sweeper_index].remote_ranges_received = 0); + ADJUST_COUNTER(sweepers[main_sweeper_index].remote_ranges_bytes_received = 0); + for (i = 0; i < num_sweepers; i++) { sweepers[i].status = SWEEPER_SWEEPING; + SWEEPER(sweepers[i].sweep_tc) = i; + ADJUST_COUNTER(sweepers[i].remote_ranges_sent = 0); + ADJUST_COUNTER(sweepers[i].remote_ranges_bytes_sent = 0); + ADJUST_COUNTER(sweepers[i].remote_ranges_received = 0); + 
ADJUST_COUNTER(sweepers[i].remote_ranges_bytes_received = 0); + num_running_sweepers++; + } s_thread_cond_broadcast(&sweep_cond); num_running_sweepers++; (void)s_thread_mutex_unlock(&sweep_mutex); /* sweep in the main thread */ - status = 0; num_swept_bytes = 0; - do { - num_swept_bytes += sweep_dirty_segments(tc, main_dirty_segments); - status = gate_postponed(tc, status); - } while (SWEEPCHANGE(tc) != SWEEP_NO_CHANGE); - do { - num_swept_bytes += sweep_generation_pass(tc); - status = gate_postponed(tc, status); - } while (SWEEPCHANGE(tc) != SWEEP_NO_CHANGE); + num_swept_bytes += sweep_dirty_segments(tc, main_dirty_segments); + num_swept_bytes += sweep_generation_trading_work(tc); /* wait for other sweepers */ (void)s_thread_mutex_lock(&sweep_mutex); - --num_running_sweepers; - if (!num_running_sweepers) - s_thread_cond_broadcast(&postpone_cond); for (i = 0; i < num_sweepers; i++) { while (sweepers[i].status != SWEEPER_READY) { s_thread_cond_wait(&sweepers[i].done_cond, &sweep_mutex); @@ -3046,40 +3164,108 @@ static void parallel_sweep_dirty_and_generation(ptr tc) { } (void)s_thread_mutex_unlock(&sweep_mutex); - ACCUM_TIME(par_accum, step, start); - REPORT_TIME(fprintf(stderr, "%d par +%ld ms %ld ms %ld bytes [%p]\n", MAX_CG, step, par_accum, num_swept_bytes, tc)); + ACCUM_CPU_TIME(par_accum, step, start); + REPORT_TIME(fprintf(stderr, "%d main +%ld ms %ld ms %ld bytes %d%%/%d sent %d%%/%d received [%p]\n", + MAX_CG, + step, par_accum, num_swept_bytes, + percentage(sweepers[main_sweeper_index].remote_ranges_bytes_sent, num_swept_bytes), + sweepers[main_sweeper_index].remote_ranges_sent, + percentage(sweepers[main_sweeper_index].remote_ranges_bytes_received, num_swept_bytes), + sweepers[main_sweeper_index].remote_ranges_received, + tc)); S_use_gc_tc_mutex = 0; } -#define WAIT_AFTER_POSTPONES 10 +static iptr sweep_generation_trading_work(ptr tc) { + iptr num_swept_bytes = 0; -static int gate_postponed(ptr tc, int status) { - if (SWEEPCHANGE(tc) == 
SWEEP_CHANGE_POSTPONED) { - if (status < WAIT_AFTER_POSTPONES) - return status + 1; - else { - (void)s_thread_mutex_lock(&sweep_mutex); - /* This thread wasn't able to make progress after a lock conflict. - Instead of spinning, which could create livelock, wait until - some thread makes progress. */ - if (num_running_sweepers == 1) { - /* All other threads postponed, so this one should be able to - make progress after all. */ - } else { - --num_running_sweepers; - s_thread_cond_wait(&postpone_cond, &sweep_mutex); - num_running_sweepers++; + num_swept_bytes += sweep_generation_pass(tc); + + (void)s_thread_mutex_lock(&sweep_mutex); + --num_running_sweepers; + while (1) { + int me = SWEEPER(tc); + if ((num_running_sweepers == 0) + && (sweepers[me].ranges_received == NULL)) { + /* everyone is done */ + int i, they = main_sweeper_index; + for (i = -1; i < num_sweepers; i++) { + s_thread_cond_signal(&sweepers[they].work_cond); + they = i + 1; } (void)s_thread_mutex_unlock(&sweep_mutex); + return num_swept_bytes; + } else { + /* wait for work */ + if (sweepers[me].ranges_received != NULL) { + /* some work appeared since we last checked */ + num_running_sweepers++; + } else { + sweepers[me].status = SWEEPER_WAITING_FOR_WORK; + s_thread_cond_wait(&sweepers[me].work_cond, &sweep_mutex); + } + if (sweepers[me].status != SWEEPER_WAITING_FOR_WORK) { + /* got work; num_running_sweepers was incremented, too */ + (void)s_thread_mutex_unlock(&sweep_mutex); + num_swept_bytes += sweep_generation_pass(tc); + (void)s_thread_mutex_lock(&sweep_mutex); + --num_running_sweepers; + } else if (num_running_sweepers == 0) { + /* other sweeper noticed that everyone is done */ + (void)s_thread_mutex_unlock(&sweep_mutex); + return num_swept_bytes; + } else { + /* not clear why we were woken, so just go around again */ + } } - } else { - (void)s_thread_mutex_lock(&sweep_mutex); - s_thread_cond_broadcast(&postpone_cond); - (void)s_thread_mutex_unlock(&sweep_mutex); + } +} + +static remote_range 
*send_and_receive_remote_ranges(ptr tc) { + int i, me = SWEEPER(tc), they; + remote_range *r, *next, *last; + + (void)s_thread_mutex_lock(&sweep_mutex); + + they = main_sweeper_index; + for (i = -1; i < num_sweepers; i++) { + if (sweepers[me].ranges_to_send[they] != NULL) { + SWEEPCHANGE(tc) = SWEEP_CHANGE_PROGRESS; + r = sweepers[me].ranges_to_send[they]; + sweepers[me].ranges_to_send[they] = NULL; + for (next = r, last = r; next != NULL; next = next->next) { + ADJUST_COUNTER(sweepers[me].remote_ranges_sent++); + ADJUST_COUNTER(sweepers[me].remote_ranges_bytes_sent += ((uptr)next->end - (uptr)next->start)); + last = next; + } + last->next = sweepers[they].ranges_received; + sweepers[they].ranges_received = r; + if (sweepers[they].status == SWEEPER_WAITING_FOR_WORK) { + num_running_sweepers++; + sweepers[they].status = SWEEPER_SWEEPING; + s_thread_cond_signal(&sweepers[they].work_cond); + } + } + they = i + 1; } - return 0; + r = sweepers[me].ranges_received; + sweepers[me].ranges_received = NULL; + + (void)s_thread_mutex_unlock(&sweep_mutex); + + if (r != NULL) { + SWEEPCHANGE(tc) = SWEEP_CHANGE_PROGRESS; +#ifdef ENABLE_TIMING + for (next = r; next != NULL; next = next->next) { + ADJUST_COUNTER(sweepers[me].remote_ranges_received++); + ADJUST_COUNTER(sweepers[me].remote_ranges_bytes_received += ((uptr)next->end - (uptr)next->start)); + } +#endif + } + + return r; } #endif @@ -3149,7 +3335,7 @@ static void push_measure(ptr tc_in, ptr p) if (si->old_space) { /* We must be in a GC--measure fusion, so switch back to GC */ - relocate_pure_help_help(&p, p, si); + relocate_pure_help_help(&p, p, si, NULL, 0); return; } diff --git a/racket/src/ChezScheme/c/gcwrapper.c b/racket/src/ChezScheme/c/gcwrapper.c index bf48a423d3..81f1308c51 100644 --- a/racket/src/ChezScheme/c/gcwrapper.c +++ b/racket/src/ChezScheme/c/gcwrapper.c @@ -15,6 +15,7 @@ */ #include "system.h" +#include "popcount.h" /* locally defined functions */ static void segment_tell PROTO((uptr seg)); @@ -545,6 
+546,59 @@ void S_addr_tell(ptr p) { segment_tell(addr_get_segment(p)); } +static void check_pointer(ptr *pp, IBOOL address_is_meaningful, ptr base, uptr seg, ISPC s, IBOOL aftergc) { + ptr p = *pp; + if (!IMMEDIATE(p)) { + seginfo *psi = MaybeSegInfo(ptr_get_segment(p)); + if (psi != NULL) { + if ((psi->space == space_empty) + || psi->old_space + || (psi->marked_mask && !(psi->marked_mask[segment_bitmap_byte(p)] & segment_bitmap_bit(p)) + /* corner case: a continuation in space_count_pure can refer to code via CLOSENTRY + where the entry point doesn't have a mark bit: */ + && !((s == space_count_pure) && (psi->space == space_code)))) { + S_checkheap_errors += 1; + printf("!!! dangling reference at %s"PHtx" to "PHtx"%s\n", + (address_is_meaningful ? "" : "insideof "), + (ptrdiff_t)(address_is_meaningful ? pp : TO_VOIDP(base)), + (ptrdiff_t)p, (aftergc ? " after gc" : "")); + printf("from: "); segment_tell(seg); + printf("to: "); segment_tell(ptr_get_segment(p)); + { + ptr l; + for (l = S_G.locked_objects[psi->generation]; l != Snil; l = Scdr(l)) + if (Scar(l) == p) + printf(" in locked\n"); + for (l = S_G.unlocked_objects[psi->generation]; l != Snil; l = Scdr(l)) + if (Scar(l) == p) + printf(" in unlocked\n"); + } + abort(); // REMOVEME + } + } + } +} + +static void check_bignum(ptr p) { + if (!Sbignump(p)) + printf("!!! not a bignum %p\n", TO_VOIDP(p)); +} + +#include "heapcheck.inc" + +static ptr *find_nl(ptr *pp1, ptr *pp2, ISPC s, IGEN g) { + ptr *nl, ls; + + for (ls = S_threads; ls != Snil; ls = Scdr(ls)) { + ptr t_tc = (ptr)THREADTC(Scar(ls)); + nl = TO_VOIDP(NEXTLOC_AT(t_tc, s, g)); + if (pp1 <= nl && nl < pp2) + return nl; + } + + return NULL; +} + static void check_heap_dirty_msg(msg, x) char *msg; ptr *x; { INT d; seginfo *si; @@ -577,6 +631,13 @@ void S_check_heap(aftergc, mcg) IBOOL aftergc; IGEN mcg; { printf("!!! 
inconsistent thread NEXT %p and BASE %p\n", TO_VOIDP(NEXTLOC_AT(t_tc, s, g)), TO_VOIDP(BASELOC_AT(t_tc, s, g))); } + if ((REMOTERANGEEND(t_tc) != (ptr)0) + || (REMOTERANGESTART(t_tc) != (ptr)(uptr)-1)) { + S_checkheap_errors += 1; + printf("!!! nonempty thread REMOTERANGE %p-%p\n", + TO_VOIDP(REMOTERANGESTART(t_tc)), + TO_VOIDP(REMOTERANGEEND(t_tc))); + } } } } @@ -669,67 +730,123 @@ void S_check_heap(aftergc, mcg) IBOOL aftergc; IGEN mcg; { printf("!!! unexpected generation %d segment "PHtx" in space_new\n", g, (ptrdiff_t)seg); } } else if (s == space_impure || s == space_symbol || s == space_pure || s == space_weakpair || s == space_ephemeron - || s == space_immobile_impure || s == space_count_pure || s == space_count_impure || s == space_closure) { - /* doesn't handle: space_port, space_continuation, space_code, space_pure_typed_object, - space_impure_record, or impure_typed_object */ + || s == space_immobile_impure || s == space_count_pure || s == space_count_impure || s == space_closure + || s == space_pure_typed_object || s == space_continuation || s == space_port || s == space_code + || s == space_impure_record || s == space_impure_typed_object) { + ptr start; + /* check for dangling references */ pp1 = TO_VOIDP(build_ptr(seg, 0)); pp2 = TO_VOIDP(build_ptr(seg + 1, 0)); - nl = NULL; - { - ptr ls; - for (ls = S_threads; ls != Snil; ls = Scdr(ls)) { - ptr t_tc = (ptr)THREADTC(Scar(ls)); - nl = TO_VOIDP(NEXTLOC_AT(t_tc, s, g)); - if (pp1 <= nl && nl < pp2) - break; - } - } + nl = find_nl(pp1, pp2, s, g); if (pp1 <= nl && nl < pp2) pp2 = nl; - while (pp1 < pp2) { - if (!si->marked_mask || (si->marked_mask[segment_bitmap_byte(TO_PTR(pp1))] & segment_bitmap_bit(TO_PTR(pp1)))) { - int a; - for (a = 0; (a < ptr_alignment) && (pp1 < pp2); a++) { -#define in_ephemeron_pair_part(pp1, seg) ((((uptr)TO_PTR(pp1) - (uptr)build_ptr(seg, 0)) % size_ephemeron) < size_pair) - if ((s == space_ephemeron) && !in_ephemeron_pair_part(pp1, seg)) { - /* skip non-pair part of 
ephemeron */ + if (s == space_pure_typed_object || s == space_port || s == space_code + || s == space_impure_record || s == space_impure_typed_object) { + if (si->marked_mask) { + /* not implemented */ + } else { + /* only check this segment for objects that start on it */ + uptr before_seg = seg; + + /* Back up over segments for the same space and generation: */ + while (1) { + seginfo *before_si = MaybeSegInfo(before_seg-1); + if (!before_si + || (before_si->space != si->space) + || (before_si->generation != si->generation) + || ((before_si->marked_mask == NULL) != (si->marked_mask == NULL))) + break; + before_seg--; + } + + /* Move forward to reach `seg` again: */ + start = build_ptr(before_seg, 0); + while (before_seg != seg) { + ptr *before_pp2, *before_nl; + + before_pp2 = TO_VOIDP(build_ptr(before_seg + 1, 0)); + if ((ptr *)TO_VOIDP(start) > before_pp2) { + /* skipped to a further segment */ + before_seg++; } else { - p = *pp1; - if (!si->marked_mask && (p == forward_marker)) { - pp1 = pp2; /* break out of outer loop */ - break; - } else if (!IMMEDIATE(p)) { - seginfo *psi = MaybeSegInfo(ptr_get_segment(p)); - if (psi != NULL) { - if ((psi->space == space_empty) - || psi->old_space - || (psi->marked_mask && !(psi->marked_mask[segment_bitmap_byte(p)] & segment_bitmap_bit(p)) - /* corner case: a continuation in space_count_pure can refer to code via CLOSENTRY - where the entry point doesn't have a mark bit: */ - && !((s == space_count_pure) && (psi->space == space_code)))) { - S_checkheap_errors += 1; - printf("!!! dangling reference at "PHtx" to "PHtx"%s\n", (ptrdiff_t)pp1, (ptrdiff_t)p, (aftergc ? 
" after gc" : "")); - printf("from: "); segment_tell(seg); - printf("to: "); segment_tell(ptr_get_segment(p)); - { - ptr l; - for (l = S_G.locked_objects[psi->generation]; l != Snil; l = Scdr(l)) - if (Scar(l) == p) - printf(" in locked\n"); - for (l = S_G.unlocked_objects[psi->generation]; l != Snil; l = Scdr(l)) - if (Scar(l) == p) - printf(" in unlocked\n"); + before_nl = find_nl(TO_VOIDP(start), before_pp2, s, g); + if (((ptr*)TO_VOIDP(start)) <= before_nl && before_nl < before_pp2) { + /* this segment ends, so move to next segment */ + before_seg++; + if (s == space_code) { + /* in the case of code, it's possible for a whole segment to + go unused if a large code object didn't fit; give up, just in case */ + start = build_ptr(seg+1, 0); + } else { + start = build_ptr(before_seg, 0); + } + } else { + while (((ptr *)TO_VOIDP(start)) < before_pp2) { + if (*(ptr *)TO_VOIDP(start) == forward_marker) { + /* this segment ends, so move to next segment */ + if (s == space_code) { + start = build_ptr(seg+1, 0); + } else { + start = build_ptr(before_seg+1, 0); } + } else { + start = (ptr)((uptr)start + size_object(TYPE(start, type_typed_object))); } } + before_seg++; } } - pp1 += 1; } - } else - pp1 += ptr_alignment; + + if (((ptr *)TO_VOIDP(start)) >= pp2) { + /* previous object extended past the segment */ + } else { + pp1 = TO_VOIDP(start); + while (pp1 < pp2) { + if (*pp1 == forward_marker) + break; + else { + p = TYPE(TO_PTR(pp1), type_typed_object); + check_object(p, seg, s, aftergc); + pp1 = TO_VOIDP((ptr)((uptr)TO_PTR(pp1) + size_object(p))); + } + } + } + } + } else if (s == space_continuation) { + while (pp1 < pp2) { + if (*pp1 == forward_marker) + break; + if (!si->marked_mask || (si->marked_mask[segment_bitmap_byte(TO_PTR(pp1))] & segment_bitmap_bit(TO_PTR(pp1)))) { + p = TYPE(TO_PTR(pp1), type_closure); + check_object(p, seg, s, aftergc); + } + pp1 = TO_VOIDP((ptr)((uptr)TO_PTR(pp1) + size_continuation)); + } + } else { + while (pp1 < pp2) { + if 
(!si->marked_mask || (si->marked_mask[segment_bitmap_byte(TO_PTR(pp1))] & segment_bitmap_bit(TO_PTR(pp1)))) { + int a; + for (a = 0; (a < ptr_alignment) && (pp1 < pp2); a++) { +#define in_ephemeron_pair_part(pp1, seg) ((((uptr)TO_PTR(pp1) - (uptr)build_ptr(seg, 0)) % size_ephemeron) < size_pair) + if ((s == space_ephemeron) && !in_ephemeron_pair_part(pp1, seg)) { + /* skip non-pair part of ephemeron */ + } else { + p = *pp1; + if (!si->marked_mask && (p == forward_marker)) { + pp1 = pp2; /* break out of outer loop */ + break; + } else { + check_pointer(pp1, 1, (ptr)0, seg, s, aftergc); + } + } + pp1 += 1; + } + } else + pp1 += ptr_alignment; + } } /* verify that dirty bits are set appropriately */ diff --git a/racket/src/ChezScheme/c/globals.h b/racket/src/ChezScheme/c/globals.h index afc31c14e7..4722109a60 100644 --- a/racket/src/ChezScheme/c/globals.h +++ b/racket/src/ChezScheme/c/globals.h @@ -117,6 +117,7 @@ EXTERN struct S_G_struct { ptr null_immutable_vector; ptr null_immutable_fxvector; ptr null_immutable_bytevector; + ptr zero_length_bignum; seginfo *dirty_segments[DIRTY_SEGMENT_LISTS]; /* schsig.c */ diff --git a/racket/src/ChezScheme/c/scheme.c b/racket/src/ChezScheme/c/scheme.c index 5b7fda6450..68e7d07f89 100644 --- a/racket/src/ChezScheme/c/scheme.c +++ b/racket/src/ChezScheme/c/scheme.c @@ -330,6 +330,13 @@ static void idiot_checks() { oops = 1; } + if ((((code_flag_continuation << code_flags_offset) | (code_flag_mutable_closure << code_flags_offset)) + & (uptr)forward_marker) != 0) { + /* parallel GC relies on not confusing a forward marker with code flags */ + fprintf(stderr, "code flags overlap with forwadr_marker\n"); + oops = 1; + } + if (oops) S_abnormal_exit(); } diff --git a/racket/src/ChezScheme/c/segment.c b/racket/src/ChezScheme/c/segment.c index 92b6fa14ce..24e1bc7323 100644 --- a/racket/src/ChezScheme/c/segment.c +++ b/racket/src/ChezScheme/c/segment.c @@ -235,7 +235,6 @@ static void initialize_seginfo(seginfo *si, NO_THREADS_UNUSED ptr 
tc, ISPC s, IG si->use_marks = 0; si->must_mark = 0; #ifdef PTHREADS - si->lock = 0; si->creator_tc = tc; #endif si->list_bits = NULL; diff --git a/racket/src/ChezScheme/c/thread.c b/racket/src/ChezScheme/c/thread.c index c799367a3c..269ae7ffc8 100644 --- a/racket/src/ChezScheme/c/thread.c +++ b/racket/src/ChezScheme/c/thread.c @@ -142,7 +142,8 @@ ptr S_create_thread_object(who, p_tc) const char *who; ptr p_tc; { LZ4OUTBUFFER(tc) = 0; SWEEPER(tc) = -1; - LOCKSTATUS(tc) = Strue; + REMOTERANGESTART(tc) = (ptr)(uptr)-1; + REMOTERANGEEND(tc) = (ptr)0; tc_mutex_release(); diff --git a/racket/src/ChezScheme/c/types.h b/racket/src/ChezScheme/c/types.h index cf7162f38d..7f1517ea94 100644 --- a/racket/src/ChezScheme/c/types.h +++ b/racket/src/ChezScheme/c/types.h @@ -158,8 +158,7 @@ typedef struct _seginfo { octet *list_bits; /* for `$list-bits-ref` and `$list-bits-set!` */ uptr number; /* the segment number */ #ifdef PTHREADS - ptr lock; /* for parallel GC */ - ptr creator_tc; /* for parallelism heuristic; might not match an active thread */ + ptr creator_tc; /* for GC parallelism heuristic; might not match an active thread unless old_space */ #endif struct _chunkinfo *chunk; /* the chunk this segment belongs to */ struct _seginfo *next; /* pointer to the next seginfo (used in occupied_segments and unused_segs) */ diff --git a/racket/src/ChezScheme/c/vfasl.c b/racket/src/ChezScheme/c/vfasl.c index 833ef56865..4e6c29daa8 100644 --- a/racket/src/ChezScheme/c/vfasl.c +++ b/racket/src/ChezScheme/c/vfasl.c @@ -1151,10 +1151,20 @@ static void relink_code(ptr co, ptr sym_base, ptr *vspaces, uptr *vspace_offsets t = CODERELOC(co); t = ptr_add(vspaces[vspace_reloc], (uptr)t - vspace_offsets[vspace_reloc]); - if (to_static && !S_G.retain_static_relocation - && ((CODETYPE(co) & (code_flag_template << code_flags_offset)) == 0)) - CODERELOC(co) = (ptr)0; - else { + if (to_static && !S_G.retain_static_relocation) { + if ((CODETYPE(co) & (code_flag_template << code_flags_offset)) == 0) + 
CODERELOC(co) = (ptr)0; + else { + ptr tc = get_thread_context(); + iptr sz = size_reloc_table(RELOCSIZE(t)); + ptr new_t; + find_room(tc, space_data, static_generation, typemod, ptr_align(sz), new_t); + memcpy(TO_VOIDP(new_t), TO_VOIDP(t), sz); + t = new_t; + CODERELOC(co) = t; + RELOCCODE(t) = co; + } + } else { CODERELOC(co) = t; RELOCCODE(t) = co; } diff --git a/racket/src/ChezScheme/rktboot/make-boot.rkt b/racket/src/ChezScheme/rktboot/make-boot.rkt index 9385e734b0..dc5240f296 100644 --- a/racket/src/ChezScheme/rktboot/make-boot.rkt +++ b/racket/src/ChezScheme/rktboot/make-boot.rkt @@ -401,7 +401,9 @@ (status "Generate GC") (eval `(mkgc-ocd.inc ,(path->string (build-path out-subdir "gc-ocd.inc")))) (eval `(mkgc-oce.inc ,(path->string (build-path out-subdir "gc-oce.inc")))) + (eval `(mkgc-par.inc ,(path->string (build-path out-subdir "gc-par.inc")))) (eval `(mkvfasl.inc ,(path->string (build-path out-subdir "vfasl.inc")))) + (eval `(mkheapcheck.inc ,(path->string (build-path out-subdir "heapcheck.inc")))) (plumber-flush-all (current-plumber)))) (when (getenv "MAKE_BOOT_FOR_CROSS") diff --git a/racket/src/ChezScheme/s/Mf-base b/racket/src/ChezScheme/s/Mf-base index 7993b53980..dfd08463f5 100644 --- a/racket/src/ChezScheme/s/Mf-base +++ b/racket/src/ChezScheme/s/Mf-base @@ -114,7 +114,9 @@ Cheader = ../boot/$m/scheme.h Cequates = ../boot/$m/equates.h Cgcocd = ../boot/$m/gc-ocd.inc Cgcoce = ../boot/$m/gc-oce.inc +Cgcpar = ../boot/$m/gc-par.inc Cvfasl = ../boot/$m/vfasl.inc +Cheapcheck = ../boot/$m/heapcheck.inc Revision = ../boot/$m/revision # The following controls the patch files loaded before compiling, typically used only @@ -171,11 +173,11 @@ allsrc =\ np-languages.ss fxmap.ss # doit uses a different Scheme process to compile each target -doit: ${PetiteBoot} ${SchemeBoot} ${Cheader} ${Cequates} ${Cgcocd} ${Cgcoce} ${Cvfasl} ${Revision} +doit: ${PetiteBoot} ${SchemeBoot} ${Cheader} ${Cequates} ${Cgcocd} ${Cgcoce} ${Cgcpar} ${Cvfasl} ${Cheapcheck} ${Revision} 
# all uses a single Scheme process to compile all targets. this is typically # faster when most of the targets need to be recompiled. -all: bootall ${Cheader} ${Cequates} ${Cgcocd} ${Cgcoce} ${Cvfasl} ${Revision} +all: bootall ${Cheader} ${Cequates} ${Cgcocd} ${Cgcoce} ${Cgcpar} ${Cvfasl} ${Cheapcheck} ${Revision} # allx runs all up to three times and checks to see if the new boot file is the # same as the last, i.e., the system is properly bootstrapped. @@ -362,7 +364,7 @@ resetbootlinks: | ${Scheme} -q keepbootfiles: - for x in `echo scheme.boot petite.boot scheme.h equates.h gc-oce.inc gc-ocd.inc vfasl.inc` ; do\ + for x in `echo scheme.boot petite.boot scheme.h equates.h gc-oce.inc gc-ocd.inc gc-par.inc vfasl.inc heapcheck.inc` ; do\ if [ ! -h ../boot/$(m)/$$x ] ; then \ mv -f ../boot/$(m)/$$x ../../boot/$(m)/$$x ;\ elif [ "${upupupbootdir}" != "../../.." ] ; then \ @@ -633,6 +635,15 @@ ${Cgcoce}: mkgc.so ${macroobj} nanopass.so base-lang.ss expand-lang.ss primref.s then mv -f ${Cgcoce}.bak ${Cgcoce};\ else rm -f ${Cgcoce}.bak; fi) +${Cgcpar}: mkgc.so ${macroobj} nanopass.so base-lang.ss expand-lang.ss primref.ss types.ss io-types.ss fasl-helpers.ss hashtable-types.ss + (if [ -r ${Cgcpar} ]; then mv -f ${Cgcpar} ${Cgcpar}.bak; fi) + echo '(reset-handler abort)'\ + '(mkgc-par.inc "${Cgcpar}")' |\ + ${Scheme} -q ${macroobj} mkheader.so mkgc.so + (if `cmp -s ${Cgcpar} ${Cgcpar}.bak`;\ + then mv -f ${Cgcpar}.bak ${Cgcpar};\ + else rm -f ${Cgcpar}.bak; fi) + ${Cvfasl}: mkgc.so ${macroobj} nanopass.so base-lang.ss expand-lang.ss primref.ss types.ss io-types.ss fasl-helpers.ss hashtable-types.ss (if [ -r ${Cvfasl} ]; then mv -f ${Cvfasl} ${Cvfasl}.bak; fi) echo '(reset-handler abort)'\ @@ -642,6 +653,15 @@ ${Cvfasl}: mkgc.so ${macroobj} nanopass.so base-lang.ss expand-lang.ss primref.s then mv -f ${Cvfasl}.bak ${Cvfasl};\ else rm -f ${Cvfasl}.bak; fi) +${Cheapcheck}: mkgc.so ${macroobj} nanopass.so base-lang.ss expand-lang.ss primref.ss types.ss io-types.ss 
fasl-helpers.ss hashtable-types.ss + (if [ -r ${Cheapcheck} ]; then mv -f ${Cheapcheck} ${Cheapcheck}.bak; fi) + echo '(reset-handler abort)'\ + '(mkheapcheck.inc "${Cheapcheck}")' |\ + ${Scheme} -q ${macroobj} mkheader.so mkgc.so + (if `cmp -s ${Cheapcheck} ${Cheapcheck}.bak`;\ + then mv -f ${Cheapcheck}.bak ${Cheapcheck};\ + else rm -f ${Cheapcheck}.bak; fi) + .PHONY: ${Revision} ${Revision}: update-revision @./update-revision > ${Revision} @@ -664,7 +684,9 @@ reset: $(MAKE) reset-one FILE=scheme.h $(MAKE) reset-one FILE=gc-oce.inc $(MAKE) reset-one FILE=gc-ocd.inc + $(MAKE) reset-one FILE=gc-par.inc $(MAKE) reset-one FILE=vfasl.inc + $(MAKE) reset-one FILE=heapcheck.inc .PHONY: reset-one reset-one: diff --git a/racket/src/ChezScheme/s/cmacros.ss b/racket/src/ChezScheme/s/cmacros.ss index a52be453c1..0ea135209c 100644 --- a/racket/src/ChezScheme/s/cmacros.ss +++ b/racket/src/ChezScheme/s/cmacros.ss @@ -855,6 +855,8 @@ ;; --------------------------------------------------------------------- ;; Bit and byte offsets for different types of objects: +;; Flags that matter to the GC must apply only to static-generation +;; objects, and they must not overlap with `forward-marker` (define-constant code-flag-system #b0000001) (define-constant code-flag-continuation #b0000010) (define-constant code-flag-template #b0000100) @@ -1389,7 +1391,8 @@ (define-primitive-structure-disps ratnum type-typed-object ([iptr type] [ptr numerator] - [ptr denominator])) + [ptr denominator] + [iptr pad])) ; for alignment (define-primitive-structure-disps vector type-typed-object ([iptr type] @@ -1433,7 +1436,8 @@ (define-primitive-structure-disps exactnum type-typed-object ([iptr type] [ptr real] - [ptr imag])) + [ptr imag] + [iptr pad])) ; for alignment (define-primitive-structure-disps closure type-closure ([ptr code] @@ -1495,8 +1499,9 @@ (define-constant virtual-register-count 16) (define-constant static-generation 7) (define-constant num-generations (fx+ (constant static-generation) 1)) 
-(define-constant num-thread-local-allocation-segments (fx* (fx+ 1 (constant static-generation)) - (fx+ 1 (constant max-real-space)))) +(define-constant num-spaces (fx+ (constant max-real-space) 1)) +(define-constant num-thread-local-allocation-segments (fx* (constant num-generations) + (constant num-spaces))) (define-constant maximum-parallel-collect-threads 8) ;;; make sure gc sweeps all ptrs @@ -1577,6 +1582,7 @@ [xptr base-loc (constant num-thread-local-allocation-segments)] [xptr next-loc (constant num-thread-local-allocation-segments)] [iptr bytes-left (constant num-thread-local-allocation-segments)] + [xptr orig-next-loc (constant num-spaces)] [xptr sweep-loc (constant num-thread-local-allocation-segments)] [xptr sweep-next (constant num-thread-local-allocation-segments)] [xptr pending-ephemerons] @@ -1585,7 +1591,9 @@ [xptr sweep-stack-start] [xptr sweep-stack-limit] [iptr sweep-change] - [xptr lock-status] + [iptr remote-sweeper] + [xptr remote-range-start] + [xptr remote-range-end] [iptr bitmask-overhead (constant num-generations)])) (define tc-field-list diff --git a/racket/src/ChezScheme/s/mkgc.ss b/racket/src/ChezScheme/s/mkgc.ss index 20d3408da9..9534b50160 100644 --- a/racket/src/ChezScheme/s/mkgc.ss +++ b/racket/src/ChezScheme/s/mkgc.ss @@ -13,7 +13,9 @@ (disable-unbound-warning mkgc-ocd.inc mkgc-oce.inc - mkvfasl.inc) + mkgc-par.inc + mkvfasl.inc + mkheapcheck.inc) ;; Currently supported traversal modes: ;; - copy @@ -25,6 +27,7 @@ ;; - measure : recurs for reachable size ;; - vfasl-copy ;; - vfasl-sweep +;; - check ;; For the specification, there are a few declaration forms described ;; below, such as `trace` to declare a pointer-valued field within an @@ -83,6 +86,7 @@ ;; - (trace-early ) : relocate for sweep, copy, and mark; recur otherwise; implies pure ;; - (trace-now ) : direct recur; implies pure ;; - (trace-early-rtd ) : for record types, avoids recur on #!base-rtd; implies pure +;; - (trace-pure-code ) : like `trace-pure`, but special 
handling in parallel mode ;; - (trace-ptrs ) : trace an array of pointerrs ;; - (trace-pure-ptrs ) : pure analog of `trace-ptrs` ;; - (copy ) : copy for copy, ignore otherwise @@ -96,8 +100,6 @@ ;; - (as-mark-end ...) : declares that s implement counting, ;; which means that it's included for mark mode ;; - (skip-forwarding) : disable forward-pointer installation in copy mode -;; - (check-lock-failed) : bail out if a lock aquire failed; use this before dereferencing -;; an object reference that might not have been relocated ;; - (assert ) : assertion ;; ;; In the above declarations, nonterminals like can be @@ -150,6 +152,7 @@ ;; Built-in variables: ;; - _ : object being copied, swept, etc. ;; - _copy_ : target in copy or vfasl mode, same as _ otherwise +;; - _size_ : size of the current object, but only in parallel mode ;; - _tf_ : type word ;; - _tg_ : target generation ;; - _backreferences?_ : dynamic flag indicating whether backreferences are on @@ -173,6 +176,9 @@ [(copy) (set! (ephemeron-prev-ref _copy_) 0) (set! (ephemeron-next _copy_) 0)] + [(check) + (trace pair-car) + (trace pair-cdr)] [else]) (add-ephemeron-to-pending) (mark one-bit no-sweep) @@ -181,6 +187,9 @@ [space-weakpair (space space-weakpair) (vfasl-fail "weakpair") + (case-mode + [(check) (trace pair-car)] + [else]) (try-double-pair copy pair-car trace pair-cdr countof-weakpair)] @@ -193,7 +202,10 @@ [closure (define code : ptr (CLOSCODE _)) - (trace-code-early code) + (trace-code-early code) ; not traced in parallel mode + ;; In parallel mode, don't use any fields of `code` until the + ;; second on after the type, because the type and first field may + ;; be overwritten with forwarding information (cond [(and-not-as-dirty (or-assume-continuation @@ -221,12 +233,23 @@ (case-mode [(sweep) (define stk : ptr (continuation-stack _)) - (when (&& (!= stk (cast ptr 0)) (OLDSPACE stk)) - (set! 
(continuation-stack _) - (copy_stack _tc_ - (continuation-stack _) - (& (continuation-stack-length _)) - (continuation-stack-clength _))))] + (define s_si : seginfo* NULL) + (when (&& (!= stk (cast ptr 0)) + (begin + (set! s_si (SegInfo (ptr_get_segment stk))) + (-> s_si old_space))) + (cond + [(! (SEGMENT_IS_LOCAL s_si stk)) + ;; A stack segment has a single owner, so it's ok for us + ;; to sweep the stack content, even though it's on a + ;; remote segment relative to the current sweeper. + (RECORD_REMOTE_RANGE _tc_ _ _size_ s_si)] + [else + (set! (continuation-stack _) + (copy_stack _tc_ + (continuation-stack _) + (& (continuation-stack-length _)) + (continuation-stack-clength _)))]))] [else]) (count countof-stack (continuation-stack-length _) 1 [measure]) (trace-pure continuation-link) @@ -250,9 +273,15 @@ [else (cond [(& (code-type code) (<< code-flag-mutable-closure code-flags-offset)) + ;; in parallel mode, assume that code pointer is static and doesn't need to be swept space-impure] [else - space-pure])])) + (case-flag parallel? + [on + ;; use space-closure so code reference (not a regular ptr) is swept correctly + space-closure] + [off + space-pure])])])) (vspace vspace_closure) (when-vfasl (when (& (code-type code) (<< code-flag-mutable-closure code-flags-offset)) @@ -286,7 +315,7 @@ (size size-symbol) (mark one-bit) (trace/define symbol-value val :vfasl-as (FIX (vfasl_symbol_to_index vfi _))) - (trace-symcode symbol-pvalue val) + (trace-local-symcode symbol-pvalue val) (trace-nonself/vfasl-as-nil symbol-plist) (trace-nonself symbol-name) (trace-nonself/vfasl-as-nil symbol-splist) @@ -463,37 +492,33 @@ (count countof-box)] [ratnum - (space space-data) + (space (case-flag parallel? 
+ [on space-pure] + [off space-data])) (vspace vspace_impure) ; would be better if we had pure, but these are rare (size size-ratnum) (copy-type ratnum-type) - (trace-now ratnum-numerator) - (trace-now ratnum-denominator) - (case-mode - [(copy) (when (CHECK_LOCK_FAILED _tc_) - ;; create failed relocates so that the heap checker isn't unhappy - (set! (ratnum-numerator _copy_) (cast ptr 0)) - (set! (ratnum-denominator _copy_) (cast ptr 0)))] - [(mark) (check-lock-failed)] - [else]) + (trace-nonparallel-now ratnum-numerator) + (trace-nonparallel-now ratnum-denominator) + (case-flag parallel? + [on (pad (set! (ratnum-pad _copy_) 0))] + [off]) (mark) (vfasl-pad-word) (count countof-ratnum)] [exactnum - (space space-data) + (space (case-flag parallel? + [on space-pure] + [off space-data])) (vspace vspace_impure) ; same rationale as ratnum (size size-exactnum) (copy-type exactnum-type) - (trace-now exactnum-real) - (trace-now exactnum-imag) - (case-mode - [(copy) (when (CHECK_LOCK_FAILED _tc_) - ;; create failed relocates so that the heap checker isn't unhappy - (set! (exactnum-real _copy_) (cast ptr 0)) - (set! (exactnum-imag _copy_) (cast ptr 0)))] - [(mark) (check-lock-failed)] - [else]) + (trace-nonparallel-now exactnum-real) + (trace-nonparallel-now exactnum-imag) + (case-flag parallel? + [on (pad (set! (exactnum-pad _copy_) 0))] + [off]) (mark) (vfasl-pad-word) (count countof-exactnum)] @@ -613,6 +638,11 @@ [else (trace-nonself field)])) +(define-trace-macro (trace-nonparallel-now field) + (case-flag parallel? + [on (trace-pure field)] + [off (trace-now field)])) + (define-trace-macro (try-double-pair do-car pair-car do-cdr pair-cdr count-pair) @@ -680,7 +710,16 @@ ;; Special relocation handling for code in a closure: (set! code (vfasl_relocate_code vfi code))] [else - (trace-early (just code))]))) + ;; In parallel mode, the `code` pointer may or may not have been + ;; forwarded. 
In that case, we may misinterpret the forward mmarker + ;; as a code type with flags, but it's ok, because the flags will + ;; only be set for static-generation objects + (case-flag parallel? + [on (case-mode + [(sweep sweep-in-old) + (trace-pure-code (just code))] + [else])] + [off (trace-early (just code))])]))) (define-trace-macro (copy-clos-code code) (case-mode @@ -718,7 +757,6 @@ (trace ref)] [(sweep sweep-in-old) (trace ref) ; can't trace `val` directly, because we need an impure relocate - (check-lock-failed) (define val : ptr (ref _))] [vfasl-copy (set! (ref _copy_) vfasl-val)] @@ -733,7 +771,6 @@ (case-flag as-dirty? [on (trace (just code))] [off (trace-pure (just code))]) - (check-lock-failed) (INITSYMCODE _ code)] [measure] [vfasl-copy @@ -741,6 +778,27 @@ [else (copy symbol-pvalue)])) +(define-trace-macro (trace-local-symcode symbol-pvalue val) + (case-mode + [(sweep) + (case-flag parallel? + [on + (define v_si : seginfo* (cond + [(Sprocedurep val) (SegInfo (ptr_get_segment val))] + [else NULL])) + (cond + [(\|\| + (\|\| + (== v_si NULL) + (! (-> v_si old_space))) + (SEGMENT_IS_LOCAL v_si val)) + (trace-symcode symbol-pvalue val)] + [else + (RECORD_REMOTE_RANGE _tc_ _ _size_ v_si)])] + [off (trace-symcode symbol-pvalue val)])] + [else + (trace-symcode symbol-pvalue val)])) + (define-trace-macro (trace-tlc tlc-next tlc-keyval) (case-mode [(copy mark) @@ -800,11 +858,24 @@ [on] [off (case-mode - [(sweep sweep-in-old self-test) - ;; Bignum pointer mask may need forwarding - (trace-pure (record-type-pm rtd)) - (check-lock-failed) - (set! num (record-type-pm rtd))] + [(sweep) + (case-flag parallel? + [on + (define pm_si : seginfo* (SegInfo (ptr_get_segment num))) + (cond + [(\|\| + (! (-> pm_si old_space)) + (SEGMENT_IS_LOCAL pm_si num)) + (trace-record-type-pm num rtd)] + [else + ;; Try again in the bignum's sweeper + (RECORD_REMOTE_RANGE _tc_ _ _size_ pm_si) + (set! 
num S_G.zero_length_bignum)])] + [off + (trace-record-type-pm num rtd)])] + [(sweep-in-old self-test) + (trace-record-type-pm num rtd)] + [(check) (check-bignum num)] [else])]) (let* ([index : iptr (- (BIGLEN num) 1)] ;; Ignore bit for already forwarded rtd @@ -825,6 +896,11 @@ (set! mask (bignum-data num index)) (set! bits bigit_bits)))]))])) +(define-trace-macro (trace-record-type-pm num rtd) + ;; Bignum pointer mask may need forwarding + (trace-pure (record-type-pm rtd)) + (set! num (record-type-pm rtd))) + (define-trace-macro (vfasl-check-parent-rtd rtd) (case-mode [(vfasl-copy) @@ -917,7 +993,7 @@ (cast iptr (port-buffer _)))) (trace port-buffer) (set! (port-last _) (cast ptr (+ (cast iptr (port-buffer _)) n))))] - [sweep-in-old + [(sweep-in-old check) (when (& (cast uptr _tf_) flag) (trace port-buffer))] [else @@ -1024,10 +1100,19 @@ (trace-pure (* pp))) (set! mask >>= 1)))] [else - (trace-pure (* (ENTRYNONCOMPACTLIVEMASKADDR oldret))) - (check-lock-failed) - (let* ([num : ptr (ENTRYLIVEMASK oldret)] - [index : iptr (BIGLEN num)]) + (case-mode + [(check) (check-bignum num)] + [else + (define n_si : seginfo* (SegInfo (ptr_get_segment num))) + (cond + [(! (-> n_si old_space))] + [(SEGMENT_IS_LOCAL n_si num) + (trace-pure (* (ENTRYNONCOMPACTLIVEMASKADDR oldret))) + (set! num (ENTRYLIVEMASK oldret))] + [else + (RECORD_REMOTE_RANGE _tc_ _ _size_ n_si) + (set! num S_G.zero_length_bignum)])]) + (let* ([index : iptr (BIGLEN num)]) (while :? (!= index 0) (set! index -= 1) @@ -1055,11 +1140,10 @@ [(sweep sweep-in-old) (define x_si : seginfo* (SegInfo (ptr_get_segment c_p))) (when (-> x_si old_space) - (relocate_code c_p x_si) + (relocate_code c_p x_si _ _size_) (case-mode [sweep-in-old] [else - (check-lock-failed) (set! field (cast ptr (+ (cast uptr c_p) co)))]))] [else (trace-pure (just c_p))])) @@ -1116,7 +1200,6 @@ (case-mode [sweep - (check-lock-failed) (cond [(&& (== from_g static_generation) (&& (! 
S_G.retain_static_relocation) @@ -1126,21 +1209,20 @@ (let* ([t_si : seginfo* (SegInfo (ptr_get_segment t))]) (when (-> t_si old_space) (cond - [(SEGMENT_LOCK_ACQUIRE t_si) + [(SEGMENT_IS_LOCAL t_si t) (set! n (size_reloc_table (reloc-table-size t))) (count countof-relocation-table (just n) 1 sweep) (cond [(-> t_si use_marks) - ;; Assert: (! (marked t_si t)) - (mark_typemod_data_object _tc_ t n t_si)] + (cond + [(! (marked t_si t)) + (mark_typemod_data_object _tc_ t n t_si)])] [else (let* ([oldt : ptr t]) (find_room _tc_ space_data from_g typemod n t) - (memcpy_aligned (TO_VOIDP t) (TO_VOIDP oldt) n))]) - (SEGMENT_LOCK_RELEASE t_si)] + (memcpy_aligned (TO_VOIDP t) (TO_VOIDP oldt) n))])] [else - (RECORD_LOCK_FAILED _tc_ t_si) - (check-lock-failed)]))) + (RECORD_REMOTE_RANGE _tc_ _ _size_ t_si)]))) (set! (reloc-table-code t) _) (set! (code-reloc _) t)]) (S_record_code_mod tc_in (cast uptr (TO_PTR (& (code-data _ 0)))) (cast uptr (code-length _)))] @@ -1150,6 +1232,10 @@ (set! (code-reloc _) (cast ptr (ptr_diff t (-> vfi base_addr))))] [else])])) +(define-trace-macro (check-bignum var) + (trace (just var)) + (check_bignum var)) + (define-trace-macro (unless-code-relocated stmt) (case-flag code-relocated? [on] @@ -1389,6 +1475,7 @@ [(lookup 'as-dirty? config #f) ", IGEN youngest"] [(lookup 'no-from-g? 
config #f) ""] [else ", IGEN from_g"])] + [(check) ", uptr seg, ISPC s_in, IBOOL aftergc"] [else ""])) (let ([body (lambda () @@ -1417,14 +1504,11 @@ (case (lookup 'mode config) [(copy) (code-block - "ENABLE_LOCK_ACQUIRE" - "if (CHECK_LOCK_FAILED(tc_in)) return 0xff;" "check_triggers(tc_in, si);" (code-block "ptr new_p;" "IGEN tg = TARGET_GENERATION(si);" (body) - "if (CHECK_LOCK_FAILED(tc_in)) return 0xff;" "SWEEPCHANGE(tc_in) = SWEEP_CHANGE_PROGRESS;" "FWDMARKER(p) = forward_marker;" "FWDADDRESS(p) = new_p;" @@ -1434,17 +1518,14 @@ "return tg;"))] [(mark) (code-block - "ENABLE_LOCK_ACQUIRE" - "if (CHECK_LOCK_FAILED(tc_in)) return 0xff;" "check_triggers(tc_in, si);" - (ensure-segment-mark-mask "si" "" '()) + (ensure-segment-mark-mask "si" "") (body) "SWEEPCHANGE(tc_in) = SWEEP_CHANGE_PROGRESS;" "ADD_BACKREFERENCE(p, si->generation);" "return si->generation;")] [(sweep) (code-block - "ENABLE_LOCK_ACQUIRE" (and (lookup 'maybe-backreferences? config #f) "PUSH_BACKREFERENCE(p)") (body) @@ -1453,9 +1534,7 @@ (and (lookup 'as-dirty? config #f) "return youngest;"))] [(sweep-in-old) - (code-block - "ENABLE_LOCK_ACQUIRE" - (body))] + (body)] [(measure) (body)] [(self-test) @@ -1579,8 +1658,8 @@ (code "/* Do not inspect the type or first field of the rtd, because" " it may have been overwritten for forwarding. */")])] - [(measure sweep sweep-in-old) - (statements `((trace-early ,field)) config)] + [(measure sweep sweep-in-old check) + (statements `((trace-early ,field)) (cons `(early-rtd? #t) config))] [else #f]) (statements (cdr l) (cons `(copy-extra-rtd ,field) config)))] [`(trace ,field) @@ -1590,9 +1669,12 @@ (code (and (not (lookup 'as-dirty? config #f)) (trace-statement field config #f 'pure)) (statements (cdr l) config))] + [`(trace-pure-code ,field) + (code (and (not (lookup 'as-dirty? config #f)) + (trace-statement field (cons `(early-code? 
#t) config) #f 'pure)) + (statements (cdr l) config))] [`(trace-early ,field) (code (trace-statement field config #t 'pure) - (check-lock-failure-statement config) (statements (cdr l) (if (symbol? field) (cons `(copy-extra ,field) config) config)))] @@ -1680,7 +1762,7 @@ (statements (cons `(copy-bytes ,offset (* ptr_bytes ,len)) (cdr l)) config)] - [(sweep measure sweep-in-old vfasl-sweep) + [(sweep measure sweep-in-old vfasl-sweep check) (code (loop-over-pointers (field-expression offset config "p" #t) @@ -1855,10 +1937,6 @@ (statements (list count-stmt) config)))] [else (statements (cdr l) config)])] - [`(check-lock-failed) - (code - (check-lock-failure-statement config) - (statements (cdr l) config))] [`(define ,id : ,type ,rhs) (let* ([used (lookup 'used config)] [prev-used? (hashtable-ref used id #f)]) @@ -1981,6 +2059,12 @@ [`_copy_ (case (lookup 'mode config) [(copy vfasl-copy) "new_p"] [else "p"])] + [`_size_ + (cond + [(lookup 'parallel? config #f) + (hashtable-set! (lookup 'used config) 'p_sz #t) + "p_sz"] + [else "SIZE"])] [`_tf_ (lookup 'tf config "TYPEFIELD(p)")] [`_tg_ @@ -2052,6 +2136,8 @@ (expression (car (apply-macro m (list a))) config protect? multiline?))] [else (protect (format "~a(~a)" op (expression a config #t)))])] + [`(begin ,a ,b) + (format "(~a, ~a)" (expression a config #t) (expression b config #t))] [`(,op ,a ,b) (cond [(memq op '(& && \|\| == != + - * < > <= >= << >> ->)) @@ -2118,23 +2204,38 @@ (measure-statement (field-expression field config "p" #f))] [(eq? mode 'self-test) (format "if (p == ~a) return 1;" (field-expression field config "p" #f))] + [(eq? mode 'check) + (format "check_pointer(&(~a), ~a, ~a, seg, s_in, aftergc);" + (field-expression field config "p" #f) + (match field + [`(just ,_) "0"] + [else "1"]) + (expression '_ config))] [else #f])) (define (relocate-statement purity e config) (define mode (lookup 'mode config)) + (define (get-start) (expression '_ config)) + (define (get-size) (cond + [(lookup 'early-rtd? 
config #f) + (expression '(size_record_inst (UNFIX (record-type-size (record-type _)))) config)] + [(lookup 'early-code? config #f) + (expression '(size_closure (CODEFREE (CLOSCODE _))) config)] + [else + (expression '_size_ config)])) (case mode [(vfasl-sweep) (format "vfasl_relocate(vfi, &~a);" e)] [(sweep-in-old) (if (eq? purity 'pure) - (format "relocate_pure(&~a);" e) - (format "relocate_indirect(~a);" e))] + (format "relocate_pure(&~a, ~a, ~a);" e (get-start) (get-size)) + (format "relocate_indirect(~a, ~a, ~a);" e (get-start) (get-size)))] [else (if (lookup 'as-dirty? config #f) (begin (when (eq? purity 'pure) (error 'relocate-statement "pure as dirty?")) - (format "relocate_dirty(&~a, youngest);" e)) - (format "relocate_~a(&~a~a);" purity e (if (eq? purity 'impure) ", from_g" "")))])) + (format "relocate_dirty(&~a, youngest, ~a, ~a);" e (get-start) (get-size))) + (format "relocate_~a(&~a~a, ~a, ~a);" purity e (if (eq? purity 'impure) ", from_g" "") (get-start) (get-size)))])) (define (measure-statement e) (code @@ -2243,26 +2344,19 @@ " seginfo *mark_si; IGEN g;" " si->marked_count += ((uptr)build_ptr(seg+1,0)) - addr;" " seg++;" - " /* Note: taking a sequence of locks for a span of segments */" " while (seg < end_seg) {" - " ENABLE_LOCK_ACQUIRE" " mark_si = SegInfo(seg);" - " SEGMENT_LOCK_MUST_ACQUIRE(mark_si);" " g = mark_si->generation;" " if (!fully_marked_mask[g]) init_fully_marked_mask(tc_in, g);" " mark_si->marked_mask = fully_marked_mask[g];" " mark_si->marked_count = bytes_per_segment;" - " SEGMENT_LOCK_RELEASE(mark_si);" " seg++;" " }" " mark_si = SegInfo(end_seg);" " {" - " ENABLE_LOCK_ACQUIRE" - " SEGMENT_LOCK_MUST_ACQUIRE(mark_si);" - (ensure-segment-mark-mask "mark_si" " " '()) + (ensure-segment-mark-mask "mark_si" " ") " /* no need to set a bit: it's enough to have made `marked_mask` non-NULL */" " mark_si->marked_count += addr + p_sz - (uptr)build_ptr(end_seg,0);" - " SEGMENT_LOCK_RELEASE(mark_si);" " }" "}")]))] [within-segment? 
@@ -2294,13 +2388,11 @@ "else" (within-loop-statement (code " seginfo *mark_si = SegInfo(ptr_get_segment(mark_p));" - " ENABLE_LOCK_ACQUIRE" - " SEGMENT_LOCK_MUST_ACQUIRE(mark_si);" - (ensure-segment-mark-mask "mark_si" " " '())) + (ensure-segment-mark-mask "mark_si" " ")) "mark_si" step #t - " SEGMENT_LOCK_RELEASE(mark_si);")))]) + #f)))]) (cond [no-sweep? #f] [else @@ -2312,20 +2404,6 @@ (code-block push))] [else push]))])))) - (define (check-lock-failure-statement config) - (let ([mode (lookup 'mode config)]) - (case mode - [(copy mark sweep) - (code - "if (CHECK_LOCK_FAILED(tc_in))" - (case mode - [(copy mark) (code-block "return 0xff;")] - [(sweep sweep-in-old) - (if (lookup 'as-dirty? config #f) - (code-block "return 0xff;") - (code-block "return;"))]))] - [else #f]))) - (define (field-expression field config arg protect?) (if (symbol? field) (cond @@ -2359,15 +2437,11 @@ (error 'field-ref "index not allowed for non-array field ~s" acc-name)) (format "~a(~a)" c-ref obj)]))) - (define (ensure-segment-mark-mask si inset flags) + (define (ensure-segment-mark-mask si inset) (code (format "~aif (!~a->marked_mask) {" inset si) - (format "~a find_room_voidp(tc_in, space_data, ~a->generation, ptr_align(segment_bitmap_bytes), ~a->marked_mask);" + (format "~a init_mask(tc_in, ~a->marked_mask, ~a->generation, 0);" inset si si) - (if (memq 'no-clear flags) - (format "~a /* no clearing needed */" inset) - (format "~a memset(~a->marked_mask, 0, segment_bitmap_bytes);" inset si)) - (format "~a S_G.bitmask_overhead[~a->generation] += ptr_align(segment_bitmap_bytes);" inset si) (format "~a}" inset))) (define (just-mark-bit-space? sp) @@ -2534,25 +2608,29 @@ (loop (cdr l))))] [else (cons (car l) (loop (cdr l)))])))) - (define (gen-gc ofn count? measure?) + (define (gen-gc ofn count? measure? parallel?) (guard (x [#t (raise x)]) (parameterize ([current-output-port (open-output-file ofn 'replace)]) (print-code (generate "copy" `((mode copy) (maybe-backreferences? ,count?) 
- (counts? ,count?)))) + (counts? ,count?) + (parallel? ,parallel?)))) (print-code (generate "sweep" `((mode sweep) (maybe-backreferences? ,count?) - (counts? ,count?)))) + (counts? ,count?) + (parallel? ,parallel?)))) (print-code (generate "sweep_object_in_old" `((mode sweep-in-old) - (maybe-backreferences? ,count?)))) + (maybe-backreferences? ,count?) + (parallel? ,parallel?)))) (print-code (generate "sweep_dirty_object" `((mode sweep) (maybe-backreferences? ,count?) (counts? ,count?) + (parallel? ,parallel?) (as-dirty? #t)))) (letrec ([sweep1 (case-lambda @@ -2566,26 +2644,32 @@ (known-types (,type)) (maybe-backreferences? ,count?) (counts? ,count?)))))])]) - (sweep1 'record "sweep_record" '()) - (sweep1 'record "sweep_dirty_record" '((as-dirty? #t))) - (sweep1 'symbol) - (sweep1 'symbol "sweep_dirty_symbol" '((as-dirty? #t))) - (sweep1 'thread "sweep_thread" '((no-from-g? #t))) - (sweep1 'port) - (sweep1 'port "sweep_dirty_port" '((as-dirty? #t))) - (sweep1 'closure "sweep_continuation" '((code-relocated? #t) - (assume-continuation? #t))) - (sweep1 'code "sweep_code_object")) + (sweep1 'record "sweep_record" `((parallel? ,parallel?))) + (sweep1 'record "sweep_dirty_record" `((as-dirty? #t) + (parallel? ,parallel?))) + (sweep1 'symbol "sweep_symbol" `((parallel? ,parallel?))) + (sweep1 'symbol "sweep_dirty_symbol" `((as-dirty? #t) + (parallel? ,parallel?))) + (sweep1 'thread "sweep_thread" `((no-from-g? #t) + (parallel? ,parallel?))) + (sweep1 'port "sweep_port" `((parallel? ,parallel?))) + (sweep1 'port "sweep_dirty_port" `((as-dirty? #t) + (parallel? ,parallel?))) + (sweep1 'closure "sweep_continuation" `((code-relocated? #t) + (assume-continuation? #t) + (parallel? ,parallel?))) + (sweep1 'code "sweep_code_object" `((parallel? ,parallel?)))) (print-code (generate "size_object" `((mode size)))) (print-code (generate "mark_object" `((mode mark) - (counts? ,count?)))) + (counts? ,count?) + (parallel? 
,parallel?)))) (print-code (generate "object_directly_refers_to_self" `((mode self-test)))) (print-code (code "static void mark_typemod_data_object(ptr tc_in, ptr p, uptr p_sz, seginfo *si)" (code-block - (ensure-segment-mark-mask "si" "" '()) + (ensure-segment-mark-mask "si" "") (mark-statement '(one-bit no-sweep) (cons (list 'used (make-eq-hashtable)) @@ -2603,11 +2687,22 @@ `((mode vfasl-sweep) (return-size? #t))))))) + (define (gen-heapcheck ofn) + (guard + (x [#t (raise x)]) + (parameterize ([current-output-port (open-output-file ofn 'replace)]) + (print-code (generate "check_object" + `((mode check)))) + (print-code (generate "size_object" + `((mode size))))))) + ;; Render via mkequates to record a mapping from selectors to C ;; macros: (let-values ([(op get) (open-bytevector-output-port (native-transcoder))]) (mkequates.h op)) - (set! mkgc-ocd.inc (lambda (ofn) (gen-gc ofn #f #f))) - (set! mkgc-oce.inc (lambda (ofn) (gen-gc ofn #t #t))) - (set! mkvfasl.inc (lambda (ofn) (gen-vfasl ofn)))) + (set! mkgc-ocd.inc (lambda (ofn) (gen-gc ofn #f #f #f))) + (set! mkgc-oce.inc (lambda (ofn) (gen-gc ofn #t #t #f))) + (set! mkgc-par.inc (lambda (ofn) (gen-gc ofn #f #f #t))) + (set! mkvfasl.inc (lambda (ofn) (gen-vfasl ofn))) + (set! 
mkheapcheck.inc (lambda (ofn) (gen-heapcheck ofn)))) diff --git a/racket/src/ChezScheme/s/mkheader.ss b/racket/src/ChezScheme/s/mkheader.ss index 9d24514db8..2a98aa8e01 100644 --- a/racket/src/ChezScheme/s/mkheader.ss +++ b/racket/src/ChezScheme/s/mkheader.ss @@ -1006,10 +1006,12 @@ (defref EXACTNUM_TYPE exactnum type) (defref EXACTNUM_REAL_PART exactnum real) (defref EXACTNUM_IMAG_PART exactnum imag) + (defref EXACTNUM_PAD exactnum pad) (defref RATTYPE ratnum type) (defref RATNUM ratnum numerator) (defref RATDEN ratnum denominator) + (defref RATPAD ratnum pad) (defref CLOSENTRY closure code) (defref CLOSIT closure data) diff --git a/racket/src/ChezScheme/workarea b/racket/src/ChezScheme/workarea index c06b594db5..6c7cadb02f 100755 --- a/racket/src/ChezScheme/workarea +++ b/racket/src/ChezScheme/workarea @@ -285,7 +285,9 @@ workdir $W/boot/$M (cd $W/boot/$M; workln "$upupupbootdir"/boot/$M/equates.h equates.h) (cd $W/boot/$M; workln "$upupupbootdir"/boot/$M/gc-ocd.inc gc-ocd.inc) (cd $W/boot/$M; workln "$upupupbootdir"/boot/$M/gc-oce.inc gc-oce.inc) +(cd $W/boot/$M; workln "$upupupbootdir"/boot/$M/gc-par.inc gc-par.inc) (cd $W/boot/$M; workln "$upupupbootdir"/boot/$M/vfasl.inc vfasl.inc) +(cd $W/boot/$M; workln "$upupupbootdir"/boot/$M/heapcheck.inc heapcheck.inc) (cd $W/boot/$M; workln "$upupupbootdir"/boot/$M/petite.boot petite.boot) (cd $W/boot/$M; workln "$upupupbootdir"/boot/$M/scheme.boot scheme.boot) (cd $W/boot/$M; workln "$upupupbootdir"/boot/$M/def.so def.so) diff --git a/racket/src/cs/c/check_boot.sh b/racket/src/cs/c/check_boot.sh index 14e7b3e967..4d913f9d4c 100644 --- a/racket/src/cs/c/check_boot.sh +++ b/racket/src/cs/c/check_boot.sh @@ -24,7 +24,9 @@ check_pb scheme.h check_pb equates.h check_pb gc-ocd.inc check_pb gc-oce.inc +check_pb gc-par.inc check_pb vfasl.inc +check_pb heapcheck.inc check_mach() { @@ -43,4 +45,6 @@ check_mach scheme.h check_mach equates.h check_mach gc-ocd.inc check_mach gc-oce.inc +check_mach gc-par.inc check_mach vfasl.inc 
+check_mach heapcheck.inc diff --git a/racket/src/cs/c/ready_boot.sh b/racket/src/cs/c/ready_boot.sh index 84b5f5a78f..046f2dfd46 100644 --- a/racket/src/cs/c/ready_boot.sh +++ b/racket/src/cs/c/ready_boot.sh @@ -22,6 +22,8 @@ ready_mach scheme.h ready_mach equates.h ready_mach gc-ocd.inc ready_mach gc-oce.inc +ready_mach gc-par.inc ready_mach vfasl.inc +ready_mach heapcheck.inc rm -f boot_pending diff --git a/racket/src/cs/c/reset_boot.sh b/racket/src/cs/c/reset_boot.sh index 5821c8ec4b..bcde08540c 100644 --- a/racket/src/cs/c/reset_boot.sh +++ b/racket/src/cs/c/reset_boot.sh @@ -22,4 +22,5 @@ ready_mach scheme.h ready_mach equates.h ready_mach gc-ocd.inc ready_mach gc-oce.inc +ready_mach gc-par.inc ready_mach vfasl.inc