From f1f4959b66c6dd5d86259e48c18f1414ee34d774 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Fri, 9 Oct 2020 06:54:47 -0600 Subject: [PATCH] Chez Scheme: separate allocation and thread mutexes Instead of one big lock, have one big lock and one small lock for just manipulating allocation state. This separation better reflects the locking needs of parallel collection, where the main collecting thread holds the one big lock, and helper threads need to take a different lock while changing allocation state. This commit also includes changes to cooperate better with LLVM's thread sanitizer. It doesn't work for parallel collection, but it seems to work otherwise and helped expose a missing lock and a questionable use of a global variable. --- .makefile | 2 +- Makefile | 12 +-- pkgs/base/info.rkt | 2 +- racket/src/ChezScheme/c/alloc.c | 101 +++++++++--------- racket/src/ChezScheme/c/externs.h | 5 +- racket/src/ChezScheme/c/gc.c | 49 +++++---- racket/src/ChezScheme/c/gcwrapper.c | 16 +-- racket/src/ChezScheme/c/globals.h | 10 +- racket/src/ChezScheme/c/prim5.c | 8 +- racket/src/ChezScheme/c/scheme.c | 4 +- racket/src/ChezScheme/c/schsig.c | 32 +++--- racket/src/ChezScheme/c/segment.c | 1 + racket/src/ChezScheme/c/symbol.c | 4 + racket/src/ChezScheme/c/thread.c | 28 +++-- racket/src/ChezScheme/c/types.h | 41 +++++-- racket/src/ChezScheme/c/version.h | 13 ++- racket/src/ChezScheme/makefiles/Mf-install.in | 2 +- racket/src/ChezScheme/mats/thread.ms | 2 +- racket/src/ChezScheme/s/cmacros.ss | 4 +- racket/src/ChezScheme/s/mkgc.ss | 12 +-- racket/src/version/racket_version.h | 2 +- 21 files changed, 201 insertions(+), 149 deletions(-) diff --git a/.makefile b/.makefile index e5e6f73f3f..6db2b06b56 100644 --- a/.makefile +++ b/.makefile @@ -338,7 +338,7 @@ RACKET_FOR_BOOTFILES = $(RACKET) RACKET_FOR_BUILD = $(RACKET) # This branch name changes each time the pb boot files are updated: -PB_BRANCH == circa-7.8.0.11-1 +PB_BRANCH == circa-7.9.0.2-1 PB_REPO = 
https://github.com/racket/pb # Alternative source for Chez Scheme boot files, normally set by diff --git a/Makefile b/Makefile index 3e42387f97..7b1d4a34d9 100644 --- a/Makefile +++ b/Makefile @@ -47,7 +47,7 @@ RACKETCS_SUFFIX = RACKET = RACKET_FOR_BOOTFILES = $(RACKET) RACKET_FOR_BUILD = $(RACKET) -PB_BRANCH = circa-7.8.0.11-1 +PB_BRANCH = circa-7.9.0.2-1 PB_REPO = https://github.com/racket/pb EXTRA_REPOS_BASE = CS_CROSS_SUFFIX = @@ -306,14 +306,14 @@ maybe-fetch-pb-as-is: echo done fetch-pb-from: mkdir -p racket/src/ChezScheme/boot - if [ ! -d racket/src/ChezScheme/boot/pb ] ; then git clone -q -b circa-7.8.0.11-1 $(PB_REPO) racket/src/ChezScheme/boot/pb ; else cd racket/src/ChezScheme/boot/pb && git fetch -q origin circa-7.8.0.11-1:remotes/origin/circa-7.8.0.11-1 ; fi - cd racket/src/ChezScheme/boot/pb && git checkout -q circa-7.8.0.11-1 + if [ ! -d racket/src/ChezScheme/boot/pb ] ; then git clone -q -b circa-7.9.0.2-1 $(PB_REPO) racket/src/ChezScheme/boot/pb ; else cd racket/src/ChezScheme/boot/pb && git fetch -q origin circa-7.9.0.2-1:remotes/origin/circa-7.9.0.2-1 ; fi + cd racket/src/ChezScheme/boot/pb && git checkout -q circa-7.9.0.2-1 pb-stage: - cd racket/src/ChezScheme/boot/pb && git branch circa-7.8.0.11-1 - cd racket/src/ChezScheme/boot/pb && git checkout circa-7.8.0.11-1 + cd racket/src/ChezScheme/boot/pb && git branch circa-7.9.0.2-1 + cd racket/src/ChezScheme/boot/pb && git checkout circa-7.9.0.2-1 cd racket/src/ChezScheme/boot/pb && git add . 
&& git commit --amend -m "new build" pb-push: - cd racket/src/ChezScheme/boot/pb && git push -u origin circa-7.8.0.11-1 + cd racket/src/ChezScheme/boot/pb && git push -u origin circa-7.9.0.2-1 win-cs-base: IF "$(RACKET_FOR_BUILD)" == "" $(MAKE) win-bc-then-cs-base SETUP_BOOT_MODE=--boot WIN32_BUILD_LEVEL=bc PLAIN_RACKET=racket\racketbc DISABLE_STATIC_LIBS="$(DISABLE_STATIC_LIBS)" EXTRA_REPOS_BASE="$(EXTRA_REPOS_BASE)" JOB_OPTIONS="$(JOB_OPTIONS)" PLT_SETUP_OPTIONS="$(PLT_SETUP_OPTIONS)" RACKETBC_SUFFIX="$(RACKETBC_SUFFIX)" RACKETCS_SUFFIX="$(RACKETCS_SUFFIX)" IF not "$(RACKET_FOR_BUILD)" == "" $(MAKE) win-just-cs-base SETUP_BOOT_MODE=--chain DISABLE_STATIC_LIBS="$(DISABLE_STATIC_LIBS)" EXTRA_REPOS_BASE="$(EXTRA_REPOS_BASE)" JOB_OPTIONS="$(JOB_OPTIONS)" PLT_SETUP_OPTIONS="$(PLT_SETUP_OPTIONS)" RACKETCS_SUFFIX="$(RACKETCS_SUFFIX)" RACKET_FOR_BUILD="$(RACKET_FOR_BUILD)" diff --git a/pkgs/base/info.rkt b/pkgs/base/info.rkt index 168db9c9e4..43b11a9fdf 100644 --- a/pkgs/base/info.rkt +++ b/pkgs/base/info.rkt @@ -12,7 +12,7 @@ (define collection 'multi) -(define version "7.9.0.1") +(define version "7.9.0.2") (define deps `("racket-lib" ["racket" #:version ,version])) diff --git a/racket/src/ChezScheme/c/alloc.c b/racket/src/ChezScheme/c/alloc.c index 4c83c5c050..bd0b50dce1 100644 --- a/racket/src/ChezScheme/c/alloc.c +++ b/racket/src/ChezScheme/c/alloc.c @@ -18,7 +18,7 @@ #include "popcount.h" /* locally defined functions */ -static void maybe_fire_collector PROTO((void)); +static void maybe_queue_fire_collector(thread_gc *tgc); void S_alloc_init() { ISPC s; IGEN g; UINT i; @@ -96,7 +96,6 @@ void S_protect(p) ptr *p; { S_G.protected[S_G.protect_next++] = p; } -/* S_reset_scheme_stack is always called with mutex */ void S_reset_scheme_stack(tc, n) ptr tc; iptr n; { ptr *x; iptr m; @@ -137,6 +136,9 @@ ptr S_compute_bytes_allocated(xg, xs) ptr xg; ptr xs; { ISPC s, smax, smin; IGEN g, gmax, gmin; uptr n; + tc_mutex_acquire(); + alloc_mutex_acquire(); + gmin = 
(IGEN)UNFIX(xg); if (gmin < 0) { gmin = 0; @@ -180,6 +182,9 @@ ptr S_compute_bytes_allocated(xg, xs) ptr xg; ptr xs; { if (gmin == 0 && smin <= space_new && space_new <= smax) n -= (uptr)REAL_EAP(tc) - (uptr)AP(tc); + alloc_mutex_release(); + tc_mutex_release(); + return Sunsigned(n); } @@ -187,12 +192,22 @@ ptr S_bytes_finalized() { return Sunsigned(S_G.bytes_finalized); } -static void maybe_fire_collector() { +/* called with alloc mutex */ +static void maybe_queue_fire_collector(thread_gc *tgc) { if ((S_G.bytes_of_generation[0] + S_G.bytesof[0][countof_phantom]) - S_G.g0_bytes_after_last_gc >= S_G.collect_trip_bytes) - S_fire_collector(); + tgc->queued_fire = 1; } -/* suitable mutex (either tc_mutex or gc_tc_mutex) must be held */ +void S_maybe_fire_collector(thread_gc *tgc) { + if ((tgc->during_alloc == 0) && (!IS_ALLOC_MUTEX_OWNER() || IS_TC_MUTEX_OWNER())) { + if (tgc->queued_fire) { + tgc->queued_fire = 0; + S_fire_collector(); + } + } +} + +/* allocation mutex must be held (or single-threaded guaranteed because collecting) */ static void close_off_segment(thread_gc *tgc, ptr old, ptr base_loc, ptr sweep_loc, ISPC s, IGEN g) { if (base_loc) { @@ -219,18 +234,11 @@ ptr S_find_more_gc_room(thread_gc *tgc, ISPC s, IGEN g, iptr n, ptr old) { ptr new; iptr new_bytes; -#ifdef PTHREADS - if (S_use_gc_tc_mutex) - gc_tc_mutex_acquire(); - else - tc_mutex_acquire(); -#else - tc_mutex_acquire(); -#endif + alloc_mutex_acquire(); close_off_segment(tgc, old, tgc->base_loc[g][s], tgc->sweep_loc[g][s], s, g); - S_pants_down += 1; + tgc->during_alloc += 1; nsegs = (uptr)(n + ptr_bytes + bytes_per_segment - 1) >> segment_offset_bits; @@ -247,23 +255,17 @@ ptr S_find_more_gc_room(thread_gc *tgc, ISPC s, IGEN g, iptr n, ptr old) { tgc->bytes_left[g][s] = (new_bytes - n) - ptr_bytes; tgc->next_loc[g][s] = (ptr)((uptr)new + n); - if (g == 0 && S_pants_down == 1) maybe_fire_collector(); + if (tgc->during_alloc == 1) maybe_queue_fire_collector(tgc); - S_pants_down -= 1; + 
tgc->during_alloc -= 1; + + alloc_mutex_release(); + S_maybe_fire_collector(tgc); -#ifdef PTHREADS - if (S_use_gc_tc_mutex) - gc_tc_mutex_release(); - else - tc_mutex_release(); -#else - tc_mutex_release(); -#endif - return new; } -/* tc_mutex must be held */ +/* allocation mutex must be held (or single-threaded guaranteed because collecting) */ void S_close_off_thread_local_segment(ptr tc, ISPC s, IGEN g) { thread_gc *tgc = THREAD_GC(tc); @@ -275,7 +277,8 @@ void S_close_off_thread_local_segment(ptr tc, ISPC s, IGEN g) { tgc->sweep_loc[g][s] = (ptr)0; } -/* S_reset_allocation_pointer is always called with mutex */ +/* S_reset_allocation_pointer is always called with allocation mutex + (or single-threaded guaranteed because collecting) */ /* We always allocate exactly one segment for the allocation area, since we can get into hot water with formerly locked objects, specifically symbols and impure records, that cross segment boundaries. This allows @@ -287,10 +290,11 @@ void S_close_off_thread_local_segment(ptr tc, ISPC s, IGEN g) { void S_reset_allocation_pointer(tc) ptr tc; { iptr seg; + thread_gc *tgc = THREAD_GC(tc); - S_pants_down += 1; + tgc->during_alloc += 1; - seg = S_find_segments(THREAD_GC(tc), space_new, 0, 1); + seg = S_find_segments(tgc, space_new, 0, 1); /* NB: if allocate_segments didn't already ensure we don't use the last segment of memory, we'd have to reject it here so cp2-alloc can avoid a carry check for @@ -303,19 +307,19 @@ void S_reset_allocation_pointer(tc) ptr tc; { S_G.bytes_of_space[0][space_new] += bytes_per_segment; S_G.bytes_of_generation[0] += bytes_per_segment; - if (S_pants_down == 1) maybe_fire_collector(); + if (tgc->during_alloc == 1) maybe_queue_fire_collector(THREAD_GC(tc)); AP(tc) = build_ptr(seg, 0); REAL_EAP(tc) = EAP(tc) = (ptr)((uptr)AP(tc) + bytes_per_segment); - S_pants_down -= 1; + tgc->during_alloc -= 1; } void S_record_new_dirty_card(thread_gc *tgc, ptr *ppp, IGEN to_g) { uptr card = (uptr)TO_PTR(ppp) >> 
card_offset_bits; dirtycardinfo *ndc; - gc_tc_mutex_acquire(); + alloc_mutex_acquire(); ndc = S_G.new_dirty_cards; if (ndc != NULL && ndc->card == card) { if (to_g < ndc->youngest) ndc->youngest = to_g; @@ -327,9 +331,10 @@ void S_record_new_dirty_card(thread_gc *tgc, ptr *ppp, IGEN to_g) { ndc->next = next; S_G.new_dirty_cards = ndc; } - gc_tc_mutex_release(); + alloc_mutex_release(); } +/* allocation mutex must be held (or only one thread due to call by collector) */ FORCEINLINE void mark_segment_dirty(seginfo *si, IGEN from_g, IGEN to_g) { IGEN old_to_g = si->min_dirty_byte; if (to_g < old_to_g) { @@ -363,13 +368,16 @@ void S_dirty_set(ptr *loc, ptr x) { } else { IGEN from_g = si->generation; if (from_g != 0) { + alloc_mutex_acquire(); si->dirty_bytes[((uptr)TO_PTR(loc) >> card_offset_bits) & ((1 << segment_card_offset_bits) - 1)] = 0; mark_segment_dirty(si, from_g, 0); + alloc_mutex_release(); } } } } +/* only called by GC, so no other thread is running */ void S_mark_card_dirty(uptr card, IGEN to_g) { uptr loc = card << card_offset_bits; uptr seg = addr_get_segment(loc); @@ -381,7 +389,8 @@ void S_mark_card_dirty(uptr card, IGEN to_g) { } } -/* scan remembered set from P to ENDP, transfering to dirty vector */ +/* scan remembered set from P to ENDP, transferring to dirty vector; + allocation mutex must be held */ void S_scan_dirty(ptr *p, ptr *endp) { uptr this, last; @@ -419,7 +428,7 @@ void S_scan_remembered_set() { ptr tc = get_thread_context(); uptr ap, eap, real_eap; - tc_mutex_acquire(); + alloc_mutex_acquire(); ap = (uptr)AP(tc); eap = (uptr)EAP(tc); @@ -438,7 +447,8 @@ void S_scan_remembered_set() { S_reset_allocation_pointer(tc); } - tc_mutex_release(); + alloc_mutex_release(); + S_maybe_fire_collector(THREAD_GC(tc)); } /* S_get_more_room is called from genereated machine code when there is @@ -466,14 +476,7 @@ ptr S_get_more_room_help(ptr tc, uptr ap, uptr type, uptr size) { eap = (uptr)EAP(tc); real_eap = (uptr)REAL_EAP(tc); -#ifdef PTHREADS - if
(S_use_gc_tc_mutex) - gc_tc_mutex_acquire(); - else - tc_mutex_acquire(); -#else - tc_mutex_acquire(); -#endif + alloc_mutex_acquire(); S_scan_dirty(TO_VOIDP(eap), TO_VOIDP(real_eap)); eap = real_eap; @@ -508,14 +511,8 @@ ptr S_get_more_room_help(ptr tc, uptr ap, uptr type, uptr size) { } } -#ifdef PTHREADS - if (S_use_gc_tc_mutex) - gc_tc_mutex_release(); - else - tc_mutex_release(); -#else - tc_mutex_release(); -#endif + alloc_mutex_release(); + S_maybe_fire_collector(THREAD_GC(tc)); return x; } diff --git a/racket/src/ChezScheme/c/externs.h b/racket/src/ChezScheme/c/externs.h index d91936c324..02a3cdffff 100644 --- a/racket/src/ChezScheme/c/externs.h +++ b/racket/src/ChezScheme/c/externs.h @@ -62,7 +62,8 @@ off64_t lseek64(int,off64_t,int); extern void S_alloc_init PROTO((void)); extern void S_protect PROTO((ptr *p)); extern void S_reset_scheme_stack PROTO((ptr tc, iptr n)); -extern void S_reset_allocation_pointer PROTO((ptr tc)); +extern void S_reset_allocation_pointer PROTO((ptr tc)); /* call S_maybe_fire_collector afterward outside alloc mutex */ +extern void S_maybe_fire_collector(thread_gc *tgc); extern ptr S_compute_bytes_allocated PROTO((ptr xg, ptr xs)); extern ptr S_bytes_finalized PROTO(()); extern ptr S_find_more_room PROTO((ISPC s, IGEN g, iptr n, ptr old)); @@ -280,6 +281,7 @@ extern void S_mutex_free PROTO((scheme_mutex_t *m)); extern void S_mutex_acquire PROTO((scheme_mutex_t *m)); extern INT S_mutex_tryacquire PROTO((scheme_mutex_t *m)); extern void S_mutex_release PROTO((scheme_mutex_t *m)); +extern IBOOL S_mutex_is_owner PROTO((scheme_mutex_t *m)); extern s_thread_cond_t *S_make_condition PROTO((void)); extern void S_condition_free PROTO((s_thread_cond_t *c)); extern IBOOL S_condition_wait PROTO((s_thread_cond_t *c, scheme_mutex_t *m, ptr t)); @@ -404,6 +406,7 @@ extern void S_gettime PROTO((INT typeno, struct timespec *tp)); /* symbol.c */ extern ptr S_symbol_value PROTO((ptr sym)); +extern ptr S_symbol_racy_value PROTO((ptr sym)); extern void 
S_set_symbol_value PROTO((ptr sym, ptr val)); /* machine-dependent .c files, e.g., x88k.c */ diff --git a/racket/src/ChezScheme/c/gc.c b/racket/src/ChezScheme/c/gc.c index 1228289458..72840bfcc1 100644 --- a/racket/src/ChezScheme/c/gc.c +++ b/racket/src/ChezScheme/c/gc.c @@ -128,8 +128,8 @@ * There are no attempts to take tc_mutex suring sweeping. To the degree that locking is needed (e.g., to allocate new segments), - `S_use_gc_tc_mutex` redirects to gc_tc_mutex. No other locks - can be taken while that one is held. + the allocation mutex is used. No other locks can be taken while + that one is held. * To copy from or mark on a segment, a sweeper must own the segment. A sweeper during sweeping may encounter a "remote" @@ -367,12 +367,14 @@ static ptr sweep_from; #ifdef ENABLE_PARALLEL +static int in_parallel_sweepers = 0; + #define HAS_SWEEPER_WRT(t_tc, tc) 1 -# define GC_TC_MUTEX_ACQUIRE() gc_tc_mutex_acquire() -# define GC_TC_MUTEX_RELEASE() gc_tc_mutex_release() +# define GC_MUTEX_ACQUIRE() alloc_mutex_acquire() +# define GC_MUTEX_RELEASE() alloc_mutex_release() -# define SEGMENT_IS_LOCAL(si, p) (((si)->creator == tgc) || marked(si, p) || !S_use_gc_tc_mutex) +# define SEGMENT_IS_LOCAL(si, p) (((si)->creator == tgc) || marked(si, p) || !in_parallel_sweepers) # define RECORD_REMOTE_RANGE_TO(tgc, start, size, creator) do { \ ptr START = TO_PTR(UNTYPE_ANY(start)); \ ptr END = (ptr)((uptr)START + (size)); \ @@ -424,8 +426,8 @@ static int num_sweepers; #define HAS_SWEEPER_WRT(t_tc, tc) (t_tc == tc) -# define GC_TC_MUTEX_ACQUIRE() do { } while (0) -# define GC_TC_MUTEX_RELEASE() do { } while (0) +# define GC_MUTEX_ACQUIRE() do { } while (0) +# define GC_MUTEX_RELEASE() do { } while (0) # define SEGMENT_IS_LOCAL(si, p) 1 # define RECORD_REMOTE_RANGE_TO(tgc, start, size, creator) do { } while (0) @@ -493,11 +495,11 @@ uptr list_length(ptr ls) { #endif static void init_fully_marked_mask(thread_gc *tgc, IGEN g) { - GC_TC_MUTEX_ACQUIRE(); + GC_MUTEX_ACQUIRE(); if 
(!fully_marked_mask[g]) { init_mask(tgc, fully_marked_mask[g], g, 0xFF); } - GC_TC_MUTEX_RELEASE(); + GC_MUTEX_RELEASE(); } #ifdef PRESERVE_FLONUM_EQ @@ -1168,6 +1170,7 @@ ptr GCENTRY(ptr tc, ptr count_roots_ls) { #ifdef ENABLE_PARALLEL { ptr t_tc = (ptr)THREADTC(thread); + THREAD_GC(t_tc)->during_alloc += 1; /* turned back off in parallel_sweep_dirty_and_generation */ if (!OLDSPACE(thread)) { /* remember to sweep in sweeper thread */ THREAD_GC(t_tc)->thread = thread; @@ -1712,6 +1715,8 @@ ptr GCENTRY(ptr tc, ptr count_roots_ls) { for (g = MIN_TG; g <= MAX_TG; g++) S_G.bitmask_overhead[g] += tgc->bitmask_overhead[g]; + tgc->queued_fire = 0; + ACCUM_REAL_TIME(all_accum, astep, astart); REPORT_TIME(fprintf(stderr, "%d all +%ld ms %ld ms [real time]\n", MAX_CG, astep, all_accum)); @@ -1782,10 +1787,10 @@ static void flush_remote_range(thread_gc *tgc, ISPC s, IGEN g) { #define save_resweep(s, si) do { \ if (s == space_weakpair) { \ - GC_TC_MUTEX_ACQUIRE(); \ + GC_MUTEX_ACQUIRE(); \ si->sweep_next = resweep_weak_segments; \ resweep_weak_segments = si; \ - GC_TC_MUTEX_RELEASE(); \ + GC_MUTEX_RELEASE(); \ } \ } while (0) @@ -2165,23 +2170,23 @@ static void record_dirty_segment(IGEN from_g, IGEN to_g, seginfo *si) { if (to_g < from_g) { seginfo *oldfirst; - GC_TC_MUTEX_ACQUIRE(); + GC_MUTEX_ACQUIRE(); oldfirst = DirtySegments(from_g, to_g); DirtySegments(from_g, to_g) = si; si->dirty_prev = &DirtySegments(from_g, to_g); si->dirty_next = oldfirst; if (oldfirst != NULL) oldfirst->dirty_prev = &si->dirty_next; si->min_dirty_byte = to_g; - GC_TC_MUTEX_RELEASE(); + GC_MUTEX_RELEASE(); } } static void add_weaksegments_to_resweep(weakseginfo *segs, weakseginfo *last_seg) { if (segs != NULL) { - GC_TC_MUTEX_ACQUIRE(); + GC_MUTEX_ACQUIRE(); last_seg->next = weaksegments_to_resweep; weaksegments_to_resweep = segs; - GC_TC_MUTEX_RELEASE(); + GC_MUTEX_RELEASE(); } } @@ -2667,7 +2672,7 @@ static void add_trigger_guardians_to_recheck(ptr ls) { ptr last = ls, next; - 
GC_TC_MUTEX_ACQUIRE(); + GC_MUTEX_ACQUIRE(); next = GUARDIANNEXT(ls); while (next != 0) { @@ -2677,7 +2682,7 @@ static void add_trigger_guardians_to_recheck(ptr ls) INITGUARDIANNEXT(last) = recheck_guardians_ls; recheck_guardians_ls = ls; - GC_TC_MUTEX_RELEASE(); + GC_MUTEX_RELEASE(); } static void ephemeron_remove(ptr pe) { @@ -3072,8 +3077,8 @@ static IBOOL sweeper_started(int i, IBOOL start_new) { static void parallel_sweep_dirty_and_generation(thread_gc *tgc) { int i; thread_gc *all_tgcs = NULL; - - S_use_gc_tc_mutex = 1; + + in_parallel_sweepers = 1; /* start other sweepers */ (void)s_thread_mutex_lock(&sweep_mutex); @@ -3116,6 +3121,8 @@ static void parallel_sweep_dirty_and_generation(thread_gc *tgc) { S_G.bitmask_overhead[g] += t_tgc->bitmask_overhead[g]; S_flush_instruction_cache(t_tgc->tc); t_tgc->sweeper = main_sweeper_index; + t_tgc->queued_fire = 0; + t_tgc->during_alloc -= 1; if (t_tgc != tgc) { t_tgc->next = all_tgcs; @@ -3135,8 +3142,8 @@ static void parallel_sweep_dirty_and_generation(thread_gc *tgc) { (void)s_thread_mutex_unlock(&sweep_mutex); tgc->next = all_tgcs; - - S_use_gc_tc_mutex = 0; + + in_parallel_sweepers = 0; } static void run_sweeper(gc_sweeper *sweeper) { diff --git a/racket/src/ChezScheme/c/gcwrapper.c b/racket/src/ChezScheme/c/gcwrapper.c index 2558f27325..3f0c48bbc1 100644 --- a/racket/src/ChezScheme/c/gcwrapper.c +++ b/racket/src/ChezScheme/c/gcwrapper.c @@ -300,7 +300,7 @@ void Slock_object(x) ptr x; { if (!IMMEDIATE(x) && (si = MaybeSegInfo(ptr_get_segment(x))) != NULL && (g = si->generation) != static_generation) { ptr tc = get_thread_context(); tc_mutex_acquire(); - S_pants_down += 1; + THREAD_GC(tc)->during_alloc += 1; /* immobilize */ if (si->must_mark < MUST_MARK_INFINITY) { si->must_mark++; @@ -313,7 +313,7 @@ void Slock_object(x) ptr x; { if (g != 0) S_G.countof[g][countof_pair] += 1; } (void)remove_first_nomorep(x, &S_G.unlocked_objects[g], 0); - S_pants_down -= 1; + THREAD_GC(tc)->during_alloc -= 1; tc_mutex_release(); 
} } @@ -324,7 +324,7 @@ void Sunlock_object(x) ptr x; { if (!IMMEDIATE(x) && (si = MaybeSegInfo(ptr_get_segment(x))) != NULL && (g = si->generation) != static_generation) { ptr tc = get_thread_context(); tc_mutex_acquire(); - S_pants_down += 1; + THREAD_GC(tc)->during_alloc += 1; /* mobilize, if we haven't lost track */ if (si->must_mark < MUST_MARK_INFINITY) --si->must_mark; @@ -336,7 +336,7 @@ void Sunlock_object(x) ptr x; { if (g != 0) S_G.countof[g][countof_pair] += 1; } } - S_pants_down -= 1; + THREAD_GC(tc)->during_alloc -= 1; tc_mutex_release(); } } @@ -480,11 +480,11 @@ seginfo *S_ptr_seginfo(ptr p) { void Scompact_heap() { ptr tc = get_thread_context(); IBOOL eoc = S_G.enable_object_counts; - S_pants_down += 1; + THREAD_GC(tc)->during_alloc += 1; S_G.enable_object_counts = 1; S_gc_oce(tc, S_G.max_nonstatic_generation, static_generation, static_generation, Sfalse); S_G.enable_object_counts = eoc; - S_pants_down -= 1; + THREAD_GC(tc)->during_alloc -= 1; } /* S_check_heap checks for various kinds of heap consistency @@ -1162,7 +1162,7 @@ ptr S_do_gc(IGEN max_cg, IGEN min_tg, IGEN max_tg, ptr count_roots) { Slock_object(code); /* Scheme side grabs mutex before calling S_do_gc */ - S_pants_down += 1; + THREAD_GC(tc)->during_alloc += 1; if (S_G.new_max_nonstatic_generation > S_G.max_nonstatic_generation) { S_G.min_free_gen = S_G.new_min_free_gen; @@ -1279,7 +1279,7 @@ ptr S_do_gc(IGEN max_cg, IGEN min_tg, IGEN max_tg, ptr count_roots) { to get_more_room if and when they awake and try to allocate */ S_reset_allocation_pointer(tc); - S_pants_down -= 1; + THREAD_GC(tc)->during_alloc -= 1; Sunlock_object(code); diff --git a/racket/src/ChezScheme/c/globals.h b/racket/src/ChezScheme/c/globals.h index 6c3ec92ecc..ecdf11697f 100644 --- a/racket/src/ChezScheme/c/globals.h +++ b/racket/src/ChezScheme/c/globals.h @@ -39,9 +39,7 @@ EXTERN s_thread_key_t S_tc_key; EXTERN scheme_mutex_t S_tc_mutex; EXTERN s_thread_cond_t S_collect_cond; EXTERN s_thread_cond_t 
S_collect_thread0_cond; -EXTERN INT S_tc_mutex_depth; -EXTERN scheme_mutex_t S_gc_tc_mutex; -EXTERN IBOOL S_use_gc_tc_mutex; +EXTERN scheme_mutex_t S_alloc_mutex; /* ordered after S_tc_mutex */ EXTERN int S_collect_waiting_threads; EXTERN ptr S_collect_waiting_tcs[maximum_parallel_collect_threads]; # ifdef IMPLICIT_ATOMIC_AS_EXPLICIT @@ -50,6 +48,7 @@ EXTERN s_thread_mutex_t S_implicit_mutex; #endif /* segment.c */ +/* update of the segment table is protected by alloc mutex */ #ifdef segment_t2_bits #ifdef segment_t3_bits EXTERN t2table *S_segment_info[1<during_alloc) S_error_abort("nonrecoverable invalid memory reference"); else S_error_reset("invalid memory reference"); @@ -968,7 +968,7 @@ extern void Sretain_static_relocation(void) { #endif static void default_abnormal_exit(void) { - exit(1); + abort(); } extern void Sscheme_init(abnormal_exit) void (*abnormal_exit) PROTO((void)); { diff --git a/racket/src/ChezScheme/c/schsig.c b/racket/src/ChezScheme/c/schsig.c index d2e9ce76c2..20c24e6292 100644 --- a/racket/src/ChezScheme/c/schsig.c +++ b/racket/src/ChezScheme/c/schsig.c @@ -127,11 +127,8 @@ void S_split_and_resize() { * and clength + size(values) < stack-size; also, size may include * argument register values */ n = CONTCLENGTH(k) + (value_count * sizeof(ptr)) + stack_slop; - if (n >= SCHEMESTACKSIZE(tc)) { - tc_mutex_acquire(); + if (n >= SCHEMESTACKSIZE(tc)) S_reset_scheme_stack(tc, n); - tc_mutex_release(); - } } iptr S_continuation_depth(k) ptr k; { @@ -296,9 +293,7 @@ void S_overflow(tc, frame_request) ptr tc; iptr frame_request; { /* allocate a new stack, retaining same relative sfp */ sfp_offset = (uptr)TO_PTR(sfp) - (uptr)TO_PTR(split_point); - tc_mutex_acquire(); S_reset_scheme_stack(tc, above_split_size + frame_request); - tc_mutex_release(); SFP(tc) = (ptr)((uptr)SCHEMESTACK(tc) + sfp_offset); /* copy up everything above the split point. 
we don't know where the @@ -318,20 +313,21 @@ void S_error_abort(s) const char *s; { void S_abnormal_exit() { S_abnormal_exit_proc(); fprintf(stderr, "abnormal_exit procedure did not exit\n"); - exit(1); + abort(); } static void reset_scheme() { ptr tc = get_thread_context(); - tc_mutex_acquire(); + alloc_mutex_acquire(); /* eap should always be up-to-date now that we write-through to the tc when making any changes to eap when eap is a real register */ S_scan_dirty(TO_VOIDP(EAP(tc)), TO_VOIDP(REAL_EAP(tc))); S_reset_allocation_pointer(tc); S_reset_scheme_stack(tc, stack_slop); + alloc_mutex_release(); FRAME(tc,0) = TO_PTR(DOUNDERFLOW); - tc_mutex_release(); + S_maybe_fire_collector(THREAD_GC(tc)); } /* error_resets occur with the system in an unknown state, @@ -391,10 +387,10 @@ static void do_error(type, who, s, args) iptr type; const char *who, *s; ptr arg Scons(Sstring_utf8(s, -1), args))); #ifdef PTHREADS - while (S_tc_mutex_depth > 0) { + while (S_mutex_is_owner(&S_alloc_mutex)) + S_mutex_release(&S_alloc_mutex); + while (S_mutex_is_owner(&S_tc_mutex)) S_mutex_release(&S_tc_mutex); - S_tc_mutex_depth -= 1; - } #endif /* PTHREADS */ TRAP(tc) = (ptr)1; @@ -511,7 +507,7 @@ void S_fire_collector() { /* printf("firing collector!\n"); fflush(stdout); */ - if (!Sboolean_value(S_symbol_value(crp_id))) { + if (!Sboolean_value(S_symbol_racy_value(crp_id))) { ptr ls; /* printf("really firing collector!\n"); fflush(stdout); */ @@ -565,7 +561,7 @@ static BOOL WINAPI handle_signal(DWORD dwCtrlType) { #else ptr tc = get_thread_context(); #endif - if (!S_pants_down && Sboolean_value(KEYBOARDINTERRUPTPENDING(tc))) + if (!THREAD_GC(tc)->during_alloc && Sboolean_value(KEYBOARDINTERRUPTPENDING(tc))) return(FALSE); keyboard_interrupt(tc); return(TRUE); @@ -689,7 +685,7 @@ static void handle_signal(INT sig, UNUSED siginfo_t *si, UNUSED void *data) { /* disable keyboard interrupts in subordinate threads until we think of something more clever to do with them */ if (tc == 
TO_PTR(&S_G.thread_context)) { - if (!S_pants_down && Sboolean_value(KEYBOARDINTERRUPTPENDING(tc))) { + if (!THREAD_GC(tc)->during_alloc && Sboolean_value(KEYBOARDINTERRUPTPENDING(tc))) { /* this is a no-no, but the only other options are to ignore the signal or to kill the process */ RESET_SIGNAL @@ -715,11 +711,14 @@ static void handle_signal(INT sig, UNUSED siginfo_t *si, UNUSED void *data) { case SIGBUS: #endif /* SIGBUS */ case SIGSEGV: + { + ptr tc = get_thread_context(); RESET_SIGNAL - if (S_pants_down) + if (THREAD_GC(tc)->during_alloc) S_error_abort("nonrecoverable invalid memory reference"); else S_error_reset("invalid memory reference"); + } default: RESET_SIGNAL S_error_reset("unexpected signal"); @@ -820,7 +819,6 @@ void S_schsig_init() { } - S_pants_down = 0; S_set_symbol_value(S_G.collect_request_pending_id, Sfalse); init_signal_handlers(); diff --git a/racket/src/ChezScheme/c/segment.c b/racket/src/ChezScheme/c/segment.c index b3a6d29378..92aebe9fcf 100644 --- a/racket/src/ChezScheme/c/segment.c +++ b/racket/src/ChezScheme/c/segment.c @@ -256,6 +256,7 @@ static void initialize_seginfo(seginfo *si, NO_THREADS_UNUSED thread_gc *creator si->sweep_next = NULL; } +/* allocation mutex must be held */ iptr S_find_segments(creator, s, g, n) thread_gc *creator; ISPC s; IGEN g; iptr n; { chunkinfo *chunk, *nextchunk; seginfo *si, *nextsi, **prevsi; diff --git a/racket/src/ChezScheme/c/symbol.c b/racket/src/ChezScheme/c/symbol.c index d2e42613a8..1ac569f09f 100644 --- a/racket/src/ChezScheme/c/symbol.c +++ b/racket/src/ChezScheme/c/symbol.c @@ -22,6 +22,10 @@ ptr S_symbol_value(sym) ptr sym; { return SYMVAL(sym); } +ptr S_symbol_racy_value(ptr sym) NO_THREAD_SANITIZE { + return SYMVAL(sym); +} + void S_set_symbol_value(sym, val) ptr sym, val; { SETSYMVAL(sym, val); SETSYMCODE(sym, Sprocedurep(val) ? 
CLOSCODE(val) : S_G.nonprocedure_code); diff --git a/racket/src/ChezScheme/c/thread.c b/racket/src/ChezScheme/c/thread.c index d4a42d2c81..50eaf9bc21 100644 --- a/racket/src/ChezScheme/c/thread.c +++ b/racket/src/ChezScheme/c/thread.c @@ -36,11 +36,9 @@ void S_thread_init() { S_tc_mutex.count = 0; s_thread_cond_init(&S_collect_cond); s_thread_cond_init(&S_collect_thread0_cond); - S_tc_mutex_depth = 0; - s_thread_mutex_init(&S_gc_tc_mutex.pmutex); - S_tc_mutex.owner = 0; - S_tc_mutex.count = 0; - S_use_gc_tc_mutex = 0; + s_thread_mutex_init(&S_alloc_mutex.pmutex); + S_alloc_mutex.owner = 0; + S_alloc_mutex.count = 0; # ifdef IMPLICIT_ATOMIC_AS_EXPLICIT s_thread_mutex_init(&S_implicit_mutex); @@ -123,7 +121,11 @@ ptr S_create_thread_object(who, p_tc) const char *who; ptr p_tc; { FRAME(tc,0) = TO_PTR(&CODEIT(S_G.dummy_code_object,size_rp_header)); /* S_reset_allocation_pointer initializes ap and eap */ + alloc_mutex_acquire(); S_reset_allocation_pointer(tc); + alloc_mutex_release(); + S_maybe_fire_collector(tgc); + RANDOMSEED(tc) = most_positive_fixnum < 0xffffffff ? 
most_positive_fixnum : 0xffffffff; X(tc) = Y(tc) = U(tc) = V(tc) = W(tc) = FIX(0); @@ -159,6 +161,7 @@ ptr S_create_thread_object(who, p_tc) const char *who; ptr p_tc; { LZ4OUTBUFFER(tc) = 0; + tgc->during_alloc = 0; tgc->sweeper = main_sweeper_index; tgc->remote_range_start = (ptr)(uptr)-1; tgc->remote_range_end = (ptr)0; @@ -244,6 +247,8 @@ static IBOOL destroy_thread(tc) ptr tc; { *ls = Scdr(*ls); S_nthreads -= 1; + alloc_mutex_acquire(); + /* process remembered set before dropping allocation area */ S_scan_dirty((ptr *)EAP(tc), (ptr *)REAL_EAP(tc)); @@ -257,6 +262,8 @@ static IBOOL destroy_thread(tc) ptr tc; { S_close_off_thread_local_segment(tc, s, g); } + alloc_mutex_release(); + /* process guardian entries */ { ptr target, ges, obj, next; seginfo *si; @@ -361,7 +368,7 @@ void S_mutex_free(m) scheme_mutex_t *m; { free(m); } -void S_mutex_acquire(m) scheme_mutex_t *m; { +void S_mutex_acquire(scheme_mutex_t *m) NO_THREAD_SANITIZE { s_thread_t self = s_thread_self(); iptr count; INT status; @@ -379,7 +386,7 @@ void S_mutex_acquire(m) scheme_mutex_t *m; { m->count = 1; } -INT S_mutex_tryacquire(m) scheme_mutex_t *m; { +INT S_mutex_tryacquire(scheme_mutex_t *m) NO_THREAD_SANITIZE { s_thread_t self = s_thread_self(); iptr count; INT status; @@ -401,7 +408,12 @@ INT S_mutex_tryacquire(m) scheme_mutex_t *m; { return status; } -void S_mutex_release(m) scheme_mutex_t *m; { +IBOOL S_mutex_is_owner(scheme_mutex_t *m) NO_THREAD_SANITIZE { + s_thread_t self = s_thread_self(); + return ((m->count > 0) && s_thread_equal(m->owner, self)); +} + +void S_mutex_release(scheme_mutex_t *m) NO_THREAD_SANITIZE { s_thread_t self = s_thread_self(); iptr count; INT status; diff --git a/racket/src/ChezScheme/c/types.h b/racket/src/ChezScheme/c/types.h index 73d7842992..ecf3a736df 100644 --- a/racket/src/ChezScheme/c/types.h +++ b/racket/src/ChezScheme/c/types.h @@ -391,21 +391,37 @@ typedef struct { tc_mutex_release()\ }\ } -/* S_tc_mutex_depth records the number of nested mutex acquires 
in - C code on tc_mutex. it is used by do_error to release tc_mutex - the appropriate number of times. -*/ + #define tc_mutex_acquire() do { \ + assert_no_alloc_mutex(); \ S_mutex_acquire(&S_tc_mutex); \ - S_tc_mutex_depth += 1; \ } while (0); #define tc_mutex_release() do { \ - S_tc_mutex_depth -= 1; \ S_mutex_release(&S_tc_mutex); \ } while (0); -#define gc_tc_mutex_acquire() S_mutex_acquire(&S_gc_tc_mutex) -#define gc_tc_mutex_release() S_mutex_release(&S_gc_tc_mutex) +/* Allocation mutex is ordered after tc mutex */ +#define alloc_mutex_acquire() do { \ + S_mutex_acquire(&S_alloc_mutex); \ + } while (0); +#define alloc_mutex_release() do { \ + S_mutex_release(&S_alloc_mutex); \ + } while (0); + +/* To enable checking lock order: */ +#if 0 +# define assert_no_alloc_mutex() do { \ + if (S_mutex_is_owner(&S_alloc_mutex)) \ + S_error_abort("cannot take tc mutex after allocation mutex"); \ + } while (0) +#else +# define assert_no_alloc_mutex() do { } while (0) +#endif + +#define IS_TC_MUTEX_OWNER() S_mutex_is_owner(&S_tc_mutex) +#define IS_ALLOC_MUTEX_OWNER() S_mutex_is_owner(&S_alloc_mutex) + +/* Enable in "version.h": */ #ifdef IMPLICIT_ATOMIC_AS_EXPLICIT # define AS_IMPLICIT_ATOMIC(T, X) ({ \ T RESLT; \ @@ -434,8 +450,10 @@ typedef struct { #define reactivate_thread(tc) {} #define tc_mutex_acquire() do {} while (0) #define tc_mutex_release() do {} while (0) -#define gc_tc_mutex_acquire() do {} while (0) -#define gc_tc_mutex_release() do {} while (0) +#define alloc_mutex_acquire() do {} while (0) +#define alloc_mutex_release() do {} while (0) +#define IS_TC_MUTEX_OWNER() 1 +#define IS_ALLOC_MUTEX_OWNER() 1 #define S_cas_load_acquire_voidp(a, old, new) (*(a) = new, 1) #define S_cas_store_release_voidp(a, old, new) (*(a) = new, 1) #define S_cas_load_acquire_ptr(a, old, new) (*(a) = new, 1) @@ -458,6 +476,9 @@ typedef struct thread_gc { ptr tc; ptr thread; /* set only when collecting */ + int during_alloc; + IBOOL queued_fire; + struct thread_gc *next; ptr 
base_loc[static_generation+1][max_real_space+1]; diff --git a/racket/src/ChezScheme/c/version.h b/racket/src/ChezScheme/c/version.h index d61fc81b3a..8472375a75 100644 --- a/racket/src/ChezScheme/c/version.h +++ b/racket/src/ChezScheme/c/version.h @@ -472,8 +472,15 @@ typedef char tputsputcchar; # define NO_THREADS_UNUSED UNUSED #endif +#if defined(__has_feature) +# if __has_feature(thread_sanitizer) +# define NO_THREAD_SANITIZE __attribute__((no_sanitize("thread"))) +# define IMPLICIT_ATOMIC_AS_EXPLICIT +# endif +#endif +#ifndef NO_THREAD_SANITIZE +# define NO_THREAD_SANITIZE /* empty */ +#endif + /* Use "/dev/urandom" everywhere except Windows */ #define USE_DEV_URANDOM_UUID - -/* For debugging: */ -/* #define IMPLICIT_ATOMIC_AS_EXPLICIT */ diff --git a/racket/src/ChezScheme/makefiles/Mf-install.in b/racket/src/ChezScheme/makefiles/Mf-install.in index 023f9dbcea..2fbaf09aa4 100644 --- a/racket/src/ChezScheme/makefiles/Mf-install.in +++ b/racket/src/ChezScheme/makefiles/Mf-install.in @@ -62,7 +62,7 @@ InstallLZ4Target= # no changes should be needed below this point # ############################################################################### -Version=csv9.5.3.39 +Version=csv9.5.3.40 Include=boot/$m PetiteBoot=boot/$m/petite.boot SchemeBoot=boot/$m/scheme.boot diff --git a/racket/src/ChezScheme/mats/thread.ms b/racket/src/ChezScheme/mats/thread.ms index 3dd289679c..51988677a7 100644 --- a/racket/src/ChezScheme/mats/thread.ms +++ b/racket/src/ChezScheme/mats/thread.ms @@ -932,7 +932,7 @@ (let ([thread-count 2] [iterations 10000]) (equal? 
(parameterize ([collect-trip-bytes (expt 2 15)] - [collect-generation-radix 1]) + [collect-generation-radix 2]) ; using 1 risks extreme slowness via major collections (let ([out '()] [out-mutex (make-mutex)] [out-condition (make-condition)] diff --git a/racket/src/ChezScheme/s/cmacros.ss b/racket/src/ChezScheme/s/cmacros.ss index f86f6c399d..ea2289980f 100644 --- a/racket/src/ChezScheme/s/cmacros.ss +++ b/racket/src/ChezScheme/s/cmacros.ss @@ -357,7 +357,7 @@ ;; --------------------------------------------------------------------- ;; Version and machine types: -(define-constant scheme-version #x09050327) +(define-constant scheme-version #x09050328) (define-syntax define-machine-types (lambda (x) @@ -3219,8 +3219,6 @@ [double double] [double uptr] [double double double] - [int32 int32] - [int32 int32 uptr] [int32 uptr uptr uptr uptr uptr] [uptr] [uptr uptr] diff --git a/racket/src/ChezScheme/s/mkgc.ss b/racket/src/ChezScheme/s/mkgc.ss index e64b2336ca..c5c4dfe051 100644 --- a/racket/src/ChezScheme/s/mkgc.ss +++ b/racket/src/ChezScheme/s/mkgc.ss @@ -611,11 +611,11 @@ (count countof-phantom) ;; Separate from `count`, because we want to track sizes even ;; if counting is not enabled: - (GC_TC_MUTEX_ACQUIRE) + (GC_MUTEX_ACQUIRE) (set! (array-ref (array-ref S_G.bytesof _tg_) countof-phantom) += (phantom-length _)) - (GC_TC_MUTEX_RELEASE))] + (GC_MUTEX_RELEASE))] [measure (set! measure_total += (phantom-length _))] [else])])])) @@ -743,9 +743,9 @@ [(== (continuation-stack-length _) opportunistic-1-shot-flag) (set! (continuation-stack-length _copy_) (continuation-stack-clength _)) ;; May need to recur at end to promote link: - (GC_TC_MUTEX_ACQUIRE) + (GC_MUTEX_ACQUIRE) (set! conts_to_promote (S_cons_in (-> _tgc_ tc) space_new 0 _copy_ conts_to_promote)) - (GC_TC_MUTEX_RELEASE)] + (GC_MUTEX_RELEASE)] [else (copy continuation-stack-length)])] [else @@ -815,9 +815,9 @@ ;; determine if key is old, since keyval might or might not have been ;; swept already. 
NB: assuming keyvals are always pairs. (when (&& (!= next Sfalse) (OLDSPACE keyval)) - (GC_TC_MUTEX_ACQUIRE) + (GC_MUTEX_ACQUIRE) (set! tlcs_to_rehash (S_cons_in (-> _tgc_ tc) space_new 0 _copy_ tlcs_to_rehash)) - (GC_TC_MUTEX_RELEASE))] + (GC_MUTEX_RELEASE))] [else (trace-nonself tlc-keyval) (trace-nonself tlc-next)])) diff --git a/racket/src/version/racket_version.h b/racket/src/version/racket_version.h index 6926e5c7e7..ed88288e3f 100644 --- a/racket/src/version/racket_version.h +++ b/racket/src/version/racket_version.h @@ -16,7 +16,7 @@ #define MZSCHEME_VERSION_X 7 #define MZSCHEME_VERSION_Y 9 #define MZSCHEME_VERSION_Z 0 -#define MZSCHEME_VERSION_W 1 +#define MZSCHEME_VERSION_W 2 /* A level of indirection makes `#` work as needed: */ #define AS_a_STR_HELPER(x) #x