diff --git a/LOG b/LOG index 922b7b077f..effc48cf36 100644 --- a/LOG +++ b/LOG @@ -456,3 +456,9 @@ - fix overflow detection for fxsll, fxarithmetic-shift-left, and fxarithmetic-shift library.ss, fx.ms, release_notes.stex +- added ephemeron pairs and changed weak hashtables to use + ephemeron pairs for key--value mapping to avoid the key-in-value + problem + prims.ss, primdata.ss, newhash.ss, fasl.ss, mkheader.ss + cmacro.ss, prim5.c, fasl.c, gc.c, gcwrapper.c, types.h, + 4.ms, hash.ms, objects.stex, smgmt.stex, csug.bib diff --git a/c/fasl.c b/c/fasl.c index d791fb6a7f..8bc65837a7 100644 --- a/c/fasl.c +++ b/c/fasl.c @@ -860,6 +860,11 @@ static void faslin(ptr tc, ptr *x, ptr t, ptr *pstrbuf, faslFile f) { faslin(tc, &INITCAR(*x), t, pstrbuf, f); faslin(tc, &INITCDR(*x), t, pstrbuf, f); return; + case fasl_type_ephemeron: + *x = S_cons_in(space_ephemeron, 0, FIX(0), FIX(0)); + faslin(tc, &INITCAR(*x), t, pstrbuf, f); + faslin(tc, &INITCDR(*x), t, pstrbuf, f); + return; case fasl_type_code: { iptr n, m, a; INT flags; iptr free; ptr co, reloc, name; diff --git a/c/gc.c b/c/gc.c index 23fa731184..113d0c939e 100644 --- a/c/gc.c +++ b/c/gc.c @@ -29,7 +29,7 @@ static uptr list_length PROTO((ptr ls)); static ptr dosort PROTO((ptr ls, uptr n)); static ptr domerge PROTO((ptr l1, ptr l2)); static IBOOL search_locked PROTO((ptr p)); -static ptr copy PROTO((ptr pp, ISPC pps)); +static ptr copy PROTO((ptr pp, seginfo *si)); static void sweep_ptrs PROTO((ptr *p, iptr n)); static void sweep PROTO((ptr tc, ptr p, IBOOL sweep_pure)); static ptr copy_stack PROTO((ptr old, iptr *length, iptr clength)); @@ -49,6 +49,13 @@ static void sweep_code_object PROTO((ptr tc, ptr co)); static void record_dirty_segment PROTO((IGEN from_g, IGEN to_g, seginfo *si)); static void sweep_dirty PROTO((void)); static void resweep_dirty_weak_pairs PROTO((void)); +static void add_ephemeron_to_pending PROTO((ptr p)); +static void add_trigger_ephemerons_to_repending PROTO((ptr p)); +static void check_trigger_ephemerons PROTO((seginfo *si)); +static void check_ephemeron PROTO((ptr pe, int add_to_trigger)); +static void check_pending_ephemerons PROTO(()); +static int check_dirty_ephemeron PROTO((ptr pe, int tg, int youngest)); +static void clear_trigger_ephemerons PROTO(()); /* MAXPTR is used to pad the sorted_locked_object vector. The pad value must be greater than any heap address */ #define MAXPTR ((ptr)-1) @@ -111,10 +118,10 @@ uptr list_length(ptr ls) { * youngest = GENERATION(*ppp); */ #define relocate_dirty(ppp,tg,youngest) {\ - ptr PP = *ppp; seginfo *SI; ISPC S;\ + ptr PP = *ppp; seginfo *SI;\ if (!IMMEDIATE(PP) && (SI = MaybeSegInfo(ptr_get_segment(PP))) != NULL) {\ - if ((S = SI->space) & space_old) {\ - relocate_help_help(ppp, PP, S)\ + if (SI->space & space_old) {\ + relocate_help_help(ppp, PP, SI)\ youngest = tg;\ } else {\ IGEN pg;\ @@ -126,38 +133,38 @@ uptr list_length(ptr ls) { } #define relocate_help(ppp, pp) {\ - seginfo *SI; ISPC S;\ - if (!IMMEDIATE(pp) && (SI = MaybeSegInfo(ptr_get_segment(pp))) != NULL && (S = SI->space) & space_old)\ - relocate_help_help(ppp, pp, S)\ + seginfo *SI; \ + if (!IMMEDIATE(pp) && (SI = MaybeSegInfo(ptr_get_segment(pp))) != NULL && SI->space & space_old)\ + relocate_help_help(ppp, pp, SI)\ } -#define relocate_help_help(ppp, pp, s) {\ +#define relocate_help_help(ppp, pp, si) {\ if (FWDMARKER(pp) == forward_marker && TYPEBITS(pp) != type_flonum)\ *ppp = FWDADDRESS(pp);\ else\ - *ppp = copy(pp, s);\ + *ppp = copy(pp, si);\ } #define relocate_return_addr(pcp) {\ - ISPC S;\ + seginfo *SI;\ ptr XCP;\ XCP = *(pcp);\ - if ((S = SPACE(XCP)) & space_old) {\ + if ((SI = SegInfo(ptr_get_segment(XCP)))->space & space_old) { \ iptr CO;\ CO = ENTRYOFFSET(XCP) + ((uptr)XCP - (uptr)&ENTRYOFFSET(XCP));\ - relocate_code(pcp,XCP,CO,S)\ + relocate_code(pcp,XCP,CO,SI)\ }\ } /* in the call to copy below, assuming SPACE(PP) == SPACE(XCP) since PP and XCP point to/into the same object */ -#define relocate_code(pcp,XCP,CO,S) {\ +#define relocate_code(pcp,XCP,CO,SI) {\ ptr PP;\ PP = (ptr)((uptr)XCP - CO);\ if (FWDMARKER(PP) == forward_marker)\ PP = FWDADDRESS(PP);\ else\ - PP = copy(PP, S);\ + PP = copy(PP, SI);\ *pcp = (ptr)((uptr)PP + CO);\ } @@ -185,7 +192,19 @@ static IBOOL search_locked(ptr p) { #define locked(p) (sorted_locked_objects != FIX(0) && search_locked(p)) -static ptr copy(pp, pps) ptr pp; ISPC pps; { +FORCEINLINE void check_trigger_ephemerons(seginfo *si) { + /* Registering ephemerons to recheck at the granularity of a segment + means that the worst-case complexity of GC is quadratic in the + number of objects that fit into a segment (but that only happens + if the objects are ephemeron keys that are reachable just through + a chain via the value field of the same ephemerons). */ + if (si->trigger_ephemerons) { + add_trigger_ephemerons_to_repending(si->trigger_ephemerons); + si->trigger_ephemerons = NULL; + } +} + +static ptr copy(pp, si) ptr pp; seginfo *si; { ptr p, tf; ITYPE t; IGEN tg; if (locked(pp)) return pp; @@ -194,6 +213,8 @@ static ptr copy(pp, pps) ptr pp; ISPC pps; { change = 1; + check_trigger_ephemerons(si); + if ((t = TYPEBITS(pp)) == type_typed_object) { tf = TYPEFIELD(pp); if (TYPEP(tf, mask_record, type_record)) { @@ -418,9 +439,18 @@ static ptr copy(pp, pps) ptr pp; ISPC pps; { return (ptr)0 /* not reached */; } } else if (t == type_pair) { - ptr qq = Scdr(pp); ptr q; seginfo *si; - if (qq != pp && TYPEBITS(qq) == type_pair && (si = MaybeSegInfo(ptr_get_segment(qq))) != NULL && si->space == pps && FWDMARKER(qq) != forward_marker && !locked(qq)) { - if (pps == (space_weakpair | space_old)) { + if (si->space == (space_ephemeron | space_old)) { +#ifdef ENABLE_OBJECT_COUNTS + S_G.countof[tg][countof_ephemeron] += 1; +#endif /* ENABLE_OBJECT_COUNTS */ + find_room(space_ephemeron, tg, type_pair, size_ephemeron, p); + INITCAR(p) = Scar(pp); + INITCDR(p) = Scdr(pp); + } else { + ptr qq = Scdr(pp); ptr q; seginfo *qsi; + if (qq != pp && TYPEBITS(qq) == type_pair && (qsi = MaybeSegInfo(ptr_get_segment(qq))) != NULL && qsi->space == si->space && FWDMARKER(qq) != forward_marker && !locked(qq)) { + check_trigger_ephemerons(qsi); + if (si->space == (space_weakpair | space_old)) { #ifdef ENABLE_OBJECT_COUNTS S_G.countof[tg][countof_weakpair] += 2; #endif /* ENABLE_OBJECT_COUNTS */ @@ -439,7 +469,7 @@ static ptr copy(pp, pps) ptr pp; ISPC pps; { FWDMARKER(qq) = forward_marker; FWDADDRESS(qq) = q; } else { - if (pps == (space_weakpair | space_old)) { + if (si->space == (space_weakpair | space_old)) { #ifdef ENABLE_OBJECT_COUNTS S_G.countof[tg][countof_weakpair] += 1; #endif /* ENABLE_OBJECT_COUNTS */ @@ -453,6 +483,7 @@ static ptr copy(pp, pps) ptr pp; ISPC pps; { INITCAR(p) = Scar(pp); INITCDR(p) = qq; } + } } else if (t == type_closure) { ptr code; @@ -533,10 +564,15 @@ static void sweep(ptr tc, ptr p, IBOOL sweep_pure) { ptr tf; ITYPE t; if ((t = TYPEBITS(p)) == type_pair) { - if ((SPACE(p) & ~(space_locked | space_old)) != space_weakpair) { - relocate(&INITCAR(p)) + ISPC s = SPACE(p) & ~(space_locked | space_old); + if (s == space_ephemeron) + add_ephemeron_to_pending(p); + else { + if (s != space_weakpair) { + relocate(&INITCAR(p)) + } + relocate(&INITCDR(p)) } - relocate(&INITCDR(p)) } else if (t == type_closure) { if (sweep_pure) { ptr code; @@ -827,7 +863,7 @@ void GCENTRY(ptr tc, IGEN mcg, IGEN tg) { if (FWDMARKER(sym) != forward_marker && /* coordinate with alloc.c */ (SYMVAL(sym) != sunbound || SYMPLIST(sym) != Snil || SYMSPLIST(sym) != Snil)) - (void)copy(sym, SPACE(sym)); + (void)copy(sym, SegInfo(ptr_get_segment(sym))); } S_G.buckets_of_generation[g] = NULL; } @@ -959,6 +995,9 @@ void GCENTRY(ptr tc, IGEN mcg, IGEN tg) { resweep_dirty_weak_pairs(); resweep_weak_pairs(tg); + /* still-pending ephemerons all go to bwp */ + clear_trigger_ephemerons(); + /* forward car fields of locked and unlocked older weak pairs */ for (g = mcg + 1; g <= static_generation; INCRGEN(g)) { for (ls = S_G.locked_objects[g]; ls != Snil; ls = Scdr(ls)) { @@ -1262,6 +1301,12 @@ static void sweep_generation(tc, g) ptr tc; IGEN g; { pp += 1; }) + sweep_space(space_ephemeron, { + p = TYPE((ptr)pp, type_pair); + add_ephemeron_to_pending(p); + pp += size_ephemeron / sizeof(ptr); + }) + sweep_space(space_pure, { relocate_help(pp, p) p = *(pp += 1); @@ -1292,6 +1337,13 @@ static void sweep_generation(tc, g) ptr tc; IGEN g; { pp = (ptr *)((iptr)pp + size_record_inst(UNFIX(RECORDDESCSIZE(RECORDINSTTYPE(p))))); }) + + /* Waiting until sweeping doesn't trigger a change reduces the + chance that an ephemeron must be reigistered as a + segment-specific trigger or gets triggered for recheck, but + it doesn't change the worst-case complexity. */ + if (!change) + check_pending_ephemerons(); } while (change); } @@ -1299,7 +1351,11 @@ static iptr size_object(p) ptr p; { ITYPE t; ptr tf; if ((t = TYPEBITS(p)) == type_pair) { - return size_pair; + seginfo *si; + if ((si = MaybeSegInfo(ptr_get_segment(p))) != NULL && (si->space & ~(space_locked | space_old)) == space_ephemeron) + return size_ephemeron; + else + return size_pair; } else if (t == type_closure) { ptr code = CLOSCODE(p); if (CODETYPE(code) & (code_flag_continuation << code_flags_offset)) @@ -1873,6 +1929,12 @@ static void sweep_dirty(void) { relocate_dirty(pp, tg, youngest) pp += 1; } + } else if (s == space_ephemeron) { + while (pp < ppend && *pp != forward_marker) { + ptr p = TYPE((ptr)pp, type_pair); + youngest = check_dirty_ephemeron(p, tg, youngest); + pp += size_ephemeron / sizeof(ptr); + } } else { S_error_abort("sweep_dirty(gc): unexpected space"); } @@ -1965,3 +2027,152 @@ static void resweep_dirty_weak_pairs() { record_dirty_segment(from_g, min_youngest, dirty_si); } } + +static ptr pending_ephemerons = NULL; +/* Ephemerons that we haven't looked at, chained through `next`. */ + +static ptr trigger_ephemerons = NULL; +/* Ephemerons that we've checked and added to segment triggers, + chained through `next`. Ephemerons attached to a segment are + chained through `trigger-next`. A #t in `trigger-next` means that + the ephemeron has been processed, so we don't need to remove it + from the trigger list in a segment. */ + +static ptr repending_ephemerons = NULL; +/* Ephemerons in `trigger_ephemerons` that we need to inspect again, + removed from the triggering segment and chained here through + `trigger-next`. */ + +static void add_ephemeron_to_pending(ptr pe) { + /* We could call check_ephemeron directly here, but the indirection + through `pending_ephemerons` can dramatically decrease the number + of times that we have to trigger re-checking, especially since + check_pending_pehemerons() is run only after all other sweep + opportunities are exhausted. */ + EPHEMERONNEXT(pe) = pending_ephemerons; + pending_ephemerons = pe; +} + +static void add_trigger_ephemerons_to_repending(ptr pe) { + ptr last_pe = pe, next_pe = EPHEMERONTRIGGERNEXT(pe); + while (next_pe != NULL) { + last_pe = next_pe; + next_pe = EPHEMERONTRIGGERNEXT(next_pe); + } + EPHEMERONTRIGGERNEXT(last_pe) = repending_ephemerons; + repending_ephemerons = pe; +} + +static void check_ephemeron(ptr pe, int add_to_trigger) { + ptr p; + seginfo *si; + + p = Scar(pe); + if (!IMMEDIATE(p) && (si = MaybeSegInfo(ptr_get_segment(p))) != NULL && si->space & space_old && !locked(p)) { + if (FWDMARKER(p) == forward_marker && TYPEBITS(p) != type_flonum) { + INITCAR(pe) = FWDADDRESS(p); + relocate(&INITCDR(pe)) + if (!add_to_trigger) + EPHEMERONTRIGGERNEXT(pe) = Strue; /* in trigger list, #t means "done" */ + } else { + /* Not reached, so far; install as trigger */ + EPHEMERONTRIGGERNEXT(pe) = si->trigger_ephemerons; + si->trigger_ephemerons = pe; + if (add_to_trigger) { + EPHEMERONNEXT(pe) = trigger_ephemerons; + trigger_ephemerons = pe; + } + } + } else { + relocate(&INITCDR(pe)) + } +} + +static void check_pending_ephemerons() { + ptr pe, next_pe; + + pe = pending_ephemerons; + pending_ephemerons = NULL; + while (pe != NULL) { + next_pe = EPHEMERONNEXT(pe); + check_ephemeron(pe, 1); + pe = next_pe; + } + + pe = repending_ephemerons; + repending_ephemerons = NULL; + while (pe != NULL) { + next_pe = EPHEMERONTRIGGERNEXT(pe); + check_ephemeron(pe, 0); + pe = next_pe; + } +} + +/* Like check_ephemeron(), but for a dirty, old-generation + ephemeron (that was not yet added to the pending list), so we can + be less pessimistic than setting `youngest` to the target + generation: */ +static int check_dirty_ephemeron(ptr pe, int tg, int youngest) { + ptr p; + seginfo *si; + + p = Scar(pe); + if (!IMMEDIATE(p) && (si = MaybeSegInfo(ptr_get_segment(p))) != NULL) { + if (si->space & space_old && !locked(p)) { + if (FWDMARKER(p) == forward_marker && TYPEBITS(p) != type_flonum) { + INITCAR(pe) = FWDADDRESS(p); + relocate(&INITCDR(pe)) + youngest = tg; + } else { + /* Not reached, so far; install as trigger */ + EPHEMERONTRIGGERNEXT(pe) = si->trigger_ephemerons; + si->trigger_ephemerons = pe; + EPHEMERONNEXT(pe) = trigger_ephemerons; + trigger_ephemerons = pe; + /* Make the consistent (but pessimistic w.r.t. to wrong-way + pointers) assumption that the key will stay live and move + to the target generation. That assumption covers the value + part, too, since it can't end up younger than the target + generation. */ + youngest = tg; + } + } else { + int pg; + if ((pg = si->generation) < youngest) + youngest = pg; + relocate_dirty(&INITCDR(pe), tg, youngest) + } + } else { + /* Non-collectable key means that the value determines + `youngest`: */ + relocate_dirty(&INITCDR(pe), tg, youngest) + } + + return youngest; +} + +static void clear_trigger_ephemerons() { + ptr pe; + + if (pending_ephemerons != NULL) + S_error_abort("clear_trigger_ephemerons(gc): non-empty pending list"); + + pe = trigger_ephemerons; + trigger_ephemerons = NULL; + while (pe != NULL) { + if (EPHEMERONTRIGGERNEXT(pe) == Strue) { + /* The ephemeron was triggered and retains its key and value */ + } else { + seginfo *si; + ptr p = Scar(pe); + /* Key never became reachable, so clear key and value */ + INITCAR(pe) = Sbwp_object; + INITCDR(pe) = Sbwp_object; + + /* Remove trigger */ + si = SegInfo(ptr_get_segment(p)); + si->trigger_ephemerons = NULL; + } + pe = EPHEMERONNEXT(pe); + } +} diff --git a/c/gcwrapper.c b/c/gcwrapper.c index a0f82e1e48..c9445920d9 100644 --- a/c/gcwrapper.c +++ b/c/gcwrapper.c @@ -128,6 +128,8 @@ void S_gc_init() { S_G.countof_size[countof_guardian] = size_guardian_entry; INITVECTIT(S_G.countof_names, countof_oblist) = S_intern((const unsigned char *)"oblist"); S_G.countof_size[countof_guardian] = 0; + INITVECTIT(S_G.countof_names, countof_ephemeron) = S_intern((const unsigned char *)"ephemron"); + S_G.countof_size[countof_ephemeron] = 0; for (i = 0; i < countof_types; i += 1) { if (Svector_ref(S_G.countof_names, i) == FIX(0)) { fprintf(stderr, "uninitialized countof_name at index %d\n", i); @@ -505,7 +507,7 @@ void S_check_heap(aftergc) IBOOL aftergc; { S_checkheap_errors += 1; printf("!!! unexpected generation %d segment %#tx in space_new\n", g, (ptrdiff_t)seg); } - } else if (s == space_impure || s == space_symbol || s == space_pure || s == space_weakpair) { + } else if (s == space_impure || s == space_symbol || s == space_pure || s == space_weakpair || s == space_ephemeron) { /* out of date: doesn't handle space_port, space_continuation, space_code, space_pure_typed_object, space_impure_record */ nl = (ptr *)S_G.next_loc[s][g]; @@ -530,7 +532,7 @@ void S_check_heap(aftergc) IBOOL aftergc; { /* verify that dirty bits are set appropriately */ /* out of date: doesn't handle space_impure_record, space_port, and maybe others */ /* also doesn't check the SYMCODE for symbols */ - if (s == space_impure || s == space_symbol || s == space_weakpair) { + if (s == space_impure || s == space_symbol || s == space_weakpair || s == space_ephemeron) { found_eos = 0; pp2 = pp1 = build_ptr(seg, 0); for (d = 0; d < cards_per_segment; d += 1) { @@ -588,7 +590,7 @@ void S_check_heap(aftergc) IBOOL aftergc; { } } } - if (aftergc && s != space_empty && !(s & space_locked) && (g == 0 || (s != space_impure && s != space_symbol && s != space_port && s != space_weakpair && s != space_impure_record))) { + if (aftergc && s != space_empty && !(s & space_locked) && (g == 0 || (s != space_impure && s != space_symbol && s != space_port && s != space_weakpair && s != space_ephemeron && s != space_impure_record))) { for (d = 0; d < cards_per_segment; d += 1) { if (si->dirty_bytes[d] != 0xff) { S_checkheap_errors += 1; @@ -669,7 +671,7 @@ static void check_dirty() { S_checkheap_errors += 1; printf("!!! (check_dirty): dirty byte = %d for segment %#tx in %d -> %d dirty list\n", mingval, (ptrdiff_t)(si->number), from_g, to_g); } - if (s != space_new && s != space_impure && s != space_symbol && s != space_port && s != space_impure_record && s != space_weakpair) { + if (s != space_new && s != space_impure && s != space_symbol && s != space_port && s != space_impure_record && s != space_weakpair && s != space_ephemeron) { S_checkheap_errors += 1; printf("!!! (check_dirty): unexpected space %d for dirty segment %#tx\n", s, (ptrdiff_t)(si->number)); } @@ -684,6 +686,7 @@ static void check_dirty() { check_dirty_space(space_port); check_dirty_space(space_impure_record); check_dirty_space(space_weakpair); + check_dirty_space(space_ephemeron); fflush(stdout); } diff --git a/c/prim5.c b/c/prim5.c index 0b7fb4af97..0cffbc2189 100644 --- a/c/prim5.c +++ b/c/prim5.c @@ -34,6 +34,8 @@ static ptr s_trunc_rem PROTO((ptr x, ptr y)); static ptr s_fltofx PROTO((ptr x)); static ptr s_weak_cons PROTO((ptr car, ptr cdr)); static ptr s_weak_pairp PROTO((ptr p)); +static ptr s_ephemeron_cons PROTO((ptr car, ptr cdr)); +static ptr s_ephemeron_pairp PROTO((ptr p)); static ptr s_oblist PROTO((void)); static ptr s_bigoddp PROTO((ptr n)); static ptr s_float PROTO((ptr x)); @@ -176,6 +178,20 @@ static ptr s_weak_pairp(p) ptr p; { return Spairp(p) && (si = MaybeSegInfo(ptr_get_segment(p))) != NULL && (si->space & ~space_locked) == space_weakpair ? Strue : Sfalse; } +static ptr s_ephemeron_cons(car, cdr) ptr car, cdr; { + ptr p; + + tc_mutex_acquire() + p = S_cons_in(space_ephemeron, 0, car, cdr); + tc_mutex_release() + return p; +} + +static ptr s_ephemeron_pairp(p) ptr p; { + seginfo *si; + return Spairp(p) && (si = MaybeSegInfo(ptr_get_segment(p))) != NULL && (si->space & ~space_locked) == space_ephemeron ? Strue : Sfalse; +} + static ptr s_oblist() { ptr ls = Snil; iptr idx = S_G.oblist_length; @@ -1465,6 +1481,8 @@ void S_prim5_init() { Sforeign_symbol("(cs)s_fltofx", (void *)s_fltofx); Sforeign_symbol("(cs)s_weak_cons", (void *)s_weak_cons); Sforeign_symbol("(cs)s_weak_pairp", (void *)s_weak_pairp); + Sforeign_symbol("(cs)s_ephemeron_cons", (void *)s_ephemeron_cons); + Sforeign_symbol("(cs)s_ephemeron_pairp", (void *)s_ephemeron_pairp); Sforeign_symbol("(cs)continuation_depth", (void *)S_continuation_depth); Sforeign_symbol("(cs)single_continuation", (void *)S_single_continuation); Sforeign_symbol("(cs)c_exit", (void *)c_exit); diff --git a/c/types.h b/c/types.h index ec361ac335..ca4e582070 100644 --- a/c/types.h +++ b/c/types.h @@ -125,6 +125,7 @@ typedef struct _seginfo { struct _seginfo *next; /* pointer to the next seginfo (used in occupied_segments and unused_segs */ struct _seginfo **dirty_prev; /* pointer to the next pointer on the previous seginfo in the DirtySegments list */ struct _seginfo *dirty_next; /* pointer to the next seginfo on the DirtySegments list */ + ptr trigger_ephemerons; /* ephemerons to re-check if object in segment is copied out */ octet dirty_bytes[cards_per_segment]; /* one dirty byte per card */ } seginfo; diff --git a/csug/csug.bib b/csug/csug.bib index 68bfe52e45..50d7eac70a 100644 --- a/csug/csug.bib +++ b/csug/csug.bib @@ -555,3 +555,14 @@ year = 2008} address = {Indianapolis, IN, USA}, school = {Indiana University} } + +@inproceedings{Hayes:ephemerons, + author = {Barry Hayes}, + title = {Ephemerons: a New Finalization Mechanism}, + booktitle = {\it Proceedings of the 12th ACM SIGPLAN + Conference on Object-Oriented Languages, Programming, Systems, + and Applications}, + pages = {176--183}, + url = {https://doi.org/10.1145/263700.263733}, + year = {1997} +} diff --git a/csug/objects.stex b/csug/objects.stex index c20ddd28ca..0d4e5569e4 100644 --- a/csug/objects.stex +++ b/csug/objects.stex @@ -1820,7 +1820,9 @@ except the keys of the hashtable are held weakly, i.e., they are not protected from the garbage collector. Keys reclaimed by the garbage collector are removed from the table, and their associated values are dropped the next time the table -is modified, if not sooner. +is modified, if not sooner. A value in the hashtable can refer to a +key in the hashtable without preventing the garbage collector from +reclaiming the key (because keys are paired values using ephemeron pairs). A copy of a weak eq or eqv hashtable created by \scheme{hashtable-copy} is also weak. diff --git a/csug/smgmt.stex b/csug/smgmt.stex index 2cdde59816..07d5809a89 100644 --- a/csug/smgmt.stex +++ b/csug/smgmt.stex @@ -309,7 +309,7 @@ memory footprint, while setting it to a larger value may result in fewer calls into the operating system to request and free memory space. -\section{Weak Pairs and Guardians\label{SECTGUARDWEAKPAIRS}} +\section{Weak Pairs, Ephemeron Pairs, and Guardians\label{SECTGUARDWEAKPAIRS}} \index{weak pairs}\index{weak pointers}\emph{Weak pairs} allow programs to maintain \emph{weak pointers} to objects. @@ -317,21 +317,30 @@ A weak pointer to an object does not prevent the object from being reclaimed by the storage management system, but it does remain valid as long as the object is otherwise accessible in the system. +\index{ephemeron pairs}\emph{Ephemeron pairs} are like weak pairs, but +ephemeron pairs combine two pointers where the second is retained only +as long as the first is retained. + \index{guardians}\emph{Guardians} allow programs to protect objects from deallocation by the garbage collector and to determine when the objects would otherwise have been deallocated. -Weak pairs and guardians allow programs to retain +Weak pairs, ephemeron pairs, and guardians allow programs to retain information about objects in separate data structures (such as hash tables) without concern that maintaining this information will cause -the objects to remain indefinitely in the system. +the objects to remain indefinitely in the system. Ephemeron pairs +allow such data structures to retain key--value combinations +where a value may refer to its key, but the combination +can be reclaimed if neither must be saved otherwise. In addition, guardians allow objects to be saved from deallocation indefinitely so that they can be reused or so that clean-up or other actions can be performed using the data stored within the objects. The implementation of guardians and weak pairs used by {\ChezScheme} -is described in~\cite{Dybvig:guardians}. +is described in~\cite{Dybvig:guardians}. Ephemerons are described +in~\cite{Hayes:ephemerons}, but the implementation in {\ChezScheme} +avoids quadratic-time worst-case behavior. %---------------------------------------------------------------------------- \entryheader\label{desc:weak-cons} @@ -417,6 +426,89 @@ dropped, but makes no guarantees about when this will occur. \endschemedisplay +%---------------------------------------------------------------------------- +\entryheader\label{desc:ephemeron-cons} +\formdef{ephemeron-cons}{\categoryprocedure}{(ephemeron-cons \var{obj_1} \var{obj_2})} +\returns a new ephemeron pair +\listlibraries +\endentryheader + +\noindent +\var{obj_1} becomes the car and \var{obj_2} becomes the cdr of the +new pair. +Ephemeron pairs are indistinguishable from ordinary pairs in all but two ways: + +\begin{itemize} +\item ephemeron pairs can be distinguished from pairs using the +\scheme{ephemeron-pair?} predicate, and + +\item ephemeron pairs maintain a weak pointer to the object in the +car of the pair, and the cdr of the pair is preserved only as long +as the car of the pair is preserved. +\end{itemize} + +\noindent + +An ephemeron pair behaves like a weak pair, but the cdr is treated +specially in addition to the car: the cdr of an ephemeron is set to +\scheme{#!bwp} at the same time that the car is set to \scheme{#!bwp}. +Since the car and cdr fields are set to \scheme{#!bwp} at the same +time, then the fact that the car object may be referenced through the +cdr object does not by itself imply that car must be preserved (unlike +a weak pair); instead, the car must be saved for some reason +independent of the cdr object. + +Like weak pairs and other pairs, ephemeron pairs may be altered using +\scheme{set-car!} and \scheme{set-cdr!}, and ephemeron pairs are +printed in the same manner as ordinary pairs; there is no reader +syntax for ephemeron pairs. + +\schemedisplay +(define x (cons 'a 'b)) +(define p (ephemeron-cons x x)) +(car p) ;=> (a . b) +(cdr p) ;=> (a . b) + +(define x (cons 'a 'b)) +(define p (ephemeron-cons x x)) +(set! x '*) +(collect) +(car p) ;=> #!bwp +(cdr p) ;=> #!bwp + +(define x (cons 'a 'b)) +(define p (weak-cons x x)) ; \var{not an ephemeron pair} +(set! x '*) +(collect) +(car p) ;=> (a . b) +(cdr p) ;=> (a . b) +\endschemedisplay + +\noindent +As with weak pairs, the last two expressions of the middle example +above may in fact return \scheme{(a . b)} if a garbage collection +promoting the pair into an older generation occurs prior to the +assignment of \scheme{x} to \scheme{*}. In the last example above, +however, the results of the last two expressions will always be +\scheme{(a . b)}, because the cdr of a weak pair holds a non-weak +reference, and that non-weak reference prevents the car field from becoming +\scheme{#!bwp}. + +%---------------------------------------------------------------------------- +\entryheader +\formdef{ephemeron-pair?}{\categoryprocedure}{(ephemeron-pair? \var{obj})} +\returns \scheme{#t} if obj is a ephemeron pair, \scheme{#f} otherwise +\listlibraries +\endentryheader + +\schemedisplay +(ephemeron-pair? (ephemeron-cons 'a 'b)) ;=> #t +(ephemeron-pair? (cons 'a 'b)) ;=> #f +(ephemeron-pair? (weak-cons 'a 'b)) ;=> #f +(ephemeron-pair? "oops") ;=> #f +\endschemedisplay + + %---------------------------------------------------------------------------- \entryheader \formdef{bwp-object?}{\categoryprocedure}{(bwp-object? \var{obj})} @@ -473,7 +565,7 @@ subdivided into two disjoint subgroups: a subgroup referred to as ``accessible'' objects, and one referred to ``inaccessible'' objects. Inaccessible objects are objects that have been proven to be inaccessible (except through the guardian mechanism itself or through -the car field of a weak pair), and +the car field of a weak or ephemeron pair), and accessible objects are objects that have not been proven so. The word ``proven'' is important here: it may be that some objects in the accessible group are indeed inaccessible but @@ -516,7 +608,7 @@ migrated into an older generation.) Although an object registered without a representative and returned from a guardian has been proven otherwise -inaccessible (except possibly via the car field of a weak pair), it has +inaccessible (except possibly via the car field of a weak or ephemeron pair), it has not yet been reclaimed by the storage management system and will not be reclaimed until after the last nonweak pointer to it within or outside of the guardian system has been dropped. @@ -550,8 +642,8 @@ themselves can be registered with other guardians. An object that has been registered with a guardian without a representative and placed in -the car field of a weak pair remains in the car field of the -weak pair until after it has been returned from the guardian and +the car field of a weak or ephemeron pair remains in the car field of the +weak or ephemeron pair until after it has been returned from the guardian and dropped by the program or until the guardian itself is dropped. \schemedisplay @@ -577,7 +669,7 @@ This can also be forced by invoking \scheme{collect} several times.) On the other hand, if a representative (other than the object itself) is specified, the guarded object is dropped from the car field of the -weak pair at the same time as the representative becomes available +weak or ephemeron pair at the same time as the representative becomes available from the guardian. \schemedisplay @@ -592,7 +684,7 @@ from the guardian. \endschemedisplay The following example illustrates that the object is deallocated and -the car field of the weak pointer set to \scheme{#!bwp} when the guardian +the car field of the weak pair set to \scheme{#!bwp} when the guardian itself is dropped: \schemedisplay diff --git a/mats/4.ms b/mats/4.ms index 777fd76884..fff7e6e311 100644 --- a/mats/4.ms +++ b/mats/4.ms @@ -3107,6 +3107,189 @@ (bwp-object? (car x)))))) ) +(mat ephemeron + (begin + (define ephemeron-key car) + (define ephemeron-value cdr) + + (define gdn (make-guardian)) + #t) + + (ephemeron-pair? (ephemeron-cons 1 2)) + + (begin + ;; ---------------------------------------- + ;; Check that the ephemeron value doesn't retain + ;; itself as an epehemeron key + (define-values (es wps saved) + (let loop ([n 1000] [es '()] [wps '()] [saved '()]) + (cond + [(zero? n) + (values es wps saved)] + [else + (let ([k1 (gensym)] + [k2 (gensym)]) + (gdn k2) + (loop (sub1 n) + (cons (ephemeron-cons k1 (box k1)) + (cons (ephemeron-cons k2 (box k2)) + es)) + (weak-cons k1 (weak-cons k2 wps)) + (cons k1 saved)))]))) + + (collect (collect-maximum-generation)) + + ;; All now waiting to be reported by the guardian + (let loop ([es es] [wps wps] [saved saved]) + (cond + [(null? saved) #t] + [else + (and + (eq? (car saved) (car wps)) + (eq? (car saved) (ephemeron-key (car es))) + (eq? (car saved) (unbox (ephemeron-value (car es)))) + (eq? (cadr wps) (ephemeron-key (cadr es))) + (eq? (cadr wps) (unbox (ephemeron-value (cadr es)))) + (loop (cddr es) (cddr wps) (cdr saved)))]))) + + (begin + ;; Report each from the guardian: + (let loop ([saved saved]) + (unless (null? saved) + (gdn) + (loop (cdr saved)))) + + (collect (collect-maximum-generation)) + + (let loop ([es es] [wps wps] [saved saved]) + (cond + [(null? saved) #t] + [else + (and + (eq? (car saved) (car wps)) + (eq? (car saved) (ephemeron-key (car es))) + (eq? (car saved) (unbox (ephemeron-value (car es)))) + (eq? #!bwp (cadr wps)) + (eq? #!bwp (ephemeron-key (cadr es))) + (eq? #!bwp (ephemeron-value (cadr es))) + (loop (cddr es) (cddr wps) (cdr saved)))]))) + + ;; ---------------------------------------- + ;; Stress test to check that the GC doesn't suffer from quadratic + ;; behavior + (begin + (define (wrapper v) (list 1 2 3 4 5 v)) + + ;; Create a chain of ephemerons where we have all + ;; the the ephemerons immediately in a list, + ;; but we discover the keys one at a time + (define (mk n prev-key es) + (cond + [(zero? n) + (values prev-key es)] + [else + (let ([key (gensym)]) + (mk (sub1 n) + key + (cons (ephemeron-cons key (wrapper prev-key)) + es)))])) + + ;; Create a chain of ephemerons where we have all + ;; of the keys immediately in a list, + ;; but we discover the ephemerons one at a time + (define (mk* n prev-e keys) + (cond + [(zero? n) + (values prev-e keys)] + [else + (let ([key (gensym)]) + (mk* (sub1 n) + (ephemeron-cons key (wrapper prev-e)) + (cons key + keys)))])) + + (define (measure-time n keep-alive) + ;; Hang the discover-keys-one-at-a-time chain + ;; off the end of the discover-ephemerons-one-at-a-time + ;; chain, which is the most complex case for avoiding + ;; quadratic GC times + (define-values (key es) (mk n (gensym) '())) + (define-values (root holds) (mk* n key es)) + + (define start (current-time)) + (collect (collect-maximum-generation)) + (let ([delta (time-difference (current-time) start)]) + ;; Sanity check on ephemerons + (for-each (lambda (e) + (when (eq? #!bwp (ephemeron-key e)) + (error 'check "oops"))) + es) + ;; Keep `root` and `holds` live: + (keep-alive (cons root holds)) + ;; Return duration: + delta)) + + (define N 10000) + + ;; The first time should be roughy x10 the second (not x100) + (let loop ([tries 3]) + (define dummy #f) + (define (keep-alive v) (set! dummy (cons dummy v))) + (define t1 (measure-time (* 10 N) keep-alive)) + (define dummy2 (set! dummy #f)) + (define t2 (measure-time N keep-alive)) + (define (duration->inexact t) (+ (* (time-second t) 1e9) + (time-nanosecond t))) + (set! dummy #f) + (or (< (/ (duration->inexact t1) (duration->inexact t2)) 20) + (and (positive? tries) + (loop (sub1 tries)))))) + + ;; ---------------------------------------- + ;; Check interaction of mutation and generations + + ;; This check disables interrups so that a garbage collection + ;; happens only for the explicit `collect` request. + (with-interrupts-disabled + (let ([e (ephemeron-cons (gensym) 'ok)]) + (collect) ; => `e` is moved to generation 1 + (and + (eq? #!bwp (ephemeron-key e)) + (eq? #!bwp (ephemeron-value e)) + (let ([s (gensym)]) + (set-car! e s) + (set-cdr! e 'ok-again) + (collect) ; => `s` is moved to generation 1 + (and + (eq? s (ephemeron-key e)) + (eq? 'ok-again (ephemeron-value e)) + (begin + (set! s #f) + (collect 1) ; collect former `s` + (and + (eq? #!bwp (ephemeron-key e)) + (eq? #!bwp (ephemeron-value e))))))))) + + ;; ---------------------------------------- + ;; Check fasl: + (let ([s (gensym)]) + (define-values (o get) (open-bytevector-output-port)) + (fasl-write (list s + (ephemeron-cons s 'ok)) + o) + (let* ([l (fasl-read (open-bytevector-input-port (get)))] + [e (cadr l)]) + (and + (eq? (car l) (ephemeron-key e)) + (eq? 'ok (ephemeron-value e)) + (begin + (set! s #f) + (set! l #f) + (collect (collect-maximum-generation)) + (and + (eq? #!bwp (ephemeron-key e)) + (eq? #!bwp (ephemeron-value e)))))))) + (mat $primitive (procedure? #%car) (procedure? #2%car) diff --git a/mats/hash.ms b/mats/hash.ms index d781cdd524..dcc77b45f9 100644 --- a/mats/hash.ms +++ b/mats/hash.ms @@ -1173,6 +1173,37 @@ (hashtable-delete! ht 'a) (list (hashtable-size ht) (= (#%$hashtable-veclen ht) len))) '(0 #t)) + + ; test that weak-hashtable values do not imply that values + ; are reachable + (let ([wk1 (list 1)] + [wk2 (list 2)] + [wk3 (list 3)] + [wk4 (list 4)] + [ht (make-weak-eq-hashtable)]) + (hashtable-set! ht wk1 wk1) + (hashtable-set! ht wk2 wk1) + (hashtable-set! ht wk3 wk3) + (hashtable-set! ht wk4 wk2) + (collect (collect-maximum-generation)) + (and + (same-elements? (hashtable-keys ht) '#((1) (2) (3) (4))) + (equal? (hashtable-ref ht wk1 #f) wk1) + (equal? (hashtable-ref ht wk2 #f) wk1) + (equal? (hashtable-ref ht wk3 #f) wk3) + (equal? (hashtable-ref ht wk4 #f) wk2) + (begin + (set! wk1 #f) + (set! wk2 #f) + (set! wk3 #f) + (collect (collect-maximum-generation)) + (and + (same-elements? (hashtable-keys ht) '#((1) (2) (4))) + (equal? (hashtable-ref ht wk4 #f) '(2)) + (begin + (set! wk4 #f) + (collect (collect-maximum-generation)) + (same-elements? (hashtable-keys ht) '#())))))) ) (mat eq-hashtable-cell diff --git a/s/cmacros.ss b/s/cmacros.ss index 5ffe60676b..9c830aaa78 100644 --- a/s/cmacros.ss +++ b/s/cmacros.ss @@ -440,7 +440,7 @@ (define-constant fasl-type-small-integer 25) (define-constant fasl-type-base-rtd 26) (define-constant fasl-type-fxvector 27) -; 28 +(define-constant fasl-type-ephemeron 28) (define-constant fasl-type-bytevector 29) (define-constant fasl-type-weak-pair 30) (define-constant fasl-type-eq-hashtable 31) @@ -640,15 +640,16 @@ (symbol "symbol" #\x 2) ; (port "port" #\q 3) ; (weakpair "weakpr" #\w 4) ; - (pure "pure" #\p 5) ; swept immutable objects allocated here (all ptrs) - (continuation "cont" #\k 6) ; - (code "code" #\c 7) ; - (pure-typed-object "p-tobj" #\r 8) ; - (impure-record "ip-rec" #\s 9)) ; + (ephemeron "emph" #\e 5) ; + (pure "pure" #\p 6) ; swept immutable objects allocated here (all ptrs) + (continuation "cont" #\k 7) ; + (code "code" #\c 8) ; + (pure-typed-object "p-tobj" #\r 9) ; + (impure-record "ip-rec" #\s 10)) ; (unswept - (data "data" #\d 10))) ; unswept objects allocated here + (data "data" #\d 11))) ; unswept objects allocated here (unreal - (empty "empty" #\e 11))) ; available segments + (empty "empty" #\e 12))) ; available segments ;;; enumeration of types for which gc tracks object counts ;;; also update gc.c @@ -678,7 +679,8 @@ (define-constant countof-locked 22) (define-constant countof-guardian 23) (define-constant countof-oblist 24) -(define-constant countof-types 25) +(define-constant countof-ephemeron 25) +(define-constant countof-types 26) ;;; type-fixnum is assumed to be all zeros by at least by vector, fxvector, ;;; and bytevector index checks @@ -1182,6 +1184,12 @@ ([iptr type] [ptr ref])) +(define-primitive-structure-disps ephemeron type-pair + ([ptr car] + [ptr cdr] + [ptr next] ; `next` is needed by the GC to keep track of pending ephemerons + [ptr trigger-next])) ; `trigger-next` is similar, but for segment-specific lists + (define-primitive-structure-disps tlc type-typed-object ([iptr type] [ptr keyval] diff --git a/s/fasl.ss b/s/fasl.ss index 173a2bd84a..5c96969482 100644 --- a/s/fasl.ss +++ b/s/fasl.ss @@ -204,26 +204,32 @@ (define wrf-pair (lambda (x p t a?) - (if (weak-pair? x) - (begin - (put-u8 p (constant fasl-type-weak-pair)) - (wrf (car x) p t a?) - (wrf (cdr x) p t a?)) - (begin ; more like list* - (put-u8 p (constant fasl-type-pair)) - (let ([n (let wrf-pair-loop0 ([n 1] [x (cdr x)]) - ; cut off at end or at shared structure - (if (and (pair? x) - (not (weak-pair? x)) - (not (eq-hashtable-ref (table-hash t) x #f))) - (wrf-pair-loop0 (fx+ n 1) (cdr x)) - n))]) - (put-uptr p n) - (let wrf-pair-loop1 ([x x] [n n]) - (wrf (car x) p t a?) - (if (fx= n 1) - (wrf (cdr x) p t a?) - (wrf-pair-loop1 (cdr x) (fx- n 1))))))))) + (cond + [(weak-pair? x) + (put-u8 p (constant fasl-type-weak-pair)) + (wrf (car x) p t a?) + (wrf (cdr x) p t a?)] + [(ephemeron-pair? x) + (put-u8 p (constant fasl-type-ephemeron)) + (wrf (car x) p t a?) + (wrf (cdr x) p t a?)] + [else + ; more like list* + (put-u8 p (constant fasl-type-pair)) + (let ([n (let wrf-pair-loop0 ([n 1] [x (cdr x)]) + ; cut off at end or at shared structure + (if (and (pair? x) + (not (weak-pair? x)) + (not (ephemeron-pair? x)) + (not (eq-hashtable-ref (table-hash t) x #f))) + (wrf-pair-loop0 (fx+ n 1) (cdr x)) + n))]) + (put-uptr p n) + (let wrf-pair-loop1 ([x x] [n n]) + (wrf (car x) p t a?) + (if (fx= n 1) + (wrf (cdr x) p t a?) + (wrf-pair-loop1 (cdr x) (fx- n 1)))))]))) (define wrf-symbol (lambda (x p t a?) diff --git a/s/library.ss b/s/library.ss index 9fb3a7eeaf..9ae9d5da2b 100644 --- a/s/library.ss +++ b/s/library.ss @@ -1435,7 +1435,7 @@ [b (vector-ref vec idx)]) (lookup-keyval x b values - (let ([keyval (if (eq-ht-weak? h) (weak-cons x v) (cons x v))]) + (let ([keyval (if (eq-ht-weak? h) (ephemeron-cons x v) (cons x v))]) (vector-set! vec idx ($make-tlc h keyval b)) (incr-size! h vec) keyval)))) @@ -1451,7 +1451,7 @@ (begin (vector-set! vec idx ($make-tlc h - (if (eq-ht-weak? h) (weak-cons x v) (cons x v)) + (if (eq-ht-weak? h) (ephemeron-cons x v) (cons x v)) b)) (incr-size! h vec)))))) diff --git a/s/mkheader.ss b/s/mkheader.ss index 0a420c721e..30f96ed9e0 100644 --- a/s/mkheader.ss +++ b/s/mkheader.ss @@ -781,6 +781,11 @@ (definit INITBOXREF box ref) (defset SETBOXREF box ref) + (defref EPHEMERONNEXT ephemeron next) + (definit INITEPHEMERONNEXT ephemeron next) + (defref EPHEMERONTRIGGERNEXT ephemeron trigger-next) + (definit INITEPHEMERONTRIGGERNEXT ephemeron trigger-next) + (defref TLCTYPE tlc type) (defref TLCKEYVAL tlc keyval) (defref TLCHT tlc ht) diff --git a/s/newhash.ss b/s/newhash.ss index 9783af0988..0862ff4c19 100644 --- a/s/newhash.ss +++ b/s/newhash.ss @@ -1019,7 +1019,7 @@ Documentation notes: b ($make-tlc h2 (let* ([keyval ($tlc-keyval b)] [key (car keyval)] [val (cdr keyval)]) - (if weak? (weak-cons key val) (cons key val))) + (if weak? (ephemeron-cons key val) (cons key val))) (inner ($tlc-next b)))))) (outer (fx+ i 1))))) h2)))) diff --git a/s/primdata.ss b/s/primdata.ss index b4eba151ec..b44d108457 100644 --- a/s/primdata.ss +++ b/s/primdata.ss @@ -1241,6 +1241,8 @@ (environment? [sig [(ptr) -> (boolean)]] [flags pure unrestricted mifoldable discard]) (environment-mutable? [sig [(environment) -> (boolean)]] [flags pure mifoldable discard]) (environment-symbols [sig [(environment) -> (list)]] [flags true]) + (ephemeron-cons [sig [(ptr ptr) -> (ptr)]] [flags unrestricted alloc]) + (ephemeron-pair? [sig [(ptr) -> (boolean)]] [flags pure unrestricted mifoldable discard]) (errorf [sig [(who string sub-ptr ...) -> (bottom)]] [flags abort-op]) ; second arg is format string (eq-hashtable? [sig [(ptr) -> (boolean)]] [flags pure unrestricted mifoldable discard]) (eq-hashtable-cell [sig [(eq-hashtable ptr ptr) -> ((ptr . ptr))]] [flags true]) diff --git a/s/prims.ss b/s/prims.ss index b9a51846d0..c9f776196a 100644 --- a/s/prims.ss +++ b/s/prims.ss @@ -63,6 +63,16 @@ (scheme-object) scheme-object)) +(define ephemeron-cons + (foreign-procedure "(cs)s_ephemeron_cons" + (scheme-object scheme-object) + scheme-object)) + +(define ephemeron-pair? + (foreign-procedure "(cs)s_ephemeron_pairp" + (scheme-object) + scheme-object)) + (define $split-continuation (foreign-procedure "(cs)single_continuation" (scheme-object iptr)