Merge pull request #168 from mflatt/ephemeron

Ephemerons
original commit: 88b627e9c5fdfbed3a6ee3b99f0917a2e9ee1374
This commit is contained in:
R. Kent Dybvig 2017-05-25 18:03:13 -04:00 committed by GitHub
commit 7fe2de5e3a
18 changed files with 665 additions and 71 deletions

6
LOG
View File

@ -456,3 +456,9 @@
- fix overflow detection for fxsll, fxarithmetic-shift-left, and
fxarithmetic-shift
library.ss, fx.ms, release_notes.stex
- added ephemeron pairs and changed weak hashtables to use
ephemeron pairs for key--value mapping to avoid the key-in-value
problem
prims.ss, primdata.ss, newhash.ss, fasl.ss, mkheader.ss
cmacro.ss, prim5.c, fasl.c, gc.c, gcwrapper.c, types.h,
4.ms, hash.ms, objects.stex, smgmt.stex, csug.bib

View File

@ -860,6 +860,11 @@ static void faslin(ptr tc, ptr *x, ptr t, ptr *pstrbuf, faslFile f) {
faslin(tc, &INITCAR(*x), t, pstrbuf, f);
faslin(tc, &INITCDR(*x), t, pstrbuf, f);
return;
case fasl_type_ephemeron:
*x = S_cons_in(space_ephemeron, 0, FIX(0), FIX(0));
faslin(tc, &INITCAR(*x), t, pstrbuf, f);
faslin(tc, &INITCDR(*x), t, pstrbuf, f);
return;
case fasl_type_code: {
iptr n, m, a; INT flags; iptr free;
ptr co, reloc, name;

259
c/gc.c
View File

@ -29,7 +29,7 @@ static uptr list_length PROTO((ptr ls));
static ptr dosort PROTO((ptr ls, uptr n));
static ptr domerge PROTO((ptr l1, ptr l2));
static IBOOL search_locked PROTO((ptr p));
static ptr copy PROTO((ptr pp, ISPC pps));
static ptr copy PROTO((ptr pp, seginfo *si));
static void sweep_ptrs PROTO((ptr *p, iptr n));
static void sweep PROTO((ptr tc, ptr p, IBOOL sweep_pure));
static ptr copy_stack PROTO((ptr old, iptr *length, iptr clength));
@ -49,6 +49,13 @@ static void sweep_code_object PROTO((ptr tc, ptr co));
static void record_dirty_segment PROTO((IGEN from_g, IGEN to_g, seginfo *si));
static void sweep_dirty PROTO((void));
static void resweep_dirty_weak_pairs PROTO((void));
static void add_ephemeron_to_pending PROTO((ptr p));
static void add_trigger_ephemerons_to_repending PROTO((ptr p));
static void check_trigger_ephemerons PROTO((seginfo *si));
static void check_ephemeron PROTO((ptr pe, int add_to_trigger));
static void check_pending_ephemerons PROTO(());
static int check_dirty_ephemeron PROTO((ptr pe, int tg, int youngest));
static void clear_trigger_ephemerons PROTO(());
/* MAXPTR is used to pad the sorted_locked_object vector. The pad value must be greater than any heap address */
#define MAXPTR ((ptr)-1)
@ -111,10 +118,10 @@ uptr list_length(ptr ls) {
* youngest = GENERATION(*ppp);
*/
#define relocate_dirty(ppp,tg,youngest) {\
ptr PP = *ppp; seginfo *SI; ISPC S;\
ptr PP = *ppp; seginfo *SI;\
if (!IMMEDIATE(PP) && (SI = MaybeSegInfo(ptr_get_segment(PP))) != NULL) {\
if ((S = SI->space) & space_old) {\
relocate_help_help(ppp, PP, S)\
if (SI->space & space_old) {\
relocate_help_help(ppp, PP, SI)\
youngest = tg;\
} else {\
IGEN pg;\
@ -126,38 +133,38 @@ uptr list_length(ptr ls) {
}
#define relocate_help(ppp, pp) {\
seginfo *SI; ISPC S;\
if (!IMMEDIATE(pp) && (SI = MaybeSegInfo(ptr_get_segment(pp))) != NULL && (S = SI->space) & space_old)\
relocate_help_help(ppp, pp, S)\
seginfo *SI; \
if (!IMMEDIATE(pp) && (SI = MaybeSegInfo(ptr_get_segment(pp))) != NULL && SI->space & space_old)\
relocate_help_help(ppp, pp, SI)\
}
#define relocate_help_help(ppp, pp, s) {\
#define relocate_help_help(ppp, pp, si) {\
if (FWDMARKER(pp) == forward_marker && TYPEBITS(pp) != type_flonum)\
*ppp = FWDADDRESS(pp);\
else\
*ppp = copy(pp, s);\
*ppp = copy(pp, si);\
}
#define relocate_return_addr(pcp) {\
ISPC S;\
seginfo *SI;\
ptr XCP;\
XCP = *(pcp);\
if ((S = SPACE(XCP)) & space_old) {\
if ((SI = SegInfo(ptr_get_segment(XCP)))->space & space_old) { \
iptr CO;\
CO = ENTRYOFFSET(XCP) + ((uptr)XCP - (uptr)&ENTRYOFFSET(XCP));\
relocate_code(pcp,XCP,CO,S)\
relocate_code(pcp,XCP,CO,SI)\
}\
}
/* in the call to copy below, assuming SPACE(PP) == SPACE(XCP) since
PP and XCP point to/into the same object */
#define relocate_code(pcp,XCP,CO,S) {\
#define relocate_code(pcp,XCP,CO,SI) {\
ptr PP;\
PP = (ptr)((uptr)XCP - CO);\
if (FWDMARKER(PP) == forward_marker)\
PP = FWDADDRESS(PP);\
else\
PP = copy(PP, S);\
PP = copy(PP, SI);\
*pcp = (ptr)((uptr)PP + CO);\
}
@ -185,7 +192,19 @@ static IBOOL search_locked(ptr p) {
#define locked(p) (sorted_locked_objects != FIX(0) && search_locked(p))
static ptr copy(pp, pps) ptr pp; ISPC pps; {
FORCEINLINE void check_trigger_ephemerons(seginfo *si) {
/* Registering ephemerons to recheck at the granularity of a segment
means that the worst-case complexity of GC is quadratic in the
number of objects that fit into a segment (but that only happens
if the objects are ephemeron keys that are reachable just through
a chain via the value field of the same ephemerons). */
if (si->trigger_ephemerons) {
add_trigger_ephemerons_to_repending(si->trigger_ephemerons);
si->trigger_ephemerons = NULL;
}
}
static ptr copy(pp, si) ptr pp; seginfo *si; {
ptr p, tf; ITYPE t; IGEN tg;
if (locked(pp)) return pp;
@ -194,6 +213,8 @@ static ptr copy(pp, pps) ptr pp; ISPC pps; {
change = 1;
check_trigger_ephemerons(si);
if ((t = TYPEBITS(pp)) == type_typed_object) {
tf = TYPEFIELD(pp);
if (TYPEP(tf, mask_record, type_record)) {
@ -418,9 +439,18 @@ static ptr copy(pp, pps) ptr pp; ISPC pps; {
return (ptr)0 /* not reached */;
}
} else if (t == type_pair) {
ptr qq = Scdr(pp); ptr q; seginfo *si;
if (qq != pp && TYPEBITS(qq) == type_pair && (si = MaybeSegInfo(ptr_get_segment(qq))) != NULL && si->space == pps && FWDMARKER(qq) != forward_marker && !locked(qq)) {
if (pps == (space_weakpair | space_old)) {
if (si->space == (space_ephemeron | space_old)) {
#ifdef ENABLE_OBJECT_COUNTS
S_G.countof[tg][countof_ephemeron] += 1;
#endif /* ENABLE_OBJECT_COUNTS */
find_room(space_ephemeron, tg, type_pair, size_ephemeron, p);
INITCAR(p) = Scar(pp);
INITCDR(p) = Scdr(pp);
} else {
ptr qq = Scdr(pp); ptr q; seginfo *qsi;
if (qq != pp && TYPEBITS(qq) == type_pair && (qsi = MaybeSegInfo(ptr_get_segment(qq))) != NULL && qsi->space == si->space && FWDMARKER(qq) != forward_marker && !locked(qq)) {
check_trigger_ephemerons(qsi);
if (si->space == (space_weakpair | space_old)) {
#ifdef ENABLE_OBJECT_COUNTS
S_G.countof[tg][countof_weakpair] += 2;
#endif /* ENABLE_OBJECT_COUNTS */
@ -439,7 +469,7 @@ static ptr copy(pp, pps) ptr pp; ISPC pps; {
FWDMARKER(qq) = forward_marker;
FWDADDRESS(qq) = q;
} else {
if (pps == (space_weakpair | space_old)) {
if (si->space == (space_weakpair | space_old)) {
#ifdef ENABLE_OBJECT_COUNTS
S_G.countof[tg][countof_weakpair] += 1;
#endif /* ENABLE_OBJECT_COUNTS */
@ -453,6 +483,7 @@ static ptr copy(pp, pps) ptr pp; ISPC pps; {
INITCAR(p) = Scar(pp);
INITCDR(p) = qq;
}
}
} else if (t == type_closure) {
ptr code;
@ -533,10 +564,15 @@ static void sweep(ptr tc, ptr p, IBOOL sweep_pure) {
ptr tf; ITYPE t;
if ((t = TYPEBITS(p)) == type_pair) {
if ((SPACE(p) & ~(space_locked | space_old)) != space_weakpair) {
relocate(&INITCAR(p))
ISPC s = SPACE(p) & ~(space_locked | space_old);
if (s == space_ephemeron)
add_ephemeron_to_pending(p);
else {
if (s != space_weakpair) {
relocate(&INITCAR(p))
}
relocate(&INITCDR(p))
}
relocate(&INITCDR(p))
} else if (t == type_closure) {
if (sweep_pure) {
ptr code;
@ -827,7 +863,7 @@ void GCENTRY(ptr tc, IGEN mcg, IGEN tg) {
if (FWDMARKER(sym) != forward_marker &&
/* coordinate with alloc.c */
(SYMVAL(sym) != sunbound || SYMPLIST(sym) != Snil || SYMSPLIST(sym) != Snil))
(void)copy(sym, SPACE(sym));
(void)copy(sym, SegInfo(ptr_get_segment(sym)));
}
S_G.buckets_of_generation[g] = NULL;
}
@ -959,6 +995,9 @@ void GCENTRY(ptr tc, IGEN mcg, IGEN tg) {
resweep_dirty_weak_pairs();
resweep_weak_pairs(tg);
/* still-pending ephemerons all go to bwp */
clear_trigger_ephemerons();
/* forward car fields of locked and unlocked older weak pairs */
for (g = mcg + 1; g <= static_generation; INCRGEN(g)) {
for (ls = S_G.locked_objects[g]; ls != Snil; ls = Scdr(ls)) {
@ -1262,6 +1301,12 @@ static void sweep_generation(tc, g) ptr tc; IGEN g; {
pp += 1;
})
sweep_space(space_ephemeron, {
p = TYPE((ptr)pp, type_pair);
add_ephemeron_to_pending(p);
pp += size_ephemeron / sizeof(ptr);
})
sweep_space(space_pure, {
relocate_help(pp, p)
p = *(pp += 1);
@ -1292,6 +1337,13 @@ static void sweep_generation(tc, g) ptr tc; IGEN g; {
pp = (ptr *)((iptr)pp +
size_record_inst(UNFIX(RECORDDESCSIZE(RECORDINSTTYPE(p)))));
})
/* Waiting until sweeping doesn't trigger a change reduces the
chance that an ephemeron must be reigistered as a
segment-specific trigger or gets triggered for recheck, but
it doesn't change the worst-case complexity. */
if (!change)
check_pending_ephemerons();
} while (change);
}
@ -1299,7 +1351,11 @@ static iptr size_object(p) ptr p; {
ITYPE t; ptr tf;
if ((t = TYPEBITS(p)) == type_pair) {
return size_pair;
seginfo *si;
if ((si = MaybeSegInfo(ptr_get_segment(p))) != NULL && (si->space & ~(space_locked | space_old)) == space_ephemeron)
return size_ephemeron;
else
return size_pair;
} else if (t == type_closure) {
ptr code = CLOSCODE(p);
if (CODETYPE(code) & (code_flag_continuation << code_flags_offset))
@ -1873,6 +1929,12 @@ static void sweep_dirty(void) {
relocate_dirty(pp, tg, youngest)
pp += 1;
}
} else if (s == space_ephemeron) {
while (pp < ppend && *pp != forward_marker) {
ptr p = TYPE((ptr)pp, type_pair);
youngest = check_dirty_ephemeron(p, tg, youngest);
pp += size_ephemeron / sizeof(ptr);
}
} else {
S_error_abort("sweep_dirty(gc): unexpected space");
}
@ -1965,3 +2027,152 @@ static void resweep_dirty_weak_pairs() {
record_dirty_segment(from_g, min_youngest, dirty_si);
}
}
static ptr pending_ephemerons = NULL;
/* Ephemerons that we haven't looked at, chained through `next`. */
static ptr trigger_ephemerons = NULL;
/* Ephemerons that we've checked and added to segment triggers,
chained through `next`. Ephemerons attached to a segment are
chained through `trigger-next`. A #t in `trigger-next` means that
the ephemeron has been processed, so we don't need to remove it
from the trigger list in a segment. */
static ptr repending_ephemerons = NULL;
/* Ephemerons in `trigger_ephemerons` that we need to inspect again,
removed from the triggering segment and chained here through
`trigger-next`. */
static void add_ephemeron_to_pending(ptr pe) {
/* We could call check_ephemeron directly here, but the indirection
through `pending_ephemerons` can dramatically decrease the number
of times that we have to trigger re-checking, especially since
check_pending_pehemerons() is run only after all other sweep
opportunities are exhausted. */
EPHEMERONNEXT(pe) = pending_ephemerons;
pending_ephemerons = pe;
}
static void add_trigger_ephemerons_to_repending(ptr pe) {
ptr last_pe = pe, next_pe = EPHEMERONTRIGGERNEXT(pe);
while (next_pe != NULL) {
last_pe = next_pe;
next_pe = EPHEMERONTRIGGERNEXT(next_pe);
}
EPHEMERONTRIGGERNEXT(last_pe) = repending_ephemerons;
repending_ephemerons = pe;
}
static void check_ephemeron(ptr pe, int add_to_trigger) {
ptr p;
seginfo *si;
p = Scar(pe);
if (!IMMEDIATE(p) && (si = MaybeSegInfo(ptr_get_segment(p))) != NULL && si->space & space_old && !locked(p)) {
if (FWDMARKER(p) == forward_marker && TYPEBITS(p) != type_flonum) {
INITCAR(pe) = FWDADDRESS(p);
relocate(&INITCDR(pe))
if (!add_to_trigger)
EPHEMERONTRIGGERNEXT(pe) = Strue; /* in trigger list, #t means "done" */
} else {
/* Not reached, so far; install as trigger */
EPHEMERONTRIGGERNEXT(pe) = si->trigger_ephemerons;
si->trigger_ephemerons = pe;
if (add_to_trigger) {
EPHEMERONNEXT(pe) = trigger_ephemerons;
trigger_ephemerons = pe;
}
}
} else {
relocate(&INITCDR(pe))
}
}
static void check_pending_ephemerons() {
ptr pe, next_pe;
pe = pending_ephemerons;
pending_ephemerons = NULL;
while (pe != NULL) {
next_pe = EPHEMERONNEXT(pe);
check_ephemeron(pe, 1);
pe = next_pe;
}
pe = repending_ephemerons;
repending_ephemerons = NULL;
while (pe != NULL) {
next_pe = EPHEMERONTRIGGERNEXT(pe);
check_ephemeron(pe, 0);
pe = next_pe;
}
}
/* Like check_ephemeron(), but for a dirty, old-generation
ephemeron (that was not yet added to the pending list), so we can
be less pessimistic than setting `youngest` to the target
generation: */
static int check_dirty_ephemeron(ptr pe, int tg, int youngest) {
ptr p;
seginfo *si;
p = Scar(pe);
if (!IMMEDIATE(p) && (si = MaybeSegInfo(ptr_get_segment(p))) != NULL) {
if (si->space & space_old && !locked(p)) {
if (FWDMARKER(p) == forward_marker && TYPEBITS(p) != type_flonum) {
INITCAR(pe) = FWDADDRESS(p);
relocate(&INITCDR(pe))
youngest = tg;
} else {
/* Not reached, so far; install as trigger */
EPHEMERONTRIGGERNEXT(pe) = si->trigger_ephemerons;
si->trigger_ephemerons = pe;
EPHEMERONNEXT(pe) = trigger_ephemerons;
trigger_ephemerons = pe;
/* Make the consistent (but pessimistic w.r.t. to wrong-way
pointers) assumption that the key will stay live and move
to the target generation. That assumption covers the value
part, too, since it can't end up younger than the target
generation. */
youngest = tg;
}
} else {
int pg;
if ((pg = si->generation) < youngest)
youngest = pg;
relocate_dirty(&INITCDR(pe), tg, youngest)
}
} else {
/* Non-collectable key means that the value determines
`youngest`: */
relocate_dirty(&INITCDR(pe), tg, youngest)
}
return youngest;
}
static void clear_trigger_ephemerons() {
ptr pe;
if (pending_ephemerons != NULL)
S_error_abort("clear_trigger_ephemerons(gc): non-empty pending list");
pe = trigger_ephemerons;
trigger_ephemerons = NULL;
while (pe != NULL) {
if (EPHEMERONTRIGGERNEXT(pe) == Strue) {
/* The ephemeron was triggered and retains its key and value */
} else {
seginfo *si;
ptr p = Scar(pe);
/* Key never became reachable, so clear key and value */
INITCAR(pe) = Sbwp_object;
INITCDR(pe) = Sbwp_object;
/* Remove trigger */
si = SegInfo(ptr_get_segment(p));
si->trigger_ephemerons = NULL;
}
pe = EPHEMERONNEXT(pe);
}
}

View File

@ -128,6 +128,8 @@ void S_gc_init() {
S_G.countof_size[countof_guardian] = size_guardian_entry;
INITVECTIT(S_G.countof_names, countof_oblist) = S_intern((const unsigned char *)"oblist");
S_G.countof_size[countof_guardian] = 0;
INITVECTIT(S_G.countof_names, countof_ephemeron) = S_intern((const unsigned char *)"ephemron");
S_G.countof_size[countof_ephemeron] = 0;
for (i = 0; i < countof_types; i += 1) {
if (Svector_ref(S_G.countof_names, i) == FIX(0)) {
fprintf(stderr, "uninitialized countof_name at index %d\n", i);
@ -505,7 +507,7 @@ void S_check_heap(aftergc) IBOOL aftergc; {
S_checkheap_errors += 1;
printf("!!! unexpected generation %d segment %#tx in space_new\n", g, (ptrdiff_t)seg);
}
} else if (s == space_impure || s == space_symbol || s == space_pure || s == space_weakpair) {
} else if (s == space_impure || s == space_symbol || s == space_pure || s == space_weakpair || s == space_ephemeron) {
/* out of date: doesn't handle space_port, space_continuation, space_code, space_pure_typed_object, space_impure_record */
nl = (ptr *)S_G.next_loc[s][g];
@ -530,7 +532,7 @@ void S_check_heap(aftergc) IBOOL aftergc; {
/* verify that dirty bits are set appropriately */
/* out of date: doesn't handle space_impure_record, space_port, and maybe others */
/* also doesn't check the SYMCODE for symbols */
if (s == space_impure || s == space_symbol || s == space_weakpair) {
if (s == space_impure || s == space_symbol || s == space_weakpair || s == space_ephemeron) {
found_eos = 0;
pp2 = pp1 = build_ptr(seg, 0);
for (d = 0; d < cards_per_segment; d += 1) {
@ -588,7 +590,7 @@ void S_check_heap(aftergc) IBOOL aftergc; {
}
}
}
if (aftergc && s != space_empty && !(s & space_locked) && (g == 0 || (s != space_impure && s != space_symbol && s != space_port && s != space_weakpair && s != space_impure_record))) {
if (aftergc && s != space_empty && !(s & space_locked) && (g == 0 || (s != space_impure && s != space_symbol && s != space_port && s != space_weakpair && s != space_ephemeron && s != space_impure_record))) {
for (d = 0; d < cards_per_segment; d += 1) {
if (si->dirty_bytes[d] != 0xff) {
S_checkheap_errors += 1;
@ -669,7 +671,7 @@ static void check_dirty() {
S_checkheap_errors += 1;
printf("!!! (check_dirty): dirty byte = %d for segment %#tx in %d -> %d dirty list\n", mingval, (ptrdiff_t)(si->number), from_g, to_g);
}
if (s != space_new && s != space_impure && s != space_symbol && s != space_port && s != space_impure_record && s != space_weakpair) {
if (s != space_new && s != space_impure && s != space_symbol && s != space_port && s != space_impure_record && s != space_weakpair && s != space_ephemeron) {
S_checkheap_errors += 1;
printf("!!! (check_dirty): unexpected space %d for dirty segment %#tx\n", s, (ptrdiff_t)(si->number));
}
@ -684,6 +686,7 @@ static void check_dirty() {
check_dirty_space(space_port);
check_dirty_space(space_impure_record);
check_dirty_space(space_weakpair);
check_dirty_space(space_ephemeron);
fflush(stdout);
}

View File

@ -34,6 +34,8 @@ static ptr s_trunc_rem PROTO((ptr x, ptr y));
static ptr s_fltofx PROTO((ptr x));
static ptr s_weak_cons PROTO((ptr car, ptr cdr));
static ptr s_weak_pairp PROTO((ptr p));
static ptr s_ephemeron_cons PROTO((ptr car, ptr cdr));
static ptr s_ephemeron_pairp PROTO((ptr p));
static ptr s_oblist PROTO((void));
static ptr s_bigoddp PROTO((ptr n));
static ptr s_float PROTO((ptr x));
@ -176,6 +178,20 @@ static ptr s_weak_pairp(p) ptr p; {
return Spairp(p) && (si = MaybeSegInfo(ptr_get_segment(p))) != NULL && (si->space & ~space_locked) == space_weakpair ? Strue : Sfalse;
}
static ptr s_ephemeron_cons(car, cdr) ptr car, cdr; {
ptr p;
tc_mutex_acquire()
p = S_cons_in(space_ephemeron, 0, car, cdr);
tc_mutex_release()
return p;
}
static ptr s_ephemeron_pairp(p) ptr p; {
seginfo *si;
return Spairp(p) && (si = MaybeSegInfo(ptr_get_segment(p))) != NULL && (si->space & ~space_locked) == space_ephemeron ? Strue : Sfalse;
}
static ptr s_oblist() {
ptr ls = Snil;
iptr idx = S_G.oblist_length;
@ -1465,6 +1481,8 @@ void S_prim5_init() {
Sforeign_symbol("(cs)s_fltofx", (void *)s_fltofx);
Sforeign_symbol("(cs)s_weak_cons", (void *)s_weak_cons);
Sforeign_symbol("(cs)s_weak_pairp", (void *)s_weak_pairp);
Sforeign_symbol("(cs)s_ephemeron_cons", (void *)s_ephemeron_cons);
Sforeign_symbol("(cs)s_ephemeron_pairp", (void *)s_ephemeron_pairp);
Sforeign_symbol("(cs)continuation_depth", (void *)S_continuation_depth);
Sforeign_symbol("(cs)single_continuation", (void *)S_single_continuation);
Sforeign_symbol("(cs)c_exit", (void *)c_exit);

View File

@ -125,6 +125,7 @@ typedef struct _seginfo {
struct _seginfo *next; /* pointer to the next seginfo (used in occupied_segments and unused_segs */
struct _seginfo **dirty_prev; /* pointer to the next pointer on the previous seginfo in the DirtySegments list */
struct _seginfo *dirty_next; /* pointer to the next seginfo on the DirtySegments list */
ptr trigger_ephemerons; /* ephemerons to re-check if object in segment is copied out */
octet dirty_bytes[cards_per_segment]; /* one dirty byte per card */
} seginfo;

View File

@ -555,3 +555,14 @@ year = 2008}
address = {Indianapolis, IN, USA},
school = {Indiana University}
}
@inproceedings{Hayes:ephemerons,
author = {Barry Hayes},
title = {Ephemerons: a New Finalization Mechanism},
booktitle = {\it Proceedings of the 12th ACM SIGPLAN
Conference on Object-Oriented Languages, Programming, Systems,
and Applications},
pages = {176--183},
url = {https://doi.org/10.1145/263700.263733},
year = {1997}
}

View File

@ -1820,7 +1820,9 @@ except the keys of the hashtable are held weakly, i.e., they are not
protected from the garbage collector.
Keys reclaimed by the garbage collector are removed from the table,
and their associated values are dropped the next time the table
is modified, if not sooner.
is modified, if not sooner. A value in the hashtable can refer to a
key in the hashtable without preventing the garbage collector from
reclaiming the key (because keys are paired values using ephemeron pairs).
A copy of a weak eq or eqv hashtable created by \scheme{hashtable-copy} is
also weak.

View File

@ -309,7 +309,7 @@ memory footprint, while setting it to a larger value may result in fewer
calls into the operating system to request and free memory space.
\section{Weak Pairs and Guardians\label{SECTGUARDWEAKPAIRS}}
\section{Weak Pairs, Ephemeron Pairs, and Guardians\label{SECTGUARDWEAKPAIRS}}
\index{weak pairs}\index{weak pointers}\emph{Weak pairs} allow programs
to maintain \emph{weak pointers} to objects.
@ -317,21 +317,30 @@ A weak pointer to an object does not prevent the object from being
reclaimed by the storage management system, but it does remain valid as
long as the object is otherwise accessible in the system.
\index{ephemeron pairs}\emph{Ephemeron pairs} are like weak pairs, but
ephemeron pairs combine two pointers where the second is retained only
as long as the first is retained.
\index{guardians}\emph{Guardians}
allow programs to protect objects from deallocation
by the garbage collector and to determine when the objects would
otherwise have been deallocated.
Weak pairs and guardians allow programs to retain
Weak pairs, ephemeron pairs, and guardians allow programs to retain
information about objects in separate data structures (such as hash
tables) without concern that maintaining this information will cause
the objects to remain indefinitely in the system.
the objects to remain indefinitely in the system. Ephemeron pairs
allow such data structures to retain key--value combinations
where a value may refer to its key, but the combination
can be reclaimed if neither must be saved otherwise.
In addition, guardians allow objects to be saved from deallocation
indefinitely so that they can be reused or so that clean-up or other
actions can be performed using the data stored within the objects.
The implementation of guardians and weak pairs used by {\ChezScheme}
is described in~\cite{Dybvig:guardians}.
is described in~\cite{Dybvig:guardians}. Ephemerons are described
in~\cite{Hayes:ephemerons}, but the implementation in {\ChezScheme}
avoids quadratic-time worst-case behavior.
%----------------------------------------------------------------------------
\entryheader\label{desc:weak-cons}
@ -417,6 +426,89 @@ dropped, but makes no guarantees about when this will occur.
\endschemedisplay
%----------------------------------------------------------------------------
\entryheader\label{desc:ephemeron-cons}
\formdef{ephemeron-cons}{\categoryprocedure}{(ephemeron-cons \var{obj_1} \var{obj_2})}
\returns a new ephemeron pair
\listlibraries
\endentryheader
\noindent
\var{obj_1} becomes the car and \var{obj_2} becomes the cdr of the
new pair.
Ephemeron pairs are indistinguishable from ordinary pairs in all but two ways:
\begin{itemize}
\item ephemeron pairs can be distinguished from pairs using the
\scheme{ephemeron-pair?} predicate, and
\item ephemeron pairs maintain a weak pointer to the object in the
car of the pair, and the cdr of the pair is preserved only as long
as the car of the pair is preserved.
\end{itemize}
\noindent
An ephemeron pair behaves like a weak pair, but the cdr is treated
specially in addition to the car: the cdr of an ephemeron is set to
\scheme{#!bwp} at the same time that the car is set to \scheme{#!bwp}.
Since the car and cdr fields are set to \scheme{#!bwp} at the same
time, then the fact that the car object may be referenced through the
cdr object does not by itself imply that car must be preserved (unlike
a weak pair); instead, the car must be saved for some reason
independent of the cdr object.
Like weak pairs and other pairs, ephemeron pairs may be altered using
\scheme{set-car!} and \scheme{set-cdr!}, and ephemeron pairs are
printed in the same manner as ordinary pairs; there is no reader
syntax for ephemeron pairs.
\schemedisplay
(define x (cons 'a 'b))
(define p (ephemeron-cons x x))
(car p) ;=> (a . b)
(cdr p) ;=> (a . b)
(define x (cons 'a 'b))
(define p (ephemeron-cons x x))
(set! x '*)
(collect)
(car p) ;=> #!bwp
(cdr p) ;=> #!bwp
(define x (cons 'a 'b))
(define p (weak-cons x x)) ; \var{not an ephemeron pair}
(set! x '*)
(collect)
(car p) ;=> (a . b)
(cdr p) ;=> (a . b)
\endschemedisplay
\noindent
As with weak pairs, the last two expressions of the middle example
above may in fact return \scheme{(a . b)} if a garbage collection
promoting the pair into an older generation occurs prior to the
assignment of \scheme{x} to \scheme{*}. In the last example above,
however, the results of the last two expressions will always be
\scheme{(a . b)}, because the cdr of a weak pair holds a non-weak
reference, and that non-weak reference prevents the car field from becoming
\scheme{#!bwp}.
%----------------------------------------------------------------------------
\entryheader
\formdef{ephemeron-pair?}{\categoryprocedure}{(ephemeron-pair? \var{obj})}
\returns \scheme{#t} if obj is a ephemeron pair, \scheme{#f} otherwise
\listlibraries
\endentryheader
\schemedisplay
(ephemeron-pair? (ephemeron-cons 'a 'b)) ;=> #t
(ephemeron-pair? (cons 'a 'b)) ;=> #f
(ephemeron-pair? (weak-cons 'a 'b)) ;=> #f
(ephemeron-pair? "oops") ;=> #f
\endschemedisplay
%----------------------------------------------------------------------------
\entryheader
\formdef{bwp-object?}{\categoryprocedure}{(bwp-object? \var{obj})}
@ -473,7 +565,7 @@ subdivided into two disjoint subgroups: a subgroup referred to
as ``accessible'' objects, and one referred to ``inaccessible'' objects.
Inaccessible objects are objects that have been proven to be
inaccessible (except through the guardian mechanism itself or through
the car field of a weak pair), and
the car field of a weak or ephemeron pair), and
accessible objects are objects that have not been proven so.
The word ``proven'' is important here: it may be that some objects in
the accessible group are indeed inaccessible but
@ -516,7 +608,7 @@ migrated into an older generation.)
Although an object registered without a representative and returned from
a guardian has been proven otherwise
inaccessible (except possibly via the car field of a weak pair), it has
inaccessible (except possibly via the car field of a weak or ephemeron pair), it has
not yet been reclaimed by the storage management system and will not be
reclaimed until after the last nonweak pointer to it within or outside
of the guardian system has been dropped.
@ -550,8 +642,8 @@ themselves can be registered with other guardians.
An object that has been registered with a guardian without a
representative and placed in
the car field of a weak pair remains in the car field of the
weak pair until after it has been returned from the guardian and
the car field of a weak or ephemeron pair remains in the car field of the
weak or ephemeron pair until after it has been returned from the guardian and
dropped by the program or until the guardian itself is dropped.
\schemedisplay
@ -577,7 +669,7 @@ This can also be forced by invoking \scheme{collect} several times.)
On the other hand, if a representative (other than the object itself)
is specified, the guarded object is dropped from the car field of the
weak pair at the same time as the representative becomes available
weak or ephemeron pair at the same time as the representative becomes available
from the guardian.
\schemedisplay
@ -592,7 +684,7 @@ from the guardian.
\endschemedisplay
The following example illustrates that the object is deallocated and
the car field of the weak pointer set to \scheme{#!bwp} when the guardian
the car field of the weak pair set to \scheme{#!bwp} when the guardian
itself is dropped:
\schemedisplay

183
mats/4.ms
View File

@ -3107,6 +3107,189 @@
(bwp-object? (car x))))))
)
(mat ephemeron
(begin
(define ephemeron-key car)
(define ephemeron-value cdr)
(define gdn (make-guardian))
#t)
(ephemeron-pair? (ephemeron-cons 1 2))
(begin
;; ----------------------------------------
;; Check that the ephemeron value doesn't retain
;; itself as an epehemeron key
(define-values (es wps saved)
(let loop ([n 1000] [es '()] [wps '()] [saved '()])
(cond
[(zero? n)
(values es wps saved)]
[else
(let ([k1 (gensym)]
[k2 (gensym)])
(gdn k2)
(loop (sub1 n)
(cons (ephemeron-cons k1 (box k1))
(cons (ephemeron-cons k2 (box k2))
es))
(weak-cons k1 (weak-cons k2 wps))
(cons k1 saved)))])))
(collect (collect-maximum-generation))
;; All now waiting to be reported by the guardian
(let loop ([es es] [wps wps] [saved saved])
(cond
[(null? saved) #t]
[else
(and
(eq? (car saved) (car wps))
(eq? (car saved) (ephemeron-key (car es)))
(eq? (car saved) (unbox (ephemeron-value (car es))))
(eq? (cadr wps) (ephemeron-key (cadr es)))
(eq? (cadr wps) (unbox (ephemeron-value (cadr es))))
(loop (cddr es) (cddr wps) (cdr saved)))])))
(begin
;; Report each from the guardian:
(let loop ([saved saved])
(unless (null? saved)
(gdn)
(loop (cdr saved))))
(collect (collect-maximum-generation))
(let loop ([es es] [wps wps] [saved saved])
(cond
[(null? saved) #t]
[else
(and
(eq? (car saved) (car wps))
(eq? (car saved) (ephemeron-key (car es)))
(eq? (car saved) (unbox (ephemeron-value (car es))))
(eq? #!bwp (cadr wps))
(eq? #!bwp (ephemeron-key (cadr es)))
(eq? #!bwp (ephemeron-value (cadr es)))
(loop (cddr es) (cddr wps) (cdr saved)))])))
;; ----------------------------------------
;; Stress test to check that the GC doesn't suffer from quadratic
;; behavior
(begin
(define (wrapper v) (list 1 2 3 4 5 v))
;; Create a chain of ephemerons where we have all
;; the the ephemerons immediately in a list,
;; but we discover the keys one at a time
(define (mk n prev-key es)
(cond
[(zero? n)
(values prev-key es)]
[else
(let ([key (gensym)])
(mk (sub1 n)
key
(cons (ephemeron-cons key (wrapper prev-key))
es)))]))
;; Create a chain of ephemerons where we have all
;; of the keys immediately in a list,
;; but we discover the ephemerons one at a time
(define (mk* n prev-e keys)
(cond
[(zero? n)
(values prev-e keys)]
[else
(let ([key (gensym)])
(mk* (sub1 n)
(ephemeron-cons key (wrapper prev-e))
(cons key
keys)))]))
(define (measure-time n keep-alive)
;; Hang the discover-keys-one-at-a-time chain
;; off the end of the discover-ephemerons-one-at-a-time
;; chain, which is the most complex case for avoiding
;; quadratic GC times
(define-values (key es) (mk n (gensym) '()))
(define-values (root holds) (mk* n key es))
(define start (current-time))
(collect (collect-maximum-generation))
(let ([delta (time-difference (current-time) start)])
;; Sanity check on ephemerons
(for-each (lambda (e)
(when (eq? #!bwp (ephemeron-key e))
(error 'check "oops")))
es)
;; Keep `root` and `holds` live:
(keep-alive (cons root holds))
;; Return duration:
delta))
(define N 10000)
;; The first time should be roughy x10 the second (not x100)
(let loop ([tries 3])
(define dummy #f)
(define (keep-alive v) (set! dummy (cons dummy v)))
(define t1 (measure-time (* 10 N) keep-alive))
(define dummy2 (set! dummy #f))
(define t2 (measure-time N keep-alive))
(define (duration->inexact t) (+ (* (time-second t) 1e9)
(time-nanosecond t)))
(set! dummy #f)
(or (< (/ (duration->inexact t1) (duration->inexact t2)) 20)
(and (positive? tries)
(loop (sub1 tries))))))
;; ----------------------------------------
;; Check interaction of mutation and generations
;; This check disables interrups so that a garbage collection
;; happens only for the explicit `collect` request.
(with-interrupts-disabled
(let ([e (ephemeron-cons (gensym) 'ok)])
(collect) ; => `e` is moved to generation 1
(and
(eq? #!bwp (ephemeron-key e))
(eq? #!bwp (ephemeron-value e))
(let ([s (gensym)])
(set-car! e s)
(set-cdr! e 'ok-again)
(collect) ; => `s` is moved to generation 1
(and
(eq? s (ephemeron-key e))
(eq? 'ok-again (ephemeron-value e))
(begin
(set! s #f)
(collect 1) ; collect former `s`
(and
(eq? #!bwp (ephemeron-key e))
(eq? #!bwp (ephemeron-value e)))))))))
;; ----------------------------------------
;; Check fasl:
(let ([s (gensym)])
(define-values (o get) (open-bytevector-output-port))
(fasl-write (list s
(ephemeron-cons s 'ok))
o)
(let* ([l (fasl-read (open-bytevector-input-port (get)))]
[e (cadr l)])
(and
(eq? (car l) (ephemeron-key e))
(eq? 'ok (ephemeron-value e))
(begin
(set! s #f)
(set! l #f)
(collect (collect-maximum-generation))
(and
(eq? #!bwp (ephemeron-key e))
(eq? #!bwp (ephemeron-value e))))))))
(mat $primitive
(procedure? #%car)
(procedure? #2%car)

View File

@ -1173,6 +1173,37 @@
(hashtable-delete! ht 'a)
(list (hashtable-size ht) (= (#%$hashtable-veclen ht) len)))
'(0 #t))
; test that weak-hashtable values do not imply that values
; are reachable
(let ([wk1 (list 1)]
[wk2 (list 2)]
[wk3 (list 3)]
[wk4 (list 4)]
[ht (make-weak-eq-hashtable)])
(hashtable-set! ht wk1 wk1)
(hashtable-set! ht wk2 wk1)
(hashtable-set! ht wk3 wk3)
(hashtable-set! ht wk4 wk2)
(collect (collect-maximum-generation))
(and
(same-elements? (hashtable-keys ht) '#((1) (2) (3) (4)))
(equal? (hashtable-ref ht wk1 #f) wk1)
(equal? (hashtable-ref ht wk2 #f) wk1)
(equal? (hashtable-ref ht wk3 #f) wk3)
(equal? (hashtable-ref ht wk4 #f) wk2)
(begin
(set! wk1 #f)
(set! wk2 #f)
(set! wk3 #f)
(collect (collect-maximum-generation))
(and
(same-elements? (hashtable-keys ht) '#((1) (2) (4)))
(equal? (hashtable-ref ht wk4 #f) '(2))
(begin
(set! wk4 #f)
(collect (collect-maximum-generation))
(same-elements? (hashtable-keys ht) '#()))))))
)
(mat eq-hashtable-cell

View File

@ -440,7 +440,7 @@
(define-constant fasl-type-small-integer 25)
(define-constant fasl-type-base-rtd 26)
(define-constant fasl-type-fxvector 27)
; 28
(define-constant fasl-type-ephemeron 28)
(define-constant fasl-type-bytevector 29)
(define-constant fasl-type-weak-pair 30)
(define-constant fasl-type-eq-hashtable 31)
@ -640,15 +640,16 @@
(symbol "symbol" #\x 2) ;
(port "port" #\q 3) ;
(weakpair "weakpr" #\w 4) ;
(pure "pure" #\p 5) ; swept immutable objects allocated here (all ptrs)
(continuation "cont" #\k 6) ;
(code "code" #\c 7) ;
(pure-typed-object "p-tobj" #\r 8) ;
(impure-record "ip-rec" #\s 9)) ;
(ephemeron "emph" #\e 5) ;
(pure "pure" #\p 6) ; swept immutable objects allocated here (all ptrs)
(continuation "cont" #\k 7) ;
(code "code" #\c 8) ;
(pure-typed-object "p-tobj" #\r 9) ;
(impure-record "ip-rec" #\s 10)) ;
(unswept
(data "data" #\d 10))) ; unswept objects allocated here
(data "data" #\d 11))) ; unswept objects allocated here
(unreal
(empty "empty" #\e 11))) ; available segments
(empty "empty" #\e 12))) ; available segments
;;; enumeration of types for which gc tracks object counts
;;; also update gc.c
@ -678,7 +679,8 @@
(define-constant countof-locked 22)
(define-constant countof-guardian 23)
(define-constant countof-oblist 24)
(define-constant countof-types 25)
(define-constant countof-ephemeron 25)
(define-constant countof-types 26)
;;; type-fixnum is assumed to be all zeros by at least by vector, fxvector,
;;; and bytevector index checks
@ -1182,6 +1184,12 @@
([iptr type]
[ptr ref]))
(define-primitive-structure-disps ephemeron type-pair
([ptr car]
[ptr cdr]
[ptr next] ; `next` is needed by the GC to keep track of pending ephemerons
[ptr trigger-next])) ; `trigger-next` is similar, but for segment-specific lists
(define-primitive-structure-disps tlc type-typed-object
([iptr type]
[ptr keyval]

View File

@ -204,26 +204,32 @@
(define wrf-pair
(lambda (x p t a?)
(if (weak-pair? x)
(begin
(put-u8 p (constant fasl-type-weak-pair))
(wrf (car x) p t a?)
(wrf (cdr x) p t a?))
(begin ; more like list*
(put-u8 p (constant fasl-type-pair))
(let ([n (let wrf-pair-loop0 ([n 1] [x (cdr x)])
; cut off at end or at shared structure
(if (and (pair? x)
(not (weak-pair? x))
(not (eq-hashtable-ref (table-hash t) x #f)))
(wrf-pair-loop0 (fx+ n 1) (cdr x))
n))])
(put-uptr p n)
(let wrf-pair-loop1 ([x x] [n n])
(wrf (car x) p t a?)
(if (fx= n 1)
(wrf (cdr x) p t a?)
(wrf-pair-loop1 (cdr x) (fx- n 1)))))))))
(cond
[(weak-pair? x)
(put-u8 p (constant fasl-type-weak-pair))
(wrf (car x) p t a?)
(wrf (cdr x) p t a?)]
[(ephemeron-pair? x)
(put-u8 p (constant fasl-type-ephemeron))
(wrf (car x) p t a?)
(wrf (cdr x) p t a?)]
[else
; more like list*
(put-u8 p (constant fasl-type-pair))
(let ([n (let wrf-pair-loop0 ([n 1] [x (cdr x)])
; cut off at end or at shared structure
(if (and (pair? x)
(not (weak-pair? x))
(not (ephemeron-pair? x))
(not (eq-hashtable-ref (table-hash t) x #f)))
(wrf-pair-loop0 (fx+ n 1) (cdr x))
n))])
(put-uptr p n)
(let wrf-pair-loop1 ([x x] [n n])
(wrf (car x) p t a?)
(if (fx= n 1)
(wrf (cdr x) p t a?)
(wrf-pair-loop1 (cdr x) (fx- n 1)))))])))
(define wrf-symbol
(lambda (x p t a?)

View File

@ -1435,7 +1435,7 @@
[b (vector-ref vec idx)])
(lookup-keyval x b
values
(let ([keyval (if (eq-ht-weak? h) (weak-cons x v) (cons x v))])
(let ([keyval (if (eq-ht-weak? h) (ephemeron-cons x v) (cons x v))])
(vector-set! vec idx ($make-tlc h keyval b))
(incr-size! h vec)
keyval))))
@ -1451,7 +1451,7 @@
(begin
(vector-set! vec idx
($make-tlc h
(if (eq-ht-weak? h) (weak-cons x v) (cons x v))
(if (eq-ht-weak? h) (ephemeron-cons x v) (cons x v))
b))
(incr-size! h vec))))))

View File

@ -781,6 +781,11 @@
(definit INITBOXREF box ref)
(defset SETBOXREF box ref)
(defref EPHEMERONNEXT ephemeron next)
(definit INITEPHEMERONNEXT ephemeron next)
(defref EPHEMERONTRIGGERNEXT ephemeron trigger-next)
(definit INITEPHEMERONTRIGGERNEXT ephemeron trigger-next)
(defref TLCTYPE tlc type)
(defref TLCKEYVAL tlc keyval)
(defref TLCHT tlc ht)

View File

@ -1019,7 +1019,7 @@ Documentation notes:
b
($make-tlc h2
(let* ([keyval ($tlc-keyval b)] [key (car keyval)] [val (cdr keyval)])
(if weak? (weak-cons key val) (cons key val)))
(if weak? (ephemeron-cons key val) (cons key val)))
(inner ($tlc-next b))))))
(outer (fx+ i 1)))))
h2))))

View File

@ -1241,6 +1241,8 @@
(environment? [sig [(ptr) -> (boolean)]] [flags pure unrestricted mifoldable discard])
(environment-mutable? [sig [(environment) -> (boolean)]] [flags pure mifoldable discard])
(environment-symbols [sig [(environment) -> (list)]] [flags true])
(ephemeron-cons [sig [(ptr ptr) -> (ptr)]] [flags unrestricted alloc])
(ephemeron-pair? [sig [(ptr) -> (boolean)]] [flags pure unrestricted mifoldable discard])
(errorf [sig [(who string sub-ptr ...) -> (bottom)]] [flags abort-op]) ; second arg is format string
(eq-hashtable? [sig [(ptr) -> (boolean)]] [flags pure unrestricted mifoldable discard])
(eq-hashtable-cell [sig [(eq-hashtable ptr ptr) -> ((ptr . ptr))]] [flags true])

View File

@ -63,6 +63,16 @@
(scheme-object)
scheme-object))
(define ephemeron-cons
(foreign-procedure "(cs)s_ephemeron_cons"
(scheme-object scheme-object)
scheme-object))
(define ephemeron-pair?
(foreign-procedure "(cs)s_ephemeron_pairp"
(scheme-object)
scheme-object))
(define $split-continuation
(foreign-procedure "(cs)single_continuation"
(scheme-object iptr)