diff --git a/LOG b/LOG index 922b7b077f..effc48cf36 100644 --- a/LOG +++ b/LOG @@ -456,3 +456,9 @@ - fix overflow detection for fxsll, fxarithmetic-shift-left, and fxarithmetic-shift library.ss, fx.ms, release_notes.stex +- added ephemeron pairs and changed weak hashtables to use + ephemeron pairs for key--value mapping to avoid the key-in-value + problem + prims.ss, primdata.ss, newhash.ss, fasl.ss, mkheader.ss + cmacro.ss, prim5.c, fasl.c, gc.c, gcwrapper.c, types.h, + 4.ms, hash.ms, objects.stex, smgmt.stex, csug.bib diff --git a/c/gcwrapper.c b/c/gcwrapper.c index 1355f5164d..c9445920d9 100644 --- a/c/gcwrapper.c +++ b/c/gcwrapper.c @@ -507,7 +507,7 @@ void S_check_heap(aftergc) IBOOL aftergc; { S_checkheap_errors += 1; printf("!!! unexpected generation %d segment %#tx in space_new\n", g, (ptrdiff_t)seg); } - } else if (s == space_impure || s == space_symbol || s == space_pure || s == space_weakpair) { + } else if (s == space_impure || s == space_symbol || s == space_pure || s == space_weakpair || s == space_ephemeron) { /* out of date: doesn't handle space_port, space_continuation, space_code, space_pure_typed_object, space_impure_record */ nl = (ptr *)S_G.next_loc[s][g]; @@ -532,7 +532,7 @@ void S_check_heap(aftergc) IBOOL aftergc; { /* verify that dirty bits are set appropriately */ /* out of date: doesn't handle space_impure_record, space_port, and maybe others */ /* also doesn't check the SYMCODE for symbols */ - if (s == space_impure || s == space_symbol || s == space_weakpair) { + if (s == space_impure || s == space_symbol || s == space_weakpair || s == space_ephemeron) { found_eos = 0; pp2 = pp1 = build_ptr(seg, 0); for (d = 0; d < cards_per_segment; d += 1) { @@ -590,7 +590,7 @@ void S_check_heap(aftergc) IBOOL aftergc; { } } } - if (aftergc && s != space_empty && !(s & space_locked) && (g == 0 || (s != space_impure && s != space_symbol && s != space_port && s != space_weakpair && s != space_impure_record))) { + if (aftergc && s != space_empty && !(s & space_locked) && (g == 0 || (s != space_impure && s != space_symbol && s != space_port && s != space_weakpair && s != space_ephemeron && s != space_impure_record))) { for (d = 0; d < cards_per_segment; d += 1) { if (si->dirty_bytes[d] != 0xff) { S_checkheap_errors += 1; @@ -671,7 +671,7 @@ static void check_dirty() { S_checkheap_errors += 1; printf("!!! (check_dirty): dirty byte = %d for segment %#tx in %d -> %d dirty list\n", mingval, (ptrdiff_t)(si->number), from_g, to_g); } - if (s != space_new && s != space_impure && s != space_symbol && s != space_port && s != space_impure_record && s != space_weakpair) { + if (s != space_new && s != space_impure && s != space_symbol && s != space_port && s != space_impure_record && s != space_weakpair && s != space_ephemeron) { S_checkheap_errors += 1; printf("!!! (check_dirty): unexpected space %d for dirty segment %#tx\n", s, (ptrdiff_t)(si->number)); } @@ -686,6 +686,7 @@ static void check_dirty() { check_dirty_space(space_port); check_dirty_space(space_impure_record); check_dirty_space(space_weakpair); + check_dirty_space(space_ephemeron); fflush(stdout); } diff --git a/csug/csug.bib b/csug/csug.bib index 68bfe52e45..50d7eac70a 100644 --- a/csug/csug.bib +++ b/csug/csug.bib @@ -555,3 +555,14 @@ year = 2008} address = {Indianapolis, IN, USA}, school = {Indiana University} } + +@inproceedings{Hayes:ephemerons, + author = {Barry Hayes}, + title = {Ephemerons: a New Finalization Mechanism}, + booktitle = {\it Proceedings of the 12th ACM SIGPLAN + Conference on Object-Oriented Languages, Programming, Systems, + and Applications}, + pages = {176--183}, + url = {https://doi.org/10.1145/263700.263733}, + year = {1997} +} diff --git a/csug/objects.stex b/csug/objects.stex index c20ddd28ca..0d4e5569e4 100644 --- a/csug/objects.stex +++ b/csug/objects.stex @@ -1820,7 +1820,9 @@ except the keys of the hashtable are held weakly, i.e., they are not protected from the garbage collector. Keys reclaimed by the garbage collector are removed from the table, and their associated values are dropped the next time the table -is modified, if not sooner. +is modified, if not sooner. A value in the hashtable can refer to a +key in the hashtable without preventing the garbage collector from +reclaiming the key (because keys are paired values using ephemeron pairs). A copy of a weak eq or eqv hashtable created by \scheme{hashtable-copy} is also weak. diff --git a/csug/smgmt.stex b/csug/smgmt.stex index 2cdde59816..e7955f9562 100644 --- a/csug/smgmt.stex +++ b/csug/smgmt.stex @@ -309,7 +309,7 @@ memory footprint, while setting it to a larger value may result in fewer calls into the operating system to request and free memory space. -\section{Weak Pairs and Guardians\label{SECTGUARDWEAKPAIRS}} +\section{Weak Pairs, Ephemeron Pairs, and Guardians\label{SECTGUARDWEAKPAIRS}} \index{weak pairs}\index{weak pointers}\emph{Weak pairs} allow programs to maintain \emph{weak pointers} to objects. @@ -317,21 +317,30 @@ A weak pointer to an object does not prevent the object from being reclaimed by the storage management system, but it does remain valid as long as the object is otherwise accessible in the system. +\index{ephemeron pairs}\emph{Ephemeron pairs} are like weak pairs, but +ephemeron pairs combine two pointers where the second is retained only +as long as the first is retained. + \index{guardians}\emph{Guardians} allow programs to protect objects from deallocation by the garbage collector and to determine when the objects would otherwise have been deallocated. -Weak pairs and guardians allow programs to retain +Weak pairs, ephemeron pairs, and guardians allow programs to retain information about objects in separate data structures (such as hash tables) without concern that maintaining this information will cause -the objects to remain indefinitely in the system. +the objects to remain indefinitely in the system. Ephemeron pairs +allow such data structures to retain key--value combinations +where a value may refer to its key, but the combination +can be reclaimed if neither must be saved otherwise. In addition, guardians allow objects to be saved from deallocation indefinitely so that they can be reused or so that clean-up or other actions can be performed using the data stored within the objects. The implementation of guardians and weak pairs used by {\ChezScheme} -is described in~\cite{Dybvig:guardians}. +is described in~\cite{Dybvig:guardians}. Ephemerons are described +in~\cite{Hayes:ephemerons}, but the implementation in {\ChezScheme} +avoids quadratic-time worst-case behavior. %---------------------------------------------------------------------------- \entryheader\label{desc:weak-cons} @@ -417,6 +426,89 @@ dropped, but makes no guarantees about when this will occur. \endschemedisplay +%---------------------------------------------------------------------------- +\entryheader\label{desc:ephemeron-cons} +\formdef{ephemeron-cons}{\categoryprocedure}{(ephemeron-cons \var{obj_1} \var{obj_2})} +\returns a new ephemeron pair +\listlibraries +\endentryheader + +\noindent +\var{obj_1} becomes the car and \var{obj_2} becomes the cdr of the +new pair. +Ephemeron pairs are indistinguishable from ordinary pairs in all but two ways: + +\begin{itemize} +\item ephemeron pairs can be distinguished from pairs using the +\scheme{ephemeron-pair?} predicate, and + +\item ephemeron pairs maintain a weak pointer to the object in the +car of the pair, and the cdr of the pair is preserved only as long +as the car of the pair is preserved. +\end{itemize} + +\noindent + +An ephemeron pair behaves like a weak pair, but the cdr is treated +specially in addition to the car: the cdr of an ephemeron is set to +\scheme{#!bwp} at the same time that the car is set to \scheme{#!bwp}. +Since the car and cdr fields are set to \scheme{#!bwp} at the same +time, then the fact that the car object may be referenced through the +cdr object does not by itself imply that car must be preserved (unlike +a weak pair); instead, the car must be saved for some reason +independent of the cdr object. + +Like weak pairs and other pairs, ephemeron pairs may be altered using +\scheme{set-car!} and \scheme{set-cdr!}, and ephemeron pairs are +printed in the same manner as ordinary pairs; there is no reader +syntax for ephemeron pairs. + +\schemedisplay +(define x (cons 'a 'b)) +(define p (ephemeron-cons x x)) +(car p) ;=> (a . b) +(cdr p) ;=> (a . b) + +(define x (cons 'a 'b)) +(define p (ephemeron-cons x x)) +(set! x '*) +(collect) +(car p) ;=> #!bwp +(cdr p) ;=> #!bwp + +(define x (cons 'a 'b)) +(define p (weak-cons x x)) ; \var{not an ephemeron pair} +(set! x '*) +(collect) +(car p) ;=> (a . b) +(cdr p) ;=> (a . b) +\endschemedisplay + +\noindent +As for weak pairs, the last two expressions of the middle example +above may in fact return \scheme{(a . b)} if a garbage collection +promoting the pair into an older generation occurs prior to the +assignment of \scheme{x} to \scheme{*}. In the last example above, +however, the results of the last two expressions will always be +\scheme{(a . b)}, because the cdr of a weak pair holds a non-weak +reference, and that non-weak reference prevents the car field from becoming +\scheme{#!bwp}. + +%---------------------------------------------------------------------------- +\entryheader +\formdef{ephemeron-pair?}{\categoryprocedure}{(ephemeron-pair? \var{obj})} +\returns \scheme{#t} if obj is a ephemeron pair, \scheme{#f} otherwise +\listlibraries +\endentryheader + +\schemedisplay +(ephemeron-pair? (ephemeron-cons 'a 'b)) ;=> #t +(ephemeron-pair? (cons 'a 'b)) ;=> #f +(ephemeron-pair? (weaj-cons 'a 'b)) ;=> #f +(ephemeron-pair? "oops") ;=> #f +\endschemedisplay + + %---------------------------------------------------------------------------- \entryheader \formdef{bwp-object?}{\categoryprocedure}{(bwp-object? \var{obj})} @@ -473,7 +565,7 @@ subdivided into two disjoint subgroups: a subgroup referred to as ``accessible'' objects, and one referred to ``inaccessible'' objects. Inaccessible objects are objects that have been proven to be inaccessible (except through the guardian mechanism itself or through -the car field of a weak pair), and +the car field of a weak or ephemeron pair), and accessible objects are objects that have not been proven so. The word ``proven'' is important here: it may be that some objects in the accessible group are indeed inaccessible but @@ -516,7 +608,7 @@ migrated into an older generation.) Although an object registered without a representative and returned from a guardian has been proven otherwise -inaccessible (except possibly via the car field of a weak pair), it has +inaccessible (except possibly via the car field of a weak or ephemeron pair), it has not yet been reclaimed by the storage management system and will not be reclaimed until after the last nonweak pointer to it within or outside of the guardian system has been dropped. @@ -550,8 +642,8 @@ themselves can be registered with other guardians. An object that has been registered with a guardian without a representative and placed in -the car field of a weak pair remains in the car field of the -weak pair until after it has been returned from the guardian and +the car field of a weak or ephemeron pair remains in the car field of the +weak or ephemeron pair until after it has been returned from the guardian and dropped by the program or until the guardian itself is dropped. \schemedisplay @@ -577,7 +669,7 @@ This can also be forced by invoking \scheme{collect} several times.) On the other hand, if a representative (other than the object itself) is specified, the guarded object is dropped from the car field of the -weak pair at the same time as the representative becomes available +weak or ephemeron pair at the same time as the representative becomes available from the guardian. \schemedisplay @@ -592,7 +684,7 @@ from the guardian. \endschemedisplay The following example illustrates that the object is deallocated and -the car field of the weak pointer set to \scheme{#!bwp} when the guardian +the car field of the weak pair set to \scheme{#!bwp} when the guardian itself is dropped: \schemedisplay diff --git a/mats/4.ms b/mats/4.ms index 777fd76884..fff7e6e311 100644 --- a/mats/4.ms +++ b/mats/4.ms @@ -3107,6 +3107,189 @@ (bwp-object? (car x)))))) ) +(mat ephemeron + (begin + (define ephemeron-key car) + (define ephemeron-value cdr) + + (define gdn (make-guardian)) + #t) + + (ephemeron-pair? (ephemeron-cons 1 2)) + + (begin + ;; ---------------------------------------- + ;; Check that the ephemeron value doesn't retain + ;; itself as an epehemeron key + (define-values (es wps saved) + (let loop ([n 1000] [es '()] [wps '()] [saved '()]) + (cond + [(zero? n) + (values es wps saved)] + [else + (let ([k1 (gensym)] + [k2 (gensym)]) + (gdn k2) + (loop (sub1 n) + (cons (ephemeron-cons k1 (box k1)) + (cons (ephemeron-cons k2 (box k2)) + es)) + (weak-cons k1 (weak-cons k2 wps)) + (cons k1 saved)))]))) + + (collect (collect-maximum-generation)) + + ;; All now waiting to be reported by the guardian + (let loop ([es es] [wps wps] [saved saved]) + (cond + [(null? saved) #t] + [else + (and + (eq? (car saved) (car wps)) + (eq? (car saved) (ephemeron-key (car es))) + (eq? (car saved) (unbox (ephemeron-value (car es)))) + (eq? (cadr wps) (ephemeron-key (cadr es))) + (eq? (cadr wps) (unbox (ephemeron-value (cadr es)))) + (loop (cddr es) (cddr wps) (cdr saved)))]))) + + (begin + ;; Report each from the guardian: + (let loop ([saved saved]) + (unless (null? saved) + (gdn) + (loop (cdr saved)))) + + (collect (collect-maximum-generation)) + + (let loop ([es es] [wps wps] [saved saved]) + (cond + [(null? saved) #t] + [else + (and + (eq? (car saved) (car wps)) + (eq? (car saved) (ephemeron-key (car es))) + (eq? (car saved) (unbox (ephemeron-value (car es)))) + (eq? #!bwp (cadr wps)) + (eq? #!bwp (ephemeron-key (cadr es))) + (eq? #!bwp (ephemeron-value (cadr es))) + (loop (cddr es) (cddr wps) (cdr saved)))]))) + + ;; ---------------------------------------- + ;; Stress test to check that the GC doesn't suffer from quadratic + ;; behavior + (begin + (define (wrapper v) (list 1 2 3 4 5 v)) + + ;; Create a chain of ephemerons where we have all + ;; the the ephemerons immediately in a list, + ;; but we discover the keys one at a time + (define (mk n prev-key es) + (cond + [(zero? n) + (values prev-key es)] + [else + (let ([key (gensym)]) + (mk (sub1 n) + key + (cons (ephemeron-cons key (wrapper prev-key)) + es)))])) + + ;; Create a chain of ephemerons where we have all + ;; of the keys immediately in a list, + ;; but we discover the ephemerons one at a time + (define (mk* n prev-e keys) + (cond + [(zero? n) + (values prev-e keys)] + [else + (let ([key (gensym)]) + (mk* (sub1 n) + (ephemeron-cons key (wrapper prev-e)) + (cons key + keys)))])) + + (define (measure-time n keep-alive) + ;; Hang the discover-keys-one-at-a-time chain + ;; off the end of the discover-ephemerons-one-at-a-time + ;; chain, which is the most complex case for avoiding + ;; quadratic GC times + (define-values (key es) (mk n (gensym) '())) + (define-values (root holds) (mk* n key es)) + + (define start (current-time)) + (collect (collect-maximum-generation)) + (let ([delta (time-difference (current-time) start)]) + ;; Sanity check on ephemerons + (for-each (lambda (e) + (when (eq? #!bwp (ephemeron-key e)) + (error 'check "oops"))) + es) + ;; Keep `root` and `holds` live: + (keep-alive (cons root holds)) + ;; Return duration: + delta)) + + (define N 10000) + + ;; The first time should be roughy x10 the second (not x100) + (let loop ([tries 3]) + (define dummy #f) + (define (keep-alive v) (set! dummy (cons dummy v))) + (define t1 (measure-time (* 10 N) keep-alive)) + (define dummy2 (set! dummy #f)) + (define t2 (measure-time N keep-alive)) + (define (duration->inexact t) (+ (* (time-second t) 1e9) + (time-nanosecond t))) + (set! dummy #f) + (or (< (/ (duration->inexact t1) (duration->inexact t2)) 20) + (and (positive? tries) + (loop (sub1 tries)))))) + + ;; ---------------------------------------- + ;; Check interaction of mutation and generations + + ;; This check disables interrups so that a garbage collection + ;; happens only for the explicit `collect` request. + (with-interrupts-disabled + (let ([e (ephemeron-cons (gensym) 'ok)]) + (collect) ; => `e` is moved to generation 1 + (and + (eq? #!bwp (ephemeron-key e)) + (eq? #!bwp (ephemeron-value e)) + (let ([s (gensym)]) + (set-car! e s) + (set-cdr! e 'ok-again) + (collect) ; => `s` is moved to generation 1 + (and + (eq? s (ephemeron-key e)) + (eq? 'ok-again (ephemeron-value e)) + (begin + (set! s #f) + (collect 1) ; collect former `s` + (and + (eq? #!bwp (ephemeron-key e)) + (eq? #!bwp (ephemeron-value e))))))))) + + ;; ---------------------------------------- + ;; Check fasl: + (let ([s (gensym)]) + (define-values (o get) (open-bytevector-output-port)) + (fasl-write (list s + (ephemeron-cons s 'ok)) + o) + (let* ([l (fasl-read (open-bytevector-input-port (get)))] + [e (cadr l)]) + (and + (eq? (car l) (ephemeron-key e)) + (eq? 'ok (ephemeron-value e)) + (begin + (set! s #f) + (set! l #f) + (collect (collect-maximum-generation)) + (and + (eq? #!bwp (ephemeron-key e)) + (eq? #!bwp (ephemeron-value e)))))))) + (mat $primitive (procedure? #%car) (procedure? #2%car) diff --git a/mats/hash.ms b/mats/hash.ms index d781cdd524..dcc77b45f9 100644 --- a/mats/hash.ms +++ b/mats/hash.ms @@ -1173,6 +1173,37 @@ (hashtable-delete! ht 'a) (list (hashtable-size ht) (= (#%$hashtable-veclen ht) len))) '(0 #t)) + + ; test that weak-hashtable values do not imply that values + ; are reachable + (let ([wk1 (list 1)] + [wk2 (list 2)] + [wk3 (list 3)] + [wk4 (list 4)] + [ht (make-weak-eq-hashtable)]) + (hashtable-set! ht wk1 wk1) + (hashtable-set! ht wk2 wk1) + (hashtable-set! ht wk3 wk3) + (hashtable-set! ht wk4 wk2) + (collect (collect-maximum-generation)) + (and + (same-elements? (hashtable-keys ht) '#((1) (2) (3) (4))) + (equal? (hashtable-ref ht wk1 #f) wk1) + (equal? (hashtable-ref ht wk2 #f) wk1) + (equal? (hashtable-ref ht wk3 #f) wk3) + (equal? (hashtable-ref ht wk4 #f) wk2) + (begin + (set! wk1 #f) + (set! wk2 #f) + (set! wk3 #f) + (collect (collect-maximum-generation)) + (and + (same-elements? (hashtable-keys ht) '#((1) (2) (4))) + (equal? (hashtable-ref ht wk4 #f) '(2)) + (begin + (set! wk4 #f) + (collect (collect-maximum-generation)) + (same-elements? (hashtable-keys ht) '#())))))) ) (mat eq-hashtable-cell