tests and docs for ephemerons

original commit: 2ea7dcdfca1dea2c89c51c7e9ccd692ba673ba22
This commit is contained in:
Matthew Flatt 2017-05-01 10:26:15 -06:00
parent 0d5340c061
commit 211fe4cbd7
7 changed files with 341 additions and 15 deletions

6
LOG
View File

@ -456,3 +456,9 @@
- fix overflow detection for fxsll, fxarithmetic-shift-left, and - fix overflow detection for fxsll, fxarithmetic-shift-left, and
fxarithmetic-shift fxarithmetic-shift
library.ss, fx.ms, release_notes.stex library.ss, fx.ms, release_notes.stex
- added ephemeron pairs and changed weak hashtables to use
ephemeron pairs for key--value mapping to avoid the key-in-value
problem
prims.ss, primdata.ss, newhash.ss, fasl.ss, mkheader.ss
cmacro.ss, prim5.c, fasl.c, gc.c, gcwrapper.c, types.h,
4.ms, hash.ms, objects.stex, smgmt.stex, csug.bib

View File

@ -507,7 +507,7 @@ void S_check_heap(aftergc) IBOOL aftergc; {
S_checkheap_errors += 1; S_checkheap_errors += 1;
printf("!!! unexpected generation %d segment %#tx in space_new\n", g, (ptrdiff_t)seg); printf("!!! unexpected generation %d segment %#tx in space_new\n", g, (ptrdiff_t)seg);
} }
} else if (s == space_impure || s == space_symbol || s == space_pure || s == space_weakpair) { } else if (s == space_impure || s == space_symbol || s == space_pure || s == space_weakpair || s == space_ephemeron) {
/* out of date: doesn't handle space_port, space_continuation, space_code, space_pure_typed_object, space_impure_record */ /* out of date: doesn't handle space_port, space_continuation, space_code, space_pure_typed_object, space_impure_record */
nl = (ptr *)S_G.next_loc[s][g]; nl = (ptr *)S_G.next_loc[s][g];
@ -532,7 +532,7 @@ void S_check_heap(aftergc) IBOOL aftergc; {
/* verify that dirty bits are set appropriately */ /* verify that dirty bits are set appropriately */
/* out of date: doesn't handle space_impure_record, space_port, and maybe others */ /* out of date: doesn't handle space_impure_record, space_port, and maybe others */
/* also doesn't check the SYMCODE for symbols */ /* also doesn't check the SYMCODE for symbols */
if (s == space_impure || s == space_symbol || s == space_weakpair) { if (s == space_impure || s == space_symbol || s == space_weakpair || s == space_ephemeron) {
found_eos = 0; found_eos = 0;
pp2 = pp1 = build_ptr(seg, 0); pp2 = pp1 = build_ptr(seg, 0);
for (d = 0; d < cards_per_segment; d += 1) { for (d = 0; d < cards_per_segment; d += 1) {
@ -590,7 +590,7 @@ void S_check_heap(aftergc) IBOOL aftergc; {
} }
} }
} }
if (aftergc && s != space_empty && !(s & space_locked) && (g == 0 || (s != space_impure && s != space_symbol && s != space_port && s != space_weakpair && s != space_impure_record))) { if (aftergc && s != space_empty && !(s & space_locked) && (g == 0 || (s != space_impure && s != space_symbol && s != space_port && s != space_weakpair && s != space_ephemeron && s != space_impure_record))) {
for (d = 0; d < cards_per_segment; d += 1) { for (d = 0; d < cards_per_segment; d += 1) {
if (si->dirty_bytes[d] != 0xff) { if (si->dirty_bytes[d] != 0xff) {
S_checkheap_errors += 1; S_checkheap_errors += 1;
@ -671,7 +671,7 @@ static void check_dirty() {
S_checkheap_errors += 1; S_checkheap_errors += 1;
printf("!!! (check_dirty): dirty byte = %d for segment %#tx in %d -> %d dirty list\n", mingval, (ptrdiff_t)(si->number), from_g, to_g); printf("!!! (check_dirty): dirty byte = %d for segment %#tx in %d -> %d dirty list\n", mingval, (ptrdiff_t)(si->number), from_g, to_g);
} }
if (s != space_new && s != space_impure && s != space_symbol && s != space_port && s != space_impure_record && s != space_weakpair) { if (s != space_new && s != space_impure && s != space_symbol && s != space_port && s != space_impure_record && s != space_weakpair && s != space_ephemeron) {
S_checkheap_errors += 1; S_checkheap_errors += 1;
printf("!!! (check_dirty): unexpected space %d for dirty segment %#tx\n", s, (ptrdiff_t)(si->number)); printf("!!! (check_dirty): unexpected space %d for dirty segment %#tx\n", s, (ptrdiff_t)(si->number));
} }
@ -686,6 +686,7 @@ static void check_dirty() {
check_dirty_space(space_port); check_dirty_space(space_port);
check_dirty_space(space_impure_record); check_dirty_space(space_impure_record);
check_dirty_space(space_weakpair); check_dirty_space(space_weakpair);
check_dirty_space(space_ephemeron);
fflush(stdout); fflush(stdout);
} }

View File

@ -555,3 +555,14 @@ year = 2008}
address = {Indianapolis, IN, USA}, address = {Indianapolis, IN, USA},
school = {Indiana University} school = {Indiana University}
} }
@inproceedings{Hayes:ephemerons,
author = {Barry Hayes},
title = {Ephemerons: a New Finalization Mechanism},
booktitle = {\it Proceedings of the 12th ACM SIGPLAN
Conference on Object-Oriented Languages, Programming, Systems,
and Applications},
pages = {176--183},
url = {https://doi.org/10.1145/263700.263733},
year = {1997}
}

View File

@ -1820,7 +1820,9 @@ except the keys of the hashtable are held weakly, i.e., they are not
protected from the garbage collector. protected from the garbage collector.
Keys reclaimed by the garbage collector are removed from the table, Keys reclaimed by the garbage collector are removed from the table,
and their associated values are dropped the next time the table and their associated values are dropped the next time the table
is modified, if not sooner. is modified, if not sooner. A value in the hashtable can refer to a
key in the hashtable without preventing the garbage collector from
reclaiming the key (because keys are paired values using ephemeron pairs).
A copy of a weak eq or eqv hashtable created by \scheme{hashtable-copy} is A copy of a weak eq or eqv hashtable created by \scheme{hashtable-copy} is
also weak. also weak.

View File

@ -309,7 +309,7 @@ memory footprint, while setting it to a larger value may result in fewer
calls into the operating system to request and free memory space. calls into the operating system to request and free memory space.
\section{Weak Pairs and Guardians\label{SECTGUARDWEAKPAIRS}} \section{Weak Pairs, Ephemeron Pairs, and Guardians\label{SECTGUARDWEAKPAIRS}}
\index{weak pairs}\index{weak pointers}\emph{Weak pairs} allow programs \index{weak pairs}\index{weak pointers}\emph{Weak pairs} allow programs
to maintain \emph{weak pointers} to objects. to maintain \emph{weak pointers} to objects.
@ -317,21 +317,30 @@ A weak pointer to an object does not prevent the object from being
reclaimed by the storage management system, but it does remain valid as reclaimed by the storage management system, but it does remain valid as
long as the object is otherwise accessible in the system. long as the object is otherwise accessible in the system.
\index{ephemeron pairs}\emph{Ephemeron pairs} are like weak pairs, but
ephemeron pairs combine two pointers where the second is retained only
as long as the first is retained.
\index{guardians}\emph{Guardians} \index{guardians}\emph{Guardians}
allow programs to protect objects from deallocation allow programs to protect objects from deallocation
by the garbage collector and to determine when the objects would by the garbage collector and to determine when the objects would
otherwise have been deallocated. otherwise have been deallocated.
Weak pairs and guardians allow programs to retain Weak pairs, ephemeron pairs, and guardians allow programs to retain
information about objects in separate data structures (such as hash information about objects in separate data structures (such as hash
tables) without concern that maintaining this information will cause tables) without concern that maintaining this information will cause
the objects to remain indefinitely in the system. the objects to remain indefinitely in the system. Ephemeron pairs
allow such data structures to retain key--value combinations
where a value may refer to its key, but the combination
can be reclaimed if neither must be saved otherwise.
In addition, guardians allow objects to be saved from deallocation In addition, guardians allow objects to be saved from deallocation
indefinitely so that they can be reused or so that clean-up or other indefinitely so that they can be reused or so that clean-up or other
actions can be performed using the data stored within the objects. actions can be performed using the data stored within the objects.
The implementation of guardians and weak pairs used by {\ChezScheme} The implementation of guardians and weak pairs used by {\ChezScheme}
is described in~\cite{Dybvig:guardians}. is described in~\cite{Dybvig:guardians}. Ephemerons are described
in~\cite{Hayes:ephemerons}, but the implementation in {\ChezScheme}
avoids quadratic-time worst-case behavior.
%---------------------------------------------------------------------------- %----------------------------------------------------------------------------
\entryheader\label{desc:weak-cons} \entryheader\label{desc:weak-cons}
@ -417,6 +426,89 @@ dropped, but makes no guarantees about when this will occur.
\endschemedisplay \endschemedisplay
%----------------------------------------------------------------------------
\entryheader\label{desc:ephemeron-cons}
\formdef{ephemeron-cons}{\categoryprocedure}{(ephemeron-cons \var{obj_1} \var{obj_2})}
\returns a new ephemeron pair
\listlibraries
\endentryheader
\noindent
\var{obj_1} becomes the car and \var{obj_2} becomes the cdr of the
new pair.
Ephemeron pairs are indistinguishable from ordinary pairs in all but two ways:
\begin{itemize}
\item ephemeron pairs can be distinguished from pairs using the
\scheme{ephemeron-pair?} predicate, and
\item ephemeron pairs maintain a weak pointer to the object in the
car of the pair, and the cdr of the pair is preserved only as long
as the car of the pair is preserved.
\end{itemize}
\noindent
An ephemeron pair behaves like a weak pair, but the cdr is treated
specially in addition to the car: the cdr of an ephemeron is set to
\scheme{#!bwp} at the same time that the car is set to \scheme{#!bwp}.
Since the car and cdr fields are set to \scheme{#!bwp} at the same
time, then the fact that the car object may be referenced through the
cdr object does not by itself imply that car must be preserved (unlike
a weak pair); instead, the car must be saved for some reason
independent of the cdr object.
Like weak pairs and other pairs, ephemeron pairs may be altered using
\scheme{set-car!} and \scheme{set-cdr!}, and ephemeron pairs are
printed in the same manner as ordinary pairs; there is no reader
syntax for ephemeron pairs.
\schemedisplay
(define x (cons 'a 'b))
(define p (ephemeron-cons x x))
(car p) ;=> (a . b)
(cdr p) ;=> (a . b)
(define x (cons 'a 'b))
(define p (ephemeron-cons x x))
(set! x '*)
(collect)
(car p) ;=> #!bwp
(cdr p) ;=> #!bwp
(define x (cons 'a 'b))
(define p (weak-cons x x)) ; \var{not an ephemeron pair}
(set! x '*)
(collect)
(car p) ;=> (a . b)
(cdr p) ;=> (a . b)
\endschemedisplay
\noindent
As for weak pairs, the last two expressions of the middle example
above may in fact return \scheme{(a . b)} if a garbage collection
promoting the pair into an older generation occurs prior to the
assignment of \scheme{x} to \scheme{*}. In the last example above,
however, the results of the last two expressions will always be
\scheme{(a . b)}, because the cdr of a weak pair holds a non-weak
reference, and that non-weak reference prevents the car field from becoming
\scheme{#!bwp}.
%----------------------------------------------------------------------------
\entryheader
\formdef{ephemeron-pair?}{\categoryprocedure}{(ephemeron-pair? \var{obj})}
\returns \scheme{#t} if obj is a ephemeron pair, \scheme{#f} otherwise
\listlibraries
\endentryheader
\schemedisplay
(ephemeron-pair? (ephemeron-cons 'a 'b)) ;=> #t
(ephemeron-pair? (cons 'a 'b)) ;=> #f
(ephemeron-pair? (weaj-cons 'a 'b)) ;=> #f
(ephemeron-pair? "oops") ;=> #f
\endschemedisplay
%---------------------------------------------------------------------------- %----------------------------------------------------------------------------
\entryheader \entryheader
\formdef{bwp-object?}{\categoryprocedure}{(bwp-object? \var{obj})} \formdef{bwp-object?}{\categoryprocedure}{(bwp-object? \var{obj})}
@ -473,7 +565,7 @@ subdivided into two disjoint subgroups: a subgroup referred to
as ``accessible'' objects, and one referred to ``inaccessible'' objects. as ``accessible'' objects, and one referred to ``inaccessible'' objects.
Inaccessible objects are objects that have been proven to be Inaccessible objects are objects that have been proven to be
inaccessible (except through the guardian mechanism itself or through inaccessible (except through the guardian mechanism itself or through
the car field of a weak pair), and the car field of a weak or ephemeron pair), and
accessible objects are objects that have not been proven so. accessible objects are objects that have not been proven so.
The word ``proven'' is important here: it may be that some objects in The word ``proven'' is important here: it may be that some objects in
the accessible group are indeed inaccessible but the accessible group are indeed inaccessible but
@ -516,7 +608,7 @@ migrated into an older generation.)
Although an object registered without a representative and returned from Although an object registered without a representative and returned from
a guardian has been proven otherwise a guardian has been proven otherwise
inaccessible (except possibly via the car field of a weak pair), it has inaccessible (except possibly via the car field of a weak or ephemeron pair), it has
not yet been reclaimed by the storage management system and will not be not yet been reclaimed by the storage management system and will not be
reclaimed until after the last nonweak pointer to it within or outside reclaimed until after the last nonweak pointer to it within or outside
of the guardian system has been dropped. of the guardian system has been dropped.
@ -550,8 +642,8 @@ themselves can be registered with other guardians.
An object that has been registered with a guardian without a An object that has been registered with a guardian without a
representative and placed in representative and placed in
the car field of a weak pair remains in the car field of the the car field of a weak or ephemeron pair remains in the car field of the
weak pair until after it has been returned from the guardian and weak or ephemeron pair until after it has been returned from the guardian and
dropped by the program or until the guardian itself is dropped. dropped by the program or until the guardian itself is dropped.
\schemedisplay \schemedisplay
@ -577,7 +669,7 @@ This can also be forced by invoking \scheme{collect} several times.)
On the other hand, if a representative (other than the object itself) On the other hand, if a representative (other than the object itself)
is specified, the guarded object is dropped from the car field of the is specified, the guarded object is dropped from the car field of the
weak pair at the same time as the representative becomes available weak or ephemeron pair at the same time as the representative becomes available
from the guardian. from the guardian.
\schemedisplay \schemedisplay
@ -592,7 +684,7 @@ from the guardian.
\endschemedisplay \endschemedisplay
The following example illustrates that the object is deallocated and The following example illustrates that the object is deallocated and
the car field of the weak pointer set to \scheme{#!bwp} when the guardian the car field of the weak pair set to \scheme{#!bwp} when the guardian
itself is dropped: itself is dropped:
\schemedisplay \schemedisplay

183
mats/4.ms
View File

@ -3107,6 +3107,189 @@
(bwp-object? (car x)))))) (bwp-object? (car x))))))
) )
(mat ephemeron
(begin
(define ephemeron-key car)
(define ephemeron-value cdr)
(define gdn (make-guardian))
#t)
(ephemeron-pair? (ephemeron-cons 1 2))
(begin
;; ----------------------------------------
;; Check that the ephemeron value doesn't retain
;; itself as an epehemeron key
(define-values (es wps saved)
(let loop ([n 1000] [es '()] [wps '()] [saved '()])
(cond
[(zero? n)
(values es wps saved)]
[else
(let ([k1 (gensym)]
[k2 (gensym)])
(gdn k2)
(loop (sub1 n)
(cons (ephemeron-cons k1 (box k1))
(cons (ephemeron-cons k2 (box k2))
es))
(weak-cons k1 (weak-cons k2 wps))
(cons k1 saved)))])))
(collect (collect-maximum-generation))
;; All now waiting to be reported by the guardian
(let loop ([es es] [wps wps] [saved saved])
(cond
[(null? saved) #t]
[else
(and
(eq? (car saved) (car wps))
(eq? (car saved) (ephemeron-key (car es)))
(eq? (car saved) (unbox (ephemeron-value (car es))))
(eq? (cadr wps) (ephemeron-key (cadr es)))
(eq? (cadr wps) (unbox (ephemeron-value (cadr es))))
(loop (cddr es) (cddr wps) (cdr saved)))])))
(begin
;; Report each from the guardian:
(let loop ([saved saved])
(unless (null? saved)
(gdn)
(loop (cdr saved))))
(collect (collect-maximum-generation))
(let loop ([es es] [wps wps] [saved saved])
(cond
[(null? saved) #t]
[else
(and
(eq? (car saved) (car wps))
(eq? (car saved) (ephemeron-key (car es)))
(eq? (car saved) (unbox (ephemeron-value (car es))))
(eq? #!bwp (cadr wps))
(eq? #!bwp (ephemeron-key (cadr es)))
(eq? #!bwp (ephemeron-value (cadr es)))
(loop (cddr es) (cddr wps) (cdr saved)))])))
;; ----------------------------------------
;; Stress test to check that the GC doesn't suffer from quadratic
;; behavior
(begin
(define (wrapper v) (list 1 2 3 4 5 v))
;; Create a chain of ephemerons where we have all
;; the the ephemerons immediately in a list,
;; but we discover the keys one at a time
(define (mk n prev-key es)
(cond
[(zero? n)
(values prev-key es)]
[else
(let ([key (gensym)])
(mk (sub1 n)
key
(cons (ephemeron-cons key (wrapper prev-key))
es)))]))
;; Create a chain of ephemerons where we have all
;; of the keys immediately in a list,
;; but we discover the ephemerons one at a time
(define (mk* n prev-e keys)
(cond
[(zero? n)
(values prev-e keys)]
[else
(let ([key (gensym)])
(mk* (sub1 n)
(ephemeron-cons key (wrapper prev-e))
(cons key
keys)))]))
(define (measure-time n keep-alive)
;; Hang the discover-keys-one-at-a-time chain
;; off the end of the discover-ephemerons-one-at-a-time
;; chain, which is the most complex case for avoiding
;; quadratic GC times
(define-values (key es) (mk n (gensym) '()))
(define-values (root holds) (mk* n key es))
(define start (current-time))
(collect (collect-maximum-generation))
(let ([delta (time-difference (current-time) start)])
;; Sanity check on ephemerons
(for-each (lambda (e)
(when (eq? #!bwp (ephemeron-key e))
(error 'check "oops")))
es)
;; Keep `root` and `holds` live:
(keep-alive (cons root holds))
;; Return duration:
delta))
(define N 10000)
;; The first time should be roughy x10 the second (not x100)
(let loop ([tries 3])
(define dummy #f)
(define (keep-alive v) (set! dummy (cons dummy v)))
(define t1 (measure-time (* 10 N) keep-alive))
(define dummy2 (set! dummy #f))
(define t2 (measure-time N keep-alive))
(define (duration->inexact t) (+ (* (time-second t) 1e9)
(time-nanosecond t)))
(set! dummy #f)
(or (< (/ (duration->inexact t1) (duration->inexact t2)) 20)
(and (positive? tries)
(loop (sub1 tries))))))
;; ----------------------------------------
;; Check interaction of mutation and generations
;; This check disables interrups so that a garbage collection
;; happens only for the explicit `collect` request.
(with-interrupts-disabled
(let ([e (ephemeron-cons (gensym) 'ok)])
(collect) ; => `e` is moved to generation 1
(and
(eq? #!bwp (ephemeron-key e))
(eq? #!bwp (ephemeron-value e))
(let ([s (gensym)])
(set-car! e s)
(set-cdr! e 'ok-again)
(collect) ; => `s` is moved to generation 1
(and
(eq? s (ephemeron-key e))
(eq? 'ok-again (ephemeron-value e))
(begin
(set! s #f)
(collect 1) ; collect former `s`
(and
(eq? #!bwp (ephemeron-key e))
(eq? #!bwp (ephemeron-value e)))))))))
;; ----------------------------------------
;; Check fasl:
(let ([s (gensym)])
(define-values (o get) (open-bytevector-output-port))
(fasl-write (list s
(ephemeron-cons s 'ok))
o)
(let* ([l (fasl-read (open-bytevector-input-port (get)))]
[e (cadr l)])
(and
(eq? (car l) (ephemeron-key e))
(eq? 'ok (ephemeron-value e))
(begin
(set! s #f)
(set! l #f)
(collect (collect-maximum-generation))
(and
(eq? #!bwp (ephemeron-key e))
(eq? #!bwp (ephemeron-value e))))))))
(mat $primitive (mat $primitive
(procedure? #%car) (procedure? #%car)
(procedure? #2%car) (procedure? #2%car)

View File

@ -1173,6 +1173,37 @@
(hashtable-delete! ht 'a) (hashtable-delete! ht 'a)
(list (hashtable-size ht) (= (#%$hashtable-veclen ht) len))) (list (hashtable-size ht) (= (#%$hashtable-veclen ht) len)))
'(0 #t)) '(0 #t))
; test that weak-hashtable values do not imply that values
; are reachable
(let ([wk1 (list 1)]
[wk2 (list 2)]
[wk3 (list 3)]
[wk4 (list 4)]
[ht (make-weak-eq-hashtable)])
(hashtable-set! ht wk1 wk1)
(hashtable-set! ht wk2 wk1)
(hashtable-set! ht wk3 wk3)
(hashtable-set! ht wk4 wk2)
(collect (collect-maximum-generation))
(and
(same-elements? (hashtable-keys ht) '#((1) (2) (3) (4)))
(equal? (hashtable-ref ht wk1 #f) wk1)
(equal? (hashtable-ref ht wk2 #f) wk1)
(equal? (hashtable-ref ht wk3 #f) wk3)
(equal? (hashtable-ref ht wk4 #f) wk2)
(begin
(set! wk1 #f)
(set! wk2 #f)
(set! wk3 #f)
(collect (collect-maximum-generation))
(and
(same-elements? (hashtable-keys ht) '#((1) (2) (4)))
(equal? (hashtable-ref ht wk4 #f) '(2))
(begin
(set! wk4 #f)
(collect (collect-maximum-generation))
(same-elements? (hashtable-keys ht) '#()))))))
) )
(mat eq-hashtable-cell (mat eq-hashtable-cell