cs: use fused garbage collection and memory accounting

Chez Scheme now supports a single-pass combination of `collect` and
`compute-size-increments`, which makes a GC with accounting about
twice as fast. Meanwhile, other GC improvements reduce non-accounting
full-collection times by 10-20%.

Much of the GC implementation is now generated from a "Parenthe-C"
description, so update the bootstrap process for that step.
This commit is contained in:
Matthew Flatt 2020-04-03 18:33:53 -06:00
parent ed93eedecc
commit 282ec8125a
10 changed files with 202 additions and 128 deletions

View File

@ -12,7 +12,7 @@
(define collection 'multi)
(define version "7.6.0.18")
(define version "7.6.0.19")
(define deps `("racket-lib"
["racket" #:version ,version]))

View File

@ -528,8 +528,10 @@
--top--
(parameterize ([sandbox-output 'bytes]
[sandbox-error-output current-output-port]
[sandbox-memory-limit 2]
[sandbox-eval-limits '(2.5 1)])
[sandbox-memory-limit 4]
[sandbox-eval-limits (case (system-type 'vm)
[(chez-scheme) '(2.5 4)]
[else '(2.5 1)])])
(make-base-evaluator!))
;; GCing is needed to allow these to happen (note: the memory limit is very
;; tight here, this test usually fails if the sandbox library is not

View File

@ -341,6 +341,16 @@
(eval `(mkequates.h ,(path->string (build-path out-subdir "equates.h"))))
(plumber-flush-all (current-plumber))
(let ([mkgc.ss (build-path scheme-dir "s/mkgc.ss")])
(when (file-exists? mkgc.ss)
(status "Load mkgc")
(load-ss (build-path scheme-dir "s/mkgc.ss"))
(status "Generate GC")
(eval `(mkgc-ocd.inc ,(path->string (build-path out-subdir "gc-ocd.inc"))))
(eval `(mkgc-oce.inc ,(path->string (build-path out-subdir "gc-oce.inc"))))
(eval `(mkvfasl.inc ,(path->string (build-path out-subdir "vfasl.inc"))))
(plumber-flush-all (current-plumber))))
(when (getenv "MAKE_BOOT_FOR_CROSS")
;; Working bootfiles are not needed for a cross build (only the
;; ".h" files are needed), so just make dummy files in that case

View File

@ -293,6 +293,7 @@
(rename-out [s:open-output-file open-output-file])
$open-bytevector-list-output-port
open-bytevector-output-port
native-transcoder
port-file-compressed!
file-buffer-size
$source-file-descriptor
@ -1166,11 +1167,14 @@
(define bv (get-output-bytes p))
(values (list bv) (bytes-length bv)))))
(define (open-bytevector-output-port)
(define (open-bytevector-output-port [transcoder #f])
(define p (open-output-bytes))
(values p
(lambda () (get-output-bytes p))))
(define (native-transcoder)
#f)
(define (port-file-compressed! p)
(void))

View File

@ -161,6 +161,9 @@ sync-bootfiles:
mkdir -p $(SCHEME_SRC)/$(MACH)/boot/$(MACH)
$(MAKE) $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/equates.h
$(MAKE) $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/scheme.h
$(MAKE) $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/gc-ocd.inc
$(MAKE) $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/gc-oce.inc
$(MAKE) $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/vfasl.inc
$(MAKE) $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/petite.boot
$(MAKE) $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/scheme.boot
@ -169,6 +172,12 @@ $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/equates.h: $(SCHEME_SRC)/boot/$(MACH)/equates
cp $(SCHEME_SRC)/boot/$(MACH)/equates.h $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/equates.h
$(SCHEME_SRC)/$(MACH)/boot/$(MACH)/scheme.h: $(SCHEME_SRC)/boot/$(MACH)/scheme.h
cp $(SCHEME_SRC)/boot/$(MACH)/scheme.h $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/scheme.h
$(SCHEME_SRC)/$(MACH)/boot/$(MACH)/gc-ocd.inc: $(SCHEME_SRC)/boot/$(MACH)/gc-ocd.inc
cp $(SCHEME_SRC)/boot/$(MACH)/gc-ocd.inc $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/gc-ocd.inc
$(SCHEME_SRC)/$(MACH)/boot/$(MACH)/gc-oce.inc: $(SCHEME_SRC)/boot/$(MACH)/gc-oce.inc
cp $(SCHEME_SRC)/boot/$(MACH)/gc-oce.inc $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/gc-oce.inc
$(SCHEME_SRC)/$(MACH)/boot/$(MACH)/vfasl.inc: $(SCHEME_SRC)/boot/$(MACH)/vfasl.inc
cp $(SCHEME_SRC)/boot/$(MACH)/vfasl.inc $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/vfasl.inc
$(SCHEME_SRC)/$(MACH)/boot/$(MACH)/petite.boot: $(SCHEME_SRC)/boot/$(MACH)/petite.boot
cp $(SCHEME_SRC)/boot/$(MACH)/petite.boot $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/petite.boot
$(SCHEME_SRC)/$(MACH)/boot/$(MACH)/scheme.boot: $(SCHEME_SRC)/boot/$(MACH)/scheme.boot

View File

@ -2,7 +2,7 @@
;; Check to make we're using a build of Chez Scheme
;; that has all the features we need.
(define-values (need-maj need-min need-sub need-dev)
(values 9 5 3 24))
(values 9 5 3 25))
(unless (guard (x [else #f]) (eval 'scheme-fork-version-number))
(error 'compile-file

View File

@ -21,7 +21,7 @@
post-time post-cpu-time)
(void)))
;; #f or a procedure that accepts `compute-size-increments` to be
;; #f or a procedure that accepts a CPSed `compute-size-increments` to be
;; called in any Chez Scheme thread (with all other threads paused)
;; after each major GC; this procedure must not do anything that might
;; use "control.ss":
@ -84,12 +84,32 @@
0]
[else req-gen])])
(run-collect-callbacks car)
(collect gen)
(let ([post-allocated (bytes-allocated)]
[post-allocated+overhead (current-memory-bytes)]
[post-time (real-time)]
[post-cpu-time (cpu-time)])
(when (= gen (collect-maximum-generation))
(let ([maybe-finish-accounting
(cond
[(and reachable-size-increments-callback
(fx= gen (collect-maximum-generation)))
;; Collect with a fused `collect-size-increments`
(reachable-size-increments-callback
(lambda (roots domains k)
(cond
[(null? roots)
;; Plain old collection, after all:
(collect gen)
#f]
[else
(let ([domains (weaken-accounting-domains domains)])
;; Accounting collection:
(let ([counts (collect gen gen (weaken-accounting-roots roots))])
(lambda () (k counts domains))))])))]
[else
;; Plain old collection:
(collect gen)
#f])])
(let ([post-allocated (bytes-allocated)]
[post-allocated+overhead (current-memory-bytes)]
[post-time (real-time)]
[post-cpu-time (cpu-time)])
(when (= gen (collect-maximum-generation))
;; Trigger a major GC when twice as much memory is used. Twice
;; `post-allocated+overhead` seems to be too long a wait, because
;; that value may include underused pages that have locked objects.
@ -100,13 +120,12 @@
(update-eq-hash-code-table-size!)
(update-struct-procs-table-sizes!)
(poll-foreign-guardian)
(when (and reachable-size-increments-callback
(fx= gen (collect-maximum-generation)))
(reachable-size-increments-callback compute-size-increments))
(when maybe-finish-accounting
(maybe-finish-accounting))
(run-collect-callbacks cdr)
(garbage-collect-notify gen
pre-allocated pre-allocated+overhead pre-time pre-cpu-time
post-allocated post-allocated+overhead post-time post-cpu-time
post-allocated post-allocated+overhead post-time post-cpu-time
(real-time) (cpu-time)))
(when (and (= req-gen (collect-maximum-generation))
(currently-in-engine?))
@ -118,7 +137,7 @@
(set! non-full-gc-counter 0)]
[else
(set! non-full-gc-counter (add1 non-full-gc-counter))])
(void))))
(void)))))
(define collect-garbage
(case-lambda
@ -168,6 +187,36 @@
(current-continuation-marks))))
(immediate-allocation-check n))))
;; ----------------------------------------
;; Any value wrapped as `strongly-reachable-for-accounting` will
;; be `cons`ed instead of `weak-cons`ed for accounting purposes
(define-record-type strongly-reachable-for-accounting (fields content))
(define (weaken-accounting-roots roots)
(let loop ([roots roots])
(cond
[(null? roots) '()]
[else
(let ([root (car roots)]
[rest (loop (cdr roots))])
(cond
[(thread? root) (cons root rest)]
[(strongly-reachable-for-accounting? root)
(cons (strongly-reachable-for-accounting-content root) rest)]
[else
(weak-cons root rest)]))])))
;; We want the elements of `domains` to be available for
;; finalization, so refer to all of them weakly
(define (weaken-accounting-domains domains)
(let loop ([domains domains])
(if (null? domains)
'()
(weak-cons (car domains) (loop (cdr domains))))))
;; ----------------------------------------
(define prev-stats-objects #f)
(define/who (dump-memory-stats . args)

View File

@ -94,8 +94,9 @@
(finish-proc result)))))
;; Must be called within an engine, used for memory accounting:
(define (current-place-roots)
(list (place-registers)
(current-engine-thread-cell-values)))]
(make-strongly-reachable-for-accounting
(list (place-registers)
(current-engine-thread-cell-values))))]
[else
(define (place-enabled?) #f)
(define (fork-place thunk finish-proc) #f)

View File

@ -16,7 +16,7 @@
#define MZSCHEME_VERSION_X 7
#define MZSCHEME_VERSION_Y 6
#define MZSCHEME_VERSION_Z 0
#define MZSCHEME_VERSION_W 18
#define MZSCHEME_VERSION_W 19
/* A level of indirection makes `#` work as needed: */
#define AS_a_STR_HELPER(x) #x

View File

@ -447,115 +447,114 @@
(void (set-reachable-size-increments-callback!
;; Called in an arbitrary host thread, with interrupts off and all other threads suspended:
(lambda (compute-size-increments)
(unless (zero? compute-memory-sizes)
(host:call-with-current-place-continuation
(lambda (starting-k)
;; A place may have future pthreads, and each pthread may
;; be running a future that becomes to a particular custodian;
;; build up a custodian-to-pthread mapping in this table:
(define custodian-future-threads (make-hasheq))
(future-scheduler-add-thread-custodian-mapping! (place-future-scheduler initial-place)
custodian-future-threads)
;; Get roots, which are threads and custodians, for all distinct accounting domains
(define-values (roots custs) ; parallel lists: root and custodian to charge for the root
(let c-loop ([c initial-place-root-custodian] [pl initial-place] [accum-roots null] [accum-custs null])
(set-custodian-memory-use! c 0)
(define gc-roots (custodian-gc-roots c))
(define roots (if gc-roots
(hash-keys gc-roots)
null))
(define host-regs (let ([pl (custodian-place c)])
(if (eq? (place-custodian pl) c)
;; Charge anything directly reachable from place registers
;; to the root custodian
(list (place-host-roots pl))
;; Not the root
(lambda (call-with-size-increments)
(if (zero? compute-memory-sizes)
(call-with-size-increments null null (lambda (sizes custs) (void)))
(host:call-with-current-place-continuation
(lambda (starting-k)
;; A place may have future pthreads, and each pthread may
;; be running a future that becomes to a particular custodian;
;; build up a custodian-to-pthread mapping in this table:
(define custodian-future-threads (make-hasheq))
(future-scheduler-add-thread-custodian-mapping! (place-future-scheduler initial-place)
custodian-future-threads)
;; Get roots, which are threads and custodians, for all distinct accounting domains
(define-values (roots custs) ; parallel lists: root and custodian to charge for the root
(let c-loop ([c initial-place-root-custodian] [pl initial-place] [accum-roots null] [accum-custs null])
(set-custodian-memory-use! c 0)
(define gc-roots (custodian-gc-roots c))
(define roots (if gc-roots
(hash-keys gc-roots)
null))
(define host-regs (let ([pl (custodian-place c)])
(if (eq? (place-custodian pl) c)
;; Charge anything directly reachable from place registers
;; to the root custodian; that will include scheduled threads,
;; so they will be known reachable for accounting
(list (place-host-roots pl))
;; Not the root
null)))
(let loop ([roots roots]
[local-accum-roots (cons c host-regs)]
[accum-roots accum-roots]
[accum-custs accum-custs])
(cond
[(null? roots)
(define local-custs (for/list ([root (in-list local-accum-roots)]) c))
;; values owned directly by this custodian need to go earlier in the list,
;; since we're traversing from parent custodian to children; and we
;; want the local accumulations reversed, so that the host roots
;; are first
(values (append (reverse local-accum-roots) accum-roots)
(append local-custs accum-custs))]
[(custodian? (car roots))
(define-values (new-roots new-custs) (c-loop (car roots) pl accum-roots accum-custs))
(loop (cdr roots) local-accum-roots new-roots new-custs)]
[(place? (car roots))
(define pl (car roots))
(define c (place-custodian pl))
(future-scheduler-add-thread-custodian-mapping! (place-future-scheduler pl)
custodian-future-threads)
(define-values (new-roots new-custs) (c-loop c pl accum-roots accum-custs))
(loop (cdr roots) local-accum-roots new-roots new-custs)]
[else
(define root (car roots))
(define new-local-roots (cons root local-accum-roots))
(define more-local-roots
(cond
[(eq? root (place-current-thread pl))
(define more-local-roots (cons (place-host-thread pl)
new-local-roots))
(if (eq? pl current-place) ; assuming host thread is place main thread
(cons starting-k more-local-roots)
more-local-roots)]
[else new-local-roots]))
(loop (cdr roots) more-local-roots accum-roots accum-custs)]))))
(call-with-size-increments
roots custs
(lambda (sizes custs)
(for ([size (in-list sizes)]
[c (in-list custs)])
(set-custodian-memory-use! c (+ size (custodian-memory-use c))))
;; Merge child counts to parents:
(define any-limits?
(let c-loop ([c initial-place-root-custodian])
(define gc-roots (custodian-gc-roots c))
(define roots (append
(hash-ref custodian-future-threads c null)
(if gc-roots
(hash-keys gc-roots)
null)))
(let loop ([roots roots]
[local-accum-roots (cons c host-regs)]
[accum-roots accum-roots]
[accum-custs accum-custs])
(cond
[(null? roots)
(define local-custs (for/list ([root (in-list local-accum-roots)]) c))
;; values owned directly by this custodian need to go earlier in the list,
;; since we're traversing from parent custodian to children
(values (append local-accum-roots accum-roots)
(append local-custs accum-custs))]
[(custodian? (car roots))
(define-values (new-roots new-custs) (c-loop (car roots) pl accum-roots accum-custs))
(loop (cdr roots) local-accum-roots new-roots new-custs)]
[(place? (car roots))
(define pl (car roots))
(define c (place-custodian pl))
(future-scheduler-add-thread-custodian-mapping! (place-future-scheduler pl)
custodian-future-threads)
(define-values (new-roots new-custs) (c-loop c pl accum-roots accum-custs))
(loop (cdr roots) local-accum-roots new-roots new-custs)]
[else
(define root (car roots))
(define new-local-roots (cons root local-accum-roots))
(define more-local-roots
(cond
[(eq? root (place-current-thread pl))
(define more-local-roots (cons (place-host-thread pl)
new-local-roots))
(if (eq? pl current-place) ; assuming host thread is place main thread
(cons starting-k more-local-roots)
more-local-roots)]
[else new-local-roots]))
(loop (cdr roots) more-local-roots accum-roots accum-custs)]))))
(define sizes (compute-size-increments roots
;; 'static is more accurrate, because it will
;; hit parameters more reliably; but there's
;; currently a significant cost, and the
;; approximation of using the oldest non-static
;; generation works well enough for many
;; purposes
#;'static))
(for ([size (in-list sizes)]
[c (in-list custs)])
(set-custodian-memory-use! c (+ size (custodian-memory-use c))))
;; Merge child counts to parents:
(define any-limits?
(let c-loop ([c initial-place-root-custodian])
(define gc-roots (custodian-gc-roots c))
(define roots (append
(hash-ref custodian-future-threads c null)
(if gc-roots
(hash-keys gc-roots)
null)))
(define any-limits?
(for/fold ([any-limits? #f]) ([root (in-list roots)]
#:when (or (custodian? root)
(place? root)))
(define next-c (if (custodian? root)
root
(place-custodian root)))
(define root-any-limits? (c-loop next-c))
(set-custodian-memory-use! c (+ (custodian-memory-use next-c)
(custodian-memory-use c)))
(or root-any-limits? any-limits?)))
(define use (custodian-memory-use c))
(define old-limits (custodian-memory-limits c))
(define new-limits
(for/list ([limit (in-list old-limits)]
#:when (cond
[((car limit) . <= . use)
(queue-custodian-shutdown! (cdr limit))
#f]
[else #t]))
limit))
(set-custodian-memory-limits! c new-limits)
(when (and (pair? old-limits)
(null? new-limits))
(hash-remove! custodians-with-limits c))
(or any-limits? (pair? new-limits))))
;; If no limits are installed, decay demand for memory counts:
(unless any-limits?
(set! compute-memory-sizes (sub1 compute-memory-sizes)))
(set! computed-memory-sizes? #t)))))))
(define any-limits?
(for/fold ([any-limits? #f]) ([root (in-list roots)]
#:when (or (custodian? root)
(place? root)))
(define next-c (if (custodian? root)
root
(place-custodian root)))
(define root-any-limits? (c-loop next-c))
(set-custodian-memory-use! c (+ (custodian-memory-use next-c)
(custodian-memory-use c)))
(or root-any-limits? any-limits?)))
(define use (custodian-memory-use c))
(define old-limits (custodian-memory-limits c))
(define new-limits
(for/list ([limit (in-list old-limits)]
#:when (cond
[((car limit) . <= . use)
(queue-custodian-shutdown! (cdr limit))
#f]
[else #t]))
limit))
(set-custodian-memory-limits! c new-limits)
(when (and (pair? old-limits)
(null? new-limits))
(hash-remove! custodians-with-limits c))
(or any-limits? (pair? new-limits))))
;; If no limits are installed, decay demand for memory counts:
(unless any-limits?
(set! compute-memory-sizes (sub1 compute-memory-sizes)))
(set! computed-memory-sizes? #t)))))))))
(void (set-custodian-memory-use-proc!
;; Get memory use for a custodian; the second argument is