From 282ec8125afe4ef05135e440ec72ee7d0f9d6f8d Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Fri, 3 Apr 2020 18:33:53 -0600 Subject: [PATCH] cs: use fused garbage collection and memory accounting Chez Scheme now supports a single-pass combination of `collect` and `compute-size-increments`, which makes a GC with accounting about twice as fast. Meanwhile, other GC improvements reduce non-accounting full-collection times by 10-20%. Much of the GC implementation is now generated from a "Parenthe-C" description, so update the bootstrap process for that step. --- pkgs/base/info.rkt | 2 +- .../tests/racket/sandbox.rktl | 6 +- racket/src/cs/bootstrap/make-boot.rkt | 10 + racket/src/cs/bootstrap/scheme-lang.rkt | 6 +- racket/src/cs/c/Makefile.in | 9 + racket/src/cs/compile-file.ss | 2 +- racket/src/cs/rumble/memory.ss | 73 +++++- racket/src/cs/rumble/place.ss | 5 +- racket/src/racket/src/schvers.h | 2 +- racket/src/thread/custodian.rkt | 215 +++++++++--------- 10 files changed, 202 insertions(+), 128 deletions(-) diff --git a/pkgs/base/info.rkt b/pkgs/base/info.rkt index 84ed5963a0..50a448aee7 100644 --- a/pkgs/base/info.rkt +++ b/pkgs/base/info.rkt @@ -12,7 +12,7 @@ (define collection 'multi) -(define version "7.6.0.18") +(define version "7.6.0.19") (define deps `("racket-lib" ["racket" #:version ,version])) diff --git a/pkgs/racket-test-core/tests/racket/sandbox.rktl b/pkgs/racket-test-core/tests/racket/sandbox.rktl index 90fcc30e2c..df75bc7948 100644 --- a/pkgs/racket-test-core/tests/racket/sandbox.rktl +++ b/pkgs/racket-test-core/tests/racket/sandbox.rktl @@ -528,8 +528,10 @@ --top-- (parameterize ([sandbox-output 'bytes] [sandbox-error-output current-output-port] - [sandbox-memory-limit 2] - [sandbox-eval-limits '(2.5 1)]) + [sandbox-memory-limit 4] + [sandbox-eval-limits (case (system-type 'vm) + [(chez-scheme) '(2.5 4)] + [else '(2.5 1)])]) (make-base-evaluator!)) ;; GCing is needed to allow these to happen (note: the memory limit is very ;; tight here, this test usually fails if the sandbox library is not diff --git a/racket/src/cs/bootstrap/make-boot.rkt b/racket/src/cs/bootstrap/make-boot.rkt index d9b3153ccb..5bad66adf5 100644 --- a/racket/src/cs/bootstrap/make-boot.rkt +++ b/racket/src/cs/bootstrap/make-boot.rkt @@ -341,6 +341,16 @@ (eval `(mkequates.h ,(path->string (build-path out-subdir "equates.h")))) (plumber-flush-all (current-plumber)) + (let ([mkgc.ss (build-path scheme-dir "s/mkgc.ss")]) + (when (file-exists? mkgc.ss) + (status "Load mkgc") + (load-ss (build-path scheme-dir "s/mkgc.ss")) + (status "Generate GC") + (eval `(mkgc-ocd.inc ,(path->string (build-path out-subdir "gc-ocd.inc")))) + (eval `(mkgc-oce.inc ,(path->string (build-path out-subdir "gc-oce.inc")))) + (eval `(mkvfasl.inc ,(path->string (build-path out-subdir "vfasl.inc")))) + (plumber-flush-all (current-plumber)))) + (when (getenv "MAKE_BOOT_FOR_CROSS") ;; Working bootfiles are not needed for a cross build (only the ;; ".h" files are needed), so just make dummy files in that case diff --git a/racket/src/cs/bootstrap/scheme-lang.rkt b/racket/src/cs/bootstrap/scheme-lang.rkt index 33d448862a..c5faa73a0c 100644 --- a/racket/src/cs/bootstrap/scheme-lang.rkt +++ b/racket/src/cs/bootstrap/scheme-lang.rkt @@ -293,6 +293,7 @@ (rename-out [s:open-output-file open-output-file]) $open-bytevector-list-output-port open-bytevector-output-port + native-transcoder port-file-compressed! file-buffer-size $source-file-descriptor @@ -1166,11 +1167,14 @@ (define bv (get-output-bytes p)) (values (list bv) (bytes-length bv))))) -(define (open-bytevector-output-port) +(define (open-bytevector-output-port [transcoder #f]) (define p (open-output-bytes)) (values p (lambda () (get-output-bytes p)))) +(define (native-transcoder) + #f) + (define (port-file-compressed! p) (void)) diff --git a/racket/src/cs/c/Makefile.in b/racket/src/cs/c/Makefile.in index 95da2f61f0..801bc3cb58 100644 --- a/racket/src/cs/c/Makefile.in +++ b/racket/src/cs/c/Makefile.in @@ -161,6 +161,9 @@ sync-bootfiles: mkdir -p $(SCHEME_SRC)/$(MACH)/boot/$(MACH) $(MAKE) $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/equates.h $(MAKE) $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/scheme.h + $(MAKE) $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/gc-ocd.inc + $(MAKE) $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/gc-oce.inc + $(MAKE) $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/vfasl.inc $(MAKE) $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/petite.boot $(MAKE) $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/scheme.boot @@ -169,6 +172,12 @@ $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/equates.h: $(SCHEME_SRC)/boot/$(MACH)/equates cp $(SCHEME_SRC)/boot/$(MACH)/equates.h $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/equates.h $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/scheme.h: $(SCHEME_SRC)/boot/$(MACH)/scheme.h cp $(SCHEME_SRC)/boot/$(MACH)/scheme.h $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/scheme.h +$(SCHEME_SRC)/$(MACH)/boot/$(MACH)/gc-ocd.inc: $(SCHEME_SRC)/boot/$(MACH)/gc-ocd.inc + cp $(SCHEME_SRC)/boot/$(MACH)/gc-ocd.inc $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/gc-ocd.inc +$(SCHEME_SRC)/$(MACH)/boot/$(MACH)/gc-oce.inc: $(SCHEME_SRC)/boot/$(MACH)/gc-oce.inc + cp $(SCHEME_SRC)/boot/$(MACH)/gc-oce.inc $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/gc-oce.inc +$(SCHEME_SRC)/$(MACH)/boot/$(MACH)/vfasl.inc: $(SCHEME_SRC)/boot/$(MACH)/vfasl.inc + cp $(SCHEME_SRC)/boot/$(MACH)/vfasl.inc $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/vfasl.inc $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/petite.boot: $(SCHEME_SRC)/boot/$(MACH)/petite.boot cp $(SCHEME_SRC)/boot/$(MACH)/petite.boot $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/petite.boot $(SCHEME_SRC)/$(MACH)/boot/$(MACH)/scheme.boot: $(SCHEME_SRC)/boot/$(MACH)/scheme.boot diff --git a/racket/src/cs/compile-file.ss b/racket/src/cs/compile-file.ss index 4bbc59a803..6a03bc1026 100644 --- a/racket/src/cs/compile-file.ss +++ b/racket/src/cs/compile-file.ss @@ -2,7 +2,7 @@ ;; Check to make we're using a build of Chez Scheme ;; that has all the features we need. (define-values (need-maj need-min need-sub need-dev) - (values 9 5 3 24)) + (values 9 5 3 25)) (unless (guard (x [else #f]) (eval 'scheme-fork-version-number)) (error 'compile-file diff --git a/racket/src/cs/rumble/memory.ss b/racket/src/cs/rumble/memory.ss index c4678f10f6..3d11841f6a 100644 --- a/racket/src/cs/rumble/memory.ss +++ b/racket/src/cs/rumble/memory.ss @@ -21,7 +21,7 @@ post-time post-cpu-time) (void))) -;; #f or a procedure that accepts `compute-size-increments` to be +;; #f or a procedure that accepts a CPSed `compute-size-increments` to be ;; called in any Chez Scheme thread (with all other threads paused) ;; after each major GC; this procedure must not do anything that might ;; use "control.ss": @@ -84,12 +84,32 @@ 0] [else req-gen])]) (run-collect-callbacks car) - (collect gen) - (let ([post-allocated (bytes-allocated)] - [post-allocated+overhead (current-memory-bytes)] - [post-time (real-time)] - [post-cpu-time (cpu-time)]) - (when (= gen (collect-maximum-generation)) + (let ([maybe-finish-accounting + (cond + [(and reachable-size-increments-callback + (fx= gen (collect-maximum-generation))) + ;; Collect with a fused `collect-size-increments` + (reachable-size-increments-callback + (lambda (roots domains k) + (cond + [(null? roots) + ;; Plain old collection, after all: + (collect gen) + #f] + [else + (let ([domains (weaken-accounting-domains domains)]) + ;; Accounting collection: + (let ([counts (collect gen gen (weaken-accounting-roots roots))]) + (lambda () (k counts domains))))])))] + [else + ;; Plain old collection: + (collect gen) + #f])]) + (let ([post-allocated (bytes-allocated)] + [post-allocated+overhead (current-memory-bytes)] + [post-time (real-time)] + [post-cpu-time (cpu-time)]) + (when (= gen (collect-maximum-generation)) ;; Trigger a major GC when twice as much memory is used. Twice ;; `post-allocated+overhead` seems to be too long a wait, because ;; that value may include underused pages that have locked objects. @@ -100,13 +120,12 @@ (update-eq-hash-code-table-size!) (update-struct-procs-table-sizes!) (poll-foreign-guardian) - (when (and reachable-size-increments-callback - (fx= gen (collect-maximum-generation))) - (reachable-size-increments-callback compute-size-increments)) + (when maybe-finish-accounting + (maybe-finish-accounting)) (run-collect-callbacks cdr) (garbage-collect-notify gen pre-allocated pre-allocated+overhead pre-time pre-cpu-time - post-allocated post-allocated+overhead post-time post-cpu-time + post-allocated post-allocated+overhead post-time post-cpu-time (real-time) (cpu-time))) (when (and (= req-gen (collect-maximum-generation)) (currently-in-engine?)) @@ -118,7 +137,7 @@ (set! non-full-gc-counter 0)] [else (set! non-full-gc-counter (add1 non-full-gc-counter))]) - (void)))) + (void))))) (define collect-garbage (case-lambda @@ -168,6 +187,36 @@ (current-continuation-marks)))) (immediate-allocation-check n)))) +;; ---------------------------------------- + +;; Any value wrapped as `strongly-reachable-for-accounting` will +;; be `cons`ed instead of `weak-cons`ed for accounting purposes +(define-record-type strongly-reachable-for-accounting (fields content)) + +(define (weaken-accounting-roots roots) + (let loop ([roots roots]) + (cond + [(null? roots) '()] + [else + (let ([root (car roots)] + [rest (loop (cdr roots))]) + (cond + [(thread? root) (cons root rest)] + [(strongly-reachable-for-accounting? root) + (cons (strongly-reachable-for-accounting-content root) rest)] + [else + (weak-cons root rest)]))]))) + +;; We want the elements of `domains` to be available for +;; finalization, so refer to all of them weakly +(define (weaken-accounting-domains domains) + (let loop ([domains domains]) + (if (null? domains) + '() + (weak-cons (car domains) (loop (cdr domains)))))) + +;; ---------------------------------------- + (define prev-stats-objects #f) (define/who (dump-memory-stats . args) diff --git a/racket/src/cs/rumble/place.ss b/racket/src/cs/rumble/place.ss index f59d5f097f..72670e2138 100644 --- a/racket/src/cs/rumble/place.ss +++ b/racket/src/cs/rumble/place.ss @@ -94,8 +94,9 @@ (finish-proc result))))) ;; Must be called within an engine, used for memory accounting: (define (current-place-roots) - (list (place-registers) - (current-engine-thread-cell-values)))] + (make-strongly-reachable-for-accounting + (list (place-registers) + (current-engine-thread-cell-values))))] [else (define (place-enabled?) #f) (define (fork-place thunk finish-proc) #f) diff --git a/racket/src/racket/src/schvers.h b/racket/src/racket/src/schvers.h index 824f77f176..eab0570ce4 100644 --- a/racket/src/racket/src/schvers.h +++ b/racket/src/racket/src/schvers.h @@ -16,7 +16,7 @@ #define MZSCHEME_VERSION_X 7 #define MZSCHEME_VERSION_Y 6 #define MZSCHEME_VERSION_Z 0 -#define MZSCHEME_VERSION_W 18 +#define MZSCHEME_VERSION_W 19 /* A level of indirection makes `#` work as needed: */ #define AS_a_STR_HELPER(x) #x diff --git a/racket/src/thread/custodian.rkt b/racket/src/thread/custodian.rkt index e9b350fbd4..dd4c99145a 100644 --- a/racket/src/thread/custodian.rkt +++ b/racket/src/thread/custodian.rkt @@ -447,115 +447,114 @@ (void (set-reachable-size-increments-callback! ;; Called in an arbitrary host thread, with interrupts off and all other threads suspended: - (lambda (compute-size-increments) - (unless (zero? compute-memory-sizes) - (host:call-with-current-place-continuation - (lambda (starting-k) - ;; A place may have future pthreads, and each pthread may - ;; be running a future that becomes to a particular custodian; - ;; build up a custodian-to-pthread mapping in this table: - (define custodian-future-threads (make-hasheq)) - (future-scheduler-add-thread-custodian-mapping! (place-future-scheduler initial-place) - custodian-future-threads) - ;; Get roots, which are threads and custodians, for all distinct accounting domains - (define-values (roots custs) ; parallel lists: root and custodian to charge for the root - (let c-loop ([c initial-place-root-custodian] [pl initial-place] [accum-roots null] [accum-custs null]) - (set-custodian-memory-use! c 0) - (define gc-roots (custodian-gc-roots c)) - (define roots (if gc-roots - (hash-keys gc-roots) - null)) - (define host-regs (let ([pl (custodian-place c)]) - (if (eq? (place-custodian pl) c) - ;; Charge anything directly reachable from place registers - ;; to the root custodian - (list (place-host-roots pl)) - ;; Not the root + (lambda (call-with-size-increments) + (if (zero? compute-memory-sizes) + (call-with-size-increments null null (lambda (sizes custs) (void))) + (host:call-with-current-place-continuation + (lambda (starting-k) + ;; A place may have future pthreads, and each pthread may + ;; be running a future that becomes to a particular custodian; + ;; build up a custodian-to-pthread mapping in this table: + (define custodian-future-threads (make-hasheq)) + (future-scheduler-add-thread-custodian-mapping! (place-future-scheduler initial-place) + custodian-future-threads) + ;; Get roots, which are threads and custodians, for all distinct accounting domains + (define-values (roots custs) ; parallel lists: root and custodian to charge for the root + (let c-loop ([c initial-place-root-custodian] [pl initial-place] [accum-roots null] [accum-custs null]) + (set-custodian-memory-use! c 0) + (define gc-roots (custodian-gc-roots c)) + (define roots (if gc-roots + (hash-keys gc-roots) + null)) + (define host-regs (let ([pl (custodian-place c)]) + (if (eq? (place-custodian pl) c) + ;; Charge anything directly reachable from place registers + ;; to the root custodian; that will include scheduled threads, + ;; so they will be known reachable for accounting + (list (place-host-roots pl)) + ;; Not the root + null))) + (let loop ([roots roots] + [local-accum-roots (cons c host-regs)] + [accum-roots accum-roots] + [accum-custs accum-custs]) + (cond + [(null? roots) + (define local-custs (for/list ([root (in-list local-accum-roots)]) c)) + ;; values owned directly by this custodian need to go earlier in the list, + ;; since we're traversing from parent custodian to children; and we + ;; want the local accumulations reversed, so that the host roots + ;; are first + (values (append (reverse local-accum-roots) accum-roots) + (append local-custs accum-custs))] + [(custodian? (car roots)) + (define-values (new-roots new-custs) (c-loop (car roots) pl accum-roots accum-custs)) + (loop (cdr roots) local-accum-roots new-roots new-custs)] + [(place? (car roots)) + (define pl (car roots)) + (define c (place-custodian pl)) + (future-scheduler-add-thread-custodian-mapping! (place-future-scheduler pl) + custodian-future-threads) + (define-values (new-roots new-custs) (c-loop c pl accum-roots accum-custs)) + (loop (cdr roots) local-accum-roots new-roots new-custs)] + [else + (define root (car roots)) + (define new-local-roots (cons root local-accum-roots)) + (define more-local-roots + (cond + [(eq? root (place-current-thread pl)) + (define more-local-roots (cons (place-host-thread pl) + new-local-roots)) + (if (eq? pl current-place) ; assuming host thread is place main thread + (cons starting-k more-local-roots) + more-local-roots)] + [else new-local-roots])) + (loop (cdr roots) more-local-roots accum-roots accum-custs)])))) + (call-with-size-increments + roots custs + (lambda (sizes custs) + (for ([size (in-list sizes)] + [c (in-list custs)]) + (set-custodian-memory-use! c (+ size (custodian-memory-use c)))) + ;; Merge child counts to parents: + (define any-limits? + (let c-loop ([c initial-place-root-custodian]) + (define gc-roots (custodian-gc-roots c)) + (define roots (append + (hash-ref custodian-future-threads c null) + (if gc-roots + (hash-keys gc-roots) null))) - (let loop ([roots roots] - [local-accum-roots (cons c host-regs)] - [accum-roots accum-roots] - [accum-custs accum-custs]) - (cond - [(null? roots) - (define local-custs (for/list ([root (in-list local-accum-roots)]) c)) - ;; values owned directly by this custodian need to go earlier in the list, - ;; since we're traversing from parent custodian to children - (values (append local-accum-roots accum-roots) - (append local-custs accum-custs))] - [(custodian? (car roots)) - (define-values (new-roots new-custs) (c-loop (car roots) pl accum-roots accum-custs)) - (loop (cdr roots) local-accum-roots new-roots new-custs)] - [(place? (car roots)) - (define pl (car roots)) - (define c (place-custodian pl)) - (future-scheduler-add-thread-custodian-mapping! (place-future-scheduler pl) - custodian-future-threads) - (define-values (new-roots new-custs) (c-loop c pl accum-roots accum-custs)) - (loop (cdr roots) local-accum-roots new-roots new-custs)] - [else - (define root (car roots)) - (define new-local-roots (cons root local-accum-roots)) - (define more-local-roots - (cond - [(eq? root (place-current-thread pl)) - (define more-local-roots (cons (place-host-thread pl) - new-local-roots)) - (if (eq? pl current-place) ; assuming host thread is place main thread - (cons starting-k more-local-roots) - more-local-roots)] - [else new-local-roots])) - (loop (cdr roots) more-local-roots accum-roots accum-custs)])))) - (define sizes (compute-size-increments roots - ;; 'static is more accurrate, because it will - ;; hit parameters more reliably; but there's - ;; currently a significant cost, and the - ;; approximation of using the oldest non-static - ;; generation works well enough for many - ;; purposes - #;'static)) - (for ([size (in-list sizes)] - [c (in-list custs)]) - (set-custodian-memory-use! c (+ size (custodian-memory-use c)))) - ;; Merge child counts to parents: - (define any-limits? - (let c-loop ([c initial-place-root-custodian]) - (define gc-roots (custodian-gc-roots c)) - (define roots (append - (hash-ref custodian-future-threads c null) - (if gc-roots - (hash-keys gc-roots) - null))) - (define any-limits? - (for/fold ([any-limits? #f]) ([root (in-list roots)] - #:when (or (custodian? root) - (place? root))) - (define next-c (if (custodian? root) - root - (place-custodian root))) - (define root-any-limits? (c-loop next-c)) - (set-custodian-memory-use! c (+ (custodian-memory-use next-c) - (custodian-memory-use c))) - (or root-any-limits? any-limits?))) - (define use (custodian-memory-use c)) - (define old-limits (custodian-memory-limits c)) - (define new-limits - (for/list ([limit (in-list old-limits)] - #:when (cond - [((car limit) . <= . use) - (queue-custodian-shutdown! (cdr limit)) - #f] - [else #t])) - limit)) - (set-custodian-memory-limits! c new-limits) - (when (and (pair? old-limits) - (null? new-limits)) - (hash-remove! custodians-with-limits c)) - (or any-limits? (pair? new-limits)))) - ;; If no limits are installed, decay demand for memory counts: - (unless any-limits? - (set! compute-memory-sizes (sub1 compute-memory-sizes))) - (set! computed-memory-sizes? #t))))))) + (define any-limits? + (for/fold ([any-limits? #f]) ([root (in-list roots)] + #:when (or (custodian? root) + (place? root))) + (define next-c (if (custodian? root) + root + (place-custodian root))) + (define root-any-limits? (c-loop next-c)) + (set-custodian-memory-use! c (+ (custodian-memory-use next-c) + (custodian-memory-use c))) + (or root-any-limits? any-limits?))) + (define use (custodian-memory-use c)) + (define old-limits (custodian-memory-limits c)) + (define new-limits + (for/list ([limit (in-list old-limits)] + #:when (cond + [((car limit) . <= . use) + (queue-custodian-shutdown! (cdr limit)) + #f] + [else #t])) + limit)) + (set-custodian-memory-limits! c new-limits) + (when (and (pair? old-limits) + (null? new-limits)) + (hash-remove! custodians-with-limits c)) + (or any-limits? (pair? new-limits)))) + ;; If no limits are installed, decay demand for memory counts: + (unless any-limits? + (set! compute-memory-sizes (sub1 compute-memory-sizes))) + (set! computed-memory-sizes? #t))))))))) (void (set-custodian-memory-use-proc! ;; Get memory use for a custodian; the second argument is