From 33f8173970c350f707a2ccdc2cf1ce2d24cd43f1 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Sat, 11 Jan 2020 10:44:09 -0700 Subject: [PATCH] cs: switch to stencil-vector HAMT With recent improvements, the run-time performance of stencil-vector HAMTs for immutable hash tables seems close enough (on microbenchmarks) to the Patricia-trie implementation to be worthwhile, since they use less memory. Performance remains better in most cases than the traditional Racket implementation. The table at the end of this message summarizes relative performance on microbenchmarks. Overall, though, immutable hash-table operations are already so fast that these differences very rarely translate to measurable differences in overall run times --- not even for the macro expander, which relies heavily on immutable hash tables to represent scope sets. Stencil-vector HAMTs tend to take about 1/3 the space of Patricia tries, and those space savings can turn into run-time improvements in applications by reducing GC time. I've observed a 10% reduction in compile time for some programs. When building a full Racket distribution, run time shrinks by about 2 minutes out of 80 minutes, probably just because average memory use goes down by 10%. DrRacket's initial memory footprint goes down by about 37M out of 657M (a 5% savings). 
Microbenchmark relative performance, normalized to previous Racket CS implementation (measured on 2018 MacBook Pro, 2.7 GHz Core i7; Chez Scheme can substitute POPCNT instructions at link time): patricia = previous Racket CS implementation as a Patricia Trie stencil = new Racket CS implementation as a stencil-vector HAMT racket = traditional Racket implementation patricia stencil racket set-in-empty:eq#t: ==| ==| ==|= set-many:eq#t: ==| ==|== ==|======== set-many-in-order:eq#t: ==| ==| ==|==== set-same:eq#t: ==| == ==|= set-in-empty:eq: ==| == ==|= set-many:eq: ==| ==|== ==|======== set-many-in-order:eq: ==| ==|= ==|===== set-same:eq: ==| == ==|= set-in-empty:eqv: ==| ==| ==|== set-many:eqv: ==| ==|== ==|========= set-many-in-order:eqv: ==| ==|= ==|===== set-same:eqv: ==| ==| ==|= set-in-empty:equal: ==| ==|== ==|=== set-many:equal: ==| ==|== ==|===== set-many-in-order:equal: ==| ==|= ==|=== set-same:equal: ==| ==|= ==|=== ref:eq#t: ==| ==| ==|= ref-fail:eq#t: ==| ==| == ref:eq: ==| ==| ==|= ref-fail:eq: ==| ==| == ref:eqv: ==| ==| ==|==== ref-fail:eqv: ==| ==| ==| ref:equal: ==| ==| ==|=== ref-large:equal: ==| ==| == ref-fail:equal: ==| ==| ==|=== ref-large-fail:equal: ==| ==| == removes:eq#t: ==| ==|=== ==|=========== add+remove:eq#t: ==| ==|= ==|======= removes:eq: ==| ==|==== ==|============ add+remove:eq: ==| ==|= ==|======= removes:eqv: ==| ==|=== ==|============= add+remove:eqv: ==| ==| ==|======== removes:equal: ==| ==|== ==|======= add+remove:equal: ==| ==|= ==|====== iterate-keys:eq: ==| ==| ==|= iterate-vals:eq#t: ==| ==|= ==|= iterate-vals:eq: ==| ==|= ==|= iterate-unsafe-keys:eq: ==| ==| ==|======= iterate-unsafe-vals:eq#t: ==| ==| ==| iterate-unsafe-vals:eq: ==| ==|= ==| for-each:eq: ==| ==| ==|========== subset-lil-shared:eq: ==| ==| ==|= subset-lil-unshared:eq: ==| ==| ==|== subset-lil-not:eq: ==| == == subset-med+lil-shared:eq: ==| ==|==== ==|= subset-med+med-shared:eq: ==| ==|= ==|= subset-big-same:eq: ==| ==| ==|=============== 
subset-big+lil-shared:eq: ==| ==|=== ==|==== subset-big+med-shared:eq: ==| ==|== ==|=== subset-big-unshared:eq: ==| ==| ==|== --- pkgs/base/info.rkt | 2 +- .../tests/racket/benchmarks/hash/set.rkt | 4 +- racket/src/cs/compile-file.ss | 2 +- racket/src/cs/rumble/hamt-stencil.ss | 166 +++++++++--------- racket/src/cs/rumble/intmap.ss | 16 +- racket/src/racket/src/schvers.h | 2 +- 6 files changed, 98 insertions(+), 94 deletions(-) diff --git a/pkgs/base/info.rkt b/pkgs/base/info.rkt index 3297b055ed..6db7f20905 100644 --- a/pkgs/base/info.rkt +++ b/pkgs/base/info.rkt @@ -12,7 +12,7 @@ (define collection 'multi) -(define version "7.6.0.1") +(define version "7.6.0.2") (define deps `("racket-lib" ["racket" #:version ,version])) diff --git a/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/set.rkt b/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/set.rkt index 1781513861..442b5b75fa 100644 --- a/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/set.rkt +++ b/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/set.rkt @@ -19,7 +19,7 @@ (loop (hash-set ht KEY (MAKE-VAL 'true)) (sub1 i))))) - 'set-many + 'set-many-in-order (times (for ([i (in-range Q)]) (let loop ([ht EMPTY] [i K]) @@ -28,7 +28,7 @@ (loop (hash-set ht (MAKE-KEY i) (MAKE-VAL 'true)) (sub1 i)))))) - 'set-many-in-order + 'set-many (times (for ([i (in-range Q)]) (let loop ([ht EMPTY] [l shuffled]) diff --git a/racket/src/cs/compile-file.ss b/racket/src/cs/compile-file.ss index 3358209602..38ad9e28f6 100644 --- a/racket/src/cs/compile-file.ss +++ b/racket/src/cs/compile-file.ss @@ -2,7 +2,7 @@ ;; Check to make we're using a build of Chez Scheme ;; that has all the features we need. 
(define-values (need-maj need-min need-sub need-dev) - (values 9 5 3 10)) + (values 9 5 3 11)) (unless (guard (x [else #f]) (eval 'scheme-fork-version-number)) (error 'compile-file diff --git a/racket/src/cs/rumble/hamt-stencil.ss b/racket/src/cs/rumble/hamt-stencil.ss index be56e7fb3e..e916f145a5 100644 --- a/racket/src/cs/rumble/hamt-stencil.ss +++ b/racket/src/cs/rumble/hamt-stencil.ss @@ -174,8 +174,9 @@ [val-i (fx- i child-count)]) ; same as key index (bnode-val-local-index-ref n child-count key-count val-i))] [else - ;; Complicated case: we have to figure out how many - ;; previous keys have values + ;; Complicated case that we expect to be rare: figure out how many + ;; previous keys have values, since we don't know how the key/value + ;; index maps to a key/value bit (let* ([child-count (hamt-mask->child-count mask)] [key-count (hamt-mask->key-count mask)] [key-i (fx- i child-count)]) @@ -561,7 +562,10 @@ ;; ---------------------------------------- ;; unsafe iteration; position is a stack -;; represented by a list of (cons node index) +;; of the form +;; - '() +;; - (cons indent (cons node stack)) +;; - (cons (box assoc-list) stack) (define (unsafe-intmap-iterate-first h) (and (not (intmap-empty? h)) @@ -573,12 +577,12 @@ (let ([mask (stencil-vector-mask n)]) (let ([child-count (hamt-mask->child-count mask)] [key-count (hamt-mask->key-count mask)]) - (let ([stack (cons (cons n (fx+ key-count child-count -1)) stack)]) + (let ([stack (cons (fx+ key-count child-count -1) (cons n stack))]) (if (fx= key-count 0) (unsafe-node-iterate-first (bnode-child-index-ref n (fx- child-count 1)) stack) stack))))] [(cnode? n) - (cons (box (cnode-content n)) + (cons (#%box (cnode-content n)) stack)])) (define (unsafe-intmap-iterate-next h pos) @@ -590,32 +594,31 @@ ;; Stack is empty, so we're done #f] [else - (let ([p (car pos)] + (let ([i (car pos)] [stack (cdr pos)]) (cond - [(box? p) - ;; in a cnode - (let ([new-p (cdr (unbox p))]) - (if (null? 
new-p) - ;; Exhausted this node, so return to parent node - (unsafe-node-iterate-next stack) - ;; still in cnode: - (cons (box new-p) stack)))] - [else - (let ([n (car p)] - [i (cdr p)]) + [(fixnum? i) + (let ([n (car stack)]) (cond [(fx= 0 i) ;; Exhausted this node, so return to parent node - (unsafe-node-iterate-next stack)] + (unsafe-node-iterate-next (cdr stack))] [else ;; Move to next (lower) index in the current node (let ([i (fx1- i)]) (let ([child-count (hamt-mask->child-count (stencil-vector-mask n))] - [stack (cons (cons n i) stack)]) + [stack (cons i stack)]) (if (fx< i child-count) (unsafe-node-iterate-first (bnode-child-index-ref n i) stack) - stack)))]))]))])) + stack)))]))] + [else + ;; in a cnode + (let ([new-p (cdr (#%unbox i))]) + (if (null? new-p) + ;; Exhausted this node, so return to parent node + (unsafe-node-iterate-next stack) + ;; still in cnode: + (cons (#%box new-p) stack)))]))])) (define (unsafe-intmap-iterate-key h pos) (eqtype-dispatch @@ -924,84 +927,83 @@ (define (bnode-entry-at-position n pos mode fail) (let* ([mask (stencil-vector-mask n)] - [child-count (hamt-mask->child-count mask)]) - (let loop ([i 0] [pos pos]) - (cond - [(fx= i child-count) - (let ([key-count (hamt-mask->key-count mask)]) - (cond - [(fx< pos key-count) - (let ([get-key (lambda () (hamt-unwrap-key (bnode-key-index-ref n (fx+ pos child-count))))] - [get-value (lambda () (bnode-val-index-ref n (fx+ pos child-count)))]) - (case mode - [(key) (get-key)] - [(val) (get-value)] - [(both) (values (get-key) (get-value))] - [else (cons (get-key) (get-value))]))] - [else fail]))] - [else - (let ([c (bnode-child-index-ref n i)]) - (cond - [(bnode? 
c) - (let ([sz (hamt-count c)]) - (if (fx>= pos sz) - (loop (fx+ i 1) (fx- pos sz)) - (bnode-entry-at-position c pos mode fail)))] - [else - (let* ([alist (cnode-content c)] - [len (length alist)]) - (if (fx>= pos len) - (loop (fx+ i 1) (fx- pos len)) - (let ([p (list-ref alist pos)]) - (case mode - [(key) (car p)] - [(val) (cdr p)] - [(both) (values (car p) (cdr p))] - [else p]))))]))])))) + [child-count (hamt-mask->child-count mask)] + [key-count (hamt-mask->key-count mask)]) + (cond + [(fx< pos key-count) + (let ([get-key (lambda () (hamt-unwrap-key (bnode-key-index-ref n (fx+ pos child-count))))] + [get-value (lambda () (bnode-val-index-ref n (fx+ pos child-count)))]) + (case mode + [(key) (get-key)] + [(val) (get-value)] + [(both) (values (get-key) (get-value))] + [else (cons (get-key) (get-value))]))] + [else + (let loop ([i 0] [pos (fx- pos key-count)]) + (cond + [(fx= i child-count) + fail] + [else + (let ([c (bnode-child-index-ref n i)]) + (cond + [(bnode? c) + (let ([sz (hamt-count c)]) + (if (fx>= pos sz) + (loop (fx+ i 1) (fx- pos sz)) + (bnode-entry-at-position c pos mode fail)))] + [else + (let* ([alist (cnode-content c)] + [len (length alist)]) + (if (fx>= pos len) + (loop (fx+ i 1) (fx- pos len)) + (let ([p (list-ref alist pos)]) + (case mode + [(key) (car p)] + [(val) (cdr p)] + [(both) (values (car p) (cdr p))] + [else p]))))]))]))]))) (define (bnode-unsafe-intmap-iterate-key pos) - (let ([p (car pos)]) + (let ([i (car pos)]) (cond - [(box? p) - ;; in a cnode - (caar (unbox p))] + [(fixnum? i) + (let ([h (cadr pos)]) + (hamt-unwrap-key (bnode-key-index-ref h i)))] [else - (let ([h (car p)]) - (hamt-unwrap-key (bnode-key-index-ref h (cdr p))))]))) + ;; in a cnode + (caar (#%unbox i))]))) (define (bnode-unsafe-intmap-iterate-value pos) - (let ([p (car pos)]) + (let ([i (car pos)]) (cond - [(box? p) - ;; in a cnode - (cdar (unbox p))] + [(fixnum? 
i) + (bnode-val-index-ref (cadr pos) i)] [else - (bnode-val-index-ref (car p) (cdr p))]))) + ;; in a cnode + (cdar (#%unbox i))]))) (define (bnode-unsafe-intmap-iterate-key+value pos) - (let ([p (car pos)]) + (let ([i (car pos)]) (cond - [(box? p) - ;; in a cnode - (let ([pr (car (unbox p))]) - (values (car pr) (cdr pr)))] - [else - (let ([n (car p)] - [i (cdr p)]) + [(fixnum? i) + (let ([n (cadr pos)]) (values (hamt-unwrap-key (bnode-key-index-ref n i)) - (bnode-val-index-ref n i)))]))) + (bnode-val-index-ref n i)))] + [else + ;; in a cnode + (let ([pr (car (#%unbox i))]) + (values (car pr) (cdr pr)))]))) (define (bnode-unsafe-intmap-iterate-pair pos) - (let ([p (car pos)]) + (let ([i (car pos)]) (cond - [(box? p) - ;; in a cnode - (car (unbox p))] - [else - (let ([n (car p)] - [i (cdr p)]) + [(fixnum? i) + (let ([n (cadr pos)]) (cons (hamt-unwrap-key (bnode-key-index-ref n i)) - (bnode-val-index-ref n i)))]))) + (bnode-val-index-ref n i)))] + [else + ;; in a cnode + (car (#%unbox i))]))) (define (bnode=? a b eql? shift) (or diff --git a/racket/src/cs/rumble/intmap.ss b/racket/src/cs/rumble/intmap.ss index 24a20bd8f6..de5db5aaee 100644 --- a/racket/src/cs/rumble/intmap.ss +++ b/racket/src/cs/rumble/intmap.ss @@ -1,21 +1,23 @@ ;; We have several implementations of immutable hash tables. Pick one... -(include "rumble/patricia.ss") +;; (include "rumble/patricia.ss") ;; ;; This Patricia-trie implementation is the prettiest and fastest. It ;; uses the most memory, though --- typically much more than the ;; vector-stencil HAMT. -;; (include "rumble/hamt-stencil.ss") +(include "rumble/hamt-stencil.ss") ;; ;; This HAMT implementation using stencil vectors tends to use the -;; last memory, often by a lot. It's slower than the Patricia-tree -;; implementation, though, especially for `hash-keys-subset?`. +;; least memory, often 1/3 the space of the Patricia-trie +;; implementation. 
It's slower than the Patricia-tree implementation +;; for some operations, up to a factor of 2 for `hash-set` or +;; `hash-keys-subset?`. ;; (include "rumble/hamt-vector.ss") ;; ;; This HAMT implementaiton uses plain vectors instead of stencil -;; vectors. Its speed and memory use are intermediate, but its speed -;; is closer to the stencil-vector HAMT implementation, and memory use -;; is closer to the Patrica trie implementation. +;; vectors. Its speed and memory use are both worse than the +;; stencil-vector HAMT implementation, but it was the original source +;; of the stencil-vector implementation. diff --git a/racket/src/racket/src/schvers.h b/racket/src/racket/src/schvers.h index 575cc8893d..50d7923e07 100644 --- a/racket/src/racket/src/schvers.h +++ b/racket/src/racket/src/schvers.h @@ -16,7 +16,7 @@ #define MZSCHEME_VERSION_X 7 #define MZSCHEME_VERSION_Y 6 #define MZSCHEME_VERSION_Z 0 -#define MZSCHEME_VERSION_W 1 +#define MZSCHEME_VERSION_W 2 /* A level of indirection makes `#` work as needed: */ #define AS_a_STR_HELPER(x) #x