From a60f173b4639709120da54584a178b1135472e55 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Fri, 3 Jan 2020 17:10:10 -0700 Subject: [PATCH] hash benchmarks and stencil-vector HAMT experiment This commit adds an (unused) implementation of immutable hash tables for Racket CS that trades some run-time performance for an especially compact representation --- similar to the traditional Racket implementation of immutable hash tables. It uses a new "stencil vector" datatype at the Chez Scheme level, which overlays the bitmap needed for a HAMT node with the Chez-object type tag (and also provides an update operation that avoids unnecessary memory work). Compared to the current Racket CS implementation, the stencil-vector HAMT implementation of an immutable hash table takes only about 1/3 the space on avergae, which translates to a overall 5% savings in DrRacket's initial heap. It also makes a full Racket build slightly faster by reducing avergage memory use by 5-10%. But the run-time performance difference is significant, especially for the `hash-keys-subset?` operation (at least in microbenchmarks), and also for addition and iteration. Maybe there's an overall better point that reduces memory use of the current Patricia trie implementation without sacrificing as much performance. Besides the benchmarks and stencil-vector HAMT implementaiton, there are small changes to the way hash tables cooperate with `equal?`, which makes it a little easier to plug in different implementations. --- pkgs/base/info.rkt | 2 +- .../racket/benchmarks/control/hasheq.rkt | 77 + .../tests/racket/benchmarks/hash/config.rkt | 121 ++ .../tests/racket/benchmarks/hash/info.rkt | 3 + .../tests/racket/benchmarks/hash/iterate.rkt | 45 + .../tests/racket/benchmarks/hash/ref.rkt | 53 + .../tests/racket/benchmarks/hash/remove.rkt | 26 + .../tests/racket/benchmarks/hash/set.rkt | 32 + .../tests/racket/benchmarks/hash/subset.rkt | 95 ++ .../tests/racket/chaperone.rktl | 8 +- racket/src/cs/Makefile | 3 + racket/src/cs/bootstrap/scheme-lang.rkt | 20 + racket/src/cs/compile-file.ss | 2 +- racket/src/cs/demo/hash.ss | 133 +- racket/src/cs/rumble.sls | 2 +- racket/src/cs/rumble/equal.ss | 7 + racket/src/cs/rumble/graph.ss | 53 +- racket/src/cs/rumble/hamt-stencil.ss | 1306 +++++++++++++++++ .../src/cs/rumble/{hamt.ss => hamt-vector.ss} | 238 ++- racket/src/cs/rumble/hash-code.ss | 11 + racket/src/cs/rumble/hash.ss | 5 + racket/src/cs/rumble/intmap.ss | 503 +------ racket/src/cs/rumble/patricia.ss | 473 ++++++ racket/src/racket/src/schvers.h | 2 +- 24 files changed, 2591 insertions(+), 629 deletions(-) create mode 100644 pkgs/racket-benchmarks/tests/racket/benchmarks/control/hasheq.rkt create mode 100644 pkgs/racket-benchmarks/tests/racket/benchmarks/hash/config.rkt create mode 100644 pkgs/racket-benchmarks/tests/racket/benchmarks/hash/info.rkt create mode 100644 pkgs/racket-benchmarks/tests/racket/benchmarks/hash/iterate.rkt create mode 100644 pkgs/racket-benchmarks/tests/racket/benchmarks/hash/ref.rkt create mode 100644 pkgs/racket-benchmarks/tests/racket/benchmarks/hash/remove.rkt create mode 100644 pkgs/racket-benchmarks/tests/racket/benchmarks/hash/set.rkt create mode 100644 pkgs/racket-benchmarks/tests/racket/benchmarks/hash/subset.rkt create mode 100644 racket/src/cs/rumble/hamt-stencil.ss rename racket/src/cs/rumble/{hamt.ss => hamt-vector.ss} (85%) create mode 100644 racket/src/cs/rumble/patricia.ss diff --git a/pkgs/base/info.rkt b/pkgs/base/info.rkt index fa40a13ab4..fb1ead5e47 100644 --- a/pkgs/base/info.rkt +++ b/pkgs/base/info.rkt @@ -12,7 +12,7 @@ (define collection 'multi) -(define version "7.5.0.16") +(define version "7.5.0.17") (define deps `("racket-lib" ["racket" #:version ,version])) diff --git a/pkgs/racket-benchmarks/tests/racket/benchmarks/control/hasheq.rkt b/pkgs/racket-benchmarks/tests/racket/benchmarks/control/hasheq.rkt new file mode 100644 index 0000000000..b0f4406c1d --- /dev/null +++ b/pkgs/racket-benchmarks/tests/racket/benchmarks/control/hasheq.rkt @@ -0,0 +1,77 @@ +#lang racket/base +(require racket/include) + +(include "config.rktl") + +(define (f x) x) +(set! f f) + +'---------------------------------------- + +'hasheq-add-remove +(times + (let loop ([ht #hasheq()] [i L]) + (if (zero? i) + (void (f ht)) + (loop (hash-remove (hash-set ht 'a #t) 'a) + (sub1 i))))) + +'hasheq-adds +(times + (for ([i (in-range Q)]) + (let loop ([ht #hasheq()] [i 100]) + (if (zero? i) + (void (f ht)) + (loop (hash-set ht i 'true) + (sub1 i)))))) + +'hasheq-adds/#t +(times + (for ([i (in-range Q)]) + (let loop ([ht #hasheq()] [i 100]) + (if (zero? i) + (void (f ht)) + (loop (hash-set ht i #t) + (sub1 i)))))) + +'hasheq-addsame +(times + (for ([i (in-range Q)]) + (let loop ([ht #hasheq()] [i 100]) + (if (zero? i) + (void (f ht)) + (loop (hash-set ht 'a 'true) + (sub1 i)))))) + +'hasheq-removes +(times + (let ([ht (for/hasheq ([i (in-range 100)]) + (values i i))]) + (for ([i (in-range Q)]) + (let loop ([ht ht] [i 100]) + (if (zero? i) + (void (f ht)) + (loop (hash-remove ht i) + (sub1 i))))))) + +'hasheq-ref +(times + (let ([ht (for/hasheq ([i (in-range 100)]) + (values i i))]) + (for ([i (in-range Q)]) + (let loop ([v #f] [i 100]) + (if (zero? i) + (void (f v)) + (loop (hash-ref ht i #f) + (sub1 i))))))) + +'hasheq-reffail +(times + (let ([ht (for/hasheq ([i (in-range 100)]) + (values i i))]) + (for ([i (in-range Q)]) + (let loop ([v #f] [i 100]) + (if (zero? i) + (void (f v)) + (loop (hash-ref ht 'not-there #f) + (sub1 i))))))) diff --git a/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/config.rkt b/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/config.rkt new file mode 100644 index 0000000000..8f13624312 --- /dev/null +++ b/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/config.rkt @@ -0,0 +1,121 @@ +#lang racket/base +(require (for-syntax racket/base)) + +(provide Q M L N I + times + unknown + with-hash-variants + make-large-equal-key/share1 + make-large-equal-key/share2) + +;; Iterations for slow things: +(define Q 100000) + +;; The depth used for non-tail recursion, typically: +(define M (* Q 10)) + +;; Intermediate count: +(define L (* M 10)) + +;; Number of iteraitons used for a loop, typically +(define N (* L 10)) + +;; Number of times to run each benchamrk: +(define I 3) + +(define-syntax times + (syntax-rules () + [(_ e) + (let loop ([v #f] [i I]) + (if (zero? i) + v + (loop (time e) (sub1 i))))])) + +(define (unknown x) x) +(set! unknown unknown) + +(define-syntax (with-hash-variants stx) + (syntax-case stx () + [(_ body ...) + (let () + (define (body-with prefix + #:empty empty-stx + #:key key-stx + #:other-key other-key-stx + #:make-key make-key-stx + #:make-val make-val-stx + #:for/hash for/hash-stx) + (let loop ([body (syntax->list #'(body ...))]) + (cond + [(null? body) null] + [else + (syntax-case (car body) (quote) + [(quote sym) + (identifier? #'sym) + (cons #`(quote #,(string->symbol (format "~a:~a" prefix (syntax-e #'sym)))) + (loop (cdr body)))] + [#:only + (if (eq? prefix (syntax-e (cadr body))) + (loop (cddr body)) + (loop (cddddr body)))] + [_ + (cons + (let loop ([e (car body)]) + (cond + [(eq? e 'EMPTY) empty-stx] + [(eq? e 'KEY) key-stx] + [(eq? e 'OTHER-KEY) other-key-stx] + [(eq? e 'MAKE-KEY) make-key-stx] + [(eq? e 'MAKE-VAL) make-val-stx] + [(eq? e 'FOR/HASH) for/hash-stx] + [(syntax? e) (datum->syntax e (loop (syntax-e e)))] + [(pair? e) (cons (loop (car e)) (loop (cdr e)))] + [else e])) + (loop (cdr body)))])]))) + #`(begin + #,@(body-with 'eq#t + #:empty #'#hasheq() + #:key #''a + #:other-key #''not-there + #:make-key #'values + #:make-val #'(lambda (v) #t) + #:for/hash #'for/hasheq) + #,@(body-with 'eq + #:empty #'#hasheq() + #:key #''a + #:other-key #''not-there + #:make-key #'values + #:make-val #'values + #:for/hash #'for/hasheq) + #,@(body-with 'eqv + #:empty #'#hasheqv() + #:key #'12345 + #:other-key #'1/3 + #:make-key #'values + #:make-val #'values + #:for/hash #'for/hasheqv) + #,@(body-with 'equal + #:empty #'#hash() + #:key #''(a) + #:other-key #''(not-there) + #:make-key #'box + #:make-val #'values + #:for/hash #'for/hash)))])) + +(define (make-data) + (list 1 2 3 + (string-copy "Hello, world!") + 'apple + (box (seconds->date 0)) + 1/2)) + +(define share1-a (make-data)) +(define share1-b (make-data)) +(define share2-a (make-data)) +(define share2-b (make-data)) + +(define (make-large-equal-key/share1 c) + (vector share1-a c share1-b)) + +(define (make-large-equal-key/share2 c) + (vector share2-a c share2-b)) diff --git a/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/info.rkt b/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/info.rkt new file mode 100644 index 0000000000..a5b71b1dd7 --- /dev/null +++ b/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/info.rkt @@ -0,0 +1,3 @@ +#lang info + +(define test-omit-paths 'all) diff --git a/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/iterate.rkt b/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/iterate.rkt new file mode 100644 index 0000000000..3f3bd9d376 --- /dev/null +++ b/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/iterate.rkt @@ -0,0 +1,45 @@ +#lang racket/base +(require "config.rkt") + +'eq:keys +(times + (let ([ht (for/hasheq ([i (in-range 100)]) + (values i i))]) + (void + (for ([i (in-range Q)]) + (for/fold ([v #f]) ([k (in-hash-keys ht)]) + k))))) + +'eq:vals +(times + (let ([ht (for/hasheq ([i (in-range 100)]) + (values i i))]) + (for ([i (in-range Q)]) + (void + (for/fold ([v #f]) ([v (in-hash-values ht)]) + v))))) + +'eq:keys-unsafe +(times + (let ([ht (for/hasheq ([i (in-range 100)]) + (values i i))]) + (void + (for ([i (in-range Q)]) + (for/fold ([v #f]) ([k (in-immutable-hash-keys ht)]) + k))))) + +'eq:vals-unsafe +(times + (let ([ht (for/hasheq ([i (in-range 100)]) + (values i i))]) + (for ([i (in-range Q)]) + (void + (for/fold ([v #f]) ([v (in-immutable-hash-values ht)]) + v))))) + +'eq:for-each +(times + (let ([ht (for/hasheq ([i (in-range 100)]) + (values i i))]) + (for ([i (in-range Q)]) + (hash-for-each ht (lambda (k v) 'ok))))) diff --git a/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/ref.rkt b/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/ref.rkt new file mode 100644 index 0000000000..82997201d6 --- /dev/null +++ b/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/ref.rkt @@ -0,0 +1,53 @@ +#lang racket/base +(require "config.rkt") + +(with-hash-variants + + 'ref + (times + (let ([ht (FOR/HASH ([i (in-range 100)]) + (values (MAKE-KEY i) (MAKE-VAL i)))]) + (for ([i (in-range Q)]) + (let loop ([v #f] [i 100]) + (if (zero? i) + (void (unknown v)) + (loop (hash-ref ht (MAKE-KEY i) #f) + (sub1 i))))))) + + #:only equal + 'ref-large + (times + (let ([ht (for/hash ([i (in-range 100)]) + (values (make-large-equal-key/share1 i) i))]) + (for ([i (in-range (quotient Q 10))]) + (let loop ([v #f] [i 100]) + (if (zero? i) + (void (unknown v)) + (loop (hash-ref ht (make-large-equal-key/share2 i) #f) + (sub1 i))))))) + + 'ref-fail + (times + (let ([ht (FOR/HASH ([i (in-range 100)]) + (values (MAKE-KEY i) (MAKE-VAL i)))]) + (for ([i (in-range Q)]) + (let loop ([v #f] [i 100]) + (if (zero? i) + (void (unknown v)) + (loop (hash-ref ht OTHER-KEY #f) + (sub1 i))))))) + + #:only equal + 'ref-large-fail + (times + (let ([ht (for/hash ([i (in-range 100)]) + (values (make-large-equal-key/share1 i) i))] + [not-there (make-large-equal-key/share2 -1)]) + (for ([i (in-range (quotient Q 10))]) + (let loop ([v #f] [i 100]) + (if (zero? i) + (void (unknown v)) + (loop (hash-ref ht not-there #f) + (sub1 i))))))) + + (void)) diff --git a/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/remove.rkt b/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/remove.rkt new file mode 100644 index 0000000000..6c13b00324 --- /dev/null +++ b/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/remove.rkt @@ -0,0 +1,26 @@ +#lang racket/base +(require "config.rkt") + +(with-hash-variants + + 'hash-removes + (times + (let ([ht (FOR/HASH ([i (in-range 100)]) + (values (MAKE-KEY i) (MAKE-VAL i)))]) + (for ([i (in-range Q)]) + (let loop ([ht ht] [i 100]) + (if (zero? i) + (void (unknown ht)) + (loop (hash-remove ht (MAKE-KEY i)) + (sub1 i))))))) + + 'add+remove + (times + (let loop ([ht EMPTY] [i L]) + (if (zero? i) + (void (unknown ht)) + (loop (hash-remove (hash-set ht KEY (MAKE-VAL i)) KEY) + (sub1 i))))) + + (void)) + diff --git a/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/set.rkt b/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/set.rkt new file mode 100644 index 0000000000..b9d0015398 --- /dev/null +++ b/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/set.rkt @@ -0,0 +1,32 @@ +#lang racket/base +(require "config.rkt") + +(with-hash-variants + + 'add-to-empty + (times + (let loop ([ht EMPTY] [i L]) + (if (zero? i) + (void (unknown ht)) + (loop (hash-set ht KEY (MAKE-VAL 'true)) + (sub1 i))))) + + 'add-many + (times + (for ([i (in-range Q)]) + (let loop ([ht EMPTY] [i 100]) + (if (zero? i) + (void (unknown ht)) + (loop (hash-set ht (MAKE-KEY i) (MAKE-VAL 'true)) + (sub1 i)))))) + + 'add-same + (times + (for ([i (in-range Q)]) + (let loop ([ht EMPTY] [i 100]) + (if (zero? i) + (void (unknown ht)) + (loop (hash-set ht (MAKE-KEY 'a) (MAKE-VAL 'true)) + (sub1 i)))))) + + (void)) diff --git a/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/subset.rkt b/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/subset.rkt new file mode 100644 index 0000000000..284dcbf4ac --- /dev/null +++ b/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/subset.rkt @@ -0,0 +1,95 @@ +#lang racket/base +(require "config.rkt") + +(define elems + (parameterize ([current-pseudo-random-generator + (make-pseudo-random-generator)]) + (random-seed 12745) + (hash-keys + (for/fold ([ht #hasheqv()]) ([i 200]) + (let loop () + (define n (random 10000)) + (if (hash-ref ht n #f) + (loop) + (hash-set ht n #t))))))) + +(define (gen n) + (for/hasheq ([i (in-range n)] + [e (in-list elems)]) + (values e e))) + +(define (gen-more n ht) + (for/fold ([ht ht]) ([i (in-range n)] + [e (in-list (list-tail elems (hash-count ht)))]) + (hash-set ht e e))) + +(define (check-true v) + (unless v + (error "failed"))) + +(define (check-false v) + (when v + (error "failed"))) + +'eq:subset-unshared-small +(times + (let ([ht (gen 6)] + [sub-ht (gen 3)]) + (check-true + (for/and ([i (in-range M)]) + (hash-keys-subset? sub-ht ht))))) + +'eq:not-subset-unshared-small +(times + (let ([ht (gen 6)] + [sub-ht (gen 3)]) + (check-false + (for/or ([i (in-range L)]) + (hash-keys-subset? ht sub-ht))))) + +'eq:subset-shared-medium+small +(times + (let* ([sub-ht (gen 10)] + [ht (gen-more 1 sub-ht)]) + (check-true + (for/and ([i (in-range L)]) + (hash-keys-subset? sub-ht ht))))) + +'eq:subset-shared-medium+medium +(times + (let* ([sub-ht (gen 10)] + [ht (gen-more 10 sub-ht)]) + (check-true + (for/and ([i (in-range L)]) + (hash-keys-subset? sub-ht ht))))) + +'eq:subset-same-large +(times + (let* ([sub-ht (gen 100)]) + (check-true + (for/and ([i (in-range L)]) + (hash-keys-subset? sub-ht sub-ht))))) + +'eq:subset-shared-large+small +(times + (let* ([sub-ht (gen 100)] + [ht (gen-more 3 sub-ht)]) + (check-true + (for/and ([i (in-range L)]) + (hash-keys-subset? sub-ht ht))))) + +'eq:subset-shared-large+medium +(times + (let* ([sub-ht (gen 100)] + [ht (gen-more 10 sub-ht)]) + (check-true + (for/and ([i (in-range L)]) + (hash-keys-subset? sub-ht ht))))) + +'eq:subset-shared-large+large +(times + (let* ([sub-ht (gen 100)] + [ht (gen-more 100 sub-ht)]) + (check-true + (for/and ([i (in-range L)]) + (hash-keys-subset? sub-ht ht))))) diff --git a/pkgs/racket-test-core/tests/racket/chaperone.rktl b/pkgs/racket-test-core/tests/racket/chaperone.rktl index fe97d7e8db..4952289362 100644 --- a/pkgs/racket-test-core/tests/racket/chaperone.rktl +++ b/pkgs/racket-test-core/tests/racket/chaperone.rktl @@ -2019,7 +2019,7 @@ (test #f hash-ref h2 'key2 #f) (test '(key2 val2 key2 val2 key2 key2) list get-k get-v set-k set-v remove-k access-k) (hash-for-each h2 void) - (test '(key val key2 val2 key2 key) list get-k get-v set-k set-v remove-k access-k) + (test '(for-each key val key2 val2 key2 key) list 'for-each get-k get-v set-k set-v remove-k access-k) (set! get-k #f) (set! get-v #f) (void (equal-hash-code h2)) @@ -2034,7 +2034,7 @@ (test #t values (equal? h2 (let* ([h2 (make-hash)]) (test (void) hash-set! h2 'key 'val) h2))) - (test '(key val key2 val2 key2 key) list get-k get-v set-k set-v remove-k access-k) + (test '(equal? key val key2 val2 key2 key) list 'equal? get-k get-v set-k set-v remove-k access-k) (void))) (list make-hash make-hasheq make-hasheqv @@ -2092,7 +2092,7 @@ (test #f hash-ref h2 'key2 #f) (test '(key2 val2 key2 val2 key2 key2) list get-k get-v set-k set-v remove-k access-k) (hash-for-each h2 void) - (test '(key val key2 val2 key2 key) list get-k get-v set-k set-v remove-k access-k) + (test '(mid key val key2 val2 key2 key) list 'mid get-k get-v set-k set-v remove-k access-k) (set! get-k #f) (set! get-v #f) (void (equal-hash-code h2)) @@ -2105,7 +2105,7 @@ (set! get-k #f) (set! get-v #f) (test #t values (equal? h2 (hash-set h1 'key 'val))) - (test '(key val key2 val2 key2 key) list get-k get-v set-k set-v remove-k access-k) + (test '(equal?2 key val key2 val2 key2 key) list 'equal?2 get-k get-v set-k set-v remove-k access-k) (void)))))) ;; Check that `hash-set` propagates in a way that allows ;; `chaperone-of?` to work recursively: diff --git a/racket/src/cs/Makefile b/racket/src/cs/Makefile index 8502cefaec..1236b8d654 100644 --- a/racket/src/cs/Makefile +++ b/racket/src/cs/Makefile @@ -255,6 +255,9 @@ RUMBLE_SRCS = rumble/define.ss \ rumble/object-name.ss \ rumble/arity.ss \ rumble/intmap.ss \ + rumble/patricia.ss \ + rumble/hamt-stencil.ss \ + rumble/hamt-vector.ss \ rumble/hash.ss \ rumble/datum.ss \ rumble/lock.ss \ diff --git a/racket/src/cs/bootstrap/scheme-lang.rkt b/racket/src/cs/bootstrap/scheme-lang.rkt index eb2b22b791..bc3d104720 100644 --- a/racket/src/cs/bootstrap/scheme-lang.rkt +++ b/racket/src/cs/bootstrap/scheme-lang.rkt @@ -83,6 +83,7 @@ record? record-type-uid $object-ref + stencil-vector? (rename-out [s:vector-sort vector-sort] [s:vector-sort! vector-sort!]) vector-for-each @@ -175,6 +176,9 @@ [logtest fxlogtest]) fxsrl fxbit-field + fxpopcount + fxpopcount32 + fxpopcount16 bitwise-bit-count bitwise-arithmetic-shift-right bytevector-u16-native-ref @@ -746,6 +750,22 @@ [(proc . vecs) (list->vector (apply map proc (map vector->list vecs)))])) +(define (stencil-vector? v) #f) + +(define (fxpopcount32 x) + (let* ([x (- x (bitwise-and (arithmetic-shift x -1) #x55555555))] + [x (+ (bitwise-and x #x33333333) (bitwise-and (arithmetic-shift x -2) #x33333333))] + [x (bitwise-and (+ x (arithmetic-shift x -4)) #x0f0f0f0f)] + [x (+ x (arithmetic-shift x -8) (arithmetic-shift x -16) (arithmetic-shift x -24))]) + (bitwise-and x #x3f))) + +(define (fxpopcount x) + (fx+ (fxpopcount32 (bitwise-and x #xffffffff)) + (fxpopcount32 (arithmetic-shift x -32)))) + +(define (fxpopcount16 x) + (fxpopcount32 (bitwise-and x #xffff))) + (define (logbit? m n) (bitwise-bit-set? n m)) (define (logbit1 i n) diff --git a/racket/src/cs/compile-file.ss b/racket/src/cs/compile-file.ss index d199e6e09b..3358209602 100644 --- a/racket/src/cs/compile-file.ss +++ b/racket/src/cs/compile-file.ss @@ -2,7 +2,7 @@ ;; Check to make we're using a build of Chez Scheme ;; that has all the features we need. (define-values (need-maj need-min need-sub need-dev) - (values 9 5 3 9)) + (values 9 5 3 10)) (unless (guard (x [else #f]) (eval 'scheme-fork-version-number)) (error 'compile-file diff --git a/racket/src/cs/demo/hash.ss b/racket/src/cs/demo/hash.ss index bb93a90666..c3eb47c1e2 100644 --- a/racket/src/cs/demo/hash.ss +++ b/racket/src/cs/demo/hash.ss @@ -25,6 +25,22 @@ (define-values (struct:trans trans trans? trans-ref trans-set!) (make-struct-type 'top #f 2 0 #f '() #f)) +(define-values (struct:collide collide collide? collide-ref collide-set!) + (make-struct-type 'collide #f 2 0 #f + (list (cons prop:equal+hash + (list + (lambda (a b eql?) + (and (eql? (collide-1 a) + (collide-1 b)) + (eql? (collide-2 a) + (collide-2 b)))) + (lambda (a hc) + (hc (collide-1 a))) + (lambda (a hc) + (hc (collide-1 a)))))))) +(define collide-1 (make-struct-field-accessor collide-ref 0)) +(define collide-2 (make-struct-field-accessor collide-ref 1)) + (define-syntax check (syntax-rules () [(_ a b) @@ -32,7 +48,7 @@ [bv b]) (unless (equal? av bv) (error 'check (format "failed ~s = ~s [expected ~s]" 'a av bv))))])) - + (check (equal? (top 1 2) (top 1 3)) #t) (check (equal? (top 1 2) (top 2 2)) #f) (check (hash-ref (hash-set (hash) (top 1 2) 'ok) (top 1 3) #f) 'ok) @@ -42,9 +58,70 @@ (check (hash-ref (hash-set (hash) (trans 1 2) 'ok) (trans 1 2) #f) 'ok) (check (hash-ref (hash-set (hash) (trans 1 2) 'ok) (trans 1 3) #f) #f) +(check (equal? (collide 1 2) (collide 1 2)) #t) +(check (equal? (collide 1 2) (collide 1 3)) #f) +(check (equal? (equal-hash-code (collide 1 2)) (equal-hash-code (collide 1 3))) #t) + (check (equal? (hash 1 'x 2 'y) (hash 2 'y 1 'x)) #t) (check (hash-ref (hash (hash 1 'x 2 'y) 7) (hash 2 'y 1 'x) #f) 7) +(let ([simple (hash (cons 1 2) 2 (cons 1 3) 3)]) + (define (check-map l) + (check (or (equal? l (list (cons (cons 1 2) 2) (cons (cons 1 3) 3))) + (equal? l (list (cons (cons 1 3) 3) (cons (cons 1 2) 2)))) + #t)) + (check-map (hash-map simple cons)) + (check-map (let loop ([i (hash-iterate-first simple)]) + (if (not i) + '() + (cons (cons (hash-iterate-key simple i) + (hash-iterate-value simple i)) + (loop (hash-iterate-next simple i)))))) + (check-map (let loop ([i (unsafe-immutable-hash-iterate-first simple)]) + (if (not i) + '() + (cons (cons (unsafe-immutable-hash-iterate-key simple i) + (unsafe-immutable-hash-iterate-value simple i)) + (loop (unsafe-immutable-hash-iterate-next simple i))))))) + +(check (hash-ref (hash (collide 1 2) 2 (collide 1 3) 3) (collide 1 2)) 2) +(let ([collides (hash (collide 1 2) 2 (collide 1 3) 3)]) + (define (check-map l) + (check (or (equal? l (list (cons (collide 1 2) 2) (cons (collide 1 3) 3))) + (equal? l (list (cons (collide 1 3) 3) (cons (collide 1 2) 2)))) + #t)) + (check-map (hash-map collides cons)) + (check-map (let loop ([i (hash-iterate-first collides)]) + (if (not i) + '() + (cons (cons (hash-iterate-key collides i) + (hash-iterate-value collides i)) + (loop (hash-iterate-next collides i)))))) + (check-map (let loop ([i (unsafe-immutable-hash-iterate-first collides)]) + (if (not i) + '() + (cons (cons (unsafe-immutable-hash-iterate-key collides i) + (unsafe-immutable-hash-iterate-value collides i)) + (loop (unsafe-immutable-hash-iterate-next collides i))))))) + +(let ([mixed (hash 1 #t 2 'true 3 #t 4 'true)]) + (define (check-map l) + (check (list-sort (lambda (a b) (< (car a) (car b))) l) + '((1 . #t) (2 . true) (3 . #t) (4 . true)))) + (check-map (hash-map mixed cons)) + (check-map (let loop ([i (hash-iterate-first mixed)]) + (if (not i) + '() + (cons (cons (hash-iterate-key mixed i) + (hash-iterate-value mixed i)) + (loop (hash-iterate-next mixed i)))))) + (check-map (let loop ([i (unsafe-immutable-hash-iterate-first mixed)]) + (if (not i) + '() + (cons (cons (unsafe-immutable-hash-iterate-key mixed i) + (unsafe-immutable-hash-iterate-value mixed i)) + (loop (unsafe-immutable-hash-iterate-next mixed i))))))) + ;; Check `equal?`-based weak hash tables (let ([ht (make-weak-hash)] [apple (string #\a #\p #\p #\l #\e)] @@ -63,6 +140,16 @@ ;; Ensure that `ht` stays live until here (check (hash? ht) #t))) +(let ([ph (make-hasheq-placeholder null)]) + (check (make-reader-graph ph) (hasheq))) +(let ([ph (make-hasheq-placeholder (list (cons 1 2) (cons 'a 'b)))]) + (check (make-reader-graph ph) (hasheq 1 2 'a 'b))) +(let* ([ph1 (make-placeholder #f)] + [ph (make-hasheq-placeholder (list (cons ph1 2) (cons 'a 'b)))]) + (placeholder-set! ph1 ph) + (let ([g (make-reader-graph ph)]) + (check (hash-count g) 2))) + (define (shuffle l) (define a (make-vector (length l))) (let loop ([l l] [i 0]) @@ -423,3 +510,47 @@ (report "equal-hash-code" r-coll)) (time (for-each (lambda (i) (for-each equal-hash l)) l)) (time (for-each (lambda (i) (for-each equal-hash-code l)) l))) + +;; ---------------------------------------- +;; Stress text: + +; (random-seed 77889955) +(let loop ([n 1]) + (unless (fx= n 0) + (let ([nums (let loop ([l null] [n 0]) + (if (= n 1000) + l + (let ([r (collide (#%random #xFFFFF) + (#%random #xFFFFF))]) + (if (member r l) + (loop l n) + (loop (cons r l) (add1 n))))))] + [dup (lambda (x) + (if (collide? x) + (collide (collide-1 x) (collide-2 x)) + x))]) + (let ([ht (let loop ([ht (hash)] [l nums] [n 0]) + (check (hash-count ht) n) + (if (null? l) + ht + (loop (hash-set ht (dup (car l)) (car l)) (cdr l) (add1 n))))] + [rev-ht (let loop ([ht (hash)] [l (reverse nums)]) + (if (null? l) + ht + (loop (hash-set ht (dup (car l)) (car l)) (cdr l))))]) + (check (equal? ht rev-ht) #t) + (check (equal? (equal-hash-code ht) (equal-hash-code rev-ht)) #t) + (check (length nums) (hash-count ht)) + (for-each (lambda (i) + (check i (hash-ref ht i #f))) + nums) + (let loop ([sub-ht ht] [sub-rev-ht rev-ht] [l nums] [n (length nums)]) + (unless (null? l) + (check (hash-count sub-ht) n) + (check (hash-count sub-rev-ht) n) + (check (hash-keys-subset? sub-ht ht) #t) + (check (hash-keys-subset? sub-rev-ht ht) #t) + (check (equal? sub-ht sub-rev-ht) #t) + (check (equal? (equal-hash-code sub-ht) (equal-hash-code sub-rev-ht)) #t) + (loop (hash-remove sub-ht (car l)) (hash-remove sub-rev-ht (car l)) (cdr l) (sub1 n)))))) + (loop (fx- n 1)))) diff --git a/racket/src/cs/rumble.sls b/racket/src/cs/rumble.sls index e9194218d5..5c69427f1b 100644 --- a/racket/src/cs/rumble.sls +++ b/racket/src/cs/rumble.sls @@ -277,7 +277,7 @@ unsafe-weak-hash-iterate-key+value unsafe-weak-hash-iterate-pair unsafe-hash-seal! ; not exported to racket - hash? hash-eq? hash-equal? hash-eqv? hash-weak? immutable-hash? + hash? hash-eq? hash-equal? hash-eqv? hash-weak? hash-count hash-keys-subset? eq-hashtable->hash ; not exported to racket diff --git a/racket/src/cs/rumble/equal.ss b/racket/src/cs/rumble/equal.ss index bd6bed5e05..09902ca7c4 100644 --- a/racket/src/cs/rumble/equal.ss +++ b/racket/src/cs/rumble/equal.ss @@ -88,6 +88,13 @@ (eql? (unbox orig-a) (unbox orig-b)) (let ([ctx (deeper-context ctx)]) (equal? (unbox orig-a) (unbox orig-b) ctx)))))] + [(authentic-hash? a) + (and (authentic-hash? b) + (or (check-union-find ctx a b) + (let ([ctx (deeper-context ctx)]) + (hash=? orig-a orig-b + (lambda (a b) + (equal? a b ctx))))))] [(record? a) (and (record? b) ;; Check for `prop:impersonator-of` diff --git a/racket/src/cs/rumble/graph.ss b/racket/src/cs/rumble/graph.ss index e742e94b1c..c4a1159cfa 100644 --- a/racket/src/cs/rumble/graph.ss +++ b/racket/src/cs/rumble/graph.ss @@ -110,11 +110,20 @@ [(hash-eqv? v) (make-hasheqv)] [else (make-hasheq)])) (cond - [(hash-eq? v) (make-intmap-shell 'eq)] - [(hash-eqv? v) (make-intmap-shell 'eqv)] - [else (make-intmap-shell 'equal)]))]) + [(zero? (hash-count v)) v] + [else + (cond + [(hash-eq? v) (make-intmap-shell 'eq)] + [(hash-eqv? v) (make-intmap-shell 'eqv)] + [else (make-intmap-shell 'equal)])]))] + [p (if mutable? + orig-p + (cond + [(hash-eq? v) empty-hasheq] + [(hash-eqv? v) empty-hasheqv] + [else empty-hash]))]) (hashtable-set! ht v orig-p) - (let hloop ([p orig-p] [i (hash-iterate-first v)] [diff? #f]) + (let hloop ([p p] [i (hash-iterate-first v)] [diff? #f]) (cond [(not i) (cond @@ -122,7 +131,8 @@ (cond [mutable? orig-p] [else - (intmap-shell-sync! orig-p p) + (unless (zero? (hash-count p)) + (intmap-shell-sync! orig-p p)) orig-p])] [else (hashtable-set! ht v v) @@ -137,19 +147,28 @@ (hash-iterate-next v i) (or diff? (not (and (eq? key new-key) (eq? val new-val)))))))])))] [(hash-placeholder? v) - (let* ([orig-p (cond - [(hasheq-placeholder? v) (make-intmap-shell 'eq)] - [(hasheqv-placeholder? v) (make-intmap-shell 'eqv)] - [else (make-intmap-shell 'equal)])]) - (hashtable-set! ht v orig-p) - (let hloop ([p orig-p] [alst (hash-placeholder-alist v)]) + (let ([alst (hash-placeholder-alist v)]) + (cond + [(null? alst) (cond - [(null? alst) - (intmap-shell-sync! orig-p p) - orig-p] - [else - (hloop (hash-set p (loop (caar alst)) (loop (cdar alst))) - (cdr alst))])))] + [(hasheq-placeholder? v) empty-hasheq] + [(hasheqv-placeholder? v) empty-hasheqv] + [else empty-hash])] + [else + (let-values ([(orig-p p) + (cond + [(hasheq-placeholder? v) (values (make-intmap-shell 'eq) empty-hasheq)] + [(hasheqv-placeholder? v) (values (make-intmap-shell 'eqv) empty-hasheqv)] + [else (values (make-intmap-shell 'equal) empty-hash)])]) + (hashtable-set! ht v orig-p) + (let hloop ([p p] [alst alst]) + (cond + [(null? alst) + (intmap-shell-sync! orig-p p) + orig-p] + [else + (hloop (hash-set p (loop (caar alst)) (loop (cdar alst))) + (cdr alst))])))]))] [(prefab-struct-key v) => (lambda (key) (let ([args (cdr (vector->list (struct->vector v)))]) diff --git a/racket/src/cs/rumble/hamt-stencil.ss b/racket/src/cs/rumble/hamt-stencil.ss new file mode 100644 index 0000000000..c9fdb60c93 --- /dev/null +++ b/racket/src/cs/rumble/hamt-stencil.ss @@ -0,0 +1,1306 @@ +;; See also "intmap.ss" + +;; A HAMT node is implemented as a Chez Scheme stencil vector. A +;; stencil vector has 26 or 58 slots, so we make each node in the HAMT +;; of size 8 (32-bit platforms) or 16 (64-bit platforms) entries. That +;; way, we have use three bits per node entry: child, key, and value. +;; +;; * A child bit set means that the trie continues with a child node. +;; +;; * A key bit set meants that the trie ends with a specific key and +;; value; the key and child bits are mutually exclusive. +;; +;; * A value if is set only if the corresponding key bit is set. If +;; the key bit is not set, then the implicit value for the key is +;; `#t`. +;; +;; We use one extra slot in the stencil vector to record the tree size +;; and equality type (both packed into a single fixnum). That's first +;; in the stencil vector, so the stencil vector mask always has the +;; low bit set. After that "static field", the order is as above: +;; children, keys, then values. +;; +;; Keys in a bnode are "wrapped". A wrapped key differs from a key +;; only in an `equal?`-based hash table, where if a key is complex +;; enough --- so that keeping a hash code is worthwhile to speed up +;; comparions --- then it is paired with its hash code for future +;; reference. (This is not obviously a good idea, since it trades +;; space for speed, but right choice for `equal?`-based tables may be +;; different than for `eq?` and `eqv?`-based tables.) +;; +;; A "cnode" collision node is an association list. It's never a root +;; node, but it can be a child at any non-root depth. + +(define (intmap? x) (stencil-vector? x)) + +;; log of node size: +(define BNODE-BITS (if (fx> (stencil-vector-mask-width) 24) + 4 + 3)) +(define BNODE-MASK (fx1- (fxsll 1 BNODE-BITS))) + +;; node size: +(define HAMT-WIDTH (fxsll 1 BNODE-BITS)) +(define HAMT-GROUP-MASK (fx- (fxsll 1 HAMT-WIDTH) 1)) + +;; Static fields in stencil vector: +(define HAMT-STATIC-FIELD-COUNT 1) +;; First field is a count with eqtype encoded as a single fixnum: +(define HAMT-COUNT+EQTYPE-INDEX 0) +(define HAMT-COUNT+EQTYPE-BIT (fxsll 1 HAMT-COUNT+EQTYPE-INDEX)) + +;; Equality types: +(define HAMT-EQTYPE-EQ 0) +(define HAMT-EQTYPE-EQV 1) +(define HAMT-EQTYPE-EQUAL 2) +(define HAMT-EQTYPE-MASK (fx- (fxsll 1 (integer-length HAMT-EQTYPE-EQUAL)) 1)) + +(define HAMT-COUNT-OFFSET (integer-length HAMT-EQTYPE-MASK)) +(define ONE-COUNT-IN-COUNT+EQTYPE (fxsll 1 HAMT-COUNT-OFFSET)) + +(define (count+eqtype c t) + (fxior (fxsll c HAMT-COUNT-OFFSET) t)) +(define (count+eqtype-eqtype x) + (fxand x HAMT-EQTYPE-MASK)) +(define (count+eqtype-count x) + (fxsrl x HAMT-COUNT-OFFSET)) + +(define (hamt-count+eqtype h) + (stencil-vector-ref h HAMT-COUNT+EQTYPE-INDEX)) +(define (hamt-eqtype h) + (count+eqtype-eqtype (hamt-count+eqtype h))) +(define (hamt-count h) + (count+eqtype-count (hamt-count+eqtype h))) + +;; to dispatch on a bnode's equality type: +(define-syntax eqtype-case + (syntax-rules (eq eqv else) + [(_ h [(eq) a] [(eqv) b] [else c]) + (let ([eqt (hamt-eqtype h)]) + (cond + [(fx= eqt HAMT-EQTYPE-EQ) a] + [(fx= eqt HAMT-EQTYPE-EQV) b] + [else c]))])) + +;; Child, key, and value bits in the stencil-vector mask: +(define HAMT-CHILD-OFFSET HAMT-STATIC-FIELD-COUNT) +(define HAMT-KEY-OFFSET (fx+ HAMT-CHILD-OFFSET HAMT-WIDTH)) +(define HAMT-VAL-OFFSET (fx+ HAMT-KEY-OFFSET HAMT-WIDTH)) + +(define HAMT-CHILD-MASK (fxsll HAMT-GROUP-MASK HAMT-CHILD-OFFSET)) +(define HAMT-KEY-MASK (fxsll HAMT-GROUP-MASK HAMT-KEY-OFFSET)) +(define HAMT-VAL-MASK (fxsll HAMT-GROUP-MASK HAMT-VAL-OFFSET)) + +(define (hamt-mask->child-count mask) + (fxpopcount16 (fxsrl (fxand HAMT-CHILD-MASK mask) HAMT-CHILD-OFFSET))) +(define (hamt-mask->key-count mask) + (fxpopcount16 (fxsrl (fxand HAMT-KEY-MASK mask) HAMT-KEY-OFFSET))) +(define (hamt-mask->val-count mask) + (fxpopcount16 (fxsrl (fxand HAMT-VAL-MASK mask) HAMT-VAL-OFFSET))) + +(define (bnode? x) (stencil-vector? x)) + +(define-record-type cnode + [fields (immutable hash) + (immutable content)] ; association list + [nongenerative #{cnode pfwh0bwrq2nqlke97ikru0ds2-0}] + [sealed #t]) + +(define (bnode-bit-pos hash shift) + (bnode-mask hash shift)) + +(define (bnode-mask hash shift) + (fxand (fxsrl hash shift) BNODE-MASK)) + +(define (bnode-maps-key? node bit) + (fxbit-set? (stencil-vector-mask node) (fx+ bit HAMT-KEY-OFFSET))) + +(define (bnode-maps-child? node bit) + (fxbit-set? (stencil-vector-mask node) (fx+ bit HAMT-CHILD-OFFSET))) + +(define (bnode-down shift) + (fx+ shift BNODE-BITS)) + +(define (bnode-child-ref n bit) + (stencil-vector-ref n (fxpopcount32 + (fxand (stencil-vector-mask n) + (fx- (fxsll 1 (fx+ bit HAMT-CHILD-OFFSET)) 1))))) + +(define (bnode-key-ref n bit) + (stencil-vector-ref n (fxpopcount32 + (fxand (stencil-vector-mask n) + (fx- (fxsll 1 (fx+ bit HAMT-KEY-OFFSET)) 1))))) + +(define (bnode-val-ref n bit) + (let ([mask-bit (fxsll 1 (fx+ bit HAMT-VAL-OFFSET))] + [mask (stencil-vector-mask n)]) + (if (fx= 0 (fxand mask-bit mask)) + #t ; not stored => #t + (stencil-vector-ref n (fxpopcount + (fxand mask (fx- mask-bit 1))))))) + +;; assumes no children +(define (bnode-only-key-ref n) + (stencil-vector-ref n HAMT-STATIC-FIELD-COUNT)) + +;; assumes no children +(define (bnode-only-val-ref n) + (if (fx= 0 (fxand (stencil-vector-mask n) HAMT-VAL-MASK)) + #t ; not stored => #t + (stencil-vector-ref n (fx+ 1 HAMT-STATIC-FIELD-COUNT)))) + +;; i counts from 0 through children + keys +(define (bnode-child-index-ref n i) + (stencil-vector-ref n (fx+ HAMT-CHILD-OFFSET i))) + +;; i counts from 0 through children + keys, so it encodes the number of preceding children +(define (bnode-key-index-ref n i) + (stencil-vector-ref n (fx+ HAMT-CHILD-OFFSET i))) + +;; i counts from 0 through children + keys +(define (bnode-val-index-ref n i) + (let* ([mask (stencil-vector-mask n)] + [val-mask (fxand mask HAMT-VAL-MASK)]) + (cond + [(fx= 0 val-mask) + ;; All values in this node are implicitly #t + #t] + [(fx= (fxsrl val-mask (fx- HAMT-VAL-OFFSET HAMT-KEY-OFFSET)) + (fxand mask HAMT-KEY-MASK)) + ;; All keys supplied, so the value-relative index + ;; matches the key-relative index + (let* ([child-count (hamt-mask->child-count mask)] + [key-count (hamt-mask->key-count mask)] + [val-i (fx- i child-count)]) ; same as key index + (bnode-val-local-index-ref n child-count key-count val-i))] + [else + ;; Complicated case: we have to figure out how many + ;; previous keys have values + (let* ([child-count (hamt-mask->child-count mask)] + [key-count (hamt-mask->key-count mask)] + [key-i (fx- i child-count)]) + (let loop ([i 0] [val-i 0] [bit HAMT-KEY-OFFSET]) + (cond + [(fxbit-set? mask bit) + ;; Found a key + (if (fxbit-set? mask (fx+ bit (fx- HAMT-VAL-OFFSET HAMT-KEY-OFFSET))) + ;; Also found a value: + (if (= i key-i) + (bnode-val-local-index-ref n child-count key-count val-i) + (loop (fx+ i 1) (fx+ val-i 1) (fx+ bit 1))) + ;; Did not find a value + (if (= i key-i) + #t ; implicit #t + (loop (fx+ i 1) val-i (fx+ bit 1))))] + [else + (loop i val-i (fx+ bit 1))])))]))) + +(define (bnode-val-local-index-ref n child-count key-count val-i) + (stencil-vector-ref n (fx+ val-i + HAMT-STATIC-FIELD-COUNT + child-count + key-count))) + +(define (bnode-add-key node wrapped-key val bit) + (if (eq? val #t) + (stencil-vector-update node + HAMT-COUNT+EQTYPE-BIT + (fxior HAMT-COUNT+EQTYPE-BIT + (fxsll 1 (fx+ bit HAMT-KEY-OFFSET))) + (fx+ (stencil-vector-ref node HAMT-COUNT+EQTYPE-INDEX) + ONE-COUNT-IN-COUNT+EQTYPE) + wrapped-key) + (stencil-vector-update node + HAMT-COUNT+EQTYPE-BIT + (fxior HAMT-COUNT+EQTYPE-BIT + (fxsll 1 (fx+ bit HAMT-KEY-OFFSET)) + (fxsll 1 (fx+ bit HAMT-VAL-OFFSET))) + (fx+ (stencil-vector-ref node HAMT-COUNT+EQTYPE-INDEX) + ONE-COUNT-IN-COUNT+EQTYPE) + wrapped-key + val))) + +(define (bnode-remove-key node bit) + (let ([val-bit (fxsll 1 (fx+ bit HAMT-VAL-OFFSET))]) + (stencil-vector-update node + (fxior HAMT-COUNT+EQTYPE-BIT + (fxsll 1 (fx+ bit HAMT-KEY-OFFSET)) + (fxand (stencil-vector-mask node) val-bit)) + HAMT-COUNT+EQTYPE-BIT + (fx- (stencil-vector-ref node HAMT-COUNT+EQTYPE-INDEX) + ONE-COUNT-IN-COUNT+EQTYPE)))) + +(define (bnode-replace-val node bit val) + (let ([val-bit (fxsll 1 (fx+ bit HAMT-VAL-OFFSET))]) + (cond + [(fx= 0 (fxand (stencil-vector-mask node) val-bit)) + ;; old value was #t + (cond + [(eq? val #t) + node] + [else + (stencil-vector-update node 0 val-bit val)])] + [else + (cond + [(eq? val #t) + (stencil-vector-update node val-bit 0)] + [else + (stencil-vector-update node val-bit val-bit val)])]))) + +(define (bnode-replace-key+val node bit wrapped-key val) + (let* ([key-bit (fxsll 1 (fx+ bit HAMT-KEY-OFFSET))] + [val-bit (fxsll 1 (fx+ bit HAMT-VAL-OFFSET))] + [key+val-bits (fxior key-bit val-bit)]) + (cond + [(fx= 0 (fxand (stencil-vector-mask node) val-bit)) + ;; old value was #t + (cond + [(eq? val #t) + (stencil-vector-update node key-bit key-bit wrapped-key)] + [else + (stencil-vector-update node key-bit key+val-bits wrapped-key val)])] + [else + (cond + [(eq? val #t) + (stencil-vector-update node key+val-bits key-bit wrapped-key)] + [else + (stencil-vector-update node key+val-bits key+val-bits wrapped-key val)])]))) + +(define (bnode-remove-key-add-child node child bit) + (let ([val-bit (fxsll 1 (fx+ bit HAMT-VAL-OFFSET))]) + (stencil-vector-update node + (fxior HAMT-COUNT+EQTYPE-BIT + (fxsll 1 (fx+ bit HAMT-KEY-OFFSET)) + (fxand (stencil-vector-mask node) val-bit)) + (fxior HAMT-COUNT+EQTYPE-BIT + (fxsll 1 (fx+ bit HAMT-CHILD-OFFSET))) + (fx+ (stencil-vector-ref node HAMT-COUNT+EQTYPE-INDEX) + ONE-COUNT-IN-COUNT+EQTYPE) + child))) + +(define (bnode-remove-child-add-key node wrapped-key val bit) + (cond + [(eq? val #t) + (stencil-vector-update node + (fxior HAMT-COUNT+EQTYPE-BIT + (fxsll 1 (fx+ bit HAMT-CHILD-OFFSET))) + (fxior HAMT-COUNT+EQTYPE-BIT + (fxsll 1 (fx+ bit HAMT-KEY-OFFSET))) + (fx- (stencil-vector-ref node HAMT-COUNT+EQTYPE-INDEX) + ONE-COUNT-IN-COUNT+EQTYPE) + wrapped-key)] + [else + (stencil-vector-update node + (fxior HAMT-COUNT+EQTYPE-BIT + (fxsll 1 (fx+ bit HAMT-CHILD-OFFSET))) + (fxior HAMT-COUNT+EQTYPE-BIT + (fxsll 1 (fx+ bit HAMT-KEY-OFFSET)) + (fxsll 1 (fx+ bit HAMT-VAL-OFFSET))) + (fx- (stencil-vector-ref node HAMT-COUNT+EQTYPE-INDEX) + ONE-COUNT-IN-COUNT+EQTYPE) + wrapped-key + val)])) + +(define (bnode-replace-child node old-child new-child bit) + (let ([child-bit (fxsll 1 (fx+ bit HAMT-CHILD-OFFSET))] + [delta (cond + [(and (bnode? old-child) + (bnode? new-child)) + (fx- (hamt-count+eqtype new-child) + (hamt-count+eqtype old-child))] + [else + (fxsll (fx- (node-count new-child) + (node-count old-child)) + HAMT-COUNT-OFFSET)])]) + (cond + [(fx= 0 delta) + (stencil-vector-update node child-bit child-bit new-child)] + [else + (let ([bits (fxior child-bit + HAMT-COUNT+EQTYPE-BIT)]) + (stencil-vector-update node + bits + bits + (fx+ (stencil-vector-ref node HAMT-COUNT+EQTYPE-INDEX) + delta) + new-child))]))) + +(define HASHCODE-BITS (fxbit-count (most-positive-fixnum))) + +(define (node-merge eqtype + wrapped-k1 k1 v1 h1 + wrapped-k2 k2 v2 h2 + shift) + (cond + [(and (fx< HASHCODE-BITS shift) + (fx= h1 h2)) + (pariah + ;; hash collision: make a cnode + (make-cnode h1 + (list (cons k1 v1) + (cons k2 v2))))] + + [else + (let ([m1 (bnode-mask h1 shift)] + [m2 (bnode-mask h2 shift)]) + (cond + [(fx= m1 m2) + ;; partial collision: descend + (let* ([child (node-merge eqtype wrapped-k1 k1 v1 h1 wrapped-k2 k2 v2 h2 (bnode-down shift))] + [cm (bnode-bit-pos h1 shift)]) + (stencil-vector (fxior HAMT-COUNT+EQTYPE-BIT + (fxsll 1 (fx+ cm HAMT-CHILD-OFFSET))) + (count+eqtype 2 eqtype) + child))] + + [else + ;; no collision, make a bnode + (let ([bit1 (bnode-bit-pos h1 shift)] + [bit2 (bnode-bit-pos h2 shift)] + [k1 wrapped-k1] + [k2 wrapped-k2]) + (let ([finish + (lambda (k1 v1 bit1 k2 v2 bit2) + (let ([key-bits (fxior (fxsll 1 (fx+ bit1 HAMT-KEY-OFFSET)) + (fxsll 1 (fx+ bit2 HAMT-KEY-OFFSET)))]) + (cond + [(eq? v1 #t) + (cond + [(eq? v2 #t) + (stencil-vector (fxior HAMT-COUNT+EQTYPE-BIT + key-bits) + (count+eqtype 2 eqtype) + k1 k2)] + [else + (stencil-vector (fxior HAMT-COUNT+EQTYPE-BIT + key-bits + (fxsll 1 (fx+ bit2 HAMT-VAL-OFFSET))) + (count+eqtype 2 eqtype) + k1 k2 + v2)])] + [else + (cond + [(eq? v2 #t) + (stencil-vector (fxior HAMT-COUNT+EQTYPE-BIT + key-bits + (fxsll 1 (fx+ bit1 HAMT-VAL-OFFSET))) + (count+eqtype 2 eqtype) + k1 k2 + v1)] + [else + (stencil-vector (fxior HAMT-COUNT+EQTYPE-BIT + key-bits + (fxsll 1 (fx+ bit1 HAMT-VAL-OFFSET)) + (fxsll 1 (fx+ bit2 HAMT-VAL-OFFSET))) + (count+eqtype 2 eqtype) + k1 k2 + v1 v2)])])))]) + (if (fx<= bit1 bit2) + (finish k1 v1 bit1 k2 v2 bit2) + (finish k2 v2 bit2 k1 v1 bit1))))]))])) + +;; Should only be called three times to create the canonical empty +;; hashes: +(define (make-empty-bnode eqtype) + (stencil-vector HAMT-COUNT+EQTYPE-BIT + (count+eqtype 0 eqtype))) + +;; intmap interface + +(define empty-hasheq (make-empty-bnode HAMT-EQTYPE-EQ)) +(define empty-hasheqv (make-empty-bnode HAMT-EQTYPE-EQV)) +(define empty-hash (make-empty-bnode HAMT-EQTYPE-EQUAL)) + +(define intmap-shell-falses (let loop ([n (fx* 2 HAMT-WIDTH)]) + (if (fx= n 0) + '() + (cons #f (loop (fx- n 1)))))) + +(define (make-intmap-shell eqtype-sym) + ;; a shell is a maximally sized node that claims to have 0 items + (let ([mask (fx- (fxsll 1 (fx+ HAMT-STATIC-FIELD-COUNT + (fx* 2 HAMT-WIDTH))) + 1)] + [eqtype (case eqtype-sym + [(eq) HAMT-EQTYPE-EQ] + [(eqv) HAMT-EQTYPE-EQV] + [else HAMT-EQTYPE-EQUAL])]) + (#%apply stencil-vector mask eqtype intmap-shell-falses))) + +(define (intmap-shell-sync! dest src) + (let ([mask (stencil-vector-mask src)]) + (stencil-vector-truncate! dest mask) + (let loop ([i (fx- (fxpopcount mask) 1)]) + (stencil-vector-set! dest i (stencil-vector-ref src i)) + (unless (fx= i 0) + (loop (fx- i 1)))))) + +(define (intmap-eq? h) + (eq? (hamt-eqtype h) HAMT-EQTYPE-EQ)) + +(define (intmap-eqv? h) + (eq? (hamt-eqtype h) HAMT-EQTYPE-EQV)) + +(define (intmap-equal? h) + (eq? (hamt-eqtype h) HAMT-EQTYPE-EQUAL)) + +(define (intmap-count h) + (hamt-count h)) + +(define (node-count h) + (if (bnode? h) + (hamt-count h) + (length (cnode-content h)))) + +(define (intmap-empty? h) + (fxzero? (hamt-count h))) + +(define-syntax (eqtype-dispatch stx) + (syntax-case stx () + [(_ h [id ...] e) + (let ([prefix (lambda (prefix e) + (let loop ([e e]) + (cond + [(#%identifier? e) + (if (#%memq (#%syntax->datum e) (map #%syntax->datum #'(id ...))) + (datum->syntax e + (#%string->symbol + (string-append (#%symbol->string prefix) + (#%symbol->string (syntax->datum e))))) + e)] + [else + (syntax-case e () + [(a . b) + #`(#,(loop #'a) . #,(loop #'b))] + [_ e])])))]) + (with-syntax ([eq:e (prefix 'eq: #'e)] + [eqv:e (prefix 'eqv: #'e)] + [equal:e (prefix 'equal: #'e)]) + #'(let ([et (hamt-eqtype h)]) + (cond + [(fx= et HAMT-EQTYPE-EQ) + eq:e] + [(fx= et HAMT-EQTYPE-EQV) + eqv:e] + [else + equal:e]))))] + [(_ h (f arg ...)) + #'(eqtype-dispatch h [f] (f arg ...))])) + +(define (intmap-has-key? h key) + (eqtype-dispatch + h [bnode-has-key? bnode-key-hash-code] + (bnode-has-key? h key (bnode-key-hash-code key) 0))) + +(define (intmap-ref h key default) + (let ([count+eqtype (hamt-count+eqtype h)]) + (cond + [(fx= 0 (count+eqtype-count count+eqtype)) + ;; Access on an empty HAMT is common, so don't even hash in that case + default] + [else + (let ([eqtype (count+eqtype-eqtype count+eqtype)]) + (cond + [(fx= eqtype HAMT-EQTYPE-EQ) + (eq:bnode-ref h key (eq:bnode-key-hash-code key) 0 default)] + [(fx= eqtype HAMT-EQTYPE-EQV) + (eqv:bnode-ref h key (eqv:bnode-key-hash-code key) 0 default)] + [else + (equal:bnode-ref h key (equal:bnode-key-hash-code key) 0 default)]))]))) + +(define (intmap-ref-key h key default) + (cond + [(intmap-empty? h) + default] + [else + (eqtype-dispatch + h [bnode-ref-key bnode-key-hash-code] + (bnode-ref-key h key (bnode-key-hash-code key) 0 default))])) + +(define (intmap-set h key val) + (eqtype-dispatch + h [bnode-set bnode-key-hash-code] + (bnode-set h key val (bnode-key-hash-code key) 0))) + +(define (intmap-remove h key) + (eqtype-dispatch + h [bnode-remove bnode-key-hash-code] + (bnode-remove h key (bnode-key-hash-code key) 0))) + +;; ---------------------------------------- +;; generic iteration by counting + +(define (intmap-iterate-first h) + (and (not (intmap-empty? h)) + 0)) + +(define (intmap-iterate-next h pos) + (let ([pos (fx1+ pos)]) + (and (not (fx= pos (intmap-count h))) + pos))) + +(define (intmap-iterate-key h pos fail) + (eqtype-dispatch + h + (bnode-entry-at-position h pos 'key fail))) + +(define (intmap-iterate-value h pos fail) + (eqtype-dispatch + h + (bnode-entry-at-position h pos 'val fail))) + +(define (intmap-iterate-key+value h pos fail) + (eqtype-dispatch + h + (bnode-entry-at-position h pos 'both fail))) + +(define (intmap-iterate-pair h pos fail) + (eqtype-dispatch + h + (bnode-entry-at-position h pos 'pair fail))) + +;; ---------------------------------------- +;; unsafe iteration; position is a stack +;; represented by a list of (cons node index) + +(define (unsafe-intmap-iterate-first h) + (and (not (intmap-empty? h)) + (unsafe-node-iterate-first h '()))) + +(define (unsafe-node-iterate-first n stack) + (cond + [(bnode? n) + (let ([mask (stencil-vector-mask n)]) + (let ([child-count (hamt-mask->child-count mask)] + [key-count (hamt-mask->key-count mask)]) + (let ([stack (cons (cons n (fx+ key-count child-count -1)) stack)]) + (if (fx= key-count 0) + (unsafe-node-iterate-first (bnode-child-index-ref n (fx- child-count 1)) stack) + stack))))] + [(cnode? n) + (cons (box (cnode-content n)) + stack)])) + +(define (unsafe-intmap-iterate-next h pos) + (unsafe-node-iterate-next pos)) + +(define (unsafe-node-iterate-next pos) + (cond + [(null? pos) + ;; Stack is empty, so we're done + #f] + [else + (let ([p (car pos)] + [stack (cdr pos)]) + (cond + [(box? p) + ;; in a cnode + (let ([new-p (cdr (unbox p))]) + (if (null? new-p) + ;; Exhausted this node, so return to parent node + (unsafe-node-iterate-next stack) + ;; still in cnode: + (cons (box new-p) stack)))] + [else + (let ([n (car p)] + [i (cdr p)]) + (cond + [(fx= 0 i) + ;; Exhausted this node, so return to parent node + (unsafe-node-iterate-next stack)] + [else + ;; Move to next (lower) index in the current node + (let ([i (fx1- i)]) + (let ([child-count (hamt-mask->child-count (stencil-vector-mask n))] + [stack (cons (cons n i) stack)]) + (if (fx< i child-count) + (unsafe-node-iterate-first (bnode-child-index-ref n i) stack) + stack)))]))]))])) + +(define (unsafe-intmap-iterate-key h pos) + (eqtype-dispatch + h + (bnode-unsafe-intmap-iterate-key pos))) + +(define (unsafe-intmap-iterate-value h pos) + (eqtype-dispatch + h + (bnode-unsafe-intmap-iterate-value pos))) + +(define (unsafe-intmap-iterate-key+value h pos) + (eqtype-dispatch + h + (bnode-unsafe-intmap-iterate-key+value pos))) + +(define (unsafe-intmap-iterate-pair h pos) + (eqtype-dispatch + h + (bnode-unsafe-intmap-iterate-pair pos))) + +(define (intmap=? a b eql?) + (and (fx= (hamt-count+eqtype a) + (hamt-count+eqtype b)) + (or (intmap-empty? a) ; explicit test in case `a` or `b` is a shell + (eqtype-dispatch + a + (bnode=? a b eql? 0))))) + +(define (intmap-keys-subset? a b) + (or (intmap-empty? a) + (eqtype-dispatch + a + (bnode-keys-subset? a b 0)))) + +(define (intmap-hash-code a hash) + (cond + [(intmap-empty? a) ;; explicit test in `a` is a shell + (hamt-count+eqtype a)] + [else + (eqtype-dispatch + a + (bnode-hash-code a hash 0))])) + +(define (intmap-for-each h proc) + (eqtype-dispatch + h + (bnode-fold h (lambda (k v _) (|#%app| proc k v) (void)) (void)))) + +(define (intmap-map h proc) + (eqtype-dispatch + h [bnode-fold] + (#%reverse (bnode-fold h (lambda (k v xs) (cons (|#%app| proc k v) xs)) '())))) + +;; ---------------------------------------- +;; eqtype-paramerized definitions + +(define-syntax-rule (define-bnode-for-eqtype + + ;; exports: + bnode-key-hash-code + bnode-ref + bnode-ref-key + bnode-has-key? + bnode-set + bnode-remove + bnode-entry-at-position + bnode-unsafe-intmap-iterate-key + bnode-unsafe-intmap-iterate-value + bnode-unsafe-intmap-iterate-key+value + bnode-unsafe-intmap-iterate-pair + bnode=? + bnode-keys-subset? + bnode-hash-code + bnode-fold + + ;; imports: + hamt-key-eqtype + hamt-wrap-key + hamt-unwrap-key + hamt-key=? + hamt-unwrapped-key=? + hamt-wrapped-key=? + hamt-key-hash-code + hamt-wrapped-key-hash-code) + + (begin + + (define (bnode-key-hash-code k) + (hamt-key-hash-code k)) + + (define (bnode-ref node key keyhash shift default) + (let ([bit (bnode-bit-pos keyhash shift)]) + (cond + [(bnode-maps-key? node bit) + (let* ([k (bnode-key-ref node bit)]) + (if (hamt-key=? key keyhash k) + (bnode-val-ref node bit) + default))] + + [(bnode-maps-child? node bit) + (let* ([c (bnode-child-ref node bit)]) + (cond + [(bnode? c) + (bnode-ref c key keyhash (bnode-down shift) default)] + [else + (cnode-ref c key keyhash default)]))] + + [else + default]))) + + (define (bnode-ref-key node key keyhash shift default) + (let ([bit (bnode-bit-pos keyhash shift)]) + (cond + [(bnode-maps-key? node bit) + (let ([k (bnode-key-ref node bit)]) + (if (hamt-key=? key keyhash k) + (hamt-unwrap-key k) + default))] + + [(bnode-maps-child? node bit) + (let* ([c (bnode-child-ref node bit)]) + (cond + [(bnode? c) + (bnode-ref-key c key keyhash (bnode-down shift) default)] + [else + (cnode-ref-key c key keyhash default)]))] + + [else + default]))) + + (define (bnode-has-key? node key keyhash shift) + (let ([bit (bnode-bit-pos keyhash shift)]) + (cond + [(bnode-maps-key? node bit) + (let ([k (bnode-key-ref node bit)]) + (hamt-key=? key keyhash k))] + + [(bnode-maps-child? node bit) + (let* ([c (bnode-child-ref node bit)]) + (cond + [(bnode? c) + (bnode-has-key? c key keyhash (bnode-down shift))] + [else + (cnode-has-key? c key keyhash)]))] + + [else #f]))) + + (define (bnode-set node key val keyhash shift) + (let ([bit (bnode-bit-pos keyhash shift)]) + (cond + [(bnode-maps-key? node bit) + (let* ([k (bnode-key-ref node bit)] + [v (bnode-val-ref node bit)]) + (cond + [(hamt-key=? key keyhash k) + ;; For consistency, we're required to discard the old key and keep the new one + (if (eq? key k) + (if (eq? val v) + node + (bnode-replace-val node bit val)) + (bnode-replace-key+val node bit (hamt-wrap-key key keyhash) val))] + [else + (let* ([h (hamt-wrapped-key-hash-code node k)] + [child (node-merge hamt-key-eqtype + k (hamt-unwrap-key k) v h + (hamt-wrap-key key keyhash) key val keyhash + (bnode-down shift))]) + (bnode-remove-key-add-child node child bit))]))] + + [(bnode-maps-child? node bit) + (let* ([child (bnode-child-ref node bit)] + [new-child (cond + [(bnode? child) + (bnode-set child key val keyhash (bnode-down shift))] + [else + (cnode-set child key val keyhash)])]) + (if (eq? new-child child) + node + (bnode-replace-child node child new-child bit)))] + + [else + (bnode-add-key node (hamt-wrap-key key keyhash) val bit)]))) + + (define (bnode-remove node key keyhash shift) + (let ([bit (bnode-bit-pos keyhash shift)]) + + (cond + [(bnode-maps-key? node bit) + (let* ([k (bnode-key-ref node bit)]) + (cond + [(hamt-key=? key keyhash k) + (let ([mask (stencil-vector-mask node)]) + (cond + [(and (fx= (fxand mask HAMT-KEY-MASK) (fxsll 1 (fx+ bit HAMT-KEY-OFFSET))) + (fxzero? (fxand mask HAMT-CHILD-MASK))) + ;; return canonical empty value + (eqtype-case + node + [(eq) empty-hasheq] + [(eqv) empty-hasheqv] + [else empty-hash])] + [else + (bnode-remove-key node bit)]))] + [else + node]))] + + [(bnode-maps-child? node bit) + (let* ([child (bnode-child-ref node bit)] + [new-child (cond + [(bnode? child) + (bnode-remove child key keyhash (bnode-down shift))] + [else + (cnode-remove child key keyhash)])]) + (cond + [(eq? new-child child) node] + [(and (bnode? new-child) + (fx= 1 (hamt-count new-child))) + ;; Replace child with its sole key and value + (bnode-remove-child-add-key node (bnode-only-key-ref new-child) (bnode-only-val-ref new-child) bit)] + [(and (cnode? new-child) + (null? (cdr (cnode-content new-child)))) + ;; Replace child with its sole key and value + (let ([p (car (cnode-content new-child))]) + (bnode-remove-child-add-key node (hamt-wrap-key (car p) (cnode-hash new-child)) (cdr p) bit))] + [else + (bnode-replace-child node child new-child bit)]))] + + [else + node]))) + + (define (cnode-ref node key keyhash default) + (cond + [(fx= keyhash (cnode-hash node)) + (let ([p (cnode-assoc (cnode-content node) key)]) + (if p + (cdr p) + default))] + [else default])) + + (define (cnode-ref-key node key keyhash default) + (cond + [(fx= keyhash (cnode-hash node)) + (let ([p (cnode-assoc (cnode-content node) key)]) + (if p + (car p) + default))] + [else default])) + + (define (cnode-has-key? n key keyhash) + (and (fx= keyhash (cnode-hash n)) + (cnode-assoc (cnode-content n) key) + #t)) + + (define (cnode-assoc alist key) + (cond + [(null? alist) #f] + [(hamt-unwrapped-key=? key (caar alist)) + (car alist)] + [else (cnode-assoc (cdr alist) key)])) + + (define (cnode-set node key val keyhash) + (make-cnode (cnode-hash node) + (cnode-assoc-update (cnode-content node) key (cons key val)))) + + (define (cnode-remove node key keyhash) + (make-cnode (cnode-hash node) + (cnode-assoc-update (cnode-content node) key #f))) + + (define (cnode-assoc-update alist key new-p) + (cond + [(null? alist) (if new-p + (list new-p) + null)] + [(hamt-unwrapped-key=? key (caar alist)) + (if new-p + (cons new-p (cdr alist)) + (cdr alist))] + [else + (cons (car alist) + (cnode-assoc-update (cdr alist) key new-p))])) + + (define (cnode=? a b eql?) + (or + (eq? a b) + (and + (cnode? b) + (cnode-keys-subset/equal? a b eql?) + (fx= (length (cnode-content a)) + (length (cnode-content b)))))) + + (define (cnode-keys-subset/equal? a b eql?) + (or + (eq? a b) + (and + (cnode? b) + (fx= (cnode-hash a) (cnode-hash b)) + (let ([ac (cnode-content a)] + [bc (cnode-content b)]) + (let loop ([ac ac]) + (cond + [(null? ac) #t] + [else + (let ([p (cnode-assoc bc (caar ac))]) + (and p + (or (not eql?) (eql? (cdar ac) (cdr p))) + (loop (cdr ac))))])))))) + + (define (bnode-entry-at-position n pos mode fail) + (let* ([mask (stencil-vector-mask n)] + [child-count (hamt-mask->child-count mask)]) + (let loop ([i 0] [pos pos]) + (cond + [(fx= i child-count) + (let ([key-count (hamt-mask->key-count mask)]) + (cond + [(fx< pos key-count) + (let ([get-key (lambda () (hamt-unwrap-key (bnode-key-index-ref n (fx+ pos child-count))))] + [get-value (lambda () (bnode-val-index-ref n (fx+ pos child-count)))]) + (case mode + [(key) (get-key)] + [(val) (get-value)] + [(both) (values (get-key) (get-value))] + [else (cons (get-key) (get-value))]))] + [else fail]))] + [else + (let ([c (bnode-child-index-ref n i)]) + (cond + [(bnode? c) + (let ([sz (hamt-count c)]) + (if (fx>= pos sz) + (loop (fx+ i 1) (fx- pos sz)) + (bnode-entry-at-position c pos mode fail)))] + [else + (let* ([alist (cnode-content c)] + [len (length alist)]) + (if (fx>= pos len) + (loop (fx+ i 1) (fx- pos len)) + (let ([p (list-ref alist pos)]) + (case mode + [(key) (car p)] + [(val) (cdr p)] + [(both) (values (car p) (cdr p))] + [else p]))))]))])))) + + (define (bnode-unsafe-intmap-iterate-key pos) + (let ([p (car pos)]) + (cond + [(box? p) + ;; in a cnode + (caar (unbox p))] + [else + (let ([h (car p)]) + (hamt-unwrap-key (bnode-key-index-ref h (cdr p))))]))) + + (define (bnode-unsafe-intmap-iterate-value pos) + (let ([p (car pos)]) + (cond + [(box? p) + ;; in a cnode + (cdar (unbox p))] + [else + (bnode-val-index-ref (car p) (cdr p))]))) + + (define (bnode-unsafe-intmap-iterate-key+value pos) + (let ([p (car pos)]) + (cond + [(box? p) + ;; in a cnode + (let ([pr (car (unbox p))]) + (values (car pr) (cdr pr)))] + [else + (let ([n (car p)] + [i (cdr p)]) + (values (hamt-unwrap-key (bnode-key-index-ref n i)) + (bnode-val-index-ref n i)))]))) + + (define (bnode-unsafe-intmap-iterate-pair pos) + (let ([p (car pos)]) + (cond + [(box? p) + ;; in a cnode + (car (unbox p))] + [else + (let ([n (car p)] + [i (cdr p)]) + (cons (hamt-unwrap-key (bnode-key-index-ref n i)) + (bnode-val-index-ref n i)))]))) + + (define (bnode=? a b eql? shift) + (or + (eq? a b) + (and + (fx= (hamt-count a) (hamt-count b)) + (let ([a-mask (stencil-vector-mask a)] + [b-mask (stencil-vector-mask b)]) + (and + (fx= a-mask b-mask) + (let ([child-count (hamt-mask->child-count a-mask)]) + (let loop ([i 0]) + (cond + [(fx= i child-count) + (let ([key-count (hamt-mask->key-count a-mask)]) + (let loop ([j 0]) + (cond + [(fx= j key-count) #t] + [else + (let ([i (fx+ j child-count)]) + (let ([ak (bnode-key-index-ref a i)] + [bk (bnode-key-index-ref b i)]) + (and (hamt-wrapped-key=? ak bk) + (eql? (bnode-val-index-ref a i) (bnode-val-index-ref b i)) + (loop (fx+ j 1)))))])))] + [else + (let ([an (bnode-child-index-ref a i)] + [bn (bnode-child-index-ref b i)]) + (and (or (eq? an bn) + (cond + [(bnode? an) + (and (bnode? b) + (bnode=? an bn eql? (bnode-down shift)))] + [else + (cnode=? an bn eql?)])) + (loop (fx+ i 1))))])))))))) + + (define (bnode-keys-subset? a b shift) + (or + (eq? a b) + (cond + [(cnode? b) + ;; only possible if `a` has just one key, since if it + ;; has multiple keys, they have different hash code and + ;; can be a subset of the the keys of `c` + (and (fx= (hamt-count a) 1) + (let* ([k (bnode-only-key-ref a)] + [hashcode (hamt-wrapped-key-hash-code a k)]) + (cnode-has-key? b (hamt-unwrap-key k) hashcode)))] + [(fx> (hamt-count a) (hamt-count b)) + #f] + [else + (let* ([a-mask (stencil-vector-mask a)] + [akm (fxand (fxsrl a-mask HAMT-KEY-OFFSET) HAMT-GROUP-MASK)] + [acm (fxand (fxsrl a-mask HAMT-CHILD-OFFSET) HAMT-GROUP-MASK)] + [abm (fxior acm akm)] + [b-mask (stencil-vector-mask b)] + [bcm (fxand (fxsrl b-mask HAMT-CHILD-OFFSET) HAMT-GROUP-MASK)] + [bkm (fxand (fxsrl b-mask HAMT-KEY-OFFSET) HAMT-GROUP-MASK)] + [bbm (fxior bcm bkm)]) + (and + (fx= abm (fxand abm bbm)) + (let loop ([abm abm] [bit 0] [aki (fxpopcount16 acm)] [bki (fxpopcount16 bcm)] [aci 0] [bci 0]) + (cond + [(fxzero? abm) #t] + [(fxbit-set? akm bit) + (cond + [(fxbit-set? bkm bit) + (and + (hamt-wrapped-key=? (bnode-key-index-ref a aki) (bnode-key-index-ref b bki)) + (loop (fxsrl abm 1) (fx1+ bit) (fx1+ aki) (fx1+ bki) aci bci))] + [else + (and + (let ([akey (bnode-key-index-ref a aki)] + [bchild (bnode-child-index-ref b bci)]) + (cond + [(bnode? bchild) + (bnode-has-key? bchild (hamt-unwrap-key akey) (hamt-wrapped-key-hash-code a akey) (bnode-down shift))] + [else + (cnode-has-key? bchild (hamt-unwrap-key akey) (hamt-wrapped-key-hash-code a akey))])) + (loop (fxsrl abm 1) (fx1+ bit) (fx1+ aki) bki aci (fx1+ bci)))])] + [(fxbit-set? acm bit) + (cond + [(fxbit-set? bkm bit) #f] + [else + (and (let ([ac (bnode-child-index-ref a aci)] + [bc (bnode-child-index-ref b bci)]) + (cond + [(bnode? ac) + (bnode-keys-subset? ac bc (bnode-down shift))] + [else + (cnode-keys-subset/equal? ac bc #f)])) + (loop (fxsrl abm 1) (fx1+ bit) aki bki (fx1+ aci) (fx1+ bci)))])] + [(fxbit-set? bkm bit) + (loop (fxsrl abm 1) (fx1+ bit) aki (fx1+ bki) aci bci)] + [(fxbit-set? bcm bit) + (loop (fxsrl abm 1) (fx1+ bit) aki bki aci (fx1+ bci))] + [else + (loop (fxsrl abm 1) (fx1+ bit) aki bki aci bci)]))))]))) + + (define (bnode-hash-code n hash hc) + (let* ([mask (stencil-vector-mask n)] + [hc (hash-code-combine hc mask)] + [child-count (hamt-mask->child-count mask)] + [key-count (hamt-mask->key-count mask)] + [val-count (hamt-mask->val-count mask)]) + (let loop ([i 0] [hc hc]) + (cond + [(fx< i child-count) + (loop (fx1+ i) + (let ([c (bnode-child-index-ref n i)]) + (cond + [(bnode? c) + (bnode-hash-code c hash hc)] + [else + ;; Hash code needs to be order-independent, so + ;; collision nodes are a problem; simplify by just + ;; using the hash code and hope that collisions are + ;; rare. + (hash-code-combine hc (cnode-hash c))])))] + [else + (let loop ([i 0] [hc hc]) + (cond + [(fx< i val-count) + (loop (fx1+ i) + (hash-code-combine hc (hash (stencil-vector-ref n (fx+ i child-count key-count)))))] + [else hc]))])))) + + (define (bnode-fold n f nil) + (let* ([mask (stencil-vector-mask n)] + [child-count (hamt-mask->child-count mask)] + [key-count (hamt-mask->key-count mask)]) + (let loop ([i 0] [nil nil]) + (cond + [(fx= i child-count) + (let loop ([i 0] [nil nil]) + (cond + [(fx= i key-count) + nil] + [else + (loop (fx+ i 1) + (f (hamt-unwrap-key (bnode-key-index-ref n (fx+ i child-count))) + (bnode-val-index-ref n (fx+ i child-count)) + nil))]))] + [else + (let ([c (bnode-child-index-ref n i)]) + (cond + [(bnode? c) + (loop (fx+ i 1) + (bnode-fold c f nil))] + [else + (let aloop ([alist (cnode-content c)] [nil nil]) + (cond + [(null? alist) (loop (fx+ i 1) nil)] + [else + (let ([rest-alist (cdr alist)]) + (aloop rest-alist + (f (caar alist) + (cdar alist) + nil)))]))]))])))))) + +(define-syntax (define-prefixed-bnode-for-eqtype stx) + (syntax-case stx (hamt-wrap-key + hamt-unwrap-key + hamt-key=? + hamt-unwrapped-key=? + hamt-wrapped-key=? + hamt-key-hash-code + hamt-wrapped-key-hash-code) + [(_ p: + hamt-key-eqtype hamt-key-eqtype-impl + hamt-wrap-key hamt-wrap-key-impl + hamt-unwrap-key hamt-unwrap-key-impl + hamt-key=? hamt-key=?-impl + hamt-unwrapped-key=? hamt-unwrapped-key=?-impl + hamt-wrapped-key=? hamt-wrapped-key=?-impl + hamt-key-hash-code hamt-key-hash-code-impl + hamt-wrapped-key-hash-code hamt-wrapped-key-hash-code-impl) + (let ([prefixed (lambda (s) + (datum->syntax #'p: + (#%string->symbol + (string-append (#%symbol->string (syntax->datum #'p:)) + (#%symbol->string s)))))]) + (with-syntax ([p:bnode-key-hash-code (prefixed 'bnode-key-hash-code)] + [p:bnode-ref (prefixed 'bnode-ref)] + [p:bnode-ref-key (prefixed 'bnode-ref-key)] + [p:bnode-has-key? (prefixed 'bnode-has-key?)] + [p:bnode-set (prefixed 'bnode-set)] + [p:bnode-remove (prefixed 'bnode-remove)] + [p:bnode-entry-at-position (prefixed 'bnode-entry-at-position)] + [p:bnode-unsafe-intmap-iterate-key (prefixed 'bnode-unsafe-intmap-iterate-key)] + [p:bnode-unsafe-intmap-iterate-value (prefixed 'bnode-unsafe-intmap-iterate-value)] + [p:bnode-unsafe-intmap-iterate-key+value (prefixed 'bnode-unsafe-intmap-iterate-key+value)] + [p:bnode-unsafe-intmap-iterate-pair (prefixed 'bnode-unsafe-intmap-iterate-pair)] + [p:bnode=? (prefixed 'bnode=?)] + [p:bnode-keys-subset? (prefixed 'bnode-keys-subset?)] + [p:bnode-hash-code (prefixed 'bnode-hash-code)] + [p:bnode-fold (prefixed 'bnode-fold)]) + #'(define-bnode-for-eqtype + ;; exports: + p:bnode-key-hash-code + p:bnode-ref + p:bnode-ref-key + p:bnode-has-key? + p:bnode-set + p:bnode-remove + p:bnode-entry-at-position + p:bnode-unsafe-intmap-iterate-key + p:bnode-unsafe-intmap-iterate-value + p:bnode-unsafe-intmap-iterate-key+value + p:bnode-unsafe-intmap-iterate-pair + p:bnode=? + p:bnode-keys-subset? + p:bnode-hash-code + p:bnode-fold + + ;; imports: + hamt-key-eqtype-impl + hamt-wrap-key-impl + hamt-unwrap-key-impl + hamt-key=?-impl + hamt-unwrapped-key=?-impl + hamt-wrapped-key=?-impl + hamt-key-hash-code-impl + hamt-wrapped-key-hash-code-impl)))])) + +;; ---------------------------------------- + +(define-prefixed-bnode-for-eqtype + eq: + hamt-key-eqtype HAMT-EQTYPE-EQ + hamt-wrap-key (lambda (k k-hash) k) + hamt-unwrap-key (lambda (k) k) + hamt-key=? (lambda (k1 k1-hash wrapped-k2) (eq? k1 wrapped-k2)) + hamt-unwrapped-key=? (lambda (k1 k2) (eq? k1 k2)) + hamt-wrapped-key=? (lambda (k1 k2) (eq? k1 k2)) + hamt-key-hash-code (lambda (k) (eq-hash-code k)) + hamt-wrapped-key-hash-code (lambda (n k) (eq-hash-code k))) + +(define-prefixed-bnode-for-eqtype + eqv: + hamt-key-eqtype HAMT-EQTYPE-EQV + hamt-wrap-key (lambda (k k-hash) k) + hamt-unwrap-key (lambda (k) k) + hamt-key=? (lambda (k1 k1-hash wrapped-k2) (eqv? k1 wrapped-k2)) + hamt-unwrapped-key=? (lambda (k1 k2) (eqv? k1 k2)) + hamt-wrapped-key=? (lambda (k1 k2) (eqv? k1 k2)) + hamt-key-hash-code (lambda (k) (eqv-hash-code k)) + hamt-wrapped-key-hash-code (lambda (n k) (eqv-hash-code k))) + +(define (equal:hamt-wrap-key k k-hash) + (if (fast-equal-hash-code? k) ; must not include pairs + k + (cons k-hash k))) + +(define (equal:hamt-unwrap-key k) + (if (pair? k) (cdr k) k)) + +;; second key is wrapped +(define (equal:hamt-key=? k1 k1-hash wrapped-k2) + (if (pair? wrapped-k2) + (and (fx= k1-hash (car wrapped-k2)) + (key-equal? k1 (cdr wrapped-k2))) + (key-equal? k1 wrapped-k2))) + +(define (equal:hamt-unwrapped-key=? k1 k2) + (key-equal? k1 k2)) + +(define (equal:hamt-wrapped-key=? k1 k2) + (cond + [(pair? k1) + (cond + [(pair? k2) + (and (fx= (car k1) (car k2)) + (key-equal? (cdr k1) (cdr k2)))] + [else (key-equal? (cdr k1) k2)])] + [else + (cond + [(pair? k2) + (key-equal? k1 (cdr k2))] + [else (key-equal? k1 k2)])])) + +(define (equal:hamt-key-hash-code k) + (key-equal-hash-code k)) + +(define (equal:hamt-wrapped-key-hash-code n k) + (if (pair? k) + (car k) + (key-equal-hash-code k))) + +(define-prefixed-bnode-for-eqtype + equal: + hamt-key-eqtype HAMT-EQTYPE-EQUAL + hamt-wrap-key equal:hamt-wrap-key + hamt-unwrap-key equal:hamt-unwrap-key + hamt-key=? equal:hamt-key=? + hamt-unwrapped-key=? equal:hamt-unwrapped-key=? + hamt-wrapped-key=? equal:hamt-wrapped-key=? + hamt-key-hash-code equal:hamt-key-hash-code + hamt-wrapped-key-hash-code equal:hamt-wrapped-key-hash-code) diff --git a/racket/src/cs/rumble/hamt.ss b/racket/src/cs/rumble/hamt-vector.ss similarity index 85% rename from racket/src/cs/rumble/hamt.ss rename to racket/src/cs/rumble/hamt-vector.ss index a92d826dd2..39170f049e 100644 --- a/racket/src/cs/rumble/hamt.ss +++ b/racket/src/cs/rumble/hamt-vector.ss @@ -1,15 +1,7 @@ -;; HAMT +;; See also "intmap.ss" -;; the absence of something -(define NOTHING (gensym 'nothing)) - -;; 16-bit popcount (define (popcount x) - (let* ([x (fx- x (fxand (fxsrl x 1) #x5555))] - [x (fx+ (fxand x #x3333) (fxand (fxsrl x 2) #x3333))] - [x (fxand (fx+ x (fxsrl x 4)) #x0f0f)] - [x (fx+ x (fxsrl x 8))]) - (fxand x #x1f))) + (fxpopcount16 x)) ;; record types (define-record-type hnode @@ -19,42 +11,16 @@ (mutable vals)] [nongenerative #{hnode pfwh8wvaevt3r6pcwsqn90ry8-0}]) -(meta-cond - [(> (most-positive-fixnum) (expt 2 32)) +(define-record-type bnode + [parent hnode] + [fields (mutable keymap) + (mutable childmap)] + [nongenerative #{bnode pfwhzqkm2ycuuyedzz2nxjx2e-1}] + [sealed #t]) - ;; 64-bit bnode (pack the bitmaps into a single fixnum) - (define-record-type (bnode make-raw-bnode bnode?) - [parent hnode] - [fields (mutable bitmap)] - [nongenerative #{bnode pfwhzqkm2ycuuyedzz2nxjx2e-0}] - [sealed #t]) - - (define (make-bnode eqtype count keys vals keymap childmap) - (let ([bitmap (fxior keymap (fxsll childmap 16))]) - (make-raw-bnode eqtype count keys vals bitmap))) - - (define (bnode-keymap n) - (fxand #xffff (bnode-bitmap n))) - - (define (bnode-childmap n) - (fxsrl (bnode-bitmap n) 16)) - - (define (bnode-copy-bitmaps! dest src) - (bnode-bitmap-set! dest (bnode-bitmap src)))] - - [else - - ;; 32-bit bnode (separate bitmaps) - (define-record-type bnode - [parent hnode] - [fields (mutable keymap) - (mutable childmap)] - [nongenerative #{bnode pfwhzqkm2ycuuyedzz2nxjx2e-1}] - [sealed #t]) - - (define (bnode-copy-bitmaps! dest src) - (bnode-set-keymap! dest (bnode-keymap src)) - (bnode-set-childmap! dest (bnode-childmap src)))]) +(define (bnode-copy-bitmaps! dest src) + (bnode-keymap-set! dest (bnode-keymap src)) + (bnode-childmap-set! dest (bnode-childmap src))) (define-record-type cnode [parent hnode] @@ -74,83 +40,72 @@ (define empty-hasheqv (make-empty-bnode 'eqv)) (define empty-hash (make-empty-bnode 'equal)) -(define (make-hamt-shell eqtype) +(define (make-intmap-shell eqtype) (make-empty-bnode eqtype)) -(define (hamt-shell-sync! dest src) +(define (intmap-shell-sync! dest src) (hnode-count-set! dest (hnode-count src)) (hnode-keys-set! dest (hnode-keys src)) (hnode-vals-set! dest (hnode-vals src)) (bnode-copy-bitmaps! dest src)) -;; hamt interface -(define hamt? hnode?) -(define immutable-hash? hnode?) +;; intmap interface +(define intmap? bnode?) +(define immutable-hash? bnode?) -(define (hamt-eq? h) +(define (intmap-eq? h) (eq? (hnode-eqtype h) 'eq)) -(define (hamt-eqv? h) +(define (intmap-eqv? h) (eq? (hnode-eqtype h) 'eqv)) -(define (hamt-equal? h) +(define (intmap-equal? h) (eq? (hnode-eqtype h) 'equal)) -(define (hamt-has-key? h key) +(define (intmap-has-key? h key) (node-has-key? h key (hash-code h key) 0)) (define (node-has-key? n key keyhash shift) (cond [(bnode? n) (bnode-has-key? n key keyhash shift)] [else (cnode-has-key? n key)])) -(define (hamt-ref h key default) +(define (intmap-ref h key default) (cond - [(hamt-empty? h) + [(intmap-empty? h) ;; Access on an empty HAMT is common, so don't even hash in that case - (if (procedure? default) - (default) - default)] + default] [else - (let ([res (bnode-ref h key (hash-code h key) 0)]) - (if (eq? res NOTHING) - (if (procedure? default) - (default) - default) - res))])) + (bnode-ref h key (hash-code h key) 0 default)])) -(define (hamt-set h key val) +(define (intmap-ref-key h key default) + (cond + [(intmap-empty? h) + default] + [else + (bnode-ref-key h key (hash-code h key) 0 default)])) + +(define (intmap-set h key val) (bnode-set h key val (hash-code h key) 0)) -(define (hamt-remove h key) +(define (intmap-remove h key) (bnode-remove h key (hash-code h key) 0)) -(define (hamt-count h) +(define (intmap-count h) (hnode-count h)) -(define (hamt-empty? h) - (fxzero? (hamt-count h))) +(define (intmap-empty? h) + (fxzero? (intmap-count h))) -(define (hamt=? a b eql?) +(define (intmap=? a b eql?) (and (eq? (hnode-eqtype a) (hnode-eqtype b)) (node=? a b eql? 0))) -(define (hamt-hash-code a hash) +(define (intmap-hash-code a hash) (node-hash-code a hash 0)) -(define ignored/hamt - (begin - ;; Go through generic `hash` versions to support `a` - ;; and `b` as impersonated hash tables - (record-type-equal-procedure (record-type-descriptor bnode) - (lambda (a b eql?) - (hash=? a b eql?))) - (record-type-hash-procedure (record-type-descriptor bnode) - (lambda (a hash) - (hash-hash-code a hash))))) - -(define (hamt-keys-subset? a b) - (or (hamt-empty? a) +(define (intmap-keys-subset? a b) + (or (intmap-empty? a) (node-keys-subset? a b 0))) (define (hamt-foldk h f nil kont) @@ -173,48 +128,48 @@ (define (hamt-values h) (hamt-fold h '() (lambda (_ v xs) (cons v xs)))) -(define (hamt-for-each h proc) +(define (intmap-for-each h proc) (hamt-fold h (void) (lambda (k v _) (proc k v) (void)))) -(define (hamt-map h proc) +(define (intmap-map h proc) (hamt-fold h '() (lambda (k v xs) (cons (proc k v) xs)))) ;; generatic iteration by counting -(define (hamt-iterate-first h) - (and (not (hamt-empty? h)) +(define (intmap-iterate-first h) + (and (not (intmap-empty? h)) 0)) -(define (hamt-iterate-next h pos) +(define (intmap-iterate-next h pos) (let ([pos (fx1+ pos)]) - (and (not (fx= pos (hamt-count h))) + (and (not (fx= pos (intmap-count h))) pos))) -(define (hamt-iterate-key h pos fail) +(define (intmap-iterate-key h pos fail) (let ([p (node-entry-at-position h pos)]) (if p (car p) fail))) -(define (hamt-iterate-value h pos fail) +(define (intmap-iterate-value h pos fail) (let ([p (node-entry-at-position h pos)]) (if p (cdr p) fail))) -(define (hamt-iterate-key+value h pos fail) +(define (intmap-iterate-key+value h pos fail) (let ([p (node-entry-at-position h pos)]) (if p (values (car p) (cdr p)) fail))) -(define (hamt-iterate-pair h pos fail) +(define (intmap-iterate-pair h pos fail) (let ([p (node-entry-at-position h pos)]) (or p fail))) ;; unsafe iteration; position is a stack ;; represented by a list of (cons node index) -(define (unsafe-hamt-iterate-first h) - (and (not (hamt-empty? h)) +(define (unsafe-intmap-iterate-first h) + (and (not (intmap-empty? h)) (unsafe-node-iterate-first h '()))) (define (unsafe-node-iterate-first n stack) @@ -230,7 +185,7 @@ (let ([i (fx1- (#%vector-length (hnode-keys n)))]) (cons (cons n i) stack))])) -(define (unsafe-hamt-iterate-next h pos) +(define (unsafe-intmap-iterate-next h pos) (unsafe-node-iterate-next pos)) (define (unsafe-node-iterate-next pos) @@ -260,22 +215,22 @@ [(cnode? n) (cons (cons n i) stack)]))])))])) -(define (unsafe-hamt-iterate-key h pos) +(define (unsafe-intmap-iterate-key h pos) (let ([p (car pos)]) (key-ref (car p) (cdr p)))) -(define (unsafe-hamt-iterate-value h pos) +(define (unsafe-intmap-iterate-value h pos) (let ([p (car pos)]) (val-ref (car p) (cdr p)))) -(define (unsafe-hamt-iterate-key+value h pos) +(define (unsafe-intmap-iterate-key+value h pos) (let ([p (car pos)]) (let ([n (car p)] [i (cdr p)]) (values (key-ref n i) (val-ref n i))))) -(define (unsafe-hamt-iterate-pair h pos) +(define (unsafe-intmap-iterate-pair h pos) (let ([p (car pos)]) (let ([n (car p)] [i (cdr p)]) @@ -324,9 +279,13 @@ (or (not vals) (#%vector-ref vals i)))) -(define (node-ref n key keyhash shift) - (cond [(bnode? n) (bnode-ref n key keyhash shift)] - [else (cnode-ref n key)])) +(define (node-ref n key keyhash shift default) + (cond [(bnode? n) (bnode-ref n key keyhash shift default)] + [else (cnode-ref n key default)])) + +(define (node-ref-key n key keyhash shift default) + (cond [(bnode? n) (bnode-ref-key n key keyhash shift default)] + [else (cnode-ref-key n key default)])) (define (node-set n key val keyhash shift) (cond [(bnode? n) (bnode-set n key val keyhash shift)] @@ -386,7 +345,7 @@ [else (cnode-foldk n f nil kont)])) ;; bnode operations -(define (bnode-ref node key keyhash shift) +(define (bnode-ref node key keyhash shift default) (let ([bit (bnode-bit-pos keyhash shift)]) (cond [(bnode-maps-key? node bit) @@ -394,18 +353,36 @@ [k (key-ref node ki)]) (if (key=? node key k) (val-ref node ki) - NOTHING))] + default))] [(bnode-maps-child? node bit) (let* ([ci (bnode-child-index node bit)] [c (child-ref node ci)]) - (node-ref c key keyhash (down shift)))] + (node-ref c key keyhash (down shift) default))] [else - NOTHING]))) + default]))) + +(define (bnode-ref-key node key keyhash shift default) + (let ([bit (bnode-bit-pos keyhash shift)]) + (cond + [(bnode-maps-key? node bit) + (let* ([ki (bnode-key-index node bit)] + [k (key-ref node ki)]) + (if (key=? node key k) + k + default))] + + [(bnode-maps-child? node bit) + (let* ([ci (bnode-child-index node bit)] + [c (child-ref node ci)]) + (node-ref-key c key keyhash (down shift) default))] + + [else + default]))) (define (bnode-has-key? n key keyhash shift) - (not (eq? NOTHING (bnode-ref n key keyhash shift)))) + (not (eq? none2 (bnode-ref-key n key keyhash shift none2)))) (define (bnode-set node key val keyhash shift) (let ([bit (bnode-bit-pos keyhash shift)]) @@ -419,7 +396,8 @@ [(key=? node key k) (if (eq? val v) node - (bnode-replace-val node ki val))] + ;; for consistency, we're required to keep the new key: + (bnode-replace-val node ki val key (not (eq? key k))))] [else (let* ([h (hash-code node k)] [eqtype (hnode-eqtype node)] @@ -448,10 +426,19 @@ [(key=? node key k) (let ([km (bnode-keymap node)] [cm (bnode-childmap node)]) - (if (and (fx= (popcount km) 2) + (let ([n (popcount km)]) + (cond + [(and (fx= n 2) (fxzero? cm)) - (bnode-singleton node ki bit keyhash shift) - (bnode-remove-key node ki bit)))] + (bnode-singleton node ki bit keyhash shift)] + [(and (fx= n 1) + (fxzero? cm)) + (case (hnode-eqtype node) + [(eq) empty-hasheq] + [(eqv) empty-hasheqv] + [else empty-hash])] + [else + (bnode-remove-key node ki bit)])))] [else node]))] @@ -613,20 +600,25 @@ (fxxor (bnode-keymap node) bit) (bnode-childmap node)))) -(define (bnode-replace-val node ki val) +(define (bnode-replace-val node ki val key update-key?) (let* ([vals (hnode-vals node)] [new-vals (if vals (#%vector-copy vals) (pariah ; reify values (let ([pop (popcount (bnode-keymap node))]) - (#%make-vector pop #t))))]) + (#%make-vector pop #t))))] + [new-keys (if update-key? + (#%vector-copy (hnode-keys node)) + (hnode-keys node))]) (#%vector-set! new-vals ki val) + (when update-key? + (#%vector-set! new-keys ki key)) (make-bnode (hnode-eqtype node) (hnode-count node) - (hnode-keys node) + new-keys new-vals (bnode-keymap node) (bnode-childmap node)))) @@ -808,11 +800,17 @@ [(key=? node key (#%vector-ref keys i)) i] [else (loop (fx1+ i))])))) -(define (cnode-ref node key) +(define (cnode-ref node key default) (let ([i (cnode-index node key)]) (if i (val-ref node i) - NOTHING))) + default))) + +(define (cnode-ref-key node key default) + (let ([i (cnode-index node key)]) + (if i + (key-ref node i) + default))) (define (cnode-has-key? n key) (not (not (cnode-index n key)))) @@ -859,7 +857,7 @@ [(fx= i alen) #t] [else (let* ([akey (key-ref a i)] - [bval (cnode-ref b akey)]) + [bval (cnode-ref b akey none2)]) (and (eql? (val-ref a i) bval) (loop (fx1+ i))))])))))) diff --git a/racket/src/cs/rumble/hash-code.ss b/racket/src/cs/rumble/hash-code.ss index 889e2d4dfd..373606e747 100644 --- a/racket/src/cs/rumble/hash-code.ss +++ b/racket/src/cs/rumble/hash-code.ss @@ -75,6 +75,17 @@ (call-with-values (lambda () (equal-hash-loop x 0 0)) (lambda (hc burn) (logand hc (most-positive-fixnum))))) +;; A #t result implies that `equal-hash-code` and equality checking is +;; pretty fast +(define (fast-equal-hash-code? x) + (or (boolean? x) + (null? x) + (number? x) + (char? x) + (symbol? x) + (and (#%$record? x) + (not (#%$record-hash-procedure x))))) + (define (equal-secondary-hash-code x) (cond [(boolean? x) 1] diff --git a/racket/src/cs/rumble/hash.ss b/racket/src/cs/rumble/hash.ss index 43246a6a84..792ab136c2 100644 --- a/racket/src/cs/rumble/hash.ss +++ b/racket/src/cs/rumble/hash.ss @@ -572,12 +572,17 @@ (intmap=? ht1 ht2 eql?)] [(and (hash? ht1) (hash? ht2) + ;; Same mutability? + (eq? (intmap? (strip-impersonator ht1)) + (intmap? (strip-impersonator ht2))) + ;; Same key comparison? (or (and (hash-eq? ht1) (hash-eq? ht2)) (and (hash-eqv? ht1) (hash-eqv? ht2)) (and (hash-equal? ht1) (hash-equal? ht2))) + ;; Same weakness? (eq? (hash-weak? ht1) (hash-weak? ht2))) (and (= (hash-count ht1) (hash-count ht2)) ;; This generic comparison supports impersonators diff --git a/racket/src/cs/rumble/intmap.ss b/racket/src/cs/rumble/intmap.ss index ea2c181f5a..24a20bd8f6 100644 --- a/racket/src/cs/rumble/intmap.ss +++ b/racket/src/cs/rumble/intmap.ss @@ -1,484 +1,21 @@ -;; Immutable maps represented as big-endian Patricia tries. -;; Based on Okasaki & Gill's "Fast Mergeable Integer Maps," -;; (1998) with an added collision node. + +;; We have several implementations of immutable hash tables. Pick one... + +(include "rumble/patricia.ss") ;; -;; I also consulted Leijen and Palamarchuk's Haskell implementation -;; of Data.IntMap. - -(define-record-type intmap - [fields (immutable eqtype) - (mutable root)] - [nongenerative #{intmap pfwguidjcvqbvofiirp097jco-0}] - [sealed #t]) - -(define-record-type Br - [fields (immutable count) - (immutable prefix) - (immutable mask) - (immutable left) - (immutable right)] - [nongenerative #{Br pfwguidjcvqbvofiirp097jco-1}] - [sealed #t]) - -(define-record-type Lf - [fields (immutable hash) - (immutable key) - (immutable value)] - [nongenerative #{Lf pfwguidjcvqbvofiirp097jco-2}] - [sealed #t]) - -(define-record-type Co - [fields (immutable hash) - (immutable pairs)] - [nongenerative #{Co pfwguidjcvqbvofiirp097jco-3}] - [sealed #t]) - -(define immutable-hash? intmap?) - -(define empty-hash (make-intmap 'equal #f)) -(define empty-hasheqv (make-intmap 'eqv #f)) -(define empty-hasheq (make-intmap 'eq #f)) - -(define (make-intmap-shell et) - (make-intmap et #f)) - -(define (intmap-shell-sync! dst src) - (intmap-root-set! dst (intmap-root src))) - -(define (intmap-equal? t) (eq? 'equal (intmap-eqtype t))) -(define (intmap-eqv? t) (eq? 'eqv (intmap-eqtype t))) -(define (intmap-eq? t) (eq? 'eq (intmap-eqtype t))) - -(define (intmap-count t) - ($intmap-count (intmap-root t))) - -(define (intmap-empty? t) - (fx= 0 (intmap-count t))) - -(define ($intmap-count t) - (cond [(Br? t) (Br-count t)] - [(Lf? t) 1] - [(Co? t) (length (Co-pairs t))] - [else 0])) - -(define-syntax-rule (define-intmap-ref (intmap-ref formal ...) - (_ arg ... with-leaf with-pair) - $intmap-ref) - (begin - (define (intmap-ref formal ...) - (do-intmap-ref arg ...)) - - (define (do-intmap-ref t key default) - (let ([root (intmap-root t)]) - (if root - (let ([et (intmap-eqtype t)]) - ($intmap-ref et root (hash-code et key) key default)) - default))) - - (define ($intmap-ref et t h key default) - (let loop ([t t]) - (cond - [(Br? t) - (if (fx<= h (Br-prefix t)) - (loop (Br-left t)) - (loop (Br-right t)))] - - [(Lf? t) - (if (and (fx= h (Lf-hash t)) - (key=? et key (Lf-key t))) - (with-leaf t) - default)] - - [(Co? t) - (if (fx= h (Co-hash t)) - ($collision-ref et t key with-pair default) - default)] - - [else - default]))))) - -(define-intmap-ref (intmap-ref t key default) - (do-intmap-ref t key default Lf-value cdr) - $intmap-ref) - -(define-intmap-ref (intmap-ref-key t key default) - (do-intmap-ref t key default Lf-key car) - $intmap-ref-key) - -(define-intmap-ref (intmap-has-key? t key) - (do-intmap-ref t key #f (lambda (_) #t) (lambda (_) #t)) - $intmap-has-key?) - -(define (intmap-set t key val) - (let ([et (intmap-eqtype t)]) - (make-intmap - et - ($intmap-set et (intmap-root t) (hash-code et key) key val)))) - -(define ($intmap-set et t h key val) - (cond - [(Br? t) - (let ([p (Br-prefix t)] - [m (Br-mask t)]) - (cond - [(not (match-prefix? h p m)) - (join h (make-Lf h key val) p t)] - [(fx<= h p) - (br p m ($intmap-set et (Br-left t) h key val) (Br-right t))] - [else - (br p m (Br-left t) ($intmap-set et (Br-right t) h key val))]))] - - [(Lf? t) - (let ([j (Lf-hash t)]) - (cond - [(not (fx= h j)) - (join h (make-Lf h key val) j t)] - [(key=? et key (Lf-key t)) - (make-Lf h key val)] - [else - (make-Co h (list (cons key val) (cons (Lf-key t) (Lf-value t))))]))] - - [(Co? t) - (let ([j (Co-hash t)]) - (if (fx= h j) - (make-Co j ($collision-set et t key val)) - (join h (make-Lf h key val) j t)))] - - [else - (make-Lf h key val)])) - -(define (join p0 t0 p1 t1) - (let* ([m (branching-bit p0 p1)] - [p (mask p0 m)]) - (if (fx<= p0 p1) - (br p m t0 t1) - (br p m t1 t0)))) - -(define (intmap-remove t key) - (let ([et (intmap-eqtype t)]) - (let ([r ($intmap-remove et (intmap-root t) (hash-code et key) key)]) - (if r - (make-intmap et r) - (case et - [(eq) empty-hasheq] - [(equal) empty-hash] - [else empty-hasheqv]))))) - -(define ($intmap-remove et t h key) - (cond - [(Br? t) - (let ([p (Br-prefix t)] - [m (Br-mask t)]) - (cond - [(not (match-prefix? h p m)) - t] - [(fx<= h p) - (br/check-left p m ($intmap-remove et (Br-left t) h key) (Br-right t))] - [else - (br/check-right p m (Br-left t) ($intmap-remove et (Br-right t) h key))]))] - - [(Lf? t) - (if (key=? et key (Lf-key t)) - #f - t)] - - [(Co? t) - (cond - [(fx=? h (Co-hash t)) - ;; A collision node always has at least 2 key-value pairs, - ;; so when we remove one, we know the resulting list is non-empty. - (let ([pairs ($collision-remove et t key)]) - (if (null? (cdr pairs)) - (make-Lf h (caar pairs) (cdar pairs)) - (make-Co h pairs)))] - [else - t])] - - [else - #f])) - -;; collision ops -(define ($collision-ref et t key with-pair default) - (let loop ([xs (Co-pairs t)]) - (cond [(null? xs) default] - [(key=? et key (caar xs)) (with-pair (car xs))] - [else (loop (cdr xs))]))) - -(define ($collision-set et t key val) - (cons (cons key val) - (let loop ([xs (Co-pairs t)]) - (cond [(null? xs) '()] - [(key=? et key (caar xs)) (loop (cdr xs))] - [else (cons (car xs) (loop (cdr xs)))])))) - -(define ($collision-remove et t key) - (let loop ([xs (Co-pairs t)]) - (cond [(null? xs) '()] - [(key=? et key (caar xs)) (loop (cdr xs))] - [else (cons (car xs) (loop (cdr xs)))]))) - -;; bit twiddling -(define-syntax-rule (match-prefix? h p m) - (fx= (mask h m) p)) - -(define-syntax-rule (mask h m) - (fxand (fxior h (fx1- m)) (fxnot m))) - -(define-syntax-rule (branching-bit p m) - (highest-set-bit (fxxor p m))) - -(define-syntax-rule (highest-set-bit x1) - (let* ([x2 (fxior x1 (fxsrl x1 1))] - [x3 (fxior x2 (fxsrl x2 2))] - [x4 (fxior x3 (fxsrl x3 4))] - [x5 (fxior x4 (fxsrl x4 8))] - [x6 (fxior x5 (fxsrl x5 16))] - [x7 (if (> (fixnum-width) 32) - (fxior x6 (fxsrl x6 32)) - x6)]) - (fxxor x7 (fxsrl x7 1)))) - -;; basic utils -(define (br p m l r) - (let ([c (fx+ ($intmap-count l) ($intmap-count r))]) - (make-Br c p m l r))) - -(define (br/check-left p m l r) - (if l - (br p m l r) - r)) - -(define (br/check-right p m l r) - (if r - (br p m l r) - l)) - -(define-syntax-rule (key=? et k1 k2) - (cond [(eq? et 'eq) (eq? k1 k2)] - [(eq? et 'eqv) (eqv? k1 k2)] - [else (key-equal? k1 k2)])) - -(define-syntax-rule (hash-code et k) - (cond [(eq? et 'eq) (eq-hash-code k)] - [(eq? et 'eqv) (eqv-hash-code k)] - [else (key-equal-hash-code k)])) - -;; iteration -(define (intmap-iterate-first t) - (and (fx> (intmap-count t) 0) - 0)) - -(define (intmap-iterate-next t pos) - (let ([pos (fx1+ pos)]) - (and (fx< pos (intmap-count t)) - pos))) - -(define (intmap-iterate-pair t pos fail) - (or ($intmap-nth (intmap-root t) pos) - fail)) - -(define (intmap-iterate-key t pos fail) - (let ([p ($intmap-nth (intmap-root t) pos)]) - (if p (car p) fail))) - -(define (intmap-iterate-value t pos fail) - (let ([p ($intmap-nth (intmap-root t) pos)]) - (if p (cdr p) fail))) - -(define (intmap-iterate-key+value t pos fail) - (let ([p ($intmap-nth (intmap-root t) pos)]) - (if p - (values (car p) (cdr p)) - fail))) - -(define ($intmap-nth t n) - (cond - [(Br? t) - (let* ([left (Br-left t)] - [left-count ($intmap-count left)]) - (if (fx< n left-count) - ($intmap-nth left n) - ($intmap-nth (Br-right t) (fx- n left-count))))] - - [(Lf? t) - (and (fx= 0 n) - (cons (Lf-key t) (Lf-value t)))] - - [(Co? t) - (let ([pairs (Co-pairs t)]) - (and (fx< n (length pairs)) - (list-ref pairs n)))] - - [else - #f])) - -(define (unsafe-intmap-iterate-first t) - ($intmap-enum (intmap-root t) #f)) - -(define (unsafe-intmap-iterate-next t pos) - (let ([next (cdr pos)]) - (and next - ($intmap-enum (car next) (cdr next))))) - -(define (unsafe-intmap-iterate-pair t pos) - (car pos)) - -(define (unsafe-intmap-iterate-key t pos) - (caar pos)) - -(define (unsafe-intmap-iterate-value t pos) - (cdar pos)) - -(define (unsafe-intmap-iterate-key+value t pos) - (values (caar pos) (cdar pos))) - -(define ($intmap-enum t next) - (cond - [(Br? t) - ($intmap-enum (Br-left t) (cons (Br-right t) next))] - - [(Lf? t) - (cons (cons (Lf-key t) (Lf-value t)) next)] - - [(Co? t) - (let ([pairs (Co-pairs t)]) - (let ([fst (car pairs)] - [rst (cdr pairs)]) - (if (null? rst) - (cons fst next) - (cons fst (cons (make-Co #f rst) next)))))] - - [else - next])) - -(define (intmap-fold t nil proc) - (let loop ([pos (unsafe-intmap-iterate-first t)] [nil nil]) - (cond - [pos - (let ([p (unsafe-intmap-iterate-pair t pos)]) - (loop (unsafe-intmap-iterate-next t pos) - (proc (car p) (cdr p) nil)))] - [else - nil]))) - -(define (intmap-for-each t proc) - (intmap-fold t (void) (lambda (k v _) (|#%app| proc k v) (void)))) - -(define (intmap-map t proc) - (intmap-fold t '() (lambda (k v xs) (cons (|#%app| proc k v) xs)))) - -;; equality -(define (intmap=? a b eql?) - (and (eq? (intmap-eqtype a) (intmap-eqtype b)) - ($intmap=? (intmap-eqtype a) (intmap-root a) (intmap-root b) eql?))) - -(define ($intmap=? et a b eql?) - (or - (eq? a b) - - (cond - [(Br? a) - (and (Br? b) - (fx= (Br-count a) (Br-count b)) - (fx= (Br-prefix a) (Br-prefix b)) - (fx= (Br-mask a) (Br-mask b)) - ($intmap=? et (Br-left a) (Br-left b) eql?) - ($intmap=? et (Br-right a) (Br-right b) eql?))] - - [(Lf? a) - (and (Lf? b) - (key=? et (Lf-key a) (Lf-key b)) - (eql? (Lf-value a) (Lf-value b)))] - - [(Co? a) - (and (Co? b) - (let ([xs (Co-pairs a)]) - (and (fx= (length xs) (length (Co-pairs b))) - (let loop ([xs xs]) - (cond [(null? xs) - #t] - [else - (let ([pair ($collision-ref et b (caar xs) values #f)]) - (and pair - (eql? (cdar xs) (cdr pair)) - (loop (cdr xs))))])))))] - - [else (and (not a) (not b))]))) - -;; hash code -(define (intmap-hash-code t hash) - ($intmap-hash-code (intmap-root t) hash 0)) - -(define ($intmap-hash-code t hash hc) - (cond - [(Br? t) - (let* ([hc (hash-code-combine hc (hash (Br-prefix t)))] - [hc (hash-code-combine hc (hash (Br-mask t)))] - [hc (hash-code-combine hc ($intmap-hash-code (Br-left t) hash hc))] - [hc (hash-code-combine hc ($intmap-hash-code (Br-right t) hash hc))]) - hc)] - - [(Lf? t) - (let* ([hc (hash-code-combine hc (Lf-hash t))] - [hc (hash-code-combine hc (hash (Lf-value t)))]) - hc)] - - [(Co? t) - (hash-code-combine hc (Co-hash t))] - - [else - (hash-code-combine hc (hash #f))])) - -(define ignored/intmap - (begin - ;; Go through generic `hash` versions to support `a` - ;; and `b` as impersonated hash tables - (record-type-equal-procedure (record-type-descriptor intmap) - (lambda (a b eql?) - (hash=? a b eql?))) - (record-type-hash-procedure (record-type-descriptor intmap) - (lambda (a hash) - (hash-hash-code a hash))))) - -;; subset -(define (intmap-keys-subset? a b) - ($intmap-keys-subset? (intmap-eqtype a) (intmap-root a) (intmap-root b))) - -(define ($intmap-keys-subset? et a b) - (or - (eq? a b) - - (cond - [(Br? a) - (and - (Br? b) - - (let ([p1 (Br-prefix a)] - [m1 (Br-mask a)] - [p2 (Br-prefix b)] - [m2 (Br-mask b)]) - (cond - [(fx> m1 m2) #f] - [(fx> m2 m1) - (and (match-prefix? p1 p2 m2) - (if (fx<= p1 p2) - ($intmap-keys-subset? et a (Br-left b)) - ($intmap-keys-subset? et a (Br-right b))))] - [else - (and (fx= p1 p2) - ($intmap-keys-subset? et (Br-left a) (Br-left b)) - ($intmap-keys-subset? et (Br-right a) (Br-right b)))])))] - - [(Lf? a) - (if (Lf? b) - (key=? et (Lf-key a) (Lf-key b)) - ($intmap-has-key? et b (Lf-hash a) (Lf-key a) #f))] - - [(Co? a) - (let loop ([xs (Co-pairs a)]) - (cond [(null? xs) #t] - [($intmap-has-key? et b (Co-hash a) (caar xs) #f) (loop (cdr xs))] - [else #f]))] - - [else - #t]))) +;; This Patricia-trie implementation is the prettiest and fastest. It +;; uses the most memory, though --- typically much more than the +;; vector-stencil HAMT. + +;; (include "rumble/hamt-stencil.ss") +;; +;; This HAMT implementation using stencil vectors tends to use the +;; last memory, often by a lot. It's slower than the Patricia-tree +;; implementation, though, especially for `hash-keys-subset?`. + +;; (include "rumble/hamt-vector.ss") +;; +;; This HAMT implementaiton uses plain vectors instead of stencil +;; vectors. Its speed and memory use are intermediate, but its speed +;; is closer to the stencil-vector HAMT implementation, and memory use +;; is closer to the Patrica trie implementation. diff --git a/racket/src/cs/rumble/patricia.ss b/racket/src/cs/rumble/patricia.ss new file mode 100644 index 0000000000..fc84678cdb --- /dev/null +++ b/racket/src/cs/rumble/patricia.ss @@ -0,0 +1,473 @@ +;; See also "intmap.ss" + +;; Immutable maps represented as big-endian Patricia tries. +;; Based on Okasaki & Gill's "Fast Mergeable Integer Maps," +;; (1998) with an added collision node. +;; +;; I also consulted Leijen and Palamarchuk's Haskell implementation +;; of Data.IntMap. + +(define-record-type intmap + [fields (immutable eqtype) + (mutable root)] + [nongenerative #{intmap pfwguidjcvqbvofiirp097jco-0}] + [sealed #t]) + +(define-record-type Br + [fields (immutable count) + (immutable prefix) + (immutable mask) + (immutable left) + (immutable right)] + [nongenerative #{Br pfwguidjcvqbvofiirp097jco-1}] + [sealed #t]) + +(define-record-type Lf + [fields (immutable hash) + (immutable key) + (immutable value)] + [nongenerative #{Lf pfwguidjcvqbvofiirp097jco-2}] + [sealed #t]) + +(define-record-type Co + [fields (immutable hash) + (immutable pairs)] + [nongenerative #{Co pfwguidjcvqbvofiirp097jco-3}] + [sealed #t]) + +(define empty-hash (make-intmap 'equal #f)) +(define empty-hasheqv (make-intmap 'eqv #f)) +(define empty-hasheq (make-intmap 'eq #f)) + +(define (make-intmap-shell et) + (make-intmap et #f)) + +(define (intmap-shell-sync! dst src) + (intmap-root-set! dst (intmap-root src))) + +(define (intmap-equal? t) (eq? 'equal (intmap-eqtype t))) +(define (intmap-eqv? t) (eq? 'eqv (intmap-eqtype t))) +(define (intmap-eq? t) (eq? 'eq (intmap-eqtype t))) + +(define (intmap-count t) + ($intmap-count (intmap-root t))) + +(define (intmap-empty? t) + (fx= 0 (intmap-count t))) + +(define ($intmap-count t) + (cond [(Br? t) (Br-count t)] + [(Lf? t) 1] + [(Co? t) (length (Co-pairs t))] + [else 0])) + +(define-syntax-rule (define-intmap-ref (intmap-ref formal ...) + (_ arg ... with-leaf with-pair) + $intmap-ref) + (begin + (define (intmap-ref formal ...) + (do-intmap-ref arg ...)) + + (define (do-intmap-ref t key default) + (let ([root (intmap-root t)]) + (if root + (let ([et (intmap-eqtype t)]) + ($intmap-ref et root (hash-code et key) key default)) + default))) + + (define ($intmap-ref et t h key default) + (let loop ([t t]) + (cond + [(Br? t) + (if (fx<= h (Br-prefix t)) + (loop (Br-left t)) + (loop (Br-right t)))] + + [(Lf? t) + (if (and (fx= h (Lf-hash t)) + (key=? et key (Lf-key t))) + (with-leaf t) + default)] + + [(Co? t) + (if (fx= h (Co-hash t)) + ($collision-ref et t key with-pair default) + default)] + + [else + default]))))) + +(define-intmap-ref (intmap-ref t key default) + (do-intmap-ref t key default Lf-value cdr) + $intmap-ref) + +(define-intmap-ref (intmap-ref-key t key default) + (do-intmap-ref t key default Lf-key car) + $intmap-ref-key) + +(define-intmap-ref (intmap-has-key? t key) + (do-intmap-ref t key #f (lambda (_) #t) (lambda (_) #t)) + $intmap-has-key?) + +(define (intmap-set t key val) + (let ([et (intmap-eqtype t)]) + (make-intmap + et + ($intmap-set et (intmap-root t) (hash-code et key) key val)))) + +(define ($intmap-set et t h key val) + (cond + [(Br? t) + (let ([p (Br-prefix t)] + [m (Br-mask t)]) + (cond + [(not (match-prefix? h p m)) + (join h (make-Lf h key val) p t)] + [(fx<= h p) + (br p m ($intmap-set et (Br-left t) h key val) (Br-right t))] + [else + (br p m (Br-left t) ($intmap-set et (Br-right t) h key val))]))] + + [(Lf? t) + (let ([j (Lf-hash t)]) + (cond + [(not (fx= h j)) + (join h (make-Lf h key val) j t)] + [(key=? et key (Lf-key t)) + (make-Lf h key val)] + [else + (make-Co h (list (cons key val) (cons (Lf-key t) (Lf-value t))))]))] + + [(Co? t) + (let ([j (Co-hash t)]) + (if (fx= h j) + (make-Co j ($collision-set et t key val)) + (join h (make-Lf h key val) j t)))] + + [else + (make-Lf h key val)])) + +(define (join p0 t0 p1 t1) + (let* ([m (branching-bit p0 p1)] + [p (mask p0 m)]) + (if (fx<= p0 p1) + (br p m t0 t1) + (br p m t1 t0)))) + +(define (intmap-remove t key) + (let ([et (intmap-eqtype t)]) + (let ([r ($intmap-remove et (intmap-root t) (hash-code et key) key)]) + (if r + (make-intmap et r) + (case et + [(eq) empty-hasheq] + [(equal) empty-hash] + [else empty-hasheqv]))))) + +(define ($intmap-remove et t h key) + (cond + [(Br? t) + (let ([p (Br-prefix t)] + [m (Br-mask t)]) + (cond + [(not (match-prefix? h p m)) + t] + [(fx<= h p) + (br/check-left p m ($intmap-remove et (Br-left t) h key) (Br-right t))] + [else + (br/check-right p m (Br-left t) ($intmap-remove et (Br-right t) h key))]))] + + [(Lf? t) + (if (key=? et key (Lf-key t)) + #f + t)] + + [(Co? t) + (cond + [(fx=? h (Co-hash t)) + ;; A collision node always has at least 2 key-value pairs, + ;; so when we remove one, we know the resulting list is non-empty. + (let ([pairs ($collision-remove et t key)]) + (if (null? (cdr pairs)) + (make-Lf h (caar pairs) (cdar pairs)) + (make-Co h pairs)))] + [else + t])] + + [else + #f])) + +;; collision ops +(define ($collision-ref et t key with-pair default) + (let loop ([xs (Co-pairs t)]) + (cond [(null? xs) default] + [(key=? et key (caar xs)) (with-pair (car xs))] + [else (loop (cdr xs))]))) + +(define ($collision-set et t key val) + (cons (cons key val) + (let loop ([xs (Co-pairs t)]) + (cond [(null? xs) '()] + [(key=? et key (caar xs)) (loop (cdr xs))] + [else (cons (car xs) (loop (cdr xs)))])))) + +(define ($collision-remove et t key) + (let loop ([xs (Co-pairs t)]) + (cond [(null? xs) '()] + [(key=? et key (caar xs)) (loop (cdr xs))] + [else (cons (car xs) (loop (cdr xs)))]))) + +;; bit twiddling +(define-syntax-rule (match-prefix? h p m) + (fx= (mask h m) p)) + +(define-syntax-rule (mask h m) + (fxand (fxior h (fx1- m)) (fxnot m))) + +(define-syntax-rule (branching-bit p m) + (highest-set-bit (fxxor p m))) + +(define-syntax-rule (highest-set-bit x1) + (let* ([x2 (fxior x1 (fxsrl x1 1))] + [x3 (fxior x2 (fxsrl x2 2))] + [x4 (fxior x3 (fxsrl x3 4))] + [x5 (fxior x4 (fxsrl x4 8))] + [x6 (fxior x5 (fxsrl x5 16))] + [x7 (if (> (fixnum-width) 32) + (fxior x6 (fxsrl x6 32)) + x6)]) + (fxxor x7 (fxsrl x7 1)))) + +;; basic utils +(define (br p m l r) + (let ([c (fx+ ($intmap-count l) ($intmap-count r))]) + (make-Br c p m l r))) + +(define (br/check-left p m l r) + (if l + (br p m l r) + r)) + +(define (br/check-right p m l r) + (if r + (br p m l r) + l)) + +(define-syntax-rule (key=? et k1 k2) + (cond [(eq? et 'eq) (eq? k1 k2)] + [(eq? et 'eqv) (eqv? k1 k2)] + [else (key-equal? k1 k2)])) + +(define-syntax-rule (hash-code et k) + (cond [(eq? et 'eq) (eq-hash-code k)] + [(eq? et 'eqv) (eqv-hash-code k)] + [else (key-equal-hash-code k)])) + +;; iteration +(define (intmap-iterate-first t) + (and (fx> (intmap-count t) 0) + 0)) + +(define (intmap-iterate-next t pos) + (let ([pos (fx1+ pos)]) + (and (fx< pos (intmap-count t)) + pos))) + +(define (intmap-iterate-pair t pos fail) + (or ($intmap-nth (intmap-root t) pos) + fail)) + +(define (intmap-iterate-key t pos fail) + (let ([p ($intmap-nth (intmap-root t) pos)]) + (if p (car p) fail))) + +(define (intmap-iterate-value t pos fail) + (let ([p ($intmap-nth (intmap-root t) pos)]) + (if p (cdr p) fail))) + +(define (intmap-iterate-key+value t pos fail) + (let ([p ($intmap-nth (intmap-root t) pos)]) + (if p + (values (car p) (cdr p)) + fail))) + +(define ($intmap-nth t n) + (cond + [(Br? t) + (let* ([left (Br-left t)] + [left-count ($intmap-count left)]) + (if (fx< n left-count) + ($intmap-nth left n) + ($intmap-nth (Br-right t) (fx- n left-count))))] + + [(Lf? t) + (and (fx= 0 n) + (cons (Lf-key t) (Lf-value t)))] + + [(Co? t) + (let ([pairs (Co-pairs t)]) + (and (fx< n (length pairs)) + (list-ref pairs n)))] + + [else + #f])) + +(define (unsafe-intmap-iterate-first t) + ($intmap-enum (intmap-root t) #f)) + +(define (unsafe-intmap-iterate-next t pos) + (let ([next (cdr pos)]) + (and next + ($intmap-enum (car next) (cdr next))))) + +(define (unsafe-intmap-iterate-pair t pos) + (car pos)) + +(define (unsafe-intmap-iterate-key t pos) + (caar pos)) + +(define (unsafe-intmap-iterate-value t pos) + (cdar pos)) + +(define (unsafe-intmap-iterate-key+value t pos) + (values (caar pos) (cdar pos))) + +(define ($intmap-enum t next) + (cond + [(Br? t) + ($intmap-enum (Br-left t) (cons (Br-right t) next))] + + [(Lf? t) + (cons (cons (Lf-key t) (Lf-value t)) next)] + + [(Co? t) + (let ([pairs (Co-pairs t)]) + (let ([fst (car pairs)] + [rst (cdr pairs)]) + (if (null? rst) + (cons fst next) + (cons fst (cons (make-Co #f rst) next)))))] + + [else + next])) + +(define (intmap-fold t nil proc) + (let loop ([pos (unsafe-intmap-iterate-first t)] [nil nil]) + (cond + [pos + (let ([p (unsafe-intmap-iterate-pair t pos)]) + (loop (unsafe-intmap-iterate-next t pos) + (proc (car p) (cdr p) nil)))] + [else + nil]))) + +(define (intmap-for-each t proc) + (intmap-fold t (void) (lambda (k v _) (|#%app| proc k v) (void)))) + +(define (intmap-map t proc) + (#%reverse (intmap-fold t '() (lambda (k v xs) (cons (|#%app| proc k v) xs))))) + +;; equality +(define (intmap=? a b eql?) + (and (eq? (intmap-eqtype a) (intmap-eqtype b)) + ($intmap=? (intmap-eqtype a) (intmap-root a) (intmap-root b) eql?))) + +(define ($intmap=? et a b eql?) + (or + (eq? a b) + + (cond + [(Br? a) + (and (Br? b) + (fx= (Br-count a) (Br-count b)) + (fx= (Br-prefix a) (Br-prefix b)) + (fx= (Br-mask a) (Br-mask b)) + ($intmap=? et (Br-left a) (Br-left b) eql?) + ($intmap=? et (Br-right a) (Br-right b) eql?))] + + [(Lf? a) + (and (Lf? b) + (key=? et (Lf-key a) (Lf-key b)) + (eql? (Lf-value a) (Lf-value b)))] + + [(Co? a) + (and (Co? b) + (let ([xs (Co-pairs a)]) + (and (fx= (length xs) (length (Co-pairs b))) + (let loop ([xs xs]) + (cond [(null? xs) + #t] + [else + (let ([pair ($collision-ref et b (caar xs) values #f)]) + (and pair + (eql? (cdar xs) (cdr pair)) + (loop (cdr xs))))])))))] + + [else (and (not a) (not b))]))) + +;; hash code +(define (intmap-hash-code t hash) + ($intmap-hash-code (intmap-root t) hash 0)) + +(define ($intmap-hash-code t hash hc) + (cond + [(Br? t) + (let* ([hc (hash-code-combine hc (hash (Br-prefix t)))] + [hc (hash-code-combine hc (hash (Br-mask t)))] + [hc (hash-code-combine hc ($intmap-hash-code (Br-left t) hash hc))] + [hc (hash-code-combine hc ($intmap-hash-code (Br-right t) hash hc))]) + hc)] + + [(Lf? t) + (let* ([hc (hash-code-combine hc (Lf-hash t))] + [hc (hash-code-combine hc (hash (Lf-value t)))]) + hc)] + + [(Co? t) + (hash-code-combine hc (Co-hash t))] + + [else + (hash-code-combine hc (hash #f))])) + +;; subset +(define (intmap-keys-subset? a b) + ($intmap-keys-subset? (intmap-eqtype a) (intmap-root a) (intmap-root b))) + +(define ($intmap-keys-subset? et a b) + (or + (eq? a b) + + (cond + [(Br? a) + (and + (Br? b) + + (let ([p1 (Br-prefix a)] + [m1 (Br-mask a)] + [p2 (Br-prefix b)] + [m2 (Br-mask b)]) + (cond + [(fx> m1 m2) #f] + [(fx> m2 m1) + (and (match-prefix? p1 p2 m2) + (if (fx<= p1 p2) + ($intmap-keys-subset? et a (Br-left b)) + ($intmap-keys-subset? et a (Br-right b))))] + [else + (and (fx= p1 p2) + ($intmap-keys-subset? et (Br-left a) (Br-left b)) + ($intmap-keys-subset? et (Br-right a) (Br-right b)))])))] + + [(Lf? a) + (if (Lf? b) + (key=? et (Lf-key a) (Lf-key b)) + ($intmap-has-key? et b (Lf-hash a) (Lf-key a) #f))] + + [(Co? a) + (let loop ([xs (Co-pairs a)]) + (cond [(null? xs) #t] + [($intmap-has-key? et b (Co-hash a) (caar xs) #f) (loop (cdr xs))] + [else #f]))] + + [else + #t]))) diff --git a/racket/src/racket/src/schvers.h b/racket/src/racket/src/schvers.h index a1acde579b..4113683b91 100644 --- a/racket/src/racket/src/schvers.h +++ b/racket/src/racket/src/schvers.h @@ -16,7 +16,7 @@ #define MZSCHEME_VERSION_X 7 #define MZSCHEME_VERSION_Y 5 #define MZSCHEME_VERSION_Z 0 -#define MZSCHEME_VERSION_W 16 +#define MZSCHEME_VERSION_W 17 /* A level of indirection makes `#` work as needed: */ #define AS_a_STR_HELPER(x) #x