hash benchmarks and stencil-vector HAMT experiment

This commit adds an (unused) implementation of immutable hash tables
for Racket CS that trades some run-time performance for an especially
compact representation --- similar to the traditional Racket
implementation of immutable hash tables. It uses a new "stencil
vector" datatype at the Chez Scheme level, which overlays the bitmap
needed for a HAMT node with the Chez-object type tag (and also
provides an update operation that avoids unnecessary memory work).

Compared to the current Racket CS implementation, the stencil-vector
HAMT implementation of an immutable hash table takes only about 1/3
the space on avergae, which translates to a overall 5% savings in
DrRacket's initial heap. It also makes a full Racket build slightly
faster by reducing avergage memory use by 5-10%.

But the run-time performance difference is significant, especially for
the `hash-keys-subset?` operation (at least in microbenchmarks), and
also for addition and iteration. Maybe there's an overall better point
that reduces memory use of the current Patricia trie implementation
without sacrificing as much performance.

Besides the benchmarks and stencil-vector HAMT implementaiton, there
are small changes to the way hash tables cooperate with `equal?`,
which makes it a little easier to plug in different implementations.
This commit is contained in:
Matthew Flatt 2020-01-03 17:10:10 -07:00
parent f0a63b5921
commit a60f173b46
24 changed files with 2591 additions and 629 deletions

View File

@ -12,7 +12,7 @@
(define collection 'multi)
(define version "7.5.0.16")
(define version "7.5.0.17")
(define deps `("racket-lib"
["racket" #:version ,version]))

View File

@ -0,0 +1,77 @@
#lang racket/base
(require racket/include)
(include "config.rktl")
(define (f x) x)
(set! f f)
'----------------------------------------
'hasheq-add-remove
(times
(let loop ([ht #hasheq()] [i L])
(if (zero? i)
(void (f ht))
(loop (hash-remove (hash-set ht 'a #t) 'a)
(sub1 i)))))
'hasheq-adds
(times
(for ([i (in-range Q)])
(let loop ([ht #hasheq()] [i 100])
(if (zero? i)
(void (f ht))
(loop (hash-set ht i 'true)
(sub1 i))))))
'hasheq-adds/#t
(times
(for ([i (in-range Q)])
(let loop ([ht #hasheq()] [i 100])
(if (zero? i)
(void (f ht))
(loop (hash-set ht i #t)
(sub1 i))))))
'hasheq-addsame
(times
(for ([i (in-range Q)])
(let loop ([ht #hasheq()] [i 100])
(if (zero? i)
(void (f ht))
(loop (hash-set ht 'a 'true)
(sub1 i))))))
'hasheq-removes
(times
(let ([ht (for/hasheq ([i (in-range 100)])
(values i i))])
(for ([i (in-range Q)])
(let loop ([ht ht] [i 100])
(if (zero? i)
(void (f ht))
(loop (hash-remove ht i)
(sub1 i)))))))
'hasheq-ref
(times
(let ([ht (for/hasheq ([i (in-range 100)])
(values i i))])
(for ([i (in-range Q)])
(let loop ([v #f] [i 100])
(if (zero? i)
(void (f v))
(loop (hash-ref ht i #f)
(sub1 i)))))))
'hasheq-reffail
(times
(let ([ht (for/hasheq ([i (in-range 100)])
(values i i))])
(for ([i (in-range Q)])
(let loop ([v #f] [i 100])
(if (zero? i)
(void (f v))
(loop (hash-ref ht 'not-there #f)
(sub1 i)))))))

View File

@ -0,0 +1,121 @@
#lang racket/base
(require (for-syntax racket/base))
(provide Q M L N I
times
unknown
with-hash-variants
make-large-equal-key/share1
make-large-equal-key/share2)
;; Iterations for slow things:
(define Q 100000)
;; The depth used for non-tail recursion, typically:
(define M (* Q 10))
;; Intermediate count:
(define L (* M 10))
;; Number of iteraitons used for a loop, typically
(define N (* L 10))
;; Number of times to run each benchamrk:
(define I 3)
(define-syntax times
(syntax-rules ()
[(_ e)
(let loop ([v #f] [i I])
(if (zero? i)
v
(loop (time e) (sub1 i))))]))
(define (unknown x) x)
(set! unknown unknown)
(define-syntax (with-hash-variants stx)
(syntax-case stx ()
[(_ body ...)
(let ()
(define (body-with prefix
#:empty empty-stx
#:key key-stx
#:other-key other-key-stx
#:make-key make-key-stx
#:make-val make-val-stx
#:for/hash for/hash-stx)
(let loop ([body (syntax->list #'(body ...))])
(cond
[(null? body) null]
[else
(syntax-case (car body) (quote)
[(quote sym)
(identifier? #'sym)
(cons #`(quote #,(string->symbol (format "~a:~a" prefix (syntax-e #'sym))))
(loop (cdr body)))]
[#:only
(if (eq? prefix (syntax-e (cadr body)))
(loop (cddr body))
(loop (cddddr body)))]
[_
(cons
(let loop ([e (car body)])
(cond
[(eq? e 'EMPTY) empty-stx]
[(eq? e 'KEY) key-stx]
[(eq? e 'OTHER-KEY) other-key-stx]
[(eq? e 'MAKE-KEY) make-key-stx]
[(eq? e 'MAKE-VAL) make-val-stx]
[(eq? e 'FOR/HASH) for/hash-stx]
[(syntax? e) (datum->syntax e (loop (syntax-e e)))]
[(pair? e) (cons (loop (car e)) (loop (cdr e)))]
[else e]))
(loop (cdr body)))])])))
#`(begin
#,@(body-with 'eq#t
#:empty #'#hasheq()
#:key #''a
#:other-key #''not-there
#:make-key #'values
#:make-val #'(lambda (v) #t)
#:for/hash #'for/hasheq)
#,@(body-with 'eq
#:empty #'#hasheq()
#:key #''a
#:other-key #''not-there
#:make-key #'values
#:make-val #'values
#:for/hash #'for/hasheq)
#,@(body-with 'eqv
#:empty #'#hasheqv()
#:key #'12345
#:other-key #'1/3
#:make-key #'values
#:make-val #'values
#:for/hash #'for/hasheqv)
#,@(body-with 'equal
#:empty #'#hash()
#:key #''(a)
#:other-key #''(not-there)
#:make-key #'box
#:make-val #'values
#:for/hash #'for/hash)))]))
(define (make-data)
(list 1 2 3
(string-copy "Hello, world!")
'apple
(box (seconds->date 0))
1/2))
(define share1-a (make-data))
(define share1-b (make-data))
(define share2-a (make-data))
(define share2-b (make-data))
(define (make-large-equal-key/share1 c)
(vector share1-a c share1-b))
(define (make-large-equal-key/share2 c)
(vector share2-a c share2-b))

View File

@ -0,0 +1,3 @@
#lang info
(define test-omit-paths 'all)

View File

@ -0,0 +1,45 @@
#lang racket/base
(require "config.rkt")
'eq:keys
(times
(let ([ht (for/hasheq ([i (in-range 100)])
(values i i))])
(void
(for ([i (in-range Q)])
(for/fold ([v #f]) ([k (in-hash-keys ht)])
k)))))
'eq:vals
(times
(let ([ht (for/hasheq ([i (in-range 100)])
(values i i))])
(for ([i (in-range Q)])
(void
(for/fold ([v #f]) ([v (in-hash-values ht)])
v)))))
'eq:keys-unsafe
(times
(let ([ht (for/hasheq ([i (in-range 100)])
(values i i))])
(void
(for ([i (in-range Q)])
(for/fold ([v #f]) ([k (in-immutable-hash-keys ht)])
k)))))
'eq:vals-unsafe
(times
(let ([ht (for/hasheq ([i (in-range 100)])
(values i i))])
(for ([i (in-range Q)])
(void
(for/fold ([v #f]) ([v (in-immutable-hash-values ht)])
v)))))
'eq:for-each
(times
(let ([ht (for/hasheq ([i (in-range 100)])
(values i i))])
(for ([i (in-range Q)])
(hash-for-each ht (lambda (k v) 'ok)))))

View File

@ -0,0 +1,53 @@
#lang racket/base
(require "config.rkt")
(with-hash-variants
'ref
(times
(let ([ht (FOR/HASH ([i (in-range 100)])
(values (MAKE-KEY i) (MAKE-VAL i)))])
(for ([i (in-range Q)])
(let loop ([v #f] [i 100])
(if (zero? i)
(void (unknown v))
(loop (hash-ref ht (MAKE-KEY i) #f)
(sub1 i)))))))
#:only equal
'ref-large
(times
(let ([ht (for/hash ([i (in-range 100)])
(values (make-large-equal-key/share1 i) i))])
(for ([i (in-range (quotient Q 10))])
(let loop ([v #f] [i 100])
(if (zero? i)
(void (unknown v))
(loop (hash-ref ht (make-large-equal-key/share2 i) #f)
(sub1 i)))))))
'ref-fail
(times
(let ([ht (FOR/HASH ([i (in-range 100)])
(values (MAKE-KEY i) (MAKE-VAL i)))])
(for ([i (in-range Q)])
(let loop ([v #f] [i 100])
(if (zero? i)
(void (unknown v))
(loop (hash-ref ht OTHER-KEY #f)
(sub1 i)))))))
#:only equal
'ref-large-fail
(times
(let ([ht (for/hash ([i (in-range 100)])
(values (make-large-equal-key/share1 i) i))]
[not-there (make-large-equal-key/share2 -1)])
(for ([i (in-range (quotient Q 10))])
(let loop ([v #f] [i 100])
(if (zero? i)
(void (unknown v))
(loop (hash-ref ht not-there #f)
(sub1 i)))))))
(void))

View File

@ -0,0 +1,26 @@
#lang racket/base
(require "config.rkt")
(with-hash-variants
'hash-removes
(times
(let ([ht (FOR/HASH ([i (in-range 100)])
(values (MAKE-KEY i) (MAKE-VAL i)))])
(for ([i (in-range Q)])
(let loop ([ht ht] [i 100])
(if (zero? i)
(void (unknown ht))
(loop (hash-remove ht (MAKE-KEY i))
(sub1 i)))))))
'add+remove
(times
(let loop ([ht EMPTY] [i L])
(if (zero? i)
(void (unknown ht))
(loop (hash-remove (hash-set ht KEY (MAKE-VAL i)) KEY)
(sub1 i)))))
(void))

View File

@ -0,0 +1,32 @@
#lang racket/base
(require "config.rkt")
(with-hash-variants
'add-to-empty
(times
(let loop ([ht EMPTY] [i L])
(if (zero? i)
(void (unknown ht))
(loop (hash-set ht KEY (MAKE-VAL 'true))
(sub1 i)))))
'add-many
(times
(for ([i (in-range Q)])
(let loop ([ht EMPTY] [i 100])
(if (zero? i)
(void (unknown ht))
(loop (hash-set ht (MAKE-KEY i) (MAKE-VAL 'true))
(sub1 i))))))
'add-same
(times
(for ([i (in-range Q)])
(let loop ([ht EMPTY] [i 100])
(if (zero? i)
(void (unknown ht))
(loop (hash-set ht (MAKE-KEY 'a) (MAKE-VAL 'true))
(sub1 i))))))
(void))

View File

@ -0,0 +1,95 @@
#lang racket/base
(require "config.rkt")
(define elems
(parameterize ([current-pseudo-random-generator
(make-pseudo-random-generator)])
(random-seed 12745)
(hash-keys
(for/fold ([ht #hasheqv()]) ([i 200])
(let loop ()
(define n (random 10000))
(if (hash-ref ht n #f)
(loop)
(hash-set ht n #t)))))))
(define (gen n)
(for/hasheq ([i (in-range n)]
[e (in-list elems)])
(values e e)))
(define (gen-more n ht)
(for/fold ([ht ht]) ([i (in-range n)]
[e (in-list (list-tail elems (hash-count ht)))])
(hash-set ht e e)))
(define (check-true v)
(unless v
(error "failed")))
(define (check-false v)
(when v
(error "failed")))
'eq:subset-unshared-small
(times
(let ([ht (gen 6)]
[sub-ht (gen 3)])
(check-true
(for/and ([i (in-range M)])
(hash-keys-subset? sub-ht ht)))))
'eq:not-subset-unshared-small
(times
(let ([ht (gen 6)]
[sub-ht (gen 3)])
(check-false
(for/or ([i (in-range L)])
(hash-keys-subset? ht sub-ht)))))
'eq:subset-shared-medium+small
(times
(let* ([sub-ht (gen 10)]
[ht (gen-more 1 sub-ht)])
(check-true
(for/and ([i (in-range L)])
(hash-keys-subset? sub-ht ht)))))
'eq:subset-shared-medium+medium
(times
(let* ([sub-ht (gen 10)]
[ht (gen-more 10 sub-ht)])
(check-true
(for/and ([i (in-range L)])
(hash-keys-subset? sub-ht ht)))))
'eq:subset-same-large
(times
(let* ([sub-ht (gen 100)])
(check-true
(for/and ([i (in-range L)])
(hash-keys-subset? sub-ht sub-ht)))))
'eq:subset-shared-large+small
(times
(let* ([sub-ht (gen 100)]
[ht (gen-more 3 sub-ht)])
(check-true
(for/and ([i (in-range L)])
(hash-keys-subset? sub-ht ht)))))
'eq:subset-shared-large+medium
(times
(let* ([sub-ht (gen 100)]
[ht (gen-more 10 sub-ht)])
(check-true
(for/and ([i (in-range L)])
(hash-keys-subset? sub-ht ht)))))
'eq:subset-shared-large+large
(times
(let* ([sub-ht (gen 100)]
[ht (gen-more 100 sub-ht)])
(check-true
(for/and ([i (in-range L)])
(hash-keys-subset? sub-ht ht)))))

View File

@ -2019,7 +2019,7 @@
(test #f hash-ref h2 'key2 #f)
(test '(key2 val2 key2 val2 key2 key2) list get-k get-v set-k set-v remove-k access-k)
(hash-for-each h2 void)
(test '(key val key2 val2 key2 key) list get-k get-v set-k set-v remove-k access-k)
(test '(for-each key val key2 val2 key2 key) list 'for-each get-k get-v set-k set-v remove-k access-k)
(set! get-k #f)
(set! get-v #f)
(void (equal-hash-code h2))
@ -2034,7 +2034,7 @@
(test #t values (equal? h2 (let* ([h2 (make-hash)])
(test (void) hash-set! h2 'key 'val)
h2)))
(test '(key val key2 val2 key2 key) list get-k get-v set-k set-v remove-k access-k)
(test '(equal? key val key2 val2 key2 key) list 'equal? get-k get-v set-k set-v remove-k access-k)
(void)))
(list
make-hash make-hasheq make-hasheqv
@ -2092,7 +2092,7 @@
(test #f hash-ref h2 'key2 #f)
(test '(key2 val2 key2 val2 key2 key2) list get-k get-v set-k set-v remove-k access-k)
(hash-for-each h2 void)
(test '(key val key2 val2 key2 key) list get-k get-v set-k set-v remove-k access-k)
(test '(mid key val key2 val2 key2 key) list 'mid get-k get-v set-k set-v remove-k access-k)
(set! get-k #f)
(set! get-v #f)
(void (equal-hash-code h2))
@ -2105,7 +2105,7 @@
(set! get-k #f)
(set! get-v #f)
(test #t values (equal? h2 (hash-set h1 'key 'val)))
(test '(key val key2 val2 key2 key) list get-k get-v set-k set-v remove-k access-k)
(test '(equal?2 key val key2 val2 key2 key) list 'equal?2 get-k get-v set-k set-v remove-k access-k)
(void))))))
;; Check that `hash-set` propagates in a way that allows
;; `chaperone-of?` to work recursively:

View File

@ -255,6 +255,9 @@ RUMBLE_SRCS = rumble/define.ss \
rumble/object-name.ss \
rumble/arity.ss \
rumble/intmap.ss \
rumble/patricia.ss \
rumble/hamt-stencil.ss \
rumble/hamt-vector.ss \
rumble/hash.ss \
rumble/datum.ss \
rumble/lock.ss \

View File

@ -83,6 +83,7 @@
record?
record-type-uid
$object-ref
stencil-vector?
(rename-out [s:vector-sort vector-sort]
[s:vector-sort! vector-sort!])
vector-for-each
@ -175,6 +176,9 @@
[logtest fxlogtest])
fxsrl
fxbit-field
fxpopcount
fxpopcount32
fxpopcount16
bitwise-bit-count
bitwise-arithmetic-shift-right
bytevector-u16-native-ref
@ -746,6 +750,22 @@
[(proc . vecs)
(list->vector (apply map proc (map vector->list vecs)))]))
(define (stencil-vector? v) #f)
(define (fxpopcount32 x)
(let* ([x (- x (bitwise-and (arithmetic-shift x -1) #x55555555))]
[x (+ (bitwise-and x #x33333333) (bitwise-and (arithmetic-shift x -2) #x33333333))]
[x (bitwise-and (+ x (arithmetic-shift x -4)) #x0f0f0f0f)]
[x (+ x (arithmetic-shift x -8) (arithmetic-shift x -16) (arithmetic-shift x -24))])
(bitwise-and x #x3f)))
(define (fxpopcount x)
(fx+ (fxpopcount32 (bitwise-and x #xffffffff))
(fxpopcount32 (arithmetic-shift x -32))))
(define (fxpopcount16 x)
(fxpopcount32 (bitwise-and x #xffff)))
(define (logbit? m n)
(bitwise-bit-set? n m))
(define (logbit1 i n)

View File

@ -2,7 +2,7 @@
;; Check to make we're using a build of Chez Scheme
;; that has all the features we need.
(define-values (need-maj need-min need-sub need-dev)
(values 9 5 3 9))
(values 9 5 3 10))
(unless (guard (x [else #f]) (eval 'scheme-fork-version-number))
(error 'compile-file

View File

@ -25,6 +25,22 @@
(define-values (struct:trans trans trans? trans-ref trans-set!)
(make-struct-type 'top #f 2 0 #f '() #f))
(define-values (struct:collide collide collide? collide-ref collide-set!)
(make-struct-type 'collide #f 2 0 #f
(list (cons prop:equal+hash
(list
(lambda (a b eql?)
(and (eql? (collide-1 a)
(collide-1 b))
(eql? (collide-2 a)
(collide-2 b))))
(lambda (a hc)
(hc (collide-1 a)))
(lambda (a hc)
(hc (collide-1 a))))))))
(define collide-1 (make-struct-field-accessor collide-ref 0))
(define collide-2 (make-struct-field-accessor collide-ref 1))
(define-syntax check
(syntax-rules ()
[(_ a b)
@ -42,9 +58,70 @@
(check (hash-ref (hash-set (hash) (trans 1 2) 'ok) (trans 1 2) #f) 'ok)
(check (hash-ref (hash-set (hash) (trans 1 2) 'ok) (trans 1 3) #f) #f)
(check (equal? (collide 1 2) (collide 1 2)) #t)
(check (equal? (collide 1 2) (collide 1 3)) #f)
(check (equal? (equal-hash-code (collide 1 2)) (equal-hash-code (collide 1 3))) #t)
(check (equal? (hash 1 'x 2 'y) (hash 2 'y 1 'x)) #t)
(check (hash-ref (hash (hash 1 'x 2 'y) 7) (hash 2 'y 1 'x) #f) 7)
(let ([simple (hash (cons 1 2) 2 (cons 1 3) 3)])
(define (check-map l)
(check (or (equal? l (list (cons (cons 1 2) 2) (cons (cons 1 3) 3)))
(equal? l (list (cons (cons 1 3) 3) (cons (cons 1 2) 2))))
#t))
(check-map (hash-map simple cons))
(check-map (let loop ([i (hash-iterate-first simple)])
(if (not i)
'()
(cons (cons (hash-iterate-key simple i)
(hash-iterate-value simple i))
(loop (hash-iterate-next simple i))))))
(check-map (let loop ([i (unsafe-immutable-hash-iterate-first simple)])
(if (not i)
'()
(cons (cons (unsafe-immutable-hash-iterate-key simple i)
(unsafe-immutable-hash-iterate-value simple i))
(loop (unsafe-immutable-hash-iterate-next simple i)))))))
(check (hash-ref (hash (collide 1 2) 2 (collide 1 3) 3) (collide 1 2)) 2)
(let ([collides (hash (collide 1 2) 2 (collide 1 3) 3)])
(define (check-map l)
(check (or (equal? l (list (cons (collide 1 2) 2) (cons (collide 1 3) 3)))
(equal? l (list (cons (collide 1 3) 3) (cons (collide 1 2) 2))))
#t))
(check-map (hash-map collides cons))
(check-map (let loop ([i (hash-iterate-first collides)])
(if (not i)
'()
(cons (cons (hash-iterate-key collides i)
(hash-iterate-value collides i))
(loop (hash-iterate-next collides i))))))
(check-map (let loop ([i (unsafe-immutable-hash-iterate-first collides)])
(if (not i)
'()
(cons (cons (unsafe-immutable-hash-iterate-key collides i)
(unsafe-immutable-hash-iterate-value collides i))
(loop (unsafe-immutable-hash-iterate-next collides i)))))))
(let ([mixed (hash 1 #t 2 'true 3 #t 4 'true)])
(define (check-map l)
(check (list-sort (lambda (a b) (< (car a) (car b))) l)
'((1 . #t) (2 . true) (3 . #t) (4 . true))))
(check-map (hash-map mixed cons))
(check-map (let loop ([i (hash-iterate-first mixed)])
(if (not i)
'()
(cons (cons (hash-iterate-key mixed i)
(hash-iterate-value mixed i))
(loop (hash-iterate-next mixed i))))))
(check-map (let loop ([i (unsafe-immutable-hash-iterate-first mixed)])
(if (not i)
'()
(cons (cons (unsafe-immutable-hash-iterate-key mixed i)
(unsafe-immutable-hash-iterate-value mixed i))
(loop (unsafe-immutable-hash-iterate-next mixed i)))))))
;; Check `equal?`-based weak hash tables
(let ([ht (make-weak-hash)]
[apple (string #\a #\p #\p #\l #\e)]
@ -63,6 +140,16 @@
;; Ensure that `ht` stays live until here
(check (hash? ht) #t)))
(let ([ph (make-hasheq-placeholder null)])
(check (make-reader-graph ph) (hasheq)))
(let ([ph (make-hasheq-placeholder (list (cons 1 2) (cons 'a 'b)))])
(check (make-reader-graph ph) (hasheq 1 2 'a 'b)))
(let* ([ph1 (make-placeholder #f)]
[ph (make-hasheq-placeholder (list (cons ph1 2) (cons 'a 'b)))])
(placeholder-set! ph1 ph)
(let ([g (make-reader-graph ph)])
(check (hash-count g) 2)))
(define (shuffle l)
(define a (make-vector (length l)))
(let loop ([l l] [i 0])
@ -423,3 +510,47 @@
(report "equal-hash-code" r-coll))
(time (for-each (lambda (i) (for-each equal-hash l)) l))
(time (for-each (lambda (i) (for-each equal-hash-code l)) l)))
;; ----------------------------------------
;; Stress text:
; (random-seed 77889955)
(let loop ([n 1])
(unless (fx= n 0)
(let ([nums (let loop ([l null] [n 0])
(if (= n 1000)
l
(let ([r (collide (#%random #xFFFFF)
(#%random #xFFFFF))])
(if (member r l)
(loop l n)
(loop (cons r l) (add1 n))))))]
[dup (lambda (x)
(if (collide? x)
(collide (collide-1 x) (collide-2 x))
x))])
(let ([ht (let loop ([ht (hash)] [l nums] [n 0])
(check (hash-count ht) n)
(if (null? l)
ht
(loop (hash-set ht (dup (car l)) (car l)) (cdr l) (add1 n))))]
[rev-ht (let loop ([ht (hash)] [l (reverse nums)])
(if (null? l)
ht
(loop (hash-set ht (dup (car l)) (car l)) (cdr l))))])
(check (equal? ht rev-ht) #t)
(check (equal? (equal-hash-code ht) (equal-hash-code rev-ht)) #t)
(check (length nums) (hash-count ht))
(for-each (lambda (i)
(check i (hash-ref ht i #f)))
nums)
(let loop ([sub-ht ht] [sub-rev-ht rev-ht] [l nums] [n (length nums)])
(unless (null? l)
(check (hash-count sub-ht) n)
(check (hash-count sub-rev-ht) n)
(check (hash-keys-subset? sub-ht ht) #t)
(check (hash-keys-subset? sub-rev-ht ht) #t)
(check (equal? sub-ht sub-rev-ht) #t)
(check (equal? (equal-hash-code sub-ht) (equal-hash-code sub-rev-ht)) #t)
(loop (hash-remove sub-ht (car l)) (hash-remove sub-rev-ht (car l)) (cdr l) (sub1 n))))))
(loop (fx- n 1))))

View File

@ -277,7 +277,7 @@
unsafe-weak-hash-iterate-key+value unsafe-weak-hash-iterate-pair
unsafe-hash-seal! ; not exported to racket
hash? hash-eq? hash-equal? hash-eqv? hash-weak? immutable-hash?
hash? hash-eq? hash-equal? hash-eqv? hash-weak?
hash-count
hash-keys-subset?
eq-hashtable->hash ; not exported to racket

View File

@ -88,6 +88,13 @@
(eql? (unbox orig-a) (unbox orig-b))
(let ([ctx (deeper-context ctx)])
(equal? (unbox orig-a) (unbox orig-b) ctx)))))]
[(authentic-hash? a)
(and (authentic-hash? b)
(or (check-union-find ctx a b)
(let ([ctx (deeper-context ctx)])
(hash=? orig-a orig-b
(lambda (a b)
(equal? a b ctx))))))]
[(record? a)
(and (record? b)
;; Check for `prop:impersonator-of`

View File

@ -110,11 +110,20 @@
[(hash-eqv? v) (make-hasheqv)]
[else (make-hasheq)]))
(cond
[(hash-eq? v) (make-intmap-shell 'eq)]
[(hash-eqv? v) (make-intmap-shell 'eqv)]
[else (make-intmap-shell 'equal)]))])
[(zero? (hash-count v)) v]
[else
(cond
[(hash-eq? v) (make-intmap-shell 'eq)]
[(hash-eqv? v) (make-intmap-shell 'eqv)]
[else (make-intmap-shell 'equal)])]))]
[p (if mutable?
orig-p
(cond
[(hash-eq? v) empty-hasheq]
[(hash-eqv? v) empty-hasheqv]
[else empty-hash]))])
(hashtable-set! ht v orig-p)
(let hloop ([p orig-p] [i (hash-iterate-first v)] [diff? #f])
(let hloop ([p p] [i (hash-iterate-first v)] [diff? #f])
(cond
[(not i)
(cond
@ -122,7 +131,8 @@
(cond
[mutable? orig-p]
[else
(intmap-shell-sync! orig-p p)
(unless (zero? (hash-count p))
(intmap-shell-sync! orig-p p))
orig-p])]
[else
(hashtable-set! ht v v)
@ -137,19 +147,28 @@
(hash-iterate-next v i)
(or diff? (not (and (eq? key new-key) (eq? val new-val)))))))])))]
[(hash-placeholder? v)
(let* ([orig-p (cond
[(hasheq-placeholder? v) (make-intmap-shell 'eq)]
[(hasheqv-placeholder? v) (make-intmap-shell 'eqv)]
[else (make-intmap-shell 'equal)])])
(hashtable-set! ht v orig-p)
(let hloop ([p orig-p] [alst (hash-placeholder-alist v)])
(let ([alst (hash-placeholder-alist v)])
(cond
[(null? alst)
(cond
[(null? alst)
(intmap-shell-sync! orig-p p)
orig-p]
[else
(hloop (hash-set p (loop (caar alst)) (loop (cdar alst)))
(cdr alst))])))]
[(hasheq-placeholder? v) empty-hasheq]
[(hasheqv-placeholder? v) empty-hasheqv]
[else empty-hash])]
[else
(let-values ([(orig-p p)
(cond
[(hasheq-placeholder? v) (values (make-intmap-shell 'eq) empty-hasheq)]
[(hasheqv-placeholder? v) (values (make-intmap-shell 'eqv) empty-hasheqv)]
[else (values (make-intmap-shell 'equal) empty-hash)])])
(hashtable-set! ht v orig-p)
(let hloop ([p p] [alst alst])
(cond
[(null? alst)
(intmap-shell-sync! orig-p p)
orig-p]
[else
(hloop (hash-set p (loop (caar alst)) (loop (cdar alst)))
(cdr alst))])))]))]
[(prefab-struct-key v)
=> (lambda (key)
(let ([args (cdr (vector->list (struct->vector v)))])

File diff suppressed because it is too large Load Diff

View File

@ -1,15 +1,7 @@
;; HAMT
;; See also "intmap.ss"
;; the absence of something
(define NOTHING (gensym 'nothing))
;; 16-bit popcount
(define (popcount x)
(let* ([x (fx- x (fxand (fxsrl x 1) #x5555))]
[x (fx+ (fxand x #x3333) (fxand (fxsrl x 2) #x3333))]
[x (fxand (fx+ x (fxsrl x 4)) #x0f0f)]
[x (fx+ x (fxsrl x 8))])
(fxand x #x1f)))
(fxpopcount16 x))
;; record types
(define-record-type hnode
@ -19,42 +11,16 @@
(mutable vals)]
[nongenerative #{hnode pfwh8wvaevt3r6pcwsqn90ry8-0}])
(meta-cond
[(> (most-positive-fixnum) (expt 2 32))
(define-record-type bnode
[parent hnode]
[fields (mutable keymap)
(mutable childmap)]
[nongenerative #{bnode pfwhzqkm2ycuuyedzz2nxjx2e-1}]
[sealed #t])
;; 64-bit bnode (pack the bitmaps into a single fixnum)
(define-record-type (bnode make-raw-bnode bnode?)
[parent hnode]
[fields (mutable bitmap)]
[nongenerative #{bnode pfwhzqkm2ycuuyedzz2nxjx2e-0}]
[sealed #t])
(define (make-bnode eqtype count keys vals keymap childmap)
(let ([bitmap (fxior keymap (fxsll childmap 16))])
(make-raw-bnode eqtype count keys vals bitmap)))
(define (bnode-keymap n)
(fxand #xffff (bnode-bitmap n)))
(define (bnode-childmap n)
(fxsrl (bnode-bitmap n) 16))
(define (bnode-copy-bitmaps! dest src)
(bnode-bitmap-set! dest (bnode-bitmap src)))]
[else
;; 32-bit bnode (separate bitmaps)
(define-record-type bnode
[parent hnode]
[fields (mutable keymap)
(mutable childmap)]
[nongenerative #{bnode pfwhzqkm2ycuuyedzz2nxjx2e-1}]
[sealed #t])
(define (bnode-copy-bitmaps! dest src)
(bnode-set-keymap! dest (bnode-keymap src))
(bnode-set-childmap! dest (bnode-childmap src)))])
(define (bnode-copy-bitmaps! dest src)
(bnode-keymap-set! dest (bnode-keymap src))
(bnode-childmap-set! dest (bnode-childmap src)))
(define-record-type cnode
[parent hnode]
@ -74,83 +40,72 @@
(define empty-hasheqv (make-empty-bnode 'eqv))
(define empty-hash (make-empty-bnode 'equal))
(define (make-hamt-shell eqtype)
(define (make-intmap-shell eqtype)
(make-empty-bnode eqtype))
(define (hamt-shell-sync! dest src)
(define (intmap-shell-sync! dest src)
(hnode-count-set! dest (hnode-count src))
(hnode-keys-set! dest (hnode-keys src))
(hnode-vals-set! dest (hnode-vals src))
(bnode-copy-bitmaps! dest src))
;; hamt interface
(define hamt? hnode?)
(define immutable-hash? hnode?)
;; intmap interface
(define intmap? bnode?)
(define immutable-hash? bnode?)
(define (hamt-eq? h)
(define (intmap-eq? h)
(eq? (hnode-eqtype h) 'eq))
(define (hamt-eqv? h)
(define (intmap-eqv? h)
(eq? (hnode-eqtype h) 'eqv))
(define (hamt-equal? h)
(define (intmap-equal? h)
(eq? (hnode-eqtype h) 'equal))
(define (hamt-has-key? h key)
(define (intmap-has-key? h key)
(node-has-key? h key (hash-code h key) 0))
(define (node-has-key? n key keyhash shift)
(cond [(bnode? n) (bnode-has-key? n key keyhash shift)]
[else (cnode-has-key? n key)]))
(define (hamt-ref h key default)
(define (intmap-ref h key default)
(cond
[(hamt-empty? h)
[(intmap-empty? h)
;; Access on an empty HAMT is common, so don't even hash in that case
(if (procedure? default)
(default)
default)]
default]
[else
(let ([res (bnode-ref h key (hash-code h key) 0)])
(if (eq? res NOTHING)
(if (procedure? default)
(default)
default)
res))]))
(bnode-ref h key (hash-code h key) 0 default)]))
(define (hamt-set h key val)
(define (intmap-ref-key h key default)
(cond
[(intmap-empty? h)
default]
[else
(bnode-ref-key h key (hash-code h key) 0 default)]))
(define (intmap-set h key val)
(bnode-set h key val (hash-code h key) 0))
(define (hamt-remove h key)
(define (intmap-remove h key)
(bnode-remove h key (hash-code h key) 0))
(define (hamt-count h)
(define (intmap-count h)
(hnode-count h))
(define (hamt-empty? h)
(fxzero? (hamt-count h)))
(define (intmap-empty? h)
(fxzero? (intmap-count h)))
(define (hamt=? a b eql?)
(define (intmap=? a b eql?)
(and (eq? (hnode-eqtype a)
(hnode-eqtype b))
(node=? a b eql? 0)))
(define (hamt-hash-code a hash)
(define (intmap-hash-code a hash)
(node-hash-code a hash 0))
(define ignored/hamt
(begin
;; Go through generic `hash` versions to support `a`
;; and `b` as impersonated hash tables
(record-type-equal-procedure (record-type-descriptor bnode)
(lambda (a b eql?)
(hash=? a b eql?)))
(record-type-hash-procedure (record-type-descriptor bnode)
(lambda (a hash)
(hash-hash-code a hash)))))
(define (hamt-keys-subset? a b)
(or (hamt-empty? a)
(define (intmap-keys-subset? a b)
(or (intmap-empty? a)
(node-keys-subset? a b 0)))
(define (hamt-foldk h f nil kont)
@ -173,48 +128,48 @@
(define (hamt-values h)
(hamt-fold h '() (lambda (_ v xs) (cons v xs))))
(define (hamt-for-each h proc)
(define (intmap-for-each h proc)
(hamt-fold h (void) (lambda (k v _) (proc k v) (void))))
(define (hamt-map h proc)
(define (intmap-map h proc)
(hamt-fold h '() (lambda (k v xs) (cons (proc k v) xs))))
;; generatic iteration by counting
(define (hamt-iterate-first h)
(and (not (hamt-empty? h))
(define (intmap-iterate-first h)
(and (not (intmap-empty? h))
0))
(define (hamt-iterate-next h pos)
(define (intmap-iterate-next h pos)
(let ([pos (fx1+ pos)])
(and (not (fx= pos (hamt-count h)))
(and (not (fx= pos (intmap-count h)))
pos)))
(define (hamt-iterate-key h pos fail)
(define (intmap-iterate-key h pos fail)
(let ([p (node-entry-at-position h pos)])
(if p
(car p)
fail)))
(define (hamt-iterate-value h pos fail)
(define (intmap-iterate-value h pos fail)
(let ([p (node-entry-at-position h pos)])
(if p
(cdr p)
fail)))
(define (hamt-iterate-key+value h pos fail)
(define (intmap-iterate-key+value h pos fail)
(let ([p (node-entry-at-position h pos)])
(if p
(values (car p) (cdr p))
fail)))
(define (hamt-iterate-pair h pos fail)
(define (intmap-iterate-pair h pos fail)
(let ([p (node-entry-at-position h pos)])
(or p fail)))
;; unsafe iteration; position is a stack
;; represented by a list of (cons node index)
(define (unsafe-hamt-iterate-first h)
(and (not (hamt-empty? h))
(define (unsafe-intmap-iterate-first h)
(and (not (intmap-empty? h))
(unsafe-node-iterate-first h '())))
(define (unsafe-node-iterate-first n stack)
@ -230,7 +185,7 @@
(let ([i (fx1- (#%vector-length (hnode-keys n)))])
(cons (cons n i) stack))]))
(define (unsafe-hamt-iterate-next h pos)
(define (unsafe-intmap-iterate-next h pos)
(unsafe-node-iterate-next pos))
(define (unsafe-node-iterate-next pos)
@ -260,22 +215,22 @@
[(cnode? n)
(cons (cons n i) stack)]))])))]))
(define (unsafe-hamt-iterate-key h pos)
(define (unsafe-intmap-iterate-key h pos)
(let ([p (car pos)])
(key-ref (car p) (cdr p))))
(define (unsafe-hamt-iterate-value h pos)
(define (unsafe-intmap-iterate-value h pos)
(let ([p (car pos)])
(val-ref (car p) (cdr p))))
(define (unsafe-hamt-iterate-key+value h pos)
(define (unsafe-intmap-iterate-key+value h pos)
(let ([p (car pos)])
(let ([n (car p)]
[i (cdr p)])
(values (key-ref n i)
(val-ref n i)))))
(define (unsafe-hamt-iterate-pair h pos)
(define (unsafe-intmap-iterate-pair h pos)
(let ([p (car pos)])
(let ([n (car p)]
[i (cdr p)])
@ -324,9 +279,13 @@
(or (not vals)
(#%vector-ref vals i))))
(define (node-ref n key keyhash shift)
(cond [(bnode? n) (bnode-ref n key keyhash shift)]
[else (cnode-ref n key)]))
(define (node-ref n key keyhash shift default)
(cond [(bnode? n) (bnode-ref n key keyhash shift default)]
[else (cnode-ref n key default)]))
(define (node-ref-key n key keyhash shift default)
(cond [(bnode? n) (bnode-ref-key n key keyhash shift default)]
[else (cnode-ref-key n key default)]))
(define (node-set n key val keyhash shift)
(cond [(bnode? n) (bnode-set n key val keyhash shift)]
@ -386,7 +345,7 @@
[else (cnode-foldk n f nil kont)]))
;; bnode operations
(define (bnode-ref node key keyhash shift)
(define (bnode-ref node key keyhash shift default)
(let ([bit (bnode-bit-pos keyhash shift)])
(cond
[(bnode-maps-key? node bit)
@ -394,18 +353,36 @@
[k (key-ref node ki)])
(if (key=? node key k)
(val-ref node ki)
NOTHING))]
default))]
[(bnode-maps-child? node bit)
(let* ([ci (bnode-child-index node bit)]
[c (child-ref node ci)])
(node-ref c key keyhash (down shift)))]
(node-ref c key keyhash (down shift) default))]
[else
NOTHING])))
default])))
(define (bnode-ref-key node key keyhash shift default)
(let ([bit (bnode-bit-pos keyhash shift)])
(cond
[(bnode-maps-key? node bit)
(let* ([ki (bnode-key-index node bit)]
[k (key-ref node ki)])
(if (key=? node key k)
k
default))]
[(bnode-maps-child? node bit)
(let* ([ci (bnode-child-index node bit)]
[c (child-ref node ci)])
(node-ref-key c key keyhash (down shift) default))]
[else
default])))
(define (bnode-has-key? n key keyhash shift)
(not (eq? NOTHING (bnode-ref n key keyhash shift))))
(not (eq? none2 (bnode-ref-key n key keyhash shift none2))))
(define (bnode-set node key val keyhash shift)
(let ([bit (bnode-bit-pos keyhash shift)])
@ -419,7 +396,8 @@
[(key=? node key k)
(if (eq? val v)
node
(bnode-replace-val node ki val))]
;; for consistency, we're required to keep the new key:
(bnode-replace-val node ki val key (not (eq? key k))))]
[else
(let* ([h (hash-code node k)]
[eqtype (hnode-eqtype node)]
@ -448,10 +426,19 @@
[(key=? node key k)
(let ([km (bnode-keymap node)]
[cm (bnode-childmap node)])
(if (and (fx= (popcount km) 2)
(let ([n (popcount km)])
(cond
[(and (fx= n 2)
(fxzero? cm))
(bnode-singleton node ki bit keyhash shift)
(bnode-remove-key node ki bit)))]
(bnode-singleton node ki bit keyhash shift)]
[(and (fx= n 1)
(fxzero? cm))
(case (hnode-eqtype node)
[(eq) empty-hasheq]
[(eqv) empty-hasheqv]
[else empty-hash])]
[else
(bnode-remove-key node ki bit)])))]
[else
node]))]
@ -613,20 +600,25 @@
(fxxor (bnode-keymap node) bit)
(bnode-childmap node))))
(define (bnode-replace-val node ki val)
(define (bnode-replace-val node ki val key update-key?)
(let* ([vals (hnode-vals node)]
[new-vals
(if vals
(#%vector-copy vals)
(pariah ; reify values
(let ([pop (popcount (bnode-keymap node))])
(#%make-vector pop #t))))])
(#%make-vector pop #t))))]
[new-keys (if update-key?
(#%vector-copy (hnode-keys node))
(hnode-keys node))])
(#%vector-set! new-vals ki val)
(when update-key?
(#%vector-set! new-keys ki key))
(make-bnode (hnode-eqtype node)
(hnode-count node)
(hnode-keys node)
new-keys
new-vals
(bnode-keymap node)
(bnode-childmap node))))
@ -808,11 +800,17 @@
[(key=? node key (#%vector-ref keys i)) i]
[else (loop (fx1+ i))]))))
(define (cnode-ref node key)
(define (cnode-ref node key default)
(let ([i (cnode-index node key)])
(if i
(val-ref node i)
NOTHING)))
default)))
(define (cnode-ref-key node key default)
(let ([i (cnode-index node key)])
(if i
(key-ref node i)
default)))
(define (cnode-has-key? n key)
(not (not (cnode-index n key))))
@ -859,7 +857,7 @@
[(fx= i alen) #t]
[else
(let* ([akey (key-ref a i)]
[bval (cnode-ref b akey)])
[bval (cnode-ref b akey none2)])
(and
(eql? (val-ref a i) bval)
(loop (fx1+ i))))]))))))

View File

@ -75,6 +75,17 @@
(call-with-values (lambda () (equal-hash-loop x 0 0))
(lambda (hc burn) (logand hc (most-positive-fixnum)))))
;; A #t result implies that `equal-hash-code` and equality checking is
;; pretty fast
(define (fast-equal-hash-code? x)
(or (boolean? x)
(null? x)
(number? x)
(char? x)
(symbol? x)
(and (#%$record? x)
(not (#%$record-hash-procedure x)))))
(define (equal-secondary-hash-code x)
(cond
[(boolean? x) 1]

View File

@ -572,12 +572,17 @@
(intmap=? ht1 ht2 eql?)]
[(and (hash? ht1)
(hash? ht2)
;; Same mutability?
(eq? (intmap? (strip-impersonator ht1))
(intmap? (strip-impersonator ht2)))
;; Same key comparison?
(or (and (hash-eq? ht1)
(hash-eq? ht2))
(and (hash-eqv? ht1)
(hash-eqv? ht2))
(and (hash-equal? ht1)
(hash-equal? ht2)))
;; Same weakness?
(eq? (hash-weak? ht1) (hash-weak? ht2)))
(and (= (hash-count ht1) (hash-count ht2))
;; This generic comparison supports impersonators

View File

@ -1,484 +1,21 @@
;; Immutable maps represented as big-endian Patricia tries.
;; Based on Okasaki & Gill's "Fast Mergeable Integer Maps,"
;; (1998) with an added collision node.
;; We have several implementations of immutable hash tables. Pick one...
(include "rumble/patricia.ss")
;;
;; I also consulted Leijen and Palamarchuk's Haskell implementation
;; of Data.IntMap.
(define-record-type intmap
[fields (immutable eqtype)
(mutable root)]
[nongenerative #{intmap pfwguidjcvqbvofiirp097jco-0}]
[sealed #t])
(define-record-type Br
[fields (immutable count)
(immutable prefix)
(immutable mask)
(immutable left)
(immutable right)]
[nongenerative #{Br pfwguidjcvqbvofiirp097jco-1}]
[sealed #t])
(define-record-type Lf
[fields (immutable hash)
(immutable key)
(immutable value)]
[nongenerative #{Lf pfwguidjcvqbvofiirp097jco-2}]
[sealed #t])
(define-record-type Co
[fields (immutable hash)
(immutable pairs)]
[nongenerative #{Co pfwguidjcvqbvofiirp097jco-3}]
[sealed #t])
(define immutable-hash? intmap?)
(define empty-hash (make-intmap 'equal #f))
(define empty-hasheqv (make-intmap 'eqv #f))
(define empty-hasheq (make-intmap 'eq #f))
(define (make-intmap-shell et)
(make-intmap et #f))
(define (intmap-shell-sync! dst src)
(intmap-root-set! dst (intmap-root src)))
(define (intmap-equal? t) (eq? 'equal (intmap-eqtype t)))
(define (intmap-eqv? t) (eq? 'eqv (intmap-eqtype t)))
(define (intmap-eq? t) (eq? 'eq (intmap-eqtype t)))
(define (intmap-count t)
($intmap-count (intmap-root t)))
(define (intmap-empty? t)
(fx= 0 (intmap-count t)))
(define ($intmap-count t)
(cond [(Br? t) (Br-count t)]
[(Lf? t) 1]
[(Co? t) (length (Co-pairs t))]
[else 0]))
(define-syntax-rule (define-intmap-ref (intmap-ref formal ...)
(_ arg ... with-leaf with-pair)
$intmap-ref)
(begin
(define (intmap-ref formal ...)
(do-intmap-ref arg ...))
(define (do-intmap-ref t key default)
(let ([root (intmap-root t)])
(if root
(let ([et (intmap-eqtype t)])
($intmap-ref et root (hash-code et key) key default))
default)))
(define ($intmap-ref et t h key default)
(let loop ([t t])
(cond
[(Br? t)
(if (fx<= h (Br-prefix t))
(loop (Br-left t))
(loop (Br-right t)))]
[(Lf? t)
(if (and (fx= h (Lf-hash t))
(key=? et key (Lf-key t)))
(with-leaf t)
default)]
[(Co? t)
(if (fx= h (Co-hash t))
($collision-ref et t key with-pair default)
default)]
[else
default])))))
(define-intmap-ref (intmap-ref t key default)
(do-intmap-ref t key default Lf-value cdr)
$intmap-ref)
(define-intmap-ref (intmap-ref-key t key default)
(do-intmap-ref t key default Lf-key car)
$intmap-ref-key)
(define-intmap-ref (intmap-has-key? t key)
(do-intmap-ref t key #f (lambda (_) #t) (lambda (_) #t))
$intmap-has-key?)
(define (intmap-set t key val)
(let ([et (intmap-eqtype t)])
(make-intmap
et
($intmap-set et (intmap-root t) (hash-code et key) key val))))
(define ($intmap-set et t h key val)
(cond
[(Br? t)
(let ([p (Br-prefix t)]
[m (Br-mask t)])
(cond
[(not (match-prefix? h p m))
(join h (make-Lf h key val) p t)]
[(fx<= h p)
(br p m ($intmap-set et (Br-left t) h key val) (Br-right t))]
[else
(br p m (Br-left t) ($intmap-set et (Br-right t) h key val))]))]
[(Lf? t)
(let ([j (Lf-hash t)])
(cond
[(not (fx= h j))
(join h (make-Lf h key val) j t)]
[(key=? et key (Lf-key t))
(make-Lf h key val)]
[else
(make-Co h (list (cons key val) (cons (Lf-key t) (Lf-value t))))]))]
[(Co? t)
(let ([j (Co-hash t)])
(if (fx= h j)
(make-Co j ($collision-set et t key val))
(join h (make-Lf h key val) j t)))]
[else
(make-Lf h key val)]))
(define (join p0 t0 p1 t1)
(let* ([m (branching-bit p0 p1)]
[p (mask p0 m)])
(if (fx<= p0 p1)
(br p m t0 t1)
(br p m t1 t0))))
(define (intmap-remove t key)
(let ([et (intmap-eqtype t)])
(let ([r ($intmap-remove et (intmap-root t) (hash-code et key) key)])
(if r
(make-intmap et r)
(case et
[(eq) empty-hasheq]
[(equal) empty-hash]
[else empty-hasheqv])))))
(define ($intmap-remove et t h key)
(cond
[(Br? t)
(let ([p (Br-prefix t)]
[m (Br-mask t)])
(cond
[(not (match-prefix? h p m))
t]
[(fx<= h p)
(br/check-left p m ($intmap-remove et (Br-left t) h key) (Br-right t))]
[else
(br/check-right p m (Br-left t) ($intmap-remove et (Br-right t) h key))]))]
[(Lf? t)
(if (key=? et key (Lf-key t))
#f
t)]
[(Co? t)
(cond
[(fx=? h (Co-hash t))
;; A collision node always has at least 2 key-value pairs,
;; so when we remove one, we know the resulting list is non-empty.
(let ([pairs ($collision-remove et t key)])
(if (null? (cdr pairs))
(make-Lf h (caar pairs) (cdar pairs))
(make-Co h pairs)))]
[else
t])]
[else
#f]))
;; collision ops
(define ($collision-ref et t key with-pair default)
(let loop ([xs (Co-pairs t)])
(cond [(null? xs) default]
[(key=? et key (caar xs)) (with-pair (car xs))]
[else (loop (cdr xs))])))
(define ($collision-set et t key val)
(cons (cons key val)
(let loop ([xs (Co-pairs t)])
(cond [(null? xs) '()]
[(key=? et key (caar xs)) (loop (cdr xs))]
[else (cons (car xs) (loop (cdr xs)))]))))
(define ($collision-remove et t key)
(let loop ([xs (Co-pairs t)])
(cond [(null? xs) '()]
[(key=? et key (caar xs)) (loop (cdr xs))]
[else (cons (car xs) (loop (cdr xs)))])))
;; bit twiddling
(define-syntax-rule (match-prefix? h p m)
(fx= (mask h m) p))
(define-syntax-rule (mask h m)
(fxand (fxior h (fx1- m)) (fxnot m)))
(define-syntax-rule (branching-bit p m)
(highest-set-bit (fxxor p m)))
(define-syntax-rule (highest-set-bit x1)
(let* ([x2 (fxior x1 (fxsrl x1 1))]
[x3 (fxior x2 (fxsrl x2 2))]
[x4 (fxior x3 (fxsrl x3 4))]
[x5 (fxior x4 (fxsrl x4 8))]
[x6 (fxior x5 (fxsrl x5 16))]
[x7 (if (> (fixnum-width) 32)
(fxior x6 (fxsrl x6 32))
x6)])
(fxxor x7 (fxsrl x7 1))))
;; basic utils
(define (br p m l r)
(let ([c (fx+ ($intmap-count l) ($intmap-count r))])
(make-Br c p m l r)))
(define (br/check-left p m l r)
(if l
(br p m l r)
r))
(define (br/check-right p m l r)
(if r
(br p m l r)
l))
(define-syntax-rule (key=? et k1 k2)
(cond [(eq? et 'eq) (eq? k1 k2)]
[(eq? et 'eqv) (eqv? k1 k2)]
[else (key-equal? k1 k2)]))
(define-syntax-rule (hash-code et k)
(cond [(eq? et 'eq) (eq-hash-code k)]
[(eq? et 'eqv) (eqv-hash-code k)]
[else (key-equal-hash-code k)]))
;; iteration
(define (intmap-iterate-first t)
(and (fx> (intmap-count t) 0)
0))
(define (intmap-iterate-next t pos)
(let ([pos (fx1+ pos)])
(and (fx< pos (intmap-count t))
pos)))
(define (intmap-iterate-pair t pos fail)
(or ($intmap-nth (intmap-root t) pos)
fail))
(define (intmap-iterate-key t pos fail)
(let ([p ($intmap-nth (intmap-root t) pos)])
(if p (car p) fail)))
(define (intmap-iterate-value t pos fail)
(let ([p ($intmap-nth (intmap-root t) pos)])
(if p (cdr p) fail)))
(define (intmap-iterate-key+value t pos fail)
(let ([p ($intmap-nth (intmap-root t) pos)])
(if p
(values (car p) (cdr p))
fail)))
(define ($intmap-nth t n)
(cond
[(Br? t)
(let* ([left (Br-left t)]
[left-count ($intmap-count left)])
(if (fx< n left-count)
($intmap-nth left n)
($intmap-nth (Br-right t) (fx- n left-count))))]
[(Lf? t)
(and (fx= 0 n)
(cons (Lf-key t) (Lf-value t)))]
[(Co? t)
(let ([pairs (Co-pairs t)])
(and (fx< n (length pairs))
(list-ref pairs n)))]
[else
#f]))
(define (unsafe-intmap-iterate-first t)
($intmap-enum (intmap-root t) #f))
(define (unsafe-intmap-iterate-next t pos)
(let ([next (cdr pos)])
(and next
($intmap-enum (car next) (cdr next)))))
(define (unsafe-intmap-iterate-pair t pos)
(car pos))
(define (unsafe-intmap-iterate-key t pos)
(caar pos))
(define (unsafe-intmap-iterate-value t pos)
(cdar pos))
(define (unsafe-intmap-iterate-key+value t pos)
(values (caar pos) (cdar pos)))
(define ($intmap-enum t next)
(cond
[(Br? t)
($intmap-enum (Br-left t) (cons (Br-right t) next))]
[(Lf? t)
(cons (cons (Lf-key t) (Lf-value t)) next)]
[(Co? t)
(let ([pairs (Co-pairs t)])
(let ([fst (car pairs)]
[rst (cdr pairs)])
(if (null? rst)
(cons fst next)
(cons fst (cons (make-Co #f rst) next)))))]
[else
next]))
(define (intmap-fold t nil proc)
(let loop ([pos (unsafe-intmap-iterate-first t)] [nil nil])
(cond
[pos
(let ([p (unsafe-intmap-iterate-pair t pos)])
(loop (unsafe-intmap-iterate-next t pos)
(proc (car p) (cdr p) nil)))]
[else
nil])))
(define (intmap-for-each t proc)
(intmap-fold t (void) (lambda (k v _) (|#%app| proc k v) (void))))
(define (intmap-map t proc)
(intmap-fold t '() (lambda (k v xs) (cons (|#%app| proc k v) xs))))
;; equality
(define (intmap=? a b eql?)
(and (eq? (intmap-eqtype a) (intmap-eqtype b))
($intmap=? (intmap-eqtype a) (intmap-root a) (intmap-root b) eql?)))
(define ($intmap=? et a b eql?)
(or
(eq? a b)
(cond
[(Br? a)
(and (Br? b)
(fx= (Br-count a) (Br-count b))
(fx= (Br-prefix a) (Br-prefix b))
(fx= (Br-mask a) (Br-mask b))
($intmap=? et (Br-left a) (Br-left b) eql?)
($intmap=? et (Br-right a) (Br-right b) eql?))]
[(Lf? a)
(and (Lf? b)
(key=? et (Lf-key a) (Lf-key b))
(eql? (Lf-value a) (Lf-value b)))]
[(Co? a)
(and (Co? b)
(let ([xs (Co-pairs a)])
(and (fx= (length xs) (length (Co-pairs b)))
(let loop ([xs xs])
(cond [(null? xs)
#t]
[else
(let ([pair ($collision-ref et b (caar xs) values #f)])
(and pair
(eql? (cdar xs) (cdr pair))
(loop (cdr xs))))])))))]
[else (and (not a) (not b))])))
;; hash code
(define (intmap-hash-code t hash)
($intmap-hash-code (intmap-root t) hash 0))
(define ($intmap-hash-code t hash hc)
(cond
[(Br? t)
(let* ([hc (hash-code-combine hc (hash (Br-prefix t)))]
[hc (hash-code-combine hc (hash (Br-mask t)))]
[hc (hash-code-combine hc ($intmap-hash-code (Br-left t) hash hc))]
[hc (hash-code-combine hc ($intmap-hash-code (Br-right t) hash hc))])
hc)]
[(Lf? t)
(let* ([hc (hash-code-combine hc (Lf-hash t))]
[hc (hash-code-combine hc (hash (Lf-value t)))])
hc)]
[(Co? t)
(hash-code-combine hc (Co-hash t))]
[else
(hash-code-combine hc (hash #f))]))
(define ignored/intmap
(begin
;; Go through generic `hash` versions to support `a`
;; and `b` as impersonated hash tables
(record-type-equal-procedure (record-type-descriptor intmap)
(lambda (a b eql?)
(hash=? a b eql?)))
(record-type-hash-procedure (record-type-descriptor intmap)
(lambda (a hash)
(hash-hash-code a hash)))))
;; subset
(define (intmap-keys-subset? a b)
($intmap-keys-subset? (intmap-eqtype a) (intmap-root a) (intmap-root b)))
(define ($intmap-keys-subset? et a b)
(or
(eq? a b)
(cond
[(Br? a)
(and
(Br? b)
(let ([p1 (Br-prefix a)]
[m1 (Br-mask a)]
[p2 (Br-prefix b)]
[m2 (Br-mask b)])
(cond
[(fx> m1 m2) #f]
[(fx> m2 m1)
(and (match-prefix? p1 p2 m2)
(if (fx<= p1 p2)
($intmap-keys-subset? et a (Br-left b))
($intmap-keys-subset? et a (Br-right b))))]
[else
(and (fx= p1 p2)
($intmap-keys-subset? et (Br-left a) (Br-left b))
($intmap-keys-subset? et (Br-right a) (Br-right b)))])))]
[(Lf? a)
(if (Lf? b)
(key=? et (Lf-key a) (Lf-key b))
($intmap-has-key? et b (Lf-hash a) (Lf-key a) #f))]
[(Co? a)
(let loop ([xs (Co-pairs a)])
(cond [(null? xs) #t]
[($intmap-has-key? et b (Co-hash a) (caar xs) #f) (loop (cdr xs))]
[else #f]))]
[else
#t])))
;; This Patricia-trie implementation is the prettiest and fastest. It
;; uses the most memory, though --- typically much more than the
;; vector-stencil HAMT.
;; (include "rumble/hamt-stencil.ss")
;;
;; This HAMT implementation using stencil vectors tends to use the
;; last memory, often by a lot. It's slower than the Patricia-tree
;; implementation, though, especially for `hash-keys-subset?`.
;; (include "rumble/hamt-vector.ss")
;;
;; This HAMT implementaiton uses plain vectors instead of stencil
;; vectors. Its speed and memory use are intermediate, but its speed
;; is closer to the stencil-vector HAMT implementation, and memory use
;; is closer to the Patrica trie implementation.

View File

@ -0,0 +1,473 @@
;; See also "intmap.ss"
;; Immutable maps represented as big-endian Patricia tries.
;; Based on Okasaki & Gill's "Fast Mergeable Integer Maps,"
;; (1998) with an added collision node.
;;
;; I also consulted Leijen and Palamarchuk's Haskell implementation
;; of Data.IntMap.
(define-record-type intmap
[fields (immutable eqtype)
(mutable root)]
[nongenerative #{intmap pfwguidjcvqbvofiirp097jco-0}]
[sealed #t])
(define-record-type Br
[fields (immutable count)
(immutable prefix)
(immutable mask)
(immutable left)
(immutable right)]
[nongenerative #{Br pfwguidjcvqbvofiirp097jco-1}]
[sealed #t])
(define-record-type Lf
[fields (immutable hash)
(immutable key)
(immutable value)]
[nongenerative #{Lf pfwguidjcvqbvofiirp097jco-2}]
[sealed #t])
(define-record-type Co
[fields (immutable hash)
(immutable pairs)]
[nongenerative #{Co pfwguidjcvqbvofiirp097jco-3}]
[sealed #t])
(define empty-hash (make-intmap 'equal #f))
(define empty-hasheqv (make-intmap 'eqv #f))
(define empty-hasheq (make-intmap 'eq #f))
(define (make-intmap-shell et)
(make-intmap et #f))
(define (intmap-shell-sync! dst src)
(intmap-root-set! dst (intmap-root src)))
(define (intmap-equal? t) (eq? 'equal (intmap-eqtype t)))
(define (intmap-eqv? t) (eq? 'eqv (intmap-eqtype t)))
(define (intmap-eq? t) (eq? 'eq (intmap-eqtype t)))
(define (intmap-count t)
($intmap-count (intmap-root t)))
(define (intmap-empty? t)
(fx= 0 (intmap-count t)))
(define ($intmap-count t)
(cond [(Br? t) (Br-count t)]
[(Lf? t) 1]
[(Co? t) (length (Co-pairs t))]
[else 0]))
(define-syntax-rule (define-intmap-ref (intmap-ref formal ...)
(_ arg ... with-leaf with-pair)
$intmap-ref)
(begin
(define (intmap-ref formal ...)
(do-intmap-ref arg ...))
(define (do-intmap-ref t key default)
(let ([root (intmap-root t)])
(if root
(let ([et (intmap-eqtype t)])
($intmap-ref et root (hash-code et key) key default))
default)))
(define ($intmap-ref et t h key default)
(let loop ([t t])
(cond
[(Br? t)
(if (fx<= h (Br-prefix t))
(loop (Br-left t))
(loop (Br-right t)))]
[(Lf? t)
(if (and (fx= h (Lf-hash t))
(key=? et key (Lf-key t)))
(with-leaf t)
default)]
[(Co? t)
(if (fx= h (Co-hash t))
($collision-ref et t key with-pair default)
default)]
[else
default])))))
(define-intmap-ref (intmap-ref t key default)
(do-intmap-ref t key default Lf-value cdr)
$intmap-ref)
(define-intmap-ref (intmap-ref-key t key default)
(do-intmap-ref t key default Lf-key car)
$intmap-ref-key)
(define-intmap-ref (intmap-has-key? t key)
(do-intmap-ref t key #f (lambda (_) #t) (lambda (_) #t))
$intmap-has-key?)
(define (intmap-set t key val)
(let ([et (intmap-eqtype t)])
(make-intmap
et
($intmap-set et (intmap-root t) (hash-code et key) key val))))
(define ($intmap-set et t h key val)
(cond
[(Br? t)
(let ([p (Br-prefix t)]
[m (Br-mask t)])
(cond
[(not (match-prefix? h p m))
(join h (make-Lf h key val) p t)]
[(fx<= h p)
(br p m ($intmap-set et (Br-left t) h key val) (Br-right t))]
[else
(br p m (Br-left t) ($intmap-set et (Br-right t) h key val))]))]
[(Lf? t)
(let ([j (Lf-hash t)])
(cond
[(not (fx= h j))
(join h (make-Lf h key val) j t)]
[(key=? et key (Lf-key t))
(make-Lf h key val)]
[else
(make-Co h (list (cons key val) (cons (Lf-key t) (Lf-value t))))]))]
[(Co? t)
(let ([j (Co-hash t)])
(if (fx= h j)
(make-Co j ($collision-set et t key val))
(join h (make-Lf h key val) j t)))]
[else
(make-Lf h key val)]))
(define (join p0 t0 p1 t1)
(let* ([m (branching-bit p0 p1)]
[p (mask p0 m)])
(if (fx<= p0 p1)
(br p m t0 t1)
(br p m t1 t0))))
(define (intmap-remove t key)
(let ([et (intmap-eqtype t)])
(let ([r ($intmap-remove et (intmap-root t) (hash-code et key) key)])
(if r
(make-intmap et r)
(case et
[(eq) empty-hasheq]
[(equal) empty-hash]
[else empty-hasheqv])))))
(define ($intmap-remove et t h key)
(cond
[(Br? t)
(let ([p (Br-prefix t)]
[m (Br-mask t)])
(cond
[(not (match-prefix? h p m))
t]
[(fx<= h p)
(br/check-left p m ($intmap-remove et (Br-left t) h key) (Br-right t))]
[else
(br/check-right p m (Br-left t) ($intmap-remove et (Br-right t) h key))]))]
[(Lf? t)
(if (key=? et key (Lf-key t))
#f
t)]
[(Co? t)
(cond
[(fx=? h (Co-hash t))
;; A collision node always has at least 2 key-value pairs,
;; so when we remove one, we know the resulting list is non-empty.
(let ([pairs ($collision-remove et t key)])
(if (null? (cdr pairs))
(make-Lf h (caar pairs) (cdar pairs))
(make-Co h pairs)))]
[else
t])]
[else
#f]))
;; collision ops
(define ($collision-ref et t key with-pair default)
(let loop ([xs (Co-pairs t)])
(cond [(null? xs) default]
[(key=? et key (caar xs)) (with-pair (car xs))]
[else (loop (cdr xs))])))
(define ($collision-set et t key val)
(cons (cons key val)
(let loop ([xs (Co-pairs t)])
(cond [(null? xs) '()]
[(key=? et key (caar xs)) (loop (cdr xs))]
[else (cons (car xs) (loop (cdr xs)))]))))
(define ($collision-remove et t key)
(let loop ([xs (Co-pairs t)])
(cond [(null? xs) '()]
[(key=? et key (caar xs)) (loop (cdr xs))]
[else (cons (car xs) (loop (cdr xs)))])))
;; bit twiddling
(define-syntax-rule (match-prefix? h p m)
(fx= (mask h m) p))
(define-syntax-rule (mask h m)
(fxand (fxior h (fx1- m)) (fxnot m)))
(define-syntax-rule (branching-bit p m)
(highest-set-bit (fxxor p m)))
(define-syntax-rule (highest-set-bit x1)
(let* ([x2 (fxior x1 (fxsrl x1 1))]
[x3 (fxior x2 (fxsrl x2 2))]
[x4 (fxior x3 (fxsrl x3 4))]
[x5 (fxior x4 (fxsrl x4 8))]
[x6 (fxior x5 (fxsrl x5 16))]
[x7 (if (> (fixnum-width) 32)
(fxior x6 (fxsrl x6 32))
x6)])
(fxxor x7 (fxsrl x7 1))))
;; basic utils
(define (br p m l r)
(let ([c (fx+ ($intmap-count l) ($intmap-count r))])
(make-Br c p m l r)))
(define (br/check-left p m l r)
(if l
(br p m l r)
r))
(define (br/check-right p m l r)
(if r
(br p m l r)
l))
(define-syntax-rule (key=? et k1 k2)
(cond [(eq? et 'eq) (eq? k1 k2)]
[(eq? et 'eqv) (eqv? k1 k2)]
[else (key-equal? k1 k2)]))
(define-syntax-rule (hash-code et k)
(cond [(eq? et 'eq) (eq-hash-code k)]
[(eq? et 'eqv) (eqv-hash-code k)]
[else (key-equal-hash-code k)]))
;; iteration
(define (intmap-iterate-first t)
(and (fx> (intmap-count t) 0)
0))
(define (intmap-iterate-next t pos)
(let ([pos (fx1+ pos)])
(and (fx< pos (intmap-count t))
pos)))
(define (intmap-iterate-pair t pos fail)
(or ($intmap-nth (intmap-root t) pos)
fail))
(define (intmap-iterate-key t pos fail)
(let ([p ($intmap-nth (intmap-root t) pos)])
(if p (car p) fail)))
(define (intmap-iterate-value t pos fail)
(let ([p ($intmap-nth (intmap-root t) pos)])
(if p (cdr p) fail)))
(define (intmap-iterate-key+value t pos fail)
(let ([p ($intmap-nth (intmap-root t) pos)])
(if p
(values (car p) (cdr p))
fail)))
(define ($intmap-nth t n)
(cond
[(Br? t)
(let* ([left (Br-left t)]
[left-count ($intmap-count left)])
(if (fx< n left-count)
($intmap-nth left n)
($intmap-nth (Br-right t) (fx- n left-count))))]
[(Lf? t)
(and (fx= 0 n)
(cons (Lf-key t) (Lf-value t)))]
[(Co? t)
(let ([pairs (Co-pairs t)])
(and (fx< n (length pairs))
(list-ref pairs n)))]
[else
#f]))
(define (unsafe-intmap-iterate-first t)
($intmap-enum (intmap-root t) #f))
(define (unsafe-intmap-iterate-next t pos)
(let ([next (cdr pos)])
(and next
($intmap-enum (car next) (cdr next)))))
(define (unsafe-intmap-iterate-pair t pos)
(car pos))
(define (unsafe-intmap-iterate-key t pos)
(caar pos))
(define (unsafe-intmap-iterate-value t pos)
(cdar pos))
(define (unsafe-intmap-iterate-key+value t pos)
(values (caar pos) (cdar pos)))
(define ($intmap-enum t next)
(cond
[(Br? t)
($intmap-enum (Br-left t) (cons (Br-right t) next))]
[(Lf? t)
(cons (cons (Lf-key t) (Lf-value t)) next)]
[(Co? t)
(let ([pairs (Co-pairs t)])
(let ([fst (car pairs)]
[rst (cdr pairs)])
(if (null? rst)
(cons fst next)
(cons fst (cons (make-Co #f rst) next)))))]
[else
next]))
(define (intmap-fold t nil proc)
(let loop ([pos (unsafe-intmap-iterate-first t)] [nil nil])
(cond
[pos
(let ([p (unsafe-intmap-iterate-pair t pos)])
(loop (unsafe-intmap-iterate-next t pos)
(proc (car p) (cdr p) nil)))]
[else
nil])))
(define (intmap-for-each t proc)
(intmap-fold t (void) (lambda (k v _) (|#%app| proc k v) (void))))
(define (intmap-map t proc)
(#%reverse (intmap-fold t '() (lambda (k v xs) (cons (|#%app| proc k v) xs)))))
;; equality
(define (intmap=? a b eql?)
(and (eq? (intmap-eqtype a) (intmap-eqtype b))
($intmap=? (intmap-eqtype a) (intmap-root a) (intmap-root b) eql?)))
(define ($intmap=? et a b eql?)
(or
(eq? a b)
(cond
[(Br? a)
(and (Br? b)
(fx= (Br-count a) (Br-count b))
(fx= (Br-prefix a) (Br-prefix b))
(fx= (Br-mask a) (Br-mask b))
($intmap=? et (Br-left a) (Br-left b) eql?)
($intmap=? et (Br-right a) (Br-right b) eql?))]
[(Lf? a)
(and (Lf? b)
(key=? et (Lf-key a) (Lf-key b))
(eql? (Lf-value a) (Lf-value b)))]
[(Co? a)
(and (Co? b)
(let ([xs (Co-pairs a)])
(and (fx= (length xs) (length (Co-pairs b)))
(let loop ([xs xs])
(cond [(null? xs)
#t]
[else
(let ([pair ($collision-ref et b (caar xs) values #f)])
(and pair
(eql? (cdar xs) (cdr pair))
(loop (cdr xs))))])))))]
[else (and (not a) (not b))])))
;; hash code
(define (intmap-hash-code t hash)
($intmap-hash-code (intmap-root t) hash 0))
(define ($intmap-hash-code t hash hc)
(cond
[(Br? t)
(let* ([hc (hash-code-combine hc (hash (Br-prefix t)))]
[hc (hash-code-combine hc (hash (Br-mask t)))]
[hc (hash-code-combine hc ($intmap-hash-code (Br-left t) hash hc))]
[hc (hash-code-combine hc ($intmap-hash-code (Br-right t) hash hc))])
hc)]
[(Lf? t)
(let* ([hc (hash-code-combine hc (Lf-hash t))]
[hc (hash-code-combine hc (hash (Lf-value t)))])
hc)]
[(Co? t)
(hash-code-combine hc (Co-hash t))]
[else
(hash-code-combine hc (hash #f))]))
;; subset
(define (intmap-keys-subset? a b)
($intmap-keys-subset? (intmap-eqtype a) (intmap-root a) (intmap-root b)))
(define ($intmap-keys-subset? et a b)
(or
(eq? a b)
(cond
[(Br? a)
(and
(Br? b)
(let ([p1 (Br-prefix a)]
[m1 (Br-mask a)]
[p2 (Br-prefix b)]
[m2 (Br-mask b)])
(cond
[(fx> m1 m2) #f]
[(fx> m2 m1)
(and (match-prefix? p1 p2 m2)
(if (fx<= p1 p2)
($intmap-keys-subset? et a (Br-left b))
($intmap-keys-subset? et a (Br-right b))))]
[else
(and (fx= p1 p2)
($intmap-keys-subset? et (Br-left a) (Br-left b))
($intmap-keys-subset? et (Br-right a) (Br-right b)))])))]
[(Lf? a)
(if (Lf? b)
(key=? et (Lf-key a) (Lf-key b))
($intmap-has-key? et b (Lf-hash a) (Lf-key a) #f))]
[(Co? a)
(let loop ([xs (Co-pairs a)])
(cond [(null? xs) #t]
[($intmap-has-key? et b (Co-hash a) (caar xs) #f) (loop (cdr xs))]
[else #f]))]
[else
#t])))

View File

@ -16,7 +16,7 @@
#define MZSCHEME_VERSION_X 7
#define MZSCHEME_VERSION_Y 5
#define MZSCHEME_VERSION_Z 0
#define MZSCHEME_VERSION_W 16
#define MZSCHEME_VERSION_W 17
/* A level of indirection makes `#` work as needed: */
#define AS_a_STR_HELPER(x) #x