cs: improve stencil-vector HMAT hash-keys-subset? performance

Also, improve some hash benchmarks.
This commit is contained in:
Matthew Flatt 2020-01-10 10:46:10 -07:00
parent d5930a18c6
commit 76726ee928
5 changed files with 127 additions and 87 deletions

View File

@ -1,13 +1,16 @@
#lang racket/base #lang racket/base
(require (for-syntax racket/base)) (require (for-syntax racket/base))
(provide Q M L N I (provide K Q M L N I
times times
unknown unknown
with-hash-variants with-hash-variants
make-large-equal-key/share1 make-large-equal-key/share1
make-large-equal-key/share2) make-large-equal-key/share2)
;; Iterations for nested things
(define K 100)
;; Iterations for slow things: ;; Iterations for slow things:
(define Q 100000) (define Q 100000)

View File

@ -3,7 +3,7 @@
'eq:keys 'eq:keys
(times (times
(let ([ht (for/hasheq ([i (in-range 100)]) (let ([ht (for/hasheq ([i (in-range K)])
(values i i))]) (values i i))])
(void (void
(for ([i (in-range Q)]) (for ([i (in-range Q)])
@ -12,7 +12,7 @@
'eq:vals 'eq:vals
(times (times
(let ([ht (for/hasheq ([i (in-range 100)]) (let ([ht (for/hasheq ([i (in-range K)])
(values i i))]) (values i i))])
(for ([i (in-range Q)]) (for ([i (in-range Q)])
(void (void
@ -21,7 +21,7 @@
'eq:keys-unsafe 'eq:keys-unsafe
(times (times
(let ([ht (for/hasheq ([i (in-range 100)]) (let ([ht (for/hasheq ([i (in-range K)])
(values i i))]) (values i i))])
(void (void
(for ([i (in-range Q)]) (for ([i (in-range Q)])
@ -30,7 +30,7 @@
'eq:vals-unsafe 'eq:vals-unsafe
(times (times
(let ([ht (for/hasheq ([i (in-range 100)]) (let ([ht (for/hasheq ([i (in-range K)])
(values i i))]) (values i i))])
(for ([i (in-range Q)]) (for ([i (in-range Q)])
(void (void
@ -39,7 +39,7 @@
'eq:for-each 'eq:for-each
(times (times
(let ([ht (for/hasheq ([i (in-range 100)]) (let ([ht (for/hasheq ([i (in-range K)])
(values i i))]) (values i i))])
(for ([i (in-range Q)]) (for ([i (in-range Q)])
(hash-for-each ht (lambda (k v) 'ok))))) (hash-for-each ht (lambda (k v) 'ok)))))

View File

@ -1,5 +1,13 @@
#lang racket/base #lang racket/base
(require "config.rkt") (require "config.rkt"
racket/list)
(define shuffled
(parameterize ([current-pseudo-random-generator
(make-pseudo-random-generator)])
(random-seed 12745)
(shuffle (for/list ([i (in-range K)])
i))))
(with-hash-variants (with-hash-variants
@ -14,16 +22,25 @@
'add-many 'add-many
(times (times
(for ([i (in-range Q)]) (for ([i (in-range Q)])
(let loop ([ht EMPTY] [i 100]) (let loop ([ht EMPTY] [i K])
(if (zero? i) (if (zero? i)
(void (unknown ht)) (void (unknown ht))
(loop (hash-set ht (MAKE-KEY i) (MAKE-VAL 'true)) (loop (hash-set ht (MAKE-KEY i) (MAKE-VAL 'true))
(sub1 i)))))) (sub1 i))))))
'add-many-in-order
(times
(for ([i (in-range Q)])
(let loop ([ht EMPTY] [l shuffled])
(if (null? l)
(void (unknown ht))
(loop (hash-set ht (car l) (MAKE-VAL 'true))
(cdr l))))))
'add-same 'add-same
(times (times
(for ([i (in-range Q)]) (for ([i (in-range Q)])
(let loop ([ht EMPTY] [i 100]) (let loop ([ht EMPTY] [i K])
(if (zero? i) (if (zero? i)
(void (unknown ht)) (void (unknown ht))
(loop (hash-set ht (MAKE-KEY 'a) (MAKE-VAL 'true)) (loop (hash-set ht (MAKE-KEY 'a) (MAKE-VAL 'true))

View File

@ -1,17 +1,19 @@
#lang racket/base #lang racket/base
(require "config.rkt") (require "config.rkt"
racket/list)
(define elems (define elems
(parameterize ([current-pseudo-random-generator (parameterize ([current-pseudo-random-generator
(make-pseudo-random-generator)]) (make-pseudo-random-generator)])
(random-seed 12745) (random-seed 12745)
(shuffle
(hash-keys (hash-keys
(for/fold ([ht #hasheqv()]) ([i 200]) (for/fold ([ht #hasheqv()]) ([i 200])
(let loop () (let loop ()
(define n (random 10000)) (define n (random 10000))
(if (hash-ref ht n #f) (if (hash-ref ht n #f)
(loop) (loop)
(hash-set ht n #t))))))) (hash-set ht n #t))))))))
(define (gen n) (define (gen n)
(for/hasheq ([i (in-range n)] (for/hasheq ([i (in-range n)]
@ -31,6 +33,14 @@
(when v (when v
(error "failed"))) (error "failed")))
'eq:subset-shared-small
(times
(let* ([sub-ht (gen 6)]
[ht (gen-more 3 sub-ht)])
(check-true
(for/and ([i (in-range M)])
(hash-keys-subset? sub-ht ht)))))
'eq:subset-unshared-small 'eq:subset-unshared-small
(times (times
(let ([ht (gen 6)] (let ([ht (gen 6)]
@ -83,13 +93,15 @@
(let* ([sub-ht (gen 100)] (let* ([sub-ht (gen 100)]
[ht (gen-more 10 sub-ht)]) [ht (gen-more 10 sub-ht)])
(check-true (check-true
(for/and ([i (in-range L)]) (for/and ([i (in-range M)])
(hash-keys-subset? sub-ht ht))))) (hash-keys-subset? sub-ht ht)))))
'eq:subset-shared-large+large ;; This one amounts to a test of how fast the subset
;; operation iterates internally:
'eq:subset-unshared-large
(times (times
(let* ([sub-ht (gen 100)] (let* ([sub-ht (gen 100)]
[ht (gen-more 100 sub-ht)]) [ht (gen 100)])
(check-true (check-true
(for/and ([i (in-range L)]) (for/and ([i (in-range M)])
(hash-keys-subset? sub-ht ht))))) (hash-keys-subset? sub-ht ht)))))

View File

@ -898,20 +898,18 @@
(cons (car alist) (cons (car alist)
(cnode-assoc-update (cdr alist) key new-p))])) (cnode-assoc-update (cdr alist) key new-p))]))
;; `a` and `b` must both be cnodes
(define (cnode=? a b eql?) (define (cnode=? a b eql?)
(or
(eq? a b)
(and (and
(cnode? b)
(cnode-keys-subset/equal? a b eql?) (cnode-keys-subset/equal? a b eql?)
(fx= (length (cnode-content a)) (fx= (length (cnode-content a))
(length (cnode-content b)))))) (length (cnode-content b)))))
;; `a` and `b` must both be cnodes
(define (cnode-keys-subset/equal? a b eql?) (define (cnode-keys-subset/equal? a b eql?)
(or (or
(eq? a b) (eq? a b)
(and (and
(cnode? b)
(fx= (cnode-hash a) (cnode-hash b)) (fx= (cnode-hash a) (cnode-hash b))
(let ([ac (cnode-content a)] (let ([ac (cnode-content a)]
[bc (cnode-content b)]) [bc (cnode-content b)])
@ -1038,23 +1036,15 @@
(and (bnode? b) (and (bnode? b)
(bnode=? an bn eql? (bnode-down shift)))] (bnode=? an bn eql? (bnode-down shift)))]
[else [else
;; `bn` must be a cnode, too
(cnode=? an bn eql?)])) (cnode=? an bn eql?)]))
(loop (fx+ i 1))))])))))))) (loop (fx+ i 1))))]))))))))
;; `a` and `b` must both be bnodes
(define (bnode-keys-subset? a b shift) (define (bnode-keys-subset? a b shift)
(or
(eq? a b)
(cond (cond
[(cnode? b) [(eq? a b) #t]
;; only possible if `a` has just one key, since if it [(fx> (hamt-count a) (hamt-count b)) #f]
;; has multiple keys, they have different hash code and
;; can be a subset of the the keys of `c`
(and (fx= (hamt-count a) 1)
(let* ([k (bnode-only-key-ref a)]
[hashcode (hamt-wrapped-key-hash-code a k)])
(cnode-has-key? b (hamt-unwrap-key k) hashcode)))]
[(fx> (hamt-count a) (hamt-count b))
#f]
[else [else
(let* ([a-mask (stencil-vector-mask a)] (let* ([a-mask (stencil-vector-mask a)]
[akm (fxand (fxsrl a-mask HAMT-KEY-OFFSET) HAMT-GROUP-MASK)] [akm (fxand (fxsrl a-mask HAMT-KEY-OFFSET) HAMT-GROUP-MASK)]
@ -1066,16 +1056,33 @@
[bbm (fxior bcm bkm)]) [bbm (fxior bcm bkm)])
(and (and
(fx= abm (fxand abm bbm)) (fx= abm (fxand abm bbm))
(let loop ([abm abm] [bit 0] [aki (fxpopcount16 acm)] [bki (fxpopcount16 bcm)] [aci 0] [bci 0]) ;; At this point, we know that bbm has a bit for every key/child in
;; `b`, and we know that `a` has a key/child only where `b` does
(let loop ([bm bbm] [aki (fxpopcount16 acm)] [bki (fxpopcount16 bcm)] [aci 0] [bci 0])
(cond (cond
[(fxzero? abm) #t] [(fxzero? bm) #t]
[(fxbit-set? akm bit) [else
(let ([bm-bit (fxand bm (fxxor bm (fx- bm 1)))]) ; peel off lowest set bit of `bm`
(cond (cond
[(fxbit-set? bkm bit) [(fx= 0 (fxand bm-bit abm))
;; No key or child in `a`
(cond
[(fx= 0 (fxand bm-bit bcm))
;; Key in `b`
(loop (fx- bm bm-bit) aki (fx1+ bki) aci bci)]
[else
;; Child in `b`
(loop (fx- bm bm-bit) aki bki aci (fx1+ bci))])]
[(fx= 0 (fxand bm-bit acm))
;; Key in `a`
(cond
[(fx= 0 (fxand bm-bit bcm))
;; Key in `b`
(and (and
(hamt-wrapped-key=? (bnode-key-index-ref a aki) (bnode-key-index-ref b bki)) (hamt-wrapped-key=? (bnode-key-index-ref a aki) (bnode-key-index-ref b bki))
(loop (fxsrl abm 1) (fx1+ bit) (fx1+ aki) (fx1+ bki) aci bci))] (loop (fx- bm bm-bit) (fx1+ aki) (fx1+ bki) aci bci))]
[else [else
;; Child in `b`
(and (and
(let ([akey (bnode-key-index-ref a aki)] (let ([akey (bnode-key-index-ref a aki)]
[bchild (bnode-child-index-ref b bci)]) [bchild (bnode-child-index-ref b bci)])
@ -1084,25 +1091,26 @@
(bnode-has-key? bchild (hamt-unwrap-key akey) (hamt-wrapped-key-hash-code a akey) (bnode-down shift))] (bnode-has-key? bchild (hamt-unwrap-key akey) (hamt-wrapped-key-hash-code a akey) (bnode-down shift))]
[else [else
(cnode-has-key? bchild (hamt-unwrap-key akey) (hamt-wrapped-key-hash-code a akey))])) (cnode-has-key? bchild (hamt-unwrap-key akey) (hamt-wrapped-key-hash-code a akey))]))
(loop (fxsrl abm 1) (fx1+ bit) (fx1+ aki) bki aci (fx1+ bci)))])] (loop (fx- bm bm-bit) (fx1+ aki) bki aci (fx1+ bci)))])]
[(fxbit-set? acm bit)
(cond
[(fxbit-set? bkm bit) #f]
[else [else
;; Child in `a`
(cond
[(fx= 0 (fxand bm-bit bcm))
;; Key in `b`, and multiple keys in `a` child means `a` is not a subset
#f]
[else
;; Child in `b`
(and (let ([ac (bnode-child-index-ref a aci)] (and (let ([ac (bnode-child-index-ref a aci)]
[bc (bnode-child-index-ref b bci)]) [bc (bnode-child-index-ref b bci)])
;; Because a cnode is always at the end of the deepest
;; possible chain of bnodes, `ac` and `bc` must both be
;; the same kind of node
(cond (cond
[(bnode? ac) [(bnode? ac)
(bnode-keys-subset? ac bc (bnode-down shift))] (bnode-keys-subset? ac bc (bnode-down shift))]
[else [else
(cnode-keys-subset/equal? ac bc #f)])) (cnode-keys-subset/equal? ac bc #f)]))
(loop (fxsrl abm 1) (fx1+ bit) aki bki (fx1+ aci) (fx1+ bci)))])] (loop (fx- bm bm-bit) aki bki (fx1+ aci) (fx1+ bci)))])]))]))))]))
[(fxbit-set? bkm bit)
(loop (fxsrl abm 1) (fx1+ bit) aki (fx1+ bki) aci bci)]
[(fxbit-set? bcm bit)
(loop (fxsrl abm 1) (fx1+ bit) aki bki aci (fx1+ bci))]
[else
(loop (fxsrl abm 1) (fx1+ bit) aki bki aci bci)]))))])))
(define (bnode-hash-code n hash hc) (define (bnode-hash-code n hash hc)
(let* ([mask (stencil-vector-mask n)] (let* ([mask (stencil-vector-mask n)]