cs: improve stencil-vector HMAT hash-keys-subset? performance

Also, improve some hash benchmarks.
This commit is contained in:
Matthew Flatt 2020-01-10 10:46:10 -07:00
parent d5930a18c6
commit 76726ee928
5 changed files with 127 additions and 87 deletions

View File

@ -1,13 +1,16 @@
#lang racket/base
(require (for-syntax racket/base))
(provide Q M L N I
(provide K Q M L N I
times
unknown
with-hash-variants
make-large-equal-key/share1
make-large-equal-key/share2)
;; Iterations for nested things
(define K 100)
;; Iterations for slow things:
(define Q 100000)

View File

@ -3,7 +3,7 @@
'eq:keys
(times
(let ([ht (for/hasheq ([i (in-range 100)])
(let ([ht (for/hasheq ([i (in-range K)])
(values i i))])
(void
(for ([i (in-range Q)])
@ -12,7 +12,7 @@
'eq:vals
(times
(let ([ht (for/hasheq ([i (in-range 100)])
(let ([ht (for/hasheq ([i (in-range K)])
(values i i))])
(for ([i (in-range Q)])
(void
@ -21,7 +21,7 @@
'eq:keys-unsafe
(times
(let ([ht (for/hasheq ([i (in-range 100)])
(let ([ht (for/hasheq ([i (in-range K)])
(values i i))])
(void
(for ([i (in-range Q)])
@ -30,7 +30,7 @@
'eq:vals-unsafe
(times
(let ([ht (for/hasheq ([i (in-range 100)])
(let ([ht (for/hasheq ([i (in-range K)])
(values i i))])
(for ([i (in-range Q)])
(void
@ -39,7 +39,7 @@
'eq:for-each
(times
(let ([ht (for/hasheq ([i (in-range 100)])
(let ([ht (for/hasheq ([i (in-range K)])
(values i i))])
(for ([i (in-range Q)])
(hash-for-each ht (lambda (k v) 'ok)))))

View File

@ -1,5 +1,13 @@
#lang racket/base
(require "config.rkt")
(require "config.rkt"
racket/list)
(define shuffled
(parameterize ([current-pseudo-random-generator
(make-pseudo-random-generator)])
(random-seed 12745)
(shuffle (for/list ([i (in-range K)])
i))))
(with-hash-variants
@ -14,16 +22,25 @@
'add-many
(times
(for ([i (in-range Q)])
(let loop ([ht EMPTY] [i 100])
(let loop ([ht EMPTY] [i K])
(if (zero? i)
(void (unknown ht))
(loop (hash-set ht (MAKE-KEY i) (MAKE-VAL 'true))
(sub1 i))))))
'add-many-in-order
(times
(for ([i (in-range Q)])
(let loop ([ht EMPTY] [l shuffled])
(if (null? l)
(void (unknown ht))
(loop (hash-set ht (car l) (MAKE-VAL 'true))
(cdr l))))))
'add-same
(times
(for ([i (in-range Q)])
(let loop ([ht EMPTY] [i 100])
(let loop ([ht EMPTY] [i K])
(if (zero? i)
(void (unknown ht))
(loop (hash-set ht (MAKE-KEY 'a) (MAKE-VAL 'true))

View File

@ -1,17 +1,19 @@
#lang racket/base
(require "config.rkt")
(require "config.rkt"
racket/list)
(define elems
(parameterize ([current-pseudo-random-generator
(make-pseudo-random-generator)])
(random-seed 12745)
(hash-keys
(for/fold ([ht #hasheqv()]) ([i 200])
(let loop ()
(define n (random 10000))
(if (hash-ref ht n #f)
(loop)
(hash-set ht n #t)))))))
(shuffle
(hash-keys
(for/fold ([ht #hasheqv()]) ([i 200])
(let loop ()
(define n (random 10000))
(if (hash-ref ht n #f)
(loop)
(hash-set ht n #t))))))))
(define (gen n)
(for/hasheq ([i (in-range n)]
@ -31,6 +33,14 @@
(when v
(error "failed")))
'eq:subset-shared-small
(times
(let* ([sub-ht (gen 6)]
[ht (gen-more 3 sub-ht)])
(check-true
(for/and ([i (in-range M)])
(hash-keys-subset? sub-ht ht)))))
'eq:subset-unshared-small
(times
(let ([ht (gen 6)]
@ -83,13 +93,15 @@
(let* ([sub-ht (gen 100)]
[ht (gen-more 10 sub-ht)])
(check-true
(for/and ([i (in-range L)])
(for/and ([i (in-range M)])
(hash-keys-subset? sub-ht ht)))))
'eq:subset-shared-large+large
;; This one amounts to a test of how fast the subset
;; operation iterates internally:
'eq:subset-unshared-large
(times
(let* ([sub-ht (gen 100)]
[ht (gen-more 100 sub-ht)])
[ht (gen 100)])
(check-true
(for/and ([i (in-range L)])
(for/and ([i (in-range M)])
(hash-keys-subset? sub-ht ht)))))

View File

@ -898,20 +898,18 @@
(cons (car alist)
(cnode-assoc-update (cdr alist) key new-p))]))
;; `a` and `b` must both be cnodes
(define (cnode=? a b eql?)
(or
(eq? a b)
(and
(cnode? b)
(cnode-keys-subset/equal? a b eql?)
(fx= (length (cnode-content a))
(length (cnode-content b))))))
(length (cnode-content b)))))
;; `a` and `b` must both be cnodes
(define (cnode-keys-subset/equal? a b eql?)
(or
(eq? a b)
(and
(cnode? b)
(fx= (cnode-hash a) (cnode-hash b))
(let ([ac (cnode-content a)]
[bc (cnode-content b)])
@ -1038,71 +1036,81 @@
(and (bnode? b)
(bnode=? an bn eql? (bnode-down shift)))]
[else
;; `bn` must be a cnode, too
(cnode=? an bn eql?)]))
(loop (fx+ i 1))))]))))))))
;; `a` and `b` must both be bnodes
(define (bnode-keys-subset? a b shift)
(or
(eq? a b)
(cond
[(cnode? b)
;; only possible if `a` has just one key, since if it
;; has multiple keys, they have different hash code and
;; can be a subset of the the keys of `c`
(and (fx= (hamt-count a) 1)
(let* ([k (bnode-only-key-ref a)]
[hashcode (hamt-wrapped-key-hash-code a k)])
(cnode-has-key? b (hamt-unwrap-key k) hashcode)))]
[(fx> (hamt-count a) (hamt-count b))
#f]
[else
(let* ([a-mask (stencil-vector-mask a)]
[akm (fxand (fxsrl a-mask HAMT-KEY-OFFSET) HAMT-GROUP-MASK)]
[acm (fxand (fxsrl a-mask HAMT-CHILD-OFFSET) HAMT-GROUP-MASK)]
[abm (fxior acm akm)]
[b-mask (stencil-vector-mask b)]
[bcm (fxand (fxsrl b-mask HAMT-CHILD-OFFSET) HAMT-GROUP-MASK)]
[bkm (fxand (fxsrl b-mask HAMT-KEY-OFFSET) HAMT-GROUP-MASK)]
[bbm (fxior bcm bkm)])
(and
(fx= abm (fxand abm bbm))
(let loop ([abm abm] [bit 0] [aki (fxpopcount16 acm)] [bki (fxpopcount16 bcm)] [aci 0] [bci 0])
(cond
[(fxzero? abm) #t]
[(fxbit-set? akm bit)
(cond
[(fxbit-set? bkm bit)
(and
(hamt-wrapped-key=? (bnode-key-index-ref a aki) (bnode-key-index-ref b bki))
(loop (fxsrl abm 1) (fx1+ bit) (fx1+ aki) (fx1+ bki) aci bci))]
[else
(and
(let ([akey (bnode-key-index-ref a aki)]
[bchild (bnode-child-index-ref b bci)])
(cond
[(bnode? bchild)
(bnode-has-key? bchild (hamt-unwrap-key akey) (hamt-wrapped-key-hash-code a akey) (bnode-down shift))]
[else
(cnode-has-key? bchild (hamt-unwrap-key akey) (hamt-wrapped-key-hash-code a akey))]))
(loop (fxsrl abm 1) (fx1+ bit) (fx1+ aki) bki aci (fx1+ bci)))])]
[(fxbit-set? acm bit)
(cond
[(fxbit-set? bkm bit) #f]
[else
(and (let ([ac (bnode-child-index-ref a aci)]
[bc (bnode-child-index-ref b bci)])
(cond
[(bnode? ac)
(bnode-keys-subset? ac bc (bnode-down shift))]
[else
(cnode-keys-subset/equal? ac bc #f)]))
(loop (fxsrl abm 1) (fx1+ bit) aki bki (fx1+ aci) (fx1+ bci)))])]
[(fxbit-set? bkm bit)
(loop (fxsrl abm 1) (fx1+ bit) aki (fx1+ bki) aci bci)]
[(fxbit-set? bcm bit)
(loop (fxsrl abm 1) (fx1+ bit) aki bki aci (fx1+ bci))]
[else
(loop (fxsrl abm 1) (fx1+ bit) aki bki aci bci)]))))])))
(cond
[(eq? a b) #t]
[(fx> (hamt-count a) (hamt-count b)) #f]
[else
(let* ([a-mask (stencil-vector-mask a)]
[akm (fxand (fxsrl a-mask HAMT-KEY-OFFSET) HAMT-GROUP-MASK)]
[acm (fxand (fxsrl a-mask HAMT-CHILD-OFFSET) HAMT-GROUP-MASK)]
[abm (fxior acm akm)]
[b-mask (stencil-vector-mask b)]
[bcm (fxand (fxsrl b-mask HAMT-CHILD-OFFSET) HAMT-GROUP-MASK)]
[bkm (fxand (fxsrl b-mask HAMT-KEY-OFFSET) HAMT-GROUP-MASK)]
[bbm (fxior bcm bkm)])
(and
(fx= abm (fxand abm bbm))
;; At this point, we know that bbm has a bit for every key/child in
;; `b`, and we know that `a` has a key/child only where `b` does
(let loop ([bm bbm] [aki (fxpopcount16 acm)] [bki (fxpopcount16 bcm)] [aci 0] [bci 0])
(cond
[(fxzero? bm) #t]
[else
(let ([bm-bit (fxand bm (fxxor bm (fx- bm 1)))]) ; peel off lowest set bit of `bm`
(cond
[(fx= 0 (fxand bm-bit abm))
;; No key or child in `a`
(cond
[(fx= 0 (fxand bm-bit bcm))
;; Key in `b`
(loop (fx- bm bm-bit) aki (fx1+ bki) aci bci)]
[else
;; Child in `b`
(loop (fx- bm bm-bit) aki bki aci (fx1+ bci))])]
[(fx= 0 (fxand bm-bit acm))
;; Key in `a`
(cond
[(fx= 0 (fxand bm-bit bcm))
;; Key in `b`
(and
(hamt-wrapped-key=? (bnode-key-index-ref a aki) (bnode-key-index-ref b bki))
(loop (fx- bm bm-bit) (fx1+ aki) (fx1+ bki) aci bci))]
[else
;; Child in `b`
(and
(let ([akey (bnode-key-index-ref a aki)]
[bchild (bnode-child-index-ref b bci)])
(cond
[(bnode? bchild)
(bnode-has-key? bchild (hamt-unwrap-key akey) (hamt-wrapped-key-hash-code a akey) (bnode-down shift))]
[else
(cnode-has-key? bchild (hamt-unwrap-key akey) (hamt-wrapped-key-hash-code a akey))]))
(loop (fx- bm bm-bit) (fx1+ aki) bki aci (fx1+ bci)))])]
[else
;; Child in `a`
(cond
[(fx= 0 (fxand bm-bit bcm))
;; Key in `b`, and multiple keys in `a` child means `a` is not a subset
#f]
[else
;; Child in `b`
(and (let ([ac (bnode-child-index-ref a aci)]
[bc (bnode-child-index-ref b bci)])
;; Because a cnode is always at the end of the deepest
;; possible chain of bnodes, `ac` and `bc` must both be
;; the same kind of node
(cond
[(bnode? ac)
(bnode-keys-subset? ac bc (bnode-down shift))]
[else
(cnode-keys-subset/equal? ac bc #f)]))
(loop (fx- bm bm-bit) aki bki (fx1+ aci) (fx1+ bci)))])]))]))))]))
(define (bnode-hash-code n hash hc)
(let* ([mask (stencil-vector-mask n)]