diff --git a/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/config.rkt b/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/config.rkt index 8f13624312..31a3a67011 100644 --- a/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/config.rkt +++ b/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/config.rkt @@ -1,13 +1,16 @@ #lang racket/base (require (for-syntax racket/base)) -(provide Q M L N I +(provide K Q M L N I times unknown with-hash-variants make-large-equal-key/share1 make-large-equal-key/share2) +;; Iterations for nested things +(define K 100) + ;; Iterations for slow things: (define Q 100000) diff --git a/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/iterate.rkt b/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/iterate.rkt index 3f3bd9d376..4da6b6620d 100644 --- a/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/iterate.rkt +++ b/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/iterate.rkt @@ -3,7 +3,7 @@ 'eq:keys (times - (let ([ht (for/hasheq ([i (in-range 100)]) + (let ([ht (for/hasheq ([i (in-range K)]) (values i i))]) (void (for ([i (in-range Q)]) @@ -12,7 +12,7 @@ 'eq:vals (times - (let ([ht (for/hasheq ([i (in-range 100)]) + (let ([ht (for/hasheq ([i (in-range K)]) (values i i))]) (for ([i (in-range Q)]) (void @@ -21,7 +21,7 @@ 'eq:keys-unsafe (times - (let ([ht (for/hasheq ([i (in-range 100)]) + (let ([ht (for/hasheq ([i (in-range K)]) (values i i))]) (void (for ([i (in-range Q)]) @@ -30,7 +30,7 @@ 'eq:vals-unsafe (times - (let ([ht (for/hasheq ([i (in-range 100)]) + (let ([ht (for/hasheq ([i (in-range K)]) (values i i))]) (for ([i (in-range Q)]) (void @@ -39,7 +39,7 @@ 'eq:for-each (times - (let ([ht (for/hasheq ([i (in-range 100)]) + (let ([ht (for/hasheq ([i (in-range K)]) (values i i))]) (for ([i (in-range Q)]) (hash-for-each ht (lambda (k v) 'ok))))) diff --git a/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/set.rkt b/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/set.rkt index b9d0015398..6b2bd34a8c 100644 --- a/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/set.rkt +++ b/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/set.rkt @@ -1,5 +1,13 @@ #lang racket/base -(require "config.rkt") +(require "config.rkt" + racket/list) + +(define shuffled + (parameterize ([current-pseudo-random-generator + (make-pseudo-random-generator)]) + (random-seed 12745) + (shuffle (for/list ([i (in-range K)]) + i)))) (with-hash-variants @@ -14,16 +22,25 @@ 'add-many (times (for ([i (in-range Q)]) - (let loop ([ht EMPTY] [i 100]) + (let loop ([ht EMPTY] [i K]) (if (zero? i) (void (unknown ht)) (loop (hash-set ht (MAKE-KEY i) (MAKE-VAL 'true)) (sub1 i)))))) + 'add-many-in-order + (times + (for ([i (in-range Q)]) + (let loop ([ht EMPTY] [l shuffled]) + (if (null? l) + (void (unknown ht)) + (loop (hash-set ht (car l) (MAKE-VAL 'true)) + (cdr l)))))) + 'add-same (times (for ([i (in-range Q)]) - (let loop ([ht EMPTY] [i 100]) + (let loop ([ht EMPTY] [i K]) (if (zero? i) (void (unknown ht)) (loop (hash-set ht (MAKE-KEY 'a) (MAKE-VAL 'true)) diff --git a/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/subset.rkt b/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/subset.rkt index 284dcbf4ac..3a4fa8c82e 100644 --- a/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/subset.rkt +++ b/pkgs/racket-benchmarks/tests/racket/benchmarks/hash/subset.rkt @@ -1,17 +1,19 @@ #lang racket/base -(require "config.rkt") +(require "config.rkt" + racket/list) (define elems (parameterize ([current-pseudo-random-generator (make-pseudo-random-generator)]) (random-seed 12745) - (hash-keys - (for/fold ([ht #hasheqv()]) ([i 200]) - (let loop () - (define n (random 10000)) - (if (hash-ref ht n #f) - (loop) - (hash-set ht n #t))))))) + (shuffle + (hash-keys + (for/fold ([ht #hasheqv()]) ([i 200]) + (let loop () + (define n (random 10000)) + (if (hash-ref ht n #f) + (loop) + (hash-set ht n #t)))))))) (define (gen n) (for/hasheq ([i (in-range n)] @@ -31,6 +33,14 @@ (when v (error "failed"))) +'eq:subset-shared-small +(times + (let* ([sub-ht (gen 6)] + [ht (gen-more 3 sub-ht)]) + (check-true + (for/and ([i (in-range M)]) + (hash-keys-subset? sub-ht ht))))) + 'eq:subset-unshared-small (times (let ([ht (gen 6)] @@ -83,13 +93,15 @@ (let* ([sub-ht (gen 100)] [ht (gen-more 10 sub-ht)]) (check-true - (for/and ([i (in-range L)]) + (for/and ([i (in-range M)]) (hash-keys-subset? sub-ht ht))))) -'eq:subset-shared-large+large +;; This one amounts to a test of how fast the subset +;; operation iterates internally: +'eq:subset-unshared-large (times (let* ([sub-ht (gen 100)] - [ht (gen-more 100 sub-ht)]) + [ht (gen 100)]) (check-true - (for/and ([i (in-range L)]) + (for/and ([i (in-range M)]) (hash-keys-subset? sub-ht ht))))) diff --git a/racket/src/cs/rumble/hamt-stencil.ss b/racket/src/cs/rumble/hamt-stencil.ss index c9fdb60c93..be56e7fb3e 100644 --- a/racket/src/cs/rumble/hamt-stencil.ss +++ b/racket/src/cs/rumble/hamt-stencil.ss @@ -898,20 +898,18 @@ (cons (car alist) (cnode-assoc-update (cdr alist) key new-p))])) + ;; `a` and `b` must both be cnodes (define (cnode=? a b eql?) - (or - (eq? a b) (and - (cnode? b) (cnode-keys-subset/equal? a b eql?) (fx= (length (cnode-content a)) - (length (cnode-content b)))))) + (length (cnode-content b))))) + ;; `a` and `b` must both be cnodes (define (cnode-keys-subset/equal? a b eql?) (or (eq? a b) (and - (cnode? b) (fx= (cnode-hash a) (cnode-hash b)) (let ([ac (cnode-content a)] [bc (cnode-content b)]) @@ -1038,71 +1036,81 @@ (and (bnode? b) (bnode=? an bn eql? (bnode-down shift)))] [else + ;; `bn` must be a cnode, too (cnode=? an bn eql?)])) (loop (fx+ i 1))))])))))))) + ;; `a` and `b` must both be bnodes (define (bnode-keys-subset? a b shift) - (or - (eq? a b) - (cond - [(cnode? b) - ;; only possible if `a` has just one key, since if it - ;; has multiple keys, they have different hash code and - ;; can be a subset of the the keys of `c` - (and (fx= (hamt-count a) 1) - (let* ([k (bnode-only-key-ref a)] - [hashcode (hamt-wrapped-key-hash-code a k)]) - (cnode-has-key? b (hamt-unwrap-key k) hashcode)))] - [(fx> (hamt-count a) (hamt-count b)) - #f] - [else - (let* ([a-mask (stencil-vector-mask a)] - [akm (fxand (fxsrl a-mask HAMT-KEY-OFFSET) HAMT-GROUP-MASK)] - [acm (fxand (fxsrl a-mask HAMT-CHILD-OFFSET) HAMT-GROUP-MASK)] - [abm (fxior acm akm)] - [b-mask (stencil-vector-mask b)] - [bcm (fxand (fxsrl b-mask HAMT-CHILD-OFFSET) HAMT-GROUP-MASK)] - [bkm (fxand (fxsrl b-mask HAMT-KEY-OFFSET) HAMT-GROUP-MASK)] - [bbm (fxior bcm bkm)]) - (and - (fx= abm (fxand abm bbm)) - (let loop ([abm abm] [bit 0] [aki (fxpopcount16 acm)] [bki (fxpopcount16 bcm)] [aci 0] [bci 0]) - (cond - [(fxzero? abm) #t] - [(fxbit-set? akm bit) - (cond - [(fxbit-set? bkm bit) - (and - (hamt-wrapped-key=? (bnode-key-index-ref a aki) (bnode-key-index-ref b bki)) - (loop (fxsrl abm 1) (fx1+ bit) (fx1+ aki) (fx1+ bki) aci bci))] - [else - (and - (let ([akey (bnode-key-index-ref a aki)] - [bchild (bnode-child-index-ref b bci)]) - (cond - [(bnode? bchild) - (bnode-has-key? bchild (hamt-unwrap-key akey) (hamt-wrapped-key-hash-code a akey) (bnode-down shift))] - [else - (cnode-has-key? bchild (hamt-unwrap-key akey) (hamt-wrapped-key-hash-code a akey))])) - (loop (fxsrl abm 1) (fx1+ bit) (fx1+ aki) bki aci (fx1+ bci)))])] - [(fxbit-set? acm bit) - (cond - [(fxbit-set? bkm bit) #f] - [else - (and (let ([ac (bnode-child-index-ref a aci)] - [bc (bnode-child-index-ref b bci)]) - (cond - [(bnode? ac) - (bnode-keys-subset? ac bc (bnode-down shift))] - [else - (cnode-keys-subset/equal? ac bc #f)])) - (loop (fxsrl abm 1) (fx1+ bit) aki bki (fx1+ aci) (fx1+ bci)))])] - [(fxbit-set? bkm bit) - (loop (fxsrl abm 1) (fx1+ bit) aki (fx1+ bki) aci bci)] - [(fxbit-set? bcm bit) - (loop (fxsrl abm 1) (fx1+ bit) aki bki aci (fx1+ bci))] - [else - (loop (fxsrl abm 1) (fx1+ bit) aki bki aci bci)]))))]))) + (cond + [(eq? a b) #t] + [(fx> (hamt-count a) (hamt-count b)) #f] + [else + (let* ([a-mask (stencil-vector-mask a)] + [akm (fxand (fxsrl a-mask HAMT-KEY-OFFSET) HAMT-GROUP-MASK)] + [acm (fxand (fxsrl a-mask HAMT-CHILD-OFFSET) HAMT-GROUP-MASK)] + [abm (fxior acm akm)] + [b-mask (stencil-vector-mask b)] + [bcm (fxand (fxsrl b-mask HAMT-CHILD-OFFSET) HAMT-GROUP-MASK)] + [bkm (fxand (fxsrl b-mask HAMT-KEY-OFFSET) HAMT-GROUP-MASK)] + [bbm (fxior bcm bkm)]) + (and + (fx= abm (fxand abm bbm)) + ;; At this point, we know that bbm has a bit for every key/child in + ;; `b`, and we know that `a` has a key/child only where `b` does + (let loop ([bm bbm] [aki (fxpopcount16 acm)] [bki (fxpopcount16 bcm)] [aci 0] [bci 0]) + (cond + [(fxzero? bm) #t] + [else + (let ([bm-bit (fxand bm (fxxor bm (fx- bm 1)))]) ; peel off lowest set bit of `bm` + (cond + [(fx= 0 (fxand bm-bit abm)) + ;; No key or child in `a` + (cond + [(fx= 0 (fxand bm-bit bcm)) + ;; Key in `b` + (loop (fx- bm bm-bit) aki (fx1+ bki) aci bci)] + [else + ;; Child in `b` + (loop (fx- bm bm-bit) aki bki aci (fx1+ bci))])] + [(fx= 0 (fxand bm-bit acm)) + ;; Key in `a` + (cond + [(fx= 0 (fxand bm-bit bcm)) + ;; Key in `b` + (and + (hamt-wrapped-key=? (bnode-key-index-ref a aki) (bnode-key-index-ref b bki)) + (loop (fx- bm bm-bit) (fx1+ aki) (fx1+ bki) aci bci))] + [else + ;; Child in `b` + (and + (let ([akey (bnode-key-index-ref a aki)] + [bchild (bnode-child-index-ref b bci)]) + (cond + [(bnode? bchild) + (bnode-has-key? bchild (hamt-unwrap-key akey) (hamt-wrapped-key-hash-code a akey) (bnode-down shift))] + [else + (cnode-has-key? bchild (hamt-unwrap-key akey) (hamt-wrapped-key-hash-code a akey))])) + (loop (fx- bm bm-bit) (fx1+ aki) bki aci (fx1+ bci)))])] + [else + ;; Child in `a` + (cond + [(fx= 0 (fxand bm-bit bcm)) + ;; Key in `b`, and multiple keys in `a` child means `a` is not a subset + #f] + [else + ;; Child in `b` + (and (let ([ac (bnode-child-index-ref a aci)] + [bc (bnode-child-index-ref b bci)]) + ;; Because a cnode is always at the end of the deepest + ;; possible chain of bnodes, `ac` and `bc` must both be + ;; the same kind of node + (cond + [(bnode? ac) + (bnode-keys-subset? ac bc (bnode-down shift))] + [else + (cnode-keys-subset/equal? ac bc #f)])) + (loop (fx- bm bm-bit) aki bki (fx1+ aci) (fx1+ bci)))])]))]))))])) (define (bnode-hash-code n hash hc) (let* ([mask (stencil-vector-mask n)]