cs: improve equal-hash-code

Mixing for sequences did not produce enough variety related to the
length of the sequence. For example, '(0 0) and '(0 0 0) and '(0 0 0
0) had the same hash code.
This commit is contained in:
Matthew Flatt 2020-11-19 19:21:34 -07:00
parent d1a61e5ab5
commit f34ff31aef

View File

@ -52,13 +52,18 @@
;; There was contention, so try again ;; There was contention, so try again
(eq-hash-code x)]))])) (eq-hash-code x)]))]))
(define (mix-hash-code hc)
(let ([hc2 (fx+/wraparound hc (fxsll/wraparound (fx+/wraparound hc 1) 10))])
(fxlogxor hc2 (fxsrl hc2 6))))
(define (->fx v)
(if (fixnum? v)
v
(bitwise-and v (greatest-fixnum))))
;; Mostly copied from Chez Scheme's "newhash.ss": ;; Mostly copied from Chez Scheme's "newhash.ss":
(define number-hash (define number-hash
(lambda (z) (lambda (z)
(let* ([mix
(lambda (hc)
(let ([hc2 (fx+/wraparound hc (fxsll/wraparound hc 10))])
(fxlogxor hc2 (fxsrl hc2 6))))])
(cond (cond
[(fixnum? z) (if (fx< z 0) (fxand z (most-positive-fixnum)) z)] [(fixnum? z) (if (fx< z 0) (fxand z (most-positive-fixnum)) z)]
[(flonum? z) (#3%$flhash z)] [(flonum? z) (#3%$flhash z)]
@ -70,10 +75,10 @@
(let ([next-i (fx+ i (fx- (fixnum-width) 1))]) (let ([next-i (fx+ i (fx- (fixnum-width) 1))])
(loop next-i (loop next-i
(fx+/wraparound (bitwise-bit-field z i next-i) (fx+/wraparound (bitwise-bit-field z i next-i)
(mix hc))))])))] (mix-hash-code hc))))])))]
[(ratnum? z) (number-hash (+ (* (numerator z) 5) (denominator z)))] [(ratnum? z) (number-hash (+ (* (numerator z) 5) (denominator z)))]
[else (logand (logxor (lognot (number-hash (real-part z))) (number-hash (imag-part z))) [else (logand (logxor (lognot (number-hash (real-part z))) (number-hash (imag-part z)))
(most-positive-fixnum))])))) (most-positive-fixnum))])))
(define (eqv-hash-code x) (define (eqv-hash-code x)
(cond (cond
@ -117,18 +122,6 @@
(define MAX-HASH-BURN 128) (define MAX-HASH-BURN 128)
(define (equal-hash-loop x burn hc) (define (equal-hash-loop x burn hc)
(let* ([->fx
(lambda (v)
(if (fixnum? v)
v
(modulo v (greatest-fixnum))))]
[mix1
(lambda (hc)
(let ([hc2 (fx+/wraparound hc (fxsll/wraparound hc 10))])
(fxlogxor hc2 (fxsrl hc2 6))))]
[mix2
(lambda (hc)
(mix1 hc))])
(cond (cond
[(fx> burn MAX-HASH-BURN) (values hc burn)] [(fx> burn MAX-HASH-BURN) (values hc burn)]
[(boolean? x) (values (fx+/wraparound hc (if x #x0ace0120 #x0cafe121)) burn)] [(boolean? x) (values (fx+/wraparound hc (if x #x0ace0120 #x0cafe121)) burn)]
@ -143,7 +136,7 @@
[(box? x) (equal-hash-loop (unbox x) (fx+ burn 1) (fx+/wraparound hc 1))] [(box? x) (equal-hash-loop (unbox x) (fx+ burn 1) (fx+/wraparound hc 1))]
[(pair? x) [(pair? x)
(let-values ([(hc0 burn) (equal-hash-loop (car x) (fx+ burn 2) 0)]) (let-values ([(hc0 burn) (equal-hash-loop (car x) (fx+ burn 2) 0)])
(let ([hc (fx+/wraparound (mix1 hc) hc0)] (let ([hc (fx+/wraparound (mix-hash-code hc) hc0)]
[r (cdr x)]) [r (cdr x)])
(if (and (pair? r) (list? r)) (if (and (pair? r) (list? r))
;; If it continues as a list, don't count cdr direction as burn: ;; If it continues as a list, don't count cdr direction as burn:
@ -161,7 +154,7 @@
(let-values ([(hc0 burn) (equal-hash-loop (vector-ref x i) (fx+ burn 2) 0)]) (let-values ([(hc0 burn) (equal-hash-loop (vector-ref x i) (fx+ burn 2) 0)])
(vec-loop (fx+ i 1) (vec-loop (fx+ i 1)
burn burn
(fx+/wraparound (mix2 hc) hc0)))]))]))] (fx+/wraparound (mix-hash-code hc) hc0)))]))]))]
[(hash? x) [(hash? x)
;; Treat hash-table hashing specially, so it can be order-insensitive ;; Treat hash-table hashing specially, so it can be order-insensitive
(let ([burn (fx* (fxmax burn 1) 2)]) (let ([burn (fx* (fxmax burn 1) 2)])
@ -185,14 +178,13 @@
;; `eq?` hashing, such as for a procedure, then make sure ;; `eq?` hashing, such as for a procedure, then make sure
;; we discard the impersonator wrapper. ;; we discard the impersonator wrapper.
(equal-hash-loop (impersonator-val x) burn hc)] (equal-hash-loop (impersonator-val x) burn hc)]
[else (values (fx+/wraparound hc (eq-hash-code x)) burn)]))) [else (values (fx+/wraparound hc (eq-hash-code x)) burn)]))
(define (hash-code-combine hc v) (define (hash-code-combine hc v)
(bitwise-and (+ (bitwise-arithmetic-shift-left hc 10) (fx+/wraparound (mix-hash-code (->fx hc))
(bitwise-arithmetic-shift-right hc 6) (->fx v)))
v)
(greatest-fixnum)))
(define (hash-code-combine-unordered hc v) ;; Required to be associative and commutative:
(bitwise-and (+ hc v) (define (hash-code-combine-unordered a b)
(greatest-fixnum))) (fx+/wraparound (->fx a)
(->fx b)))