From f34ff31aef5bb95f22c90afd6bcdee7d52cf7673 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Thu, 19 Nov 2020 19:21:34 -0700 Subject: [PATCH] cs: improve `equal-hash-code` Mixing for sequences did not produce enough variety related to the length of the sequence. For example, '(0 0) and '(0 0 0) and '(0 0 0 0) had the same hash code. --- racket/src/cs/rumble/hash-code.ss | 174 ++++++++++++++---------------- 1 file changed, 83 insertions(+), 91 deletions(-) diff --git a/racket/src/cs/rumble/hash-code.ss b/racket/src/cs/rumble/hash-code.ss index 4c704367a4..68d1496404 100644 --- a/racket/src/cs/rumble/hash-code.ss +++ b/racket/src/cs/rumble/hash-code.ss @@ -52,28 +52,33 @@ ;; There was contention, so try again (eq-hash-code x)]))])) +(define (mix-hash-code hc) + (let ([hc2 (fx+/wraparound hc (fxsll/wraparound (fx+/wraparound hc 1) 10))]) + (fxlogxor hc2 (fxsrl hc2 6)))) + +(define (->fx v) + (if (fixnum? v) + v + (bitwise-and v (greatest-fixnum)))) + ;; Mostly copied from Chez Scheme's "newhash.ss": (define number-hash (lambda (z) - (let* ([mix - (lambda (hc) - (let ([hc2 (fx+/wraparound hc (fxsll/wraparound hc 10))]) - (fxlogxor hc2 (fxsrl hc2 6))))]) - (cond - [(fixnum? z) (if (fx< z 0) (fxand z (most-positive-fixnum)) z)] - [(flonum? z) (#3%$flhash z)] - [(bignum? z) (let ([len (integer-length z)]) - (let loop ([i 0] [hc 0]) - (cond - [(fx>= i len) hc] - [else - (let ([next-i (fx+ i (fx- (fixnum-width) 1))]) - (loop next-i - (fx+/wraparound (bitwise-bit-field z i next-i) - (mix hc))))])))] - [(ratnum? z) (number-hash (+ (* (numerator z) 5) (denominator z)))] - [else (logand (logxor (lognot (number-hash (real-part z))) (number-hash (imag-part z))) - (most-positive-fixnum))])))) + (cond + [(fixnum? z) (if (fx< z 0) (fxand z (most-positive-fixnum)) z)] + [(flonum? z) (#3%$flhash z)] + [(bignum? z) (let ([len (integer-length z)]) + (let loop ([i 0] [hc 0]) + (cond + [(fx>= i len) hc] + [else + (let ([next-i (fx+ i (fx- (fixnum-width) 1))]) + (loop next-i + (fx+/wraparound (bitwise-bit-field z i next-i) + (mix-hash-code hc))))])))] + [(ratnum? z) (number-hash (+ (* (numerator z) 5) (denominator z)))] + [else (logand (logxor (lognot (number-hash (real-part z))) (number-hash (imag-part z))) + (most-positive-fixnum))]))) (define (eqv-hash-code x) (cond @@ -117,82 +122,69 @@ (define MAX-HASH-BURN 128) (define (equal-hash-loop x burn hc) - (let* ([->fx - (lambda (v) - (if (fixnum? v) - v - (modulo v (greatest-fixnum))))] - [mix1 - (lambda (hc) - (let ([hc2 (fx+/wraparound hc (fxsll/wraparound hc 10))]) - (fxlogxor hc2 (fxsrl hc2 6))))] - [mix2 - (lambda (hc) - (mix1 hc))]) - (cond - [(fx> burn MAX-HASH-BURN) (values hc burn)] - [(boolean? x) (values (fx+/wraparound hc (if x #x0ace0120 #x0cafe121)) burn)] - [(null? x) (values (fx+/wraparound hc #x0cabd122) burn)] - [(number? x) (values (fx+/wraparound hc (number-hash x)) burn)] - [(char? x) (values (fx+/wraparound hc (char->integer x)) burn)] - [(symbol? x) (values (fx+/wraparound hc (symbol-hash x)) burn)] - [(string? x) (values (fx+/wraparound hc (string-hash x)) burn)] - [(bytevector? x) (values (fx+/wraparound hc (equal-hash x)) burn)] - [(fxvector? x) (values (fx+/wraparound hc (equal-hash x)) burn)] - [(flvector? x) (values (fx+/wraparound hc (equal-hash x)) burn)] - [(box? x) (equal-hash-loop (unbox x) (fx+ burn 1) (fx+/wraparound hc 1))] - [(pair? x) - (let-values ([(hc0 burn) (equal-hash-loop (car x) (fx+ burn 2) 0)]) - (let ([hc (fx+/wraparound (mix1 hc) hc0)] - [r (cdr x)]) - (if (and (pair? r) (list? r)) - ;; If it continues as a list, don't count cdr direction as burn: - (equal-hash-loop r (fx- burn 2) hc) - (equal-hash-loop r burn hc))))] - [(vector? x) - (let ([len (vector-length x)]) - (cond + (cond + [(fx> burn MAX-HASH-BURN) (values hc burn)] + [(boolean? x) (values (fx+/wraparound hc (if x #x0ace0120 #x0cafe121)) burn)] + [(null? x) (values (fx+/wraparound hc #x0cabd122) burn)] + [(number? x) (values (fx+/wraparound hc (number-hash x)) burn)] + [(char? x) (values (fx+/wraparound hc (char->integer x)) burn)] + [(symbol? x) (values (fx+/wraparound hc (symbol-hash x)) burn)] + [(string? x) (values (fx+/wraparound hc (string-hash x)) burn)] + [(bytevector? x) (values (fx+/wraparound hc (equal-hash x)) burn)] + [(fxvector? x) (values (fx+/wraparound hc (equal-hash x)) burn)] + [(flvector? x) (values (fx+/wraparound hc (equal-hash x)) burn)] + [(box? x) (equal-hash-loop (unbox x) (fx+ burn 1) (fx+/wraparound hc 1))] + [(pair? x) + (let-values ([(hc0 burn) (equal-hash-loop (car x) (fx+ burn 2) 0)]) + (let ([hc (fx+/wraparound (mix-hash-code hc) hc0)] + [r (cdr x)]) + (if (and (pair? r) (list? r)) + ;; If it continues as a list, don't count cdr direction as burn: + (equal-hash-loop r (fx- burn 2) hc) + (equal-hash-loop r burn hc))))] + [(vector? x) + (let ([len (vector-length x)]) + (cond [(fx= len 0) (values (fx+/wraparound hc 1) burn)] [else (let vec-loop ([i 0] [burn burn] [hc hc]) (cond - [(fx= i len) (values hc burn)] - [else - (let-values ([(hc0 burn) (equal-hash-loop (vector-ref x i) (fx+ burn 2) 0)]) - (vec-loop (fx+ i 1) - burn - (fx+/wraparound (mix2 hc) hc0)))]))]))] - [(hash? x) - ;; Treat hash-table hashing specially, so it can be order-insensitive - (let ([burn (fx* (fxmax burn 1) 2)]) - (let ([hc (fx+/wraparound hc (->fx (hash-hash-code - x - (lambda (x) - (let-values ([(hc0 burn0) (equal-hash-loop x burn 0)]) - hc0)))))]) - (values hc burn)))] - [(and (#%$record? x) (#%$record-hash-procedure x)) - => (lambda (rec-hash) - (let ([burn (fx+ burn 2)]) - (let ([hc (fx+/wraparound hc (->fx - (rec-hash x (lambda (x) - (let-values ([(hc0 burn0) (equal-hash-loop x burn 0)]) - (set! burn burn0) - hc0)))))]) - (values hc burn))))] - [(impersonator? x) - ;; If an impersonator wraps a value where `equal?` hashing is - ;; `eq?` hashing, such as for a procedure, then make sure - ;; we discard the impersonator wrapper. - (equal-hash-loop (impersonator-val x) burn hc)] - [else (values (fx+/wraparound hc (eq-hash-code x)) burn)]))) + [(fx= i len) (values hc burn)] + [else + (let-values ([(hc0 burn) (equal-hash-loop (vector-ref x i) (fx+ burn 2) 0)]) + (vec-loop (fx+ i 1) + burn + (fx+/wraparound (mix-hash-code hc) hc0)))]))]))] + [(hash? x) + ;; Treat hash-table hashing specially, so it can be order-insensitive + (let ([burn (fx* (fxmax burn 1) 2)]) + (let ([hc (fx+/wraparound hc (->fx (hash-hash-code + x + (lambda (x) + (let-values ([(hc0 burn0) (equal-hash-loop x burn 0)]) + hc0)))))]) + (values hc burn)))] + [(and (#%$record? x) (#%$record-hash-procedure x)) + => (lambda (rec-hash) + (let ([burn (fx+ burn 2)]) + (let ([hc (fx+/wraparound hc (->fx + (rec-hash x (lambda (x) + (let-values ([(hc0 burn0) (equal-hash-loop x burn 0)]) + (set! burn burn0) + hc0)))))]) + (values hc burn))))] + [(impersonator? x) + ;; If an impersonator wraps a value where `equal?` hashing is + ;; `eq?` hashing, such as for a procedure, then make sure + ;; we discard the impersonator wrapper. + (equal-hash-loop (impersonator-val x) burn hc)] + [else (values (fx+/wraparound hc (eq-hash-code x)) burn)])) (define (hash-code-combine hc v) - (bitwise-and (+ (bitwise-arithmetic-shift-left hc 10) - (bitwise-arithmetic-shift-right hc 6) - v) - (greatest-fixnum))) + (fx+/wraparound (mix-hash-code (->fx hc)) + (->fx v))) -(define (hash-code-combine-unordered hc v) - (bitwise-and (+ hc v) - (greatest-fixnum))) +;; Required to be associative and commutative: +(define (hash-code-combine-unordered a b) + (fx+/wraparound (->fx a) + (->fx b)))