From 3c382284a44f230e48c7ac215aeac9253ff864cb Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Mon, 7 Dec 2020 17:38:28 -0700 Subject: [PATCH] cs: better `equal-secondary-hash-code` Since CS doesn't use secondary hash code internally, the `equal-secondary-hash-code` function wasn't really implemented. Implement it reasonably for applications that might use it to implement other data structures. Testing exposed other problems related to error reporting for a broken hash-function result and for using values within immutable hash tables. Closes #3536 --- .../racket-test-core/tests/racket/struct.rktl | 37 +++++++ racket/src/cs/c/Makefile.in | 2 + racket/src/cs/rumble/hamt-stencil.ss | 13 +-- racket/src/cs/rumble/hash-code.ss | 96 +++++++++++++++---- racket/src/cs/rumble/struct.ss | 3 +- 5 files changed, 127 insertions(+), 24 deletions(-) diff --git a/pkgs/racket-test-core/tests/racket/struct.rktl b/pkgs/racket-test-core/tests/racket/struct.rktl index 17fc8fc77a..751d4f2c9c 100644 --- a/pkgs/racket-test-core/tests/racket/struct.rktl +++ b/pkgs/racket-test-core/tests/racket/struct.rktl @@ -990,6 +990,43 @@ ;; ---------------------------------------- +(let ([got-here? #f]) + (struct foo2 (f g) #:transparent + #:property prop:equal+hash + (list (λ (a b recur) #f) + (λ (a recur) 0) + (λ (a recur) (set! got-here? #t) 0))) + (define (check-secondary-used v) + (set! got-here? #f) + (equal-secondary-hash-code v) + got-here?) + (test #t check-secondary-used (foo2 0 "ggg")) + ;; Although nothing promises that we'll hash an element within a + ;; list, vector, etc., the current implementation is meant to + ;; do so in at least these cases: + (test #t check-secondary-used (list (foo2 0 "ggg"))) + (test #t check-secondary-used (cons 6 (foo2 0 "ggg"))) + (test #t check-secondary-used (vector (foo2 0 "ggg"))) + (test #t check-secondary-used (box (foo2 0 "ggg"))) + (test #t check-secondary-used (hash 'ok (foo2 0 "ggg")))) + +;; ---------------------------------------- + +(let ([got-here? #f]) + (struct foo (f g) + #:property prop:equal+hash + (list (λ (a b recur) #f) + (λ (a recur) 'wrong) + (λ (a recur) 'wrong))) + (err/rt-test (equal-hash-code (foo 1 2)) + exn:fail:contract? + #rx"hash procedure returned a value other than an exact integer") + (err/rt-test (equal-secondary-hash-code (foo 1 2)) + exn:fail:contract? + #rx"hash procedure returned a value other than an exact integer")) + +;; ---------------------------------------- + (let () (define-struct foo (a [b #:mutable]) #:transparent) (define-struct (bar foo) (f g) diff --git a/racket/src/cs/c/Makefile.in b/racket/src/cs/c/Makefile.in index 579b0ac882..5c2e73873c 100644 --- a/racket/src/cs/c/Makefile.in +++ b/racket/src/cs/c/Makefile.in @@ -281,12 +281,14 @@ adjust-framework-boot-compress: $(BOOTSTRAP_RACKET) $(srcdir)/adjust-compress.rkt @BOOT_COMPRESS_COMP@ $(FW_BOOT_DEST)/petite.boot $(FW_BOOT_DEST)/scheme.boot $(FW_BOOT_DEST)/racket.boot mac-embed-boot-fw: + rm -f $(EMBED_DEST) cp $(EMBED_SRC) $(EMBED_DEST) $(STRIP_SIGNATURE) $(EMBED_DEST) FW_BOOT_DESTS = $(FW_BOOT_DEST)/petite.boot $(FW_BOOT_DEST)/scheme.boot $(FW_BOOT_DEST)/racket.boot mac-embed-boot-static: + rm -f $(EMBED_DEST) cp $(EMBED_SRC) $(EMBED_DEST) $(STRIP_SIGNATURE) $(EMBED_DEST) $(BOOTSTRAP_RACKET) $(srcdir)/embed-boot.rkt @BOOT_COMPRESS_COMP@ $(EMBED_SRC) $(EMBED_DEST) $(FW_BOOT_DESTS) diff --git a/racket/src/cs/rumble/hamt-stencil.ss b/racket/src/cs/rumble/hamt-stencil.ss index 5f1dd3832d..4680ca5148 100644 --- a/racket/src/cs/rumble/hamt-stencil.ss +++ b/racket/src/cs/rumble/hamt-stencil.ss @@ -1139,12 +1139,13 @@ ;; rare. (hash-code-combine hc (cnode-hash c))])))] [else - (let loop ([i 0] [hc hc]) - (cond - [(fx< i val-count) - (loop (fx1+ i) - (hash-code-combine hc (hash (stencil-vector-ref n (fx+ i child-count key-count)))))] - [else hc]))])))) + (let ([offset (fx+ HAMT-STATIC-FIELD-COUNT child-count key-count)]) + (let loop ([i 0] [hc hc]) + (cond + [(fx< i val-count) + (loop (fx1+ i) + (hash-code-combine hc (hash (stencil-vector-ref n offset))))] + [else hc])))])))) (define (bnode-fold n f nil) (let* ([mask (stencil-vector-mask n)] diff --git a/racket/src/cs/rumble/hash-code.ss b/racket/src/cs/rumble/hash-code.ss index 68d1496404..653499d5a6 100644 --- a/racket/src/cs/rumble/hash-code.ss +++ b/racket/src/cs/rumble/hash-code.ss @@ -61,6 +61,14 @@ v (bitwise-and v (greatest-fixnum)))) +(define (->fx/checked who v) + (cond + [(fixnum? v) v] + [(bignum? v) (bitwise-and v (greatest-fixnum))] + [else (raise-arguments-error who + "hash procedure returned a value other than an exact integer" + "result" v)])) + ;; Mostly copied from Chez Scheme's "newhash.ss": (define number-hash (lambda (z) @@ -89,8 +97,8 @@ ;; We don't use `equal-hash` because we need impersonators to be able ;; to generate the same hash code as the unwrapped value. (define (equal-hash-code x) - (call-with-values (lambda () (equal-hash-loop x 0 0)) - (lambda (hc burn) (logand hc (most-positive-fixnum))))) + (let-values ([(hc burn) (equal-hash-loop x 0 0)]) + hc)) ;; A #t result implies that `equal-hash-code` and equality checking is ;; pretty fast @@ -104,20 +112,8 @@ (not (#%$record-hash-procedure x))))) (define (equal-secondary-hash-code x) - (cond - [(boolean? x) 1] - [(null? x) 2] - [(number? x) 3] - [(char? x) 4] - [(symbol? x) 5] - [(string? x) 6] - [(bytevector? x) 7] - [(box? x) 8] - [(pair? x) 9] - [(vector? x) (vector-length x)] - [(#%$record? x) (eq-hash-code (record-rtd x))] - [(impersonator? x) (equal-secondary-hash-code (impersonator-val x))] - [else 100])) + (let-values ([(hc burn) (equal-secondary-hash-loop x 0 0)]) + hc)) (define MAX-HASH-BURN 128) @@ -167,7 +163,8 @@ [(and (#%$record? x) (#%$record-hash-procedure x)) => (lambda (rec-hash) (let ([burn (fx+ burn 2)]) - (let ([hc (fx+/wraparound hc (->fx + (let ([hc (fx+/wraparound hc (->fx/checked + 'equal-hash-code (rec-hash x (lambda (x) (let-values ([(hc0 burn0) (equal-hash-loop x burn 0)]) (set! burn burn0) @@ -180,6 +177,71 @@ (equal-hash-loop (impersonator-val x) burn hc)] [else (values (fx+/wraparound hc (eq-hash-code x)) burn)])) +(define (equal-secondary-hash-loop x burn hc) + (cond + [(fx> burn MAX-HASH-BURN) (values hc burn)] + [(boolean? x) (values (fx+/wraparound hc 1) burn)] + [(null? x) (values (fx+/wraparound hc 2) burn)] + [(number? x) (values (fx+/wraparound hc (number-secondary-hash x)) burn)] + [(char? x) (values (fx+/wraparound hc (fxnot (char->integer x))) burn)] + [(symbol? x) (values (fx+/wraparound hc (fxnot (symbol-hash x))) burn)] + [(string? x) (values (fx+/wraparound hc (fxnot (string-hash x))) burn)] + [(bytevector? x) (values (fx+/wraparound hc (equal-hash x)) burn)] + [(fxvector? x) (values (fx+/wraparound hc (equal-hash x)) burn)] + [(flvector? x) (values (fx+/wraparound hc (equal-hash x)) burn)] + [(box? x) (equal-secondary-hash-loop (unbox x) (fx+ burn 1) (fx+/wraparound hc 10))] + [(pair? x) + (let-values ([(hc0 burn) (equal-secondary-hash-loop (car x) (fx+ burn 2) 0)]) + (let ([hc (fx+/wraparound (mix-hash-code hc) hc0)]) + (equal-secondary-hash-loop (cdr x) burn hc)))] + [(vector? x) + (let ([len (vector-length x)]) + (let vec-loop ([i 0] [burn burn] [hc (mix-hash-code hc)]) + (cond + [(fx= i len) (values hc burn)] + [else + (let-values ([(hc0 burn) (equal-secondary-hash-loop (vector-ref x i) (fx+ burn 2) 0)]) + (vec-loop (fx+ i 1) + burn + (fx+/wraparound (mix-hash-code hc) hc0)))])))] + [(hash? x) + ;; Treat hash-table hashing specially, so it can be order-insensitive + (let ([burn (fx* (fxmax burn 1) 2)]) + (let ([hc (fx+/wraparound hc (->fx (hash-hash-code + x + (lambda (x) + (let-values ([(hc0 burn0) (equal-secondary-hash-loop x burn 0)]) + hc0)))))]) + (values hc burn)))] + [(and (#%$record? x) + (or (struct-property-ref 'secondary-hash (#%$record-type-descriptor x) #f) + ;; to use default hash proc as default secondary hash proc: + (#%$record-hash-procedure x))) + => (lambda (rec-hash) + (let ([burn (fx+ burn 2)]) + (let ([hc (fx+/wraparound hc (->fx/checked + 'equal-secondary-hash-code + (rec-hash x (lambda (x) + (let-values ([(hc0 burn0) (equal-secondary-hash-loop x burn 0)]) + (set! burn burn0) + hc0)))))]) + (values hc burn))))] + [(impersonator? x) + (equal-secondary-hash-loop (impersonator-val x) burn hc)] + [else (values (fx+/wraparound hc (fxnot (eq-hash-code x))) burn)])) + +(define number-secondary-hash + (lambda (z) + (cond + [(fixnum? z) (fxnot z)] + [(flonum? z) (fxsll (#3%$flhash z) 2)] + [(bignum? z) (fx+/wraparound (integer-length z) + (bitwise-bit-field z 0 (fx- (fixnum-width) 1)))] + [(ratnum? z) (hash-code-combine (number-secondary-hash (numerator z)) + (number-secondary-hash (denominator z)))] + [else (hash-code-combine (number-secondary-hash (real-part z)) + (number-secondary-hash (imag-part z)))]))) + (define (hash-code-combine hc v) (fx+/wraparound (mix-hash-code (->fx hc)) (->fx v))) diff --git a/racket/src/cs/rumble/struct.ss b/racket/src/cs/rumble/struct.ss index 879dd2995d..b5b9256eb3 100644 --- a/racket/src/cs/rumble/struct.ss +++ b/racket/src/cs/rumble/struct.ss @@ -356,7 +356,8 @@ (record-type-hash-procedure rtd (let ([p (caddr guarded-val)]) (if (#%procedure? p) p - (lambda (v h) (|#%app| p v h)))))) + (lambda (v h) (|#%app| p v h))))) + (struct-property-set! 'secondary-hash rtd (cadddr guarded-val))) (struct-property-set! prop rtd guarded-val) (values (hash-set ht prop check-val) (append