diff --git a/pkgs/racket-doc/scribblings/reference/equality.scrbl b/pkgs/racket-doc/scribblings/reference/equality.scrbl index eff0e0b35e..bd9bf6e215 100644 --- a/pkgs/racket-doc/scribblings/reference/equality.scrbl +++ b/pkgs/racket-doc/scribblings/reference/equality.scrbl @@ -221,7 +221,18 @@ indexing and comparison operations, especially in the implementation of The second argument is an @racket[equal-hash-code]-like procedure to use for recursive hash-code computation; use the given procedure instead of - @racket[equal-hash-code] to ensure that data cycles are handled properly.} + @racket[equal-hash-code] to ensure that data cycles are handled properly. + + Although the result of @racket[_hash-proc] can be any exact + integer, it will be truncated for most purposes to a @tech{fixnum} + (e.g., for the result of @racket[equal-hash-code]). Roughly, + truncation uses @racket[bitwise-and] to take the lower bits of the + number. Thus, variation in the hash-code computation should be + reflected in the fixnum-compatible bits of @racket[_hash-proc]'s + result. Consumers of a hash code are expected to use variation + within the fixnum range appropriately, and producers are @emph{not} + responsible to reflect variation in hash codes across the full + range of bits that fit within a fixnum.} @item{@racket[_hash2-proc : (any/c (any/c . -> . exact-integer?) . -> . exact-integer?)] --- diff --git a/pkgs/racket-test/tests/racket/stress/number-hash.rkt b/pkgs/racket-test/tests/racket/stress/number-hash.rkt new file mode 100644 index 0000000000..7889b7342a --- /dev/null +++ b/pkgs/racket-test/tests/racket/stress/number-hash.rkt @@ -0,0 +1,46 @@ +#lang racket/base + +;; This test should take tens of milliseconds. With bad hash +;; functions, it can easily take tens of seconds. + +(define N 30000) + +(define start-time (current-process-milliseconds)) + +(define (test-hashing ht) + (time + (for ([i N]) + (hash-set! ht i i))) + + (hash-clear! ht) + (time + (for ([i N]) + (hash-set! ht (+ 0.0 i) i))) + + (hash-clear! ht) + (time + (for ([i N]) + (hash-set! ht (arithmetic-shift i 16) i))) + + (hash-clear! ht) + (time + (for ([i N]) + (hash-set! ht (arithmetic-shift i 32) i))) + + (hash-clear! ht) + (time + (for ([i N]) + (hash-set! ht (arithmetic-shift i 100) i)))) + +(test-hashing (make-hash)) +(test-hashing (make-hasheqv)) +(test-hashing (make-hasheq)) + +(test-hashing (make-weak-hash)) +(test-hashing (make-weak-hasheqv)) +(test-hashing (make-weak-hasheq)) + +(when (> (- (current-process-milliseconds) start-time) + (* 1000.0 10)) + (error "number hashing seems to take orders of magnitude too long")) + diff --git a/racket/src/ChezScheme/c/fasl.c b/racket/src/ChezScheme/c/fasl.c index d55df9d128..c8a924cecb 100644 --- a/racket/src/ChezScheme/c/fasl.c +++ b/racket/src/ChezScheme/c/fasl.c @@ -896,7 +896,7 @@ static void faslin(ptr tc, ptr *x, ptr t, ptr *pstrbuf, faslFile f) { } faslin(tc, &INITCAR(keyval), t, pstrbuf, f); faslin(tc, &INITCDR(keyval), t, pstrbuf, f); - i = ((uptr)Scar(keyval) >> primary_type_bits) & (veclen - 1); + i = eq_hash(Scar(keyval)) & (veclen - 1); INITVECTIT(v, i) = S_tlc(keyval, ht, Svector_ref(v, i)); n -= 1; } diff --git a/racket/src/ChezScheme/c/gc.c b/racket/src/ChezScheme/c/gc.c index c175547fce..bd32db85ce 100644 --- a/racket/src/ChezScheme/c/gc.c +++ b/racket/src/ChezScheme/c/gc.c @@ -1714,7 +1714,7 @@ ptr GCENTRY(ptr tc, ptr count_roots_ls) { } INITTLCNEXT(tlc) = Sfalse; INITPTRFIELD(ht,eq_hashtable_size_disp) = FIX(UNFIX(PTRFIELD(ht,eq_hashtable_size_disp)) - 1); - } else if ((new_idx = ((uptr)key >> primary_type_bits) & (veclen - 1)) != old_idx) { + } else if ((new_idx = eq_hash(key) & (veclen - 1)) != old_idx) { /* remove tlc from old bucket */ b = Svector_ref(vec, old_idx); if (b == tlc) { diff --git a/racket/src/ChezScheme/c/segment.h b/racket/src/ChezScheme/c/segment.h index bc7d142614..48a72b5c7e 100644 --- a/racket/src/ChezScheme/c/segment.h +++ b/racket/src/ChezScheme/c/segment.h @@ -84,3 +84,20 @@ FORCEINLINE seginfo *MaybeSegInfo(uptr i) { #define SegmentSpace(i) (SegInfo(i)->space) #define SegmentGeneration(i) (SegInfo(i)->generation) #define SegmentOldSpace(i) (SegInfo(i)->old_space) + + + +FORCEINLINE uptr eq_hash(ptr key) { + if (Sfixnump(key)) { + uptr x = UNFIX(key); +#if (ptr_bits == 64) + uptr x1 = x ^ ((x & (uptr)0xFFFFFFFF00000000) >> 32); +#else + uptr x1 = x; +#endif + uptr x2 = x1 ^ ((x1 & (uptr)0xFFFF0000) >> 16); + uptr x3 = x2 ^ ((x2 & (uptr)0xFF00) >> 8); + return x3; + } else + return (uptr)key >> primary_type_bits; +} diff --git a/racket/src/ChezScheme/c/types.h b/racket/src/ChezScheme/c/types.h index 865574dccf..499b6b9e0c 100644 --- a/racket/src/ChezScheme/c/types.h +++ b/racket/src/ChezScheme/c/types.h @@ -170,6 +170,9 @@ typedef struct _seginfo { octet *counting_mask; /* bitmap of counting roots during a GC */ octet *measured_mask; /* bitmap of objects that have been measured */ #ifdef PORTABLE_BYTECODE +# ifndef PTHREADS + void *encorage_alignment; /* hack for 32-bit systems that align 64-bit values on 4 bytes */ +# endif union { ptr force_alignment; #endif octet dirty_bytes[cards_per_segment]; /* one dirty byte per card */ diff --git a/racket/src/ChezScheme/s/cmacros.ss b/racket/src/ChezScheme/s/cmacros.ss index 173f387012..ab6a2efc41 100644 --- a/racket/src/ChezScheme/s/cmacros.ss +++ b/racket/src/ChezScheme/s/cmacros.ss @@ -2144,6 +2144,20 @@ (define-constant eq-hashtable-subtype-weak 1) (define-constant eq-hashtable-subtype-ephemeron 2) +(define-syntax fixmix + (syntax-rules () + [(_ x-expr) + ;; Since we tend to use the low bits of a hash code, make sure + ;; higher bits of a hash code are represented there. There's + ;; a copy of this conversion for rehashing in "segment.h". + (let* ([x x-expr] + [x1 (constant-case ptr-bits + [(64) (fxxor x (fxand (fxsrl x 32) #xFFFFFFFF))] + [else x])] + [x2 (fxxor x1 (fxand (fxsrl x1 16) #xFFFF))] + [x3 (fxxor x2 (fxand (fxsrl x2 8) #xFF))]) + x3)])) + ; keep in sync with make-date (define-constant dtvec-nsec 0) (define-constant dtvec-sec 1) diff --git a/racket/src/ChezScheme/s/library.ss b/racket/src/ChezScheme/s/library.ss index 190acd889e..a3a0fdb499 100644 --- a/racket/src/ChezScheme/s/library.ss +++ b/racket/src/ChezScheme/s/library.ss @@ -1535,6 +1535,13 @@ (if (fx<= n2 target) (adjust! h vec n n2) (loop n2)))))))])) + + (define-syntax eq-hash + (syntax-rules () + [(_ v-expr) (let ([v v-expr]) + (if (fixnum? v) + (fixmix v) + ($fxaddress v)))])) (define adjust! (lambda (h vec1 n1 n2) @@ -1544,7 +1551,7 @@ (let loop ([b (vector-ref vec1 i1)]) (unless (fixnum? b) (let ([next ($tlc-next b)] [keyval ($tlc-keyval b)]) - (let ([i2 (fxlogand ($fxaddress (car keyval)) mask2)]) + (let ([i2 (fxlogand (eq-hash (car keyval)) mask2)]) ($set-tlc-next! b (vector-ref vec2 i2)) (vector-set! vec2 i2 b)) (loop next))))) @@ -1553,26 +1560,26 @@ (define-library-entry (eq-hashtable-ref h x v) (lookup-keyval x (let ([vec (ht-vec h)]) - (vector-ref vec (fxlogand ($fxaddress x) (fx- (vector-length vec) 1)))) + (vector-ref vec (fxlogand (eq-hash x) (fx- (vector-length vec) 1)))) cdr v)) (define-library-entry (eq-hashtable-ref-cell h x) (lookup-keyval x (let ([vec (ht-vec h)]) - (vector-ref vec (fxlogand ($fxaddress x) (fx- (vector-length vec) 1)))) + (vector-ref vec (fxlogand (eq-hash x) (fx- (vector-length vec) 1)))) (lambda (x) x) #f)) (define-library-entry (eq-hashtable-contains? h x) (lookup-keyval x (let ([vec (ht-vec h)]) - (vector-ref vec (fxlogand ($fxaddress x) (fx- (vector-length vec) 1)))) + (vector-ref vec (fxlogand (eq-hash x) (fx- (vector-length vec) 1)))) (lambda (x) #t) #f)) (define-library-entry (eq-hashtable-cell h x v) (let* ([vec (ht-vec h)] - [idx (fxlogand ($fxaddress x) (fx- (vector-length vec) 1))] + [idx (fxlogand (eq-hash x) (fx- (vector-length vec) 1))] [b (vector-ref vec idx)]) (lookup-keyval x b values @@ -1590,7 +1597,7 @@ ;; resizing. (define-library-entry (eq-hashtable-try-atomic-cell h x v) (let* ([vec (ht-vec h)] - [idx (fxlogand ($fxaddress x) (fx- (vector-length vec) 1))] + [idx (fxlogand (eq-hash x) (fx- (vector-length vec) 1))] [b (vector-ref vec idx)]) (lookup-keyval x b values @@ -1611,7 +1618,7 @@ (define do-set! (lambda (h x v) (let* ([vec (ht-vec h)] - [idx (fxlogand ($fxaddress x) (fx- (vector-length vec) 1))] + [idx (fxlogand (eq-hash x) (fx- (vector-length vec) 1))] [b (vector-ref vec idx)]) (lookup-keyval x b (lambda (keyval) (set-cdr! keyval v)) @@ -1631,7 +1638,7 @@ (define-library-entry (eq-hashtable-update! h x p v) (let* ([vec (ht-vec h)] - [idx (fxlogand ($fxaddress x) (fx- (vector-length vec) 1))] + [idx (fxlogand (eq-hash x) (fx- (vector-length vec) 1))] [b (vector-ref vec idx)]) (lookup-keyval x b (lambda (a) (set-cdr! a (p (cdr a)))) @@ -1639,7 +1646,7 @@ (define-library-entry (eq-hashtable-delete! h x) (let* ([vec (ht-vec h)] - [idx (fxlogand ($fxaddress x) (fx- (vector-length vec) 1))] + [idx (fxlogand (eq-hash x) (fx- (vector-length vec) 1))] [b (vector-ref vec idx)]) (unless (fixnum? b) (if (eq? (car ($tlc-keyval b)) x) diff --git a/racket/src/ChezScheme/s/newhash.ss b/racket/src/ChezScheme/s/newhash.ss index 1bb88dcb58..a2afcfbf9a 100644 --- a/racket/src/ChezScheme/s/newhash.ss +++ b/racket/src/ChezScheme/s/newhash.ss @@ -113,8 +113,8 @@ Documentation notes: ; NB: allow negative exact integers. (let ([i (hash x)]) (cond - [(fixnum? i) (fxlogand i mask)] - [(bignum? i) (logand i mask)] + [(fixnum? i) (fxlogand (fixmix i) mask)] + [(bignum? i) (fxlogand (fixmix (bitwise-and i (most-positive-fixnum))) mask)] [else ($oops who "invalid hash-function ~s return value ~s for ~s" hash i x)])))) (define size->minlen @@ -428,12 +428,35 @@ Documentation notes: [cells2 ($ht-hashtable-cells (eqv-ht-genht h) (fx- max-sz (vector-length cells1)))]) (vector-append cells1 cells2))))) + (define (fixmix x) + ;; Since mutable hash tables tend to use the low bits of a hash code, + ;; make sure higher bits of a fixnum are represented there + (let* ([x1 (constant-case ptr-bits + [(64) (fxxor x (fxand (fxsrl x 32) #xFFFFFFFF))] + [else x])] + [x2 (fxxor x1 (fxand (fxsrl x1 16) #xFFFF))] + [x3 (fxxor x2 (fxand (fxsrl x2 8) #xFF))]) + x3)) + (define number-hash (lambda (z) (cond [(fixnum? z) (if (fx< z 0) (fxnot z) z)] [(flonum? z) ($flhash z)] - [(bignum? z) (modulo z (most-positive-fixnum))] + [(bignum? z) (let ([len (integer-length z)] + [update (lambda (hc k) + (let ([hc2 (#3%fx+ hc (#3%fxsll (#3%fx+ hc k) 10))]) + (fxlogxor hc2 (fxsrl hc2 6))))]) + (let loop ([i 0] [hc 0]) + (cond + [(fx>= i len) hc] + [else + (let ([next-i (fx+ i (fx- (fixnum-width) 1))]) + (loop next-i + (bitwise-and + (most-positive-fixnum) + (update (bitwise-bit-field z i next-i) + hc))))])))] [(ratnum? z) (number-hash (+ (* (numerator z) 5) (denominator z)))] [else (logxor (lognot (number-hash (real-part z))) (number-hash (imag-part z)))]))) diff --git a/racket/src/bc/src/hash.c b/racket/src/bc/src/hash.c index 5f4ac68bcb..a154dccd4f 100644 --- a/racket/src/bc/src/hash.c +++ b/racket/src/bc/src/hash.c @@ -223,7 +223,7 @@ static int equal_w_key_wraps(Scheme_Object *ekey, Scheme_Object *tkey, Scheme_Ob } /*========================================================================*/ -/* normal hash table */ +/* normal mutable hash table */ /*========================================================================*/ #ifdef REVERSE_HASH_TABLE_ORDER @@ -232,6 +232,20 @@ static int equal_w_key_wraps(Scheme_Object *ekey, Scheme_Object *tkey, Scheme_Ob # define HASH_TO_ARRAY_INDEX(h, mask) (h) #endif +/* Since mutable hash tables tend to use the low bits of a hash code, + make sure higher bits of a fixnum are represented there: */ +XFORM_NONGCING static uintptr_t fixmix(uintptr_t x) { +#ifdef SIXTY_FOUR_BIT_INTEGERS + uintptr_t x1 = x ^ ((x & (uintptr_t)0xFFFFFFFF00000000) >> 32); +#else + uintptr_t x1 = x; +#endif + uintptr_t x2 = x1 ^ ((x1 & (uintptr_t)0xFFFF0000) >> 16); + uintptr_t x3 = x2 ^ ((x2 & (uintptr_t)0xFF00) >> 8); + + return x3; +} + Scheme_Hash_Table *scheme_make_hash_table(int type) { Scheme_Hash_Table *table; @@ -280,7 +294,7 @@ static Scheme_Object *do_hash(Scheme_Hash_Table *table, Scheme_Object *key, int ekey = key; h2 = 0; hx = scheme_equal_hash_key(ekey); - h = to_unsigned_hash(hx) & mask; + h = fixmix(to_unsigned_hash(hx)) & mask; } else { GC_CAN_IGNORE intptr_t *_h2x; if (table->compare) { @@ -289,14 +303,14 @@ static Scheme_Object *do_hash(Scheme_Hash_Table *table, Scheme_Object *key, int } else _h2x = &h2x; table->make_hash_indices((void *)key, &hx, _h2x); - h = to_unsigned_hash(hx) & mask; + h = fixmix(to_unsigned_hash(hx)) & mask; if (_h2x) - h2 = (to_unsigned_hash(h2x) & mask) | 1; + h2 = (fixmix(to_unsigned_hash(h2x)) & mask) | 1; ekey = NULL; } } else { uintptr_t lkey; - lkey = PTR_TO_LONG((Scheme_Object *)key); + lkey = fixmix(PTR_TO_LONG((Scheme_Object *)key)); h = lkey & mask; h2 = ((lkey >> 1) & mask) | 1; ekey = NULL; @@ -329,7 +343,7 @@ static Scheme_Object *do_hash(Scheme_Hash_Table *table, Scheme_Object *key, int scheme_hash_iteration_count++; if (!h2) { h2x = scheme_equal_hash_key2(ekey); - h2 = (to_unsigned_hash(h2x) & (table->size - 1)) | 1; + h2 = (fixmix(to_unsigned_hash(h2x)) & (table->size - 1)) | 1; } h = (h + h2) & mask; } @@ -356,7 +370,7 @@ static Scheme_Object *do_hash(Scheme_Hash_Table *table, Scheme_Object *key, int scheme_hash_iteration_count++; if (!h2) { table->make_hash_indices((void *)key, NULL, &h2x); - h2 = (to_unsigned_hash(h2x) & (table->size - 1)) | 1; + h2 = (fixmix(to_unsigned_hash(h2x)) & (table->size - 1)) | 1; } h = (h + h2) & mask; } @@ -441,7 +455,7 @@ static Scheme_Object *do_hash_set(Scheme_Hash_Table *table, Scheme_Object *key, mask = table->size - 1; - lkey = PTR_TO_LONG((Scheme_Object *)key); + lkey = fixmix(PTR_TO_LONG((Scheme_Object *)key)); h = lkey & mask; h2 = (lkey >> 1) & mask; @@ -497,7 +511,7 @@ XFORM_NONGCING static Scheme_Object *do_hash_get(Scheme_Hash_Table *table, Schem mask = table->size - 1; - lkey = PTR_TO_LONG((Scheme_Object *)key); + lkey = fixmix(PTR_TO_LONG((Scheme_Object *)key)); h = lkey & mask; h2 = (lkey >> 1) & mask; @@ -839,11 +853,11 @@ get_bucket (Scheme_Bucket_Table *table, const char *key, int add, Scheme_Bucket else ekey = (void *)key; table->make_hash_indices(ekey, &hx, &h2x); - h = to_unsigned_hash(hx) & mask; - h2 = to_unsigned_hash(h2x) & mask; + h = fixmix(to_unsigned_hash(hx)) & mask; + h2 = fixmix(to_unsigned_hash(h2x)) & mask; } else { uintptr_t lkey; - lkey = PTR_TO_LONG((Scheme_Object *)key); + lkey = fixmix(PTR_TO_LONG((Scheme_Object *)key)); h = lkey & mask; h2 = (lkey >> 1) & mask; ekey = NULL; @@ -1408,7 +1422,6 @@ XFORM_NONGCING static uintptr_t fast_equal_hash_key(Scheme_Object *o, uintptr_t case scheme_integer_type: { uintptr_t iv = to_unsigned_hash(SCHEME_INT_VAL(o)); - MZ_MIX(iv); return k + iv; } #ifdef MZ_USE_SINGLE_FLOATS @@ -1430,14 +1443,15 @@ XFORM_NONGCING static uintptr_t fast_equal_hash_key(Scheme_Object *o, uintptr_t case scheme_bignum_type: { int i = SCHEME_BIGLEN(o); - bigdig *d = SCHEME_BIGDIG(o), k2; + bigdig *d = SCHEME_BIGDIG(o); + uintptr_t k2; k2 = k; while (i--) { - k2 = (k2 << 3) + k2 + d[i]; + k2 = (k2 << 3) + k2 + (uintptr_t)d[i]; } - return (uintptr_t)k2; + return k2; } break; case scheme_rational_type: @@ -1931,7 +1945,7 @@ XFORM_NONGCING static uintptr_t fast_equal_hash_key2(Scheme_Object *o, int *_don case scheme_false_type: return 2; case scheme_integer_type: - return t - SCHEME_INT_VAL(o); + return (uintptr_t)t - SCHEME_INT_VAL(o); #ifdef MZ_USE_SINGLE_FLOATS case scheme_float_type: { @@ -1949,7 +1963,8 @@ XFORM_NONGCING static uintptr_t fast_equal_hash_key2(Scheme_Object *o, int *_don } #endif case scheme_bignum_type: - return SCHEME_BIGDIG(o)[0]; + return ((uintptr_t)SCHEME_BIGDIG(o)[0] + + (uintptr_t)SCHEME_BIGDIG(o)[SCHEME_BIGLEN(o)-1]); case scheme_rational_type: return fast_equal_hash_key2(scheme_rational_numerator(o), _done); case scheme_complex_type: diff --git a/racket/src/cs/rumble/hash-code.ss b/racket/src/cs/rumble/hash-code.ss index 2695b808cb..d83b6fd230 100644 --- a/racket/src/cs/rumble/hash-code.ss +++ b/racket/src/cs/rumble/hash-code.ss @@ -55,13 +55,31 @@ ;; Mostly copied from Chez Scheme's "newhash.ss": (define number-hash (lambda (z) - (cond - [(fixnum? z) (if (fx< z 0) (fxand z (most-positive-fixnum)) z)] - [(flonum? z) (#3%$flhash z)] - [(bignum? z) (modulo z (most-positive-fixnum))] - [(ratnum? z) (number-hash (+ (* (numerator z) 5) (denominator z)))] - [else (logand (logxor (lognot (number-hash (real-part z))) (number-hash (imag-part z))) - (most-positive-fixnum))]))) + (let* ([+/fx + (lambda (hc k) + (#3%fx+ hc k))] + [sll/fx + (lambda (hc i) + (#3%fxsll hc i))] + [mix + (lambda (hc) + (let ([hc2 (+/fx hc (sll/fx hc 10))]) + (fxlogxor hc2 (fxsrl hc2 6))))]) + (cond + [(fixnum? z) (if (fx< z 0) (fxand z (most-positive-fixnum)) z)] + [(flonum? z) (#3%$flhash z)] + [(bignum? z) (let ([len (integer-length z)]) + (let loop ([i 0] [hc 0]) + (cond + [(fx>= i len) hc] + [else + (let ([next-i (fx+ i (fx- (fixnum-width) 1))]) + (loop next-i + (+/fx (bitwise-bit-field z i next-i) + (mix hc))))])))] + [(ratnum? z) (number-hash (+ (* (numerator z) 5) (denominator z)))] + [else (logand (logxor (lognot (number-hash (real-part z))) (number-hash (imag-part z))) + (most-positive-fixnum))])))) (define (eqv-hash-code x) (cond