hash-{map,for-each}: strengthen try-order? guarantee

Promise a specific order for a hash table that uses only certain
primitive, non-compound datatypes for keys.
This commit is contained in:
Matthew Flatt 2018-11-27 17:42:18 -07:00
parent da62067d8f
commit 432dfcdb4a
8 changed files with 265 additions and 47 deletions

View File

@ -12,7 +12,7 @@
(define collection 'multi)
(define version "7.1.0.6")
(define version "7.1.0.7")
(define deps `("racket-lib"
["racket" #:version ,version]))

View File

@ -384,14 +384,30 @@ change does not affect a traversal if the key has been seen already,
otherwise the traversal skips a deleted key or uses the remapped key's
new value.
If @racket[try-order?] is true, then the order of keys and values
passed to @racket[proc] is normalized under certain circumstances,
such as when the keys are all symbols and @racket[hash] is not an
@tech{impersonator}.
@see-also-concurrency-caveat[]
@history[#:changed "6.3" @elem{Added the @racket[try-order?] argument.}]}
If @racket[try-order?] is true, then the order of keys and values
passed to @racket[proc] is normalized under certain
circumstances---including when every key is one of the following and
with the following order (earlier bullets before later):
@itemlist[
@item{@tech{booleans} sorted @racket[#f] before @racket[#t];}
@item{@tech{characters} sorted by @racket[char<?];}
@item{@tech{real numbers} sorted by @racket[<];}
@item{@tech{symbols} sorted with @tech{uninterned} symbols before
@tech{unreadable symbols} before @tech{interned} symbols,
then sorted by @racket[symbol<?];}
@item{@tech{keywords} sorted by @racket[keyword<?];}
@item{@tech{strings} sorted by @racket[string<?];}
@item{@tech{byte strings} sorted by @racket[bytes<?];}
@item{@racket[null];}
@item{@|void-const|; and}
@item{@racket[eof].}
]
@history[#:changed "6.3" @elem{Added the @racket[try-order?] argument.}
#:changed "7.1.0.7" @elem{Added guarantees for @racket[try-order?].}]}
@defproc[(hash-keys [hash hash?])
(listof any/c)]{
@ -440,7 +456,8 @@ See @racket[hash-map] for information about @racket[try-order?] and
about modifying @racket[hash] within @racket[proc].
@see-also-concurrency-caveat[]
@history[#:changed "6.3" @elem{Added the @racket[try-order?] argument.}]}
@history[#:changed "6.3" @elem{Added the @racket[try-order?] argument.}
#:changed "7.1.0.7" @elem{Added guarantees for @racket[try-order?].}]}
@defproc[(hash-count [hash hash?])

View File

@ -5,6 +5,58 @@
(require racket/hash)
;; ----------------------------------------
;; Hash-key sorting:
(let ([u/apple (string->uninterned-symbol "apple")]
[u/banana (string->uninterned-symbol "banana")]
[u/coconut (string->uninterned-symbol "coconut")]
[apple (string->unreadable-symbol "apple")]
[banana (string->unreadable-symbol "banana")]
[coconut (string->unreadable-symbol "coconut")])
(test (list #f #t
#\a #\b #\c #\u3BB
(- (expt 2 79))
-3 -2 -1
0
1/2 0.75 8.5f-1
1 2 3
(expt 2 79)
u/apple u/banana u/coconut
apple banana coconut
'apple 'banana 'coconut 'coconut+
'#:apple '#:banana '#:coconut
"Apple"
"apple" "banana" "coconut"
#"Apple"
#"apple" #"banana" #"coconut"
null
(void)
eof)
'ordered
(hash-map (hash #t 'x
#f 'x
#\a 'a #\b 'b #\c 'c #\u3BB 'lam
1 'a 2 'b 3 'c
1/2 'n 0.75 'n 8.5f-1 'n
0 'z
-1 'a -2 'b -3 'c
(expt 2 79) 'b
(- (expt 2 79)) 'b
"Apple" 'a
"apple" 'a "banana" 'b "coconut" 'c
#"Apple" 'a
#"apple" 'a #"banana" 'b #"coconut" 'c
u/apple 'a u/banana 'b u/coconut 'c
apple 'a banana 'b coconut 'c
'apple 'a 'banana 'b 'coconut 'c 'coconut+ '+
'#:apple 'a '#:banana 'b '#:coconut 'c
null 'one
(void) 'one
eof 'one)
(lambda (k v) k)
#t)))
;; ----------------------------------------
(test #hash([4 . four] [3 . three] [1 . one] [2 . two])
@ -412,7 +464,6 @@
(hash-remove-iterate-test* [make-weak-hash make-weak-hasheq make-weak-hasheqv]
(p) in-hash-pairs in-weak-hash-pairs car)
;; ----------------------------------------
(report-errs)

View File

@ -320,12 +320,54 @@
;; deterministic, especially for marshaling operations.
(define (try-sort-keys ps)
(cond
[(#%andmap (lambda (p) (symbol? (car p))) ps)
(#%list-sort (lambda (a b) (symbol<? (car a) (car b))) ps)]
[(#%andmap (lambda (p) (real? (car p))) ps)
(#%list-sort (lambda (a b) (< (car a) (car b))) ps)]
[(#%andmap (lambda (p) (orderable? (car p))) ps)
(#%list-sort (lambda (a b) (orderable<? (car a) (car b))) ps)]
[else ps]))
(define (orderable-major v)
(cond
[(boolean? v) 0]
[(char? v) 1]
[(real? v) 2]
[(symbol? v) 3]
[(keyword? v) 4]
[(string? v) 5]
[(bytevector? v) 6]
[(null? v) 7]
[(void? v) 8]
[(eof-object? v) 9]
[else #f]))
(define (orderable? v) (orderable-major v))
(define (orderable<? a b)
(let ([am (orderable-major a)]
[bm (orderable-major b)])
(cond
[(or (not am) (not bm))
#f]
[(fx=? am bm)
(cond
[(boolean? a) (not a)]
[(char? a) (char<? a b)]
[(real? a) (< a b)]
[(symbol? a)
(cond
[(symbol-interned? a)
(and (symbol-interned? b)
(symbol<? a b))]
[(symbol-interned? b) #t]
[(symbol-unreadable? a)
(and (symbol-unreadable? b)
(symbol<? a b))]
[(symbol-unreadable? b) #t]
[else (symbol<? a b)])]
[(keyword? a) (keyword<? a b)]
[(string? a) (string<? a b)]
[(bytevector? a) (bytes<? a b)]
[else #f])]
[else (fx<? am bm)])))
(define (hash-count ht)
(cond
[(mutable-hash? ht) (hashtable-size (mutable-hash-ht ht))]

View File

@ -3420,6 +3420,9 @@ int scheme_byte_string_has_null(Scheme_Object *o);
int scheme_any_string_has_null(Scheme_Object *o);
#define CHAR_STRING_W_NO_NULLS "string-no-nuls?"
int scheme_string_compare(Scheme_Object *s1, Scheme_Object *s2);
int scheme_bytes_compare(Scheme_Object *s1, Scheme_Object *s2);
Scheme_Object *scheme_do_exit(int argc, Scheme_Object *argv[]);
Scheme_Object *scheme_make_arity(mzshort minc, mzshort maxc);

View File

@ -13,12 +13,12 @@
consistently.)
*/
#define MZSCHEME_VERSION "7.1.0.6"
#define MZSCHEME_VERSION "7.1.0.7"
#define MZSCHEME_VERSION_X 7
#define MZSCHEME_VERSION_Y 1
#define MZSCHEME_VERSION_Z 0
#define MZSCHEME_VERSION_W 6
#define MZSCHEME_VERSION_W 7
#define MZSCHEME_VERSION_MAJOR ((MZSCHEME_VERSION_X * 100) + MZSCHEME_VERSION_Y)
#define MZSCHEME_VERSION_MINOR ((MZSCHEME_VERSION_Z * 1000) + MZSCHEME_VERSION_W)

View File

@ -33,14 +33,49 @@ START_XFORM_SKIP;
END_XFORM_SKIP;
#endif
static int compare_syms(const void *_a, const void *_b)
{
Scheme_Object *a = *(Scheme_Object **)_a;
Scheme_Object *b = *(Scheme_Object **)_b;
intptr_t l = SCHEME_SYM_LEN(a), i;
enum {
sort_major_unknown,
sort_major_boolean,
sort_major_char,
sort_major_real,
sort_major_symbol,
sort_major_keyword,
sort_major_string,
sort_major_bytes,
sort_major_null,
sort_major_void,
sort_major_eof,
};
MZ_ASSERT(SCHEME_SYMBOLP(a));
MZ_ASSERT(SCHEME_SYMBOLP(b));
static int sort_major(Scheme_Object *v)
{
if (SAME_OBJ(v, scheme_true) || SCHEME_FALSEP(v))
return sort_major_boolean;
else if (SCHEME_CHARP(v))
return sort_major_char;
else if (SCHEME_REALP(v))
return sort_major_real;
else if (SCHEME_SYMBOLP(v))
return sort_major_symbol;
else if (SCHEME_KEYWORDP(v))
return sort_major_keyword;
else if (SCHEME_CHAR_STRINGP(v))
return sort_major_string;
else if (SCHEME_BYTE_STRINGP(v))
return sort_major_bytes;
else if (SCHEME_NULLP(v))
return sort_major_null;
else if (SCHEME_VOIDP(v))
return sort_major_void;
else if (SCHEME_EOFP(v))
return sort_major_eof;
else
return sort_major_unknown;
}
static int compare_sym_likes(Scheme_Object *a, Scheme_Object *b)
{
intptr_t l = SCHEME_SYM_LEN(a), i;
if (SCHEME_SYM_LEN(b) < l)
l = SCHEME_SYM_LEN(b);
@ -53,22 +88,42 @@ static int compare_syms(const void *_a, const void *_b)
return SCHEME_SYM_LEN(a) - SCHEME_SYM_LEN(b);
}
static void sort_symbol_array(Scheme_Object **a, intptr_t count)
static int compare_syms(Scheme_Object *a, Scheme_Object *b)
{
my_qsort(a, count, sizeof(Scheme_Object *), compare_syms);
MZ_ASSERT(SCHEME_SYMBOLP(a));
MZ_ASSERT(SCHEME_SYMBOLP(b));
/* Sort uninterned before unreadable before interned.
There's no guarantee that uninterned symbols are
usefully sorted, but try anyway. */
if (SCHEME_SYM_UNINTERNEDP(a)) {
if (!SCHEME_SYM_UNINTERNEDP(b))
return -1;
} else {
if (SCHEME_SYM_UNINTERNEDP(b))
return 1;
if (SCHEME_SYM_PARALLELP(a)) {
if (!SCHEME_SYM_PARALLELP(b))
return -1;
} else {
if (SCHEME_SYM_PARALLELP(b))
return 1;
}
}
return compare_sym_likes(a, b);
}
static int compare_nums(const void *_a, const void *_b)
/* also allow #fs */
static int compare_keywords(Scheme_Object *a, Scheme_Object *b)
{
Scheme_Object *a = *(Scheme_Object **)_a;
Scheme_Object *b = *(Scheme_Object **)_b;
MZ_ASSERT(SCHEME_KEYWORDP(a));
MZ_ASSERT(SCHEME_KEYWORDP(b));
if (SCHEME_FALSEP(a))
return -1;
else if (SCHEME_FALSEP(b))
return 1;
return compare_sym_likes(a, b);
}
static int compare_reals(Scheme_Object *a, Scheme_Object *b)
{
MZ_ASSERT(SCHEME_REALP(a));
MZ_ASSERT(SCHEME_REALP(b));
@ -80,11 +135,51 @@ static int compare_nums(const void *_a, const void *_b)
return 0;
}
static void sort_number_array(Scheme_Object **a, intptr_t count)
int compare_sortable(const void *_a, const void *_b)
{
my_qsort(a, count, sizeof(Scheme_Object *), compare_nums);
Scheme_Object *a = *(Scheme_Object **)_a;
Scheme_Object *b = *(Scheme_Object **)_b;
int am, bm;
am = sort_major(a);
bm = sort_major(b);
if (am != bm)
return am - bm;
else {
switch (am) {
case sort_major_boolean:
if (SAME_OBJ(a, b))
return 0;
else if (SCHEME_FALSEP(a))
return -1;
else
return 1;
case sort_major_char:
return SCHEME_CHAR_VAL(a) - SCHEME_CHAR_VAL(b);
case sort_major_real:
return compare_reals(a, b);
case sort_major_symbol:
return compare_syms(a, b);
case sort_major_keyword:
return compare_keywords(a, b);
case sort_major_string:
return scheme_string_compare(a, b);
case sort_major_bytes:
return scheme_bytes_compare(a, b);
case sort_major_null:
case sort_major_void:
case sort_major_eof:
/* There can be only one. */
return 0;
}
}
return 0;
}
/**************************************************************/
static int compare_vars_at_resolve(const void *_a, const void *_b)
{
Scheme_IR_Local *a = *(Scheme_IR_Local **)_a;
@ -99,22 +194,18 @@ void scheme_sort_resolve_ir_local_array(Scheme_IR_Local **a, intptr_t count)
/**************************************************************/
static int all_symbols(Scheme_Object **a, int c)
static int all_sortable(Scheme_Object **a, int c)
{
while (c--) {
if (!SCHEME_SYMBOLP(a[c]))
if (sort_major(a[c]) == sort_major_unknown)
return 0;
}
return 1;
}
static int all_reals(Scheme_Object **a, int c)
static void sort_sortable_array(Scheme_Object **a, intptr_t count)
{
while (c--) {
if (!SCHEME_REALP(a[c]))
return 0;
}
return 1;
my_qsort(a, count, sizeof(Scheme_Object *), compare_sortable);
}
Scheme_Object **scheme_extract_sorted_keys(Scheme_Object *tree)
@ -159,11 +250,10 @@ Scheme_Object **scheme_extract_sorted_keys(Scheme_Object *tree)
MZ_ASSERT(j == count);
}
if (SCHEME_SYMBOLP(a[0]) && all_symbols(a, count))
sort_symbol_array(a, count);
else if (all_reals(a, count))
sort_number_array(a, count);
else
if (all_sortable(a, count)) {
sort_sortable_array(a, count);
return a;
} else
return NULL;
return a;

View File

@ -4170,6 +4170,21 @@ static int mz_strcmp(const char *who, unsigned char *str1, intptr_t l1, unsigned
return endres;
}
int scheme_string_compare(Scheme_Object *a, Scheme_Object *b)
{
return mz_char_strcmp(NULL,
SCHEME_CHAR_STR_VAL(a), SCHEME_CHAR_STRTAG_VAL(a),
SCHEME_CHAR_STR_VAL(b), SCHEME_CHAR_STRTAG_VAL(b),
0, 0);
}
int scheme_bytes_compare(Scheme_Object *a, Scheme_Object *b)
{
return mz_strcmp(NULL,
(unsigned char *)SCHEME_BYTE_STR_VAL(a), SCHEME_BYTE_STRTAG_VAL(a),
(unsigned char *)SCHEME_BYTE_STR_VAL(b), SCHEME_BYTE_STRTAG_VAL(b));
}
/**********************************************************************/
/* byte string conversion */
/**********************************************************************/