improve complexity of `hash-iterate-{key,value}'

For immutable hashes, `hash-iterate-{key,value}' used to take O(N) time on
the first call for a particular table and O(1) thereafter. Now it takes
O(1) time for all calls, the constant is only slightly larger for
the non-first calls, and there's no extra allocation.
This commit is contained in:
Matthew Flatt 2013-02-27 17:41:35 -07:00
parent 773496642b
commit 7a8c2ff063
9 changed files with 243 additions and 172 deletions

View File

@ -1377,7 +1377,7 @@ static uintptr_t equal_hash_key(Scheme_Object *o, uintptr_t k, Hash_Info *hi)
hi->depth += 2;
old_depth = hi->depth;
for (i = ht->count; i--; ) {
for (i = scheme_hash_tree_next(ht, -1); i != -1; i = scheme_hash_tree_next(ht, i)) {
scheme_hash_tree_index(ht, i, &ik, &iv);
vk = equal_hash_key(ik, 0, hi);
MZ_MIX(vk);
@ -1832,8 +1832,8 @@ static uintptr_t equal_hash_key2(Scheme_Object *o, Hash_Info *hi)
hi->depth += 2;
old_depth = hi->depth;
for (i = ht->count; i--; ) {
for (i = scheme_hash_tree_next(ht, -1); i != -1; i = scheme_hash_tree_next(ht, i)) {
scheme_hash_tree_index(ht, i, &ik, &iv);
k += equal_hash_key2(ik, hi);
k += equal_hash_key2(iv, hi);
@ -1915,7 +1915,7 @@ typedef struct AVLNode {
MZTAG_IF_REQUIRED
char height;
uintptr_t code;
Scheme_Object *key; /* NULL => val is list of key-value pairs */
Scheme_Object *key; /* NULL => val is another tree for multiple key-value pairs */
Scheme_Object *val;
struct AVLNode *left;
struct AVLNode *right;
@ -2176,113 +2176,141 @@ Scheme_Hash_Tree *scheme_make_hash_tree(int kind)
return tree;
}
Scheme_Hash_Tree *scheme_hash_tree_set(Scheme_Hash_Tree *tree, Scheme_Object *key, Scheme_Object *val)
static intptr_t search_nodes(AVLNode *n, Scheme_Object *key, int kind)
/* O(N) search full tree to find a code for `key'; returns -1 if not found */
{
intptr_t code;
if ((kind && ((kind == 1)
? scheme_equal(n->key, key)
: scheme_eqv(n->key, key)))
|| (!kind && SAME_OBJ(n->key, key)))
return n->code;
if (n->left) {
code = search_nodes(n->left, key, kind);
if (code >= 0)
return code;
}
if (n->right)
return search_nodes(n->right, key, kind);
else
return -1;
}
XFORM_NONGCING static intptr_t search_nodes_eq(AVLNode *n, Scheme_Object *key)
/* O(N) search full tree to find a code for `key'; returns -1 if not found */
{
intptr_t code;
if (SAME_OBJ(n->key, key))
return n->code;
if (n->left) {
code = search_nodes_eq(n->left, key);
if (code >= 0)
return code;
}
if (n->right)
return search_nodes_eq(n->right, key);
else
return -1;
}
XFORM_NONGCING static intptr_t fresh_code(AVLNode *root)
/* O(n) search for an available code */
{
int i = 0;
while (1) {
if (!avl_find(i, root))
return i;
i++;
}
}
static void *hash_tree_set(Scheme_Hash_Tree *tree, Scheme_Object *key, Scheme_Object *val, intptr_t h,
AVLNode *root, int kind)
{
Scheme_Hash_Tree *tree2;
uintptr_t h;
AVLNode *root, *added;
AVLNode *added;
int delta;
if (SCHEME_HASHTR_FLAGS(tree) & 0x3) {
if (SCHEME_HASHTR_FLAGS(tree) & 0x1) {
h = to_unsigned_hash(scheme_equal_hash_key(key));
} else {
h = to_unsigned_hash(scheme_eqv_hash_key(key));
}
} else {
h = PTR_TO_LONG((Scheme_Object *)key);
h = h >> 2;
}
if (!val) {
/* Removing ... */
added = avl_find(h, tree->root);
if (!added)
return tree; /* nothing to remove */
added = avl_find(h, root);
if (!added) {
/* nothing to remove */
return (tree ? (void *)tree : (void *)root);
}
if (added->key) {
int kind = (SCHEME_HASHTR_FLAGS(tree) & 0x3);
if ((kind && ((kind == 1)
? scheme_equal(added->key, key)
: scheme_eqv(added->key, key)))
|| (!kind && SAME_OBJ(added->key, key))) {
/* remove single item */
root = avl_del(tree->root, h);
root = avl_del(root, h);
tree2 = MALLOC_ONE_TAGGED(Scheme_Hash_Tree);
memcpy(tree2, tree, sizeof(Scheme_Hash_Tree));
tree2->elems_box = NULL;
tree2->root = root;
--tree2->count;
return tree2;
if (tree) {
tree2 = MALLOC_ONE_TAGGED(Scheme_Hash_Tree);
memcpy(tree2, tree, sizeof(Scheme_Hash_Tree));
tree2->root = root;
--tree2->count;
return tree2;
} else
return root;
} else {
/* Nothing to remove */
return tree;
return (tree ? (void *)tree : (void *)root);
}
} else {
/* multiple mappings; remove it below */
root = tree->root;
}
} else {
/* Adding/setting: */
root = avl_ins(h, NULL, NULL, tree->root);
root = avl_ins(h, NULL, NULL, root);
added = avl_find(h, root);
}
delta = 0;
if (added->val) {
int kind = (SCHEME_HASHTR_FLAGS(tree) & 0x3);
if (!added->key) {
/* Have a list of keys and vals. In this case, val can be NULL
to implement removal. */
Scheme_Object *prs = added->val, *a;
int cnt = 0;
while (prs) {
a = SCHEME_CAR(prs);
if (kind) {
if (kind == 1) {
if (scheme_equal(SCHEME_CAR(a), key))
break;
} else {
if (scheme_eqv(SCHEME_CAR(a), key))
break;
}
} else {
if (SAME_OBJ(SCHEME_CAR(a), key))
break;
}
prs = SCHEME_CDR(prs);
cnt++;
}
if (!prs) {
/* Have a subtree of keys and vals (with bogus "code"s). */
AVLNode *savl = (AVLNode *)added->val;
intptr_t code;
code = search_nodes(savl, key, kind);
if (code < 0) {
/* Not mapped already: */
if (!val) return tree; /* nothing to remove after all */
val = scheme_make_raw_pair(scheme_make_raw_pair(key, val), added->val);
if (!val) {
/* nothing to remove after all */
return (tree ? (void *)tree : (void *)root);
}
savl = (AVLNode *)hash_tree_set(NULL, key, val, fresh_code(savl), savl, kind);
val = (Scheme_Object *)savl;
key = NULL;
delta = 1;
} else {
/* Mapped already: */
prs = SCHEME_CDR(prs);
for (a = added->val; cnt--; a = SCHEME_CDR(a)) {
prs = scheme_make_raw_pair(SCHEME_CAR(a), prs);
}
savl = (AVLNode *)hash_tree_set(NULL, key, val, code, savl, kind);
if (val) {
prs = scheme_make_raw_pair(scheme_make_raw_pair(key, val),
prs);
/* Updated */
val = (Scheme_Object *)savl;
key = NULL;
} else {
/* Removed */
delta = -1;
}
val = prs;
key = NULL;
if (!SCHEME_CDR(prs)) {
/* Removal reduced to a single mapping: */
a = SCHEME_CAR(prs);
key = SCHEME_CAR(a);
val = SCHEME_CDR(a);
if (!savl->left && !savl->right) {
/* Removal reduced to a single mapping: */
val = savl->val;
key = savl->key;
} else {
val = (Scheme_Object *)savl;
key = NULL;
}
}
}
} else {
@ -2297,9 +2325,15 @@ Scheme_Hash_Tree *scheme_hash_tree_set(Scheme_Hash_Tree *tree, Scheme_Object *ke
same = SAME_OBJ(key, added->key);
}
if (!same) {
val = scheme_make_raw_pair(scheme_make_raw_pair(key, val),
scheme_make_raw_pair(scheme_make_raw_pair(added->key, added->val),
NULL));
/* Switch to sub-tree mode to hold mulitple keys for the
same code: */
static AVLNode *sn;
/* avoid intermediate allocations by constructing directly: */
sn = make_avl(NULL, 1, added->key, added->val, NULL);
sn = make_avl(NULL, 0, key, val, sn);
val = (Scheme_Object *)sn;
key = NULL;
delta = 1;
}
@ -2315,15 +2349,36 @@ Scheme_Hash_Tree *scheme_hash_tree_set(Scheme_Hash_Tree *tree, Scheme_Object *ke
delta = 1;
}
tree2 = MALLOC_ONE_TAGGED(Scheme_Hash_Tree);
memcpy(tree2, tree, sizeof(Scheme_Hash_Tree));
tree2->elems_box = NULL;
if (tree) {
tree2 = MALLOC_ONE_TAGGED(Scheme_Hash_Tree);
memcpy(tree2, tree, sizeof(Scheme_Hash_Tree));
if (delta)
tree2->count += delta;
tree2->root = root;
return tree2;
} else
return root;
}
if (delta)
tree2->count += delta;
tree2->root = root;
Scheme_Hash_Tree *scheme_hash_tree_set(Scheme_Hash_Tree *tree, Scheme_Object *key, Scheme_Object *val)
{
uintptr_t h;
int kind = (SCHEME_HASHTR_FLAGS(tree) & 0x3);
return tree2;
if (kind) {
if (kind == 1) {
h = to_unsigned_hash(scheme_equal_hash_key(key));
} else {
h = to_unsigned_hash(scheme_eqv_hash_key(key));
}
} else {
h = PTR_TO_LONG((Scheme_Object *)key);
h = h >> 2;
}
return (Scheme_Hash_Tree *)hash_tree_set(tree, key, val, h, tree->root, kind);
}
Scheme_Object *scheme_eq_hash_tree_get(Scheme_Hash_Tree *tree, Scheme_Object *key)
@ -2337,13 +2392,13 @@ Scheme_Object *scheme_eq_hash_tree_get(Scheme_Hash_Tree *tree, Scheme_Object *ke
avl = avl_find(h, tree->root);
if (avl) {
if (!avl->key) {
/* Have list of keys & vals: */
Scheme_Object *prs = avl->val, *a;
while (prs) {
a = SCHEME_CAR(prs);
if (SAME_OBJ(SCHEME_CAR(a), key))
return SCHEME_CDR(a);
prs = SCHEME_CDR(prs);
/* Have tree */
AVLNode *savl = (AVLNode *)avl->val;
intptr_t code;
code = search_nodes_eq(savl, key);
if (code >= 0) {
avl = avl_find(code, savl);
return avl->val;
}
} else if (SAME_OBJ(avl->key, key))
return avl->val;
@ -2370,18 +2425,13 @@ Scheme_Object *scheme_hash_tree_get(Scheme_Hash_Tree *tree, Scheme_Object *key)
avl = avl_find(h, tree->root);
if (avl) {
if (!avl->key) {
/* Have list of keys & vals: */
Scheme_Object *prs = avl->val, *a;
while (prs) {
a = SCHEME_CAR(prs);
if (kind == 1) {
if (scheme_equal(SCHEME_CAR(a), key))
return SCHEME_CDR(a);
} else {
if (scheme_eqv(SCHEME_CAR(a), key))
return SCHEME_CDR(a);
}
prs = SCHEME_CDR(prs);
/* Have tree */
AVLNode *savl = (AVLNode *)avl->val;
intptr_t code;
code = search_nodes(savl, key, kind);
if (code >= 0) {
avl = avl_find(code, savl);
return avl->val;
}
} else {
if (kind == 1) {
@ -2397,71 +2447,95 @@ Scheme_Object *scheme_hash_tree_get(Scheme_Hash_Tree *tree, Scheme_Object *key)
return NULL;
}
intptr_t scheme_hash_tree_next(Scheme_Hash_Tree *tree, intptr_t pos)
{
if (pos >= tree->count)
return -2;
pos++;
if (tree->count > pos)
return pos;
else
return -1;
}
static int fill_elems(AVLNode *avl, Scheme_Object *vec, intptr_t pos, intptr_t count)
XFORM_NONGCING intptr_t path_next(AVLNode *avl, intptr_t path)
{
if (!avl)
return pos;
if (avl->left)
pos = fill_elems(avl->left, vec, pos, count);
if (avl->key) {
SCHEME_VEC_ELS(vec)[pos] = avl->val;
SCHEME_VEC_ELS(vec)[pos + count] = avl->key;
pos++;
} else {
Scheme_Object *prs = avl->val, *a;
while (prs) {
a = SCHEME_CAR(prs);
SCHEME_VEC_ELS(vec)[pos] = SCHEME_CDR(a);
SCHEME_VEC_ELS(vec)[pos + count] = SCHEME_CAR(a);
pos++;
prs = SCHEME_CDR(prs);
return -1;
if (!avl->key) {
/* subtree choice */
if (path & 0x1) {
/* in subtree or right */
if (!(path & 0x2)) {
/* haven't exhausted the subtree, yet: */
path >>= 2;
path = path_next((AVLNode *)avl->val, path);
if (path > 0)
return (path << 2) | 0x1;
path = 0x1; /* move on to right */
} else {
/* we have exhausted the subtree, and we're working on right */
path >>= 1;
/* assert: path & 0x1 */
}
}
}
if (avl->right)
pos = fill_elems(avl->right, vec, pos, count);
if (path & 0x1) {
path = path_next(avl->right, path >> 1);
/* The result cannot be 0.
If the result is -1, then the following calculation preserves the -1.
If the result is positive, then we preserve the decision to go right here. */
if (avl->key)
return (path << 1) | 0x1;
else
return (path << 2) | 0x3;
}
return pos;
path = path_next(avl->left, path >> 1);
if (path > 0)
return path << 1;
/* start here */
if (avl->key)
return 0x1;
else {
/* start subtree */
path = path_next((AVLNode *)avl->val, 0);
return (path << 2) | 0x1;
}
}
XFORM_NONGCING int path_find(AVLNode *avl, intptr_t path, Scheme_Object **_key, Scheme_Object **_val)
{
if (!avl) return 0;
if (!avl->key) {
/* subtree choice */
if (path & 0x1) {
/* in subtree or right */
if (!(path & 0x2)) {
/* in subtree */
return path_find((AVLNode *)avl->val, path >> 2, _key, _val);
} else {
/* in right */
path >>= 1;
/* assert: path & 0x1 */
}
}
}
if (path & 0x1) {
if (path >> 1)
return path_find(avl->right, path >> 1, _key, _val);
else {
*_key = avl->key;
*_val = avl->val;
return 1;
}
} else
return path_find(avl->left, path >> 1, _key, _val);
}
intptr_t scheme_hash_tree_next(Scheme_Hash_Tree *tree, intptr_t pos)
{
/* Iteration uses a key where the bits say when to turn right */
return path_next(tree->root, ((pos == -1) ? 0 : pos));
}
int scheme_hash_tree_index(Scheme_Hash_Tree *tree, intptr_t pos, Scheme_Object **_key, Scheme_Object **_val)
{
Scheme_Object *elems, *elems_box;
if ((pos < 0) || (pos >= tree->count))
return 0;
elems_box = tree->elems_box;
if (elems_box)
elems = SCHEME_WEAK_BOX_VAL(elems_box);
else
elems = NULL;
if (!elems) {
AVL_ASSERT_ONLY(int total_pos);
elems = scheme_make_vector(tree->count * 2, NULL);
AVL_ASSERT_ONLY(total_pos = ) fill_elems(tree->root, elems, 0, tree->count);
AVL_ASSERT(total_pos == tree->count);
elems_box = scheme_make_weak_box(elems);
tree->elems_box = elems_box;
}
*_val = SCHEME_VEC_ELS(elems)[pos];
*_key = SCHEME_VEC_ELS(elems)[tree->count + pos];
return 1;
return path_find(tree->root, pos, _key, _val);
}
int scheme_hash_tree_equal_rec(Scheme_Hash_Tree *t1, Scheme_Hash_Tree *t2, void *eql)
@ -2473,7 +2547,7 @@ int scheme_hash_tree_equal_rec(Scheme_Hash_Tree *t1, Scheme_Hash_Tree *t2, void
|| ((SCHEME_HASHTR_FLAGS(t1) & 0x3) != (SCHEME_HASHTR_FLAGS(t2) & 0x3)))
return 0;
for (i = t1->count; i--; ) {
for (i = scheme_hash_tree_next(t1, -1); i != -1; i = scheme_hash_tree_next(t1, i)) {
scheme_hash_tree_index(t1, i, &k, &v);
v2 = scheme_hash_tree_get(t2, k);
if (!v2)

View File

@ -2160,7 +2160,7 @@ static Scheme_Object *hash_table_copy(int argc, Scheme_Object *argv[])
else
naya = scheme_make_hash_table(SCHEME_hash_ptr);
for (i = t->count; i--; ) {
for (i = scheme_hash_tree_next(t, -1); i != -1; i = scheme_hash_tree_next(t, i)) {
scheme_hash_tree_index(t, i, &k, &val);
if (!SAME_OBJ((Scheme_Object *)t, v))
val = scheme_chaperone_hash_traversal_get(v, k, &k);

View File

@ -4580,7 +4580,7 @@ void scheme_check_unsafe_accessible(Scheme_Object *insp, Scheme_Env *from_env)
int i;
Scheme_Object *k, *v;
for (i = t->count; i--; ) {
for (i = scheme_hash_tree_next(t, -1); i != -1; i = scheme_hash_tree_next(t, i)) {
scheme_hash_tree_index(t, i, &k, &v);
insp = k;
if (scheme_module_protected_wrt(unsafe_env->guard_insp, insp)) {

View File

@ -9,7 +9,6 @@ static int hash_tree_val_MARK(void *p, struct NewGC *gc) {
Scheme_Hash_Tree *ht = (Scheme_Hash_Tree *)p;
gcMARK2(ht->root, gc);
gcMARK2(ht->elems_box, gc);
return
gcBYTES_TO_WORDS(sizeof(Scheme_Hash_Tree));
@ -19,7 +18,6 @@ static int hash_tree_val_FIXUP(void *p, struct NewGC *gc) {
Scheme_Hash_Tree *ht = (Scheme_Hash_Tree *)p;
gcFIXUP2(ht->root, gc);
gcFIXUP2(ht->elems_box, gc);
return
gcBYTES_TO_WORDS(sizeof(Scheme_Hash_Tree));

View File

@ -1483,7 +1483,6 @@ hash_tree_val {
Scheme_Hash_Tree *ht = (Scheme_Hash_Tree *)p;
gcMARK2(ht->root, gc);
gcMARK2(ht->elems_box, gc);
size:
gcBYTES_TO_WORDS(sizeof(Scheme_Hash_Tree));

View File

@ -2301,7 +2301,7 @@ print(Scheme_Object *obj, int notdisplay, int compact, Scheme_Hash_Table *ht,
Scheme_Hash_Table *t;
Scheme_Hash_Tree *tr;
Scheme_Object **keys, **vals, *val, *key, *orig;
int i, size, did_one = 0;
int i, size, did_one = 0, pos;
orig = obj;
if (SCHEME_NP_CHAPERONEP(obj))
@ -2362,10 +2362,12 @@ print(Scheme_Object *obj, int notdisplay, int compact, Scheme_Hash_Table *ht,
vals = NULL;
size = tr->count;
}
pos = -1;
for (i = 0; i < size; i++) {
if (!vals || vals[i]) {
if (!vals) {
scheme_hash_tree_index(tr, i, &key, &val);
pos = scheme_hash_tree_next(tr, pos);
scheme_hash_tree_index(tr, pos, &key, &val);
if (!SAME_OBJ(obj, orig))
val = scheme_chaperone_hash_traversal_get(orig, key, &key);
} else {

View File

@ -2182,7 +2182,7 @@ static Scheme_Object *resolve_references(Scheme_Object *obj,
kind = 0;
t = (Scheme_Hash_Tree *)obj;
lst = scheme_null;
for (i = t->count; i--; ) {
for (i = scheme_hash_tree_next(t, -1); i != -1; i = scheme_hash_tree_next(t, i)) {
scheme_hash_tree_index(t, i, &key, &val);
lst = scheme_make_pair(scheme_make_pair(key, val), lst);
}
@ -2210,7 +2210,6 @@ static Scheme_Object *resolve_references(Scheme_Object *obj,
t->count = base->count;
t->root = base->root;
t->elems_box = base->elems_box;
} else if (SCHEME_HASHTP(obj)) {
int i;
Scheme_Object *key, *val, *l = scheme_null, *orig_l;

View File

@ -507,8 +507,8 @@ MZ_EXTERN Scheme_Hash_Tree *scheme_make_hash_tree(int kind);
MZ_EXTERN Scheme_Hash_Tree *scheme_hash_tree_set(Scheme_Hash_Tree *tree, Scheme_Object *key, Scheme_Object *val);
MZ_EXTERN Scheme_Object *scheme_hash_tree_get(Scheme_Hash_Tree *tree, Scheme_Object *key);
XFORM_NONGCING MZ_EXTERN Scheme_Object *scheme_eq_hash_tree_get(Scheme_Hash_Tree *tree, Scheme_Object *key);
MZ_EXTERN intptr_t scheme_hash_tree_next(Scheme_Hash_Tree *tree, intptr_t pos);
MZ_EXTERN int scheme_hash_tree_index(Scheme_Hash_Tree *tree, intptr_t pos, Scheme_Object **_key, Scheme_Object **_val);
XFORM_NONGCING MZ_EXTERN intptr_t scheme_hash_tree_next(Scheme_Hash_Tree *tree, intptr_t pos);
XFORM_NONGCING MZ_EXTERN int scheme_hash_tree_index(Scheme_Hash_Tree *tree, intptr_t pos, Scheme_Object **_key, Scheme_Object **_val);
MZ_EXTERN int scheme_hash_tree_equal(Scheme_Hash_Tree *t1, Scheme_Hash_Tree *t2);
MZ_EXTERN int scheme_is_hash_tree_equal(Scheme_Object *o);
MZ_EXTERN int scheme_is_hash_tree_eqv(Scheme_Object *o);

View File

@ -790,7 +790,6 @@ struct Scheme_Hash_Tree
Scheme_Inclhash_Object iso; /* 0x1 flag => equal?-based hashing; 0x2 flag => eqv?-based hashing */
intptr_t count;
struct AVLNode *root;
Scheme_Object *elems_box; /* vector in a weak box */
};
#define SCHEME_HASHTR_FLAGS(tr) MZ_OPT_HASH_KEY(&(tr)->iso)