From 3f7db4d968b7213d35a698f18245e0fda2fbebd3 Mon Sep 17 00:00:00 2001 From: Wolfgang Spraul Date: Sun, 29 Jul 2012 05:59:59 +0200 Subject: [PATCH] more powerful hashed string array, high-speed search and replace utility hstrrep --- .gitignore | 2 + Makefile | 8 ++- README | 7 +- helper.c | 208 +++++++++++++++++++++++++++++++++++++++++++++++++++++ helper.h | 37 ++++++++++ model.c | 159 +++------------------------------------- model.h | 25 +------ new_fp.c | 1 - 8 files changed, 268 insertions(+), 179 deletions(-) diff --git a/.gitignore b/.gitignore index 166e408..ef3df0c 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,5 @@ helper.o model.o new_fp new_fp.o +hstrrep +hstrrep.o diff --git a/Makefile b/Makefile index c6ed472..9ed94ac 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ CFLAGS = -Wall -g LDLIBS = -lxml2 -all: bit2txt draw_svg_tiles new_fp xc6slx9.svg xc6slx9.fp +all: bit2txt draw_svg_tiles new_fp hstrrep xc6slx9.svg xc6slx9.fp xc6slx9.svg: draw_svg_tiles ./draw_svg_tiles | xmllint --pretty 1 - > $@ @@ -28,12 +28,14 @@ model.o: model.c model.h new_fp.o: new_fp.c model.h helper.h -draw_svg_tiles.o: draw_svg_tiles.c model.h +draw_svg_tiles.o: draw_svg_tiles.c model.h helper.h -draw_svg_tiles: draw_svg_tiles.o model.o +draw_svg_tiles: draw_svg_tiles.o model.o helper.o new_fp: new_fp.o model.o helper.o +hstrrep: hstrrep.o helper.o + clean: rm -f bit2txt bit2txt.o \ draw_svg_tiles draw_svg_tiles.o \ diff --git a/README b/README index a61246b..0c6308b 100644 --- a/README +++ b/README @@ -9,7 +9,8 @@ Design Principles Utilities -- new_fp creates empty .fp floorplan file +- new_fp creates empty .fp floorplan file - draw_svg_tiles draws a simple .svg showing tile types -- fp2bit converts .fp floorplan into .bit bitstream -- bit2txt dumps .bit bitstream as text +- fp2bit converts .fp floorplan into .bit bitstream +- bit2txt dumps .bit bitstream as text +- hstrrep high-speed hashed array based search and replace util diff --git a/helper.c b/helper.c index 
38f12b7..b57ede7 100644 --- a/helper.c +++ b/helper.c @@ -654,6 +654,15 @@ int get_vm_mb() return (vm_size+1023)/1024; }
+int get_random()
+{
+	int random_num = 0, random_f = open("/dev/urandom", O_RDONLY);
+	if (random_f < 0 || read(random_f, &random_num, sizeof(random_num)) != sizeof(random_num))
+		random_num = 0; // open or read failed: report 0, never uninitialized data
+	if (random_f >= 0) close(random_f);
+	return random_num;
+}
+
 int compare_with_number(const char* a, const char* b) { int i, a_i, b_i, non_numeric_result, a_num, b_num; @@ -683,3 +692,202 @@ int compare_with_number(const char* a, const char* b) b_num = strtol(&b[i], 0 /* endptr */, 10); return a_num - b_num; }
+
+// Dan Bernstein's hash function
+uint32_t hash_djb2(const unsigned char* str)
+{
+	uint32_t hash = 5381;
+	int c;
+
+	while ((c = *str++) != 0)
+		hash = ((hash << 5) + hash) + c; /* hash * 33 + c */
+	return hash;
+}
+
+//
+// The format of each entry in a bin is:
+//   uint32_t idx
+//   uint16_t entry len including the 6-byte header (BIN_STR_HEADER)
+//   char[]   zero-terminated string
+//
+// Offsets point to the zero-terminated string, so the len
+// is at off-2, the index at off-6. offset0 can thus be
+// used as a special value to signal 'no entry'.
+//
+
+#define BIN_STR_HEADER	(4+2)
+#define BIN_MIN_OFFSET	BIN_STR_HEADER
+#define BIN_INCREMENT	32768
+
+const char* strarray_lookup(struct hashed_strarray* array, int idx)
+{
+	int bin, offset;
+	// Only indices 1..highest_index have a slot (at idx-1); anything else yields 0.
+	if (!array->index_to_bin || !array->bin_offsets
+		|| idx <= STRIDX_NO_ENTRY || idx > array->highest_index)
+		return 0;
+	bin = array->index_to_bin[idx-1];
+	offset = array->bin_offsets[idx-1];
+
+	// bin 0 offset 0 is a special value that signals 'no
+	// entry'. Normal offsets cannot be less than BIN_MIN_OFFSET.
+	if (!bin && !offset) return 0;
+
+	if (!array->bin_strings[bin] || offset >= array->bin_len[bin]
+		|| offset < BIN_MIN_OFFSET) {
+		// This really should never happen and is an internal error.
+		fprintf(stderr, "Internal error.\n");
+		return 0;
+	}
+
+	return &array->bin_strings[bin][offset];
+}
+
+// Sets *idx to the index of str, or STRIDX_NO_ENTRY if str is not stored. Returns 0, or -1 on an internal error.
+int strarray_find(struct hashed_strarray* array, const char* str, int* idx)
+{
+	int bin, search_off;
+	uint32_t stored_idx;
+	uint16_t entry_len;
+
+	*idx = STRIDX_NO_ENTRY; // also what the caller sees on the error path
+	bin = hash_djb2((const unsigned char*) str) % array->num_bins;
+	if (!array->bin_strings[bin]) return 0;
+	// walk the bin's entries; headers sit at unaligned offsets, hence memcpy
+	for (search_off = BIN_MIN_OFFSET; search_off < array->bin_len[bin]; search_off += entry_len) {
+		memcpy(&stored_idx, &array->bin_strings[bin][search_off-BIN_STR_HEADER], sizeof(stored_idx));
+		memcpy(&entry_len, &array->bin_strings[bin][search_off-2], sizeof(entry_len));
+		if (strcmp(&array->bin_strings[bin][search_off], str)) continue;
+		if (!stored_idx) {
+			fprintf(stderr, "Internal error - index 0.\n");
+			return -1;
+		}
+		*idx = stored_idx+1;
+		return 0;
+	}
+	return 0;
+}
+
+int s_stash_at_bin(struct hashed_strarray* array, const char* str, int idx, int bin);
+
+// Sets *idx to the index of str, storing str first if it is not in the array yet. Returns 0 on success.
+int strarray_add(struct hashed_strarray* array, const char* str, int* idx)
+{
+	int bin, i, free_index, rc, start_index;
+	uint32_t hash;
+
+	rc = strarray_find(array, str, idx);
+	if (rc) return rc;
+	if (*idx != STRIDX_NO_ENTRY) return 0;
+	hash = hash_djb2((const unsigned char*) str);
+	// search free index
+	start_index = hash % array->highest_index;
+	for (i = 0; i < array->highest_index; i++) {
+		int cur_i = (start_index+i)%array->highest_index;
+		if (!cur_i) continue; // never issue index 0
+		if (!array->bin_offsets[cur_i])
+			break;
+	}
+	if (i >= array->highest_index) {
+		fprintf(stderr, "All array indices full.\n");
+		return -1;
+	}
+	free_index = (start_index+i)%array->highest_index;
+	bin = hash % array->num_bins;
+	rc = s_stash_at_bin(array, str, free_index, bin);
+	if (rc) return rc;
+	*idx = free_index + 1;
+	return 0;
+}
+
+int s_stash_at_bin(struct hashed_strarray* array, const char* str, int idx, int bin)
+{
+	int str_len = strlen(str);
+	if (idx < 0 || idx >= array->highest_index || BIN_STR_HEADER+str_len+1 > 0xFFFF) { // no such slot, or entry too long for its 16-bit length field
+		fprintf(stderr, "String index or length out of range.\n");
+		return -1;
+	}
+	// check whether bin needs expansion
+	if (!(array->bin_len[bin]%BIN_INCREMENT)
+		||
array->bin_len[bin]%BIN_INCREMENT + BIN_STR_HEADER+str_len+1 > BIN_INCREMENT)
+	{
+		int new_alloclen = ((array->bin_len[bin] + BIN_STR_HEADER+str_len+1)
+			/ BIN_INCREMENT + 1) * BIN_INCREMENT;
+		void* new_ptr = realloc(array->bin_strings[bin], new_alloclen);
+		if (!new_ptr) {
+			fprintf(stderr, "Out of memory.\n");
+			return -1;
+		}
+		array->bin_strings[bin] = new_ptr;
+	}
+	// append new string at end of bin; the header lands at an arbitrary byte offset, hence memcpy
+	uint32_t hdr_idx = idx;
+	uint16_t hdr_len = BIN_STR_HEADER+str_len+1;
+	memcpy(&array->bin_strings[bin][array->bin_len[bin]], &hdr_idx, sizeof(hdr_idx));
+	memcpy(&array->bin_strings[bin][array->bin_len[bin]+4], &hdr_len, sizeof(hdr_len));
+	strcpy(&array->bin_strings[bin][array->bin_len[bin]+BIN_STR_HEADER], str);
+	array->index_to_bin[idx] = bin;
+	array->bin_offsets[idx] = array->bin_len[bin]+BIN_STR_HEADER;
+	array->bin_len[bin] += BIN_STR_HEADER+str_len+1;
+	return 0;
+}
+
+int strarray_stash(struct hashed_strarray* array, const char* str, int idx)
+{
+	// The bin is just a random number here, because find
+	// cannot be used after stash anyway, only lookup can.
+	return s_stash_at_bin(array, str, idx-1, idx % array->num_bins);
+}
+
+// Counts the index slots that currently hold a string.
+int strarray_used_slots(struct hashed_strarray* array)
+{
+	int i, num_used_slots = 0;
+	if (!array->bin_offsets) return 0;
+	for (i = 0; i < array->highest_index; i++) {
+		if (array->bin_offsets[i])
+			num_used_slots++;
+	}
+	return num_used_slots;
+}
+
+// Prepares an empty array for indices 1..highest_index. Returns 0, or -1 if out of memory.
+int strarray_init(struct hashed_strarray* array, int highest_index)
+{
+	memset(array, 0, sizeof(*array));
+	array->highest_index = highest_index;
+	// at least one bin, otherwise 'hash % num_bins' would divide by zero
+	array->num_bins = highest_index >= 64 ? highest_index / 64 : 1;
+
+	// calloc, because zeroed pointers, lengths and offsets are what mark bins and slots as unused
+	array->bin_strings = calloc(array->num_bins, sizeof(*array->bin_strings));
+	array->bin_len = calloc(array->num_bins, sizeof(*array->bin_len));
+	array->bin_offsets = calloc(array->highest_index, sizeof(*array->bin_offsets));
+	array->index_to_bin = calloc(array->highest_index, sizeof(*array->index_to_bin));
+
+	if (!array->bin_strings || !array->bin_len
+		|| !array->bin_offsets || !array->index_to_bin) {
+		fprintf(stderr, "Out of memory in %s:%i\n", __FILE__, __LINE__);
+		strarray_free(array); // frees the partial allocations and nulls the pointers
+		return -1;
+	}
+	return 0;
+}
+
+// Releases all bins and bookkeeping arrays; safe on a zeroed or partially set up array.
+void strarray_free(struct hashed_strarray* array)
+{
+	int i;
+	// bin_strings is a heap pointer now, so the bin count is num_bins;
+	// sizeof(ptr)/sizeof(ptr[0]) would only ever visit the first bin.
+	for (i = 0; array->bin_strings && i < array->num_bins; i++)
+		free(array->bin_strings[i]);
+	free(array->bin_strings);
+	array->bin_strings = 0;
+	free(array->bin_len);
+	array->bin_len = 0;
+	free(array->bin_offsets);
+	array->bin_offsets = 0;
+	free(array->index_to_bin);
+	array->index_to_bin = 0;
+}
diff --git a/helper.h b/helper.h index 8bbe219..b9c1f3a 100644 --- a/helper.h +++ b/helper.h @@ -9,9 +9,12 @@ #include #include #include +#include +#include #include #define PROGRAM_REVISION "2012-06-27" +#define MACRO_STR(arg) #arg void printf_help(); @@ -69,4 +72,38 @@ void printf_extrabits(uint8_t* maj_bits, int start_minor, int num_minors, uint64_t read_lut64(uint8_t* two_minors, int off_in_frame); int 
get_vm_mb();
+int get_random();
 int compare_with_number(const char* a, const char* b);
+
+uint32_t hash_djb2(const unsigned char* str);
+
+// Strings are distributed among bins. Each bin is
+// one continuous stream of zero-terminated strings
+// prefixed with a 32+16=48-bit header. The allocation
+// increment for each bin is 32k.
+struct hashed_strarray
+{
+	int highest_index; // number of index slots; valid indices are 1..highest_index
+	uint32_t* bin_offsets; // per slot: string offset in its bin; min offset is 6, 0 means no entry
+	uint16_t* index_to_bin; // per slot: bin that holds the string
+	char** bin_strings; // per bin: heap buffer with the packed entries
+	int* bin_len; // per bin: bytes used, i.e. points behind the last zero-termination
+	int num_bins; // number of bins, set by strarray_init()
+};
+
+#define STRIDX_64K	0xFFFF
+#define STRIDX_1M	1000000
+
+int strarray_init(struct hashed_strarray* array, int highest_index);
+void strarray_free(struct hashed_strarray* array);
+
+const char* strarray_lookup(struct hashed_strarray* array, int idx);
+// The found or created index will never be 0, so the caller
+// can use 0 as a special value to indicate 'no string'.
+#define STRIDX_NO_ENTRY 0
+int strarray_find(struct hashed_strarray* array, const char* str, int* idx);
+int strarray_add(struct hashed_strarray* array, const char* str, int* idx);
+// If you stash a string to a fixed index, you cannot use strarray_find()
+// anymore, only strarray_lookup().
+int strarray_stash(struct hashed_strarray* array, const char* str, int idx); +int strarray_used_slots(struct hashed_strarray* array); diff --git a/model.c b/model.c index a5d7e56..a7a4e86 100644 --- a/model.c +++ b/model.c @@ -176,7 +176,7 @@ int fpga_build_model(struct fpga_model* model, int fpga_rows, const char* column strncpy(model->cfg_columns, columns, sizeof(model->cfg_columns)-1); strncpy(model->cfg_left_wiring, left_wiring, sizeof(model->cfg_left_wiring)-1); strncpy(model->cfg_right_wiring, right_wiring, sizeof(model->cfg_right_wiring)-1);
-	strarray_init(&model->str);
+	if ((rc = strarray_init(&model->str, STRIDX_64K))) return rc; // allocates now, so it can fail
 rc = init_tiles(model); if (rc) return rc; @@ -239,10 +239,16 @@ int add_conn_uni(struct fpga_model* model, int y1, int x1, const char* name1, in int conn_start, num_conn_point_dests_for_this_wire, rc, i, j; tile1 = &model->tiles[y1 * model->tile_x_range + x1]; - rc = strarray_find_or_add(&model->str, name1, &name1_i); - if (!rc) return -1; - rc = strarray_find_or_add(&model->str, name2, &name2_i); - if (!rc) return -1; + rc = strarray_add(&model->str, name1, &i); + if (rc) return rc; + rc = strarray_add(&model->str, name2, &j); + if (rc) return rc; + if (i > 0xFFFF || j > 0xFFFF) { + fprintf(stderr, "Internal error in %s:%i\n", __FILE__, __LINE__); + return -1; + } + name1_i = i; + name2_i = j; // Search for a connection set under name1. for (i = 0; i < tile1->num_conn_point_names; i++) { @@ -1773,146 +1779,3 @@ const char* fpga_tiletype_str(enum fpga_tile_type type) return fpga_ttstr[type]; } -// Dan Bernstein's hash function -uint32_t hash_djb2(const unsigned char* str) -{ - uint32_t hash = 5381; - int c; - - while ((c = *str++) != 0) - hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ - return hash; -} - -// -// The format of each entry in a bin is. 
-// uint16_t idx -// uint16_t entry len including 4-byte header -// char[] zero-terminated string -// -// Offsets point to the zero-terminated string, so the len -// is at off-2, the index at off-4. bin0 offset0 can thus be -// used as a special value to signal 'no entry'. -// - -const char* strarray_lookup(struct hashed_strarray* array, uint16_t idx) -{ - int bin, offset; - - if (!array->index_to_bin || !array->bin_offsets || !idx) - return 0; - - bin = array->index_to_bin[idx]; - offset = array->bin_offsets[idx]; - - // bin 0 offset 0 is a special value that signals 'no - // entry'. Normal offsets cannot be less than 4. - if (!bin && !offset) return 0; - - if (!array->bin_strings[bin] || offset >= array->bin_len[bin] - || offset < 4) { - // This really should never happen and is an internal error. - fprintf(stderr, "Internal error.\n"); - return 0; - } - - return &array->bin_strings[bin][offset]; -} - -#define BIN_INCREMENT 32768 - -int strarray_find_or_add(struct hashed_strarray* array, const char* str, - uint16_t* idx) -{ - int bin, search_off, str_len, i, free_index; - int new_alloclen, start_index; - unsigned long hash; - void* new_ptr; - - hash = hash_djb2((const unsigned char*) str); - str_len = strlen(str); - bin = hash % (sizeof(array->bin_strings)/sizeof(array->bin_strings[0])); - // iterate over strings in bin to find match - if (array->bin_strings[bin]) { - search_off = 4; - while (search_off < array->bin_len[bin]) { - if (!strcmp(&array->bin_strings[bin][search_off], - str)) { - *idx = *(uint16_t*)&array->bin_strings - [bin][search_off-4]; - if (!(*idx)) { - fprintf(stderr, "Internal error - index 0.\n"); - return 0; - } - return 1; - } - search_off += *(uint16_t*)&array->bin_strings - [bin][search_off-2]; - } - } - // search free index - start_index = (uint16_t) ((hash >> 16) ^ (hash & 0xFFFF)); - for (i = 0; i < HASHARRAY_NUM_INDICES; i++) { - int cur_i = (start_index+i)%HASHARRAY_NUM_INDICES; - if (!cur_i) // never issue index 0 - continue; - if 
(!array->bin_offsets[cur_i]) - break; - } - if (i >= HASHARRAY_NUM_INDICES) { - fprintf(stderr, "All array indices full.\n"); - return 0; - } - free_index = (start_index+i)%HASHARRAY_NUM_INDICES; - // check whether bin needs expansion - if (!(array->bin_len[bin]%BIN_INCREMENT) - || array->bin_len[bin]%BIN_INCREMENT + 4+str_len+1 > BIN_INCREMENT) - { - new_alloclen = - ((array->bin_len[bin] - + 4+str_len+1)/BIN_INCREMENT + 1) - * BIN_INCREMENT; - new_ptr = realloc(array->bin_strings[bin], new_alloclen); - if (!new_ptr) { - fprintf(stderr, "Out of memory.\n"); - return 0; - } - array->bin_strings[bin] = new_ptr; - } - // append new string at end of bin - *(uint16_t*)&array->bin_strings[bin][array->bin_len[bin]] = free_index; - *(uint16_t*)&array->bin_strings[bin][array->bin_len[bin]+2] = 4+str_len+1; - strcpy(&array->bin_strings[bin][array->bin_len[bin]+4], str); - array->index_to_bin[free_index] = bin; - array->bin_offsets[free_index] = array->bin_len[bin]+4; - array->bin_len[bin] += 4+str_len+1; - *idx = free_index; - return 1; -} - -int strarray_used_slots(struct hashed_strarray* array) -{ - int i, num_used_slots; - num_used_slots = 0; - if (!array->bin_offsets) return 0; - for (i = 0; i < sizeof(array->bin_offsets)/sizeof(*array->bin_offsets); i++) { - if (array->bin_offsets[i]) - num_used_slots++; - } - return num_used_slots; -} - -void strarray_init(struct hashed_strarray* array) -{ - memset(array, 0, sizeof(*array)); -} - -void strarray_free(struct hashed_strarray* array) -{ - int i; - for (i = 0; i < sizeof(array->bin_strings)/ - sizeof(array->bin_strings[0]); i++) { - free(array->bin_strings[i]); - array->bin_strings[i] = 0; - } -} diff --git a/model.h b/model.h index ec3ef1b..bbdf6db 100644 --- a/model.h +++ b/model.h @@ -13,19 +13,7 @@ #include #define MACRO_STR(arg) #arg -#define HASHARRAY_NUM_INDICES (256*256) - -// Strings are distributed among 1024 bins. 
Each bin -// is one continuous stream of zero-terminated strings -// prefixed with a 2*16-bit header. The allocation -// increment for each bin is 32k. -struct hashed_strarray -{ - uint32_t bin_offsets[HASHARRAY_NUM_INDICES]; // min offset is 4, 0 means no entry - uint16_t index_to_bin[HASHARRAY_NUM_INDICES]; - char* bin_strings[1024]; - int bin_len[1024]; // points behind the last zero-termination -}; +#include "helper.h" // // columns @@ -241,14 +229,3 @@ int fpga_build_model(struct fpga_model* model, void fpga_free_model(struct fpga_model* model); const char* fpga_tiletype_str(enum fpga_tile_type type); - -uint32_t hash_djb2(const unsigned char* str); - -const char* strarray_lookup(struct hashed_strarray* array, uint16_t idx); -// The found or created index will never be 0, so the caller -// can use 0 as a special value to indicate 'no string'. -int strarray_find_or_add(struct hashed_strarray* array, const char* str, - uint16_t* idx); -int strarray_used_slots(struct hashed_strarray* array); -void strarray_init(struct hashed_strarray* array); -void strarray_free(struct hashed_strarray* array); diff --git a/new_fp.c b/new_fp.c index 98be4bc..caac3a1 100644 --- a/new_fp.c +++ b/new_fp.c @@ -13,7 +13,6 @@ #include #include "model.h" -#include "helper.h" #define PRINT_FLAG(f) if (tf & f) { printf (" %s", #f); tf &= ~f; }