more powerful hashed string array, high-speed search and replace utility

hstrrep
This commit is contained in:
Wolfgang Spraul 2012-07-29 05:59:59 +02:00
parent f2f5e5e027
commit 3f7db4d968
8 changed files with 268 additions and 179 deletions

2
.gitignore vendored
View File

@ -6,3 +6,5 @@ helper.o
model.o
new_fp
new_fp.o
hstrrep
hstrrep.o

View File

@ -10,7 +10,7 @@
CFLAGS = -Wall -g
LDLIBS = -lxml2
all: bit2txt draw_svg_tiles new_fp xc6slx9.svg xc6slx9.fp
all: bit2txt draw_svg_tiles new_fp hstrrep xc6slx9.svg xc6slx9.fp
xc6slx9.svg: draw_svg_tiles
./draw_svg_tiles | xmllint --pretty 1 - > $@
@ -28,12 +28,14 @@ model.o: model.c model.h
new_fp.o: new_fp.c model.h helper.h
draw_svg_tiles.o: draw_svg_tiles.c model.h
draw_svg_tiles.o: draw_svg_tiles.c model.h helper.h
draw_svg_tiles: draw_svg_tiles.o model.o
draw_svg_tiles: draw_svg_tiles.o model.o helper.o
new_fp: new_fp.o model.o helper.o
hstrrep: hstrrep.o helper.o
clean:
rm -f bit2txt bit2txt.o \
draw_svg_tiles draw_svg_tiles.o \

1
README
View File

@ -13,3 +13,4 @@ Utilities
- draw_svg_tiles draws a simple .svg showing tile types
- fp2bit converts .fp floorplan into .bit bitstream
- bit2txt dumps .bit bitstream as text
- hstrrep high-speed hashed array based search and replace util

208
helper.c
View File

@ -654,6 +654,15 @@ int get_vm_mb()
return (vm_size+1023)/1024;
}
// Returns a random int read from /dev/urandom.
//
// If the device cannot be opened, or does not deliver a complete int,
// a message is printed to stderr and 0 is returned instead of an
// indeterminate value.
int get_random()
{
	int random_f;
	int random_num = 0;
	ssize_t num_read;

	random_f = open("/dev/urandom", O_RDONLY);
	if (random_f < 0) {
		fprintf(stderr, "Cannot open /dev/urandom.\n");
		return 0;
	}
	num_read = read(random_f, &random_num, sizeof(random_num));
	close(random_f);
	// A short or failed read leaves random_num only partially
	// filled, so do not hand that out as a random number.
	if (num_read != (ssize_t) sizeof(random_num)) {
		fprintf(stderr, "Cannot read from /dev/urandom.\n");
		return 0;
	}
	return random_num;
}
int compare_with_number(const char* a, const char* b)
{
int i, a_i, b_i, non_numeric_result, a_num, b_num;
@ -683,3 +692,202 @@ int compare_with_number(const char* a, const char* b)
b_num = strtol(&b[i], 0 /* endptr */, 10);
return a_num - b_num;
}
// Dan Bernstein's djb2 string hash: starting from 5381, every byte of
// the zero-terminated string is folded in as hash = hash*33 + byte.
uint32_t hash_djb2(const unsigned char* str)
{
	uint32_t h = 5381;
	const unsigned char* p;

	for (p = str; *p != 0; p++)
		h = h * 33 + *p;
	return h;
}
//
// The format of each entry in a bin is:
// uint32_t idx
// uint16_t entry len including 6-byte header
// char[] zero-terminated string
//
// Offsets point to the zero-terminated string, so the len
// is at off-2, the index at off-6. offset0 can thus be
// used as a special value to signal 'no entry'.
//
#define BIN_STR_HEADER (4+2)
#define BIN_MIN_OFFSET BIN_STR_HEADER
#define BIN_INCREMENT 32768
// Returns the string stored under index idx, or 0 if there is none.
//
// idx is the 1-based index handed out by strarray_add() or passed to
// strarray_stash(); internally the slot idx-1 is used. 0 is returned
// for STRIDX_NO_ENTRY, for an index outside [1, highest_index], for an
// unused slot, and (with a message on stderr) when the slot points
// outside of its bin.
const char* strarray_lookup(struct hashed_strarray* array, int idx)
{
	int bin, offset;

	if (!array->index_to_bin || !array->bin_offsets
	    || idx == STRIDX_NO_ENTRY)
		return 0;
	// Both tables have highest_index elements, anything else
	// would read outside of them.
	if (idx < 0 || idx > array->highest_index)
		return 0;

	bin = array->index_to_bin[idx-1];
	offset = array->bin_offsets[idx-1];

	// bin 0 offset 0 is a special value that signals 'no
	// entry'. Normal offsets cannot be less than BIN_MIN_OFFSET.
	if (!bin && !offset) return 0;

	if (bin >= array->num_bins
	    || !array->bin_strings[bin]
	    || offset >= array->bin_len[bin]
	    || offset < BIN_MIN_OFFSET) {
		// This really should never happen and is an internal error.
		fprintf(stderr, "Internal error.\n");
		return 0;
	}
	return &array->bin_strings[bin][offset];
}
int strarray_find(struct hashed_strarray* array, const char* str, int* idx)
{
int bin, search_off, i;
uint32_t hash;
hash = hash_djb2((const unsigned char*) str);
bin = hash % array->num_bins;
// iterate over strings in bin to find match
if (array->bin_strings[bin]) {
search_off = BIN_MIN_OFFSET;
while (search_off < array->bin_len[bin]) {
if (!strcmp(&array->bin_strings[bin][search_off], str)) {
i = *(uint32_t*)&array->bin_strings[bin][search_off-6];
if (!i) {
fprintf(stderr, "Internal error - index 0.\n");
return -1;
}
*idx = i+1;
return 0;
}
search_off += *(uint16_t*)&array->bin_strings[bin][search_off-2];
}
}
*idx = STRIDX_NO_ENTRY;
return 0;
}
int s_stash_at_bin(struct hashed_strarray* array, const char* str, int idx, int bin);

// Makes sure str is in the array and reports its index in *idx.
//
// If strarray_find() already knows the string, its index is returned
// unchanged. Otherwise a free slot is probed linearly, starting at
// hash % highest_index and never using slot 0, and the string is
// appended to the bin hash % num_bins. The index reported is the slot
// number + 1.
// Returns 0 on success, or the non-zero result of strarray_find() or
// s_stash_at_bin(), or -1 when all slots are taken.
int strarray_add(struct hashed_strarray* array, const char* str, int* idx)
{
	unsigned long hash;
	int first_slot, probe, slot, target_bin, err;

	err = strarray_find(array, str, idx);
	if (err)
		return err;
	if (*idx != STRIDX_NO_ENTRY)
		return 0;

	hash = hash_djb2((const unsigned char*) str);

	// search free index
	first_slot = hash % array->highest_index;
	slot = 0;
	probe = 0;
	while (probe < array->highest_index) {
		slot = (first_slot+probe) % array->highest_index;
		// never issue index 0
		if (slot && !array->bin_offsets[slot])
			break;
		probe++;
	}
	if (probe >= array->highest_index) {
		fprintf(stderr, "All array indices full.\n");
		return -1;
	}

	target_bin = hash % array->num_bins;
	err = s_stash_at_bin(array, str, slot, target_bin);
	if (err)
		return err;
	*idx = slot + 1;
	return 0;
}
// Appends str as a new entry to the end of bin number 'bin' and
// records it under the 0-based slot idx.
//
// The bin buffer is enlarged in steps of BIN_INCREMENT when the entry
// does not fit. Because the entry length is stored in 16 bits, strings
// whose entry (header + string + terminating 0) exceeds UINT16_MAX
// bytes are rejected.
// Returns 0 on success, -1 on an out-of-range slot or bin, an overlong
// string, or when memory runs out.
int s_stash_at_bin(struct hashed_strarray* array, const char* str, int idx, int bin)
{
	size_t str_len = strlen(str);
	size_t entry_len = BIN_STR_HEADER + str_len + 1;
	uint32_t idx_field;
	uint16_t len_field;
	int used, in_chunk;
	char* entry;

	if (idx < 0 || idx >= array->highest_index
	    || bin < 0 || bin >= array->num_bins) {
		fprintf(stderr, "Internal error - index or bin out of range.\n");
		return -1;
	}
	if (entry_len > UINT16_MAX) {
		// The length would be truncated in the header and
		// break the iteration over the bin.
		fprintf(stderr, "String too long.\n");
		return -1;
	}

	// check whether bin needs expansion
	used = array->bin_len[bin];
	in_chunk = used % BIN_INCREMENT;
	if (!in_chunk || in_chunk + (int) entry_len > BIN_INCREMENT) {
		int new_alloclen =
			((used + (int) entry_len)/BIN_INCREMENT + 1)
				* BIN_INCREMENT;
		void* new_ptr = realloc(array->bin_strings[bin], new_alloclen);
		if (!new_ptr) {
			fprintf(stderr, "Out of memory.\n");
			return -1;
		}
		array->bin_strings[bin] = new_ptr;
	}

	// append new string at end of bin; entries are packed without
	// padding, so the header fields are copied rather than stored
	// through casted (possibly misaligned) pointers
	entry = &array->bin_strings[bin][used];
	idx_field = idx;
	len_field = (uint16_t) entry_len;
	memcpy(entry, &idx_field, sizeof(idx_field));
	memcpy(entry + 4, &len_field, sizeof(len_field));
	memcpy(entry + BIN_STR_HEADER, str, str_len + 1);

	array->index_to_bin[idx] = bin;
	array->bin_offsets[idx] = used + BIN_STR_HEADER;
	array->bin_len[bin] = used + (int) entry_len;
	return 0;
}
int strarray_stash(struct hashed_strarray* array, const char* str, int idx)
{
// The bin is just a random number here, because find
// cannot be used after stash anyway, only lookup can.
return s_stash_at_bin(array, str, idx-1, idx % array->num_bins);
}
// Counts the slots of the array that currently hold a string, that is
// the slots whose bin offset is not 0. Returns 0 if the offset table
// does not exist.
int strarray_used_slots(struct hashed_strarray* array)
{
	int slot;
	int used = 0;

	if (!array->bin_offsets)
		return 0;
	for (slot = 0; slot < array->highest_index; slot++)
		used += (array->bin_offsets[slot] != 0);
	return used;
}
// Initializes array with room for highest_index index slots, spread
// over highest_index/64 bins (at least one).
//
// All tables start out zeroed: an offset of 0 marks an unused slot, a
// bin pointer of 0 a bin without buffer, a bin length of 0 an empty bin.
// Returns 0 on success. Returns -1 if highest_index is not positive,
// if it would need more bins than the 16-bit index_to_bin entries can
// address, or if memory runs out; the array is left zeroed in that case.
int strarray_init(struct hashed_strarray* array, int highest_index)
{
	memset(array, 0, sizeof(*array));
	if (highest_index <= 0) {
		fprintf(stderr, "Invalid highest index %i in %s:%i\n",
			highest_index, __FILE__, __LINE__);
		return -1;
	}
	array->highest_index = highest_index;
	array->num_bins = highest_index / 64;
	// The bin is selected with hash % num_bins, so there must be
	// at least one.
	if (array->num_bins < 1)
		array->num_bins = 1;
	if (array->num_bins > UINT16_MAX + 1) {
		fprintf(stderr, "Too many bins in %s:%i\n", __FILE__, __LINE__);
		memset(array, 0, sizeof(*array));
		return -1;
	}

	// calloc, not malloc: the rest of the code relies on zeroed tables.
	array->bin_strings = calloc(array->num_bins, sizeof(*array->bin_strings));
	array->bin_len = calloc(array->num_bins, sizeof(*array->bin_len));
	array->bin_offsets = calloc(array->highest_index, sizeof(*array->bin_offsets));
	array->index_to_bin = calloc(array->highest_index, sizeof(*array->index_to_bin));

	if (!array->bin_strings || !array->bin_len
	    || !array->bin_offsets || !array->index_to_bin) {
		fprintf(stderr, "Out of memory in %s:%i\n", __FILE__, __LINE__);
		free(array->bin_strings);
		free(array->bin_len);
		free(array->bin_offsets);
		free(array->index_to_bin);
		// do not leave dangling pointers behind
		memset(array, 0, sizeof(*array));
		return -1;
	}
	return 0;
}
// Releases all memory held by the array: the buffer of every bin and
// the four tables. All pointers are reset to 0 afterwards, so calling
// it again, or calling it on an array whose tables were never
// allocated, is harmless.
void strarray_free(struct hashed_strarray* array)
{
	int i;

	// bin_strings is a pointer, so the number of bins has to come
	// from num_bins - sizeof cannot tell it.
	if (array->bin_strings) {
		for (i = 0; i < array->num_bins; i++) {
			free(array->bin_strings[i]);
			array->bin_strings[i] = 0;
		}
	}
	free(array->bin_strings);
	array->bin_strings = 0;
	free(array->bin_len);
	array->bin_len = 0;
	free(array->bin_offsets);
	array->bin_offsets = 0;
	free(array->index_to_bin);
	array->index_to_bin = 0;
}

View File

@ -9,9 +9,12 @@
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/types.h>
#define PROGRAM_REVISION "2012-06-27"
#define MACRO_STR(arg) #arg
void printf_help();
@ -69,4 +72,38 @@ void printf_extrabits(uint8_t* maj_bits, int start_minor, int num_minors,
uint64_t read_lut64(uint8_t* two_minors, int off_in_frame);
int get_vm_mb();
int get_random();
int compare_with_number(const char* a, const char* b);
uint32_t hash_djb2(const unsigned char* str);
// Strings are distributed among bins. Each bin is
// one continuous stream of zero-terminated strings,
// each prefixed with a 32+16=48-bit header (index and
// entry length). The allocation increment for each bin
// is 32k.
struct hashed_strarray
{
// number of index slots; bin_offsets and index_to_bin
// are allocated with this many elements
int highest_index;
// per slot: offset of the string inside its bin
uint32_t* bin_offsets; // min offset is 6 (the header size), 0 means no entry
// per slot: number of the bin that holds the string
uint16_t* index_to_bin;
// per bin: buffer with the entries of that bin
char** bin_strings;
// per bin: number of bytes used in the buffer
int* bin_len;
// number of bins, set to highest_index/64 by strarray_init()
int num_bins;
};
#define STRIDX_64K 0xFFFF
#define STRIDX_1M 1000000
int strarray_init(struct hashed_strarray* array, int highest_index);
void strarray_free(struct hashed_strarray* array);
const char* strarray_lookup(struct hashed_strarray* array, int idx);
// The found or created index will never be 0, so the caller
// can use 0 as a special value to indicate 'no string'.
#define STRIDX_NO_ENTRY 0
int strarray_find(struct hashed_strarray* array, const char* str, int* idx);
int strarray_add(struct hashed_strarray* array, const char* str, int* idx);
// If you stash a string to a fixed index, you cannot use strarray_find()
// anymore, only strarray_lookup().
int strarray_stash(struct hashed_strarray* array, const char* str, int idx);
int strarray_used_slots(struct hashed_strarray* array);

159
model.c
View File

@ -176,7 +176,7 @@ int fpga_build_model(struct fpga_model* model, int fpga_rows, const char* column
strncpy(model->cfg_columns, columns, sizeof(model->cfg_columns)-1);
strncpy(model->cfg_left_wiring, left_wiring, sizeof(model->cfg_left_wiring)-1);
strncpy(model->cfg_right_wiring, right_wiring, sizeof(model->cfg_right_wiring)-1);
strarray_init(&model->str);
strarray_init(&model->str, STRIDX_64K);
rc = init_tiles(model);
if (rc) return rc;
@ -239,10 +239,16 @@ int add_conn_uni(struct fpga_model* model, int y1, int x1, const char* name1, in
int conn_start, num_conn_point_dests_for_this_wire, rc, i, j;
tile1 = &model->tiles[y1 * model->tile_x_range + x1];
rc = strarray_find_or_add(&model->str, name1, &name1_i);
if (!rc) return -1;
rc = strarray_find_or_add(&model->str, name2, &name2_i);
if (!rc) return -1;
rc = strarray_add(&model->str, name1, &i);
if (rc) return rc;
rc = strarray_add(&model->str, name2, &j);
if (rc) return rc;
if (i > 0xFFFF || j > 0xFFFF) {
fprintf(stderr, "Internal error in %s:%i\n", __FILE__, __LINE__);
return -1;
}
name1_i = i;
name2_i = j;
// Search for a connection set under name1.
for (i = 0; i < tile1->num_conn_point_names; i++) {
@ -1773,146 +1779,3 @@ const char* fpga_tiletype_str(enum fpga_tile_type type)
return fpga_ttstr[type];
}
// Dan Bernstein's djb2 hash over a zero-terminated string:
// start value 5381, then hash = hash*33 + byte for every byte.
uint32_t hash_djb2(const unsigned char* str)
{
	uint32_t acc = 5381;

	while (*str != 0) {
		acc = acc * 33 + *str;
		str++;
	}
	return acc;
}
//
// The format of each entry in a bin is.
// uint16_t idx
// uint16_t entry len including 4-byte header
// char[] zero-terminated string
//
// Offsets point to the zero-terminated string, so the len
// is at off-2, the index at off-4. bin0 offset0 can thus be
// used as a special value to signal 'no entry'.
//
// Returns the string stored under index idx, or 0 if idx is 0, the
// slot is unused, or the slot points outside of its bin (the latter
// is reported on stderr).
const char* strarray_lookup(struct hashed_strarray* array, uint16_t idx)
{
	int which_bin, str_off;

	if (!array->index_to_bin || !array->bin_offsets || !idx)
		return 0;

	which_bin = array->index_to_bin[idx];
	str_off = array->bin_offsets[idx];

	// bin 0 offset 0 is a special value that signals 'no
	// entry'. Normal offsets cannot be less than 4.
	if (which_bin == 0 && str_off == 0)
		return 0;

	if (array->bin_strings[which_bin]
	    && str_off < array->bin_len[which_bin]
	    && str_off >= 4)
		return &array->bin_strings[which_bin][str_off];

	// This really should never happen and is an internal error.
	fprintf(stderr, "Internal error.\n");
	return 0;
}
#define BIN_INCREMENT 32768
// Looks up str in the array and, if it is not stored yet, appends it
// to its hash bin under a free index.
// On success the index is written to *idx and 1 is returned.
// 0 is returned on failure: a matching entry carrying index 0, no free
// index left, or out of memory while enlarging the bin.
int strarray_find_or_add(struct hashed_strarray* array, const char* str,
uint16_t* idx)
{
int bin, search_off, str_len, i, free_index;
int new_alloclen, start_index;
unsigned long hash;
void* new_ptr;
hash = hash_djb2((const unsigned char*) str);
str_len = strlen(str);
// the bin is selected by the hash, modulo the number of bins
bin = hash % (sizeof(array->bin_strings)/sizeof(array->bin_strings[0]));
// iterate over strings in bin to find match
if (array->bin_strings[bin]) {
// the first string starts right behind the first 4-byte header
search_off = 4;
while (search_off < array->bin_len[bin]) {
if (!strcmp(&array->bin_strings[bin][search_off],
str)) {
// the index is the first header field, 4 bytes before the string
*idx = *(uint16_t*)&array->bin_strings
[bin][search_off-4];
if (!(*idx)) {
fprintf(stderr, "Internal error - index 0.\n");
return 0;
}
return 1;
}
// the entry length (second header field) leads to the next string
search_off += *(uint16_t*)&array->bin_strings
[bin][search_off-2];
}
}
// search free index
// linear probing, starting at the two hash halves xor-ed together
start_index = (uint16_t) ((hash >> 16) ^ (hash & 0xFFFF));
for (i = 0; i < HASHARRAY_NUM_INDICES; i++) {
int cur_i = (start_index+i)%HASHARRAY_NUM_INDICES;
if (!cur_i) // never issue index 0
continue;
if (!array->bin_offsets[cur_i])
break;
}
if (i >= HASHARRAY_NUM_INDICES) {
fprintf(stderr, "All array indices full.\n");
return 0;
}
free_index = (start_index+i)%HASHARRAY_NUM_INDICES;
// check whether bin needs expansion
// (bin is empty or sits on an increment boundary, or the new
// entry would cross the next boundary)
if (!(array->bin_len[bin]%BIN_INCREMENT)
|| array->bin_len[bin]%BIN_INCREMENT + 4+str_len+1 > BIN_INCREMENT)
{
// round the new size up to the next multiple of BIN_INCREMENT
new_alloclen =
((array->bin_len[bin]
+ 4+str_len+1)/BIN_INCREMENT + 1)
* BIN_INCREMENT;
new_ptr = realloc(array->bin_strings[bin], new_alloclen);
if (!new_ptr) {
fprintf(stderr, "Out of memory.\n");
return 0;
}
array->bin_strings[bin] = new_ptr;
}
// append new string at end of bin
// header: index, then entry length including the header itself
*(uint16_t*)&array->bin_strings[bin][array->bin_len[bin]] = free_index;
*(uint16_t*)&array->bin_strings[bin][array->bin_len[bin]+2] = 4+str_len+1;
strcpy(&array->bin_strings[bin][array->bin_len[bin]+4], str);
// remember where the string lives so that lookup by index works
array->index_to_bin[free_index] = bin;
array->bin_offsets[free_index] = array->bin_len[bin]+4;
array->bin_len[bin] += 4+str_len+1;
*idx = free_index;
return 1;
}
// Counts the entries of bin_offsets that are not 0, i.e. the index
// slots that hold a string.
int strarray_used_slots(struct hashed_strarray* array)
{
	int slot;
	int used = 0;

	if (!array->bin_offsets)
		return 0;
	for (slot = 0;
	     slot < sizeof(array->bin_offsets)/sizeof(*array->bin_offsets);
	     slot++)
		used += (array->bin_offsets[slot] != 0);
	return used;
}
// Puts the array into its empty state by clearing the whole structure.
void strarray_init(struct hashed_strarray* array)
{
	memset(array, 0, sizeof *array);
}
// Frees the buffer of every bin and resets the bin pointers to 0.
void strarray_free(struct hashed_strarray* array)
{
	int bin = 0;

	while (bin < sizeof(array->bin_strings)/sizeof(array->bin_strings[0])) {
		free(array->bin_strings[bin]);
		array->bin_strings[bin] = 0;
		bin++;
	}
}

25
model.h
View File

@ -13,19 +13,7 @@
#include <sys/stat.h>
#define MACRO_STR(arg) #arg
// number of index slots of a hashed_strarray (16-bit index range)
#define HASHARRAY_NUM_INDICES (256*256)
// Strings are distributed among 1024 bins. Each bin
// is one continuous stream of zero-terminated strings,
// each prefixed with a 2*16-bit header (index and entry
// length). The allocation increment for each bin is 32k.
struct hashed_strarray
{
// per index: offset of the string inside its bin
uint32_t bin_offsets[HASHARRAY_NUM_INDICES]; // min offset is 4, 0 means no entry
// per index: number of the bin that holds the string
uint16_t index_to_bin[HASHARRAY_NUM_INDICES];
// per bin: buffer with the entries of that bin
char* bin_strings[1024];
// per bin: number of bytes used in the buffer
int bin_len[1024]; // points behind the last zero-termination
};
#include "helper.h"
//
// columns
@ -241,14 +229,3 @@ int fpga_build_model(struct fpga_model* model,
void fpga_free_model(struct fpga_model* model);
const char* fpga_tiletype_str(enum fpga_tile_type type);
uint32_t hash_djb2(const unsigned char* str);
const char* strarray_lookup(struct hashed_strarray* array, uint16_t idx);
// The found or created index will never be 0, so the caller
// can use 0 as a special value to indicate 'no string'.
int strarray_find_or_add(struct hashed_strarray* array, const char* str,
uint16_t* idx);
int strarray_used_slots(struct hashed_strarray* array);
void strarray_init(struct hashed_strarray* array);
void strarray_free(struct hashed_strarray* array);

View File

@ -13,7 +13,6 @@
#include <sys/stat.h>
#include "model.h"
#include "helper.h"
#define PRINT_FLAG(f) if (tf & f) { printf (" %s", #f); tf &= ~f; }