From 328d3934c21edcdde9a43dde0825a5d3144aaa03 Mon Sep 17 00:00:00 2001 From: Wolfgang Spraul Date: Fri, 3 Aug 2012 08:27:05 +0200 Subject: [PATCH] better tools, a bit of logicout wiring --- Makefile | 8 +- helper.c | 16 +++ helper.h | 2 + merge_seq.c | 141 ++++++++++++------ model.c | 50 +++++-- model.h | 35 ++--- sort_seq.c | 406 ++++++++++++++++++++++++++++++++++++++++++++-------- 7 files changed, 531 insertions(+), 127 deletions(-) diff --git a/Makefile b/Makefile index 76376b3..d9ffb04 100644 --- a/Makefile +++ b/Makefile @@ -29,9 +29,13 @@ pair2net: pair2net.o helper.o pair2net.o: pair2net.c helper.h -sort_seq: sort_seq.o +sort_seq: sort_seq.o helper.o -merge_seq: merge_seq.o +sort_seq.o: sort_seq.c helper.h + +merge_seq: merge_seq.o helper.o + +merge_seq.o: merge_seq.c helper.h hstrrep: hstrrep.o helper.o diff --git a/helper.c b/helper.c index b57ede7..152ac84 100644 --- a/helper.c +++ b/helper.c @@ -693,6 +693,22 @@ int compare_with_number(const char* a, const char* b) return a_num - b_num; } +void next_word(const char*s, int start, int* beg, int* end) +{ + int i = start; + while (s[i] == ' ' || s[i] == '\t' || s[i] == '\n') i++; + *beg = i; + while (s[i] != ' ' && s[i] != '\t' && s[i] != '\n' && s[i]) i++; + *end = i; +} + +int to_i(const char* s, int len) +{ + int num, base; + for (base = 1, num = 0; len; num += base*(s[--len]-'0'), base *= 10); + return num; +} + // Dan Bernstein's hash function uint32_t hash_djb2(const unsigned char* str) { diff --git a/helper.h b/helper.h index b9c1f3a..d5b2dcf 100644 --- a/helper.h +++ b/helper.h @@ -74,6 +74,8 @@ uint64_t read_lut64(uint8_t* two_minors, int off_in_frame); int get_vm_mb(); int get_random(); int compare_with_number(const char* a, const char* b); +void next_word(const char* s, int start, int* beg, int* end); +int to_i(const char* s, int len); uint32_t hash_djb2(const unsigned char* str); diff --git a/merge_seq.c b/merge_seq.c index d67428d..e6ffe35 100644 --- a/merge_seq.c +++ b/merge_seq.c @@ -9,6 +9,8 
@@ #include #include +#include "helper.h" + #define LINE_LENGTH 1024 struct line_buf @@ -55,24 +57,8 @@ static int print_line(const struct line_buf* line) return 0; } -static void next_word(const char*s, int start, int* beg, int* end) -{ - int i = start; - while (s[i] == ' ' || s[i] == '\t' || s[i] == '\n') i++; - *beg = i; - while (s[i] != ' ' && s[i] != '\t' && s[i] != '\n' && s[i]) i++; - *end = i; -} - -static int to_i(const char* s, int len) -{ - int num, base; - for (base = 1, num = 0; len; num += base*(s[--len]-'0'), base *= 10); - return num; -} - // Finds the positions of two non-equal numbers that must meet -// two number of criteria: +// the following two criteria: // - prefixed by at least one capital 'A'-'Z' or '_' // - suffixed by matching or empty strings static void find_non_matching_number(const char* a, int a_len, @@ -129,6 +115,22 @@ static void find_non_matching_number(const char* a, int a_len, if (a_len - a_o != b_len - b_o) return; if ((a_len - a_o) && strncmp(&a[a_o], &b[b_o], a_len-a_o)) return; + // some known suffixes include numbers and must never be + // part of merging + if (a_len - a_o == 0) { + // _S0 _N3 + if (a_o > 3 + && ((a[a_o-3] == '_' && a[a_o-2] == 'S' && a[a_o-1] == '0') + || (a[a_o-3] == '_' && a[a_o-2] == 'N' && a[a_o-1] == '3'))) + return; + // _INT0 _INT1 _INT2 _INT3 + if (a_o > 5 + && a[a_o-5] == '_' && a[a_o-4] == 'I' && a[a_o-3] == 'N' + && a[a_o-2] == 'T' + && a[a_o-1] >= '0' && a[a_o-1] <= '3') + return; + } + *ab_start = digit_start; *a_end = a_o; *b_end = b_o; @@ -174,6 +176,12 @@ static int merge_line(struct line_buf* first_l, struct line_buf* second_l) fprintf(stderr, "Internal error in %s:%i\n", __FILE__, __LINE__); return -1; } + // We must be looking at the same digit, for example + // if we have a sequence SW2M0:3, and now the second + // line is SW4M0 - the '4' must not be seen as a + // continuation of the '3'. 
+ if (s_start != first_l->left_digit_start_o) + return 0; if (second_num != first_l->left_digit_base + first_l->sequence_size + 1) return 0; } else { @@ -270,9 +278,33 @@ static int merge_line(struct line_buf* first_l, struct line_buf* second_l) return 0; } +static void read_line(FILE* fp, struct line_buf* line) +{ + *line->buf = 0; + line->left_digit_start_o = -1; + line->right_digit_start_o = -1; + line->sequence_size = 0; + if (!fgets(line->buf, sizeof(line->buf), fp)) + *line->buf = 0; +} + +static void increment(int *off, struct line_buf* lines, int num_lines, int end_of_ringbuf) +{ + if (++(*off) >= num_lines) + *off = 0; + while (!lines[*off].buf[0] && *off != end_of_ringbuf) { + if (++(*off) >= num_lines) + *off = 0; + } +} + +#define READ_AHEAD_SIZE 100 +#define LAST_MERGE_TRY 2 // how far to look forward for a mergable seq + int main(int argc, char** argv) { - struct line_buf first_line, second_line; + struct line_buf read_ahead[READ_AHEAD_SIZE]; + int read_ahead_get, read_ahead_put, second_line, eof_reached, try_count; FILE* fp = 0; int rc; @@ -292,34 +324,59 @@ int main(int argc, char** argv) } } - // read first line - first_line.buf[0] = 0; - first_line.left_digit_start_o = -1; - first_line.right_digit_start_o = -1; - first_line.sequence_size = 0; - if (!fgets(first_line.buf, sizeof(first_line.buf), fp) - || !first_line.buf[0]) goto out; - + read_line(fp, &read_ahead[0]); + if (!read_ahead[0].buf[0]) goto out; + read_ahead_get = 0; + read_ahead_put = 1; + eof_reached = 0; while (1) { - // read second line - second_line.buf[0] = 0; - second_line.left_digit_start_o = -1; - second_line.right_digit_start_o = -1; - second_line.sequence_size = 0; - if (!fgets(second_line.buf, sizeof(second_line.buf), fp)) - break; - // can the two be merged? 
- rc = merge_line(&first_line, &second_line); - if (rc) goto xout; - if (second_line.buf[0]) { - // no: print first line and move second line to first - rc = print_line(&first_line); + // fill up read ahead buffer + while (!eof_reached + && read_ahead_put != read_ahead_get) { + read_line(fp, &read_ahead[read_ahead_put]); + if (!read_ahead[read_ahead_put].buf[0]) { + eof_reached = 1; + break; + } + if (++read_ahead_put >= READ_AHEAD_SIZE) + read_ahead_put = 0; + } + + // find second line in read ahead buffer + second_line = read_ahead_get; + increment(&second_line, read_ahead, READ_AHEAD_SIZE, read_ahead_put); + + if (!read_ahead[second_line].buf[0]) { + // if no more lines, print first one and exit + rc = print_line(&read_ahead[read_ahead_get]); if (rc) goto xout; - first_line = second_line; + break; + } + + try_count = 0; + while (1) { + // try to merge + rc = merge_line(&read_ahead[read_ahead_get], &read_ahead[second_line]); + if (rc) goto xout; + if (!read_ahead[second_line].buf[0]) // merge successful + break; + + // try next one + increment(&second_line, read_ahead, + READ_AHEAD_SIZE, read_ahead_put); + + if (second_line == read_ahead_put + || ++try_count >= LAST_MERGE_TRY) { + // read-ahead empty or stop trying + rc = print_line(&read_ahead[read_ahead_get]); + if (rc) goto xout; + read_ahead[read_ahead_get].buf[0] = 0; + increment(&read_ahead_get, read_ahead, + READ_AHEAD_SIZE, read_ahead_put); + break; + } } } - rc = print_line(&first_line); - if (rc) goto xout; out: return EXIT_SUCCESS; xout: diff --git a/model.c b/model.c index f4198d9..4481f85 100644 --- a/model.c +++ b/model.c @@ -1146,21 +1146,41 @@ static int run_logic_inout(struct fpga_model* model) char buf[128]; int x, y, i, rc; + // LOGICOUT + for (x = 0; x < model->x_width; x++) { + if (is_atx(X_FABRIC_LOGIC_ROUTING_COL|X_CENTER_ROUTING_COL, model, x)) { + for (y = 0; y < model->y_height; y++) { + tile = &model->tiles[y * model->x_width + x]; + if (tile[1].flags & TF_LOGIC_XM_DEV) { + if ((rc = 
add_conn_range(model, NOPREF_BI_F, y, x, "LOGICOUT%i", 0, 23, y, x+1, "CLEXM_LOGICOUT%i", 0))) goto xout; + } + if (tile[1].flags & TF_LOGIC_XL_DEV) { + if ((rc = add_conn_range(model, NOPREF_BI_F, y, x, "LOGICOUT%i", 0, 23, y, x+1, "CLEXL_LOGICOUT%i", 0))) goto xout; + } + if (tile[1].flags & TF_IOLOGIC_DELAY_DEV) { + if ((rc = add_conn_range(model, NOPREF_BI_F, y, x, "LOGICOUT%i", 0, 23, y, x+1, "IOI_LOGICOUT%i", 0))) goto xout; + } + } + } + if (is_atx(X_FABRIC_BRAM_ROUTING_COL, model, x)) { + for (y = TOP_IO_TILES; y < model->y_height - BOT_IO_TILES; y++) { + if (is_aty(Y_ROW_HORIZ_AXSYMM|Y_CHIP_HORIZ_REGS, + model, y)) + continue; + if ((rc = add_conn_range(model, NOPREF_BI_F, y, x, "LOGICOUT%i", 0, 23, y, x+1, "INT_INTERFACE_LOGICOUT%i", 0))) goto xout; + if (YX_TILE(model, y, x)[2].flags & TF_BRAM_DEV) { + if ((rc = add_conn_range(model, NOPREF_BI_F, y-3, x+1, "INT_INTERFACE_LOGICOUT_%i", 0, 23, y, x+2, "BRAM_LOGICOUT%i_INT3", 0))) goto xout; + if ((rc = add_conn_range(model, NOPREF_BI_F, y-2, x+1, "INT_INTERFACE_LOGICOUT_%i", 0, 23, y, x+2, "BRAM_LOGICOUT%i_INT2", 0))) goto xout; + if ((rc = add_conn_range(model, NOPREF_BI_F, y-1, x+1, "INT_INTERFACE_LOGICOUT_%i", 0, 23, y, x+2, "BRAM_LOGICOUT%i_INT1", 0))) goto xout; + if ((rc = add_conn_range(model, NOPREF_BI_F, y, x+1, "INT_INTERFACE_LOGICOUT_%i", 0, 23, y, x+2, "BRAM_LOGICOUT%i_INT0", 0))) goto xout; + } + } + } + } for (y = 0; y < model->y_height; y++) { for (x = 0; x < model->x_width; x++) { tile = &model->tiles[y * model->x_width + x]; - // LOGICOUT - if (tile[1].flags & TF_LOGIC_XM_DEV) { - if ((rc = add_conn_range(model, NOPREF_BI_F, y, x, "LOGICOUT%i", 0, 23, y, x+1, "CLEXM_LOGICOUT%i", 0))) goto xout; - } - if (tile[1].flags & TF_LOGIC_XL_DEV) { - if ((rc = add_conn_range(model, NOPREF_BI_F, y, x, "LOGICOUT%i", 0, 23, y, x+1, "CLEXL_LOGICOUT%i", 0))) goto xout; - } - if (tile[1].flags & TF_IOLOGIC_DELAY_DEV) { - if ((rc = add_conn_range(model, NOPREF_BI_F, y, x, "LOGICOUT%i", 0, 23, y, x+1, 
"IOI_LOGICOUT%i", 0))) goto xout; - } - // LOGICIN if (is_atyx(YX_ROUTING_TILE, model, y, x)) { static const int north_p[4] = {21, 28, 52, 60}; @@ -2392,6 +2412,14 @@ int is_atx(int check, struct fpga_model* model, int x) && model->tiles[x].flags & TF_FABRIC_ROUTING_COL && model->tiles[x+1].flags & TF_FABRIC_LOGIC_COL) return 1; if (check & X_FABRIC_LOGIC_COL && model->tiles[x].flags & TF_FABRIC_LOGIC_COL) return 1; + if (check & X_FABRIC_BRAM_ROUTING_COL + && model->tiles[x].flags & TF_FABRIC_ROUTING_COL + && model->tiles[x+1].flags & TF_FABRIC_BRAM_MACC_ROUTING_COL + && model->tiles[x+2].flags & TF_FABRIC_BRAM_COL) return 1; + if (check & X_FABRIC_MACC_ROUTING_COL + && model->tiles[x].flags & TF_FABRIC_ROUTING_COL + && model->tiles[x+1].flags & TF_FABRIC_BRAM_MACC_ROUTING_COL + && model->tiles[x+2].flags & TF_FABRIC_MACC_COL) return 1; if (check & X_FABRIC_BRAM_MACC_ROUTING_COL && model->tiles[x].flags & TF_FABRIC_BRAM_MACC_ROUTING_COL) return 1; if (check & X_FABRIC_BRAM_COL && model->tiles[x].flags & TF_FABRIC_BRAM_COL) return 1; if (check & X_FABRIC_MACC_COL && model->tiles[x].flags & TF_FABRIC_MACC_COL) return 1; diff --git a/model.h b/model.h index e81da95..7379a39 100644 --- a/model.h +++ b/model.h @@ -196,23 +196,26 @@ int is_aty(int check, struct fpga_model* model, int y); #define X_ROUTING_TO_MACC_COL 0x00000040 #define X_ROUTING_NO_IO 0x00000080 #define X_LOGIC_COL 0x00000100 // includes the center logic col -#define X_FABRIC_ROUTING_COL 0x00000200 -#define X_FABRIC_LOGIC_ROUTING_COL 0x00000400 +// todo: maybe X_FABRIC_ROUTING_COL could be logic+bram+macc? 
+#define X_FABRIC_ROUTING_COL 0x00000200 // logic+BRAM+MACC +#define X_FABRIC_LOGIC_ROUTING_COL 0x00000400 // logic only #define X_FABRIC_LOGIC_COL 0x00000800 -#define X_FABRIC_BRAM_MACC_ROUTING_COL 0x00001000 -#define X_FABRIC_BRAM_COL 0x00002000 -#define X_FABRIC_MACC_COL 0x00004000 -#define X_CENTER_ROUTING_COL 0x00008000 -#define X_CENTER_LOGIC_COL 0x00010000 -#define X_CENTER_CMTPLL_COL 0x00020000 -#define X_CENTER_REGS_COL 0x00040000 -#define X_LEFT_IO_ROUTING_COL 0x00080000 -#define X_LEFT_IO_DEVS_COL 0x00100000 -#define X_RIGHT_IO_ROUTING_COL 0x00200000 -#define X_RIGHT_IO_DEVS_COL 0x00400000 -#define X_LEFT_SIDE 0x00800000 // true for anything left of the center (not including center) -#define X_LEFT_MCB 0x01000000 -#define X_RIGHT_MCB 0x02000000 +#define X_FABRIC_BRAM_ROUTING_COL 0x00001000 // BRAM only +#define X_FABRIC_MACC_ROUTING_COL 0x00002000 // MACC only +#define X_FABRIC_BRAM_MACC_ROUTING_COL 0x00004000 // second routing col for BRAM/MACC +#define X_FABRIC_BRAM_COL 0x00008000 +#define X_FABRIC_MACC_COL 0x00010000 +#define X_CENTER_ROUTING_COL 0x00020000 +#define X_CENTER_LOGIC_COL 0x00040000 +#define X_CENTER_CMTPLL_COL 0x00080000 +#define X_CENTER_REGS_COL 0x00100000 +#define X_LEFT_IO_ROUTING_COL 0x00200000 +#define X_LEFT_IO_DEVS_COL 0x00400000 +#define X_RIGHT_IO_ROUTING_COL 0x00800000 +#define X_RIGHT_IO_DEVS_COL 0x01000000 +#define X_LEFT_SIDE 0x02000000 // true for anything left of the center (not including center) +#define X_LEFT_MCB 0x04000000 +#define X_RIGHT_MCB 0x08000000 // multiple checks are combined with OR logic int is_atx(int check, struct fpga_model* model, int x); diff --git a/sort_seq.c b/sort_seq.c index 836377b..399cc51 100644 --- a/sort_seq.c +++ b/sort_seq.c @@ -9,93 +9,387 @@ #include #include +#include "helper.h" + #define LINE_LENGTH 1024 static int s_numlines; static char s_lines[1000][LINE_LENGTH]; -static int is_known_suffix(const char* str) +// returns 0 if no number found +int find_rightmost_num(const char* s, int 
s_len, int* dig_start, int* dig_end) { - static const char known_suffix[32][16] = - { "_S0", "_N3", "_INT0", "_INT1", "_INT2", "_INT3", - "_TEST", "_BRK", "_BUF", "_FOLD", "_BRAM", "_BRAM_INTER", - "_CLB", "_DSP", "_INT", "_MCB", "_DOWN", "_UP", - "_E", "_W", "_S", "_N", "_M", "_EXT", "_PINW", - "" }; int i; + if (s_len < 2) return 0; + i = s_len; + while (i > 0 && (s[i-1] < '0' || s[i-1] > '9')) + i--; + if (!i) return 0; + *dig_end = i; + while (i > 0 && s[i-1] >= '0' && s[i-1] <= '9') + i--; + if (!i) return 0; + if ((s[i-1] < 'A' || s[i-1] > 'Z') && s[i-1] != '_') + return 0; + *dig_start = i; + return 1; +} + +// Finds the position of a number in a string, searching from +// the right, meeting: +// - not part of a known suffix if there is another number to +// the left of it +// - prefixed by at least one capital 'A'-'Z' or '_' +// If none is found, both *num_start and *num_end will be returned as 0. +static void find_number(const char* s, int s_len, int* num_start, int* num_end) +{ + int result, dig_start, dig_end, found_num, search_more; + int next_dig_start, next_dig_end; + + *num_start = 0; + *num_end = 0; + + if (s_len >= 13 && !strncmp("_DSP48A1_SITE", &s[s_len-13], 13)) + s_len -= 13; + else if (s_len >= 15 && !strncmp("_DSP48A1_B_SITE", &s[s_len-15], 15)) + s_len -= 15; + + result = find_rightmost_num(s, s_len, &dig_start, &dig_end); + if (!result) return; + + // If the found number is not part of a potential + // suffix, we can take it. + found_num = to_i(&s[dig_start], dig_end-dig_start); + + // The remaining suffixes all reach the right end of + // the string, so if our digits don't, we can take them. 
+ if (dig_end < s_len) { + *num_start = dig_start; + *num_end = dig_end; + return; + } + search_more = 0; + // _<num> + if (dig_start >= 2 + && s[dig_start-1] == '_' + && ((s[dig_start-2] >= 'A' && s[dig_start-2] <= 'Z') + || (s[dig_start-2] >= '0' && s[dig_start-2] <= '9'))) + search_more = 1; + // _S0 + else if (found_num == 0 && dig_start >= 3 + && s[dig_start-1] == 'S' && s[dig_start-2] == '_' + && ((s[dig_start-3] >= 'A' && s[dig_start-3] <= 'Z') + || (s[dig_start-3] >= '0' && s[dig_start-3] <= '9'))) + search_more = 1; + // _N3 + else if (found_num == 3 && dig_start >= 3 + && s[dig_start-1] == 'N' && s[dig_start-2] == '_' + && ((s[dig_start-3] >= 'A' && s[dig_start-3] <= 'Z') + || (s[dig_start-3] >= '0' && s[dig_start-3] <= '9'))) + search_more = 1; + // _INT0 _INT1 _INT2 _INT3 + else if ((found_num >= 0 && found_num <= 3) && dig_start >= 5 + && s[dig_start-1] == 'T' && s[dig_start-2] == 'N' + && s[dig_start-3] == 'I' && s[dig_start-4] == '_' + && ((s[dig_start-5] >= 'A' && s[dig_start-5] <= 'Z') + || (s[dig_start-5] >= '0' && s[dig_start-5] <= '9'))) + search_more = 1; + if (!search_more + || !find_rightmost_num(s, dig_start, &next_dig_start, &next_dig_end)) { + *num_start = dig_start; + *num_end = dig_end; + } else { + *num_start = next_dig_start; + *num_end = next_dig_end; + } +} + +int str_cmp(const char* a, int a_len, const char* b, int b_len) +{ + int i = 0; + + while (1) { + if (i >= a_len) { + if (i >= b_len) + return 0; + return -1; + } + if (i >= b_len) { + if (i >= a_len) + return 0; + return 1; + } + if (a[i] != b[i]) + return a[i] - b[i]; + i++; + } +} + +static int is_known_suffix(const char* str, int str_len) +{ + int i; + + if (str_len < 1) return 0; if (str[0] != '_') return 0; - for (i = 0; known_suffix[i][0]; i++) { - if (!strcmp(known_suffix[i], str)) + if (str_len < 2) return 0; + + // Special case _<num> - we detect this as a + // known suffix here because our number finding + // function already found a better match to the + // left of it, so we
can assume the _<num> to + // be a suffix. + i = 1; + while (i < str_len) { + if (str[i] < '0' || str[i] > '9') + break; + i++; + } + if (i >= str_len) + return 1; + + if (str_len == 2) { + // _E _W _S _N _M + if (str[1] == 'E' || str[1] == 'W' || str[1] == 'S' + || str[1] == 'N' || str[1] == 'M') + return 1; + } + if (str_len < 3) return 0; + if (str_len == 3) { + // _S0 _N3 _UP + if ((str[1] == 'S' && str[2] == '0') + || (str[1] == 'N' && str[2] == '3') + || (str[1] == 'U' && str[2] == 'P')) + return 1; + } + if (str_len < 4) return 0; + if (str_len == 4) { + // _CLB _DSP _EXT _INT _MCB _BRK _BUF + if ((str[1] == 'C' && str[2] == 'L' && str[3] == 'B') + || (str[1] == 'D' && str[2] == 'S' && str[3] == 'P') + || (str[1] == 'E' && str[2] == 'X' && str[3] == 'T') + || (str[1] == 'I' && str[2] == 'N' && str[3] == 'T') + || (str[1] == 'M' && str[2] == 'C' && str[3] == 'B') + || (str[1] == 'B' && str[2] == 'R' && str[3] == 'K') + || (str[1] == 'B' && str[2] == 'U' && str[3] == 'F')) + return 1; + } + if (str_len < 5) return 0; + if (str_len == 5) { + // _INT0 _INT1 _INT2 _INT3 _TEST _FOLD _BRAM _DOWN _PINW + if ((str[1] == 'I' && str[2] == 'N' && str[3] == 'T' + && str[4] >= '0' && str[4] <= '3') + || (str[1] == 'T' && str[2] == 'E' && str[3] == 'S' + && str[4] == 'T') + || (str[1] == 'F' && str[2] == 'O' && str[3] == 'L' + && str[4] == 'D') + || (str[1] == 'B' && str[2] == 'R' && str[3] == 'A' + && str[4] == 'M') + || (str[1] == 'D' && str[2] == 'O' && str[3] == 'W' + && str[4] == 'N') + || (str[1] == 'P' && str[2] == 'I' && str[3] == 'N' + && str[4] == 'W')) + return 1; + } + if (str_len < 11) return 0; + if (str_len == 11) { + // _BRAM_INTER + if (str[1] == 'B' && str[2] == 'R' && str[3] == 'A' + && str[4] == 'M' && str[5] == '_' && str[6] == 'I' + && str[7] == 'N' && str[8] == 'T' && str[9] == 'E' + && str[10] == 'R') return 1; } return 0; } -static void copy_word(char* buf, const char* s) +void next_unequal_word(const char* a, int a_start, int* a_beg, int* a_end, +
const char* b, int b_start, int* b_beg, int* b_end) { - int i = 0; - while (s[i] != ' ' && s[i] != '\t' && s[i] != '\n' && s[i]) { - buf[i] = s[i]; - i++; + *a_beg = a_start; + *a_end = a_start; + *b_beg = b_start; + *b_end = b_start; + + // find the first non-matching word + while (1) { + next_word(a, *a_beg, a_beg, a_end); + next_word(b, *b_beg, b_beg, b_end); + + if (*a_end-*a_beg <= 0 + || *b_end-*b_beg <= 0) + return; + + if (str_cmp(&a[*a_beg], *a_end-*a_beg, &b[*b_beg], *b_end-*b_beg)) + return; + *a_beg = *a_end; + *b_beg = *b_end; } - buf[i] = 0; } int sort_lines(const void* a, const void* b) { const char* _a, *_b; - int i, a_i, b_i, a_num, b_num, rc; - char a_word[1024], b_word[1024]; + int a_word_beg, a_word_end, b_word_beg, b_word_end; + int a_num, b_num, a_num_start, b_num_start, a_num_end, b_num_end; + int num_result, result, suffix_result; _a = a; _b = b; - // search first non-matching character - for (i = 0; _a[i] && _a[i] == _b[i]; i++); + // find the first non-matching word + a_word_beg = 0; + b_word_beg = 0; + next_unequal_word(_a, a_word_beg, &a_word_beg, &a_word_end, + _b, b_word_beg, &b_word_beg, &b_word_end); - // if entire string matches, return 0 - if (!_a[i] && !_b[i]) return 0; - - // if neither of the non-matching characters is a digit, return - if ((_a[i] < '0' || _a[i] > '9') - && (_b[i] < '0' || _b[i] > '9')) - return _a[i] - _b[i]; - - // go back to beginning of numeric section - // (a and b must be identical going backwards) - while (i && _a[i-1] >= '0' && _a[i-1] <= '9') - i--; - - // go forward to first non-digit - for (a_i = i; _a[a_i] >= '0' && _a[a_i] <= '9'; a_i++ ); - for (b_i = i; _b[b_i] >= '0' && _b[b_i] <= '9'; b_i++ ); - - // there must be at least one digit on each side - if (a_i <= i || b_i <= i) { - // We move numbers before all other characters. 
- if (_a[i] >= '0' && _a[i] <= '9' - && (_b[i] < '0' || _b[i] > '9')) return 1; - if (_b[i] >= '0' && _b[i] <= '9' - && (_a[i] < '0' || _a[i] > '9')) return -1; - return _a[i] - _b[i]; + if (a_word_end-a_word_beg <= 0) { + if (b_word_end-b_word_beg <= 0) + return 0; + return -1; + } + if (b_word_end-b_word_beg <= 0) { + if (a_word_end-a_word_beg <= 0) + return 0; + return 1; } - // for known suffixes, the suffix comes before the number - copy_word(a_word, &_a[a_i]); - copy_word(b_word, &_b[b_i]); - if ((!a_word[0] || is_known_suffix(a_word)) - && (!b_word[0] || is_known_suffix(b_word))) { - rc = strcmp(a_word, b_word); - if (rc) return rc; + // first try to find 2 numbers + find_number(&_a[a_word_beg], a_word_end-a_word_beg, + &a_num_start, &a_num_end); + find_number(&_b[b_word_beg], b_word_end-b_word_beg, + &b_num_start, &b_num_end); + + // if we cannot find both numbers, return a regular + // string comparison over the entire word + if (a_num_end <= a_num_start + || b_num_end <= b_num_start) { + result = str_cmp(&_a[a_word_beg], a_word_end-a_word_beg, + &_b[b_word_beg], b_word_end-b_word_beg); + if (!result) { + fprintf(stderr, "Internal error in %s:%i\n", + __FILE__, __LINE__); + exit(1); // internal error: must not report success + } + return result; + } + // A number must always be prefixed by at least one character.
+ if (!a_num_start || !b_num_start) { + fprintf(stderr, "Internal error in %s:%i\n", + __FILE__, __LINE__); + exit(1); // internal error: must not report success + } + // otherwise compare the string up to the 2 numbers, + // if it does not match return that result + result = str_cmp(&_a[a_word_beg], a_num_start, + &_b[b_word_beg], b_num_start); + if (result) + return result; + + a_num_start += a_word_beg; + a_num_end += a_word_beg; + b_num_start += b_word_beg; + b_num_end += b_word_beg; + if (a_num_end > a_word_end + || b_num_end > b_word_end) { + fprintf(stderr, "Internal error in %s:%i\n", + __FILE__, __LINE__); + fprintf(stderr, "sort_line_a: %s", _a); + fprintf(stderr, "sort_line_b: %s", _b); + exit(1); + } + if ((a_word_end-a_num_end == 0 + || is_known_suffix(&_a[a_num_end], + a_word_end-a_num_end)) + && (b_word_end-b_num_end == 0 + || is_known_suffix(&_b[b_num_end], + b_word_end-b_num_end))) { + // known suffix comes before number + suffix_result = str_cmp(&_a[a_num_end], + a_word_end-a_num_end, + &_b[b_num_end], b_word_end-b_num_end); + if (suffix_result) + return suffix_result; } - a_num = strtol(&_a[i], 0 /* endptr */, 10); - b_num = strtol(&_b[i], 0 /* endptr */, 10); - if (a_num != b_num) - return a_num - b_num; + a_num = to_i(&_a[a_num_start], a_num_end-a_num_start); + b_num = to_i(&_b[b_num_start], b_num_end-b_num_start); + num_result = a_num-b_num; - return strcmp(&_a[a_i], &_b[b_i]); + // if the non-known suffixes don't match, return numeric result + // if numbers are not equal, otherwise suffix result + suffix_result = str_cmp(&_a[a_num_end], a_word_end-a_num_end, + &_b[b_num_end], b_word_end-b_num_end); + if (suffix_result) { + if (num_result) return num_result; + return suffix_result; + } + // Should be impossible that both the number result and + // suffix result are equal. How can the entire word then + // be unequal?
+ if (!num_result) { + fprintf(stderr, "Internal error in %s:%i\n", + __FILE__, __LINE__); + fprintf(stderr, "sort_line_a: %s", _a); + fprintf(stderr, "sort_line_b: %s", _b); + exit(1); + } + + // find second non-equal word + next_unequal_word(_a, a_word_end, &a_word_beg, &a_word_end, + _b, b_word_end, &b_word_beg, &b_word_end); + if (a_word_end <= a_word_beg + || b_word_end <= b_word_beg) + return num_result; + + // if no numbers in second non-equal words, fall back + // to numeric result of first word + find_number(&_a[a_word_beg], a_word_end-a_word_beg, + &a_num_start, &a_num_end); + find_number(&_b[b_word_beg], b_word_end-b_word_beg, + &b_num_start, &b_num_end); + if (a_num_end <= a_num_start + || b_num_end <= b_num_start) + return num_result; + // A number must always be prefixed by at least one character. + if (!a_num_start || !b_num_start) { + fprintf(stderr, "Internal error in %s:%i\n", + __FILE__, __LINE__); + exit(1); // internal error: must not report success + } + // If the prefix string of the second word does not + // match, fall back to numeric result of first word.
+ result = str_cmp(&_a[a_word_beg], a_num_start, + &_b[b_word_beg], b_num_start); + if (result) + return num_result; + a_num_start += a_word_beg; + a_num_end += a_word_beg; + b_num_start += b_word_beg; + b_num_end += b_word_beg; + if (a_num_end > a_word_end + || b_num_end > b_word_end) { + fprintf(stderr, "Internal error in %s:%i\n", + __FILE__, __LINE__); + exit(1); // internal error: must not report success + } + // if there are known suffixes in second non-equal + // words, compare those first + if ((a_word_end-a_num_end == 0 + || is_known_suffix(&_a[a_num_end], + a_word_end-a_num_end)) + && (b_word_end-b_num_end == 0 + || is_known_suffix(&_b[b_num_end], + b_word_end-b_num_end))) { + // known suffix comes before number + suffix_result = str_cmp(&_a[a_num_end], + a_word_end-a_num_end, + &_b[b_num_end], b_word_end-b_num_end); + if (suffix_result) + return suffix_result; + } + // otherwise fall back to numeric result of first word + return num_result; } int main(int argc, char** argv)