// // Author: Wolfgang Spraul // // This is free and unencumbered software released into the public domain. // For details see the UNLICENSE file at the root of the source tree. // #include #include #include #include "helper.h" #define LINE_LENGTH 1024 static int s_numlines; static char s_lines[1000][LINE_LENGTH]; // returns 0 if no number found static int find_rightmost_num(const char* s, int s_len, int* dig_start, int* dig_end) { int i; if (s_len < 2) return 0; i = s_len; while (i > 0 && (s[i-1] < '0' || s[i-1] > '9')) i--; if (!i) return 0; *dig_end = i; while (i > 0 && s[i-1] >= '0' && s[i-1] <= '9') i--; if (!i) return 0; if ((s[i-1] < 'A' || s[i-1] > 'Z') && s[i-1] != '_') return 0; *dig_start = i; return 1; } // Finds the position of a number in a string, searching from // the right, meeting: // - not part of a known suffix if there is another number to // the left of it // - prefixed by at least one capital 'A'-'Z' or '_' // If none is found, both *num_start and *num_end will be returned as 0. static void find_number(const char* s, int s_len, int* num_start, int* num_end) { int result, dig_start, dig_end, found_num, search_more; int next_dig_start, next_dig_end; *num_start = 0; *num_end = 0; if (s_len >= 13 && !strncmp("_DSP48A1_SITE", &s[s_len-13], 13)) s_len -= 13; else if (s_len >= 15 && !strncmp("_DSP48A1_B_SITE", &s[s_len-15], 15)) s_len -= 15; result = find_rightmost_num(s, s_len, &dig_start, &dig_end); if (!result) return; // If the found number is not part of a potential // suffix, we can take it. found_num = to_i(&s[dig_start], dig_end-dig_start); // The remaining suffixes all reach the right end of // the string, so if our digits don't, we can take them. if (dig_end < s_len) { *num_start = dig_start; *num_end = dig_end; return; } search_more = 0; // _ if (dig_start >= 2 && s[dig_start-1] == '_' && ((s[dig_start-2] >= 'A' && s[dig_start-2] <= 'Z') || (s[dig_start-2] >= '0' && s[dig_start-2] <= '9'))) search_more = 1; // _S0 else if (found_num == 0 && dig_start >= 3 && s[dig_start-1] == 'S' && s[dig_start-2] == '_' && ((s[dig_start-3] >= 'A' && s[dig_start-3] <= 'Z') || (s[dig_start-3] >= '0' && s[dig_start-3] <= '9'))) search_more = 1; // _N3 else if (found_num == 3 && dig_start >= 3 && s[dig_start-1] == 'N' && s[dig_start-2] == '_' && ((s[dig_start-3] >= 'A' && s[dig_start-3] <= 'Z') || (s[dig_start-3] >= '0' && s[dig_start-3] <= '9'))) search_more = 1; // _INT0 _INT1 _INT2 _INT3 else if ((found_num >= 0 && found_num <= 3) && dig_start >= 5 && s[dig_start-1] == 'T' && s[dig_start-2] == 'N' && s[dig_start-3] == 'I' && s[dig_start-4] == '_' && ((s[dig_start-5] >= 'A' && s[dig_start-5] <= 'Z') || (s[dig_start-5] >= '0' && s[dig_start-5] <= '9'))) search_more = 1; if (!search_more || !find_rightmost_num(s, dig_start, &next_dig_start, &next_dig_end)) { *num_start = dig_start; *num_end = dig_end; } else { *num_start = next_dig_start; *num_end = next_dig_end; } } static int is_known_suffix(const char* str, int str_len) { int i; if (str_len < 1) return 0; if (str[0] != '_') return 0; if (str_len < 2) return 0; // Special case _ - we detect this as a // known suffix here because our number finding // function already found a better match to the // left of it, so we can assume the _ to // be a suffix. i = 1; while (i < str_len) { if (str[i] < '0' || str[i] > '9') break; i++; } if (i >= str_len) return 1; if (str_len == 2) { // _E _W _S _N _M if (str[1] == 'E' || str[1] == 'W' || str[2] == 'S' || str[1] == 'N' || str[1] == 'M') return 1; } if (str_len < 3) return 0; if (str_len == 3) { // _S0 _N3 _UP if ((str[1] == 'S' && str[2] == '0') || (str[1] == 'N' && str[2] == '3') || (str[1] == 'U' && str[2] == 'P')) return 1; } if (str_len < 4) return 0; if (str_len == 4) { // _CLB _DSP _EXT _INT _MCP _BRK _BUF if ((str[1] == 'C' && str[2] == 'L' && str[3] == 'B') || (str[1] == 'D' && str[2] == 'S' && str[3] == 'P') || (str[1] == 'E' && str[2] == 'X' && str[3] == 'T') || (str[1] == 'I' && str[2] == 'N' && str[3] == 'T') || (str[1] == 'M' && str[2] == 'C' && str[3] == 'B') || (str[1] == 'B' && str[2] == 'R' && str[3] == 'K') || (str[1] == 'B' && str[2] == 'U' && str[3] == 'F')) return 1; } if (str_len < 5) return 0; if (str_len == 5) { // _INT0 _INT1 _INT2 _INT3 _TEST _FOLD _BRAM _DOWN _PINW if ((str[1] == 'I' && str[2] == 'N' && str[3] == 'T' && str[4] >= '0' && str[4] <= '3') || (str[1] == 'T' && str[2] == 'E' && str[3] == 'S' && str[4] == 'T') || (str[1] == 'F' && str[2] == 'O' && str[3] == 'L' && str[4] == 'D') || (str[1] == 'B' && str[2] == 'R' && str[3] == 'A' && str[4] == 'M') || (str[1] == 'D' && str[2] == 'O' && str[3] == 'W' && str[4] == 'N') || (str[1] == 'P' && str[2] == 'I' && str[3] == 'N' && str[4] == 'W')) return 1; } if (str_len < 11) return 0; if (str_len == 11) { // _BRAM_INTER if (str[1] == 'B' && str[2] == 'R' && str[3] == 'A' && str[4] == 'M' && str[5] == '_' && str[6] == 'I' && str[7] == 'N' && str[8] == 'T' && str[9] == 'E' && str[10] == 'R') return 1; } return 0; } static void next_unequal_word( const char* a, int a_start, int* a_beg, int* a_end, const char* b, int b_start, int* b_beg, int* b_end) { *a_beg = a_start; *a_end = a_start; *b_beg = b_start; *b_end = b_start; // find the first non-matching word while (1) { next_word(a, *a_beg, a_beg, a_end); next_word(b, *b_beg, b_beg, b_end); if (*a_end-*a_beg <= 0 || *b_end-*b_beg <= 0) return; if (str_cmp(&a[*a_beg], *a_end-*a_beg, &b[*b_beg], *b_end-*b_beg)) return; *a_beg = *a_end; *b_beg = *b_end; } } static int sort_lines(const void* a, const void* b) { const char* _a, *_b; int a_word_beg, a_word_end, b_word_beg, b_word_end; int a_num, b_num, a_num_start, b_num_start, a_num_end, b_num_end; int num_result, result, suffix_result; _a = a; _b = b; // find the first non-matching word a_word_beg = 0; b_word_beg = 0; next_unequal_word(_a, a_word_beg, &a_word_beg, &a_word_end, _b, b_word_beg, &b_word_beg, &b_word_end); if (a_word_end-a_word_beg <= 0) { if (b_word_end-b_word_beg <= 0) return 0; return -1; } if (b_word_end-b_word_beg <= 0) { if (a_word_end-a_word_beg <= 0) return 0; return 1; } // first try to find 2 numbers find_number(&_a[a_word_beg], a_word_end-a_word_beg, &a_num_start, &a_num_end); find_number(&_b[b_word_beg], b_word_end-b_word_beg, &b_num_start, &b_num_end); // if we cannot find both numbers, return a regular // string comparison over the entire word if (a_num_end <= a_num_start || b_num_end <= b_num_start) { result = str_cmp(&_a[a_word_beg], a_word_end-a_word_beg, &_b[b_word_beg], b_word_end-b_word_beg); if (!result) { fprintf(stderr, "Internal error in %s:%i\n", __FILE__, __LINE__); exit(0); } return result; } // A number must always be prefixed by at least one character. if (!a_num_start || !b_num_start) { fprintf(stderr, "Internal error in %s:%i\n", __FILE__, __LINE__); exit(0); } // otherwise compare the string up to the 2 numbers, // if it does not match return that result result = str_cmp(&_a[a_word_beg], a_num_start, &_b[b_word_beg], b_num_start); if (result) return result; a_num_start += a_word_beg; a_num_end += a_word_beg; b_num_start += b_word_beg; b_num_end += b_word_beg; if (a_num_end > a_word_end || b_num_end > b_word_end) { fprintf(stderr, "Internal error in %s:%i\n", __FILE__, __LINE__); fprintf(stderr, "sort_line_a: %s", _a); fprintf(stderr, "sort_line_b: %s", _b); exit(1); } if ((a_word_end-a_num_end == 0 || is_known_suffix(&_a[a_num_end], a_word_end-a_num_end)) && (b_word_end-b_num_end == 0 || is_known_suffix(&_b[b_num_end], b_word_end-b_num_end))) { // known suffix comes before number suffix_result = str_cmp(&_a[a_num_end], a_word_end-a_num_end, &_b[b_num_end], b_word_end-b_num_end); if (suffix_result) return suffix_result; } a_num = to_i(&_a[a_num_start], a_num_end-a_num_start); b_num = to_i(&_b[b_num_start], b_num_end-b_num_start); num_result = a_num-b_num; // if the non-known suffixes don't match, return numeric result // if numbers are not equal, otherwise suffix result suffix_result = str_cmp(&_a[a_num_end], a_word_end-a_num_end, &_b[b_num_end], b_word_end-b_num_end); if (suffix_result) { if (num_result) return num_result; return suffix_result; } // Should be impossible that both the number result and // suffix result are equal. How can the entire word then // be unequal? if (!num_result) { fprintf(stderr, "Internal error in %s:%i\n", __FILE__, __LINE__); fprintf(stderr, "sort_line_a: %s", _a); fprintf(stderr, "sort_line_b: %s", _b); exit(1); } // find second non-equal word next_unequal_word(_a, a_word_end, &a_word_beg, &a_word_end, _b, b_word_end, &b_word_beg, &b_word_end); if (a_word_end <= a_word_beg || b_word_end <= b_word_beg) return num_result; // if no numbers in second non-equal words, fall back // to numeric result of first word find_number(&_a[a_word_beg], a_word_end-a_word_beg, &a_num_start, &a_num_end); find_number(&_b[b_word_beg], b_word_end-b_word_beg, &b_num_start, &b_num_end); if (a_num_end <= a_num_start || b_num_end <= b_num_start) return num_result; // A number must always be prefixed by at least one character. if (!a_num_start || !b_num_start) { fprintf(stderr, "Internal error in %s:%i\n", __FILE__, __LINE__); exit(0); } // If the prefix string of the second word does not // match, fall back to numeric result of first word. result = str_cmp(&_a[a_word_beg], a_num_start, &_b[b_word_beg], b_num_start); if (result) return num_result; a_num_start += a_word_beg; a_num_end += a_word_beg; b_num_start += b_word_beg; b_num_end += b_word_beg; if (a_num_end > a_word_end || b_num_end > b_word_end) { fprintf(stderr, "Internal error in %s:%i\n", __FILE__, __LINE__); exit(0); } // if there are known suffixes in second non-equal // words, compare those first if ((a_word_end-a_num_end == 0 || is_known_suffix(&_a[a_num_end], a_word_end-a_num_end)) && (b_word_end-b_num_end == 0 || is_known_suffix(&_b[b_num_end], b_word_end-b_num_end))) { // known suffix comes before number suffix_result = str_cmp(&_a[a_num_end], a_word_end-a_num_end, &_b[b_num_end], b_word_end-b_num_end); if (suffix_result) return suffix_result; } // otherwise fall back to numeric result of first word return num_result; } int main(int argc, char** argv) { FILE* fp = 0; int i; if (argc < 2) { fprintf(stderr, "sort_seq - sort by sequence\n" "Usage: %s | - for stdin\n", argv[0]); goto xout; } if (!strcmp(argv[1], "-")) fp = stdin; else { fp = fopen(argv[1], "r"); if (!fp) { fprintf(stderr, "Error opening %s.\n", argv[1]); goto xout; } } s_numlines = 0; // read 200 lines to beginning of buffer while (s_numlines < 200 && fgets(s_lines[s_numlines], sizeof(s_lines[0]), fp)) s_numlines++; while (1) { // read another 800 lines while (s_numlines < 1000 && fgets(s_lines[s_numlines], sizeof(s_lines[0]), fp)) s_numlines++; if (!s_numlines) break; // sort 1000 lines qsort(s_lines, s_numlines, sizeof(s_lines[0]), sort_lines); // print first 800 lines for (i = 0; i < 800; i++) { if (i >= s_numlines) break; printf(s_lines[i]); } // move up last 200 lines to beginning of buffer if (s_numlines > i) { memmove(s_lines[0], s_lines[i], (s_numlines-i)*sizeof(s_lines[0])); s_numlines -= i; } else s_numlines = 0; } return EXIT_SUCCESS; xout: return EXIT_FAILURE; }