fpgatools/sort_seq.c
Wolfgang Spraul 3487a8ad7b autotester
2012-08-16 10:57:51 +02:00

426 lines
12 KiB
C

//
// Author: Wolfgang Spraul
//
// This is free and unencumbered software released into the public domain.
// For details see the UNLICENSE file at the root of the source tree.
//
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "helper.h"
// Size in bytes of one line slot in s_lines; this is the buffer size
// handed to fgets() in main(), so it includes the terminating NUL.
#define LINE_LENGTH 1024
// Number of slots of s_lines that currently hold a line.
static int s_numlines;
// Line buffer that main() fills, sorts with qsort() and prints from.
static char s_lines[1000][LINE_LENGTH];
// Locates the rightmost run of decimal digits in the first s_len
// characters of s. The run only counts if the character directly
// in front of it is a capital letter 'A'-'Z' or '_'.
//
// On success returns 1 and stores the half-open range
// [*dig_start, *dig_end) of the digits. Returns 0 if no such
// number is found (strings shorter than 2 never match); in that
// case *dig_start is left untouched, *dig_end may have been written.
static int find_rightmost_num(const char* s, int s_len,
	int* dig_start, int* dig_end)
{
	int pos;
	char lead;

	if (s_len < 2)
		return 0;

	// walk left over any trailing non-digit characters
	for (pos = s_len; pos > 0; pos--) {
		if (s[pos-1] >= '0' && s[pos-1] <= '9')
			break;
	}
	if (pos == 0)
		return 0;
	*dig_end = pos;

	// walk left over the digits themselves
	for (; pos > 0; pos--) {
		if (s[pos-1] < '0' || s[pos-1] > '9')
			break;
	}
	// digits reaching the very start of the string have no prefix
	if (pos == 0)
		return 0;

	lead = s[pos-1];
	if (lead != '_' && !(lead >= 'A' && lead <= 'Z'))
		return 0;

	*dig_start = pos;
	return 1;
}
// Returns non-zero if c is a capital letter 'A'-'Z' or a digit '0'-'9'.
static int seq_is_upper_or_digit(char c)
{
	return (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9');
}

// Determines which number inside the first s_len characters of s
// should be used as the sort key, searching from the right:
// - a trailing "_DSP48A1_SITE" or "_DSP48A1_B_SITE" is ignored;
// - the number must be prefixed by a capital 'A'-'Z' or '_'
//   (see find_rightmost_num());
// - if the rightmost number ends the string and looks like part
//   of a suffix (_<n>, _S0, _N3, _INT0.._INT3), a number further
//   to the left is preferred when there is one.
// The result is the half-open range [*num_start, *num_end). If no
// number is found, both are returned as 0.
static void find_number(const char* s, int s_len, int* num_start, int* num_end)
{
	static const char site_a[] = "_DSP48A1_SITE";
	static const char site_b[] = "_DSP48A1_B_SITE";
	const int site_a_len = (int) sizeof(site_a) - 1;
	const int site_b_len = (int) sizeof(site_b) - 1;
	int start, end, value;
	int left_start, left_end;
	int in_suffix;

	*num_start = 0;
	*num_end = 0;

	// drop a trailing site name so its digits are never considered
	if (s_len >= site_a_len
	    && strncmp(site_a, &s[s_len-site_a_len], site_a_len) == 0)
		s_len -= site_a_len;
	else if (s_len >= site_b_len
	    && strncmp(site_b, &s[s_len-site_b_len], site_b_len) == 0)
		s_len -= site_b_len;

	if (!find_rightmost_num(s, s_len, &start, &end))
		return;
	value = to_i(&s[start], end-start);

	// All suffixes checked below reach the right end of the
	// string; digits that stop earlier cannot belong to one.
	if (end < s_len) {
		*num_start = start;
		*num_end = end;
		return;
	}

	// find_rightmost_num() guarantees start >= 1, so the character
	// in front of the digits can be used to pick the suffix form.
	switch (s[start-1]) {
	case '_':	// _<n>
		in_suffix = start >= 2
			&& seq_is_upper_or_digit(s[start-2]);
		break;
	case 'S':	// _S0
		in_suffix = value == 0 && start >= 3
			&& s[start-2] == '_'
			&& seq_is_upper_or_digit(s[start-3]);
		break;
	case 'N':	// _N3
		in_suffix = value == 3 && start >= 3
			&& s[start-2] == '_'
			&& seq_is_upper_or_digit(s[start-3]);
		break;
	case 'T':	// _INT0 _INT1 _INT2 _INT3
		in_suffix = value >= 0 && value <= 3 && start >= 5
			&& s[start-2] == 'N' && s[start-3] == 'I'
			&& s[start-4] == '_'
			&& seq_is_upper_or_digit(s[start-5]);
		break;
	default:
		in_suffix = 0;
		break;
	}

	// prefer a number left of the suffix, if one exists
	if (in_suffix
	    && find_rightmost_num(s, start, &left_start, &left_end)) {
		*num_start = left_start;
		*num_end = left_end;
	} else {
		*num_start = start;
		*num_end = end;
	}
}
// Returns 1 if the str_len characters at str form a known word
// suffix, 0 otherwise. str does not need to be NUL-terminated.
//
// Known suffixes are:
// - '_' followed only by digits (_<digits>). This counts as a
//   suffix here because the number finding function already
//   found a better match to the left of it.
// - one of the fixed strings listed in the table below.
static int is_known_suffix(const char* str, int str_len)
{
	static const char* const known[] = {
		"_E", "_W", "_S", "_N", "_M",
		"_S0", "_N3", "_UP",
		"_CLB", "_DSP", "_EXT", "_INT", "_MCB", "_BRK", "_BUF",
		"_INT0", "_INT1", "_INT2", "_INT3",
		"_TEST", "_FOLD", "_BRAM", "_DOWN", "_PINW",
		"_BRAM_INTER",
	};
	size_t k;
	int i;

	// every suffix is '_' plus at least one more character
	if (str_len < 2 || str[0] != '_')
		return 0;

	// _<digits>
	for (i = 1; i < str_len; i++) {
		if (str[i] < '0' || str[i] > '9')
			break;
	}
	if (i >= str_len)
		return 1;

	// exact match against the table; only the first str_len
	// characters of str are ever read
	for (k = 0; k < sizeof(known)/sizeof(known[0]); k++) {
		if (strlen(known[k]) == (size_t) str_len
		    && memcmp(str, known[k], (size_t) str_len) == 0)
			return 1;
	}
	return 0;
}
// Steps through a (from a_start) and b (from b_start) one word at a
// time, in lockstep, until the current pair of words differs or one
// of the strings yields an empty word.
//
// On return [*a_beg, *a_end) and [*b_beg, *b_end) hold the ranges
// reported by next_word() for that last pair; an empty range
// (end <= beg) means that string had no further word.
// NOTE(review): word boundaries are whatever next_word() in
// helper.h defines - confirm there.
static void next_unequal_word(
	const char* a, int a_start, int* a_beg, int* a_end,
	const char* b, int b_start, int* b_beg, int* b_end)
{
	int a_len, b_len;

	*a_beg = a_start;
	*b_beg = b_start;
	*a_end = a_start;
	*b_end = b_start;

	for (;;) {
		next_word(a, *a_beg, a_beg, a_end);
		next_word(b, *b_beg, b_beg, b_end);

		a_len = *a_end - *a_beg;
		b_len = *b_end - *b_beg;

		// one side ran out of words
		if (a_len <= 0 || b_len <= 0)
			break;
		// found the first pair that differs
		if (str_cmp(&a[*a_beg], a_len, &b[*b_beg], b_len) != 0)
			break;

		// identical words: continue behind them
		*a_beg = *a_end;
		*b_beg = *b_end;
	}
}
// qsort() comparison callback; a and b point to the first
// character of the two lines to compare. Returns <0, 0 or >0.
//
// The lines are compared at their first pair of differing words:
// - if a line runs out of words first, it sorts first (0 if both
//   run out together);
// - if no number can be found in one of the two words (see
//   find_number()), the words are compared as plain strings;
// - otherwise the text in front of the numbers decides, then -
//   if both words end in nothing or a known suffix after the
//   number - the suffixes, then the numeric values.
// If the first differing words differ only in their number, the
// next pair of differing words is looked at: when both contain a
// number behind the same prefix and both end in known suffixes
// that differ, the suffix comparison wins. In every other case
// the numeric result of the first word is returned.
//
// Conditions that should be impossible print a diagnostic to
// stderr and terminate the process with a failure status.
static int sort_lines(const void* a, const void* b)
{
	const char* _a, *_b;
	int a_word_beg, a_word_end, b_word_beg, b_word_end;
	int a_num, b_num, a_num_start, b_num_start, a_num_end, b_num_end;
	int num_result, result, suffix_result;

	_a = a;
	_b = b;

	// find the first non-matching word
	a_word_beg = 0;
	b_word_beg = 0;
	next_unequal_word(_a, a_word_beg, &a_word_beg, &a_word_end,
		_b, b_word_beg, &b_word_beg, &b_word_end);

	if (a_word_end-a_word_beg <= 0) {
		if (b_word_end-b_word_beg <= 0)
			return 0;
		return -1;
	}
	// a still has a word here, so b running out puts a after b
	if (b_word_end-b_word_beg <= 0)
		return 1;

	// first try to find 2 numbers
	find_number(&_a[a_word_beg], a_word_end-a_word_beg,
		&a_num_start, &a_num_end);
	find_number(&_b[b_word_beg], b_word_end-b_word_beg,
		&b_num_start, &b_num_end);

	// if we cannot find both numbers, return a regular
	// string comparison over the entire word
	if (a_num_end <= a_num_start
	    || b_num_end <= b_num_start) {
		result = str_cmp(&_a[a_word_beg], a_word_end-a_word_beg,
			&_b[b_word_beg], b_word_end-b_word_beg);
		// the words were reported as unequal, so 0 is impossible
		if (!result) {
			fprintf(stderr, "Internal error in %s:%i\n",
				__FILE__, __LINE__);
			exit(EXIT_FAILURE);
		}
		return result;
	}
	// A number must always be prefixed by at least one character.
	if (!a_num_start || !b_num_start) {
		fprintf(stderr, "Internal error in %s:%i\n",
			__FILE__, __LINE__);
		exit(EXIT_FAILURE);
	}

	// otherwise compare the string up to the 2 numbers,
	// if it does not match return that result
	// (the num_start values are still relative to the word
	// here, so they double as the prefix lengths)
	result = str_cmp(&_a[a_word_beg], a_num_start,
		&_b[b_word_beg], b_num_start);
	if (result)
		return result;

	// convert the number ranges to offsets into the line
	a_num_start += a_word_beg;
	a_num_end += a_word_beg;
	b_num_start += b_word_beg;
	b_num_end += b_word_beg;
	if (a_num_end > a_word_end
	    || b_num_end > b_word_end) {
		fprintf(stderr, "Internal error in %s:%i\n",
			__FILE__, __LINE__);
		fprintf(stderr, "sort_line_a: %s", _a);
		fprintf(stderr, "sort_line_b: %s", _b);
		exit(EXIT_FAILURE);
	}

	if ((a_word_end-a_num_end == 0
	     || is_known_suffix(&_a[a_num_end],
			a_word_end-a_num_end))
	    && (b_word_end-b_num_end == 0
	     || is_known_suffix(&_b[b_num_end],
			b_word_end-b_num_end))) {
		// known suffix comes before number
		suffix_result = str_cmp(&_a[a_num_end],
			a_word_end-a_num_end,
			&_b[b_num_end], b_word_end-b_num_end);
		if (suffix_result)
			return suffix_result;
	}

	a_num = to_i(&_a[a_num_start], a_num_end-a_num_start);
	b_num = to_i(&_b[b_num_start], b_num_end-b_num_start);
	num_result = a_num-b_num;

	// if the non-known suffixes don't match, return numeric result
	// if numbers are not equal, otherwise suffix result
	suffix_result = str_cmp(&_a[a_num_end], a_word_end-a_num_end,
		&_b[b_num_end], b_word_end-b_num_end);
	if (suffix_result) {
		if (num_result) return num_result;
		return suffix_result;
	}

	// Should be impossible that both the number result and
	// suffix result are equal. How can the entire word then
	// be unequal?
	if (!num_result) {
		fprintf(stderr, "Internal error in %s:%i\n",
			__FILE__, __LINE__);
		fprintf(stderr, "sort_line_a: %s", _a);
		fprintf(stderr, "sort_line_b: %s", _b);
		exit(EXIT_FAILURE);
	}

	// find second non-equal word
	next_unequal_word(_a, a_word_end, &a_word_beg, &a_word_end,
		_b, b_word_end, &b_word_beg, &b_word_end);
	if (a_word_end <= a_word_beg
	    || b_word_end <= b_word_beg)
		return num_result;

	// if no numbers in second non-equal words, fall back
	// to numeric result of first word
	find_number(&_a[a_word_beg], a_word_end-a_word_beg,
		&a_num_start, &a_num_end);
	find_number(&_b[b_word_beg], b_word_end-b_word_beg,
		&b_num_start, &b_num_end);
	if (a_num_end <= a_num_start
	    || b_num_end <= b_num_start)
		return num_result;
	// A number must always be prefixed by at least one character.
	if (!a_num_start || !b_num_start) {
		fprintf(stderr, "Internal error in %s:%i\n",
			__FILE__, __LINE__);
		exit(EXIT_FAILURE);
	}

	// If the prefix string of the second word does not
	// match, fall back to numeric result of first word.
	result = str_cmp(&_a[a_word_beg], a_num_start,
		&_b[b_word_beg], b_num_start);
	if (result)
		return num_result;

	a_num_start += a_word_beg;
	a_num_end += a_word_beg;
	b_num_start += b_word_beg;
	b_num_end += b_word_beg;
	if (a_num_end > a_word_end
	    || b_num_end > b_word_end) {
		fprintf(stderr, "Internal error in %s:%i\n",
			__FILE__, __LINE__);
		exit(EXIT_FAILURE);
	}

	// if there are known suffixes in second non-equal
	// words, compare those first
	if ((a_word_end-a_num_end == 0
	     || is_known_suffix(&_a[a_num_end],
			a_word_end-a_num_end))
	    && (b_word_end-b_num_end == 0
	     || is_known_suffix(&_b[b_num_end],
			b_word_end-b_num_end))) {
		// known suffix comes before number
		suffix_result = str_cmp(&_a[a_num_end],
			a_word_end-a_num_end,
			&_b[b_num_end], b_word_end-b_num_end);
		if (suffix_result)
			return suffix_result;
	}
	// otherwise fall back to numeric result of first word
	return num_result;
}
// sort_seq - sort by sequence.
//
// Reads lines from the file named by argv[1] ("-" selects stdin)
// and writes them to stdout, ordered by sort_lines(). Sorting is
// done in a sliding window over s_lines: the buffer is filled up
// to 1000 lines and sorted, the first 800 lines are printed and
// the remaining ones are carried over into the next round. Lines
// are therefore only ordered relative to the lines that shared a
// window with them.
//
// Returns EXIT_SUCCESS, or EXIT_FAILURE if the argument is missing
// or the file cannot be opened.
int main(int argc, char** argv)
{
	FILE* fp = 0;
	int i;

	if (argc < 2) {
		fprintf(stderr,
			"sort_seq - sort by sequence\n"
			"Usage: %s <data_file> | - for stdin\n", argv[0]);
		goto xout;
	}
	if (!strcmp(argv[1], "-"))
		fp = stdin;
	else {
		fp = fopen(argv[1], "r");
		if (!fp) {
			fprintf(stderr, "Error opening %s.\n", argv[1]);
			goto xout;
		}
	}

	s_numlines = 0;
	// read 200 lines to beginning of buffer
	while (s_numlines < 200
	       && fgets(s_lines[s_numlines], sizeof(s_lines[0]), fp))
		s_numlines++;
	while (1) {
		// read another 800 lines
		while (s_numlines < 1000
		       && fgets(s_lines[s_numlines], sizeof(s_lines[0]), fp))
			s_numlines++;
		if (!s_numlines) break;

		// sort 1000 lines
		qsort(s_lines, s_numlines, sizeof(s_lines[0]), sort_lines);

		// print first 800 lines
		for (i = 0; i < 800; i++) {
			if (i >= s_numlines) break;
			// The line is input data, not a format string:
			// write it verbatim so that '%' passes through.
			fputs(s_lines[i], stdout);
		}

		// move up last 200 lines to beginning of buffer
		if (s_numlines > i) {
			memmove(s_lines[0], s_lines[i],
				(s_numlines-i)*sizeof(s_lines[0]));
			s_numlines -= i;
		} else
			s_numlines = 0;
	}

	// stdin is not ours to close
	if (fp != stdin)
		fclose(fp);
	return EXIT_SUCCESS;
xout:
	return EXIT_FAILURE;
}