Improve regexp compilation to see through certain groupings when picking a 'regmust' string, which speeds up match failures (PR 9602)

svn: r10750
This commit is contained in:
Matthew Flatt 2008-07-13 21:36:58 +00:00
parent 28bf6141b5
commit 0f97bbcd9e
2 changed files with 40 additions and 15 deletions

View File

@ -195,6 +195,12 @@ exec mzscheme -qu "$0" ${1+"$@"}
(list 'escape #"noBABthe AAABquickAAAB brown foxAB" #"noB(?:[^AB]+|A.)*B" 10 '())
(list 'escape #"yesbabthe aaabquickaaab frown foxabb" #"(?i:yesB(?:[^AB]+|A.)*B)" 100000 '(rxmzold))
(list 'escape #"nobabthe aaabquickaaab frown foxab" #"(?i:noB(?:[^AB]+|A.)*B)" 10 '(rxmzold))
(list 'backtrack1 (make-bytes 1000 (char->integer #\x)) #".*z" 100 '())
(list 'backtrack1z (bytes-append #"z" (make-bytes 1000 (char->integer #\x))) #".*z" 100 '())
(list 'backtrack2 (make-bytes 1000 (char->integer #\x)) #".*(z)" 100 '())
(list 'backtrack2z (bytes-append #"z" (make-bytes 1000 (char->integer #\x))) #".*(z)" 100 '())
(list 'backtrack3 (make-bytes 1000 (char->integer #\x)) #".*(?:z)" 100 '())
(list 'backtrack3z (bytes-append #"z" (make-bytes 1000 (char->integer #\x))) #".*(?:z)" 100 '())
(list 'stress-same (make-bytes 1000 (char->integer #\x)) #"^(x|a)\\1*x$" 1000 '(rxmzold))
(list 'stress-same (make-bytes 1000 (char->integer #\x)) #"^(x*|a)\\1x$" 1000 '(rxmzold))
(list 'stress-same (make-bytes 1000 (char->integer #\x)) #"^(x*?|a)\\1x$" 1000 '(rxmzold))

View File

@ -249,11 +249,15 @@ regcomp(char *expstr, rxpos exp, int explen, int pcre)
longest_is_ci = 0;
len = 0;
for (; scan != 0; scan = regnext(scan)) {
if (((rOP(scan) == EXACTLY) || (rOP(scan) == EXACTLY_CI))
&& rOPLEN(OPERAND(scan)) >= len) {
int mscan = scan;
while (1) {
int mop;
mop = rOP(mscan);
if (((mop == EXACTLY) || (mop == EXACTLY_CI))
&& rOPLEN(OPERAND(mscan)) >= len) {
/* Skip regmust if it contains a null character: */
rxpos ls = OPSTR(OPERAND(scan));
int ll = rOPLEN(OPERAND(scan)), i;
rxpos ls = OPSTR(OPERAND(mscan));
int ll = rOPLEN(OPERAND(mscan)), i;
for (i = 0; i < ll; i++) {
if (!regstr[ls + i])
break;
@ -261,8 +265,23 @@ regcomp(char *expstr, rxpos exp, int explen, int pcre)
if (i >= ll) {
longest = ls;
len = ll;
longest_is_ci = (rOP(scan) == EXACTLY_CI);
longest_is_ci = (rOP(mscan) == EXACTLY_CI);
}
break;
} else if ((mop == EXACTLY1) && (1 >= len)) {
/* Skip if it's a null character */
if (regstr[OPERAND(mscan)]) {
longest = OPERAND(mscan);
len = 1;
longest_is_ci = 0;
}
break;
} else if ((mop == OPENN)
|| (mop == SAVECONST)
|| ((mop >= OPEN) && (mop < CLOSE))) {
mscan = NEXT_OP(mscan);
} else
break;
}
}
if (longest) {