diff --git a/collects/tests/mzscheme/benchmarks/rx/auto.ss b/collects/tests/mzscheme/benchmarks/rx/auto.ss index 69f04f747e..c74befa265 100755 --- a/collects/tests/mzscheme/benchmarks/rx/auto.ss +++ b/collects/tests/mzscheme/benchmarks/rx/auto.ss @@ -195,6 +195,12 @@ exec mzscheme -qu "$0" ${1+"$@"} (list 'escape #"noBABthe AAABquickAAAB brown foxAB" #"noB(?:[^AB]+|A.)*B" 10 '()) (list 'escape #"yesbabthe aaabquickaaab frown foxabb" #"(?i:yesB(?:[^AB]+|A.)*B)" 100000 '(rxmzold)) (list 'escape #"nobabthe aaabquickaaab frown foxab" #"(?i:noB(?:[^AB]+|A.)*B)" 10 '(rxmzold)) + (list 'backtrack1 (make-bytes 1000 (char->integer #\x)) #".*z" 100 '()) + (list 'backtrack1z (bytes-append #"z" (make-bytes 1000 (char->integer #\x))) #".*z" 100 '()) + (list 'backtrack2 (make-bytes 1000 (char->integer #\x)) #".*(z)" 100 '()) + (list 'backtrack2z (bytes-append #"z" (make-bytes 1000 (char->integer #\x))) #".*(z)" 100 '()) + (list 'backtrack3 (make-bytes 1000 (char->integer #\x)) #".*(?:z)" 100 '()) + (list 'backtrack3z (bytes-append #"z" (make-bytes 1000 (char->integer #\x))) #".*(?:z)" 100 '()) (list 'stress-same (make-bytes 1000 (char->integer #\x)) #"^(x|a)\\1*x$" 1000 '(rxmzold)) (list 'stress-same (make-bytes 1000 (char->integer #\x)) #"^(x*|a)\\1x$" 1000 '(rxmzold)) (list 'stress-same (make-bytes 1000 (char->integer #\x)) #"^(x*?|a)\\1x$" 1000 '(rxmzold)) diff --git a/src/mzscheme/src/regexp.c b/src/mzscheme/src/regexp.c index 81706e2ea9..dc59acbc9d 100644 --- a/src/mzscheme/src/regexp.c +++ b/src/mzscheme/src/regexp.c @@ -249,21 +249,40 @@ regcomp(char *expstr, rxpos exp, int explen, int pcre) longest_is_ci = 0; len = 0; for (; scan != 0; scan = regnext(scan)) { - if (((rOP(scan) == EXACTLY) || (rOP(scan) == EXACTLY_CI)) - && rOPLEN(OPERAND(scan)) >= len) { - /* Skip regmust if it contains a null character: */ - rxpos ls = OPSTR(OPERAND(scan)); - int ll = rOPLEN(OPERAND(scan)), i; - for (i = 0; i < ll; i++) { - if (!regstr[ls + i]) - break; - } - if (i >= ll) { - longest = ls; - len = ll; - longest_is_ci = (rOP(scan) == EXACTLY_CI); - } - } + int mscan = scan; + while (1) { + int mop; + mop = rOP(mscan); + if (((mop == EXACTLY) || (mop == EXACTLY_CI)) + && rOPLEN(OPERAND(mscan)) >= len) { + /* Skip regmust if it contains a null character: */ + rxpos ls = OPSTR(OPERAND(mscan)); + int ll = rOPLEN(OPERAND(mscan)), i; + for (i = 0; i < ll; i++) { + if (!regstr[ls + i]) + break; + } + if (i >= ll) { + longest = ls; + len = ll; + longest_is_ci = (rOP(mscan) == EXACTLY_CI); + } + break; + } else if ((mop == EXACTLY1) && (1 >= len)) { + /* Skip if it's a null character */ + if (regstr[OPERAND(mscan)]) { + longest = OPERAND(mscan); + len = 1; + longest_is_ci = 0; + } + break; + } else if ((mop == OPENN) + || (mop == SAVECONST) + || ((mop >= OPEN) && (mop < CLOSE))) { + mscan = NEXT_OP(mscan); + } else + break; + } } if (longest) { r->regmust = longest;