From 00644821deb41786c823f2010a0a19b4721d5697 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Sat, 24 Sep 2016 14:43:54 -0600 Subject: [PATCH] fix regexp-matching bug In a pattern like a*b a naive attempt to match will take quadratic time on an input that contains all "a"s and no "b". To improve that case, the regexp compiler detects that a match will require a "b" and checks the input for a "b" to enable linear-time failure. That optimization mishandled `(?!...)` and `(?<!...)` [...] (char->integer #\a)))) +(test #f 'optimized (regexp-match #px"a*(?<=bc)" (make-bytes 100024 (char->integer #\a)))) + ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (report-errs) diff --git a/racket/src/racket/src/regexp.c b/racket/src/racket/src/regexp.c index 8354e3f10f..c5be06ac53 100644 --- a/racket/src/racket/src/regexp.c +++ b/racket/src/racket/src/regexp.c @@ -281,12 +281,16 @@ regcomp(char *expstr, rxpos exp, int explen, int pcre, Scheme_Object *handler) longest = 0; longest_is_ci = 0; len = 0; - for (; scan != 0; scan = regnext(scan)) { + for (; scan != 0; ) { int mscan = scan; while (1) { int mop; mop = rOP(mscan); - if (((mop == EXACTLY) || (mop == EXACTLY_CI)) + if ((mop == LOOKF) || (mop == LOOKBF)) { + /* skip over part that we don't want to match */ + mscan = mscan + rOPLEN(OPERAND(mscan)); + mscan = NEXT_OP(mscan); + } else if (((mop == EXACTLY) || (mop == EXACTLY_CI)) && rOPLEN(OPERAND(mscan)) >= len) { /* Skip regmust if it contains a null character: */ rxpos ls = OPSTR(OPERAND(mscan)); @@ -321,6 +325,13 @@ regcomp(char *expstr, rxpos exp, int explen, int pcre, Scheme_Object *handler) break; } prev_op = rOP(scan); + if ((prev_op == LOOKF) || (prev_op == LOOKBF)) { + /* skip over part that we don't want to match */ + scan = scan + rOPLEN(OPERAND(scan)); + scan = NEXT_OP(scan); + } else { + scan = regnext(scan); + } } if (longest) { r->regmust = longest;