diff --git a/collects/scheme/private/string.ss b/collects/scheme/private/string.ss index 5522a6b734..9e6e4277f9 100644 --- a/collects/scheme/private/string.ss +++ b/collects/scheme/private/string.ss @@ -56,13 +56,15 @@ [else (raise-type-error 'regexp-replace-quote "string or byte string" s)])) + ;; This was originally intended to be general, but it has become specialized + ;; to deal with the combination of a regexp and a number: (define (make-regexp-tweaker tweaker) - (let ([t (make-weak-hasheq)]) - (lambda (rx) + (let ([t (make-hash)]) + (lambda (rx n) (define-syntax-rule (->str x) (if (bytes? x) (bytes->string/utf-8 x) x)) (define-syntax-rule (->bts x) (if (bytes? x) x (string->bytes/utf-8 x))) (define-syntax-rule (tweak unwrap wrap convert) - (let ([tweaked (tweaker (unwrap rx))]) + (let ([tweaked (tweaker (unwrap rx) n)]) ;; the tweaker is allowed to return a regexp (if (or (regexp? tweaked) (byte-regexp? tweaked)) tweaked @@ -81,8 +83,9 @@ 'regexp-tweaker "regexp, byte regexp, string, or byte string" rx)])) - (or (hash-ref t rx #f) - (let ([rx* (run-tweak)]) (hash-set! t rx rx*) rx*))))) + (let ([key (cons n rx)]) + (or (hash-ref t key #f) + (let ([rx* (run-tweak)]) (hash-set! t key rx*) rx*)))))) (define (regexp-try-match pattern input-port [start-k 0] [end-k #f] [out #f]) (unless (input-port? input-port) @@ -109,8 +112,8 @@ ;; Helper macro for the regexp functions below, with some utilities. (define (bstring-length s) (if (bytes? s) (bytes-length s) (string-length s))) - (define (no-empty-edge-matches n) - (make-regexp-tweaker (lambda (rx) + (define no-empty-edge-matches + (make-regexp-tweaker (lambda (rx n) (if (bytes? rx) (bytes-append #"(?:" rx @@ -166,8 +169,7 @@ (let loop ([acc '()] [start start] [end end] [ipre ipre] [0-ok? #t]) (let* ([rx (if 0-ok? orig-rx - ((no-empty-edge-matches (add1 (bytes-length ipre))) - orig-rx))]) + (no-empty-edge-matches orig-rx (add1 (bytes-length ipre))))]) (if (and port-success-choose (input-port? string)) ;; Input port match, get string diff --git a/src/mzscheme/src/regexp.c b/src/mzscheme/src/regexp.c index b23fb50381..83dedfcb99 100644 --- a/src/mzscheme/src/regexp.c +++ b/src/mzscheme/src/regexp.c @@ -84,7 +84,6 @@ THREAD_LOCAL_DECL(static rxpos regcode) ; /* Code-emit pointer, if less than THREAD_LOCAL_DECL(static rxpos regcodesize); THREAD_LOCAL_DECL(static rxpos regcodemax); THREAD_LOCAL_DECL(static long regmaxlookback); -static int reghasgenlookback; /* FIXME: make this thread local */ /* caches to avoid gc */ THREAD_LOCAL_DECL(static long rx_buffer_size); @@ -170,7 +169,6 @@ regcomp(char *expstr, rxpos exp, int explen, int pcre) regnpar = 1; regncounter = 0; regmaxlookback = 0; - reghasgenlookback = 0; regcode = 1; regcodesize = 0; regcodemax = 0; @@ -202,8 +200,6 @@ regcomp(char *expstr, rxpos exp, int explen, int pcre) r->nsubexp = regnpar; r->ncounter = regncounter; r->maxlookback = regmaxlookback; - if (reghasgenlookback) - r->flags |= REGEXP_LOOKBEHIND; /* Second pass: emit code. */ regparse = exp; @@ -498,7 +494,7 @@ reg(int paren, int *flagp, int paren_set, int lookahead, int parse_flags) rxpos br; rxpos ender; int parno = 0; - int flags, matchmin, matchmax, maxlookback, brcount, hasgenlookback; + int flags, matchmin, matchmax, maxlookback, brcount; Scheme_Hash_Table *backdepends; #ifdef DO_STACK_CHECK @@ -564,7 +560,6 @@ reg(int paren, int *flagp, int paren_set, int lookahead, int parse_flags) matchmin = regmatchmin; matchmax = regmatchmax; maxlookback = regmaxlookback; - hasgenlookback = reghasgenlookback; brcount = 1; while (regparsestr[regparse] == '|') { brcount++; @@ -595,15 +590,12 @@ reg(int paren, int *flagp, int paren_set, int lookahead, int parse_flags) matchmax = regmatchmax; if (regmaxlookback > maxlookback) maxlookback = regmaxlookback; - if (reghasgenlookback) - hasgenlookback = 1; } } regbackdepends = backdepends; regmatchmin = matchmin; regmatchmax = matchmax; regmaxlookback = maxlookback; - reghasgenlookback = hasgenlookback; if (paren && paren_set) { Scheme_Object *assumed; @@ -660,7 +652,6 @@ reg(int paren, int *flagp, int paren_set, int lookahead, int parse_flags) if (matchmax > 0x7FFF) FAIL("lookbehind match is potentially too long (more than 32767 bytes)"); regmaxlookback = matchmax + maxlookback; - reghasgenlookback = 1; if (ret + 8 < regcodesize) { regstr[ret + 5] = (matchmin >> 8); regstr[ret + 6] = (matchmin & 255); @@ -726,7 +717,7 @@ regbranch(int *flagp, int parse_flags, int without_branch_node) { rxpos ret; rxpos chain, latest; - int flags = 0, matchmin = 0, matchmax = 0, maxlookback = 0, hasgenlookback = 0, pcount = 0, save_flags; + int flags = 0, matchmin = 0, matchmax = 0, maxlookback = 0, pcount = 0, save_flags; *flagp = (WORST|SPFIXED); /* Tentatively. */ @@ -757,8 +748,6 @@ regbranch(int *flagp, int parse_flags, int without_branch_node) regtail(chain, latest); if (!(flags&SPFIXED)) *flagp &= ~SPFIXED; - if (reghasgenlookback && (regmaxlookback > matchmin)) - hasgenlookback = 1; if ((regmaxlookback - matchmin) > maxlookback) maxlookback = regmaxlookback - matchmin; matchmin += regmatchmin; @@ -771,7 +760,6 @@ regbranch(int *flagp, int parse_flags, int without_branch_node) regmatchmin = matchmin; regmatchmax = matchmax; regmaxlookback = maxlookback; - reghasgenlookback = hasgenlookback; if (chain == 0) { /* Loop ran zero times. */ latest = regnode(NOTHING); if (without_branch_node) @@ -1085,7 +1073,6 @@ regatom(int *flagp, int parse_flags, int at_start) *flagp = (WORST|SPFIXED); /* Tentatively. */ regmatchmin = regmatchmax = 1; regmaxlookback = 0; - reghasgenlookback = 0; switch (regparsestr[regparse++]) { case '^': @@ -1331,12 +1318,10 @@ regatom(int *flagp, int parse_flags, int at_start) ret = regnode(WORDBOUND); regmatchmin = regmatchmax = 0; regmaxlookback = 1; - reghasgenlookback = 1; } else if ((parse_flags & PARSE_PCRE) && (c == 'B')) { ret = regnode(NOTWORDBOUND); regmatchmin = regmatchmax = 0; regmaxlookback = 1; - reghasgenlookback = 1; } else if ((parse_flags & PARSE_PCRE) && (c == 'p')) { ret = regunicode(0); regmatchmax = MAX_UTF8_CHAR_BYTES; diff --git a/src/mzscheme/src/schrx.h b/src/mzscheme/src/schrx.h index 3e6daae50f..1d30598a5c 100644 --- a/src/mzscheme/src/schrx.h +++ b/src/mzscheme/src/schrx.h @@ -31,7 +31,6 @@ typedef struct regexp { #define REGEXP_ANCH 0x04 #define REGEXP_MUST_CI 0x08 #define REGEXP_JIT 0x10 -#define REGEXP_LOOKBEHIND 0x20 #ifdef INDIRECT_TO_PROGRAM # define N_ITO_DELTA(prog, extra, re) extra