diff --git a/collects/mzlib/string.ss b/collects/mzlib/string.ss index e3765694e5..1b2f1cfe23 100644 --- a/collects/mzlib/string.ss +++ b/collects/mzlib/string.ss @@ -404,34 +404,35 @@ #f)) (wrap regexp-match* -regexp-match*) - (define regexp-match-exact? - (lambda (p s) - (let ([m (regexp-match-positions p s)]) - (and m - (zero? (caar m)) - (if (or (byte-regexp? p) - (bytes? p) - (bytes? s)) - (= (cdar m) (if (bytes? s) - (bytes-length s) - (string-utf-8-length s))) - (= (cdar m) (string-length s))))))) + (define (regexp-match-exact? p s) + (let ([m (regexp-match-positions p s)]) + (and m + (zero? (caar m)) + (if (or (byte-regexp? p) (bytes? p) (bytes? s)) + (= (cdar m) (if (bytes? s) + (bytes-length s) + (string-utf-8-length s))) + (= (cdar m) (string-length s)))))) (define glob->regexp (let-values ([(def-case-sens) (not (memq (system-type) '(windows macos macosx)))] - [(item:s item:b) - (let ([rx (string-append - "(?:" - "[\\]." ; escaped item - "|" - "[*?]" ; wildcards -- the only 1-character match - "|" - "\\[(?:\\^?\\]|\\^?[^]^])[^]]*\\]" ; range - ")" - )]) - (values (regexp rx) (byte-regexp (string->bytes/utf-8 rx))))]) - (opt-lambda (glob [hide-dots? #t] [case-sens? def-case-sens]) + [(item:s item:b simple-item:s simple-item:b) + (let ([rx (lambda (s) + (string-append + "(?:" + "[\\]." ; escaped item + "|" + "[*?]" ; wildcards -- the only 1-character match + s ; [*] more stuff here + ")" + ))] + [range "|\\[(?:\\^?\\]|\\^?[^]^])[^]]*\\]"]) ; goes in [*] + (values (regexp (rx range)) + (byte-regexp (string->bytes/utf-8 (rx range))) + (regexp (rx "")) + (byte-regexp (string->bytes/utf-8 (rx "")))))]) + (opt-lambda (glob [hide-dots? #t] [case-sens? def-case-sens] [simple? #f]) (let*-values ([(b?) (cond [(bytes? glob) #t] [(string? glob) #f] [else (raise-type-error @@ -440,9 +441,11 @@ [(app sub ref rx item star any one) (if b? (values bytes-append subbytes bytes-ref byte-regexp - item:b (char->integer #\*) #".*" #".") + (if simple? simple-item:b item:b) + (char->integer #\*) #".*" #".") (values string-append substring string-ref regexp - item:s #\* ".*" "."))] + (if simple? simple-item:s item:s) + #\* ".*" "."))] [(pfx sfx) (if case-sens? (if b? (values #"^" #"^") (values "^" "$")) @@ -450,21 +453,22 @@ (values "^(?i:" ")$")))] [(pfx) (if hide-dots? (app pfx (if b? #"(?![.])" "(?![.])")) - pfx)]) + pfx)] + [(subq) (lambda xs (regexp-quote (apply sub xs)))]) (let loop ([i 0] [ps (regexp-match-positions* item glob)] [r '()]) (cond [(null? ps) - (rx (app pfx - (apply app (reverse! (cons (sub glob i) r))) - sfx))] + (let ([r (apply app (reverse! (cons (subq glob i) r)))]) + (rx (app pfx r sfx)))] [(= 1 (- (cdar ps) (caar ps))) (loop (cdar ps) (cdr ps) (cons (if (equal? star (ref glob (caar ps))) any one) (if (= i (caar ps)) - r - (cons (sub glob i (caar ps)) r))))] + r (cons (subq glob i (caar ps)) r))))] [else (loop (cdar ps) (cdr ps) - (cons (sub glob i (cdar ps)) r))])))))) + (cons (sub glob (caar ps) (cdar ps)) + (if (= i (caar ps)) + r (cons (subq glob i (caar ps)) r))))])))))) ) diff --git a/collects/tests/mzscheme/string.ss b/collects/tests/mzscheme/string.ss index 85cc795925..6856b7125d 100644 --- a/collects/tests/mzscheme/string.ss +++ b/collects/tests/mzscheme/string.ss @@ -18,7 +18,8 @@ (cons (integer->char i) (loop (add1 i))))))]) (test (list s) regexp-match (regexp-quote s) s) (test (string-append "!" s "!") - regexp-replace (regexp-quote s) s (regexp-replace-quote (string-append "!" s "!")))) + regexp-replace + (regexp-quote s) s (regexp-replace-quote (string-append "!" s "!")))) (let ([s (open-input-string "hello there")]) (test #f regexp-match/fail-without-reading #rx"not there" s) @@ -42,7 +43,8 @@ (test '(#"a") regexp-match* #"[abc]" "here's a buck" 0 8) (test '(#"a" #"b" #"c") regexp-match* #"[abc]" "here's a buck" 0 #f) -(test '(#"a" #"b" #"c") regexp-match* "[abc]" (open-input-string "here's a buck")) +(test '(#"a" #"b" #"c") + regexp-match* "[abc]" (open-input-string "here's a buck")) (test '(#"b" #"c") regexp-match* "[abc]" (open-input-string "here's a buck") 8) (let ([s (open-input-string "here's a buck")]) (test '(#"a") regexp-match* "[abc]" s 0 8) @@ -51,13 +53,19 @@ (test '(#"a" #"b" #"c") regexp-match* "[abc]" s 0 #f) (test eof read-char s)) -(test '((7 . 8) (9 . 10) (11 . 12)) regexp-match-positions* "[abc]" "here's a buck") -(test '((9 . 10) (11 . 12)) regexp-match-positions* "[abc]" "here's a buck" 8) -(test '((7 . 8)) regexp-match-positions* "[abc]" "here's a buck" 0 8) -(test '((7 . 8) (9 . 10) (11 . 12)) regexp-match-positions* "[abc]" "here's a buck" 0 #f) +(test '((7 . 8) (9 . 10) (11 . 12)) + regexp-match-positions* "[abc]" "here's a buck") +(test '((9 . 10) (11 . 12)) + regexp-match-positions* "[abc]" "here's a buck" 8) +(test '((7 . 8)) + regexp-match-positions* "[abc]" "here's a buck" 0 8) +(test '((7 . 8) (9 . 10) (11 . 12)) + regexp-match-positions* "[abc]" "here's a buck" 0 #f) -(test '((7 . 8) (9 . 10) (11 . 12)) regexp-match-positions* "[abc]" (open-input-string "here's a buck")) -(test '((9 . 10) (11 . 12)) regexp-match-positions* "[abc]" (open-input-string "here's a buck") 8) +(test '((7 . 8) (9 . 10) (11 . 12)) + regexp-match-positions* "[abc]" (open-input-string "here's a buck")) +(test '((9 . 10) (11 . 12)) + regexp-match-positions* "[abc]" (open-input-string "here's a buck") 8) (let ([s (open-input-string "here's a buck")]) (test '((7 . 8)) regexp-match-positions* "[abc]" s 0 8) (test " buck" read-string 50 s)) @@ -66,23 +74,33 @@ (test eof read-char s)) (let ([s (open-input-string "here's a buck")]) - (test '((7 . 8) (9 . 10) (11 . 12)) regexp-match-peek-positions* "[abc]" (open-input-string "here's a buck")) - (test '((9 . 10) (11 . 12)) regexp-match-peek-positions* "[abc]" (open-input-string "here's a buck") 8) - (test '((7 . 8)) regexp-match-peek-positions* "[abc]" s 0 8) - (test '((7 . 8) (9 . 10) (11 . 12)) regexp-match-peek-positions* "[abc]" s 0 #f) - (test "here's a buck" read-string 50 s)) + (test '((7 . 8) (9 . 10) (11 . 12)) + regexp-match-peek-positions* + "[abc]" (open-input-string "here's a buck")) + (test '((9 . 10) (11 . 12)) + regexp-match-peek-positions* + "[abc]" (open-input-string "here's a buck") 8) + (test '((7 . 8)) + regexp-match-peek-positions* "[abc]" s 0 8) + (test '((7 . 8) (9 . 10) (11 . 12)) + regexp-match-peek-positions* "[abc]" s 0 #f) + (test "here's a buck" + read-string 50 s)) (test '("here's " " " "u" "k") regexp-split "[abc]" "here's a buck") -(test '(" " "u" "k") regexp-split "[abc]" "here's a buck" 8) -(test '("" "u" "k") regexp-split "[abc]" "here's a buck" 9) -(test '("" "u" "") regexp-split "[abc]" "here's a buck" 9 12) -(test '("here's " "") regexp-split "[abc]" "here's a buck" 0 8) -(test '("here's " " ") regexp-split "[abc]" "here's a buck" 0 9) +(test '(" " "u" "k") regexp-split "[abc]" "here's a buck" 8) +(test '("" "u" "k") regexp-split "[abc]" "here's a buck" 9) +(test '("" "u" "") regexp-split "[abc]" "here's a buck" 9 12) +(test '("here's " "") regexp-split "[abc]" "here's a buck" 0 8) +(test '("here's " " ") regexp-split "[abc]" "here's a buck" 0 9) (test '("here's " " " "u" "k") regexp-split "[abc]" "here's a buck" 0 #f) -(test '(#"here's " #" " #"u" #"k") regexp-split "[abc]" (open-input-string "here's a buck")) -(test '(#" " #"u" #"k") regexp-split "[abc]" (open-input-string "here's a buck") 8) -(test '(#"" #"u" #"k") regexp-split "[abc]" (open-input-string "here's a buck") 9) +(test '(#"here's " #" " #"u" #"k") + regexp-split "[abc]" (open-input-string "here's a buck")) +(test '(#" " #"u" #"k") + regexp-split "[abc]" (open-input-string "here's a buck") 8) +(test '(#"" #"u" #"k") + regexp-split "[abc]" (open-input-string "here's a buck") 9) (let ([s (open-input-string "here's a buck")]) (test '(#"" #"u" #"") regexp-split "[abc]" s 9 12) (test "k" read-string 50 s)) @@ -136,6 +154,13 @@ #f ".afoo-bar" ".b-foo-bar") ((g->re-test "[.]foo-bar" #f #t) #f "foobar" "foo-barr" "foo-bar" "-foo-bar" - #t ".foo-bar")) + #t ".foo-bar") + ((g->re-test "foo{*}bar" #t #t) + #f "foo{bar" "foo{-{bar" "foo{}barr" ".foo-bar" + #t "foo{}bar" "foo{-}bar" "foo{{}}bar" "foo{}{}bar") + ((g->re-test "^foo{[*]}bar$" #t #t #t) + #f "^foo{[}bar$" "^foo{[-]{bar$" "^foo{[]}barr$" ".^foo{[]}bar$" + #t "^foo{[]}bar$" "^foo{[-]}bar$" "^foo{[{}]}bar$" "^foo{[]}{[]}bar$") + ((g->re-test "$[.]^" #t #t #t) #f "$[,]^" #t "$[.]^")) (report-errs)