diff --git a/collects/tests/mzscheme/string.ss b/collects/tests/mzscheme/string.ss index 49e1439513..22d27e0d35 100644 --- a/collects/tests/mzscheme/string.ss +++ b/collects/tests/mzscheme/string.ss @@ -25,113 +25,212 @@ ;; ---------- regexp-quote ---------- (let ([s (list->string - (let loop ([i 0]) - (if (= i 256) - null - (cons (integer->char i) (loop (add1 i))))))]) + (let loop ([i 0]) + (if (= i 256) + null + (cons (integer->char i) (loop (add1 i))))))]) (test (list s) regexp-match (regexp-quote s) s) (test (string-append "!" s "!") - regexp-replace + regexp-replace (regexp-quote s) s (regexp-replace-quote (string-append "!" s "!")))) -;; ---------- regexp-match* ---------- -(test '("a" "b" "c") regexp-match* "[abc]" "here's a buck") -(test '("b" "c") regexp-match* "[abc]" "here's a buck" 8) -(test '("a") regexp-match* "[abc]" "here's a buck" 0 8) -(test '("a" "b" "c") regexp-match* "[abc]" "here's a buck" 0 #f) - -(test '(#"a" #"b" #"c") regexp-match* "[abc]" #"here's a buck") -(test '(#"b" #"c") regexp-match* "[abc]" #"here's a buck" 8) -(test '(#"a") regexp-match* "[abc]" #"here's a buck" 0 8) -(test '(#"a" #"b" #"c") regexp-match* "[abc]" #"here's a buck" 0 #f) - -(test '(#"a" #"b" #"c") regexp-match* #"[abc]" "here's a buck") -(test '(#"b" #"c") regexp-match* #"[abc]" "here's a buck" 8) -(test '(#"a") regexp-match* #"[abc]" "here's a buck" 0 8) -(test '(#"a" #"b" #"c") regexp-match* #"[abc]" "here's a buck" 0 #f) - -(test '(#"a" #"b" #"c") - regexp-match* "[abc]" (open-input-string "here's a buck")) -(test '(#"b" #"c") regexp-match* "[abc]" (open-input-string "here's a buck") 8) -(let ([s (open-input-string "here's a buck")]) - (test '(#"a") regexp-match* "[abc]" s 0 8) - (test " buck" read-string 50 s)) -(let ([s (open-input-string "here's a buck")]) - (test '(#"a" #"b" #"c") regexp-match* "[abc]" s 0 #f) - (test eof read-char s)) - -;; ---------- regexp-match-positions* ---------- -(test '((7 . 8) (9 . 10) (11 . 12)) - regexp-match-positions* "[abc]" "here's a buck") -(test '((9 . 10) (11 . 12)) - regexp-match-positions* "[abc]" "here's a buck" 8) -(test '((7 . 8)) - regexp-match-positions* "[abc]" "here's a buck" 0 8) -(test '((7 . 8) (9 . 10) (11 . 12)) - regexp-match-positions* "[abc]" "here's a buck" 0 #f) - -(test '((7 . 8) (9 . 10) (11 . 12)) - regexp-match-positions* "[abc]" (open-input-string "here's a buck")) -(test '((9 . 10) (11 . 12)) - regexp-match-positions* "[abc]" (open-input-string "here's a buck") 8) -(let ([s (open-input-string "here's a buck")]) - (test '((7 . 8)) regexp-match-positions* "[abc]" s 0 8) - (test " buck" read-string 50 s)) -(let ([s (open-input-string "here's a buck")]) - (test '((7 . 8) (9 . 10) (11 . 12)) regexp-match-positions* "[abc]" s 0 #f) - (test eof read-char s)) - -;; ---------- regexp-match-peek-positions* ---------- -(let ([s (open-input-string "here's a buck")]) - (test '((7 . 8) (9 . 10) (11 . 12)) - regexp-match-peek-positions* - "[abc]" (open-input-string "here's a buck")) - (test '((9 . 10) (11 . 12)) - regexp-match-peek-positions* - "[abc]" (open-input-string "here's a buck") 8) - (test '((7 . 8)) - regexp-match-peek-positions* "[abc]" s 0 8) - (test '((7 . 8) (9 . 10) (11 . 12)) - regexp-match-peek-positions* "[abc]" s 0 #f) - (test "here's a buck" - read-string 50 s)) - -;; ---------- regexp-split ---------- -(test '("here's " " " "u" "k") regexp-split "[abc]" "here's a buck") -(test '(" " "u" "k") regexp-split "[abc]" "here's a buck" 8) -(test '("" "u" "k") regexp-split "[abc]" "here's a buck" 9) -(test '("" "u" "") regexp-split "[abc]" "here's a buck" 9 12) -(test '("here's " "") regexp-split "[abc]" "here's a buck" 0 8) -(test '("here's " " ") regexp-split "[abc]" "here's a buck" 0 9) -(test '("here's " " " "u" "k") regexp-split "[abc]" "here's a buck" 0 #f) - -(test '(#"here's " #" " #"u" #"k") - regexp-split "[abc]" (open-input-string "here's a buck")) -(test '(#" " #"u" #"k") - regexp-split "[abc]" (open-input-string "here's a buck") 8) -(test '(#"" #"u" #"k") - regexp-split "[abc]" (open-input-string "here's a buck") 9) -(let ([s (open-input-string "here's a buck")]) - (test '(#"" #"u" #"") regexp-split "[abc]" s 9 12) - (test "k" read-string 50 s)) -(let ([s (open-input-string "here's a buck")]) - (test '(#"here's " #"") regexp-split "[abc]" s 0 8) - (test " buck" read-string 50 s)) -(let ([s (open-input-string "here's a buck")]) - (test '(#"here's " #" ") regexp-split "[abc]" s 0 9) - (test "buck" read-string 50 s)) -(let ([s (open-input-string "here's a buck")]) - (test '(#"here's " #" " #"u" #"k") regexp-split "[abc]" s 0 #f) - (test eof read-char s)) - -;; test with zero-length matches -(test '("" "f" "o" "o" "") regexp-split #rx"" "foo") -(test '("" "f" "o" "o" " " "b" "a" "r" "") regexp-split #rx"" "foo bar") -(test '("" "f" "o" "o" "" "b" "a" "r" "") regexp-split #rx" *" "foo bar") -(test '("f" "" "ar") regexp-split #rx"oo| b" "foo bar") -(test '("foo bar" "") regexp-split #rx"$" "foo bar") -;; this doesn't work (like in Emacs) because ^ matches the start pos -;; (test '("" "foo bar") regexp-split #rx"^" "foo bar") +;; ---------- regexp-* functions ---------- +(let () + (define (->b x) + (cond [(list? x) (map ->b x)] + [(string? x) (string->bytes/utf-8 x)] + [else x])) + (define fun* #f) + (define t + (case-lambda + [(fun) (set! fun* fun)] + [(res left rx str . args) + (unless (eq? fun* regexp-match-peek-positions*) + ;; test with a string + (apply test res fun* rx str args) + ;; test with a byte-regexp and/or a byte string + (apply test (->b res) fun* (->b rx) str args) + (apply test (->b res) fun* rx (->b str) args) + (apply test (->b res) fun* (->b rx) (->b str) args)) + ;; test with a port, and test leftovers + (let ([p (open-input-string str)]) + (apply test (->b res) fun* rx p args) + (when left (test left read-string 50 p)))])) + ;; -------------------- + (t regexp-match*) + (t '("a" "b" "c") eof "[abc]" " a b c ") + (t '("b" "c") eof "[abc]" " a b c " 2) + (t '("b" "c") eof "[abc]" " a b c " 3) + (t '("a") " b c " "[abc]" " a b c " 0 2) + (t '("a") "b c " "[abc]" " a b c " 0 3) + (t '("a" "b" "c") eof "[abc]" " a b c " 0 #f) + (t '("a" "b" "c") eof "[abc]" " a b c " 0 7) + (t '("a" "b" "c") eof "[abc]" "a b c") + (t '("b" "c") eof "[abc]" "a b c" 1) + (t '("b" "c") eof "[abc]" "a b c" 2) + (t '("a") " b c" "[abc]" "a b c" 0 1) + (t '("a") "b c" "[abc]" "a b c" 0 2) + (t '("a" "b" "c") eof "[abc]" "a b c" 0) + (t '("a" "b" "c") eof "[abc]" "a b c" 0 #f) + (t '("a" "b" "c") eof "[abc]" "a b c" 0 5) + ;; -------------------- + (t regexp-match-positions*) + (t '((1 . 2) (3 . 4) (5 . 6)) eof "[abc]" " a b c ") + (t '((3 . 4) (5 . 6)) eof "[abc]" " a b c " 2) + (t '((3 . 4) (5 . 6)) eof "[abc]" " a b c " 3) + (t '((1 . 2)) " b c " "[abc]" " a b c " 0 2) + (t '((1 . 2)) "b c " "[abc]" " a b c " 0 3) + (t '((1 . 2) (3 . 4) (5 . 6)) eof "[abc]" " a b c " 0 #f) + (t '((1 . 2) (3 . 4) (5 . 6)) eof "[abc]" " a b c " 0 7) + (t '((0 . 1) (2 . 3) (4 . 5)) eof "[abc]" "a b c") + (t '((2 . 3) (4 . 5)) eof "[abc]" "a b c" 1) + (t '((2 . 3) (4 . 5)) eof "[abc]" "a b c" 2) + (t '((0 . 1)) " b c" "[abc]" "a b c" 0 1) + (t '((0 . 1)) "b c" "[abc]" "a b c" 0 2) + (t '((0 . 1) (2 . 3) (4 . 5)) eof "[abc]" "a b c" 0) + (t '((0 . 1) (2 . 3) (4 . 5)) eof "[abc]" "a b c" 0 #f) + (t '((0 . 1) (2 . 3) (4 . 5)) eof "[abc]" "a b c" 0 5) + ;; -------------------- + (t regexp-split) + (t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4") + (t '("2" "3" "4") eof "[abc]" "1a2b3c4" 2) + (t '("" "3" "4") eof "[abc]" "1a2b3c4" 3) + (t '("1" "") "2b3c4" "[abc]" "1a2b3c4" 0 2) + (t '("1" "2") "b3c4" "[abc]" "1a2b3c4" 0 3) + (t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4" 0 #f) + (t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4" 0 7) + (t '("" "1" "2" "") eof "[abc]" "a1b2c") + (t '("1" "2" "") eof "[abc]" "a1b2c" 1) + (t '("" "2" "") eof "[abc]" "a1b2c" 2) + (t '("" "") "1b2c" "[abc]" "a1b2c" 0 1) + (t '("" "1") "b2c" "[abc]" "a1b2c" 0 2) + (t '("" "1" "2" "") eof "[abc]" "a1b2c" 0) + (t '("" "1" "2" "") eof "[abc]" "a1b2c" 0 #f) + (t '("" "1" "2" "") eof "[abc]" "a1b2c" 0 5) + ;; -------------------- + (t regexp-match-peek-positions*) + (err/rt-test (regexp-match-peek-positions* "[abc]" "a b c")) + (t '((1 . 2) (3 . 4) (5 . 6)) " a b c " "[abc]" " a b c ") + (t '((3 . 4) (5 . 6)) " a b c " "[abc]" " a b c " 2) + (t '((3 . 4) (5 . 6)) " a b c " "[abc]" " a b c " 3) + (t '((1 . 2)) " a b c " "[abc]" " a b c " 0 2) + (t '((1 . 2)) " a b c " "[abc]" " a b c " 0 3) + (t '((1 . 2) (3 . 4) (5 . 6)) " a b c " "[abc]" " a b c " 0 #f) + (t '((1 . 2) (3 . 4) (5 . 6)) " a b c " "[abc]" " a b c " 0 7) + (t '((0 . 1) (2 . 3) (4 . 5)) "a b c" "[abc]" "a b c") + (t '((2 . 3) (4 . 5)) "a b c" "[abc]" "a b c" 1) + (t '((2 . 3) (4 . 5)) "a b c" "[abc]" "a b c" 2) + (t '((0 . 1)) "a b c" "[abc]" "a b c" 0 1) + (t '((0 . 1)) "a b c" "[abc]" "a b c" 0 2) + (t '((0 . 1) (2 . 3) (4 . 5)) "a b c" "[abc]" "a b c" 0 #f) + (t '((0 . 1) (2 . 3) (4 . 5)) "a b c" "[abc]" "a b c" 0 5) + ;; ---------- tests with zero-length matches ---------- + ;; Many of these tests can be repeated with Perl. To try something + ;; in Perl, put this code in a file: + ;; #!/usr/bin/perl + ;; sub test { + ;; my ($rx,$str) = @_; @words = split /$rx/, $str; + ;; print "(t '("; + ;; while (@words) {print '"'.shift(@words).'"'.(@words?" ":"");} + ;; print ") eof \"$rx\" \"$str\")\n"; + ;; }; + ;; test("[abc]","1a2b3"); + ;; and it will print a test that does what perl is doing. Tests + ;; that differ from Perl have explanations. + ;; + (t regexp-split) + ;; test("a","a"); + ;; (t '() eof "a" "a") + ;; perl returns an empty list, we return '("" ""), and this is a + ;; difference that is unrelated to dealing with empty matches, + ;; just the way that perl's split throws out some empty matches. + (t '("" "") eof "a" "a") + ;; test("3","123"); + ;; (t '("12") eof "3" "123") + ;; a more straightforward demonstration of this + (t '("12" "") eof "3" "123") + ;; test("a","1a2a3"); + (t '("1" "2" "3") eof "a" "1a2a3") + ;; test("","123"); + (t '("1" "2" "3") eof "" "123") + ;; test("","12 34"); + (t '("1" "2" " " "3" "4") eof "" "12 34") + ;; test(" *","123"); + (t '("1" "2" "3") eof " *" "123") + ;; test(" *","12 34"); + (t '("1" "2" "3" "4") eof " *" "12 34") + ;; test(" *"," 12 34 "); + ;; (t '("" "1" "2" "3" "4") eof " *" " 12 34 ") + ;; perl drops the last empty string, we don't -- unrelated to + ;; empty matches (same as the <"a","a"> case above) + (t '("" "1" "2" "3" "4" "") eof " *" " 12 34 ") + ;; test("2|", "1234"); + (t '("1" "3" "4") eof "2|" "1234") + ;; test("1|", "1234"); + (t '("" "2" "3" "4") eof "1|" "1234") + ;; test("4|", "1234"); + ;; perl drops the last empty string, we don't, same as above + (t '("1" "2" "3" "") eof "4|" "1234") + ;; test("|2", "1234"); + ;; (t '("1" "" "3" "4") eof "|2" "1234") + ;; perl will find the "2", we can't do that since we'll always + ;; find the empty match first, so it's just like using "" (to do + ;; the perl thing, we'll need a hook into the matcher's C code, or + ;; some way of saying `match this pattern but prefer a non-empty + ;; match if possible') + (t '("1" "2" "3" "4") eof "|2" "1234") + ;; test("2|3", "1234"); + (t '("1" "" "4") eof "2|3" "1234") + ;; test("1|2", "1234"); + (t '("" "" "34") eof "1|2" "1234") + ;; test("3|4", "1234"); + ;; (t '("12") eof "3|4" "1234") + ;; again, perl dumps empty matches at the end, even two + (t '("12" "" "") eof "3|4" "1234") + ;; test('$',"123"); + (t '("123") eof "$" "123") + ;; test('^',"123"); + ;; (t '("123") eof "^" "123") + ;; this is a technicality: perl matches "^" once, but mzscheme + ;; matches on whatever `start' may be; perl is treating it as a + ;; beginning-of-line instead of a ...-of-string behind your back + ;; "since it isn't much use otherwise" + ;; (http://perldoc.perl.org/functions/split.html); so our correct + ;; test is: + (t '("1" "2" "3") eof "^" "123") + ;; and we can get the same with "(m?:^)": + (t '("123") eof "(m?:^)" "123") + ;; test("^|a", "abc"); + ;; (t '("" "bc") eof "^|a" "abc") + ;; same deal here, use "(m?:^)": + (t '("" "bc") eof "(m?:^|a)" "abc") + ;; some tests with bounds (these have no perl equivalences) + (t '("1" "2" " " "3" "4") eof "" "12 34" 0) + (t '("1" "2" " " "3" "4") eof "" "12 34" 0 #f) + (t '("1" "2" " " "3" "4") eof "" "12 34" 0 5) + (t '("" "1" "2" "3" "4" "") eof " *" " 12 34 " 0) + (t '("" "1" "2" "3" "4" "") eof " *" " 12 34 " 0 #f) + (t '("" "1" "2" "3" "4" "") eof " *" " 12 34 " 0 7) + (t '("2" "3") eof " *" "123" 1) + (t '("2" "3" "4") eof " *" "12 34" 1) + (t '("2" "3" "4" "") eof " *" " 12 34 " 2) + (t '("1") "23" "" "123" 0 1) + (t '("2") "3" "" "123" 1 2) + (t '("3") eof "" "123" 2 3) + (t '("1" "2") "3" " *" "123" 0 2) + (t '("2" "3") eof " *" "123" 1 3) + ;; more tests with match* + (t regexp-match*) + (t '("" "") eof "" "123") + (t '("" " " "") eof " *" "12 34") + (t '(" " "" " " "" " ") eof " *" " 12 34 ") + (t '("" " " "") " " " *" " 12 34 " 1 6) + (t regexp-match-positions*) + (t '((1 . 1) (2 . 2)) eof "" "123") + (t '((1 . 1) (2 . 3) (4 . 4)) eof " *" "12 34") + (t '((0 . 1) (2 . 2) (3 . 4) (5 . 5) (6 . 7)) eof " *" " 12 34 ") + (t '((2 . 2) (3 . 4) (5 . 5)) " " " *" " 12 34 " 1 6) + ) ;; ---------- string-append* ---------- (let ()