much more extensive tests for regexp-X* functions
svn: r9605
This commit is contained in:
parent
0e299c5bd2
commit
726c66d8ce
|
@ -25,113 +25,212 @@
|
|||
|
||||
;; ---------- regexp-quote ----------
|
||||
(let ([s (list->string
|
||||
(let loop ([i 0])
|
||||
(if (= i 256)
|
||||
null
|
||||
(cons (integer->char i) (loop (add1 i))))))])
|
||||
(let loop ([i 0])
|
||||
(if (= i 256)
|
||||
null
|
||||
(cons (integer->char i) (loop (add1 i))))))])
|
||||
(test (list s) regexp-match (regexp-quote s) s)
|
||||
(test (string-append "!" s "!")
|
||||
regexp-replace
|
||||
regexp-replace
|
||||
(regexp-quote s) s (regexp-replace-quote (string-append "!" s "!"))))
|
||||
|
||||
;; ---------- regexp-match* ----------
|
||||
(test '("a" "b" "c") regexp-match* "[abc]" "here's a buck")
|
||||
(test '("b" "c") regexp-match* "[abc]" "here's a buck" 8)
|
||||
(test '("a") regexp-match* "[abc]" "here's a buck" 0 8)
|
||||
(test '("a" "b" "c") regexp-match* "[abc]" "here's a buck" 0 #f)
|
||||
|
||||
(test '(#"a" #"b" #"c") regexp-match* "[abc]" #"here's a buck")
|
||||
(test '(#"b" #"c") regexp-match* "[abc]" #"here's a buck" 8)
|
||||
(test '(#"a") regexp-match* "[abc]" #"here's a buck" 0 8)
|
||||
(test '(#"a" #"b" #"c") regexp-match* "[abc]" #"here's a buck" 0 #f)
|
||||
|
||||
(test '(#"a" #"b" #"c") regexp-match* #"[abc]" "here's a buck")
|
||||
(test '(#"b" #"c") regexp-match* #"[abc]" "here's a buck" 8)
|
||||
(test '(#"a") regexp-match* #"[abc]" "here's a buck" 0 8)
|
||||
(test '(#"a" #"b" #"c") regexp-match* #"[abc]" "here's a buck" 0 #f)
|
||||
|
||||
(test '(#"a" #"b" #"c")
|
||||
regexp-match* "[abc]" (open-input-string "here's a buck"))
|
||||
(test '(#"b" #"c") regexp-match* "[abc]" (open-input-string "here's a buck") 8)
|
||||
(let ([s (open-input-string "here's a buck")])
|
||||
(test '(#"a") regexp-match* "[abc]" s 0 8)
|
||||
(test " buck" read-string 50 s))
|
||||
(let ([s (open-input-string "here's a buck")])
|
||||
(test '(#"a" #"b" #"c") regexp-match* "[abc]" s 0 #f)
|
||||
(test eof read-char s))
|
||||
|
||||
;; ---------- regexp-match-positions* ----------
|
||||
(test '((7 . 8) (9 . 10) (11 . 12))
|
||||
regexp-match-positions* "[abc]" "here's a buck")
|
||||
(test '((9 . 10) (11 . 12))
|
||||
regexp-match-positions* "[abc]" "here's a buck" 8)
|
||||
(test '((7 . 8))
|
||||
regexp-match-positions* "[abc]" "here's a buck" 0 8)
|
||||
(test '((7 . 8) (9 . 10) (11 . 12))
|
||||
regexp-match-positions* "[abc]" "here's a buck" 0 #f)
|
||||
|
||||
(test '((7 . 8) (9 . 10) (11 . 12))
|
||||
regexp-match-positions* "[abc]" (open-input-string "here's a buck"))
|
||||
(test '((9 . 10) (11 . 12))
|
||||
regexp-match-positions* "[abc]" (open-input-string "here's a buck") 8)
|
||||
(let ([s (open-input-string "here's a buck")])
|
||||
(test '((7 . 8)) regexp-match-positions* "[abc]" s 0 8)
|
||||
(test " buck" read-string 50 s))
|
||||
(let ([s (open-input-string "here's a buck")])
|
||||
(test '((7 . 8) (9 . 10) (11 . 12)) regexp-match-positions* "[abc]" s 0 #f)
|
||||
(test eof read-char s))
|
||||
|
||||
;; ---------- regexp-match-peek-positions* ----------
|
||||
(let ([s (open-input-string "here's a buck")])
|
||||
(test '((7 . 8) (9 . 10) (11 . 12))
|
||||
regexp-match-peek-positions*
|
||||
"[abc]" (open-input-string "here's a buck"))
|
||||
(test '((9 . 10) (11 . 12))
|
||||
regexp-match-peek-positions*
|
||||
"[abc]" (open-input-string "here's a buck") 8)
|
||||
(test '((7 . 8))
|
||||
regexp-match-peek-positions* "[abc]" s 0 8)
|
||||
(test '((7 . 8) (9 . 10) (11 . 12))
|
||||
regexp-match-peek-positions* "[abc]" s 0 #f)
|
||||
(test "here's a buck"
|
||||
read-string 50 s))
|
||||
|
||||
;; ---------- regexp-split ----------
|
||||
(test '("here's " " " "u" "k") regexp-split "[abc]" "here's a buck")
|
||||
(test '(" " "u" "k") regexp-split "[abc]" "here's a buck" 8)
|
||||
(test '("" "u" "k") regexp-split "[abc]" "here's a buck" 9)
|
||||
(test '("" "u" "") regexp-split "[abc]" "here's a buck" 9 12)
|
||||
(test '("here's " "") regexp-split "[abc]" "here's a buck" 0 8)
|
||||
(test '("here's " " ") regexp-split "[abc]" "here's a buck" 0 9)
|
||||
(test '("here's " " " "u" "k") regexp-split "[abc]" "here's a buck" 0 #f)
|
||||
|
||||
(test '(#"here's " #" " #"u" #"k")
|
||||
regexp-split "[abc]" (open-input-string "here's a buck"))
|
||||
(test '(#" " #"u" #"k")
|
||||
regexp-split "[abc]" (open-input-string "here's a buck") 8)
|
||||
(test '(#"" #"u" #"k")
|
||||
regexp-split "[abc]" (open-input-string "here's a buck") 9)
|
||||
(let ([s (open-input-string "here's a buck")])
|
||||
(test '(#"" #"u" #"") regexp-split "[abc]" s 9 12)
|
||||
(test "k" read-string 50 s))
|
||||
(let ([s (open-input-string "here's a buck")])
|
||||
(test '(#"here's " #"") regexp-split "[abc]" s 0 8)
|
||||
(test " buck" read-string 50 s))
|
||||
(let ([s (open-input-string "here's a buck")])
|
||||
(test '(#"here's " #" ") regexp-split "[abc]" s 0 9)
|
||||
(test "buck" read-string 50 s))
|
||||
(let ([s (open-input-string "here's a buck")])
|
||||
(test '(#"here's " #" " #"u" #"k") regexp-split "[abc]" s 0 #f)
|
||||
(test eof read-char s))
|
||||
|
||||
;; test with zero-length matches
|
||||
(test '("" "f" "o" "o" "") regexp-split #rx"" "foo")
|
||||
(test '("" "f" "o" "o" " " "b" "a" "r" "") regexp-split #rx"" "foo bar")
|
||||
(test '("" "f" "o" "o" "" "b" "a" "r" "") regexp-split #rx" *" "foo bar")
|
||||
(test '("f" "" "ar") regexp-split #rx"oo| b" "foo bar")
|
||||
(test '("foo bar" "") regexp-split #rx"$" "foo bar")
|
||||
;; this doesn't work (like in Emacs) because ^ matches the start pos
|
||||
;; (test '("" "foo bar") regexp-split #rx"^" "foo bar")
|
||||
;; ---------- regexp-<?>* functions ----------
|
||||
(let ()
|
||||
(define (->b x)
|
||||
(cond [(list? x) (map ->b x)]
|
||||
[(string? x) (string->bytes/utf-8 x)]
|
||||
[else x]))
|
||||
(define fun* #f)
|
||||
(define t
|
||||
(case-lambda
|
||||
[(fun) (set! fun* fun)]
|
||||
[(res left rx str . args)
|
||||
(unless (eq? fun* regexp-match-peek-positions*)
|
||||
;; test with a string
|
||||
(apply test res fun* rx str args)
|
||||
;; test with a byte-regexp and/or a byte string
|
||||
(apply test (->b res) fun* (->b rx) str args)
|
||||
(apply test (->b res) fun* rx (->b str) args)
|
||||
(apply test (->b res) fun* (->b rx) (->b str) args))
|
||||
;; test with a port, and test leftovers
|
||||
(let ([p (open-input-string str)])
|
||||
(apply test (->b res) fun* rx p args)
|
||||
(when left (test left read-string 50 p)))]))
|
||||
;; --------------------
|
||||
(t regexp-match*)
|
||||
(t '("a" "b" "c") eof "[abc]" " a b c ")
|
||||
(t '("b" "c") eof "[abc]" " a b c " 2)
|
||||
(t '("b" "c") eof "[abc]" " a b c " 3)
|
||||
(t '("a") " b c " "[abc]" " a b c " 0 2)
|
||||
(t '("a") "b c " "[abc]" " a b c " 0 3)
|
||||
(t '("a" "b" "c") eof "[abc]" " a b c " 0 #f)
|
||||
(t '("a" "b" "c") eof "[abc]" " a b c " 0 7)
|
||||
(t '("a" "b" "c") eof "[abc]" "a b c")
|
||||
(t '("b" "c") eof "[abc]" "a b c" 1)
|
||||
(t '("b" "c") eof "[abc]" "a b c" 2)
|
||||
(t '("a") " b c" "[abc]" "a b c" 0 1)
|
||||
(t '("a") "b c" "[abc]" "a b c" 0 2)
|
||||
(t '("a" "b" "c") eof "[abc]" "a b c" 0)
|
||||
(t '("a" "b" "c") eof "[abc]" "a b c" 0 #f)
|
||||
(t '("a" "b" "c") eof "[abc]" "a b c" 0 5)
|
||||
;; --------------------
|
||||
(t regexp-match-positions*)
|
||||
(t '((1 . 2) (3 . 4) (5 . 6)) eof "[abc]" " a b c ")
|
||||
(t '((3 . 4) (5 . 6)) eof "[abc]" " a b c " 2)
|
||||
(t '((3 . 4) (5 . 6)) eof "[abc]" " a b c " 3)
|
||||
(t '((1 . 2)) " b c " "[abc]" " a b c " 0 2)
|
||||
(t '((1 . 2)) "b c " "[abc]" " a b c " 0 3)
|
||||
(t '((1 . 2) (3 . 4) (5 . 6)) eof "[abc]" " a b c " 0 #f)
|
||||
(t '((1 . 2) (3 . 4) (5 . 6)) eof "[abc]" " a b c " 0 7)
|
||||
(t '((0 . 1) (2 . 3) (4 . 5)) eof "[abc]" "a b c")
|
||||
(t '((2 . 3) (4 . 5)) eof "[abc]" "a b c" 1)
|
||||
(t '((2 . 3) (4 . 5)) eof "[abc]" "a b c" 2)
|
||||
(t '((0 . 1)) " b c" "[abc]" "a b c" 0 1)
|
||||
(t '((0 . 1)) "b c" "[abc]" "a b c" 0 2)
|
||||
(t '((0 . 1) (2 . 3) (4 . 5)) eof "[abc]" "a b c" 0)
|
||||
(t '((0 . 1) (2 . 3) (4 . 5)) eof "[abc]" "a b c" 0 #f)
|
||||
(t '((0 . 1) (2 . 3) (4 . 5)) eof "[abc]" "a b c" 0 5)
|
||||
;; --------------------
|
||||
(t regexp-split)
|
||||
(t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4")
|
||||
(t '("2" "3" "4") eof "[abc]" "1a2b3c4" 2)
|
||||
(t '("" "3" "4") eof "[abc]" "1a2b3c4" 3)
|
||||
(t '("1" "") "2b3c4" "[abc]" "1a2b3c4" 0 2)
|
||||
(t '("1" "2") "b3c4" "[abc]" "1a2b3c4" 0 3)
|
||||
(t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4" 0 #f)
|
||||
(t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4" 0 7)
|
||||
(t '("" "1" "2" "") eof "[abc]" "a1b2c")
|
||||
(t '("1" "2" "") eof "[abc]" "a1b2c" 1)
|
||||
(t '("" "2" "") eof "[abc]" "a1b2c" 2)
|
||||
(t '("" "") "1b2c" "[abc]" "a1b2c" 0 1)
|
||||
(t '("" "1") "b2c" "[abc]" "a1b2c" 0 2)
|
||||
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0)
|
||||
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0 #f)
|
||||
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0 5)
|
||||
;; --------------------
|
||||
(t regexp-match-peek-positions*)
|
||||
(err/rt-test (regexp-match-peek-positions* "[abc]" "a b c"))
|
||||
(t '((1 . 2) (3 . 4) (5 . 6)) " a b c " "[abc]" " a b c ")
|
||||
(t '((3 . 4) (5 . 6)) " a b c " "[abc]" " a b c " 2)
|
||||
(t '((3 . 4) (5 . 6)) " a b c " "[abc]" " a b c " 3)
|
||||
(t '((1 . 2)) " a b c " "[abc]" " a b c " 0 2)
|
||||
(t '((1 . 2)) " a b c " "[abc]" " a b c " 0 3)
|
||||
(t '((1 . 2) (3 . 4) (5 . 6)) " a b c " "[abc]" " a b c " 0 #f)
|
||||
(t '((1 . 2) (3 . 4) (5 . 6)) " a b c " "[abc]" " a b c " 0 7)
|
||||
(t '((0 . 1) (2 . 3) (4 . 5)) "a b c" "[abc]" "a b c")
|
||||
(t '((2 . 3) (4 . 5)) "a b c" "[abc]" "a b c" 1)
|
||||
(t '((2 . 3) (4 . 5)) "a b c" "[abc]" "a b c" 2)
|
||||
(t '((0 . 1)) "a b c" "[abc]" "a b c" 0 1)
|
||||
(t '((0 . 1)) "a b c" "[abc]" "a b c" 0 2)
|
||||
(t '((0 . 1) (2 . 3) (4 . 5)) "a b c" "[abc]" "a b c" 0 #f)
|
||||
(t '((0 . 1) (2 . 3) (4 . 5)) "a b c" "[abc]" "a b c" 0 5)
|
||||
;; ---------- tests with zero-length matches ----------
|
||||
;; Many of these tests can be repeated with Perl. To try something
|
||||
;; in Perl, put this code in a file:
|
||||
;; #!/usr/bin/perl
|
||||
;; sub test {
|
||||
;; my ($rx,$str) = @_; @words = split /$rx/, $str;
|
||||
;; print "(t '(";
|
||||
;; while (@words) {print '"'.shift(@words).'"'.(@words?" ":"");}
|
||||
;; print ") eof \"$rx\" \"$str\")\n";
|
||||
;; };
|
||||
;; test("[abc]","1a2b3");
|
||||
;; and it will print a test that does what perl is doing. Tests
|
||||
;; that differ from Perl have explanations.
|
||||
;;
|
||||
(t regexp-split)
|
||||
;; test("a","a");
|
||||
;; (t '() eof "a" "a")
|
||||
;; perl returns an empty list, we return '("" ""), and this is a
|
||||
;; difference that is unrelated to dealing with empty matches,
|
||||
;; just the way that perl's split throws out some empty matches.
|
||||
(t '("" "") eof "a" "a")
|
||||
;; test("3","123");
|
||||
;; (t '("12") eof "3" "123")
|
||||
;; a more straightforward demonstration of this
|
||||
(t '("12" "") eof "3" "123")
|
||||
;; test("a","1a2a3");
|
||||
(t '("1" "2" "3") eof "a" "1a2a3")
|
||||
;; test("","123");
|
||||
(t '("1" "2" "3") eof "" "123")
|
||||
;; test("","12 34");
|
||||
(t '("1" "2" " " "3" "4") eof "" "12 34")
|
||||
;; test(" *","123");
|
||||
(t '("1" "2" "3") eof " *" "123")
|
||||
;; test(" *","12 34");
|
||||
(t '("1" "2" "3" "4") eof " *" "12 34")
|
||||
;; test(" *"," 12 34 ");
|
||||
;; (t '("" "1" "2" "3" "4") eof " *" " 12 34 ")
|
||||
;; perl drops the last empty string, we don't -- unrelated to
|
||||
;; empty matches (same as the <"a","a"> case above)
|
||||
(t '("" "1" "2" "3" "4" "") eof " *" " 12 34 ")
|
||||
;; test("2|", "1234");
|
||||
(t '("1" "3" "4") eof "2|" "1234")
|
||||
;; test("1|", "1234");
|
||||
(t '("" "2" "3" "4") eof "1|" "1234")
|
||||
;; test("4|", "1234");
|
||||
;; perl drops the last empty string, we don't, same as above
|
||||
(t '("1" "2" "3" "") eof "4|" "1234")
|
||||
;; test("|2", "1234");
|
||||
;; (t '("1" "" "3" "4") eof "|2" "1234")
|
||||
;; perl will find the "2", we can't do that since we'll always
|
||||
;; find the empty match first, so it's just like using "" (to do
|
||||
;; the perl thing, we'll need a hook into the matcher's C code, or
|
||||
;; some way of saying `match this pattern but prefer a non-empty
|
||||
;; match if possible')
|
||||
(t '("1" "2" "3" "4") eof "|2" "1234")
|
||||
;; test("2|3", "1234");
|
||||
(t '("1" "" "4") eof "2|3" "1234")
|
||||
;; test("1|2", "1234");
|
||||
(t '("" "" "34") eof "1|2" "1234")
|
||||
;; test("3|4", "1234");
|
||||
;; (t '("12") eof "3|4" "1234")
|
||||
;; again, perl dumps empty matches at the end, even two
|
||||
(t '("12" "" "") eof "3|4" "1234")
|
||||
;; test('$',"123");
|
||||
(t '("123") eof "$" "123")
|
||||
;; test('^',"123");
|
||||
;; (t '("123") eof "^" "123")
|
||||
;; this is a technicality: perl matches "^" once, but mzscheme
|
||||
;; matches on whatever `start' may be; perl is treating it as a
|
||||
;; beginning-of-line instead of a ...-of-string behind your back
|
||||
;; "since it isn't much use otherwise"
|
||||
;; (http://perldoc.perl.org/functions/split.html); so our correct
|
||||
;; test is:
|
||||
(t '("1" "2" "3") eof "^" "123")
|
||||
;; and we can get the same with "(m?:^)":
|
||||
(t '("123") eof "(m?:^)" "123")
|
||||
;; test("^|a", "abc");
|
||||
;; (t '("" "bc") eof "^|a" "abc")
|
||||
;; same deal here, use "(m?:^)":
|
||||
(t '("" "bc") eof "(m?:^|a)" "abc")
|
||||
;; some tests with bounds (these have no perl equivalences)
|
||||
(t '("1" "2" " " "3" "4") eof "" "12 34" 0)
|
||||
(t '("1" "2" " " "3" "4") eof "" "12 34" 0 #f)
|
||||
(t '("1" "2" " " "3" "4") eof "" "12 34" 0 5)
|
||||
(t '("" "1" "2" "3" "4" "") eof " *" " 12 34 " 0)
|
||||
(t '("" "1" "2" "3" "4" "") eof " *" " 12 34 " 0 #f)
|
||||
(t '("" "1" "2" "3" "4" "") eof " *" " 12 34 " 0 7)
|
||||
(t '("2" "3") eof " *" "123" 1)
|
||||
(t '("2" "3" "4") eof " *" "12 34" 1)
|
||||
(t '("2" "3" "4" "") eof " *" " 12 34 " 2)
|
||||
(t '("1") "23" "" "123" 0 1)
|
||||
(t '("2") "3" "" "123" 1 2)
|
||||
(t '("3") eof "" "123" 2 3)
|
||||
(t '("1" "2") "3" " *" "123" 0 2)
|
||||
(t '("2" "3") eof " *" "123" 1 3)
|
||||
;; more tests with match*
|
||||
(t regexp-match*)
|
||||
(t '("" "") eof "" "123")
|
||||
(t '("" " " "") eof " *" "12 34")
|
||||
(t '(" " "" " " "" " ") eof " *" " 12 34 ")
|
||||
(t '("" " " "") " " " *" " 12 34 " 1 6)
|
||||
(t regexp-match-positions*)
|
||||
(t '((1 . 1) (2 . 2)) eof "" "123")
|
||||
(t '((1 . 1) (2 . 3) (4 . 4)) eof " *" "12 34")
|
||||
(t '((0 . 1) (2 . 2) (3 . 4) (5 . 5) (6 . 7)) eof " *" " 12 34 ")
|
||||
(t '((2 . 2) (3 . 4) (5 . 5)) " " " *" " 12 34 " 1 6)
|
||||
)
|
||||
|
||||
;; ---------- string-append* ----------
|
||||
(let ()
|
||||
|
|
Loading…
Reference in New Issue
Block a user