better tests for port and char-string matching
svn: r4294
This commit is contained in:
parent
c369f93a59
commit
42f35ba39f
|
@ -101,6 +101,110 @@
|
|||
#"?"
|
||||
#"+"))
|
||||
|
||||
(map (lambda (p)
|
||||
(let ([name (car p)]
|
||||
[predicate (cdr p)]
|
||||
[mk (lambda (name extra not? star?)
|
||||
(byte-pregexp
|
||||
(string->bytes/latin-1
|
||||
(format "[~a~a[:~a:]]~a"
|
||||
(if not? "^" "")
|
||||
(if extra extra "")
|
||||
name
|
||||
(if star? "*" "")))))])
|
||||
(let ([try
|
||||
(lambda (extra)
|
||||
(let ([b (mk name extra #f #f)]
|
||||
[not-b (mk name extra #t #f)]
|
||||
[b* (mk name extra #f #t)]
|
||||
[not-b* (mk name extra #t #t)])
|
||||
(let loop ([c 0])
|
||||
(unless (= c 128)
|
||||
(let ([in? (or (and extra
|
||||
(= c (char->integer extra)))
|
||||
(predicate (integer->char c)))])
|
||||
(test (if in? (list (bytes c)) #f)
|
||||
regexp-match
|
||||
b
|
||||
(bytes c))
|
||||
(test (if in? (list (bytes c c)) (list (bytes)))
|
||||
regexp-match
|
||||
b*
|
||||
(bytes c c))
|
||||
(test (if in? #f (list (bytes c)))
|
||||
regexp-match
|
||||
not-b
|
||||
(bytes c))
|
||||
(test (if in? (list (bytes)) (list (bytes c c)))
|
||||
regexp-match
|
||||
not-b*
|
||||
(bytes c c))
|
||||
(loop (add1 c)))))
|
||||
(test #f regexp-match b (bytes 128))
|
||||
(test (list (bytes)) regexp-match b* (bytes 128 128))
|
||||
(test (list (bytes 128)) regexp-match not-b (bytes 128))
|
||||
(test (list (bytes 128 128)) regexp-match not-b* (bytes 128 128))))])
|
||||
(try #f)
|
||||
(try #\377)
|
||||
(unless (predicate #\x)
|
||||
(try #\x))
|
||||
(unless (predicate #\space)
|
||||
(try #\space))
|
||||
(unless (predicate #\000)
|
||||
(try #\000))
|
||||
(unless (predicate #\002)
|
||||
(try #\002)))))
|
||||
(list
|
||||
(cons "alpha" char-alphabetic?)
|
||||
(cons "alnum" (lambda (x)
|
||||
(or (char-alphabetic? x)
|
||||
(char-numeric? x))))
|
||||
))
|
||||
|
||||
|
||||
(test '("app\u039Be") regexp-match #px"(?i:app\u039Be)" "app\u039Be")
|
||||
(test '("app\u039Be") regexp-match #px"(?i:app\u03BBe)" "app\u039Be")
|
||||
(test #f regexp-match #px"app\u03BBe" "app\u039Be")
|
||||
(test #f regexp-match #px"(?i:(?-i:app\u03BBe))" "app\u039Be")
|
||||
(test '("app\u039Be") regexp-match #px"(?i:(?-i:app)\u03BBe)" "app\u039Be")
|
||||
(test '("app\u03BBe") regexp-match #px"(?i:app\u039Be)" "app\u03BBe")
|
||||
(test '("app\u03BBe") regexp-match #px"(?i:app[\u039B-\u039C]e)" "app\u03BBe")
|
||||
(test '("app\u039Be") regexp-match #px"(?i:app[\u03BB-\u03BC]e)" "app\u039Be")
|
||||
|
||||
(let ([sigmas '(#\u03A3 #\u03C2 #\u03C3)]
|
||||
[lambdas '(#\u039B #\u039B #\u03BB)])
|
||||
(for-each (lambda (s1)
|
||||
(for-each
|
||||
(lambda (s2 l2)
|
||||
(let* ([r (list (format "_~a_" s1))]
|
||||
[gen-sigma? (or (equal? s2 #\u03C2)
|
||||
(not (equal? s1 #\u03C2)))]
|
||||
[gr (and gen-sigma? r)])
|
||||
(if (equal? s1 s2)
|
||||
(test r regexp-match (format "_~a_" s2) (format "_~a_" s1))
|
||||
(test #f regexp-match (format "_~a_" s2) (format "_~a_" s1)))
|
||||
(test gr regexp-match (format "(?i:_~a_)" s2) (format "_~a_" s1))
|
||||
(test gr regexp-match (format "(?i:_[~a]_)" s2) (format "_~a_" s1))
|
||||
(test (and (not (equal? s1 s2)) r) regexp-match (format "_[^~a]_" s2) (format "_~a_" s1))
|
||||
(test (and (equal? s1 #\u03C2) (not (equal? s2 #\u03C2)) r)
|
||||
regexp-match (format "(?i:_[^~a]_)" s2) (format "_~a_" s1))
|
||||
(test gr regexp-match (format "(?i:_[x~a]_)" s2) (format "_~a_" s1))
|
||||
(test gr regexp-match (format "(?i:_[~a\x1888]_)" s2) (format "_~a_" s1))
|
||||
(test gr regexp-match (format "(?i:_[~a-~a]_)" s2 s2) (format "_~a_" s1))
|
||||
(test r regexp-match (format "_[~a-~a]_" (car sigmas) (caddr sigmas)) (format "_~a_" s1))
|
||||
(test (and (char<=? (car sigmas) s1 s2) r)
|
||||
regexp-match (format "_[~a-~a]_" (car sigmas) s2) (format "_~a_" s1))
|
||||
(test (and (char<=? s2 s1 (caddr sigmas)) r)
|
||||
regexp-match (format "_[~a-~a]_" s2 (caddr sigmas)) (format "_~a_" s1))
|
||||
(test (and (or gen-sigma? (equal? s2 #\u03C3)) r)
|
||||
regexp-match (format "(?i:_[~a-~a]_)" (car sigmas) s2) (format "_~a_" s1))
|
||||
(test (and (or gen-sigma? (equal? s2 #\u03A3)) r)
|
||||
regexp-match (format "(?i:_[~a-~a]_)" s2 (caddr sigmas)) (format "_~a_" s1))
|
||||
(test (and (or gen-sigma? (equal? s2 #\u03C3)) r)
|
||||
regexp-match (format "(?i:_[~a-~a]_)" l2 s2) (format "_~a_" s1))))
|
||||
sigmas lambdas))
|
||||
sigmas))
|
||||
|
||||
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Most of the following tests are derived from "testinput" in
|
||||
;; CL-PPCRE, which probably is from Perl originally.
|
||||
|
@ -108,8 +212,28 @@
|
|||
|
||||
(map (lambda (t)
|
||||
(if (pair? t)
|
||||
(begin
|
||||
(test (caddr t) regexp-match (byte-pregexp (car t)) (cadr t))
|
||||
(err/rt-test (byte-pregexp t))))
|
||||
(test (caddr t) regexp-match (byte-pregexp (car t)) (bytes-append #"xxxxxxxxxx" (cadr t)) 10)
|
||||
(test (caddr t) regexp-match (byte-pregexp (car t)) (bytes-append (cadr t) #"xxxxxxxxxx") 0 (bytes-length (cadr t)))
|
||||
(test (caddr t) regexp-match (byte-pregexp (car t)) (open-input-bytes (cadr t)))
|
||||
(test (and (caddr t)
|
||||
(map (lambda (v)
|
||||
(and v (bytes->string/latin-1 v)))
|
||||
(caddr t)))
|
||||
regexp-match
|
||||
(pregexp (bytes->string/latin-1 (car t)))
|
||||
(bytes->string/latin-1 (cadr t)))
|
||||
(test (and (caddr t)
|
||||
(map (lambda (v)
|
||||
(and v (string->bytes/utf-8 (bytes->string/latin-1 v))))
|
||||
(caddr t)))
|
||||
regexp-match
|
||||
(pregexp (bytes->string/latin-1 (car t)))
|
||||
(open-input-string (bytes->string/latin-1 (cadr t)))))
|
||||
(begin
|
||||
(err/rt-test (byte-pregexp t))
|
||||
(err/rt-test (pregexp (bytes->string/latin-1 t))))))
|
||||
'(#"}"
|
||||
#"]"
|
||||
#"[a[:alph:]b]"
|
||||
|
@ -184,16 +308,7 @@
|
|||
(#"^(b+|a){1,2}c" #"aaac" #f)
|
||||
(#"^(b+|a){1,2}c" #"abbbbbbbbbbbac" #f)
|
||||
(#"^(b+|a){1,2}?bc" #"bbc" (#"bbc" #"b"))
|
||||
(#"^(b*|ba){1,2}?bc" #"babc" (#"babc" #"ba"))
|
||||
(#"^(b*|ba){1,2}?bc" #"bbabc" (#"bbabc" #"ba"))
|
||||
(#"^(b*|ba){1,2}?bc" #"bababc" (#"bababc" #"ba"))
|
||||
(#"^(b*|ba){1,2}?bc" #"bababbc" #f)
|
||||
(#"^(b*|ba){1,2}?bc" #"babababc" #f)
|
||||
(#"^(ba|b*){1,2}?bc" #"babc" (#"babc" #"ba"))
|
||||
(#"^(ba|b*){1,2}?bc" #"bbabc" (#"bbabc" #"ba"))
|
||||
(#"^(ba|b*){1,2}?bc" #"bababc" (#"bababc" #"ba"))
|
||||
(#"^(ba|b*){1,2}?bc" #"bababbc" #f)
|
||||
(#"^(ba|b*){1,2}?bc" #"babababc" #f)
|
||||
#"^(b*|ba){1,2}?bc"
|
||||
(#"^[ab\\]cde]" #"athing" (#"a"))
|
||||
(#"^[ab\\]cde]" #"bthing" (#"b"))
|
||||
(#"^[ab\\]cde]" #"]thing" (#"]"))
|
||||
|
@ -416,17 +531,8 @@
|
|||
(#"^(a|)\\1?b" #"aab" (#"aab" #"a"))
|
||||
(#"^(a|)\\1?b" #"b" (#"b" #""))
|
||||
(#"^(a|)\\1?b" #"acb" #f)
|
||||
(#"^(a|)\\1{2}b" #"aaab" (#"aaab" #"a"))
|
||||
(#"^(a|)\\1{2}b" #"b" (#"b" #""))
|
||||
(#"^(a|)\\1{2}b" #"ab" #f)
|
||||
(#"^(a|)\\1{2}b" #"aab" #f)
|
||||
(#"^(a|)\\1{2}b" #"aaaab" #f)
|
||||
(#"^(a|)\\1{2,3}b" #"aaab" (#"aaab" #"a"))
|
||||
(#"^(a|)\\1{2,3}b" #"aaaab" (#"aaaab" #"a"))
|
||||
(#"^(a|)\\1{2,3}b" #"b" (#"b" #""))
|
||||
(#"^(a|)\\1{2,3}b" #"ab" #f)
|
||||
(#"^(a|)\\1{2,3}b" #"aab" #f)
|
||||
(#"^(a|)\\1{2,3}b" #"aaaaab" #f)
|
||||
#"^(a|)\\1{2}b"
|
||||
#"^(a|)\\1{2,3}b"
|
||||
(#"ab{1,3}bc" #"abbbbc" (#"abbbbc"))
|
||||
(#"ab{1,3}bc" #"abbbc" (#"abbbc"))
|
||||
(#"ab{1,3}bc" #"abbc" (#"abbc"))
|
||||
|
@ -623,8 +729,7 @@
|
|||
(#"(A|B)*CD" #"CD " (#"CD" #f))
|
||||
(#"(AB)*?\\1" #"ABABAB" (#"ABAB" #"AB"))
|
||||
(#"(AB)*\\1" #"ABABAB" (#"ABABAB" #"AB"))
|
||||
(#"((a{0,5}){0,5}){0,5}[c]" #"aaaaaaaaaac" (#"aaaaaaaaaac" #"" #""))
|
||||
(#"((a{0,5}){0,5}){0,5}[c]" #"aaaaaaaaaa" #f)
|
||||
#"((a{0,5}){0,5}){0,5}[c]"
|
||||
#"((a{0,5}){0,5})*[c]"
|
||||
#"((a{0,5}){0,5})*[c]"
|
||||
#"(\\b)*a"
|
||||
|
@ -725,17 +830,9 @@
|
|||
(#"a(?i:b)" #"ab" (#"ab"))
|
||||
(#"a(?i:b)" #"aB" (#"aB"))
|
||||
(#"a(?i:b)" #"Ab" #f)
|
||||
(#"a(?<=a){3000}a" #"" #f)
|
||||
(#"a(?<=a){3000}a" #"" #f)
|
||||
(#"a(?<=a){3000}a" #"" #f)
|
||||
(#"a(?!=a){3000}a" #"" #f)
|
||||
(#"a(?!=a){3000}a" #"" #f)
|
||||
(#"a(?!=a){3000}a" #"" #f)
|
||||
(#"a(){3000}a" #"" #f)
|
||||
(#"a(){3000}a" #"" #f)
|
||||
(#"a(){3000}a" #"" #f)
|
||||
(#"a(?:){3000}a" #"" #f)
|
||||
(#"a(?:){3000}a" #"" #f)
|
||||
#"a(?<=a){3000}a"
|
||||
#"a(){3000}a"
|
||||
#"a(?:){3000}a"
|
||||
#"a(?<=a)*a"
|
||||
#"a(?!=a)*a"
|
||||
#"a()*a"
|
||||
|
@ -745,15 +842,10 @@
|
|||
#"a(?<=(a))*?\\1"
|
||||
#"(?=(a)\\1)*aa"
|
||||
(#"^((a|b){2,5}){2}$" #"aaabbbbb" (#"aaabbbbb" #"bbb" #"b"))
|
||||
(#"^(b*|ba){1,2}bc" #"bc" (#"bc" #""))
|
||||
(#"^(b*|ba){1,2}bc" #"abc" #f)
|
||||
(#"^(b*|ba){1,2}bc" #"babc" (#"babc" #"ba"))
|
||||
(#"^(b*|ba){1,2}bc" #"babbc" (#"babbc" #"b"))
|
||||
(#"^(b*|ba){1,2}bc" #"bababc" (#"bababc" #"ba"))
|
||||
#"^(b*|ba){1,2}bc"
|
||||
(#"^a{4,5}(?:c|a)c$" #"aaac" #f)
|
||||
(#"^a{4,5}(?:c|a)c$" #"aaaac" #f)
|
||||
(#"^(a|){4,5}(?:c|a)c$" #"aaac" (#"aaac" #""))
|
||||
(#"^(a|){4,5}(?:c|a)c$" #"aaaac" (#"aaaac" #""))
|
||||
#"^(a|){4,5}(?:c|a)c$"
|
||||
(#"(?m:^).abc$" #"exabc" #f)
|
||||
(#"(?m:^)abc" #"c" #f)
|
||||
(#"^abc" #"c" #f)
|
||||
|
@ -1240,7 +1332,7 @@
|
|||
(#"(?:..)*a" #"aba" (#"aba"))
|
||||
(#"(?:..)*?a" #"aba" (#"a"))
|
||||
(#"^(?:b|a(?=(.)))*\\1" #"abc" (#"ab" #"b"))
|
||||
(#"^(){3,5}" #"abc" (#"" #""))
|
||||
#"^(){3,5}"
|
||||
(#"^(a+)*ax" #"aax" (#"aax" #"a"))
|
||||
(#"^((a|b)+)*ax" #"aax" (#"aax" #"a" #"a"))
|
||||
(#"^((a|bc)+)*ax" #"aax" (#"aax" #"a" #"a"))
|
||||
|
@ -1376,7 +1468,27 @@
|
|||
(#"'[ab]'" #"'ab'" #f)
|
||||
(#"ab(?=.*q)cd" #"abcdxklqj" (#"abcd"))
|
||||
(#"a(?!.*$)b" #"ab" #f)
|
||||
(#".{2}[a-z]" #"Axi" (#"Axi"))))
|
||||
(#".{2}[a-z]" #"Axi" (#"Axi"))
|
||||
(#"^12.34" #"12\n34" (#"12\n34"))
|
||||
(#"(?m:^12.34)" #"12\n34" #f)
|
||||
(#"(?s:^12.34)" #"12\n34" (#"12\n34"))
|
||||
(#"(?m:^12.34)" #"12x34" (#"12x34"))
|
||||
(#"(?s:^12.34)" #"12x34" (#"12x34"))
|
||||
(#"^a" #"a\nb\nc" (#"a"))
|
||||
(#"^a$" #"a\nb\nc" #f)
|
||||
(#"(?m:^a)" #"a\nb\nc" (#"a"))
|
||||
(#"(?m:^a$)" #"a\nb\nc" (#"a"))
|
||||
(#"^b" #"a\nb\nc" #f)
|
||||
(#"b$" #"a\nb\nc" #f)
|
||||
(#"^b$" #"a\nb\nc" #f)
|
||||
(#"(?m:^b)" #"a\nb\nc" (#"b"))
|
||||
(#"(?m:b$)" #"a\nb\nc" (#"b"))
|
||||
(#"(?m:^b$)" #"a\nb\nc" (#"b"))
|
||||
(#"(?m:a[^a]^b)" #"a\nb\nc" (#"a\nb"))
|
||||
(#"(?m:a[^a]^b$)" #"a\nb\nc" (#"a\nb"))
|
||||
(#"(?m:a$[^a]^b$)" #"a\nb\nc" (#"a\nb"))
|
||||
(#"(?m:a[^a]^b[^a]^c)" #"a\nb\nc" (#"a\nb\nc"))
|
||||
(#"(?m:^a$[^a]^b$[^a]^c$)" #"a\nb\nc" (#"a\nb\nc"))))
|
||||
|
||||
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user