unicode-property tests

svn: r4303
This commit is contained in:
Matthew Flatt 2006-09-11 01:44:10 +00:00
parent 407339e935
commit 0b1395e5d1

View File

@ -1490,6 +1490,93 @@
(#"(?m:a[^a]^b[^a]^c)" #"a\nb\nc" (#"a\nb\nc"))
(#"(?m:^a$[^a]^b$[^a]^c$)" #"a\nb\nc" (#"a\nb\nc"))))
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Test unicode-property patterns
(let ([just-once? #t]
[kcrl (make-known-char-range-list)]
[ht (make-hash-table)]
[bformat (lambda (s v)
(string->bytes/latin-1 (format s v)))])
(for-each (lambda (str)
(hash-table-put! ht
(string->symbol (string-downcase str))
(vector
(byte-pregexp (bformat "\\p{~a}" str))
(pregexp (format "\\p{~a}" str))
(byte-pregexp (bformat "\\p{~a}*" str))
(pregexp (format "\\p{~a}*" str))
(byte-pregexp (bformat "\\P{~a}" str))
(pregexp (format "\\P{~a}" str))
(byte-pregexp (bformat "\\P{~a}*" str))
(pregexp (format "\\P{~a}*" str)))))
'("Cn" "Cc" "Cf" "Cs" "Co" "Ll" "Lu" "Lt" "Lm" "Lo" "Nd" "Nl" "No"
"Ps" "Pe" "Pi" "Pf" "Pc" "Pd" "Po" "Mn" "Mc" "Me" "Sc" "Sk" "Sm" "So" "Zl" "Zp" "Zs"))
(hash-table-for-each ht
(lambda (k v)
(let ([bad1 #"\377\377"]
[bad2 (regexp-replace #rx#".$"
(string->bytes/utf-8 "\U10FFF1")
#"\377")]
[bad3 (regexp-replace #rx#".$"
(string->bytes/utf-8 "\u1234")
#"")])
(test #f regexp-match (vector-ref v 0) bad1)
(test #f regexp-match (vector-ref v 0) bad2)
(test #f regexp-match (vector-ref v 0) bad3)
(test #f regexp-match (vector-ref v 1) bad1)
(test #f regexp-match (vector-ref v 1) bad2)
(test #f regexp-match (vector-ref v 1) bad3)
(test #f regexp-match (vector-ref v 4) bad1)
(test #f regexp-match (vector-ref v 4) bad2)
(test #f regexp-match (vector-ref v 4) bad3)
(test #f regexp-match (vector-ref v 5) bad1)
(test #f regexp-match (vector-ref v 5) bad2)
(test #f regexp-match (vector-ref v 5) bad3)
(let ([other (ormap (lambda (e)
(and (not (eq? k (char-general-category
(integer->char (car e)))))
(integer->char (car e))))
kcrl)])
(let* ([s (string other)]
[bs (string->bytes/utf-8 s)]
[s* (string-append s s s)]
[bs* (bytes-append bs bs bs)])
(test #f regexp-match (vector-ref v 0) bs)
(test #f regexp-match (vector-ref v 1) s)
(test '(#"") regexp-match (vector-ref v 2) bs)
(test '("") regexp-match (vector-ref v 3) s)
(test (list bs) regexp-match (vector-ref v 4) bs)
(test (list s) regexp-match (vector-ref v 5) s)
(test (list bs*) regexp-match (vector-ref v 6) bs*)
(test (list s*) regexp-match (vector-ref v 7) s*))))))
(let ([try (lambda (n)
(let* ([cat (char-general-category (integer->char n))]
[v (hash-table-get ht cat #f)])
(when v
(when just-once?
(hash-table-remove! ht cat))
(let* ([s (string (integer->char n))]
[bs (string->bytes/utf-8 s)]
[bs* (string->bytes/utf-8 (string-append s s s s s))]
[regexp-match* (lambda (p s)
(let ([v (regexp-match p (bytes->string/utf-8 s))])
(and v
(map string->bytes/utf-8 v))))])
(test (list bs) regexp-match (vector-ref v 0) bs)
(test (list bs) regexp-match* (vector-ref v 1) bs)
(test (list bs*) regexp-match (vector-ref v 2) bs*)
(test (list bs*) regexp-match* (vector-ref v 3) bs*)))))])
(for-each (lambda (e)
(let ([start (car e)]
[end (cadr e)]
[uniform? (caddr e)])
(let loop ([n start])
(try n)
(unless (= n end)
(loop (if uniform? end (+ n 1)))))))
kcrl)))
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(report-errs)