From 0b1395e5d1809b1d260bf1209e6ba9019ea577a7 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Mon, 11 Sep 2006 01:44:10 +0000 Subject: [PATCH] unicode-property tests svn: r4303 --- collects/tests/mzscheme/rx.ss | 87 +++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) diff --git a/collects/tests/mzscheme/rx.ss b/collects/tests/mzscheme/rx.ss index 09e415e44b..bcdf8fb1b2 100644 --- a/collects/tests/mzscheme/rx.ss +++ b/collects/tests/mzscheme/rx.ss @@ -1490,6 +1490,93 @@ (#"(?m:a[^a]^b[^a]^c)" #"a\nb\nc" (#"a\nb\nc")) (#"(?m:^a$[^a]^b$[^a]^c$)" #"a\nb\nc" (#"a\nb\nc")))) +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Test unicode-property patterns + +(let ([just-once? #t] + [kcrl (make-known-char-range-list)] + [ht (make-hash-table)] + [bformat (lambda (s v) + (string->bytes/latin-1 (format s v)))]) + (for-each (lambda (str) + (hash-table-put! ht + (string->symbol (string-downcase str)) + (vector + (byte-pregexp (bformat "\\p{~a}" str)) + (pregexp (format "\\p{~a}" str)) + (byte-pregexp (bformat "\\p{~a}*" str)) + (pregexp (format "\\p{~a}*" str)) + (byte-pregexp (bformat "\\P{~a}" str)) + (pregexp (format "\\P{~a}" str)) + (byte-pregexp (bformat "\\P{~a}*" str)) + (pregexp (format "\\P{~a}*" str))))) + '("Cn" "Cc" "Cf" "Cs" "Co" "Ll" "Lu" "Lt" "Lm" "Lo" "Nd" "Nl" "No" + "Ps" "Pe" "Pi" "Pf" "Pc" "Pd" "Po" "Mn" "Mc" "Me" "Sc" "Sk" "Sm" "So" "Zl" "Zp" "Zs")) + (hash-table-for-each ht + (lambda (k v) + (let ([bad1 #"\377\377"] + [bad2 (regexp-replace #rx#".$" + (string->bytes/utf-8 "\U10FFF1") + #"\377")] + [bad3 (regexp-replace #rx#".$" + (string->bytes/utf-8 "\u1234") + #"")]) + (test #f regexp-match (vector-ref v 0) bad1) + (test #f regexp-match (vector-ref v 0) bad2) + (test #f regexp-match (vector-ref v 0) bad3) + (test #f regexp-match (vector-ref v 1) bad1) + (test #f regexp-match (vector-ref v 1) bad2) + (test #f regexp-match (vector-ref v 1) bad3) + (test #f regexp-match (vector-ref v 4) bad1) + (test #f regexp-match (vector-ref v 4) bad2) + (test #f regexp-match (vector-ref v 4) bad3) + (test #f regexp-match (vector-ref v 5) bad1) + (test #f regexp-match (vector-ref v 5) bad2) + (test #f regexp-match (vector-ref v 5) bad3) + (let ([other (ormap (lambda (e) + (and (not (eq? k (char-general-category + (integer->char (car e))))) + (integer->char (car e)))) + kcrl)]) + (let* ([s (string other)] + [bs (string->bytes/utf-8 s)] + [s* (string-append s s s)] + [bs* (bytes-append bs bs bs)]) + (test #f regexp-match (vector-ref v 0) bs) + (test #f regexp-match (vector-ref v 1) s) + (test '(#"") regexp-match (vector-ref v 2) bs) + (test '("") regexp-match (vector-ref v 3) s) + (test (list bs) regexp-match (vector-ref v 4) bs) + (test (list s) regexp-match (vector-ref v 5) s) + (test (list bs*) regexp-match (vector-ref v 6) bs*) + (test (list s*) regexp-match (vector-ref v 7) s*)))))) + (let ([try (lambda (n) + (let* ([cat (char-general-category (integer->char n))] + [v (hash-table-get ht cat #f)]) + (when v + (when just-once? + (hash-table-remove! ht cat)) + (let* ([s (string (integer->char n))] + [bs (string->bytes/utf-8 s)] + [bs* (string->bytes/utf-8 (string-append s s s s s))] + [regexp-match* (lambda (p s) + (let ([v (regexp-match p (bytes->string/utf-8 s))]) + (and v + (map string->bytes/utf-8 v))))]) + (test (list bs) regexp-match (vector-ref v 0) bs) + (test (list bs) regexp-match* (vector-ref v 1) bs) + (test (list bs*) regexp-match (vector-ref v 2) bs*) + (test (list bs*) regexp-match* (vector-ref v 3) bs*)))))]) + (for-each (lambda (e) + (let ([start (car e)] + [end (cadr e)] + [uniform? (caddr e)]) + (let loop ([n start]) + (try n) + (unless (= n end) + (loop (if uniform? end (+ n 1))))))) + kcrl))) + ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (report-errs)