unicode-property tests
svn: r4303
This commit is contained in:
parent
407339e935
commit
0b1395e5d1
|
@ -1490,6 +1490,93 @@
|
|||
(#"(?m:a[^a]^b[^a]^c)" #"a\nb\nc" (#"a\nb\nc"))
|
||||
(#"(?m:^a$[^a]^b$[^a]^c$)" #"a\nb\nc" (#"a\nb\nc"))))
|
||||
|
||||
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Test unicode-property patterns
|
||||
|
||||
(let ([just-once? #t]
|
||||
[kcrl (make-known-char-range-list)]
|
||||
[ht (make-hash-table)]
|
||||
[bformat (lambda (s v)
|
||||
(string->bytes/latin-1 (format s v)))])
|
||||
(for-each (lambda (str)
|
||||
(hash-table-put! ht
|
||||
(string->symbol (string-downcase str))
|
||||
(vector
|
||||
(byte-pregexp (bformat "\\p{~a}" str))
|
||||
(pregexp (format "\\p{~a}" str))
|
||||
(byte-pregexp (bformat "\\p{~a}*" str))
|
||||
(pregexp (format "\\p{~a}*" str))
|
||||
(byte-pregexp (bformat "\\P{~a}" str))
|
||||
(pregexp (format "\\P{~a}" str))
|
||||
(byte-pregexp (bformat "\\P{~a}*" str))
|
||||
(pregexp (format "\\P{~a}*" str)))))
|
||||
'("Cn" "Cc" "Cf" "Cs" "Co" "Ll" "Lu" "Lt" "Lm" "Lo" "Nd" "Nl" "No"
|
||||
"Ps" "Pe" "Pi" "Pf" "Pc" "Pd" "Po" "Mn" "Mc" "Me" "Sc" "Sk" "Sm" "So" "Zl" "Zp" "Zs"))
|
||||
(hash-table-for-each ht
|
||||
(lambda (k v)
|
||||
(let ([bad1 #"\377\377"]
|
||||
[bad2 (regexp-replace #rx#".$"
|
||||
(string->bytes/utf-8 "\U10FFF1")
|
||||
#"\377")]
|
||||
[bad3 (regexp-replace #rx#".$"
|
||||
(string->bytes/utf-8 "\u1234")
|
||||
#"")])
|
||||
(test #f regexp-match (vector-ref v 0) bad1)
|
||||
(test #f regexp-match (vector-ref v 0) bad2)
|
||||
(test #f regexp-match (vector-ref v 0) bad3)
|
||||
(test #f regexp-match (vector-ref v 1) bad1)
|
||||
(test #f regexp-match (vector-ref v 1) bad2)
|
||||
(test #f regexp-match (vector-ref v 1) bad3)
|
||||
(test #f regexp-match (vector-ref v 4) bad1)
|
||||
(test #f regexp-match (vector-ref v 4) bad2)
|
||||
(test #f regexp-match (vector-ref v 4) bad3)
|
||||
(test #f regexp-match (vector-ref v 5) bad1)
|
||||
(test #f regexp-match (vector-ref v 5) bad2)
|
||||
(test #f regexp-match (vector-ref v 5) bad3)
|
||||
(let ([other (ormap (lambda (e)
|
||||
(and (not (eq? k (char-general-category
|
||||
(integer->char (car e)))))
|
||||
(integer->char (car e))))
|
||||
kcrl)])
|
||||
(let* ([s (string other)]
|
||||
[bs (string->bytes/utf-8 s)]
|
||||
[s* (string-append s s s)]
|
||||
[bs* (bytes-append bs bs bs)])
|
||||
(test #f regexp-match (vector-ref v 0) bs)
|
||||
(test #f regexp-match (vector-ref v 1) s)
|
||||
(test '(#"") regexp-match (vector-ref v 2) bs)
|
||||
(test '("") regexp-match (vector-ref v 3) s)
|
||||
(test (list bs) regexp-match (vector-ref v 4) bs)
|
||||
(test (list s) regexp-match (vector-ref v 5) s)
|
||||
(test (list bs*) regexp-match (vector-ref v 6) bs*)
|
||||
(test (list s*) regexp-match (vector-ref v 7) s*))))))
|
||||
(let ([try (lambda (n)
|
||||
(let* ([cat (char-general-category (integer->char n))]
|
||||
[v (hash-table-get ht cat #f)])
|
||||
(when v
|
||||
(when just-once?
|
||||
(hash-table-remove! ht cat))
|
||||
(let* ([s (string (integer->char n))]
|
||||
[bs (string->bytes/utf-8 s)]
|
||||
[bs* (string->bytes/utf-8 (string-append s s s s s))]
|
||||
[regexp-match* (lambda (p s)
|
||||
(let ([v (regexp-match p (bytes->string/utf-8 s))])
|
||||
(and v
|
||||
(map string->bytes/utf-8 v))))])
|
||||
(test (list bs) regexp-match (vector-ref v 0) bs)
|
||||
(test (list bs) regexp-match* (vector-ref v 1) bs)
|
||||
(test (list bs*) regexp-match (vector-ref v 2) bs*)
|
||||
(test (list bs*) regexp-match* (vector-ref v 3) bs*)))))])
|
||||
(for-each (lambda (e)
|
||||
(let ([start (car e)]
|
||||
[end (cadr e)]
|
||||
[uniform? (caddr e)])
|
||||
(let loop ([n start])
|
||||
(try n)
|
||||
(unless (= n end)
|
||||
(loop (if uniform? end (+ n 1)))))))
|
||||
kcrl)))
|
||||
|
||||
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(report-errs)
|
||||
|
|
Loading…
Reference in New Issue
Block a user