cs: fix some UTF-16 and UCS-4 conversions

This commit is contained in:
Matthew Flatt 2021-05-08 08:44:33 -06:00
parent f35a92744d
commit c42d64cdbc
6 changed files with 38 additions and 28 deletions

View File

@ -1214,6 +1214,11 @@
(parameterize ([current-locale "C"])
(go #t))))
(when (or known-locale?
(eq? 'macosx (system-type)))
(test "\U1F600" string-locale-downcase "\U1F600")
(test "\U1F600" string-locale-upcase "\U1F600"))
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; locale<->UTF-8 conversions

View File

@ -433,14 +433,14 @@
bstr)
(loop (fx+ i 1))))])))
(define (subbytes-at-2-byte-nul x)
(define (subbytes-at-2-byte-nul x offset)
(let ([len (fxand (bytes-length x) (fxnot 1))])
(let loop ([i 0])
(let loop ([i offset])
(cond
[(fx= i len) x]
[(fx= i len) (if (fx= offset 0) x (subbytes x offset len))]
[(and (fx= 0 (bytes-ref x i))
(fx= 0 (bytes-ref x (fx+ i 1))))
(subbytes x 0 i)]
(subbytes x offset i)]
[else (loop (fx+ i 2))]))))
(define (uptr->bytes/2-byte-nul x)
@ -452,16 +452,16 @@
bstr)
(loop (fx+ i 2)))))
(define (subbytes-at-4-byte-nul x)
(define (subbytes-at-4-byte-nul x offset)
(let ([len (fxand (bytes-length x) (fxnot 3))])
(let loop ([i 0])
(let loop ([i offset])
(cond
[(fx= i len) x]
[(fx= i len) (if (fx= offset 0) x (subbytes x offset len))]
[(and (fx= 0 (bytes-ref x i))
(fx= 0 (bytes-ref x (fx+ i 1)))
(fx= 0 (bytes-ref x (fx+ i 2)))
(fx= 0 (bytes-ref x (fx+ i 3))))
(subbytes x 0 i)]
(subbytes x offset i)]
[else (loop (fx+ i 4))]))))
(define (uptr->bytes/4-byte-nul x)
@ -901,9 +901,9 @@
'big)])
(cond
[(bytevector? v)
(utf16->string (subbytes-at-2-byte-nul v) endian #t)]
(utf16->string (subbytes-at-2-byte-nul v offset) endian #t)]
[(integer? v)
(utf16->string (uptr->bytes/2-byte-nul v) endian #t)]
(utf16->string (uptr->bytes/2-byte-nul (+ v offset)) endian #t)]
[else #f])))
=> (lambda (v) v)]
[(and (word-aligned? offset)
@ -915,9 +915,9 @@
'big)])
(cond
[(bytevector? v)
(utf32->string (subbytes-at-4-byte-nul v) endian #t)]
(utf32->string (subbytes-at-4-byte-nul v offset) endian #t)]
[(integer? v)
(utf32->string (uptr->bytes/4-byte-nul v) endian #t)]
(utf32->string (uptr->bytes/4-byte-nul (+ v offset)) endian #t)]
[else #f])))
=> (lambda (v) v)]
[else
@ -930,12 +930,12 @@
(eq? 'utf-32le host-rep)
(eq? 'utf-32be host-rep))
(let ([v (with-interrupts-disabled*
(foreign-ref 'uptr (cpointer-address p) 0))])
(foreign-ref 'uptr (cpointer-address p) offset))])
(case host-rep
[(utf-16le) (utf16->string (uptr->bytes/2-byte-nul v) 'little #t)]
[(utf-16be) (utf16->string (uptr->bytes/2-byte-nul v) 'big #t)]
[(utf-32le) (utf16->string (uptr->bytes/4-byte-nul v) 'little #t)]
[(utf-32be) (utf16->string (uptr->bytes/4-byte-nul v) 'big #t)]))]
[(utf-32le) (utf32->string (uptr->bytes/4-byte-nul v) 'little #t)]
[(utf-32be) (utf32->string (uptr->bytes/4-byte-nul v) 'big #t)]))]
[else
;; Disable interrupts to avoid a GC:
(with-interrupts-disabled*

View File

@ -31045,11 +31045,14 @@
pos_0
(unsafe-fxrshift hi_0 8)
(fxand hi_0 255))
(let ((app_0 (+ pos_0 2)))
(bytes-set-two!
bstr_0
pos_0
(unsafe-fxrshift lo_0 8)
(fxand lo_0 255))
app_0
(unsafe-fxrshift
lo_0
8)
(fxand lo_0 255)))
(fx+ pos_0 4)))))
(begin
(bytes-set-two!
@ -31119,7 +31122,7 @@
(if big-endian?
(fxior (unsafe-fxlshift a_0 8) b_0)
(fxior (unsafe-fxlshift b_0 8) a_0))))
(if (fx= (fxand v_0 56320) 56320)
(if (fx= (fxand v_0 56320) 55296)
(let ((a_1
(unsafe-bytes-ref bstr_0 (fx+ i_0 2))))
(let ((b_1
@ -34375,11 +34378,11 @@
'subprocess
"(or/c (and/c output-port? file-stream-port?) #f 'stdout)"
stderr_0))
(let ((lr1322 unsafe-undefined)
(let ((lr1323 unsafe-undefined)
(group_0 unsafe-undefined)
(command_0 unsafe-undefined)
(exact/args_0 unsafe-undefined))
(set! lr1322
(set! lr1323
(call-with-values
(lambda ()
(if (path-string? group/command_0)
@ -34434,9 +34437,9 @@
((group_1 command_1 exact/args_1)
(vector group_1 command_1 exact/args_1))
(args (raise-binding-result-arity-error 3 args)))))
(set! group_0 (unsafe-vector*-ref lr1322 0))
(set! command_0 (unsafe-vector*-ref lr1322 1))
(set! exact/args_0 (unsafe-vector*-ref lr1322 2))
(set! group_0 (unsafe-vector*-ref lr1323 0))
(set! command_0 (unsafe-vector*-ref lr1323 1))
(set! exact/args_0 (unsafe-vector*-ref lr1323 2))
(call-with-values
(lambda ()
(if (if (pair? exact/args_0)

View File

@ -655,6 +655,8 @@
(parameterize ([current-locale "en_US.ISO8859-1"])
(test "Éric" (string-locale-downcase "Éric"))))
(when (eq? 'macosx (system-type))
(test "\U1F600" (string-locale-downcase "\U1F600")))
;; ----------------------------------------

View File

@ -24,7 +24,7 @@
(fxior (fxlshift a 8) b)
(fxior (fxlshift b 8) a)))
(cond
[(fx= (fxand v #xDC00) #xDC00)
[(fx= (fxand v #xDC00) #xD800)
;; surrogate pair
(define a (bytes-ref bstr (fx+ i 2)))
(define b (bytes-ref bstr (fx+ i 3)))

View File

@ -19,7 +19,7 @@
(define hi (fxior #xD800 (fxand (fxrshift av 10) #x3FF)))
(define lo (fxior #xDC00 (fxand av #x3FF)))
(bytes-set-two! bstr pos (fxrshift hi 8) (fxand hi #xFF))
(bytes-set-two! bstr pos (fxrshift lo 8) (fxand lo #xFF))
(bytes-set-two! bstr (+ pos 2) (fxrshift lo 8) (fxand lo #xFF))
(fx+ pos 4)]
[else
(bytes-set-two! bstr pos (fxrshift v 8) (fxand v #xFF))