cs: fix some UTF-16 and UCS-4 conversions
This commit is contained in:
parent
f35a92744d
commit
c42d64cdbc
|
@ -1214,6 +1214,11 @@
|
|||
(parameterize ([current-locale "C"])
|
||||
(go #t))))
|
||||
|
||||
(when (or known-locale?
|
||||
(eq? 'macosx (system-type)))
|
||||
(test "\U1F600" string-locale-downcase "\U1F600")
|
||||
(test "\U1F600" string-locale-upcase "\U1F600"))
|
||||
|
||||
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; locale<->UTF-8 conversions
|
||||
|
||||
|
|
|
@ -433,14 +433,14 @@
|
|||
bstr)
|
||||
(loop (fx+ i 1))))])))
|
||||
|
||||
(define (subbytes-at-2-byte-nul x)
|
||||
(define (subbytes-at-2-byte-nul x offset)
|
||||
(let ([len (fxand (bytes-length x) (fxnot 1))])
|
||||
(let loop ([i 0])
|
||||
(let loop ([i offset])
|
||||
(cond
|
||||
[(fx= i len) x]
|
||||
[(fx= i len) (if (fx= offset 0) x (subbytes x offset len))]
|
||||
[(and (fx= 0 (bytes-ref x i))
|
||||
(fx= 0 (bytes-ref x (fx+ i 1))))
|
||||
(subbytes x 0 i)]
|
||||
(subbytes x offset i)]
|
||||
[else (loop (fx+ i 2))]))))
|
||||
|
||||
(define (uptr->bytes/2-byte-nul x)
|
||||
|
@ -452,16 +452,16 @@
|
|||
bstr)
|
||||
(loop (fx+ i 2)))))
|
||||
|
||||
(define (subbytes-at-4-byte-nul x)
|
||||
(define (subbytes-at-4-byte-nul x offset)
|
||||
(let ([len (fxand (bytes-length x) (fxnot 3))])
|
||||
(let loop ([i 0])
|
||||
(let loop ([i offset])
|
||||
(cond
|
||||
[(fx= i len) x]
|
||||
[(fx= i len) (if (fx= offset 0) x (subbytes x offset len))]
|
||||
[(and (fx= 0 (bytes-ref x i))
|
||||
(fx= 0 (bytes-ref x (fx+ i 1)))
|
||||
(fx= 0 (bytes-ref x (fx+ i 2)))
|
||||
(fx= 0 (bytes-ref x (fx+ i 3))))
|
||||
(subbytes x 0 i)]
|
||||
(subbytes x offset i)]
|
||||
[else (loop (fx+ i 4))]))))
|
||||
|
||||
(define (uptr->bytes/4-byte-nul x)
|
||||
|
@ -901,9 +901,9 @@
|
|||
'big)])
|
||||
(cond
|
||||
[(bytevector? v)
|
||||
(utf16->string (subbytes-at-2-byte-nul v) endian #t)]
|
||||
(utf16->string (subbytes-at-2-byte-nul v offset) endian #t)]
|
||||
[(integer? v)
|
||||
(utf16->string (uptr->bytes/2-byte-nul v) endian #t)]
|
||||
(utf16->string (uptr->bytes/2-byte-nul (+ v offset)) endian #t)]
|
||||
[else #f])))
|
||||
=> (lambda (v) v)]
|
||||
[(and (word-aligned? offset)
|
||||
|
@ -915,9 +915,9 @@
|
|||
'big)])
|
||||
(cond
|
||||
[(bytevector? v)
|
||||
(utf32->string (subbytes-at-4-byte-nul v) endian #t)]
|
||||
(utf32->string (subbytes-at-4-byte-nul v offset) endian #t)]
|
||||
[(integer? v)
|
||||
(utf32->string (uptr->bytes/4-byte-nul v) endian #t)]
|
||||
(utf32->string (uptr->bytes/4-byte-nul (+ v offset)) endian #t)]
|
||||
[else #f])))
|
||||
=> (lambda (v) v)]
|
||||
[else
|
||||
|
@ -930,12 +930,12 @@
|
|||
(eq? 'utf-32le host-rep)
|
||||
(eq? 'utf-32be host-rep))
|
||||
(let ([v (with-interrupts-disabled*
|
||||
(foreign-ref 'uptr (cpointer-address p) 0))])
|
||||
(foreign-ref 'uptr (cpointer-address p) offset))])
|
||||
(case host-rep
|
||||
[(utf-16le) (utf16->string (uptr->bytes/2-byte-nul v) 'little #t)]
|
||||
[(utf-16be) (utf16->string (uptr->bytes/2-byte-nul v) 'big #t)]
|
||||
[(utf-32le) (utf16->string (uptr->bytes/4-byte-nul v) 'little #t)]
|
||||
[(utf-32be) (utf16->string (uptr->bytes/4-byte-nul v) 'big #t)]))]
|
||||
[(utf-32le) (utf32->string (uptr->bytes/4-byte-nul v) 'little #t)]
|
||||
[(utf-32be) (utf32->string (uptr->bytes/4-byte-nul v) 'big #t)]))]
|
||||
[else
|
||||
;; Disable interrupts to avoid a GC:
|
||||
(with-interrupts-disabled*
|
||||
|
|
|
@ -31045,11 +31045,14 @@
|
|||
pos_0
|
||||
(unsafe-fxrshift hi_0 8)
|
||||
(fxand hi_0 255))
|
||||
(let ((app_0 (+ pos_0 2)))
|
||||
(bytes-set-two!
|
||||
bstr_0
|
||||
pos_0
|
||||
(unsafe-fxrshift lo_0 8)
|
||||
(fxand lo_0 255))
|
||||
app_0
|
||||
(unsafe-fxrshift
|
||||
lo_0
|
||||
8)
|
||||
(fxand lo_0 255)))
|
||||
(fx+ pos_0 4)))))
|
||||
(begin
|
||||
(bytes-set-two!
|
||||
|
@ -31119,7 +31122,7 @@
|
|||
(if big-endian?
|
||||
(fxior (unsafe-fxlshift a_0 8) b_0)
|
||||
(fxior (unsafe-fxlshift b_0 8) a_0))))
|
||||
(if (fx= (fxand v_0 56320) 56320)
|
||||
(if (fx= (fxand v_0 56320) 55296)
|
||||
(let ((a_1
|
||||
(unsafe-bytes-ref bstr_0 (fx+ i_0 2))))
|
||||
(let ((b_1
|
||||
|
@ -34375,11 +34378,11 @@
|
|||
'subprocess
|
||||
"(or/c (and/c output-port? file-stream-port?) #f 'stdout)"
|
||||
stderr_0))
|
||||
(let ((lr1322 unsafe-undefined)
|
||||
(let ((lr1323 unsafe-undefined)
|
||||
(group_0 unsafe-undefined)
|
||||
(command_0 unsafe-undefined)
|
||||
(exact/args_0 unsafe-undefined))
|
||||
(set! lr1322
|
||||
(set! lr1323
|
||||
(call-with-values
|
||||
(lambda ()
|
||||
(if (path-string? group/command_0)
|
||||
|
@ -34434,9 +34437,9 @@
|
|||
((group_1 command_1 exact/args_1)
|
||||
(vector group_1 command_1 exact/args_1))
|
||||
(args (raise-binding-result-arity-error 3 args)))))
|
||||
(set! group_0 (unsafe-vector*-ref lr1322 0))
|
||||
(set! command_0 (unsafe-vector*-ref lr1322 1))
|
||||
(set! exact/args_0 (unsafe-vector*-ref lr1322 2))
|
||||
(set! group_0 (unsafe-vector*-ref lr1323 0))
|
||||
(set! command_0 (unsafe-vector*-ref lr1323 1))
|
||||
(set! exact/args_0 (unsafe-vector*-ref lr1323 2))
|
||||
(call-with-values
|
||||
(lambda ()
|
||||
(if (if (pair? exact/args_0)
|
||||
|
|
|
@ -655,6 +655,8 @@
|
|||
(parameterize ([current-locale "en_US.ISO8859-1"])
|
||||
(test "Éric" (string-locale-downcase "Éric"))))
|
||||
|
||||
(when (eq? 'macosx (system-type))
|
||||
(test "\U1F600" (string-locale-downcase "\U1F600")))
|
||||
|
||||
;; ----------------------------------------
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
(fxior (fxlshift a 8) b)
|
||||
(fxior (fxlshift b 8) a)))
|
||||
(cond
|
||||
[(fx= (fxand v #xDC00) #xDC00)
|
||||
[(fx= (fxand v #xDC00) #xD800)
|
||||
;; surrogate pair
|
||||
(define a (bytes-ref bstr (fx+ i 2)))
|
||||
(define b (bytes-ref bstr (fx+ i 3)))
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
(define hi (fxior #xD800 (fxand (fxrshift av 10) #x3FF)))
|
||||
(define lo (fxior #xDC00 (fxand av #x3FF)))
|
||||
(bytes-set-two! bstr pos (fxrshift hi 8) (fxand hi #xFF))
|
||||
(bytes-set-two! bstr pos (fxrshift lo 8) (fxand lo #xFF))
|
||||
(bytes-set-two! bstr (+ pos 2) (fxrshift lo 8) (fxand lo #xFF))
|
||||
(fx+ pos 4)]
|
||||
[else
|
||||
(bytes-set-two! bstr pos (fxrshift v 8) (fxand v #xFF))
|
||||
|
|
Loading…
Reference in New Issue
Block a user