cs: fix some UTF-16 and UCS-4 conversions
This commit is contained in:
parent
f35a92744d
commit
c42d64cdbc
|
@ -1214,6 +1214,11 @@
|
||||||
(parameterize ([current-locale "C"])
|
(parameterize ([current-locale "C"])
|
||||||
(go #t))))
|
(go #t))))
|
||||||
|
|
||||||
|
(when (or known-locale?
|
||||||
|
(eq? 'macosx (system-type)))
|
||||||
|
(test "\U1F600" string-locale-downcase "\U1F600")
|
||||||
|
(test "\U1F600" string-locale-upcase "\U1F600"))
|
||||||
|
|
||||||
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; locale<->UTF-8 conversions
|
;; locale<->UTF-8 conversions
|
||||||
|
|
||||||
|
|
|
@ -433,14 +433,14 @@
|
||||||
bstr)
|
bstr)
|
||||||
(loop (fx+ i 1))))])))
|
(loop (fx+ i 1))))])))
|
||||||
|
|
||||||
(define (subbytes-at-2-byte-nul x)
|
(define (subbytes-at-2-byte-nul x offset)
|
||||||
(let ([len (fxand (bytes-length x) (fxnot 1))])
|
(let ([len (fxand (bytes-length x) (fxnot 1))])
|
||||||
(let loop ([i 0])
|
(let loop ([i offset])
|
||||||
(cond
|
(cond
|
||||||
[(fx= i len) x]
|
[(fx= i len) (if (fx= offset 0) x (subbytes x offset len))]
|
||||||
[(and (fx= 0 (bytes-ref x i))
|
[(and (fx= 0 (bytes-ref x i))
|
||||||
(fx= 0 (bytes-ref x (fx+ i 1))))
|
(fx= 0 (bytes-ref x (fx+ i 1))))
|
||||||
(subbytes x 0 i)]
|
(subbytes x offset i)]
|
||||||
[else (loop (fx+ i 2))]))))
|
[else (loop (fx+ i 2))]))))
|
||||||
|
|
||||||
(define (uptr->bytes/2-byte-nul x)
|
(define (uptr->bytes/2-byte-nul x)
|
||||||
|
@ -452,16 +452,16 @@
|
||||||
bstr)
|
bstr)
|
||||||
(loop (fx+ i 2)))))
|
(loop (fx+ i 2)))))
|
||||||
|
|
||||||
(define (subbytes-at-4-byte-nul x)
|
(define (subbytes-at-4-byte-nul x offset)
|
||||||
(let ([len (fxand (bytes-length x) (fxnot 3))])
|
(let ([len (fxand (bytes-length x) (fxnot 3))])
|
||||||
(let loop ([i 0])
|
(let loop ([i offset])
|
||||||
(cond
|
(cond
|
||||||
[(fx= i len) x]
|
[(fx= i len) (if (fx= offset 0) x (subbytes x offset len))]
|
||||||
[(and (fx= 0 (bytes-ref x i))
|
[(and (fx= 0 (bytes-ref x i))
|
||||||
(fx= 0 (bytes-ref x (fx+ i 1)))
|
(fx= 0 (bytes-ref x (fx+ i 1)))
|
||||||
(fx= 0 (bytes-ref x (fx+ i 2)))
|
(fx= 0 (bytes-ref x (fx+ i 2)))
|
||||||
(fx= 0 (bytes-ref x (fx+ i 3))))
|
(fx= 0 (bytes-ref x (fx+ i 3))))
|
||||||
(subbytes x 0 i)]
|
(subbytes x offset i)]
|
||||||
[else (loop (fx+ i 4))]))))
|
[else (loop (fx+ i 4))]))))
|
||||||
|
|
||||||
(define (uptr->bytes/4-byte-nul x)
|
(define (uptr->bytes/4-byte-nul x)
|
||||||
|
@ -901,9 +901,9 @@
|
||||||
'big)])
|
'big)])
|
||||||
(cond
|
(cond
|
||||||
[(bytevector? v)
|
[(bytevector? v)
|
||||||
(utf16->string (subbytes-at-2-byte-nul v) endian #t)]
|
(utf16->string (subbytes-at-2-byte-nul v offset) endian #t)]
|
||||||
[(integer? v)
|
[(integer? v)
|
||||||
(utf16->string (uptr->bytes/2-byte-nul v) endian #t)]
|
(utf16->string (uptr->bytes/2-byte-nul (+ v offset)) endian #t)]
|
||||||
[else #f])))
|
[else #f])))
|
||||||
=> (lambda (v) v)]
|
=> (lambda (v) v)]
|
||||||
[(and (word-aligned? offset)
|
[(and (word-aligned? offset)
|
||||||
|
@ -915,9 +915,9 @@
|
||||||
'big)])
|
'big)])
|
||||||
(cond
|
(cond
|
||||||
[(bytevector? v)
|
[(bytevector? v)
|
||||||
(utf32->string (subbytes-at-4-byte-nul v) endian #t)]
|
(utf32->string (subbytes-at-4-byte-nul v offset) endian #t)]
|
||||||
[(integer? v)
|
[(integer? v)
|
||||||
(utf32->string (uptr->bytes/4-byte-nul v) endian #t)]
|
(utf32->string (uptr->bytes/4-byte-nul (+ v offset)) endian #t)]
|
||||||
[else #f])))
|
[else #f])))
|
||||||
=> (lambda (v) v)]
|
=> (lambda (v) v)]
|
||||||
[else
|
[else
|
||||||
|
@ -930,12 +930,12 @@
|
||||||
(eq? 'utf-32le host-rep)
|
(eq? 'utf-32le host-rep)
|
||||||
(eq? 'utf-32be host-rep))
|
(eq? 'utf-32be host-rep))
|
||||||
(let ([v (with-interrupts-disabled*
|
(let ([v (with-interrupts-disabled*
|
||||||
(foreign-ref 'uptr (cpointer-address p) 0))])
|
(foreign-ref 'uptr (cpointer-address p) offset))])
|
||||||
(case host-rep
|
(case host-rep
|
||||||
[(utf-16le) (utf16->string (uptr->bytes/2-byte-nul v) 'little #t)]
|
[(utf-16le) (utf16->string (uptr->bytes/2-byte-nul v) 'little #t)]
|
||||||
[(utf-16be) (utf16->string (uptr->bytes/2-byte-nul v) 'big #t)]
|
[(utf-16be) (utf16->string (uptr->bytes/2-byte-nul v) 'big #t)]
|
||||||
[(utf-32le) (utf16->string (uptr->bytes/4-byte-nul v) 'little #t)]
|
[(utf-32le) (utf32->string (uptr->bytes/4-byte-nul v) 'little #t)]
|
||||||
[(utf-32be) (utf16->string (uptr->bytes/4-byte-nul v) 'big #t)]))]
|
[(utf-32be) (utf32->string (uptr->bytes/4-byte-nul v) 'big #t)]))]
|
||||||
[else
|
[else
|
||||||
;; Disable interrupts to avoid a GC:
|
;; Disable interrupts to avoid a GC:
|
||||||
(with-interrupts-disabled*
|
(with-interrupts-disabled*
|
||||||
|
|
|
@ -31045,11 +31045,14 @@
|
||||||
pos_0
|
pos_0
|
||||||
(unsafe-fxrshift hi_0 8)
|
(unsafe-fxrshift hi_0 8)
|
||||||
(fxand hi_0 255))
|
(fxand hi_0 255))
|
||||||
(bytes-set-two!
|
(let ((app_0 (+ pos_0 2)))
|
||||||
bstr_0
|
(bytes-set-two!
|
||||||
pos_0
|
bstr_0
|
||||||
(unsafe-fxrshift lo_0 8)
|
app_0
|
||||||
(fxand lo_0 255))
|
(unsafe-fxrshift
|
||||||
|
lo_0
|
||||||
|
8)
|
||||||
|
(fxand lo_0 255)))
|
||||||
(fx+ pos_0 4)))))
|
(fx+ pos_0 4)))))
|
||||||
(begin
|
(begin
|
||||||
(bytes-set-two!
|
(bytes-set-two!
|
||||||
|
@ -31119,7 +31122,7 @@
|
||||||
(if big-endian?
|
(if big-endian?
|
||||||
(fxior (unsafe-fxlshift a_0 8) b_0)
|
(fxior (unsafe-fxlshift a_0 8) b_0)
|
||||||
(fxior (unsafe-fxlshift b_0 8) a_0))))
|
(fxior (unsafe-fxlshift b_0 8) a_0))))
|
||||||
(if (fx= (fxand v_0 56320) 56320)
|
(if (fx= (fxand v_0 56320) 55296)
|
||||||
(let ((a_1
|
(let ((a_1
|
||||||
(unsafe-bytes-ref bstr_0 (fx+ i_0 2))))
|
(unsafe-bytes-ref bstr_0 (fx+ i_0 2))))
|
||||||
(let ((b_1
|
(let ((b_1
|
||||||
|
@ -34375,11 +34378,11 @@
|
||||||
'subprocess
|
'subprocess
|
||||||
"(or/c (and/c output-port? file-stream-port?) #f 'stdout)"
|
"(or/c (and/c output-port? file-stream-port?) #f 'stdout)"
|
||||||
stderr_0))
|
stderr_0))
|
||||||
(let ((lr1322 unsafe-undefined)
|
(let ((lr1323 unsafe-undefined)
|
||||||
(group_0 unsafe-undefined)
|
(group_0 unsafe-undefined)
|
||||||
(command_0 unsafe-undefined)
|
(command_0 unsafe-undefined)
|
||||||
(exact/args_0 unsafe-undefined))
|
(exact/args_0 unsafe-undefined))
|
||||||
(set! lr1322
|
(set! lr1323
|
||||||
(call-with-values
|
(call-with-values
|
||||||
(lambda ()
|
(lambda ()
|
||||||
(if (path-string? group/command_0)
|
(if (path-string? group/command_0)
|
||||||
|
@ -34434,9 +34437,9 @@
|
||||||
((group_1 command_1 exact/args_1)
|
((group_1 command_1 exact/args_1)
|
||||||
(vector group_1 command_1 exact/args_1))
|
(vector group_1 command_1 exact/args_1))
|
||||||
(args (raise-binding-result-arity-error 3 args)))))
|
(args (raise-binding-result-arity-error 3 args)))))
|
||||||
(set! group_0 (unsafe-vector*-ref lr1322 0))
|
(set! group_0 (unsafe-vector*-ref lr1323 0))
|
||||||
(set! command_0 (unsafe-vector*-ref lr1322 1))
|
(set! command_0 (unsafe-vector*-ref lr1323 1))
|
||||||
(set! exact/args_0 (unsafe-vector*-ref lr1322 2))
|
(set! exact/args_0 (unsafe-vector*-ref lr1323 2))
|
||||||
(call-with-values
|
(call-with-values
|
||||||
(lambda ()
|
(lambda ()
|
||||||
(if (if (pair? exact/args_0)
|
(if (if (pair? exact/args_0)
|
||||||
|
|
|
@ -655,6 +655,8 @@
|
||||||
(parameterize ([current-locale "en_US.ISO8859-1"])
|
(parameterize ([current-locale "en_US.ISO8859-1"])
|
||||||
(test "Éric" (string-locale-downcase "Éric"))))
|
(test "Éric" (string-locale-downcase "Éric"))))
|
||||||
|
|
||||||
|
(when (eq? 'macosx (system-type))
|
||||||
|
(test "\U1F600" (string-locale-downcase "\U1F600")))
|
||||||
|
|
||||||
;; ----------------------------------------
|
;; ----------------------------------------
|
||||||
|
|
||||||
|
|
|
@ -24,7 +24,7 @@
|
||||||
(fxior (fxlshift a 8) b)
|
(fxior (fxlshift a 8) b)
|
||||||
(fxior (fxlshift b 8) a)))
|
(fxior (fxlshift b 8) a)))
|
||||||
(cond
|
(cond
|
||||||
[(fx= (fxand v #xDC00) #xDC00)
|
[(fx= (fxand v #xDC00) #xD800)
|
||||||
;; surrogate pair
|
;; surrogate pair
|
||||||
(define a (bytes-ref bstr (fx+ i 2)))
|
(define a (bytes-ref bstr (fx+ i 2)))
|
||||||
(define b (bytes-ref bstr (fx+ i 3)))
|
(define b (bytes-ref bstr (fx+ i 3)))
|
||||||
|
|
|
@ -19,7 +19,7 @@
|
||||||
(define hi (fxior #xD800 (fxand (fxrshift av 10) #x3FF)))
|
(define hi (fxior #xD800 (fxand (fxrshift av 10) #x3FF)))
|
||||||
(define lo (fxior #xDC00 (fxand av #x3FF)))
|
(define lo (fxior #xDC00 (fxand av #x3FF)))
|
||||||
(bytes-set-two! bstr pos (fxrshift hi 8) (fxand hi #xFF))
|
(bytes-set-two! bstr pos (fxrshift hi 8) (fxand hi #xFF))
|
||||||
(bytes-set-two! bstr pos (fxrshift lo 8) (fxand lo #xFF))
|
(bytes-set-two! bstr (+ pos 2) (fxrshift lo 8) (fxand lo #xFF))
|
||||||
(fx+ pos 4)]
|
(fx+ pos 4)]
|
||||||
[else
|
[else
|
||||||
(bytes-set-two! bstr pos (fxrshift v 8) (fxand v #xFF))
|
(bytes-set-two! bstr pos (fxrshift v 8) (fxand v #xFF))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user