diff --git a/pkgs/racket-test-core/tests/racket/unicode.rktl b/pkgs/racket-test-core/tests/racket/unicode.rktl index bc328309a8..5653cca6f0 100644 --- a/pkgs/racket-test-core/tests/racket/unicode.rktl +++ b/pkgs/racket-test-core/tests/racket/unicode.rktl @@ -1214,6 +1214,11 @@ (parameterize ([current-locale "C"]) (go #t)))) +(when (or known-locale? + (eq? 'macosx (system-type))) + (test "\U1F600" string-locale-downcase "\U1F600") + (test "\U1F600" string-locale-upcase "\U1F600")) + ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; locale<->UTF-8 conversions diff --git a/racket/src/cs/rumble/foreign.ss b/racket/src/cs/rumble/foreign.ss index 260224432f..21df34a73b 100644 --- a/racket/src/cs/rumble/foreign.ss +++ b/racket/src/cs/rumble/foreign.ss @@ -433,14 +433,14 @@ bstr) (loop (fx+ i 1))))]))) -(define (subbytes-at-2-byte-nul x) +(define (subbytes-at-2-byte-nul x offset) (let ([len (fxand (bytes-length x) (fxnot 1))]) - (let loop ([i 0]) + (let loop ([i offset]) (cond - [(fx= i len) x] + [(fx= i len) (if (fx= offset 0) x (subbytes x offset len))] [(and (fx= 0 (bytes-ref x i)) (fx= 0 (bytes-ref x (fx+ i 1)))) - (subbytes x 0 i)] + (subbytes x offset i)] [else (loop (fx+ i 2))])))) (define (uptr->bytes/2-byte-nul x) @@ -452,16 +452,16 @@ bstr) (loop (fx+ i 2))))) -(define (subbytes-at-4-byte-nul x) +(define (subbytes-at-4-byte-nul x offset) (let ([len (fxand (bytes-length x) (fxnot 3))]) - (let loop ([i 0]) + (let loop ([i offset]) (cond - [(fx= i len) x] + [(fx= i len) (if (fx= offset 0) x (subbytes x offset len))] [(and (fx= 0 (bytes-ref x i)) (fx= 0 (bytes-ref x (fx+ i 1))) (fx= 0 (bytes-ref x (fx+ i 2))) (fx= 0 (bytes-ref x (fx+ i 3)))) - (subbytes x 0 i)] + (subbytes x offset i)] [else (loop (fx+ i 4))])))) (define (uptr->bytes/4-byte-nul x) @@ -901,9 +901,9 @@ 'big)]) (cond [(bytevector? v) - (utf16->string (subbytes-at-2-byte-nul v) endian #t)] + (utf16->string (subbytes-at-2-byte-nul v offset) endian #t)] [(integer? v) - (utf16->string (uptr->bytes/2-byte-nul v) endian #t)] + (utf16->string (uptr->bytes/2-byte-nul (+ v offset)) endian #t)] [else #f]))) => (lambda (v) v)] [(and (word-aligned? offset) @@ -915,9 +915,9 @@ 'big)]) (cond [(bytevector? v) - (utf32->string (subbytes-at-4-byte-nul v) endian #t)] + (utf32->string (subbytes-at-4-byte-nul v offset) endian #t)] [(integer? v) - (utf32->string (uptr->bytes/4-byte-nul v) endian #t)] + (utf32->string (uptr->bytes/4-byte-nul (+ v offset)) endian #t)] [else #f]))) => (lambda (v) v)] [else @@ -930,12 +930,12 @@ (eq? 'utf-32le host-rep) (eq? 'utf-32be host-rep)) (let ([v (with-interrupts-disabled* - (foreign-ref 'uptr (cpointer-address p) 0))]) + (foreign-ref 'uptr (cpointer-address p) offset))]) (case host-rep [(utf-16le) (utf16->string (uptr->bytes/2-byte-nul v) 'little #t)] [(utf-16be) (utf16->string (uptr->bytes/2-byte-nul v) 'big #t)] - [(utf-32le) (utf16->string (uptr->bytes/4-byte-nul v) 'little #t)] - [(utf-32be) (utf16->string (uptr->bytes/4-byte-nul v) 'big #t)]))] + [(utf-32le) (utf32->string (uptr->bytes/4-byte-nul v) 'little #t)] + [(utf-32be) (utf32->string (uptr->bytes/4-byte-nul v) 'big #t)]))] [else ;; Disable interrupts to avoid a GC: (with-interrupts-disabled* diff --git a/racket/src/cs/schemified/io.scm b/racket/src/cs/schemified/io.scm index 4b272d1ba1..2fe02bb543 100644 --- a/racket/src/cs/schemified/io.scm +++ b/racket/src/cs/schemified/io.scm @@ -31045,11 +31045,14 @@ pos_0 (unsafe-fxrshift hi_0 8) (fxand hi_0 255)) - (bytes-set-two! - bstr_0 - pos_0 - (unsafe-fxrshift lo_0 8) - (fxand lo_0 255)) + (let ((app_0 (+ pos_0 2))) + (bytes-set-two! + bstr_0 + app_0 + (unsafe-fxrshift + lo_0 + 8) + (fxand lo_0 255))) (fx+ pos_0 4))))) (begin (bytes-set-two! @@ -31119,7 +31122,7 @@ (if big-endian? (fxior (unsafe-fxlshift a_0 8) b_0) (fxior (unsafe-fxlshift b_0 8) a_0)))) - (if (fx= (fxand v_0 56320) 56320) + (if (fx= (fxand v_0 56320) 55296) (let ((a_1 (unsafe-bytes-ref bstr_0 (fx+ i_0 2)))) (let ((b_1 @@ -34375,11 +34378,11 @@ 'subprocess "(or/c (and/c output-port? file-stream-port?) #f 'stdout)" stderr_0)) - (let ((lr1322 unsafe-undefined) + (let ((lr1323 unsafe-undefined) (group_0 unsafe-undefined) (command_0 unsafe-undefined) (exact/args_0 unsafe-undefined)) - (set! lr1322 + (set! lr1323 (call-with-values (lambda () (if (path-string? group/command_0) @@ -34434,9 +34437,9 @@ ((group_1 command_1 exact/args_1) (vector group_1 command_1 exact/args_1)) (args (raise-binding-result-arity-error 3 args))))) - (set! group_0 (unsafe-vector*-ref lr1322 0)) - (set! command_0 (unsafe-vector*-ref lr1322 1)) - (set! exact/args_0 (unsafe-vector*-ref lr1322 2)) + (set! group_0 (unsafe-vector*-ref lr1323 0)) + (set! command_0 (unsafe-vector*-ref lr1323 1)) + (set! exact/args_0 (unsafe-vector*-ref lr1323 2)) (call-with-values (lambda () (if (if (pair? exact/args_0) diff --git a/racket/src/io/demo.rkt b/racket/src/io/demo.rkt index dedf1d08d0..2428267da2 100644 --- a/racket/src/io/demo.rkt +++ b/racket/src/io/demo.rkt @@ -655,6 +655,8 @@ (parameterize ([current-locale "en_US.ISO8859-1"]) (test "Éric" (string-locale-downcase "Éric")))) +(when (eq? 'macosx (system-type)) + (test "\U1F600" (string-locale-downcase "\U1F600"))) ;; ---------------------------------------- diff --git a/racket/src/io/string/utf-16-decode.rkt b/racket/src/io/string/utf-16-decode.rkt index 3f05793c1b..e190f0c464 100644 --- a/racket/src/io/string/utf-16-decode.rkt +++ b/racket/src/io/string/utf-16-decode.rkt @@ -24,7 +24,7 @@ (fxior (fxlshift a 8) b) (fxior (fxlshift b 8) a))) (cond - [(fx= (fxand v #xDC00) #xDC00) + [(fx= (fxand v #xDC00) #xD800) ;; surrogate pair (define a (bytes-ref bstr (fx+ i 2))) (define b (bytes-ref bstr (fx+ i 3))) diff --git a/racket/src/io/string/utf-16-encode.rkt b/racket/src/io/string/utf-16-encode.rkt index 2c311f6e9a..5ca1d148f3 100644 --- a/racket/src/io/string/utf-16-encode.rkt +++ b/racket/src/io/string/utf-16-encode.rkt @@ -19,7 +19,7 @@ (define hi (fxior #xD800 (fxand (fxrshift av 10) #x3FF))) (define lo (fxior #xDC00 (fxand av #x3FF))) (bytes-set-two! bstr pos (fxrshift hi 8) (fxand hi #xFF)) - (bytes-set-two! bstr pos (fxrshift lo 8) (fxand lo #xFF)) + (bytes-set-two! bstr (+ pos 2) (fxrshift lo 8) (fxand lo #xFF)) (fx+ pos 4)] [else (bytes-set-two! bstr pos (fxrshift v 8) (fxand v #xFF))