ffi/unsafe: fix a bug in conversion to UTF-16

Characters outside of the Basic Multilingual Plane (BMP), i.e. code points above U+FFFF, were translated incorrectly.
This commit is contained in:
Matthew Flatt 2013-02-18 08:38:18 -07:00
parent f8a0d6d432
commit 4271d987cd
3 changed files with 12 additions and 0 deletions

View File

@ -494,6 +494,9 @@
(test 4.4t0 extflvector-ref v 2)
(test 2.2t0 ptr-ref (ptr-add (extflvector->cpointer v) (ctype-sizeof _longdouble)) _longdouble))
;; Check a corner of UTF-16 conversion:
;; "\U171D3" is a code point above U+FFFF, so its UTF-16 form needs a
;; surrogate pair rather than a single 16-bit unit. The inner `cast` turns
;; the string into a `_pointer` via `_string/utf-16`; the outer `cast` reads
;; that pointer back as `_string/utf-16`. The round trip is expected to
;; produce the original string.
(test "\U171D3" cast (cast "\U171D3" _string/utf-16 _pointer) _pointer _string/utf-16)
(report-errs)
#| --- ignore everything below ---

View File

@ -988,6 +988,14 @@
(bytes-convert c
(bytes-append (integer->integer-bytes #xDC00 2 #f)
(integer->integer-bytes #x1000 2 #f))))))))
;; Check a corner of UTF-16 conversion:
;; Convert the UTF-8 bytes of "\U171D3" (a code point above U+FFFF) to
;; "platform-UTF-16", convert the result back to "platform-UTF-8", and
;; decode the final bytes as a UTF-8 string.
;; NOTE(review): the resulting string is not compared against "\U171D3" in
;; the lines shown here -- confirm whether this is meant to be wrapped in a
;; check, or whether only the absence of a decoding error is exercised.
(let ([c (bytes-open-converter "platform-UTF-8" "platform-UTF-16")])
;; `s` receives the converted UTF-16 bytes; `n` and `status` are not used below.
(let-values ([(s n status) (bytes-convert c (string->bytes/utf-8 "\U171D3"))])
(let ([c2 (bytes-open-converter "platform-UTF-16" "platform-UTF-8")])
;; `s2` receives the bytes converted back to UTF-8; `n2` and `status2` are not used below.
(let-values ([(s2 n2 status2) (bytes-convert c2 s)])
(bytes->string/utf-8 s2)))))
(when (eq? (system-type) 'windows)
(let ([c (bytes-open-converter "platform-UTF-8-permissive" "platform-UTF-16")])
;; Check that we use all 6 bytes of #"\355\240\200\355\260\200" or none

View File

@ -5389,6 +5389,7 @@ unsigned short *scheme_ucs4_to_utf16(const mzchar *text, intptr_t start, intptr_
for (i = start, j = 0; i < end; i++) {
v = text[i];
if (v > 0xFFFF) {
v -= 0x10000;
utf16[j++] = 0xD800 | ((v >> 10) & 0x3FF);
utf16[j++] = 0xDC00 | (v & 0x3FF);
} else