fixed UTF-16 conversin for Windows

svn: r2902
This commit is contained in:
Matthew Flatt 2006-05-10 16:28:51 +00:00
parent 97e800aeee
commit bbdbec15d5

View File

@ -4427,6 +4427,9 @@ static int utf8_decode_x(const unsigned char *s, int start, int end,
int init_doki;
int nextbits, v;
unsigned int sc;
# ifdef WINDOWS_UNICODE_SUPPORT
int pending_surrogate = 0;
# endif
if (_state) {
state = (*_state) & 0x7;
@ -4577,10 +4580,18 @@ static int utf8_decode_x(const unsigned char *s, int start, int end,
if (compact) {
if (utf16) {
if (v > 0xFFFF) {
# ifdef WINDOWS_UNICODE_SUPPORT
if (pending_surrogate) {
if (us)
((unsigned short *)us)[j] = pending_surrogate;
j++; /* Accept previously written unpaired surrogate */
pending_surrogate = 0;
}
# endif
if (j + 1 >= dend)
break;
if (us) {
v -= 0x10000;
if (j + 1 >= dend)
break;
((unsigned short *)us)[j] = 0xD800 | ((v >> 10) & 0x3FF);
((unsigned short *)us)[j+1] = 0xDC00 | (v & 0x3FF);
}
@ -4591,20 +4602,63 @@ static int utf8_decode_x(const unsigned char *s, int start, int end,
a 0xDC00 after a 0xD800, otherwise multiple encodings can
map to the same thing. */
if ((v >= 0xD800) && (v <= 0xDFFF)) {
if ((utf16 == 2) && ((v & 0xDC00) == 0xDC00)) {
if (permissive)
v = permissive;
else
if (pending_surrogate && ((v & 0xDC00) == 0xDC00)) {
/* This looks like a surrogate pair, so disallow it. */
if (permissive) {
/* We need to fill in 6 permissive substitutions,
one for each input byte. If we can't put all 6,
then don't use any input. */
if (j + 5 >= dend) {
break;
} else {
int p;
if (us) {
for (p = 0; p < 5; p++) {
if (j + p >= dend)
break;
((unsigned short *)us)[j+p] = permissive;
}
}
j += 5;
v = permissive;
}
} else {
ENCFAIL;
} else if ((v & 0xDC00) == 0xD800)
utf16 = 2;
else
utf16 = 1;
} else
utf16 = 1;
# endif
}
pending_surrogate = 0;
} else {
if (pending_surrogate) {
if (us)
((unsigned short *)us)[j] = pending_surrogate;
j++; /* Accept previousy written unpaired surrogate */
pending_surrogate = 0;
if (j >= dend)
break;
}
if ((v & 0xDC00) == 0xD800)
pending_surrogate = v;
else
pending_surrogate = 0;
}
} else {
if (pending_surrogate) {
if (us)
((unsigned short *)us)[j] = pending_surrogate;
j++; /* Accept previousy written unpaired surrogate */
pending_surrogate = 0;
if (j >= dend)
break;
}
}
if (pending_surrogate)
--j; /* don't accept unpaired surrogate, yet */
else if (us)
((unsigned short *)us)[j] = v;
# else
if (us)
((unsigned short *)us)[j] = v;
# endif
}
} else {
int delta;
@ -4664,6 +4718,11 @@ static int utf8_decode_x(const unsigned char *s, int start, int end,
}
}
# ifdef WINDOWS_UNICODE_SUPPORT
if (pending_surrogate)
oki -= 3;
#endif
if (ipos)
*ipos = oki;
if (jpos)
@ -4672,6 +4731,13 @@ static int utf8_decode_x(const unsigned char *s, int start, int end,
if (i < end)
return failmode;
# ifdef WINDOWS_UNICODE_SUPPORT
if (pending_surrogate) {
/* input must have ended right after surrogate */
return -1;
}
#endif
return j - dstart;
}