Fix UTF-8 symbol repair

Commit 6a5a3037b4 was not quite right, because it used slightly the
wrong variant among a dozen decoding functions.  The test suite caught
the problem, but I forgot to run it before pushing.

Also, repair the "Inside" documentation on the function that was
incorrectly used, and document the new variant.
This commit is contained in:
Matthew Flatt 2014-06-23 15:27:48 +01:00
parent 6a5a3037b4
commit 408d6bb773
11 changed files with 45 additions and 6 deletions

View File

@ -57,11 +57,30 @@ If @var{ipos} is non-@cpp{NULL}, it is filled with the first undecoded
how many bytes were decoded before decoding stopped.
If @var{permissive} is non-zero, it is used as the decoding of bytes
that are not part of a valid UTF-8 encoding. Thus, the function
result can be @cpp{-2} only if @var{permissive} is @cpp{0}.
that are not part of a valid UTF-8 encoding or if the input ends in the
middle of an encoding. Thus, the function
result can be @cpp{-1} or @cpp{-2} only if @var{permissive} is @cpp{0}.
This function does not allocate or trigger garbage collection.}
@function[(int scheme_utf8_decode_offset_prefix
[const-unsigned-char* s]
[int start]
[int end]
[mzchar* us]
[int dstart]
[int dend]
[intptr_t* ipos]
[char utf16]
[int permissive])]{
Like @cpp{scheme_utf8_decode}, but returns @cpp{-1} if the input ends
in the middle of a UTF-8 encoding even if @var{permissive} is
non-zero.
@history[#:added "6.0.1.13"]}
@function[(int scheme_utf8_decode_as_prefix
[const-unsigned-char* s]
[int start]
@ -99,7 +118,7 @@ Like @cpp{scheme_utf8_decode}, but with fewer arguments. The
[int permissive])]{
Like @cpp{scheme_utf8_decode}, but with fewer arguments. The
decoding produces UCS-4 @cpp{mzchar}s. If the buffer @var{us}
decoding produces UCS-4 @cpp{mzchar}s. The buffer @var{us}
@bold{must} be non-@cpp{NULL}, and it is assumed to be long enough to hold the
decoding (which cannot be longer than the length of the input, though
it may be shorter). If @var{len} is negative, @cpp{strlen(@var{s})}

View File

@ -338,6 +338,7 @@ EXPORTS
scheme_is_cpointer
scheme_get_proc_name
scheme_utf8_decode
scheme_utf8_decode_offset_prefix
scheme_utf8_decode_as_prefix
scheme_utf8_decode_all
scheme_utf8_decode_prefix

View File

@ -353,6 +353,7 @@ EXPORTS
scheme_is_cpointer
scheme_get_proc_name
scheme_utf8_decode
scheme_utf8_decode_offset_prefix
scheme_utf8_decode_as_prefix
scheme_utf8_decode_all
scheme_utf8_decode_prefix

View File

@ -355,6 +355,7 @@ scheme_make_offset_external_cptr
scheme_is_cpointer
scheme_get_proc_name
scheme_utf8_decode
scheme_utf8_decode_offset_prefix
scheme_utf8_decode_as_prefix
scheme_utf8_decode_all
scheme_utf8_decode_prefix

View File

@ -361,6 +361,7 @@ scheme_make_offset_external_cptr
scheme_is_cpointer
scheme_get_proc_name
scheme_utf8_decode
scheme_utf8_decode_offset_prefix
scheme_utf8_decode_as_prefix
scheme_utf8_decode_all
scheme_utf8_decode_prefix

View File

@ -672,6 +672,9 @@ MZ_EXTERN const char *scheme_get_proc_name(Scheme_Object *p, int *len, int for_e
MZ_EXTERN intptr_t scheme_utf8_decode(const unsigned char *s, intptr_t start, intptr_t end,
unsigned int *us, intptr_t dstart, intptr_t dend,
intptr_t *ipos, char utf16, int permissive);
/* Same parameter list as scheme_utf8_decode(). Per the "Inside"
   documentation, it behaves like scheme_utf8_decode() except that it
   returns -1 when the input ends in the middle of a UTF-8 encoding,
   even if `permissive` is non-zero. */
MZ_EXTERN intptr_t scheme_utf8_decode_offset_prefix(const unsigned char *s, intptr_t start, intptr_t end,
unsigned int *us, intptr_t dstart, intptr_t dend,
intptr_t *ipos, char utf16, int permissive);
MZ_EXTERN intptr_t scheme_utf8_decode_as_prefix(const unsigned char *s, intptr_t start, intptr_t end,
unsigned int *us, intptr_t dstart, intptr_t dend,
intptr_t *ipos, char utf16, int permissive);

View File

@ -541,6 +541,9 @@ const char *(*scheme_get_proc_name)(Scheme_Object *p, int *len, int for_error);
intptr_t (*scheme_utf8_decode)(const unsigned char *s, intptr_t start, intptr_t end,
unsigned int *us, intptr_t dstart, intptr_t dend,
intptr_t *ipos, char utf16, int permissive);
intptr_t (*scheme_utf8_decode_offset_prefix)(const unsigned char *s, intptr_t start, intptr_t end,
unsigned int *us, intptr_t dstart, intptr_t dend,
intptr_t *ipos, char utf16, int permissive);
intptr_t (*scheme_utf8_decode_as_prefix)(const unsigned char *s, intptr_t start, intptr_t end,
unsigned int *us, intptr_t dstart, intptr_t dend,
intptr_t *ipos, char utf16, int permissive);

View File

@ -398,6 +398,7 @@
scheme_extension_table->scheme_is_cpointer = scheme_is_cpointer;
scheme_extension_table->scheme_get_proc_name = scheme_get_proc_name;
scheme_extension_table->scheme_utf8_decode = scheme_utf8_decode;
scheme_extension_table->scheme_utf8_decode_offset_prefix = scheme_utf8_decode_offset_prefix;
scheme_extension_table->scheme_utf8_decode_as_prefix = scheme_utf8_decode_as_prefix;
scheme_extension_table->scheme_utf8_decode_all = scheme_utf8_decode_all;
scheme_extension_table->scheme_utf8_decode_prefix = scheme_utf8_decode_prefix;

View File

@ -398,6 +398,7 @@
#define scheme_is_cpointer (scheme_extension_table->scheme_is_cpointer)
#define scheme_get_proc_name (scheme_extension_table->scheme_get_proc_name)
#define scheme_utf8_decode (scheme_extension_table->scheme_utf8_decode)
#define scheme_utf8_decode_offset_prefix (scheme_extension_table->scheme_utf8_decode_offset_prefix)
#define scheme_utf8_decode_as_prefix (scheme_extension_table->scheme_utf8_decode_as_prefix)
#define scheme_utf8_decode_all (scheme_extension_table->scheme_utf8_decode_all)
#define scheme_utf8_decode_prefix (scheme_extension_table->scheme_utf8_decode_prefix)

View File

@ -5476,6 +5476,14 @@ intptr_t scheme_utf8_decode(const unsigned char *s, intptr_t start, intptr_t end
ipos, NULL, utf16, utf16, NULL, 0, permissive);
}
/* Decode UTF-8 bytes s[start..end) into us[dstart..dend) by delegating
   to utf8_decode_x().

   The call is the same as the one made by scheme_utf8_decode(), except
   that the next-to-last argument is 1 instead of 0. According to the
   "Inside" documentation, the visible effect is that -1 is returned when
   the input ends in the middle of a UTF-8 encoding, even if `permissive`
   is non-zero.
   NOTE(review): the 1 is presumably an "input is a prefix" flag --
   confirm against utf8_decode_x().

   `utf16` is forwarded twice, in two separate argument positions of
   utf8_decode_x(). */
intptr_t scheme_utf8_decode_offset_prefix(const unsigned char *s, intptr_t start, intptr_t end,
                                          unsigned int *us, intptr_t dstart, intptr_t dend,
                                          intptr_t *ipos, char utf16, int permissive)
{
  intptr_t decoded;

  decoded = utf8_decode_x(s, start, end,
                          us, dstart, dend,
                          ipos, NULL,
                          utf16, utf16,
                          NULL, 1, permissive);

  return decoded;
}
intptr_t scheme_utf8_decode_as_prefix(const unsigned char *s, intptr_t start, intptr_t end,
unsigned int *us, intptr_t dstart, intptr_t dend,
intptr_t *ipos, char utf16, int permissive)

View File

@ -613,9 +613,9 @@ const char *scheme_symbol_name_and_size(Scheme_Object *sym, uintptr_t *length, i
mzchar buf[2];
int ul = 1;
while (1) {
if (scheme_utf8_decode((unsigned char *)s, i, i + ul,
buf, 0, 1,
NULL, 0, '?') > 0)
if (scheme_utf8_decode_offset_prefix((unsigned char *)s, i, i + ul,
buf, 0, 1,
NULL, 0, '?') > 0)
break;
ul++;
}