Fix UTF-8 symbol repair

Commit 6a5a3037b4 was not quite right, because it used slightly the
wrong variant among a dozen decoding functions.  The test suite caught
the problem, but I forgot to run it before pushing.

Also, repair the "Inside" documentation on the function that was
incorrectly used, and document the new variant.
This commit is contained in:
Matthew Flatt 2014-06-23 15:27:48 +01:00
parent 6a5a3037b4
commit 408d6bb773
11 changed files with 45 additions and 6 deletions

View File

@ -57,11 +57,30 @@ If @var{ipos} is non-@cpp{NULL}, it is filled with the first undecoded
how many bytes were decoded before decoding stopped. how many bytes were decoded before decoding stopped.
If @var{permissive} is non-zero, it is used as the decoding of bytes If @var{permissive} is non-zero, it is used as the decoding of bytes
that are not part of a valid UTF-8 encoding. Thus, the function that are not part of a valid UTF-8 encoding or if the input ends in the
result can be @cpp{-2} only if @var{permissive} is @cpp{0}. middle of an encoding. Thus, the function
result can be @cpp{-1} or @cpp{-2} only if @var{permissive} is @cpp{0}.
This function does not allocate or trigger garbage collection.} This function does not allocate or trigger garbage collection.}
@function[(int scheme_utf8_decode_offset_prefix
[const-unsigned-char* s]
[int start]
[int end]
[mzchar* us]
[int dstart]
[int dend]
[intptr_t* ipos]
[char utf16]
[int permissive])]{
Like @cpp{scheme_utf8_decode}, but returns @cpp{-1} if the input ends
in the middle of a UTF-8 encoding even if @var{permissive} is
non-zero.
@history[#:added "6.0.1.13"]}
@function[(int scheme_utf8_decode_as_prefix @function[(int scheme_utf8_decode_as_prefix
[const-unsigned-char* s] [const-unsigned-char* s]
[int start] [int start]
@ -99,7 +118,7 @@ Like @cpp{scheme_utf8_decode}, but with fewer arguments. The
[int permissive])]{ [int permissive])]{
Like @cpp{scheme_utf8_decode}, but with fewer arguments. The Like @cpp{scheme_utf8_decode}, but with fewer arguments. The
decoding produces UCS-4 @cpp{mzchar}s. If the buffer @var{us} decoding produces UCS-4 @cpp{mzchar}s. The buffer @var{us}
@bold{must} be non-@cpp{NULL}, and it is assumed to be long enough to hold the @bold{must} be non-@cpp{NULL}, and it is assumed to be long enough to hold the
decoding (which cannot be longer than the length of the input, though decoding (which cannot be longer than the length of the input, though
it may be shorter). If @var{len} is negative, @cpp{strlen(@var{s})} it may be shorter). If @var{len} is negative, @cpp{strlen(@var{s})}

View File

@ -338,6 +338,7 @@ EXPORTS
scheme_is_cpointer scheme_is_cpointer
scheme_get_proc_name scheme_get_proc_name
scheme_utf8_decode scheme_utf8_decode
scheme_utf8_decode_offset_prefix
scheme_utf8_decode_as_prefix scheme_utf8_decode_as_prefix
scheme_utf8_decode_all scheme_utf8_decode_all
scheme_utf8_decode_prefix scheme_utf8_decode_prefix

View File

@ -353,6 +353,7 @@ EXPORTS
scheme_is_cpointer scheme_is_cpointer
scheme_get_proc_name scheme_get_proc_name
scheme_utf8_decode scheme_utf8_decode
scheme_utf8_decode_offset_prefix
scheme_utf8_decode_as_prefix scheme_utf8_decode_as_prefix
scheme_utf8_decode_all scheme_utf8_decode_all
scheme_utf8_decode_prefix scheme_utf8_decode_prefix

View File

@ -355,6 +355,7 @@ scheme_make_offset_external_cptr
scheme_is_cpointer scheme_is_cpointer
scheme_get_proc_name scheme_get_proc_name
scheme_utf8_decode scheme_utf8_decode
scheme_utf8_decode_offset_prefix
scheme_utf8_decode_as_prefix scheme_utf8_decode_as_prefix
scheme_utf8_decode_all scheme_utf8_decode_all
scheme_utf8_decode_prefix scheme_utf8_decode_prefix

View File

@ -361,6 +361,7 @@ scheme_make_offset_external_cptr
scheme_is_cpointer scheme_is_cpointer
scheme_get_proc_name scheme_get_proc_name
scheme_utf8_decode scheme_utf8_decode
scheme_utf8_decode_offset_prefix
scheme_utf8_decode_as_prefix scheme_utf8_decode_as_prefix
scheme_utf8_decode_all scheme_utf8_decode_all
scheme_utf8_decode_prefix scheme_utf8_decode_prefix

View File

@ -672,6 +672,9 @@ MZ_EXTERN const char *scheme_get_proc_name(Scheme_Object *p, int *len, int for_e
MZ_EXTERN intptr_t scheme_utf8_decode(const unsigned char *s, intptr_t start, intptr_t end, MZ_EXTERN intptr_t scheme_utf8_decode(const unsigned char *s, intptr_t start, intptr_t end,
unsigned int *us, intptr_t dstart, intptr_t dend, unsigned int *us, intptr_t dstart, intptr_t dend,
intptr_t *ipos, char utf16, int permissive); intptr_t *ipos, char utf16, int permissive);
MZ_EXTERN intptr_t scheme_utf8_decode_offset_prefix(const unsigned char *s, intptr_t start, intptr_t end,
unsigned int *us, intptr_t dstart, intptr_t dend,
intptr_t *ipos, char utf16, int permissive);
MZ_EXTERN intptr_t scheme_utf8_decode_as_prefix(const unsigned char *s, intptr_t start, intptr_t end, MZ_EXTERN intptr_t scheme_utf8_decode_as_prefix(const unsigned char *s, intptr_t start, intptr_t end,
unsigned int *us, intptr_t dstart, intptr_t dend, unsigned int *us, intptr_t dstart, intptr_t dend,
intptr_t *ipos, char utf16, int permissive); intptr_t *ipos, char utf16, int permissive);

View File

@ -541,6 +541,9 @@ const char *(*scheme_get_proc_name)(Scheme_Object *p, int *len, int for_error);
intptr_t (*scheme_utf8_decode)(const unsigned char *s, intptr_t start, intptr_t end, intptr_t (*scheme_utf8_decode)(const unsigned char *s, intptr_t start, intptr_t end,
unsigned int *us, intptr_t dstart, intptr_t dend, unsigned int *us, intptr_t dstart, intptr_t dend,
intptr_t *ipos, char utf16, int permissive); intptr_t *ipos, char utf16, int permissive);
intptr_t (*scheme_utf8_decode_offset_prefix)(const unsigned char *s, intptr_t start, intptr_t end,
unsigned int *us, intptr_t dstart, intptr_t dend,
intptr_t *ipos, char utf16, int permissive);
intptr_t (*scheme_utf8_decode_as_prefix)(const unsigned char *s, intptr_t start, intptr_t end, intptr_t (*scheme_utf8_decode_as_prefix)(const unsigned char *s, intptr_t start, intptr_t end,
unsigned int *us, intptr_t dstart, intptr_t dend, unsigned int *us, intptr_t dstart, intptr_t dend,
intptr_t *ipos, char utf16, int permissive); intptr_t *ipos, char utf16, int permissive);

View File

@ -398,6 +398,7 @@
scheme_extension_table->scheme_is_cpointer = scheme_is_cpointer; scheme_extension_table->scheme_is_cpointer = scheme_is_cpointer;
scheme_extension_table->scheme_get_proc_name = scheme_get_proc_name; scheme_extension_table->scheme_get_proc_name = scheme_get_proc_name;
scheme_extension_table->scheme_utf8_decode = scheme_utf8_decode; scheme_extension_table->scheme_utf8_decode = scheme_utf8_decode;
scheme_extension_table->scheme_utf8_decode_offset_prefix = scheme_utf8_decode_offset_prefix;
scheme_extension_table->scheme_utf8_decode_as_prefix = scheme_utf8_decode_as_prefix; scheme_extension_table->scheme_utf8_decode_as_prefix = scheme_utf8_decode_as_prefix;
scheme_extension_table->scheme_utf8_decode_all = scheme_utf8_decode_all; scheme_extension_table->scheme_utf8_decode_all = scheme_utf8_decode_all;
scheme_extension_table->scheme_utf8_decode_prefix = scheme_utf8_decode_prefix; scheme_extension_table->scheme_utf8_decode_prefix = scheme_utf8_decode_prefix;

View File

@ -398,6 +398,7 @@
#define scheme_is_cpointer (scheme_extension_table->scheme_is_cpointer) #define scheme_is_cpointer (scheme_extension_table->scheme_is_cpointer)
#define scheme_get_proc_name (scheme_extension_table->scheme_get_proc_name) #define scheme_get_proc_name (scheme_extension_table->scheme_get_proc_name)
#define scheme_utf8_decode (scheme_extension_table->scheme_utf8_decode) #define scheme_utf8_decode (scheme_extension_table->scheme_utf8_decode)
#define scheme_utf8_decode_offset_prefix (scheme_extension_table->scheme_utf8_decode_offset_prefix)
#define scheme_utf8_decode_as_prefix (scheme_extension_table->scheme_utf8_decode_as_prefix) #define scheme_utf8_decode_as_prefix (scheme_extension_table->scheme_utf8_decode_as_prefix)
#define scheme_utf8_decode_all (scheme_extension_table->scheme_utf8_decode_all) #define scheme_utf8_decode_all (scheme_extension_table->scheme_utf8_decode_all)
#define scheme_utf8_decode_prefix (scheme_extension_table->scheme_utf8_decode_prefix) #define scheme_utf8_decode_prefix (scheme_extension_table->scheme_utf8_decode_prefix)

View File

@ -5476,6 +5476,14 @@ intptr_t scheme_utf8_decode(const unsigned char *s, intptr_t start, intptr_t end
ipos, NULL, utf16, utf16, NULL, 0, permissive); ipos, NULL, utf16, utf16, NULL, 0, permissive);
} }
/* Variant of scheme_utf8_decode(): per the accompanying documentation it
   returns -1 when the input ends in the middle of a UTF-8 encoding, even
   if `permissive` is non-zero. */
intptr_t scheme_utf8_decode_offset_prefix(const unsigned char *s, intptr_t start, intptr_t end,
unsigned int *us, intptr_t dstart, intptr_t dend,
intptr_t *ipos, char utf16, int permissive)
{
intptr_t decoded;

/* Same call that scheme_utf8_decode() makes, except that the argument
   just before `permissive` is 1 instead of 0. */
decoded = utf8_decode_x(s, start, end,
us, dstart, dend,
ipos, NULL,
utf16, utf16,
NULL, 1, permissive);

return decoded;
}
intptr_t scheme_utf8_decode_as_prefix(const unsigned char *s, intptr_t start, intptr_t end, intptr_t scheme_utf8_decode_as_prefix(const unsigned char *s, intptr_t start, intptr_t end,
unsigned int *us, intptr_t dstart, intptr_t dend, unsigned int *us, intptr_t dstart, intptr_t dend,
intptr_t *ipos, char utf16, int permissive) intptr_t *ipos, char utf16, int permissive)

View File

@ -613,9 +613,9 @@ const char *scheme_symbol_name_and_size(Scheme_Object *sym, uintptr_t *length, i
mzchar buf[2]; mzchar buf[2];
int ul = 1; int ul = 1;
while (1) { while (1) {
if (scheme_utf8_decode((unsigned char *)s, i, i + ul, if (scheme_utf8_decode_offset_prefix((unsigned char *)s, i, i + ul,
buf, 0, 1, buf, 0, 1,
NULL, 0, '?') > 0) NULL, 0, '?') > 0)
break; break;
ul++; ul++;
} }