Fix UTF-8 symbol repair
Commit 6a5a3037b4
was not quite right, because it used slightly the
wrong variant among a dozen decoding functions. The test suite caught
the problem, but I forgot to run it before pushing.
Also, repair the "Inside" documentation on the function that was
incorrectly used, and document the new variant.
This commit is contained in:
parent
6a5a3037b4
commit
408d6bb773
|
@ -57,11 +57,30 @@ If @var{ipos} is non-@cpp{NULL}, it is filled with the first undecoded
|
||||||
how many bytes were decoded before decoding stopped.
|
how many bytes were decoded before decoding stopped.
|
||||||
|
|
||||||
If @var{permissive} is non-zero, it is used as the decoding of bytes
|
If @var{permissive} is non-zero, it is used as the decoding of bytes
|
||||||
that are not part of a valid UTF-8 encoding. Thus, the function
|
that are not part of a valid UTF-8 encoding or if the input ends in the
|
||||||
result can be @cpp{-2} only if @var{permissive} is @cpp{0}.
|
middle of an encoding. Thus, the function
|
||||||
|
result can be @cpp{-1} or @cpp{-2} only if @var{permissive} is @cpp{0}.
|
||||||
|
|
||||||
This function does not allocate or trigger garbage collection.}
|
This function does not allocate or trigger garbage collection.}
|
||||||
|
|
||||||
|
@function[(int scheme_utf8_decode_offset_prefix
|
||||||
|
[const-unsigned-char* s]
|
||||||
|
[int start]
|
||||||
|
[int end]
|
||||||
|
[mzchar* us]
|
||||||
|
[int dstart]
|
||||||
|
[int dend]
|
||||||
|
[intptr_t* ipos]
|
||||||
|
[char utf16]
|
||||||
|
[int permissive])]{
|
||||||
|
|
||||||
|
Like @cpp{scheme_utf8_decode}, but returns @cpp{-1} if the input ends
|
||||||
|
in the middle of a UTF-8 encoding even if @var{permissive} is
|
||||||
|
non-zero.
|
||||||
|
|
||||||
|
@history[#:added "6.0.1.13"]}
|
||||||
|
|
||||||
|
|
||||||
@function[(int scheme_utf8_decode_as_prefix
|
@function[(int scheme_utf8_decode_as_prefix
|
||||||
[const-unsigned-char* s]
|
[const-unsigned-char* s]
|
||||||
[int start]
|
[int start]
|
||||||
|
@ -99,7 +118,7 @@ Like @cpp{scheme_utf8_decode}, but with fewer arguments. The
|
||||||
[int permissive])]{
|
[int permissive])]{
|
||||||
|
|
||||||
Like @cpp{scheme_utf8_decode}, but with fewer arguments. The
|
Like @cpp{scheme_utf8_decode}, but with fewer arguments. The
|
||||||
decoding produces UCS-4 @cpp{mzchar}s. If the buffer @var{us}
|
decoding produces UCS-4 @cpp{mzchar}s. The buffer @var{us}
|
||||||
@bold{must} be non-@cpp{NULL}, and it is assumed to be long enough to hold the
|
@bold{must} be non-@cpp{NULL}, and it is assumed to be long enough to hold the
|
||||||
decoding (which cannot be longer than the length of the input, though
|
decoding (which cannot be longer than the length of the input, though
|
||||||
it may be shorter). If @var{len} is negative, @cpp{strlen(@var{s})}
|
it may be shorter). If @var{len} is negative, @cpp{strlen(@var{s})}
|
||||||
|
|
|
@ -338,6 +338,7 @@ EXPORTS
|
||||||
scheme_is_cpointer
|
scheme_is_cpointer
|
||||||
scheme_get_proc_name
|
scheme_get_proc_name
|
||||||
scheme_utf8_decode
|
scheme_utf8_decode
|
||||||
|
scheme_utf8_decode_offset_prefix
|
||||||
scheme_utf8_decode_as_prefix
|
scheme_utf8_decode_as_prefix
|
||||||
scheme_utf8_decode_all
|
scheme_utf8_decode_all
|
||||||
scheme_utf8_decode_prefix
|
scheme_utf8_decode_prefix
|
||||||
|
|
|
@ -353,6 +353,7 @@ EXPORTS
|
||||||
scheme_is_cpointer
|
scheme_is_cpointer
|
||||||
scheme_get_proc_name
|
scheme_get_proc_name
|
||||||
scheme_utf8_decode
|
scheme_utf8_decode
|
||||||
|
scheme_utf8_decode_offset_prefix
|
||||||
scheme_utf8_decode_as_prefix
|
scheme_utf8_decode_as_prefix
|
||||||
scheme_utf8_decode_all
|
scheme_utf8_decode_all
|
||||||
scheme_utf8_decode_prefix
|
scheme_utf8_decode_prefix
|
||||||
|
|
|
@ -355,6 +355,7 @@ scheme_make_offset_external_cptr
|
||||||
scheme_is_cpointer
|
scheme_is_cpointer
|
||||||
scheme_get_proc_name
|
scheme_get_proc_name
|
||||||
scheme_utf8_decode
|
scheme_utf8_decode
|
||||||
|
scheme_utf8_decode_offset_prefix
|
||||||
scheme_utf8_decode_as_prefix
|
scheme_utf8_decode_as_prefix
|
||||||
scheme_utf8_decode_all
|
scheme_utf8_decode_all
|
||||||
scheme_utf8_decode_prefix
|
scheme_utf8_decode_prefix
|
||||||
|
|
|
@ -361,6 +361,7 @@ scheme_make_offset_external_cptr
|
||||||
scheme_is_cpointer
|
scheme_is_cpointer
|
||||||
scheme_get_proc_name
|
scheme_get_proc_name
|
||||||
scheme_utf8_decode
|
scheme_utf8_decode
|
||||||
|
scheme_utf8_decode_offset_prefix
|
||||||
scheme_utf8_decode_as_prefix
|
scheme_utf8_decode_as_prefix
|
||||||
scheme_utf8_decode_all
|
scheme_utf8_decode_all
|
||||||
scheme_utf8_decode_prefix
|
scheme_utf8_decode_prefix
|
||||||
|
|
|
@ -672,6 +672,9 @@ MZ_EXTERN const char *scheme_get_proc_name(Scheme_Object *p, int *len, int for_e
|
||||||
MZ_EXTERN intptr_t scheme_utf8_decode(const unsigned char *s, intptr_t start, intptr_t end,
|
MZ_EXTERN intptr_t scheme_utf8_decode(const unsigned char *s, intptr_t start, intptr_t end,
|
||||||
unsigned int *us, intptr_t dstart, intptr_t dend,
|
unsigned int *us, intptr_t dstart, intptr_t dend,
|
||||||
intptr_t *ipos, char utf16, int permissive);
|
intptr_t *ipos, char utf16, int permissive);
|
||||||
|
MZ_EXTERN intptr_t scheme_utf8_decode_offset_prefix(const unsigned char *s, intptr_t start, intptr_t end,
|
||||||
|
unsigned int *us, intptr_t dstart, intptr_t dend,
|
||||||
|
intptr_t *ipos, char utf16, int permissive);
|
||||||
MZ_EXTERN intptr_t scheme_utf8_decode_as_prefix(const unsigned char *s, intptr_t start, intptr_t end,
|
MZ_EXTERN intptr_t scheme_utf8_decode_as_prefix(const unsigned char *s, intptr_t start, intptr_t end,
|
||||||
unsigned int *us, intptr_t dstart, intptr_t dend,
|
unsigned int *us, intptr_t dstart, intptr_t dend,
|
||||||
intptr_t *ipos, char utf16, int permissive);
|
intptr_t *ipos, char utf16, int permissive);
|
||||||
|
|
|
@ -541,6 +541,9 @@ const char *(*scheme_get_proc_name)(Scheme_Object *p, int *len, int for_error);
|
||||||
intptr_t (*scheme_utf8_decode)(const unsigned char *s, intptr_t start, intptr_t end,
|
intptr_t (*scheme_utf8_decode)(const unsigned char *s, intptr_t start, intptr_t end,
|
||||||
unsigned int *us, intptr_t dstart, intptr_t dend,
|
unsigned int *us, intptr_t dstart, intptr_t dend,
|
||||||
intptr_t *ipos, char utf16, int permissive);
|
intptr_t *ipos, char utf16, int permissive);
|
||||||
|
intptr_t (*scheme_utf8_decode_offset_prefix)(const unsigned char *s, intptr_t start, intptr_t end,
|
||||||
|
unsigned int *us, intptr_t dstart, intptr_t dend,
|
||||||
|
intptr_t *ipos, char utf16, int permissive);
|
||||||
intptr_t (*scheme_utf8_decode_as_prefix)(const unsigned char *s, intptr_t start, intptr_t end,
|
intptr_t (*scheme_utf8_decode_as_prefix)(const unsigned char *s, intptr_t start, intptr_t end,
|
||||||
unsigned int *us, intptr_t dstart, intptr_t dend,
|
unsigned int *us, intptr_t dstart, intptr_t dend,
|
||||||
intptr_t *ipos, char utf16, int permissive);
|
intptr_t *ipos, char utf16, int permissive);
|
||||||
|
|
|
@ -398,6 +398,7 @@
|
||||||
scheme_extension_table->scheme_is_cpointer = scheme_is_cpointer;
|
scheme_extension_table->scheme_is_cpointer = scheme_is_cpointer;
|
||||||
scheme_extension_table->scheme_get_proc_name = scheme_get_proc_name;
|
scheme_extension_table->scheme_get_proc_name = scheme_get_proc_name;
|
||||||
scheme_extension_table->scheme_utf8_decode = scheme_utf8_decode;
|
scheme_extension_table->scheme_utf8_decode = scheme_utf8_decode;
|
||||||
|
scheme_extension_table->scheme_utf8_decode_offset_prefix = scheme_utf8_decode_offset_prefix;
|
||||||
scheme_extension_table->scheme_utf8_decode_as_prefix = scheme_utf8_decode_as_prefix;
|
scheme_extension_table->scheme_utf8_decode_as_prefix = scheme_utf8_decode_as_prefix;
|
||||||
scheme_extension_table->scheme_utf8_decode_all = scheme_utf8_decode_all;
|
scheme_extension_table->scheme_utf8_decode_all = scheme_utf8_decode_all;
|
||||||
scheme_extension_table->scheme_utf8_decode_prefix = scheme_utf8_decode_prefix;
|
scheme_extension_table->scheme_utf8_decode_prefix = scheme_utf8_decode_prefix;
|
||||||
|
|
|
@ -398,6 +398,7 @@
|
||||||
#define scheme_is_cpointer (scheme_extension_table->scheme_is_cpointer)
|
#define scheme_is_cpointer (scheme_extension_table->scheme_is_cpointer)
|
||||||
#define scheme_get_proc_name (scheme_extension_table->scheme_get_proc_name)
|
#define scheme_get_proc_name (scheme_extension_table->scheme_get_proc_name)
|
||||||
#define scheme_utf8_decode (scheme_extension_table->scheme_utf8_decode)
|
#define scheme_utf8_decode (scheme_extension_table->scheme_utf8_decode)
|
||||||
|
#define scheme_utf8_decode_offset_prefix (scheme_extension_table->scheme_utf8_decode_offset_prefix)
|
||||||
#define scheme_utf8_decode_as_prefix (scheme_extension_table->scheme_utf8_decode_as_prefix)
|
#define scheme_utf8_decode_as_prefix (scheme_extension_table->scheme_utf8_decode_as_prefix)
|
||||||
#define scheme_utf8_decode_all (scheme_extension_table->scheme_utf8_decode_all)
|
#define scheme_utf8_decode_all (scheme_extension_table->scheme_utf8_decode_all)
|
||||||
#define scheme_utf8_decode_prefix (scheme_extension_table->scheme_utf8_decode_prefix)
|
#define scheme_utf8_decode_prefix (scheme_extension_table->scheme_utf8_decode_prefix)
|
||||||
|
|
|
@ -5476,6 +5476,14 @@ intptr_t scheme_utf8_decode(const unsigned char *s, intptr_t start, intptr_t end
|
||||||
ipos, NULL, utf16, utf16, NULL, 0, permissive);
|
ipos, NULL, utf16, utf16, NULL, 0, permissive);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
intptr_t scheme_utf8_decode_offset_prefix(const unsigned char *s, intptr_t start, intptr_t end,
|
||||||
|
unsigned int *us, intptr_t dstart, intptr_t dend,
|
||||||
|
intptr_t *ipos, char utf16, int permissive)
|
||||||
|
{
|
||||||
|
return utf8_decode_x(s, start, end, us, dstart, dend,
|
||||||
|
ipos, NULL, utf16, utf16, NULL, 1, permissive);
|
||||||
|
}
|
||||||
|
|
||||||
intptr_t scheme_utf8_decode_as_prefix(const unsigned char *s, intptr_t start, intptr_t end,
|
intptr_t scheme_utf8_decode_as_prefix(const unsigned char *s, intptr_t start, intptr_t end,
|
||||||
unsigned int *us, intptr_t dstart, intptr_t dend,
|
unsigned int *us, intptr_t dstart, intptr_t dend,
|
||||||
intptr_t *ipos, char utf16, int permissive)
|
intptr_t *ipos, char utf16, int permissive)
|
||||||
|
|
|
@ -613,9 +613,9 @@ const char *scheme_symbol_name_and_size(Scheme_Object *sym, uintptr_t *length, i
|
||||||
mzchar buf[2];
|
mzchar buf[2];
|
||||||
int ul = 1;
|
int ul = 1;
|
||||||
while (1) {
|
while (1) {
|
||||||
if (scheme_utf8_decode((unsigned char *)s, i, i + ul,
|
if (scheme_utf8_decode_offset_prefix((unsigned char *)s, i, i + ul,
|
||||||
buf, 0, 1,
|
buf, 0, 1,
|
||||||
NULL, 0, '?') > 0)
|
NULL, 0, '?') > 0)
|
||||||
break;
|
break;
|
||||||
ul++;
|
ul++;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user