read: reject non-Latin-1 characters in byte-string literals
This is a backward-incompatible change, but the old behavior (truncating each character's value to 8 bits) was never intended and seems clearly bad.
This commit is contained in:
parent
7ed82a5f15
commit
80a7ff831f
|
@ -521,7 +521,10 @@ literal.) See @secref["bytestrings"] for information on byte
|
|||
strings. The resulting byte string is @tech{interned} in
|
||||
@racket[read-syntax] mode.
|
||||
Byte-string constants support the same escape sequences as
|
||||
character strings, except @litchar{\u} and @litchar{\U}.
|
||||
character strings, except @litchar{\u} and @litchar{\U}. Otherwise, each
|
||||
character within the byte-string quotes must have a Unicode code-point number
|
||||
in the range 0 to 255, which is used as the corresponding byte's value; if
|
||||
a character is not in that range, the @exnraise[exn:fail:read].
|
||||
|
||||
When the reader encounters @as-index{@litchar{#<<}}, it starts parsing a
|
||||
@pidefterm{here string}. The characters following @litchar{#<<} until
|
||||
|
|
|
@ -196,6 +196,8 @@
|
|||
(err/rt-test (readstr "#\"\\c\"") exn:fail:read?)
|
||||
(err/rt-test (readstr "#\"\\777\"") exn:fail:read?)
|
||||
(err/rt-test (readstr "#\"\\u0040\"") exn:fail:read?)
|
||||
(err/rt-test (readstr "#\"\u0100\"") exn:fail:read?)
|
||||
(err/rt-test (readstr "#\"\u03BB\"") exn:fail:read?)
|
||||
|
||||
(load-relative "numstrs.rktl")
|
||||
(let loop ([l number-table])
|
||||
|
|
|
@ -82,22 +82,25 @@
|
|||
;; What about byte-string regexp strings?
|
||||
[str (:or (:: (:? (:or "#px" "#rx")) "\"" (:* string-element (:: "\\" unicode)) "\"")
|
||||
byte-str)]
|
||||
[byte-str (:: (:? (:or "#px" "#rx")) "#\"" (:* string-element) "\"")]
|
||||
[byte-str (:: (:? (:or "#px" "#rx")) "#\"" (:* byte-string-element) "\"")]
|
||||
[string-element (:or (:~ "\"" "\\")
|
||||
"\\\""
|
||||
"\\\\"
|
||||
"\\a"
|
||||
"\\b"
|
||||
"\\t"
|
||||
"\\n"
|
||||
"\\v"
|
||||
"\\f"
|
||||
"\\r"
|
||||
"\\e"
|
||||
"\\'"
|
||||
(:: "\\" (:** 1 3 digit8))
|
||||
(:: "\\x" (:** 1 2 digit16))
|
||||
(:: "\\" #\newline))]
|
||||
string-escape)]
|
||||
[byte-string-element (:or (:- (:/ "\x00" "\xFF") "\"" "\\")
|
||||
string-escape)]
|
||||
[string-escape (:or "\\\""
|
||||
"\\\\"
|
||||
"\\a"
|
||||
"\\b"
|
||||
"\\t"
|
||||
"\\n"
|
||||
"\\v"
|
||||
"\\f"
|
||||
"\\r"
|
||||
"\\e"
|
||||
"\\'"
|
||||
(:: "\\" (:** 1 3 digit8))
|
||||
(:: "\\x" (:** 1 2 digit16))
|
||||
(:: "\\" #\newline))]
|
||||
|
||||
[bad-str (:: (:? (:or "#px" "#rx")) (:? "#") "\""
|
||||
(:* (:~ "\"" "\\")
|
||||
|
|
|
@ -391,8 +391,8 @@ static intptr_t sch_vsprintf(char *s, intptr_t maxlen, const char *msg, va_list
|
|||
tlen = 1;
|
||||
} else {
|
||||
mzchar mc;
|
||||
mc = c;
|
||||
tlen = scheme_utf8_encode_all(&mc, 1, (unsigned char *)buf);
|
||||
c = (int)mc;
|
||||
}
|
||||
t = buf;
|
||||
}
|
||||
|
|
|
@ -3283,14 +3283,19 @@ read_string(int is_byte, Scheme_Object *port,
|
|||
}
|
||||
}
|
||||
}
|
||||
} else if (is_byte && (ch > 255)) {
|
||||
if (err_ok)
|
||||
scheme_read_err(port, stxsrc, line, col, pos, SPAN(port, pos), 0, indentation,
|
||||
"read: out-of-range character in byte string: %c",
|
||||
ch);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (ch < 0) {
|
||||
if (err_ok)
|
||||
scheme_read_err(port, stxsrc, line, col, pos, SPAN(port, pos), 0, indentation,
|
||||
"read: out-of-range character in %s%s",
|
||||
is_byte ? "byte " : "",
|
||||
"string");
|
||||
"read: out-of-range character in %sstring",
|
||||
is_byte ? "byte " : "");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user