read: reject non-Latin-1 characters in byte-string literals
This is a backward-incompatible change, but the old behavior (silently truncating the character's code point to 8 bits) was never intended and seems clearly bad.
This commit is contained in:
parent
7ed82a5f15
commit
80a7ff831f
|
@ -521,7 +521,10 @@ literal.) See @secref["bytestrings"] for information on byte
|
||||||
strings. The resulting byte string is @tech{interned} in
|
strings. The resulting byte string is @tech{interned} in
|
||||||
@racket[read-syntax] mode.
|
@racket[read-syntax] mode.
|
||||||
Byte-string constants support the same escape sequences as
|
Byte-string constants support the same escape sequences as
|
||||||
character strings, except @litchar{\u} and @litchar{\U}.
|
character strings, except @litchar{\u} and @litchar{\U}. Otherwise, each
|
||||||
|
character within the byte-string quotes must have a Unicode code-point number
|
||||||
|
in the range 0 to 255, which is used as the corresponding byte's value; if
|
||||||
|
a character is not in that range, the @exnraise[exn:fail:read].
|
||||||
|
|
||||||
When the reader encounters @as-index{@litchar{#<<}}, it starts parsing a
|
When the reader encounters @as-index{@litchar{#<<}}, it starts parsing a
|
||||||
@pidefterm{here string}. The characters following @litchar{#<<} until
|
@pidefterm{here string}. The characters following @litchar{#<<} until
|
||||||
|
|
|
@ -196,6 +196,8 @@
|
||||||
(err/rt-test (readstr "#\"\\c\"") exn:fail:read?)
|
(err/rt-test (readstr "#\"\\c\"") exn:fail:read?)
|
||||||
(err/rt-test (readstr "#\"\\777\"") exn:fail:read?)
|
(err/rt-test (readstr "#\"\\777\"") exn:fail:read?)
|
||||||
(err/rt-test (readstr "#\"\\u0040\"") exn:fail:read?)
|
(err/rt-test (readstr "#\"\\u0040\"") exn:fail:read?)
|
||||||
|
(err/rt-test (readstr "#\"\u0100\"") exn:fail:read?)
|
||||||
|
(err/rt-test (readstr "#\"\u03BB\"") exn:fail:read?)
|
||||||
|
|
||||||
(load-relative "numstrs.rktl")
|
(load-relative "numstrs.rktl")
|
||||||
(let loop ([l number-table])
|
(let loop ([l number-table])
|
||||||
|
|
|
@ -82,22 +82,25 @@
|
||||||
;; What about byte string regexp strings
|
;; What about byte string regexp strings
|
||||||
[str (:or (:: (:? (:or "#px" "#rx")) "\"" (:* string-element (:: "\\" unicode)) "\"")
|
[str (:or (:: (:? (:or "#px" "#rx")) "\"" (:* string-element (:: "\\" unicode)) "\"")
|
||||||
byte-str)]
|
byte-str)]
|
||||||
[byte-str (:: (:? (:or "#px" "#rx")) "#\"" (:* string-element) "\"")]
|
[byte-str (:: (:? (:or "#px" "#rx")) "#\"" (:* byte-string-element) "\"")]
|
||||||
[string-element (:or (:~ "\"" "\\")
|
[string-element (:or (:~ "\"" "\\")
|
||||||
"\\\""
|
string-escape)]
|
||||||
"\\\\"
|
[byte-string-element (:or (:- (:/ "\x00" "\xFF") "\"" "\\")
|
||||||
"\\a"
|
string-escape)]
|
||||||
"\\b"
|
[string-escape (:or "\\\""
|
||||||
"\\t"
|
"\\\\"
|
||||||
"\\n"
|
"\\a"
|
||||||
"\\v"
|
"\\b"
|
||||||
"\\f"
|
"\\t"
|
||||||
"\\r"
|
"\\n"
|
||||||
"\\e"
|
"\\v"
|
||||||
"\\'"
|
"\\f"
|
||||||
(:: "\\" (:** 1 3 digit8))
|
"\\r"
|
||||||
(:: "\\x" (:** 1 2 digit16))
|
"\\e"
|
||||||
(:: "\\" #\newline))]
|
"\\'"
|
||||||
|
(:: "\\" (:** 1 3 digit8))
|
||||||
|
(:: "\\x" (:** 1 2 digit16))
|
||||||
|
(:: "\\" #\newline))]
|
||||||
|
|
||||||
[bad-str (:: (:? (:or "#px" "#rx")) (:? "#") "\""
|
[bad-str (:: (:? (:or "#px" "#rx")) (:? "#") "\""
|
||||||
(:* (:~ "\"" "\\")
|
(:* (:~ "\"" "\\")
|
||||||
|
|
|
@ -391,8 +391,8 @@ static intptr_t sch_vsprintf(char *s, intptr_t maxlen, const char *msg, va_list
|
||||||
tlen = 1;
|
tlen = 1;
|
||||||
} else {
|
} else {
|
||||||
mzchar mc;
|
mzchar mc;
|
||||||
|
mc = c;
|
||||||
tlen = scheme_utf8_encode_all(&mc, 1, (unsigned char *)buf);
|
tlen = scheme_utf8_encode_all(&mc, 1, (unsigned char *)buf);
|
||||||
c = (int)mc;
|
|
||||||
}
|
}
|
||||||
t = buf;
|
t = buf;
|
||||||
}
|
}
|
||||||
|
|
|
@ -3283,14 +3283,19 @@ read_string(int is_byte, Scheme_Object *port,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} else if (is_byte && (ch > 255)) {
|
||||||
|
if (err_ok)
|
||||||
|
scheme_read_err(port, stxsrc, line, col, pos, SPAN(port, pos), 0, indentation,
|
||||||
|
"read: out-of-range character in byte string: %c",
|
||||||
|
ch);
|
||||||
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (ch < 0) {
|
if (ch < 0) {
|
||||||
if (err_ok)
|
if (err_ok)
|
||||||
scheme_read_err(port, stxsrc, line, col, pos, SPAN(port, pos), 0, indentation,
|
scheme_read_err(port, stxsrc, line, col, pos, SPAN(port, pos), 0, indentation,
|
||||||
"read: out-of-range character in %s%s",
|
"read: out-of-range character in %sstring",
|
||||||
is_byte ? "byte " : "",
|
is_byte ? "byte " : "");
|
||||||
"string");
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user