diff --git a/pkgs/racket-doc/scribblings/reference/reader.scrbl b/pkgs/racket-doc/scribblings/reference/reader.scrbl index 7eb875efc0..11a6537e50 100644 --- a/pkgs/racket-doc/scribblings/reference/reader.scrbl +++ b/pkgs/racket-doc/scribblings/reference/reader.scrbl @@ -53,8 +53,8 @@ necessarily produce an @tech{interned} value at the receiving @;------------------------------------------------------------------------ @section[#:tag "default-readtable-dispatch"]{Delimiters and Dispatch} -Along with @racketlink[char-whitespace?]{whitespace}, the following -characters are @defterm{delimiters}: +Along with @racketlink[char-whitespace?]{whitespace} and a BOM +character, the following characters are @defterm{delimiters}: @t{ @hspace[2] @ilitchar{(} @ilitchar{)} @ilitchar{[} @ilitchar{]} @@ -86,8 +86,9 @@ characters play special roles: ] -More precisely, after skipping whitespace, the reader dispatches based -on the next character or characters in the input stream as follows: +More precisely, after skipping whitespace and @racket[#\uFEFF] BOM +characters, the reader dispatches based on the next character or +characters in the input stream as follows: @dispatch-table[ @@ -189,6 +190,11 @@ on the next character or characters in the input stream as follows: ] +@history[#:changed "7.8.0.9" @elem{Changed treatment of the BOM + character so that it is treated + like whitespace in the same places + that comments are allowed.}] + @section[#:tag "parse-symbol"]{Reading Symbols} @@ -978,7 +984,7 @@ numbers are followed by a @litchar{.} intended to be read as a C-style infix dot, then a delimiter must precede the @litchar{.}. Finally, after reading any datum @racket[_x], the reader will seek -through whitespace and comments and look for zero or more sequences of a +through whitespace, BOM characters, and comments and look for zero or more sequences of a @litchar{.} followed by another datum @racket[_y]. It will then group @racket[_x] and @racket[_y] together in a @racket[#%dot] form so that @racket[_x.y] reads equal to @racket[(#%dot _x _y)]. diff --git a/pkgs/racket-test-core/tests/racket/read.rktl b/pkgs/racket-test-core/tests/racket/read.rktl index 4deb9f60f6..a993e2f606 100644 --- a/pkgs/racket-test-core/tests/racket/read.rktl +++ b/pkgs/racket-test-core/tests/racket/read.rktl @@ -655,6 +655,26 @@ (test-write-sym (cadar l) (cadar l) (cadar l)) (loop (cdr l))])) +(let () + (define BOM-utf8 (bytes #xEF #xBB #xBF)) + + (test "it-works" symbol->string + (read (open-input-bytes + (bytes-append BOM-utf8 #"it-works")))) + + (test '(1 2 3) read (open-input-bytes + (bytes-append BOM-utf8 + #"(" BOM-utf8 BOM-utf8 + #"1" BOM-utf8 + #"2" BOM-utf8 + #"3" BOM-utf8 BOM-utf8 #")" + BOM-utf8))) + + (test #t procedure? + (parameterize ([read-accept-reader #t]) + (read-language (open-input-bytes + (bytes-append BOM-utf8 #"#lang racket/base")))))) + ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Test mid-stream EOF diff --git a/racket/src/bc/src/startup.inc b/racket/src/bc/src/startup.inc index 85d7467aaa..61c27af085 100644 --- a/racket/src/bc/src/startup.inc +++ b/racket/src/bc/src/startup.inc @@ -54246,7 +54246,8 @@ static const char *startup_source = " #f)" "(let-values()(skip-loop_0 #f))" "(let-values() c_0))))" -"(if(char-whitespace? ec_0)" +"(if(let-values(((or-part_0)(char-whitespace? ec_0)))" +"(if or-part_0 or-part_0(eqv? '#\\uFEFF ec_0)))" "(let-values()(skip-loop_0 #f))" "(if(char=? '#\\; ec_0)" "(let-values()" @@ -54491,9 +54492,12 @@ static const char *startup_source = " (let-values (((or-part_11) (char=? dc_0 '#\\\")))" "(if or-part_11" " or-part_11" +"(let-values(((or-part_12)(char=? dc_0 '#\\uFEFF)))" +"(if or-part_12" +" or-part_12" "(if(char=? dc_0 '#\\.)" "(check-parameter 1/read-cdot config_0)" -" #f))))))))))))))))))))))))))))))))" +" #f))))))))))))))))))))))))))))))))))" "(define-values" "(char-delimiter?)" "(lambda(c_0 config_0)(begin(readtable-char-delimiter?(read-config-readtable config_0) c_0 config_0))))" diff --git a/racket/src/cs/schemified/expander.scm b/racket/src/cs/schemified/expander.scm index 9aeafa7d3c..9f02d0a989 100644 --- a/racket/src/cs/schemified/expander.scm +++ b/racket/src/cs/schemified/expander.scm @@ -64072,7 +64072,8 @@ #f) (skip-loop_0 #f) c_0)) - (if (char-whitespace? ec_0) + (if (let ((or-part_0 (char-whitespace? ec_0))) + (if or-part_0 or-part_0 (eqv? '#\xfeff ec_0))) (skip-loop_0 #f) (if (char=? '#\x3b ec_0) (begin @@ -64322,13 +64323,19 @@ '#\x22))) (if or-part_11 or-part_11 - (if (char=? - dc_0 - '#\x2e) - (check-parameter - 1/read-cdot - config_0) - #f)))))))))))))))))))))))))))))) + (let ((or-part_12 + (char=? + dc_0 + '#\xfeff))) + (if or-part_12 + or-part_12 + (if (char=? + dc_0 + '#\x2e) + (check-parameter + 1/read-cdot + config_0) + #f)))))))))))))))))))))))))))))))) (define char-delimiter? (lambda (c_0 config_0) (readtable-char-delimiter? diff --git a/racket/src/expander/read/delimiter.rkt b/racket/src/expander/read/delimiter.rkt index ff22d018bd..18f6c51b95 100644 --- a/racket/src/expander/read/delimiter.rkt +++ b/racket/src/expander/read/delimiter.rkt @@ -26,6 +26,7 @@ (char=? dc #\,) (char=? dc #\;) (char=? dc #\") + (char=? dc #\uFEFF) ; treat BOM as comment-like whitespace (and (char=? dc #\.) (check-parameter read-cdot config)))])) diff --git a/racket/src/expander/read/whitespace.rkt b/racket/src/expander/read/whitespace.rkt index 8bf1b3524c..c952c6d9ab 100644 --- a/racket/src/expander/read/whitespace.rkt +++ b/racket/src/expander/read/whitespace.rkt @@ -31,7 +31,9 @@ (not (read-config-keep-comment? config))) (skip-loop #f)] [else c])] - [(char-whitespace? ec) + [(or (char-whitespace? ec) + ;; treat BOM as whitespace in the same sense as a comment: + (eqv? #\uFEFF ec)) (skip-loop #f)] [(char=? #\; ec) (let loop ()