Fixes PR11793.
It was very hard to figure out why � is invalid. Both the XML 1.0 and 1.1 specs say that a CharRef can be '&#' [0-9]+ ';' (sec 4.1), but there is a note on the side that it must also be a valid Char. The Char definition (sec 2.2) then lays out which characters are valid. (The 1.0 and 1.1 specs disagree though.) I've gone with the 1.1 definition. I did not update the rest of the reader to disallow those characters in pcdata segments. If this hurts you or you morally disagree, submit another PR, please.
This commit is contained in:
parent
b80728f4b9
commit
af57e0419c
|
@ -121,6 +121,7 @@ END
|
|||
(test-xexpr? (list 'p "one" "two" "three"))
|
||||
(test-xexpr? 'nbsp)
|
||||
(test-xexpr? 10)
|
||||
(test-not-xexpr? 0)
|
||||
(test-xexpr? (make-cdata #f #f "unquoted <b>"))
|
||||
(test-xexpr? (make-comment "Comment!"))
|
||||
(test-xexpr? (make-pcdata #f #f "quoted <b>"))
|
||||
|
@ -249,6 +250,10 @@ END
|
|||
(list (make-entity (make-source (make-location 1 6 7) (make-location 1 11 12)) '40)))
|
||||
(list)))
|
||||
|
||||
(test-read-xml/exn
|
||||
"<root>�</root>"
|
||||
"read-xml: lex-error: at position 1.10/11: not a well-formed numeric entity (does not match the production for Char, see XML 4.1)")
|
||||
|
||||
(test-read-xml
|
||||
"<!-- comment --><br />"
|
||||
'(make-document
|
||||
|
@ -616,7 +621,7 @@ END
|
|||
(validate-xexpr xe))))]
|
||||
(test-suite
|
||||
"validate-xexpr"
|
||||
(test-validate-xexpr 4)
|
||||
(test-validate-xexpr 64)
|
||||
(test-validate-xexpr 'nbsp)
|
||||
(test-validate-xexpr "string")
|
||||
(test-validate-xexpr (make-pcdata #f #f "pcdata"))
|
||||
|
@ -628,6 +633,7 @@ END
|
|||
(test-validate-xexpr '(a ([href "#"]) "string"))
|
||||
|
||||
(test-validate-xexpr/exn #f #f)
|
||||
(test-validate-xexpr/exn 4 4)
|
||||
(test-validate-xexpr/exn + +)
|
||||
(test-validate-xexpr/exn '(a ([href foo]) bar) 'foo)
|
||||
(test-validate-xexpr/exn '("foo" bar) '("foo" bar))))
|
||||
|
|
|
@ -203,6 +203,8 @@
|
|||
[else (string->number (read-until #\; in pos))])])
|
||||
(unless (number? n)
|
||||
(lex-error in pos "malformed numeric entity"))
|
||||
(unless (valid-char? n)
|
||||
(lex-error in pos "not a well-formed numeric entity (does not match the production for Char, see XML 4.1)"))
|
||||
n)]
|
||||
[else
|
||||
(begin0
|
||||
|
|
|
@ -44,6 +44,17 @@
|
|||
; Misc = Comment
|
||||
; | Processing-instruction
|
||||
|
||||
; Section 2.2 of XML 1.1
|
||||
; (XML 1.0 is slightly different and looks less restrictive)
|
||||
(define (valid-char? i)
|
||||
(and (exact-nonnegative-integer? i)
|
||||
(or (= i #x9)
|
||||
(= i #xA)
|
||||
(= i #xD)
|
||||
(<= #x20 i #xD7FF)
|
||||
(<= #xE000 i #xFFFD)
|
||||
(<= #x10000 i #x10FFFF))))
|
||||
|
||||
; Entity = (make-entity Location Location (U Nat Symbol))
|
||||
(define-struct (entity source) (text) #:transparent)
|
||||
|
||||
|
@ -124,6 +135,7 @@
|
|||
(struct (cdata source) ([start location/c]
|
||||
[stop location/c]
|
||||
[string string?]))
|
||||
[valid-char? (any/c . -> . boolean?)]
|
||||
(struct (entity source) ([start location/c]
|
||||
[stop location/c]
|
||||
[text (or/c symbol? exact-nonnegative-integer?)])))
|
||||
[text (or/c symbol? valid-char?)])))
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
;; | (list* Symbol (listof Attribute-srep) (listof Xexpr))
|
||||
;; | (cons Symbol (listof Xexpr))
|
||||
;; | Symbol
|
||||
;; | Nat
|
||||
;; | Nat (WFC: Valid Char)
|
||||
;; | Comment
|
||||
;; | Processing-instruction
|
||||
;; | Cdata
|
||||
|
@ -25,7 +25,7 @@
|
|||
(define xexpr-drop-empty-attributes (make-parameter #f))
|
||||
|
||||
(define xexpr-datum/c
|
||||
(or/c string? symbol? exact-nonnegative-integer?
|
||||
(or/c string? symbol? valid-char?
|
||||
comment? p-i? cdata? pcdata?))
|
||||
|
||||
#;(define xexpr/c
|
||||
|
@ -69,7 +69,7 @@
|
|||
(cond
|
||||
((string? x) (true))
|
||||
((symbol? x) (true))
|
||||
((exact-nonnegative-integer? x) (true))
|
||||
((valid-char? x) (true))
|
||||
((comment? x) (true))
|
||||
((p-i? x) (true))
|
||||
((cdata? x) (true))
|
||||
|
@ -296,7 +296,7 @@
|
|||
; Entities
|
||||
[(symbol? x)
|
||||
(fprintf out "&~a;" x)]
|
||||
[(exact-nonnegative-integer? x)
|
||||
[(valid-char? x)
|
||||
(fprintf out "&#~a;" x)]
|
||||
; Embedded XML
|
||||
[(source? x)
|
||||
|
|
|
@ -108,7 +108,11 @@ Represents an attribute within an element.}
|
|||
@defthing[permissive/c contract?]{
|
||||
If @racket[(permissive-xexprs)] is @racket[#t], then equivalent to @racket[any/c], otherwise equivalent to @racket[(make-none/c 'permissive)]}
|
||||
|
||||
@defstruct[(entity source) ([text (or/c symbol? exact-nonnegative-integer?)])]{
|
||||
@defproc[(valid-char? [x any/c]) boolean?]{
|
||||
Returns true if @racket[x] is an exact-nonnegative-integer whose character interpretation under UTF-8 is from the set (#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]), in accordance with section 2.2 of the XML 1.1 spec.
|
||||
}
|
||||
|
||||
@defstruct[(entity source) ([text (or/c symbol? valid-char?)])]{
|
||||
|
||||
Represents a symbolic or numerical entity.}
|
||||
|
||||
|
@ -142,12 +146,12 @@ Returns @racket[#t] if @racket[v] is a @tech{X-expression}, @racket[#f] otherwis
|
|||
The following grammar describes expressions that create @tech{X-expressions}:
|
||||
|
||||
@racketgrammar[
|
||||
#:literals (cons list)
|
||||
#:literals (cons list valid-char?)
|
||||
xexpr string
|
||||
(list symbol (list (list symbol string) ...) xexpr ...)
|
||||
(cons symbol (list xexpr ...))
|
||||
symbol
|
||||
exact-nonnegative-integer
|
||||
valid-char?
|
||||
cdata
|
||||
misc
|
||||
]
|
||||
|
@ -162,7 +166,7 @@ represented by a string.
|
|||
A @racket[_symbol] represents a symbolic entity. For example,
|
||||
@racket['nbsp] represents @litchar{ }.
|
||||
|
||||
An @racket[_exact-nonnegative-integer] represents a numeric entity. For example,
|
||||
An @racket[valid-char?] represents a numeric entity. For example,
|
||||
@racketvalfont{#x20} represents @litchar{}.
|
||||
|
||||
A @racket[_cdata] is an instance of the @racket[cdata] structure type,
|
||||
|
|
Loading…
Reference in New Issue
Block a user