diff --git a/collects/tests/xml/test.rkt b/collects/tests/xml/test.rkt index 04f1d118b1..af4243278d 100644 --- a/collects/tests/xml/test.rkt +++ b/collects/tests/xml/test.rkt @@ -121,6 +121,7 @@ END (test-xexpr? (list 'p "one" "two" "three")) (test-xexpr? 'nbsp) (test-xexpr? 10) + (test-not-xexpr? 0) (test-xexpr? (make-cdata #f #f "unquoted ")) (test-xexpr? (make-comment "Comment!")) (test-xexpr? (make-pcdata #f #f "quoted ")) @@ -249,6 +250,10 @@ END (list (make-entity (make-source (make-location 1 6 7) (make-location 1 11 12)) '40))) (list))) + (test-read-xml/exn + "" + "read-xml: lex-error: at position 1.10/11: not a well-formed numeric entity (does not match the production for Char, see XML 4.1)") + (test-read-xml "
" '(make-document @@ -616,7 +621,7 @@ END (validate-xexpr xe))))] (test-suite "validate-xexpr" - (test-validate-xexpr 4) + (test-validate-xexpr 64) (test-validate-xexpr 'nbsp) (test-validate-xexpr "string") (test-validate-xexpr (make-pcdata #f #f "pcdata")) @@ -628,6 +633,7 @@ END (test-validate-xexpr '(a ([href "#"]) "string")) (test-validate-xexpr/exn #f #f) + (test-validate-xexpr/exn 4 4) (test-validate-xexpr/exn + +) (test-validate-xexpr/exn '(a ([href foo]) bar) 'foo) (test-validate-xexpr/exn '("foo" bar) '("foo" bar)))) diff --git a/collects/xml/private/reader.rkt b/collects/xml/private/reader.rkt index 43f567dfae..a4e342cd19 100644 --- a/collects/xml/private/reader.rkt +++ b/collects/xml/private/reader.rkt @@ -203,6 +203,8 @@ [else (string->number (read-until #\; in pos))])]) (unless (number? n) (lex-error in pos "malformed numeric entity")) + (unless (valid-char? n) + (lex-error in pos "not a well-formed numeric entity (does not match the production for Char, see XML 4.1)")) n)] [else (begin0 diff --git a/collects/xml/private/structures.rkt b/collects/xml/private/structures.rkt index bb8dab9536..e58ad1120c 100644 --- a/collects/xml/private/structures.rkt +++ b/collects/xml/private/structures.rkt @@ -44,6 +44,17 @@ ; Misc = Comment ; | Processing-instruction +; Section 2.2 of XML 1.1 +; (XML 1.0 is slightly different and looks less restrictive) +(define (valid-char? i) + (and (exact-nonnegative-integer? i) + (or (= i #x9) + (= i #xA) + (= i #xD) + (<= #x20 i #xD7FF) + (<= #xE000 i #xFFFD) + (<= #x10000 i #x10FFFF)))) + ; Entity = (make-entity Location Location (U Nat Symbol)) (define-struct (entity source) (text) #:transparent) @@ -124,6 +135,7 @@ (struct (cdata source) ([start location/c] [stop location/c] [string string?])) + [valid-char? (any/c . -> . boolean?)] (struct (entity source) ([start location/c] [stop location/c] - [text (or/c symbol? exact-nonnegative-integer?)]))) + [text (or/c symbol? valid-char?)]))) diff --git a/collects/xml/private/xexpr.rkt b/collects/xml/private/xexpr.rkt index 0b19488d45..8c76c62be0 100644 --- a/collects/xml/private/xexpr.rkt +++ b/collects/xml/private/xexpr.rkt @@ -10,7 +10,7 @@ ;; | (list* Symbol (listof Attribute-srep) (listof Xexpr)) ;; | (cons Symbol (listof Xexpr)) ;; | Symbol -;; | Nat +;; | Nat (WFC: Valid Char) ;; | Comment ;; | Processing-instruction ;; | Cdata @@ -25,7 +25,7 @@ (define xexpr-drop-empty-attributes (make-parameter #f)) (define xexpr-datum/c - (or/c string? symbol? exact-nonnegative-integer? + (or/c string? symbol? valid-char? comment? p-i? cdata? pcdata?)) #;(define xexpr/c @@ -69,7 +69,7 @@ (cond ((string? x) (true)) ((symbol? x) (true)) - ((exact-nonnegative-integer? x) (true)) + ((valid-char? x) (true)) ((comment? x) (true)) ((p-i? x) (true)) ((cdata? x) (true)) @@ -296,7 +296,7 @@ ; Entities [(symbol? x) (fprintf out "&~a;" x)] - [(exact-nonnegative-integer? x) + [(valid-char? x) (fprintf out "&#~a;" x)] ; Embedded XML [(source? x) diff --git a/collects/xml/xml.scrbl b/collects/xml/xml.scrbl index 0436126d9c..c684f06bbf 100644 --- a/collects/xml/xml.scrbl +++ b/collects/xml/xml.scrbl @@ -108,7 +108,11 @@ Represents an attribute within an element.} @defthing[permissive/c contract?]{ If @racket[(permissive-xexprs)] is @racket[#t], then equivalent to @racket[any/c], otherwise equivalent to @racket[(make-none/c 'permissive)]} -@defstruct[(entity source) ([text (or/c symbol? exact-nonnegative-integer?)])]{ +@defproc[(valid-char? [x any/c]) boolean?]{ + Returns true if @racket[x] is an exact-nonnegative-integer whose character interpretation under UTF-8 is from the set (#x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF]), in accordance with section 2.2 of the XML 1.1 spec. +} + +@defstruct[(entity source) ([text (or/c symbol? valid-char?)])]{ Represents a symbolic or numerical entity.} @@ -142,12 +146,12 @@ Returns @racket[#t] if @racket[v] is a @tech{X-expression}, @racket[#f] otherwis The following grammar describes expressions that create @tech{X-expressions}: @racketgrammar[ -#:literals (cons list) +#:literals (cons list valid-char?) xexpr string (list symbol (list (list symbol string) ...) xexpr ...) (cons symbol (list xexpr ...)) symbol - exact-nonnegative-integer + valid-char? cdata misc ] @@ -162,7 +166,7 @@ represented by a string. A @racket[_symbol] represents a symbolic entity. For example, @racket['nbsp] represents @litchar{ }. -An @racket[_exact-nonnegative-integer] represents a numeric entity. For example, +An @racket[valid-char?] represents a numeric entity. For example, @racketvalfont{#x20} represents @litchar{}. A @racket[_cdata] is an instance of the @racket[cdata] structure type,