diff --git a/pkgs/racket-doc/xml/xml.scrbl b/pkgs/racket-doc/xml/xml.scrbl index b035dd0bb5..db17192331 100644 --- a/pkgs/racket-doc/xml/xml.scrbl +++ b/pkgs/racket-doc/xml/xml.scrbl @@ -244,26 +244,76 @@ Like @racket[syntax:real-xml], but it reads an XML element like @defproc[(write-xml [doc document?] [out output-port? (current-output-port)]) void?]{ -Writes a document to the given output port, currently ignoring -everything except the document's root element.} +Same as @(racket display-xml) with @(racket #:indentation 'none).} @defproc[(write-xml/content [content content/c] [out output-port? (current-output-port)]) void?]{ -Writes document content to the given output port.} +Same as @(racket display-xml/content) with @(racket #:indentation 'none).} -@defproc[(display-xml [doc document?] [out output-port? (current-output-port)]) +@defproc[(display-xml [doc document?] [out output-port? (current-output-port)] + [#:indentation indentation (or/c 'none 'classic 'peek 'scan) 'classic]) void?]{ -Like @racket[write-xml], but newlines and indentation make the output -more readable, though less technically correct when whitespace is -significant.} +Writes the document to the given output port. -@defproc[(display-xml/content [content content/c] [out output-port? (current-output-port)]) +See @(racket display-xml/content) for an explanation of @(racket indentation).} + +@(begin + (define indent-eval (make-base-eval)) + (indent-eval '(require xml))) +@defproc[(display-xml/content [content content/c] [out output-port? (current-output-port)] + [#:indentation indentation (or/c 'none 'classic 'peek 'scan) 'classic]) void?]{ +Writes document content to the given output port. -Like @racket[write-xml/content], but with indentation and newlines -like @racket[display-xml].} +Indentation can make the output more readable, though less technically correct +when whitespace is significant. +The four @(racket indentation) modes are as follows: +@(itemlist + @item{@(racket 'none) --- No whitespace is added. + This is the only mode that is guaranteed to be 100% accurate in all situations.} + @item{@(racket 'classic) --- Whitespace is added around almost every node. + This mode is mostly for compatibility.} + @item{@(racket 'scan) --- If any child of an @(racket element?) is @(racket pcdata?) + or @(racket entity?), then no whitespace will be added inside that element. + This mode works well for XML that does not contain mixed content, but @(racket 'peek) + should be equally good and faster.} + @item{@(racket 'peek) --- Like @(racket 'scan) except only the first child is checked. + This mode works well for XML that does not contain mixed content.}) + +@(examples + #:eval indent-eval + (define example-data + '(root (a "nobody") + (b "some" "body") + (c "any" (i "body")) + (d (i "every") "body"))) + (define (show indentation [data example-data]) + (display-xml/content (xexpr->xml data) + #:indentation indentation)) + (code:comment "") + (code:comment "`none` is guaranteed to be accurate:") + (show 'none) + (code:comment "") + (code:comment "`classic` adds the most whitespace.") + (code:comment "Even the 'nobody' pcdata has whitespace added:") + (show 'classic) + (code:comment "") + (code:comment "`peek` cannot see that contains a pcdata child:") + (show 'peek) + (code:comment "") + (code:comment "`scan` sees that contains a pcdata child:") + (show 'scan)) + +Be warned that even @(racket 'scan) does not handle HTML with 100% accuracy. +The following example will be incorrectly rendered as "@italic{no} @bold{body}" +instead of "@italic{no}@bold{body}": +@(examples + #:eval indent-eval + (define html-data '(span (i "no") (b "body"))) + (show 'scan html-data)) +} @defproc[(write-xexpr [xe xexpr/c] [out output-port? (current-output-port)] diff --git a/pkgs/racket-test/tests/xml/test.rkt b/pkgs/racket-test/tests/xml/test.rkt index 519990edaa..fc18b228f9 100644 --- a/pkgs/racket-test/tests/xml/test.rkt +++ b/pkgs/racket-test/tests/xml/test.rkt @@ -52,19 +52,24 @@ (define (test-display-xml/content str res) (test-equal? str (with-output-to-string (lambda () (display-xml/content (document-element (read-xml (open-input-string str)))))) res)) -(define (indent indentation xexpr) - ; The indentation tests will ignore the leading newline - (string-trim - (with-output-to-string - (lambda () (display-xml/content (xexpr->xml xexpr) - (current-output-port) - #:indentation indentation))))) +(define/contract (indent indentation val) + (-> symbol? (or/c xexpr? document?) string?) + (let-values ([(proc val) + (cond + [(document? val) + (values display-xml val)] + [else + (values display-xml/content (xexpr->xml val))])]) + ; The indentation tests will ignore the leading newline + (string-trim + (with-output-to-string + (lambda () (proc val (current-output-port) #:indentation indentation)))))) (define-syntax (test-indentation stx) (syntax-case stx () - [(_ indentation xexpr expected-xml) + [(_ arg ... expected-xml) (syntax/loc stx - (check-equal? (fix-newline expected-xml) - (indent indentation xexpr)))])) + (check-equal? (indent arg ...) + (fix-newline expected-xml)))])) (define (test-xexpr? xe) (test-not-false (format "~S" xe) (xexpr? xe))) @@ -626,6 +631,147 @@ XML
  • Abstract is a verb.
  • +XML + )) + (let ([el '(foo (a) + (b "1") + (c "1" "2") + ; Both symbolic and numeric entities are considered whitespace-sensitive + (d sym) + (span quot "blah" quot) + (e 42))]) + (test-indentation 'none el "112&sym;"blah"*") + (test-indentation 'classic el #< + + + 1 + + + 1 + 2 + + &sym; + + " + blah" + + * + + +XML + ) + (test-indentation 'peek el #< + + 1 + 12 + &sym; + "blah" + * + +XML + ) + (test-indentation 'scan el #< + + 1 + 12 + &sym; + "blah" + * + +XML + )) + (let* ([comment (make-comment " comment ")] + [pi (make-p-i #f #f 'pi "ins")] + [misc (list comment pi)] + [prolog (make-prolog misc + (document-type 'my-doctype (external-dtd "ignored") #f) + misc)] + [xpr `(root ((aa "bb") + (cc "dd")) + (ee) + (ff "1") + (gg "1" "2") + (hh symbolic) + ,comment + ,(make-cdata #f #f "") + ,pi)] + [doc (make-document prolog (xexpr->xml xpr) misc)]) + (test-indentation 'none doc #< + + + + +112&symbolic; + +XML + ) + (test-indentation 'classic doc #< + + + + + + + + + 1 + + + 1 + 2 + + &symbolic; + + + + + + +XML + ) + (test-indentation 'peek doc #< + + + + + + + + 1 + 12 + &symbolic; + + + + + + +XML + ) + (test-indentation 'scan doc #< + + + + + + + + 1 + 12 + &symbolic; + + + + + + XML )) ) diff --git a/racket/collects/xml/private/writer.rkt b/racket/collects/xml/private/writer.rkt index 5cce4aba38..a8644b0ff2 100644 --- a/racket/collects/xml/private/writer.rkt +++ b/racket/collects/xml/private/writer.rkt @@ -3,7 +3,7 @@ "structures.rkt") ; Within this file, `Dent` is used in comments and `dent` is used in code -; to refer to this contract: +; to refer to an indentation (define indentation? (or/c 'none 'classic 'peek 'scan)) (provide/contract @@ -50,12 +50,17 @@ ;; display-xml : Document [Output-port] [#:indentation Dent] -> Void (define (display-xml doc [out (current-output-port)] #:indentation [dent 'classic]) + (define doc-misc (document-misc doc)) (let ([prolog (document-prolog doc)]) (display-outside-misc (prolog-misc prolog) out) (display-dtd (prolog-dtd prolog) out) (display-outside-misc (prolog-misc2 prolog) out)) (display-xml/content (document-element doc) out #:indentation dent) - (display-outside-misc (document-misc doc) out)) + (case dent + [(none classic) (void)] + [else (when (not (null? doc-misc)) + (newline out))]) + (display-outside-misc doc-misc out)) ; display-dtd : document-type oport -> void (define (display-dtd dtd out) @@ -94,7 +99,9 @@ [else (error 'write-xml-content "received ~e" el)]) el over dent out)) -(define whitespace-sensitive? pcdata?) +(define (whitespace-sensitive? x) + (or (pcdata? x) + (entity? x))) ;; write-xml-element : Element Nat Dent Output-Stream -> Void (define (write-xml-element el over dent out) @@ -147,7 +154,8 @@ [(none) (void)] [else - (error "Bug in xml package - unexpected indentation: " dent)]) + ; Either we have a bug or someone bypassed the contract + (error "xml: unexpected indentation: " dent)]) (display el out)) ;; write-xml-pcdata : Pcdata Nat Dent Output-Stream -> Void