add tests and scribble documentation for XML indentation

This commit is contained in:
Ryan Kramer 2021-02-20 11:32:21 -06:00 committed by Jay McCarthy
parent a5f95a4a41
commit 0841af7992
3 changed files with 228 additions and 24 deletions

View File

@ -244,26 +244,76 @@ Like @racket[syntax:real-xml], but it reads an XML element like
@defproc[(write-xml [doc document?] [out output-port? (current-output-port)])
void?]{
Writes a document to the given output port, currently ignoring
everything except the document's root element.}
Same as @(racket display-xml) with @(racket #:indentation 'none).}
@defproc[(write-xml/content [content content/c] [out output-port? (current-output-port)])
void?]{
Writes document content to the given output port.}
Same as @(racket display-xml/content) with @(racket #:indentation 'none).}
@defproc[(display-xml [doc document?] [out output-port? (current-output-port)])
@defproc[(display-xml [doc document?] [out output-port? (current-output-port)]
[#:indentation indentation (or/c 'none 'classic 'peek 'scan) 'classic])
void?]{
Like @racket[write-xml], but newlines and indentation make the output
more readable, though less technically correct when whitespace is
significant.}
Writes the document to the given output port.
@defproc[(display-xml/content [content content/c] [out output-port? (current-output-port)])
See @(racket display-xml/content) for an explanation of @(racket indentation).}
@(begin
(define indent-eval (make-base-eval))
(indent-eval '(require xml)))
@defproc[(display-xml/content [content content/c] [out output-port? (current-output-port)]
[#:indentation indentation (or/c 'none 'classic 'peek 'scan) 'classic])
void?]{
Writes document content to the given output port.
Like @racket[write-xml/content], but with indentation and newlines
like @racket[display-xml].}
Indentation can make the output more readable, though less technically correct
when whitespace is significant.
The four @(racket indentation) modes are as follows:
@(itemlist
@item{@(racket 'none) --- No whitespace is added.
This is the only mode that is guaranteed to be 100% accurate in all situations.}
@item{@(racket 'classic) --- Whitespace is added around almost every node.
This mode is mostly for compatibility.}
@item{@(racket 'scan) --- If any child of an @(racket element?) is @(racket pcdata?)
or @(racket entity?), then no whitespace will be added inside that element.
This mode works well for XML that does not contain mixed content, but @(racket 'peek)
should be equally good and faster.}
@item{@(racket 'peek) --- Like @(racket 'scan) except only the first child is checked.
This mode works well for XML that does not contain mixed content.})
@(examples
#:eval indent-eval
(define example-data
'(root (a "nobody")
(b "some" "body")
(c "any" (i "body"))
(d (i "every") "body")))
(define (show indentation [data example-data])
(display-xml/content (xexpr->xml data)
#:indentation indentation))
(code:comment "")
(code:comment "`none` is guaranteed to be accurate:")
(show 'none)
(code:comment "")
(code:comment "`classic` adds the most whitespace.")
(code:comment "Even the 'nobody' pcdata has whitespace added:")
(show 'classic)
(code:comment "")
(code:comment "`peek` cannot see that <d> contains a pcdata child:")
(show 'peek)
(code:comment "")
(code:comment "`scan` sees that <d> contains a pcdata child:")
(show 'scan))
Be warned that even @(racket 'scan) does not handle HTML with 100% accuracy.
The following example will be incorrectly rendered as "@italic{no} @bold{body}"
instead of "@italic{no}@bold{body}":
@(examples
#:eval indent-eval
(define html-data '(span (i "no") (b "body")))
(show 'scan html-data))
}
@defproc[(write-xexpr [xe xexpr/c] [out output-port? (current-output-port)]

View File

@ -52,19 +52,24 @@
(define (test-display-xml/content str res)
(test-equal? str (with-output-to-string (lambda () (display-xml/content (document-element (read-xml (open-input-string str)))))) res))
(define (indent indentation xexpr)
; The indentation tests will ignore the leading newline
(string-trim
(with-output-to-string
(lambda () (display-xml/content (xexpr->xml xexpr)
(current-output-port)
#:indentation indentation)))))
(define/contract (indent indentation val)
(-> symbol? (or/c xexpr? document?) string?)
(let-values ([(proc val)
(cond
[(document? val)
(values display-xml val)]
[else
(values display-xml/content (xexpr->xml val))])])
; The indentation tests will ignore the leading newline
(string-trim
(with-output-to-string
(lambda () (proc val (current-output-port) #:indentation indentation))))))
(define-syntax (test-indentation stx)
(syntax-case stx ()
[(_ indentation xexpr expected-xml)
[(_ arg ... expected-xml)
(syntax/loc stx
(check-equal? (fix-newline expected-xml)
(indent indentation xexpr)))]))
(check-equal? (indent arg ...)
(fix-newline expected-xml)))]))
(define (test-xexpr? xe)
(test-not-false (format "~S" xe) (xexpr? xe)))
@ -626,6 +631,147 @@ XML
<li>Ab<b>stract</b> is a verb.</li>
</ul>
</div>
XML
))
(let ([el '(foo (a)
(b "1")
(c "1" "2")
; Both symbolic and numeric entities are considered whitespace-sensitive
(d sym)
(span quot "blah" quot)
(e 42))])
(test-indentation 'none el "<foo><a /><b>1</b><c>12</c><d>&sym;</d><span>&quot;blah&quot;</span><e>&#42;</e></foo>")
(test-indentation 'classic el #<<XML
<foo>
<a />
<b>
1
</b>
<c>
1
2
</c>
<d>&sym;
</d>
<span>&quot;
blah&quot;
</span>
<e>&#42;
</e>
</foo>
XML
)
(test-indentation 'peek el #<<XML
<foo>
<a />
<b>1</b>
<c>12</c>
<d>&sym;</d>
<span>&quot;blah&quot;</span>
<e>&#42;</e>
</foo>
XML
)
(test-indentation 'scan el #<<XML
<foo>
<a />
<b>1</b>
<c>12</c>
<d>&sym;</d>
<span>&quot;blah&quot;</span>
<e>&#42;</e>
</foo>
XML
))
(let* ([comment (make-comment " comment ")]
[pi (make-p-i #f #f 'pi "ins")]
[misc (list comment pi)]
[prolog (make-prolog misc
(document-type 'my-doctype (external-dtd "ignored") #f)
misc)]
[xpr `(root ((aa "bb")
(cc "dd"))
(ee)
(ff "1")
(gg "1" "2")
(hh symbolic)
,comment
,(make-cdata #f #f "<![CDATA[my cdata]]>")
,pi)]
[doc (make-document prolog (xexpr->xml xpr) misc)])
(test-indentation 'none doc #<<XML
<!-- comment -->
<?pi ins?>
<!DOCTYPE my-doctype>
<!-- comment -->
<?pi ins?>
<root aa="bb" cc="dd"><ee /><ff>1</ff><gg>12</gg><hh>&symbolic;</hh><!-- comment --><![CDATA[my cdata]]><?pi ins?></root><!-- comment -->
<?pi ins?>
XML
)
(test-indentation 'classic doc #<<XML
<!-- comment -->
<?pi ins?>
<!DOCTYPE my-doctype>
<!-- comment -->
<?pi ins?>
<root aa="bb" cc="dd">
<ee />
<ff>
1
</ff>
<gg>
1
2
</gg>
<hh>&symbolic;
</hh>
<!-- comment -->
<![CDATA[my cdata]]>
<?pi ins?>
</root><!-- comment -->
<?pi ins?>
XML
)
(test-indentation 'peek doc #<<XML
<!-- comment -->
<?pi ins?>
<!DOCTYPE my-doctype>
<!-- comment -->
<?pi ins?>
<root aa="bb" cc="dd">
<ee />
<ff>1</ff>
<gg>12</gg>
<hh>&symbolic;</hh>
<!-- comment -->
<![CDATA[my cdata]]>
<?pi ins?>
</root>
<!-- comment -->
<?pi ins?>
XML
)
(test-indentation 'scan doc #<<XML
<!-- comment -->
<?pi ins?>
<!DOCTYPE my-doctype>
<!-- comment -->
<?pi ins?>
<root aa="bb" cc="dd">
<ee />
<ff>1</ff>
<gg>12</gg>
<hh>&symbolic;</hh>
<!-- comment -->
<![CDATA[my cdata]]>
<?pi ins?>
</root>
<!-- comment -->
<?pi ins?>
XML
))
)

View File

@ -3,7 +3,7 @@
"structures.rkt")
; Within this file, `Dent` is used in comments and `dent` is used in code
; to refer to this contract:
; to refer to an indentation
(define indentation? (or/c 'none 'classic 'peek 'scan))
(provide/contract
@ -50,12 +50,17 @@
;; display-xml : Document [Output-port] [#:indentation Dent] -> Void
(define (display-xml doc [out (current-output-port)]
#:indentation [dent 'classic])
(define doc-misc (document-misc doc))
(let ([prolog (document-prolog doc)])
(display-outside-misc (prolog-misc prolog) out)
(display-dtd (prolog-dtd prolog) out)
(display-outside-misc (prolog-misc2 prolog) out))
(display-xml/content (document-element doc) out #:indentation dent)
(display-outside-misc (document-misc doc) out))
(case dent
[(none classic) (void)]
[else (when (not (null? doc-misc))
(newline out))])
(display-outside-misc doc-misc out))
; display-dtd : document-type oport -> void
(define (display-dtd dtd out)
@ -94,7 +99,9 @@
[else (error 'write-xml-content "received ~e" el)])
el over dent out))
(define whitespace-sensitive? pcdata?)
(define (whitespace-sensitive? x)
(or (pcdata? x)
(entity? x)))
;; write-xml-element : Element Nat Dent Output-Stream -> Void
(define (write-xml-element el over dent out)
@ -147,7 +154,8 @@
[(none)
(void)]
[else
(error "Bug in xml package - unexpected indentation: " dent)])
; Either we have a bug or someone bypassed the contract
(error "xml: unexpected indentation: " dent)])
(display el out))
;; write-xml-pcdata : Pcdata Nat Dent Output-Stream -> Void