;;; BLah italic bold ened
;;; still < bold
But not done yet...")
;;; @result{}
;;; (*TOP* (html (head (title) (title "whatever"))
;;; (body "\n"
;;; (a (@@ (href "url")) "link")
;;; (p (@@ (align "center"))
;;; (ul (@@ (compact) (style "aa")) "\n"))
;;; (p "BLah"
;;; (*COMMENT* " comment ")
;;; " "
;;; (i " italic " (b " bold " (tt " ened")))
;;; "\n"
;;; "still < bold "))
;;; (p " But not done yet...")))
;;; @end lisp
;;;
;;; Note that in the emitted SHTML the text token @code{"still < bold"} is
;;; @emph{not} inside the @code{b} element, which represents an unfortunate
;;; failure to emulate all the quirks-handling behavior of some popular Web
;;; browsers.
;;;
;;; The procedures @code{html->sxml-@var{n}nf} for @var{n} 0 through 2
;;; correspond to 0th through 2nd normal forms of SXML as specified in SXML,
;;; and indicate the minimal requirements of the emitted SXML.
;;;
;;; @code{html->sxml} and @code{html->shtml} are currently aliases for
;;; @code{html->sxml-0nf}, and can be used in scripts and interactively, when
;;; terseness is important and any normal form of SXML would suffice.
(define (html->sxml-0nf input) (%htmlprag:parse-html input #f #t))
(define (html->sxml-1nf input) (%htmlprag:parse-html input #f #t))
(define (html->sxml-2nf input) (%htmlprag:parse-html input #t #t))
(define html->sxml html->sxml-0nf)
(define html->shtml html->sxml-0nf)
;;; @section Emitting HTML
;;; Two procedures encoding the SHTML representation as conventional HTML,
;;; @code{write-shtml-as-html} and @code{shtml->html}. These are perhaps most
;;; useful for emitting the result of parsed and transformed input HTML. They
;;; can also be used for emitting HTML from generated or handwritten SHTML.
;;; @defproc write-shtml-as-html shtml [out [foreign-filter]]
;;;
;;; Writes a conventional HTML transliteration of the SHTML @var{shtml} to
;;; output port @var{out}. If @var{out} is not specified, the default is the
;;; current output port. HTML elements of types that are always empty are
;;; written using HTML4-compatible XHTML tag syntax.
;;;
;;; If @var{foreign-filter} is specified, it is a procedure of two argument
;;; that is applied to any non-SHTML (``foreign'') object encountered in
;;; @var{shtml}, and should yield SHTML. The first argument is the object, and
;;; the second argument is a boolean for whether or not the object is part of
;;; an attribute value.
;;;
;;; No inter-tag whitespace or line breaks not explicit in @var{shtml} is
;;; emitted. The @var{shtml} should normally include a newline at the end of
;;; the document. For example:
;;;
;;; @lisp
;;; (write-shtml-as-html
;;; '((html (head (title "My Title"))
;;; (body (@@ (bgcolor "white"))
;;; (h1 "My Heading")
;;; (p "This is a paragraph.")
;;; (p "This is another paragraph.")))))
;;; @print{} My TitleMy Heading
This is a paragraph.
This is
;;; @print{} another paragraph.
;;; @end lisp
(define (%htmlprag:write-shtml-as-html/fixed shtml out foreign-filter)
(letrec
((write-shtml-text
(lambda (str out)
(let ((len (string-length str)))
(let loop ((i 0))
(if (< i len)
(begin (display (let ((c (string-ref str i)))
(case c
;; ((#\") """)
((#\&) "&")
((#\<) "<")
((#\>) ">")
(else c)))
out)
(loop (+ 1 i))))))))
(write-dquote-ampified
(lambda (str out)
;; TODO: If we emit """, we really should parse it, and HTML
;; 4.01 says we should, but anachronisms in HTML create the potential
;; for nasty mutilation of URI in attribute values.
(let ((len (string-length str)))
(let loop ((i 0))
(if (< i len)
(begin (display (let ((c (string-ref str i)))
(if (eqv? c #\") """ c))
out)
(loop (+ 1 i))))))))
(do-thing
(lambda (thing)
(cond ((string? thing) (write-shtml-text thing out))
((list? thing) (if (not (null? thing))
(do-list-thing thing)))
(else (do-thing (foreign-filter thing #f))))))
(do-list-thing
(lambda (thing)
(let ((head (car thing)))
(cond ((symbol? head)
;; Head is a symbol, so...
(cond ((eq? head shtml-comment-symbol)
;; TODO: Make sure the comment text doesn't contain a
;; comment end sequence.
(display "" out))
((eq? head shtml-decl-symbol)
(let ((head (car (cdr thing))))
(display "string head) out)
(for-each
(lambda (n)
(cond ((symbol? n)
(display #\space out)
(display (symbol->string n) out))
((string? n)
(display " \"" out)
(write-dquote-ampified n out)
(display #\" out))
(else (%htmlprag:error
"write-shtml-as-html"
"invalid SHTML decl:"
thing))))
(cdr (cdr thing)))
(display #\> out)))
((eq? head shtml-pi-symbol)
(display "" out)
(display (symbol->string (car (cdr thing))) out)
(display #\space out)
(display (car (cdr (cdr thing))) out)
;; TODO: Error-check that no more rest of PI.
(display "?>" out))
((eq? head shtml-top-symbol)
(for-each do-thing (cdr thing)))
((eq? head shtml-empty-symbol)
#f)
((eq? head '@)
(%htmlprag:error
"write-shtml-as-html"
"illegal position of SHTML attributes:"
thing))
((or (eq? head '&) (eq? head shtml-entity-symbol))
(let ((val (shtml-entity-value thing)))
(if val
(begin (write-char #\& out)
(if (integer? val)
(write-char #\# out))
(display val out)
(write-char #\; out))
(%htmlprag:error
"write-shtml-as-html"
"invalid SHTML entity reference:"
thing))))
((memq head `(,shtml-end-symbol
,shtml-start-symbol
,shtml-text-symbol))
(%htmlprag:error "write-shtml-as-html"
"invalid SHTML symbol:"
head))
(else
(display #\< out)
(display head out)
(let* ((rest (cdr thing)))
(if (not (null? rest))
(let ((second (car rest)))
(and (list? second)
(not (null? second))
(eq? (car second)
'@)
(begin (for-each do-attr (cdr second))
(set! rest (cdr rest))))))
(if (memq head
%htmlprag:empty-elements)
;; TODO: Error-check to make sure the element
;; has no content other than attributes. We
;; have to test for cases like: (br (@) ()
;; (()))
(display " />" out)
(begin (display #\> out)
(for-each do-thing rest)
(display "" out)
(display (symbol->string head) out)
(display #\> out)))))))
;; ((or (list? head) (string? head))
;;
;; Head is a list or string, which might occur as the result
;; of an SXML transform, so we'll cope.
(else
;; Head is not a symbol, which might occur as the result of
;; an SXML transform, so we'll cope.
(for-each do-thing thing))
;;(else
;; ;; Head is NOT a symbol, list, or string, so error.
;; (%htmlprag:error "write-shtml-as-html"
;; "invalid SHTML list:"
;; thing))
))))
(write-attr-val-dquoted
(lambda (str out)
(display #\" out)
(display str out)
(display #\" out)))
(write-attr-val-squoted
(lambda (str out)
(display #\' out)
(display str out)
(display #\' out)))
(write-attr-val-dquoted-and-amped
(lambda (str out)
(display #\" out)
(write-dquote-ampified str out)
(display #\" out)))
(write-attr-val
(lambda (str out)
(let ((len (string-length str)))
(let find-dquote-and-squote ((i 0))
(if (= i len)
(write-attr-val-dquoted str out)
(let ((c (string-ref str i)))
(cond ((eqv? c #\")
(let find-squote ((i (+ 1 i)))
(if (= i len)
(write-attr-val-squoted str out)
(if (eqv? (string-ref str i) #\')
(write-attr-val-dquoted-and-amped str
out)
(find-squote (+ 1 i))))))
((eqv? c #\')
(let find-dquote ((i (+ 1 i)))
(if (= i len)
(write-attr-val-dquoted str out)
(if (eqv? (string-ref str i) #\")
(write-attr-val-dquoted-and-amped str
out)
(find-dquote (+ 1 i))))))
(else (find-dquote-and-squote (+ 1 i))))))))))
(collect-and-write-attr-val
;; TODO: Take another look at this.
(lambda (lst out)
(let ((os #f))
(let do-list ((lst lst))
(for-each
(lambda (thing)
(let do-thing ((thing thing))
(cond ((string? thing)
(or os (set! os (open-output-string)))
(display thing os))
((list? thing)
(do-list thing))
((eq? thing #t)
#f)
(else
(do-thing (foreign-filter thing #t))))))
lst))
(if os
(begin
(display #\= out)
(write-attr-val (%htmlprag:gosc os) out))))))
(do-attr
(lambda (attr)
(or (list? attr)
(%htmlprag:error "write-shtml-as-html"
"invalid SHTML attribute:"
attr))
(if (not (null? attr))
(let ((name (car attr)))
(or (symbol? name)
(%htmlprag:error
"write-shtml-as-html"
"invalid name in SHTML attribute:"
attr))
(if (not (eq? name '@))
(begin
(display #\space out)
(display name out)
(collect-and-write-attr-val (cdr attr) out)
)))))))
(do-thing shtml)
(if #f #f)))
(define write-shtml-as-html
(letrec ((error-foreign-filter
(lambda (foreign-object in-attribute-value?)
(%htmlprag:error
"write-shtml-as-html"
(if in-attribute-value?
"unhandled foreign object in shtml attribute value:"
"unhandled foreign object in shtml:")
foreign-object))))
(lambda (shtml . rest)
(case (length rest)
((0) (%htmlprag:write-shtml-as-html/fixed
shtml
(current-output-port)
error-foreign-filter))
((1) (%htmlprag:write-shtml-as-html/fixed
shtml
(car rest)
error-foreign-filter))
((2) (%htmlprag:write-shtml-as-html/fixed
shtml
(car rest)
(cadr rest)))
(else
(%htmlprag:error "write-shtml-as-html"
"extraneous arguments:"
(cddr rest)))))))
;;; @defproc shtml->html shtml
;;;
;;; Yields an HTML encoding of SHTML @var{shtml} as a string. For example:
;;;
;;; @lisp
;;; (shtml->html
;;; (html->shtml
;;; "This is
bold italic b > text.
"))
;;; @result{} "This is
bold italic text.
"
;;; @end lisp
;;;
;;; Note that, since this procedure constructs a string, it should normally
;;; only be used when the HTML is relatively small. When encoding HTML
;;; documents of conventional size and larger, @code{write-shtml-as-html} is
;;; much more efficient.
(define (shtml->html shtml)
(let ((os (open-output-string)))
(write-shtml-as-html shtml os)
(%htmlprag:gosc os)))
;;; @section Tests
;;; The HtmlPrag test suite can be enabled by editing the source code file and
;;; loading @uref{http://www.neilvandyke.org/testeez/, Testeez}.
(define (%htmlprag:test)
(%htmlprag:testeez
"HtmlPrag"
(test-define "" lf (string (%htmlprag:a2c 10)))
(test/equal "" (html->shtml ">") `(,shtml-top-symbol (a ">")))
(test/equal "" (html->shtml "") `(,shtml-top-symbol (a "<" ">")))
(test/equal "" (html->shtml "<>") `(,shtml-top-symbol "<" ">"))
(test/equal "" (html->shtml "< >") `(,shtml-top-symbol "<" ">"))
(test/equal "" (html->shtml "< a>") `(,shtml-top-symbol (a)))
(test/equal "" (html->shtml "< a / >") `(,shtml-top-symbol (a)))
(test/equal "" (html->shtml "shtml "shtml ">") `(,shtml-top-symbol ">" (a)))
(test/equal "" (html->shtml ">") `(,shtml-top-symbol))
(test/equal "" (html->shtml "<\">") `(,shtml-top-symbol "<" "\"" ">"))
(test/equal ""
(html->shtml (string-append "xxxaaa" lf
"bbb" lf
"cshtml "aaabbb")
`(,shtml-top-symbol
"aaa" (,shtml-comment-symbol " xxx ") "bbb"))
(test/equal ""
(html->shtml "aaabbb")
`(,shtml-top-symbol
"aaa" (,shtml-comment-symbol " xxx ") "bbb"))
(test/equal ""
(html->shtml "aaabbb")
`(,shtml-top-symbol
"aaa" (,shtml-comment-symbol " xxx -") "bbb"))
(test/equal ""
(html->shtml "aaabbb")
`(,shtml-top-symbol
"aaa" (,shtml-comment-symbol " xxx --") "bbb"))
(test/equal ""
(html->shtml "aaabbb")
`(,shtml-top-symbol
"aaa" (,shtml-comment-symbol " xxx -y") "bbb"))
(test/equal ""
(html->shtml "aaabbb")
`(,shtml-top-symbol
"aaa" (,shtml-comment-symbol "-") "bbb"))
(test/equal ""
(html->shtml "aaabbb")
`(,shtml-top-symbol
"aaa" (,shtml-comment-symbol "") "bbb"))
(test/equal ""
(html->shtml "aaabbb")
`(,shtml-top-symbol "aaa" (,shtml-comment-symbol "->bbb")))
(test/equal "" (html->shtml "
") `(,shtml-top-symbol (hr)))
(test/equal "" (html->shtml "
") `(,shtml-top-symbol (hr)))
(test/equal "" (html->shtml "
") `(,shtml-top-symbol (hr)))
(test/equal ""
(html->shtml "
")
`(,shtml-top-symbol (hr (@ (noshade)))))
(test/equal ""
(html->shtml "
")
`(,shtml-top-symbol (hr (@ (noshade)))))
(test/equal ""
(html->shtml "
")
`(,shtml-top-symbol (hr (@ (noshade)))))
(test/equal ""
(html->shtml "
")
`(,shtml-top-symbol (hr (@ (noshade)))))
(test/equal ""
(html->shtml "
")
`(,shtml-top-symbol (hr (@ (noshade "1")))))
(test/equal ""
(html->shtml "
")
`(,shtml-top-symbol (hr (@ (noshade "1/")))))
(test/equal ""
(html->shtml "aaabbb
ccc
ddd")
`(,shtml-top-symbol (q "aaa" (p) "bbb") "ccc" "ddd"))
(test/equal "" (html->shtml "<") `(,shtml-top-symbol "<"))
(test/equal "" (html->shtml ">") `(,shtml-top-symbol ">"))
(test/equal ""
(html->shtml "Gilbert & Sullivan")
`(,shtml-top-symbol "Gilbert & Sullivan"))
(test/equal ""
(html->shtml "Gilbert & Sullivan")
`(,shtml-top-symbol "Gilbert & Sullivan"))
(test/equal ""
(html->shtml "Gilbert & Sullivan")
`(,shtml-top-symbol "Gilbert & Sullivan"))
(test/equal ""
(html->shtml "Copyright © Foo")
`(,shtml-top-symbol "Copyright "
(& ,(string->symbol "copy"))
" Foo"))
(test/equal ""
(html->shtml "aaa©bbb")
`(,shtml-top-symbol
"aaa" (& ,(string->symbol "copy")) "bbb"))
(test/equal ""
(html->shtml "aaa©")
`(,shtml-top-symbol
"aaa" (& ,(string->symbol "copy"))))
(test/equal "" (html->shtml "*") `(,shtml-top-symbol "*"))
(test/equal "" (html->shtml "*") `(,shtml-top-symbol "*"))
(test/equal "" (html->shtml "*x") `(,shtml-top-symbol "*x"))
(test/equal "" (html->shtml "") `(,shtml-top-symbol
(& 151)
;; ,(string (%htmlprag:a2c 151))
))
(test/equal "" (html->shtml "Ϩ") `(,shtml-top-symbol (& 1000)))
(test/equal "" (html->shtml "B") `(,shtml-top-symbol "B"))
(test/equal "" (html->shtml "¢") `(,shtml-top-symbol
(& 162)
;; ,(string (%htmlprag:a2c 162))
))
(test/equal "" (html->shtml "ÿ") `(,shtml-top-symbol
(& 255)
;; ,(string (%htmlprag:a2c 255))
))
(test/equal "" (html->shtml "Ā") `(,shtml-top-symbol (& 256)))
(test/equal "" (html->shtml "B") `(,shtml-top-symbol "B"))
(test/equal "" (html->shtml "&42;") `(,shtml-top-symbol "&42;"))
(test/equal ""
(html->shtml (string-append "aaa©bbb&ccc<ddd&>"
"eee*fffϨgggZhhh"))
`(,shtml-top-symbol
"aaa"
(& ,(string->symbol "copy"))
"bbb&ccceee*fff"
(& 1000)
"gggZhhh"))
(test/equal ""
(html->shtml
(string-append
"
2"))
`(,shtml-top-symbol
(img (@
(src
"http://e.e/aw/pics/listings/ebayLogo_38x16.gif")
(border "0") (width "38") (height "16")
(hspace "5") (vspace "0")))
"2"))
(test/equal ""
(html->shtml "eee")
`(,shtml-top-symbol (aaa (@ (bbb "ccc") (ddd)) "eee")))
(test/equal ""
(html->shtml "eee")
`(,shtml-top-symbol (aaa (@ (bbb "ccc") (ddd)) "eee")))
(test/equal ""
(html->shtml
(string-append
"My Title"
""
"This is a bold-italic test of "
"broken HTML.
Yes it is."))
`(,shtml-top-symbol
(html (head (title "My Title"))
(body (@ (bgcolor "white") (foo "42"))
"This is a "
(b (i "bold-italic"))
" test of "
"broken HTML."
(br)
"Yes it is."))))
(test/equal ""
(html->shtml
(string-append
""))
`(,shtml-top-symbol
(,shtml-decl-symbol
,(string->symbol "DOCTYPE")
html
,(string->symbol "PUBLIC")
"-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd")))
(test/equal ""
(html->shtml
(string-append
""))
`(,shtml-top-symbol
(html (@ (xmlns "http://www.w3.org/1999/xhtml")
(xml:lang "en") (lang "en")))))
(test/equal
""
(html->shtml
(string-append
""
"Frobnostication"
"Moved to "
"here."))
`(,shtml-top-symbol
(html (@ (xmlns:html "http://www.w3.org/TR/REC-html40"))
(head (title "Frobnostication"))
(body (p "Moved to "
(a (@ (href "http://frob.com"))
"here."))))))
(test/equal ""
(html->shtml
(string-append
""
"Layman, A"
"33B"
"Check Status"
"1997-05-24T07:55:00+1"))
`(,shtml-top-symbol
(reservation (@ (,(string->symbol "xmlns:HTML")
"http://www.w3.org/TR/REC-html40"))
(name (@ (class "largeSansSerif"))
"Layman, A")
(seat (@ (class "Y") (class "largeMonotype"))
"33B")
(a (@ (href "/cgi-bin/ResStatus"))
"Check Status")
(departure "1997-05-24T07:55:00+1"))))
(test/equal
""
(html->shtml
(string-append
"whatever"
"link"
"BLah italic bold ened "
" still < bold
But not done yet..."))
`(,shtml-top-symbol
(html (head (title) (title "whatever"))
(body (a (@ (href "url")) "link")
(p (@ (align "center"))
(ul (@ (compact) (style "aa"))))
(p "BLah"
(,shtml-comment-symbol " comment ")
" "
(i " italic " (b " bold " (tt " ened ")))
" still < bold "))
(p " But not done yet..."))))
(test/equal ""
(html->shtml "")
`(,shtml-top-symbol
(,shtml-pi-symbol xml "version=\"1.0\" encoding=\"UTF-8\"")))
(test/equal ""
(html->shtml "")
`(,shtml-top-symbol (,shtml-pi-symbol php "php_info(); ")))
(test/equal ""
(html->shtml "shtml "shtml " blort ?>")
`(,shtml-top-symbol
(,shtml-pi-symbol foo "bar ? baz > blort ")))
(test/equal ""
(html->shtml "x")
`(,shtml-top-symbol (,shtml-pi-symbol foo "b") "x"))
(test/equal ""
(html->shtml "x")
`(,shtml-top-symbol (,shtml-pi-symbol foo "") "x"))
(test/equal ""
(html->shtml "x")
`(,shtml-top-symbol (,shtml-pi-symbol foo "") "x"))
(test/equal ""
(html->shtml "x")
`(,shtml-top-symbol (,shtml-pi-symbol foo "") "x"))
(test/equal ""
(html->shtml "x")
`(,shtml-top-symbol (,shtml-pi-symbol f "") "x"))
(test/equal ""
(html->shtml "?>x")
`(,shtml-top-symbol (,shtml-pi-symbol #f "") "x"))
(test/equal ""
(html->shtml ">x")
`(,shtml-top-symbol (,shtml-pi-symbol #f ">x")))
(test/equal ""
(html->shtml "blort")
`(,shtml-top-symbol (foo (@ (bar "baz")) "blort")))
(test/equal ""
(html->shtml "blort")
`(,shtml-top-symbol (foo (@ (bar "baz")) "blort")))
(test/equal ""
(html->shtml "blort")
`(,shtml-top-symbol (foo (@ (bar "baz'>blort")))))
(test/equal ""
(html->shtml "
")
(test/equal ""
(shtml->html '(p "CONTENT")) "CONTENT
")
(test/equal ""
(shtml->html '(br)) "
")
(test/equal ""
(shtml->html '(br "CONTENT")) "
")
(test/equal ""
(shtml->html `(hr (@ (clear "all"))))
"
")
(test/equal ""
(shtml->html `(hr (@ (noshade))))
"
")
(test/equal ""
(shtml->html `(hr (@ (noshade #t))))
"
") ;; TODO: Maybe lose this test.
(test/equal ""
(shtml->html `(hr (@ (noshade "noshade"))))
"
")
(test/equal ""
(shtml->html `(hr (@ (aaa "bbbccc"))))
"
")
(test/equal ""
(shtml->html `(hr (@ (aaa "bbb'ccc"))))
"
")
(test/equal ""
(shtml->html `(hr (@ (aaa "bbb\"ccc"))))
"
")
(test/equal ""
(shtml->html `(hr (@ (aaa "bbb\"ccc'ddd"))))
"
")
(test/equal "" (shtml->html '(& "copy")) "©")
(test/equal "" (shtml->html '(& "rArr")) "⇒")
(test/equal "" (shtml->html `(& ,(string->symbol "rArr"))) "⇒")
(test/equal "" (shtml->html '(& 151)) "")
(test/equal ""
(html->shtml "©")
`(,shtml-top-symbol (& ,(string->symbol "copy"))))
(test/equal ""
(html->shtml "⇒")
`(,shtml-top-symbol (& ,(string->symbol "rArr"))))
(test/equal ""
(html->shtml "")
`(,shtml-top-symbol
(& 151)
;; ,(string (%htmlprag:a2c 151))
))
(test/equal ""
(html->shtml "ϧ")
`(,shtml-top-symbol (& 999)))
(test/equal ""
(shtml->html
`(,shtml-pi-symbol xml "version=\"1.0\" encoding=\"UTF-8\""))
"")
(test/equal ""
(shtml->html
`(,shtml-decl-symbol
,(string->symbol "DOCTYPE")
html
,(string->symbol "PUBLIC")
"-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"))
(string-append
""))
(test/equal ""
(shtml-entity-value '(*ENTITY* "shtml-named-char" "rArr"))
(string->symbol "rArr"))
(test/equal ""
(shtml-entity-value '(& "rArr"))
(string->symbol "rArr"))
(test/equal ""
(shtml-entity-value `(& ,(string->symbol "rArr")))
(string->symbol "rArr"))
(test/equal ""
(html->shtml "xxxyyy")
`(,shtml-top-symbol "xxx" "abc" "yyy"))
(test/equal ""
(html->shtml "xxxyyy")
`(,shtml-top-symbol "xxx" "ab]c" "yyy"))
(test/equal ""
(html->shtml "xxxyyy")
`(,shtml-top-symbol "xxx" "ab]]c" "yyy"))
(test/equal ""
(html->shtml "xxxyyy")
`(,shtml-top-symbol "xxx" "]" "yyy"))
(test/equal ""
(html->shtml "xxxshtml "P3
")
`(,shtml-top-symbol (html (div (p "P1")
(p "P2"))
(p "P3"))))
(test/equal "we no longer convert character references above 126 to string"
(html->shtml "")
`(,shtml-top-symbol (& 151)))
;; TODO: Write more test cases for HTML encoding.
;; TODO: Write test cases for foreign-filter of HTML writing.
;; TODO: Write test cases for attribute values that aren't simple strings.
;; TODO: Document this.
;;
;; (define html-1 "")
;; (define shtml (html->shtml html-1))
;; shtml
;; (define html-2 (shtml->html shtml))
;; html-2
))
;;; @unnumberedsec History
;;; @table @asis
;;;
;;; @item Version 0.16 --- 2005-12-18
;;; Documentation fix.
;;;
;;; @item Version 0.15 --- 2005-12-18
;;; In the HTML parent element constraints that are used for structure
;;; recovery, @code{div} is now always permitted as a parent, as a stopgap
;;; measure until substantial time can be spent reworking the algorithm to
;;; better support @code{div} (bug reported by Corey Sweeney and Jepri). Also
;;; no longer convert to Scheme character any HTML numeric character reference
;;; with value above 126, to avoid Unicode problem with PLT 299/300 (bug
;;; reported by Corey Sweeney).
;;;
;;; @item Version 0.14 --- 2005-06-16
;;; XML CDATA sections are now tokenized. Thanks to Alejandro Forero Cuervo
;;; for suggesting this feature. The deprecated procedures @code{sxml->html}
;;; and @code{write-sxml-html} have been removed. Minor documentation changes.
;;;
;;; @item Version 0.13 --- 2005-02-23
;;; HtmlPrag now requires @code{syntax-rules}, and a reader that can read
;;; @code{@@} as a symbol. SHTML now has a special @code{&} element for
;;; character entities, and it is emitted by the parser rather than the old
;;; @code{*ENTITY*} kludge. @code{shtml-entity-value} supports both the new
;;; and the old character entity representations. @code{shtml-entity-value}
;;; now yields @code{#f} on invalid SHTML entity, rather than raising an error.
;;; @code{write-shtml-as-html} now has a third argument, @code{foreign-filter}.
;;; @code{write-shtml-as-html} now emits SHTML @code{&} entity references.
;;; Changed @code{shtml-named-char-id} and @code{shtml-numeric-char-id}, as
;;; previously warned. Testeez is now used for the test suite. Test procedure
;;; is now the internal @code{%htmlprag:test}. Documentation changes.
;;; Notably, much documentation about using HtmlPrag under various particular
;;; Scheme implementations has been removed.
;;;
;;; @item Version 0.12 --- 2004-07-12
;;; Forward-slash in an unquoted attribute value is now considered a value
;;; constituent rather than an unconsumed terminator of the value (thanks to
;;; Maurice Davis for reporting and a suggested fix). @code{xml:} is now
;;; preserved as a namespace qualifier (thanks to Peter Barabas for
;;; reporting). Output port term of @code{write-shtml-as-html} is now
;;; optional. Began documenting loading for particular implementation-specific
;;; packagings.
;;;
;;; @item Version 0.11 --- 2004-05-13
;;; To reduce likely namespace collisions with SXML tools, and in anticipation
;;; of a forthcoming set of new features, introduced the concept of ``SHTML,''
;;; which will be elaborated upon in a future version of HtmlPrag. Renamed
;;; @code{sxml-@var{x}-symbol} to @code{shtml-@var{x}-symbol},
;;; @code{sxml-html-@var{x}} to @code{shtml-@var{x}}, and
;;; @code{sxml-token-kind} to @code{shtml-token-kind}. @code{html->shtml},
;;; @code{shtml->html}, and @code{write-shtml-as-html} have been added as
;;; names. Considered deprecated but still defined (see the ``Deprecated''
;;; section of this documentation) are @code{sxml->html} and
;;; @code{write-sxml-html}. The growing pains should now be all but over.
;;; Internally, @code{htmlprag-internal:error} introduced for Bigloo
;;; portability. SISC returned to the test list; thanks to Scott G. Miller
;;; for his help. Fixed a new character @code{eq?} bug, thanks to SISC.
;;;
;;; @item Version 0.10 --- 2004-05-11
;;; All public identifiers have been renamed to drop the ``@code{htmlprag:}''
;;; prefix. The portability identifiers have been renamed to begin with an
;;; @code{htmlprag-internal:} prefix, are now considered strictly
;;; internal-use-only, and have otherwise been changed. @code{parse-html} and
;;; @code{always-empty-html-elements} are no longer public.
;;; @code{test-htmlprag} now tests @code{html->sxml} rather than
;;; @code{parse-html}. SISC temporarily removed from the test list, until an
;;; open source Java that works correctly is found.
;;;
;;; @item Version 0.9 --- 2004-05-07
;;; HTML encoding procedures added. Added
;;; @code{htmlprag:sxml-html-entity-value}. Upper-case @code{X} in hexadecimal
;;; character entities is now parsed, in addition to lower-case @code{x}.
;;; Added @code{htmlprag:always-empty-html-elements}. Added additional
;;; portability bindings. Added more test cases.
;;;
;;; @item Version 0.8 --- 2004-04-27
;;; Entity references (symbolic, decimal numeric, hexadecimal numeric) are now
;;; parsed into @code{*ENTITY*} SXML. SXML symbols like @code{*TOP*} are now
;;; always upper-case, regardless of the Scheme implementation. Identifiers
;;; such as @code{htmlprag:sxml-top-symbol} are bound to the upper-case
;;; symbols. Procedures @code{htmlprag:html->sxml-0nf},
;;; @code{htmlprag:html->sxml-1nf}, and @code{htmlprag:html->sxml-2nf} have
;;; been added. @code{htmlprag:html->sxml} now an alias for
;;; @code{htmlprag:html->sxml-0nf}. @code{htmlprag:parse} has been refashioned
;;; as @code{htmlprag:parse-html} and should no longer be directly. A number
;;; of identifiers have been renamed to be more appropriate when the
;;; @code{htmlprag:} prefix is dropped in some implementation-specific
;;; packagings of HtmlPrag: @code{htmlprag:make-tokenizer} to
;;; @code{htmlprag:make-html-tokenizer}, @code{htmlprag:parse/tokenizer} to
;;; @code{htmlprag:parse-html/tokenizer}, @code{htmlprag:html->token-list} to
;;; @code{htmlprag:tokenize-html}, @code{htmlprag:token-kind} to
;;; @code{htmlprag:sxml-token-kind}, and @code{htmlprag:test} to
;;; @code{htmlprag:test-htmlprag}. Verbatim elements with empty-element tag
;;; syntax are handled correctly. New versions of Bigloo and RScheme tested.
;;;
;;; @item Version 0.7 --- 2004-03-10
;;; Verbatim pair elements like @code{script} and @code{xmp} are now parsed
;;; correctly. Two Scheme implementations have temporarily been dropped from
;;; regression testing: Kawa, due to a Java bytecode verifier error likely due
;;; to a Java installation problem on the test machine; and SXM 1.1, due to
;;; hitting a limit on the number of literals late in the test suite code.
;;; Tested newer versions of Bigloo, Chicken, Gauche, Guile, MIT Scheme, PLT
;;; MzScheme, RScheme, SISC, and STklos. RScheme no longer requires the
;;; ``@code{(define get-output-string close-output-port)}'' workaround.
;;;
;;; @item Version 0.6 --- 2003-07-03
;;; Fixed uses of @code{eq?} in character comparisons, thanks to Scott G.
;;; Miller. Added @code{htmlprag:html->normalized-sxml} and
;;; @code{htmlprag:html->nonnormalized-sxml}. Started to add
;;; @code{close-output-port} to uses of output strings, then reverted due to
;;; bug in one of the supported dialects. Tested newer versions of Bigloo,
;;; Gauche, PLT MzScheme, RScheme.
;;;
;;; @item Version 0.5 --- 2003-02-26
;;; Removed uses of @code{call-with-values}. Re-ordered top-level definitions,
;;; for portability. Now tests under Kawa 1.6.99, RScheme 0.7.3.2, Scheme 48
;;; 0.57, SISC 1.7.4, STklos 0.54, and SXM 1.1.
;;;
;;; @item Version 0.4 --- 2003-02-19
;;; Apostrophe-quoted element attribute values are now handled. A bug that
;;; incorrectly assumed left-to-right term evaluation order has been fixed
;;; (thanks to MIT Scheme for confronting us with this). Now also tests OK
;;; under Gauche 0.6.6 and MIT Scheme 7.7.1. Portability improvement for
;;; implementations (e.g., RScheme 0.7.3.2.b6, Stalin 0.9) that cannot read
;;; @code{@@} as a symbol (although those implementations tend to present other
;;; portability issues, as yet unresolved).
;;;
;;; @item Version 0.3 --- 2003-02-05
;;; A test suite with 66 cases has been added, and necessary changes have been
;;; made for the suite to pass on five popular Scheme implementations. XML
;;; processing instructions are now parsed. Parent constraints have been added
;;; for @code{colgroup}, @code{tbody}, and @code{thead} elements. Erroneous
;;; input, including invalid hexadecimal entity reference syntax and extraneous
;;; double quotes in element tags, is now parsed better.
;;; @code{htmlprag:token-kind} emits symbols more consistent with SXML.
;;;
;;; @item Version 0.2 --- 2003-02-02
;;; Portability improvements.
;;;
;;; @item Version 0.1 --- 2003-01-31
;;; Dusted off old Guile-specific code from April 2001, converted to emit SXML,
;;; mostly ported to R5RS and SRFI-6, added some XHTML support and
;;; documentation. A little preliminary testing has been done, and the package
;;; is already useful for some applications, but this release should be
;;; considered a preview to invite comments.
;;;
;;; @end table
(provide (all-defined)))