cleanup and contracts
svn: r13871
This commit is contained in:
parent
20424b47a4
commit
12ffd359a7
|
@ -1,622 +0,0 @@
|
|||
#lang scheme
|
||||
;; copyright by Paul Graunke June 2000 AD
|
||||
|
||||
(require "html-structs.ss"
|
||||
"html-spec.ss"
|
||||
(prefix-in sgml: "sgml-reader.ss")
|
||||
xml)
|
||||
|
||||
(provide (all-from-out "html-structs.ss")
|
||||
read-xhtml
|
||||
read-html
|
||||
read-html-as-xml
|
||||
use-html-spec)
|
||||
|
||||
;; Html-content = Html-element | Pc-data | Entity
|
||||
|
||||
;; xml-single-content->html : Content (listof Html-content) -> (listof Html-content)
|
||||
(define (xml-single-content->html x acc)
|
||||
(cond
|
||||
((element? x)
|
||||
(case (element-name x)
|
||||
((basefont) (cons (make-basefont (element-attributes x)) acc))
|
||||
((br) (cons (make-br (element-attributes x)) acc))
|
||||
((area) (cons (make-area (element-attributes x)) acc))
|
||||
((link) (cons (make-link (element-attributes x)) acc))
|
||||
((img) (cons (make-img (element-attributes x)) acc))
|
||||
((param) (cons (make-param (element-attributes x)) acc))
|
||||
((hr) (cons (make-hr (element-attributes x)) acc))
|
||||
((input) (cons (make-input (element-attributes x)) acc))
|
||||
((col) (cons (make-col (element-attributes x)) acc))
|
||||
((isindex) (cons (make-isindex (element-attributes x)) acc))
|
||||
((base) (cons (make-base (element-attributes x)) acc))
|
||||
((meta) (cons (make-meta (element-attributes x)) acc))
|
||||
((mzscheme)
|
||||
(cons
|
||||
(make-mzscheme
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((html)
|
||||
(cons
|
||||
(make-html
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((div)
|
||||
(cons
|
||||
(make-div
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((center)
|
||||
(cons
|
||||
(make-center
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((blockquote)
|
||||
(cons
|
||||
(make-blockquote
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((ins)
|
||||
(cons
|
||||
(make-ins
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((del)
|
||||
(cons
|
||||
(make-del
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((dd)
|
||||
(cons
|
||||
(make-dd
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((li)
|
||||
(cons
|
||||
(make-li
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((th)
|
||||
(cons
|
||||
(make-th
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((td)
|
||||
(cons
|
||||
(make-td
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((iframe)
|
||||
(cons
|
||||
(make-iframe
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((noframes)
|
||||
(cons
|
||||
(make-noframes
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((noscript)
|
||||
(cons
|
||||
(make-noscript
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((style)
|
||||
(cons
|
||||
(make-style
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((script)
|
||||
(cons
|
||||
(make-script
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((option)
|
||||
(cons
|
||||
(make-option
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((textarea)
|
||||
(cons
|
||||
(make-textarea
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((title)
|
||||
(cons
|
||||
(make-title
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((head)
|
||||
(cons
|
||||
(make-head
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((tr)
|
||||
(cons
|
||||
(make-tr
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((colgroup)
|
||||
(cons
|
||||
(make-colgroup
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((thead)
|
||||
(cons
|
||||
(make-thead
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((tfoot)
|
||||
(cons
|
||||
(make-tfoot
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((tbody)
|
||||
(cons
|
||||
(make-tbody
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((tt)
|
||||
(cons
|
||||
(make-tt
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((i)
|
||||
(cons
|
||||
(make-i
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((b)
|
||||
(cons
|
||||
(make-b
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((u)
|
||||
(cons
|
||||
(make-u
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((s)
|
||||
(cons
|
||||
(make-s
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((strike)
|
||||
(cons
|
||||
(make-strike
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((big)
|
||||
(cons
|
||||
(make-big
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((small)
|
||||
(cons
|
||||
(make-small
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((em)
|
||||
(cons
|
||||
(make-em
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((strong)
|
||||
(cons
|
||||
(make-strong
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((dfn)
|
||||
(cons
|
||||
(make-dfn
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((code)
|
||||
(cons
|
||||
(make-code
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((samp)
|
||||
(cons
|
||||
(make-samp
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((kbd)
|
||||
(cons
|
||||
(make-kbd
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((var)
|
||||
(cons
|
||||
(make-var
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((cite)
|
||||
(cons
|
||||
(make-cite
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((abbr)
|
||||
(cons
|
||||
(make-abbr
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((acronym)
|
||||
(cons
|
||||
(make-acronym
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((sub)
|
||||
(cons
|
||||
(make-sub
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((sup)
|
||||
(cons
|
||||
(make-sup
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((span)
|
||||
(cons
|
||||
(make-span
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((bdo)
|
||||
(cons
|
||||
(make-bdo
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((font)
|
||||
(cons
|
||||
(make-font
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((p)
|
||||
(cons
|
||||
(make-p
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((h1)
|
||||
(cons
|
||||
(make-h1
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((h2)
|
||||
(cons
|
||||
(make-h2
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((h3)
|
||||
(cons
|
||||
(make-h3
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((h4)
|
||||
(cons
|
||||
(make-h4
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((h5)
|
||||
(cons
|
||||
(make-h5
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((h6)
|
||||
(cons
|
||||
(make-h6
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((q)
|
||||
(cons
|
||||
(make-q
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((dt)
|
||||
(cons
|
||||
(make-dt
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((legend)
|
||||
(cons
|
||||
(make-legend
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((caption)
|
||||
(cons
|
||||
(make-caption
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((table)
|
||||
(cons
|
||||
(make-table
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((button)
|
||||
(cons
|
||||
(make-button
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((fieldset)
|
||||
(cons
|
||||
(make-fieldset
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((optgroup)
|
||||
(cons
|
||||
(make-optgroup
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((select)
|
||||
(cons
|
||||
(make-select
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((label)
|
||||
(cons
|
||||
(make-label
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((form)
|
||||
(cons
|
||||
(make-form
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((ol)
|
||||
(cons
|
||||
(make-ol
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((ul)
|
||||
(cons
|
||||
(make-ul
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((dir)
|
||||
(cons
|
||||
(make-dir
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((menu)
|
||||
(cons
|
||||
(make-menu
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((dl)
|
||||
(cons
|
||||
(make-dl
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((pre)
|
||||
(cons
|
||||
(make-pre
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((object)
|
||||
(cons
|
||||
(make-object (element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((applet)
|
||||
(cons
|
||||
(make-applet
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((map)
|
||||
(cons
|
||||
(make--map
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((a)
|
||||
(cons
|
||||
(make-a
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((address)
|
||||
(cons
|
||||
(make-address
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((body)
|
||||
(cons
|
||||
(make-body
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
(else acc)))
|
||||
((or (pcdata? x) (entity? x)) (cons x acc))
|
||||
(else acc)))
|
||||
|
||||
;; xml->html : Document -> Html
|
||||
(define (xml->html doc)
|
||||
(let ([root (document-element doc)])
|
||||
(unless (eq? 'html (element-name root))
|
||||
(error 'xml->html "This is not an html document. Expected 'html, given ~a" (element-name root)))
|
||||
(make-html (element-attributes root) (xml-contents->html (element-content root)))))
|
||||
|
||||
|
||||
;; xml-content->html : (listof Content) -> (listof Html-element)
|
||||
(define (xml-contents->html contents)
|
||||
(foldr xml-single-content->html
|
||||
null
|
||||
contents))
|
||||
|
||||
;; read-xhtml : [Input-port] -> Html
|
||||
(define read-xhtml (compose xml->html read-xml))
|
||||
|
||||
;; peel-f : (Html-content -> Bool) (listof Html-content) (listof Html-content) -> (listof Html-content)
|
||||
(define (peel-f toss? to-toss acc0)
|
||||
(foldr (lambda (x acc)
|
||||
(if (toss? x)
|
||||
(append (html-full-content x) acc)
|
||||
(cons x acc)))
|
||||
acc0
|
||||
to-toss))
|
||||
|
||||
;; repackage-html : (listof Html-content) -> Html
|
||||
(define (repackage-html contents)
|
||||
(let* ([html (memf html? contents)]
|
||||
[peeled (peel-f html? contents null)]
|
||||
[body (memf body? peeled)])
|
||||
(make-html (if html
|
||||
(html-element-attributes (car html))
|
||||
null)
|
||||
(append (filter head? peeled)
|
||||
(list (make-body (if body
|
||||
(html-element-attributes (car body))
|
||||
null)
|
||||
(filter (compose not head?) (peel-f body? peeled null))))))))
|
||||
|
||||
;; clean-up-pcdata : (listof Content) -> (listof Content)
|
||||
;; Each pcdata inside a tag that isn't supposed to contain pcdata is either
|
||||
;; a) appended to the end of the previous subelement, if that subelement may contain pcdata
|
||||
;; b) prepended to the front of the next subelement, if that subelement may contain pcdata
|
||||
;; c) discarded
|
||||
;; unknown tags may contain pcdata
|
||||
;; the top level may contain pcdata
|
||||
(define clean-up-pcdata
|
||||
;; clean-up-pcdata : (listof Content) -> (listof Content)
|
||||
(letrec ([clean-up-pcdata
|
||||
(lambda (content)
|
||||
(map (lambda (to-fix)
|
||||
(cond
|
||||
[(element? to-fix)
|
||||
(recontent-xml to-fix
|
||||
(let ([possible (may-contain (element-name to-fix))]
|
||||
[content (element-content to-fix)])
|
||||
(if (or (not possible) (memq 'pcdata possible))
|
||||
(clean-up-pcdata content)
|
||||
(eliminate-pcdata content))))]
|
||||
[else to-fix]))
|
||||
content))]
|
||||
[eliminate-pcdata
|
||||
;: (listof Content) -> (listof Content)
|
||||
(lambda (content)
|
||||
(let ([non-elements (first-non-elements content)]
|
||||
[more (memf element? content)])
|
||||
(if more
|
||||
(let* ([el (car more)]
|
||||
[possible (may-contain (element-name el))])
|
||||
(if (or (not possible) (memq 'pcdata possible))
|
||||
(cons (recontent-xml el (append non-elements (clean-up-pcdata (element-content el)) (eliminate-pcdata (first-non-elements (cdr more)))))
|
||||
(or (memf element? (cdr more)) null))
|
||||
(cons (recontent-xml el (eliminate-pcdata (element-content el)))
|
||||
(eliminate-pcdata (cdr more)))))
|
||||
null)))])
|
||||
clean-up-pcdata))
|
||||
|
||||
;; first-non-elements : (listof Content) -> (listof Content)
|
||||
(define (first-non-elements content)
|
||||
(cond
|
||||
[(null? content) null]
|
||||
[else (if (element? (car content))
|
||||
null
|
||||
(cons (car content) (first-non-elements (cdr content))))]))
|
||||
|
||||
;; recontent-xml : Element (listof Content) -> Element
|
||||
(define (recontent-xml e c)
|
||||
(make-element (source-start e) (source-stop e) (element-name e) (element-attributes e) c))
|
||||
|
||||
;; implicit-starts : Symbol Symbol -> (U #f Symbol)
|
||||
(define (implicit-starts parent child)
|
||||
(or (and (eq? child 'tr) (eq? parent 'table) 'tbody)
|
||||
(and (eq? child 'td) (memq parent '(table tbody tfoot thead)) 'tr)))
|
||||
|
||||
;; may-contain : Kid-lister
|
||||
(define may-contain
|
||||
(sgml:gen-may-contain html-spec))
|
||||
|
||||
(define may-contain-anything
|
||||
(sgml:gen-may-contain null))
|
||||
|
||||
(define use-html-spec (make-parameter #t))
|
||||
|
||||
;; read-html-as-xml : [Input-port] -> (listof Content)
|
||||
(define read-html-as-xml
|
||||
(case-lambda
|
||||
[(port)
|
||||
((if (use-html-spec) clean-up-pcdata values)
|
||||
((sgml:gen-read-sgml (if (use-html-spec)
|
||||
may-contain
|
||||
may-contain-anything)
|
||||
implicit-starts) port))]
|
||||
[() (read-html-as-xml (current-input-port))]))
|
||||
|
||||
;; read-html : [Input-port] -> Html
|
||||
(define read-html
|
||||
(compose repackage-html xml-contents->html read-html-as-xml))
|
|
@ -1,6 +1,33 @@
|
|||
#lang scheme/base
|
||||
|
||||
(provide html-spec)
|
||||
#lang scheme
|
||||
(require "sgml-reader.ss")
|
||||
(provide/contract
|
||||
[html-spec spec/c])
|
||||
|
||||
(define html-spec
|
||||
'(((mzscheme) pcdata) ((html) body head) ((div center blockquote ins del dd li th td iframe noframes noscript) a abbr acronym address applet b basefont bdo big blockquote br button center cite code dfn dir div dl em fieldset font form h1 h2 h3 h4 h5 h6 hr i iframe img input isindex kbd label map menu noframes noscript object ol p pcdata pre q s samp script select small span strike strong sub sup table textarea tt u ul var) ((style script) cdata) ((basefont br area link img param hr input col isindex base meta)) ((option textarea title) pcdata) ((head) base isindex link meta object script style title) ((tr) td th) ((colgroup) col) ((thead tfoot tbody) tr) ((tt i b u s strike big small em strong dfn code samp kbd var cite abbr acronym sub sup span bdo font p h1 h2 h3 h4 h5 h6 q dt legend caption) a abbr acronym applet b basefont bdo big br button cite code dfn em font i iframe img input kbd label map object pcdata q s samp script select small span strike strong sub sup textarea tt u var) ((table) caption col colgroup tbody tfoot thead) ((button) abbr acronym address applet b basefont bdo big blockquote br center cite code dfn dir div dl em font h1 h2 h3 h4 h5 h6 hr i img kbd map menu noframes noscript object ol p pcdata pre q s samp script small span strike strong sub sup table tt u ul var) ((fieldset) a abbr acronym address applet b basefont bdo big blockquote br button center cite code dfn dir div dl em fieldset font form h1 h2 h3 h4 h5 h6 hr i iframe img input isindex kbd label legend map menu noframes noscript object ol p pcdata pcdata pre q s samp script select small span strike strong sub sup table textarea tt u ul var) ((optgroup) option) ((select) optgroup option) ((label) a abbr acronym applet b basefont bdo big br button cite code dfn em font i iframe img input kbd map object pcdata q s samp script select small span strike strong sub sup textarea tt u var) ((form) a abbr acronym address applet b basefont bdo big blockquote br button center cite code dfn dir div dl em fieldset font h1 h2 h3 h4 h5 h6 hr i iframe img input isindex kbd label map menu noframes noscript object ol p pcdata pre q s samp script select small span strike strong sub sup table textarea tt u ul var) ((ol ul dir menu) li) ((dl) dd dt) ((pre) a abbr acronym b bdo br button cite code dfn em i iframe input kbd label map pcdata q s samp script select span strike strong textarea tt u var) ((object applet) a abbr acronym address applet b basefont bdo big blockquote br button center cite code dfn dir div dl em fieldset font form h1 h2 h3 h4 h5 h6 hr i iframe img input isindex kbd label map menu noframes noscript object ol p param pcdata pre q s samp script select small span strike strong sub sup table textarea tt u ul var) ((map) address area blockquote center dir div dl fieldset form h1 h2 h3 h4 h5 h6 hr isindex menu noframes noscript ol p pre table ul) ((a) abbr acronym applet b basefont bdo big br button cite code dfn em font i iframe img input kbd label map object pcdata q s samp script select small span strike strong sub sup textarea tt u var) ((address) a abbr acronym applet b basefont bdo big br button cite code dfn em font i iframe img input kbd label map object p pcdata q s samp script select small span strike strong sub sup textarea tt u var) ((body) a abbr acronym address applet b basefont bdo big blockquote br button center cite code del dfn dir div dl em fieldset font form h1 h2 h3 h4 h5 h6 hr i iframe img input ins isindex kbd label map menu noframes noscript object ol p pcdata pre q s samp script select small span strike strong sub sup table textarea tt u ul var)))
|
||||
'(((mzscheme) pcdata)
|
||||
((html) body head)
|
||||
((div center blockquote ins del dd li th td iframe noframes noscript) a abbr acronym address applet b basefont bdo big blockquote br button center cite code dfn dir div dl em fieldset font form h1 h2 h3 h4 h5 h6 hr i iframe img input isindex kbd label map menu noframes noscript object ol p pcdata pre q s samp script select small span strike strong sub sup table textarea tt u ul var)
|
||||
((style script) cdata)
|
||||
((basefont br area link img param hr input col isindex base meta))
|
||||
((option textarea title) pcdata)
|
||||
((head) base isindex link meta object script style title)
|
||||
((tr) td th)
|
||||
((colgroup) col)
|
||||
((thead tfoot tbody) tr)
|
||||
((tt i b u s strike big small em strong dfn code samp kbd var cite abbr acronym sub sup span bdo font p h1 h2 h3 h4 h5 h6 q dt legend caption) a abbr acronym applet b basefont bdo big br button cite code dfn em font i iframe img input kbd label map object pcdata q s samp script select small span strike strong sub sup textarea tt u var)
|
||||
((table) caption col colgroup tbody tfoot thead)
|
||||
((button) abbr acronym address applet b basefont bdo big blockquote br center cite code dfn dir div dl em font h1 h2 h3 h4 h5 h6 hr i img kbd map menu noframes noscript object ol p pcdata pre q s samp script small span strike strong sub sup table tt u ul var)
|
||||
((fieldset) a abbr acronym address applet b basefont bdo big blockquote br button center cite code dfn dir div dl em fieldset font form h1 h2 h3 h4 h5 h6 hr i iframe img input isindex kbd label legend map menu noframes noscript object ol p pcdata pcdata pre q s samp script select small span strike strong sub sup table textarea tt u ul var)
|
||||
((optgroup) option)
|
||||
((select) optgroup option)
|
||||
((label) a abbr acronym applet b basefont bdo big br button cite code dfn em font i iframe img input kbd map object pcdata q s samp script select small span strike strong sub sup textarea tt u var)
|
||||
((form) a abbr acronym address applet b basefont bdo big blockquote br button center cite code dfn dir div dl em fieldset font h1 h2 h3 h4 h5 h6 hr i iframe img input isindex kbd label map menu noframes noscript object ol p pcdata pre q s samp script select small span strike strong sub sup table textarea tt u ul var)
|
||||
((ol ul dir menu) li)
|
||||
((dl) dd dt)
|
||||
((pre) a abbr acronym b bdo br button cite code dfn em i iframe input kbd label map pcdata q s samp script select span strike strong textarea tt u var)
|
||||
((object applet) a abbr acronym address applet b basefont bdo big blockquote br button center cite code dfn dir div dl em fieldset font form h1 h2 h3 h4 h5 h6 hr i iframe img input isindex kbd label map menu noframes noscript object ol p param pcdata pre q s samp script select small span strike strong sub sup table textarea tt u ul var)
|
||||
((map) address area blockquote center dir div dl fieldset form h1 h2 h3 h4 h5 h6 hr isindex menu noframes noscript ol p pre table ul)
|
||||
((a) abbr acronym applet b basefont bdo big br button cite code dfn em font i iframe img input kbd label map object pcdata q s samp script select small span strike strong sub sup textarea tt u var)
|
||||
((address) a abbr acronym applet b basefont bdo big br button cite code dfn em font i iframe img input kbd label map object p pcdata q s samp script select small span strike strong sub sup textarea tt u var)
|
||||
((body) a abbr acronym address applet b basefont bdo big blockquote br button center cite code del dfn dir div dl em fieldset font form h1 h2 h3 h4 h5 h6 hr i iframe img input ins isindex kbd label map menu noframes noscript object ol p pcdata pre q s samp script select small span strike strong sub sup table textarea tt u ul var)
|
||||
))
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#lang scheme
|
||||
(provide (all-defined-out))
|
||||
(require xml)
|
||||
|
||||
(define-struct html-element (attributes))
|
||||
(define-struct (html-full html-element) (content))
|
||||
|
@ -84,7 +84,7 @@
|
|||
(define-struct (basefont html-element) ())
|
||||
(define-struct (br html-element) ())
|
||||
(define-struct (area html-element) ())
|
||||
(define-struct (link html-element) ())
|
||||
(define-struct (alink html-element) ())
|
||||
(define-struct (img html-element) ())
|
||||
(define-struct (param html-element) ())
|
||||
(define-struct (hr html-element) ())
|
||||
|
@ -93,3 +93,272 @@
|
|||
(define-struct (isindex html-element) ())
|
||||
(define-struct (base html-element) ())
|
||||
(define-struct (meta html-element) ())
|
||||
|
||||
;; Html-content = Html-element | Pc-data | Entity
|
||||
(define html-content/c
|
||||
(or/c html-element? pcdata? entity?))
|
||||
|
||||
(provide/contract
|
||||
[html-content/c contract?]
|
||||
[struct html-element ([attributes (listof attribute?)])]
|
||||
[struct (html-full html-element)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (mzscheme html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (html html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (div html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (center html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (blockquote html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (ins html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (del html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (dd html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (li html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (th html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (td html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (iframe html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (noframes html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (noscript html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (style html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (script html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (option html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (textarea html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (title html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (head html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (tr html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (colgroup html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (thead html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (tfoot html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (tbody html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (tt html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (i html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (b html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (u html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (s html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (strike html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (big html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (small html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (em html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (strong html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (dfn html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (code html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (samp html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (kbd html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (var html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (cite html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (abbr html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (acronym html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (sub html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (sup html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (span html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (bdo html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (font html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (p html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (h1 html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (h2 html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (h3 html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (h4 html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (h5 html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (h6 html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (q html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (dt html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (legend html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (caption html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (table html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (button html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (fieldset html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (optgroup html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (select html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (label html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (form html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (ol html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (ul html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (dir html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (menu html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (dl html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (pre html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (object html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (applet html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (-map html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (a html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (address html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (body html-full)
|
||||
([attributes (listof attribute?)]
|
||||
[content (listof html-content/c)])]
|
||||
[struct (basefont html-element)
|
||||
([attributes (listof attribute?)])]
|
||||
[struct (br html-element)
|
||||
([attributes (listof attribute?)])]
|
||||
[struct (area html-element)
|
||||
([attributes (listof attribute?)])]
|
||||
[struct (alink html-element)
|
||||
([attributes (listof attribute?)])]
|
||||
[struct (img html-element)
|
||||
([attributes (listof attribute?)])]
|
||||
[struct (param html-element)
|
||||
([attributes (listof attribute?)])]
|
||||
[struct (hr html-element)
|
||||
([attributes (listof attribute?)])]
|
||||
[struct (input html-element)
|
||||
([attributes (listof attribute?)])]
|
||||
[struct (col html-element)
|
||||
([attributes (listof attribute?)])]
|
||||
[struct (isindex html-element)
|
||||
([attributes (listof attribute?)])]
|
||||
[struct (base html-element)
|
||||
([attributes (listof attribute?)])]
|
||||
[struct (meta html-element)
|
||||
([attributes (listof attribute?)])])
|
|
@ -287,7 +287,7 @@ A @scheme[Contents-of-head] is either
|
|||
@itemize[
|
||||
@item[@scheme[base]]
|
||||
@item[@scheme[isindex]]
|
||||
@item[@scheme[link]]
|
||||
@item[@scheme[alink]]
|
||||
@item[@scheme[meta]]
|
||||
@item[@scheme[object]]
|
||||
@item[@scheme[script]]
|
||||
|
|
|
@ -1,6 +1,619 @@
|
|||
#lang scheme
|
||||
;; copyright by Paul Graunke June 2000 AD
|
||||
|
||||
(require "html-mod.ss" "sgml-reader.ss")
|
||||
(provide (all-from-out "html-mod.ss")
|
||||
(require "html-structs.ss"
|
||||
"html-spec.ss"
|
||||
"sgml-reader.ss"
|
||||
xml)
|
||||
|
||||
(provide (all-from-out "html-structs.ss")
|
||||
read-html-comments)
|
||||
(provide/contract
|
||||
[use-html-spec (parameter/c boolean?)]
|
||||
[read-html (() (input-port?) . ->* . html?)]
|
||||
[read-xhtml (() (input-port?) . ->* . html?)]
|
||||
[read-html-as-xml (() (input-port?) . ->* . (listof content?))])
|
||||
|
||||
;; xml-single-content->html : Content (listof Html-content) -> (listof Html-content)
|
||||
(define (xml-single-content->html x acc)
|
||||
(cond
|
||||
((element? x)
|
||||
(case (element-name x)
|
||||
((basefont) (cons (make-basefont (element-attributes x)) acc))
|
||||
((br) (cons (make-br (element-attributes x)) acc))
|
||||
((area) (cons (make-area (element-attributes x)) acc))
|
||||
((link) (cons (make-alink (element-attributes x)) acc))
|
||||
((img) (cons (make-img (element-attributes x)) acc))
|
||||
((param) (cons (make-param (element-attributes x)) acc))
|
||||
((hr) (cons (make-hr (element-attributes x)) acc))
|
||||
((input) (cons (make-input (element-attributes x)) acc))
|
||||
((col) (cons (make-col (element-attributes x)) acc))
|
||||
((isindex) (cons (make-isindex (element-attributes x)) acc))
|
||||
((base) (cons (make-base (element-attributes x)) acc))
|
||||
((meta) (cons (make-meta (element-attributes x)) acc))
|
||||
((mzscheme)
|
||||
(cons
|
||||
(make-mzscheme
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((html)
|
||||
(cons
|
||||
(make-html
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((div)
|
||||
(cons
|
||||
(make-div
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((center)
|
||||
(cons
|
||||
(make-center
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((blockquote)
|
||||
(cons
|
||||
(make-blockquote
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((ins)
|
||||
(cons
|
||||
(make-ins
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((del)
|
||||
(cons
|
||||
(make-del
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((dd)
|
||||
(cons
|
||||
(make-dd
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((li)
|
||||
(cons
|
||||
(make-li
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((th)
|
||||
(cons
|
||||
(make-th
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((td)
|
||||
(cons
|
||||
(make-td
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((iframe)
|
||||
(cons
|
||||
(make-iframe
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((noframes)
|
||||
(cons
|
||||
(make-noframes
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((noscript)
|
||||
(cons
|
||||
(make-noscript
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((style)
|
||||
(cons
|
||||
(make-style
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((script)
|
||||
(cons
|
||||
(make-script
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((option)
|
||||
(cons
|
||||
(make-option
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((textarea)
|
||||
(cons
|
||||
(make-textarea
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((title)
|
||||
(cons
|
||||
(make-title
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((head)
|
||||
(cons
|
||||
(make-head
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((tr)
|
||||
(cons
|
||||
(make-tr
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((colgroup)
|
||||
(cons
|
||||
(make-colgroup
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((thead)
|
||||
(cons
|
||||
(make-thead
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((tfoot)
|
||||
(cons
|
||||
(make-tfoot
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((tbody)
|
||||
(cons
|
||||
(make-tbody
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((tt)
|
||||
(cons
|
||||
(make-tt
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((i)
|
||||
(cons
|
||||
(make-i
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((b)
|
||||
(cons
|
||||
(make-b
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((u)
|
||||
(cons
|
||||
(make-u
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((s)
|
||||
(cons
|
||||
(make-s
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((strike)
|
||||
(cons
|
||||
(make-strike
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((big)
|
||||
(cons
|
||||
(make-big
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((small)
|
||||
(cons
|
||||
(make-small
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((em)
|
||||
(cons
|
||||
(make-em
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((strong)
|
||||
(cons
|
||||
(make-strong
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((dfn)
|
||||
(cons
|
||||
(make-dfn
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((code)
|
||||
(cons
|
||||
(make-code
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((samp)
|
||||
(cons
|
||||
(make-samp
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((kbd)
|
||||
(cons
|
||||
(make-kbd
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((var)
|
||||
(cons
|
||||
(make-var
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((cite)
|
||||
(cons
|
||||
(make-cite
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((abbr)
|
||||
(cons
|
||||
(make-abbr
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((acronym)
|
||||
(cons
|
||||
(make-acronym
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((sub)
|
||||
(cons
|
||||
(make-sub
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((sup)
|
||||
(cons
|
||||
(make-sup
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((span)
|
||||
(cons
|
||||
(make-span
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((bdo)
|
||||
(cons
|
||||
(make-bdo
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((font)
|
||||
(cons
|
||||
(make-font
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((p)
|
||||
(cons
|
||||
(make-p
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((h1)
|
||||
(cons
|
||||
(make-h1
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((h2)
|
||||
(cons
|
||||
(make-h2
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((h3)
|
||||
(cons
|
||||
(make-h3
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((h4)
|
||||
(cons
|
||||
(make-h4
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((h5)
|
||||
(cons
|
||||
(make-h5
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((h6)
|
||||
(cons
|
||||
(make-h6
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((q)
|
||||
(cons
|
||||
(make-q
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((dt)
|
||||
(cons
|
||||
(make-dt
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((legend)
|
||||
(cons
|
||||
(make-legend
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((caption)
|
||||
(cons
|
||||
(make-caption
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((table)
|
||||
(cons
|
||||
(make-table
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((button)
|
||||
(cons
|
||||
(make-button
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((fieldset)
|
||||
(cons
|
||||
(make-fieldset
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((optgroup)
|
||||
(cons
|
||||
(make-optgroup
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((select)
|
||||
(cons
|
||||
(make-select
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((label)
|
||||
(cons
|
||||
(make-label
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((form)
|
||||
(cons
|
||||
(make-form
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((ol)
|
||||
(cons
|
||||
(make-ol
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((ul)
|
||||
(cons
|
||||
(make-ul
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((dir)
|
||||
(cons
|
||||
(make-dir
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((menu)
|
||||
(cons
|
||||
(make-menu
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((dl)
|
||||
(cons
|
||||
(make-dl
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((pre)
|
||||
(cons
|
||||
(make-pre
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((object)
|
||||
(cons
|
||||
(make-object (element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((applet)
|
||||
(cons
|
||||
(make-applet
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((map)
|
||||
(cons
|
||||
(make--map
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((a)
|
||||
(cons
|
||||
(make-a
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((address)
|
||||
(cons
|
||||
(make-address
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
((body)
|
||||
(cons
|
||||
(make-body
|
||||
(element-attributes x)
|
||||
(xml-contents->html (element-content x)))
|
||||
acc))
|
||||
(else acc)))
|
||||
((or (pcdata? x) (entity? x)) (cons x acc))
|
||||
(else acc)))
|
||||
|
||||
;; xml->html : Document -> Html
|
||||
(define (xml->html doc)
|
||||
(let ([root (document-element doc)])
|
||||
(unless (eq? 'html (element-name root))
|
||||
(error 'xml->html "This is not an html document. Expected 'html, given ~a" (element-name root)))
|
||||
(make-html (element-attributes root) (xml-contents->html (element-content root)))))
|
||||
|
||||
|
||||
;; xml-content->html : (listof Content) -> (listof Html-element)
|
||||
(define (xml-contents->html contents)
|
||||
(foldr xml-single-content->html
|
||||
null
|
||||
contents))
|
||||
|
||||
;; read-xhtml : [Input-port] -> Html
|
||||
(define read-xhtml (compose xml->html read-xml))
|
||||
|
||||
;; peel-f : (Html-content -> Bool) (listof Html-content) (listof Html-content) -> (listof Html-content)
|
||||
(define (peel-f toss? to-toss acc0)
|
||||
(foldr (lambda (x acc)
|
||||
(if (toss? x)
|
||||
(append (html-full-content x) acc)
|
||||
(cons x acc)))
|
||||
acc0
|
||||
to-toss))
|
||||
|
||||
;; repackage-html : (listof Html-content) -> Html
|
||||
(define (repackage-html contents)
|
||||
(let* ([html (memf html? contents)]
|
||||
[peeled (peel-f html? contents null)]
|
||||
[body (memf body? peeled)])
|
||||
(make-html (if html
|
||||
(html-element-attributes (car html))
|
||||
null)
|
||||
(append (filter head? peeled)
|
||||
(list (make-body (if body
|
||||
(html-element-attributes (car body))
|
||||
null)
|
||||
(filter (compose not head?) (peel-f body? peeled null))))))))
|
||||
|
||||
;; clean-up-pcdata : (listof Content) -> (listof Content)
|
||||
;; Each pcdata inside a tag that isn't supposed to contain pcdata is either
|
||||
;; a) appended to the end of the previous subelement, if that subelement may contain pcdata
|
||||
;; b) prepended to the front of the next subelement, if that subelement may contain pcdata
|
||||
;; c) discarded
|
||||
;; unknown tags may contain pcdata
|
||||
;; the top level may contain pcdata
|
||||
(define clean-up-pcdata
|
||||
;; clean-up-pcdata : (listof Content) -> (listof Content)
|
||||
(letrec ([clean-up-pcdata
|
||||
(lambda (content)
|
||||
(map (lambda (to-fix)
|
||||
(cond
|
||||
[(element? to-fix)
|
||||
(recontent-xml to-fix
|
||||
(let ([possible (may-contain (element-name to-fix))]
|
||||
[content (element-content to-fix)])
|
||||
(if (or (not possible) (memq 'pcdata possible))
|
||||
(clean-up-pcdata content)
|
||||
(eliminate-pcdata content))))]
|
||||
[else to-fix]))
|
||||
content))]
|
||||
[eliminate-pcdata
|
||||
;: (listof Content) -> (listof Content)
|
||||
(lambda (content)
|
||||
(let ([non-elements (first-non-elements content)]
|
||||
[more (memf element? content)])
|
||||
(if more
|
||||
(let* ([el (car more)]
|
||||
[possible (may-contain (element-name el))])
|
||||
(if (or (not possible) (memq 'pcdata possible))
|
||||
(cons (recontent-xml el (append non-elements (clean-up-pcdata (element-content el)) (eliminate-pcdata (first-non-elements (cdr more)))))
|
||||
(or (memf element? (cdr more)) null))
|
||||
(cons (recontent-xml el (eliminate-pcdata (element-content el)))
|
||||
(eliminate-pcdata (cdr more)))))
|
||||
null)))])
|
||||
clean-up-pcdata))
|
||||
|
||||
;; first-non-elements : (listof Content) -> (listof Content)
|
||||
(define (first-non-elements content)
|
||||
(cond
|
||||
[(null? content) null]
|
||||
[else (if (element? (car content))
|
||||
null
|
||||
(cons (car content) (first-non-elements (cdr content))))]))
|
||||
|
||||
;; recontent-xml : Element (listof Content) -> Element
|
||||
(define (recontent-xml e c)
|
||||
(make-element (source-start e) (source-stop e) (element-name e) (element-attributes e) c))
|
||||
|
||||
;; implicit-starts : Symbol Symbol -> (U #f Symbol)
|
||||
(define (implicit-starts parent child)
|
||||
(or (and (eq? child 'tr) (eq? parent 'table) 'tbody)
|
||||
(and (eq? child 'td) (memq parent '(table tbody tfoot thead)) 'tr)))
|
||||
|
||||
;; may-contain : Kid-lister
|
||||
(define may-contain
|
||||
(gen-may-contain html-spec))
|
||||
|
||||
(define may-contain-anything
|
||||
(gen-may-contain null))
|
||||
|
||||
(define use-html-spec (make-parameter #t))
|
||||
|
||||
;; read-html-as-xml : [Input-port] -> (listof Content)
|
||||
(define (read-html-as-xml [port (current-input-port)])
|
||||
((if (use-html-spec) clean-up-pcdata values)
|
||||
((gen-read-sgml (if (use-html-spec)
|
||||
may-contain
|
||||
may-contain-anything)
|
||||
implicit-starts) port)))
|
||||
|
||||
;; read-html : [Input-port] -> Html
|
||||
(define read-html
|
||||
(compose repackage-html xml-contents->html read-html-as-xml))
|
|
@ -1,9 +1,4 @@
|
|||
#lang scheme/base
|
||||
|
||||
(require "html.ss")
|
||||
(provide (except-out (all-from-out "html.ss")
|
||||
link struct:link make-link link?)
|
||||
(rename-out [link alink]
|
||||
[struct:link struct:alink]
|
||||
[make-link make-alink]
|
||||
[link? alink?]))
|
||||
(provide (all-from-out "html.ss"))
|
||||
|
|
|
@ -3,11 +3,20 @@
|
|||
;; It needs to be abstracted back in.
|
||||
#lang scheme
|
||||
(require xml)
|
||||
(provide
|
||||
read-html-comments
|
||||
trim-whitespace
|
||||
gen-may-contain
|
||||
gen-read-sgml)
|
||||
|
||||
;; Kid-lister : (Symbol -> (U (listof Symbol) #f))
|
||||
(define kid-lister/c
|
||||
(symbol? . -> . (or/c (listof symbol?) false/c)))
|
||||
|
||||
(define spec/c
|
||||
(listof (cons/c (listof symbol?) (listof symbol?))))
|
||||
|
||||
(provide/contract
|
||||
[spec/c contract?]
|
||||
[read-html-comments (parameter/c boolean?)]
|
||||
[trim-whitespace (parameter/c boolean?)]
|
||||
[gen-may-contain (spec/c . -> . kid-lister/c)]
|
||||
[gen-read-sgml (kid-lister/c (symbol? symbol? . -> . (or/c symbol? false/c)) . -> . (() (input-port?) . ->* . (listof content?)))])
|
||||
|
||||
;; Start-tag ::= (make-start-tag Location Location Symbol (listof Attribute))
|
||||
(define-struct (start-tag source) (name attrs))
|
||||
|
@ -20,8 +29,6 @@
|
|||
(define read-html-comments (make-parameter #f))
|
||||
(define trim-whitespace (make-parameter #f))
|
||||
|
||||
;; Kid-lister : (Symbol -> (U (listof Symbol) #f))
|
||||
|
||||
;; gen-may-contain : Spec -> Kid-lister
|
||||
(define (gen-may-contain spec)
|
||||
(let ([table (make-hash)])
|
||||
|
@ -34,10 +41,8 @@
|
|||
(hash-ref table name (lambda () #f)))))
|
||||
|
||||
;; gen-read-sgml : Kid-lister (Symbol Symbol -> (U #f Symbol)) -> [Input-port] -> (listof Content)
|
||||
(define (gen-read-sgml may-contain auto-insert)
|
||||
(case-lambda
|
||||
[(in) (read-from-port may-contain auto-insert in)]
|
||||
[() (read-from-port may-contain auto-insert (current-input-port))]))
|
||||
(define ((gen-read-sgml may-contain auto-insert) [in (current-input-port)])
|
||||
(read-from-port may-contain auto-insert in))
|
||||
|
||||
;; read-from-port : Kid-lister (Symbol Symbol -> (U #f Symbol)) Input-port -> (listof Content)
|
||||
(define (read-from-port may-contain auto-insert in)
|
||||
|
|
Loading…
Reference in New Issue
Block a user