contracts and cleanup

svn: r13874
This commit is contained in:
Jay McCarthy 2009-02-27 20:29:48 +00:00
parent 54ecd4b9bb
commit 5fda17741b
20 changed files with 1215 additions and 1283 deletions

View File

@ -6,7 +6,7 @@
mzlib/port
net/url-sig
(only-in html read-html-as-xml read-html-comments use-html-spec)
(except-in xml/xml read-comments)
(except-in xml read-comments)
mzlib/class
"bullet.ss"
"option-snip.ss"
@ -492,7 +492,7 @@
(fixup-whitespace content leading-ok?))
(values "" leading-ok?)))
(values "" leading-ok?)))]
[(pi? c) (values "" leading-ok?)] ;; processing instruction
[(p-i? c) (values "" leading-ok?)] ;; processing instruction
[else (let ([tag (car c)])
(if (memq tag exact-whitespace-tags)
(let-values ([(s done?) (remove-leading-newline c)])
@ -879,7 +879,7 @@
(values void 0))))]
[(number? e)
(values (translate-number e) 0)]
[(or (comment? e) (pi? e)) (values void forced-lines)]
[(or (comment? e) (p-i? e)) (values void forced-lines)]
[else (let* ([tag (car e)]
[rest/base/depth/form/fl
(lambda (para-base enum-depth form forced-lines)

View File

@ -4,10 +4,20 @@
web-server/servlet
htdp/error
xml
scheme/contract
mzlib/etc)
(provide (all-from web-server/servlet-env)
(all-from web-server/servlet)
(rename wrapped-build-suspender build-suspender))
(all-from web-server/servlet))
(provide/contract
[build-suspender
(((listof xexpr/c)
(listof xexpr/c))
((listof (list/c symbol? string?))
(listof (list/c symbol? string?)))
. ->* .
(string?
. -> .
xexpr/c))])
; build-suspender : (listof html) (listof html) [(listof (cons sym str))] [(listof (cons sym str))] -> str -> response
(define build-suspender
@ -20,39 +30,4 @@
(title . ,title))
(body ,body-attributes
(form ([action ,k-url] [method "post"])
,@content))))))
(define wrapped-build-suspender
(case-lambda
[(title content)
(check-suspender2 title content)
(build-suspender title content)]
[(title content body-attributes)
(check-suspender3 title content body-attributes)
(build-suspender title content body-attributes)]
[(title content body-attributes head-attributes)
(check-suspender4 title content body-attributes head-attributes)
(build-suspender title content body-attributes head-attributes)]))
; : tst tst -> void
(define (check-suspender2 title content)
(check-arg 'build-suspender (listof? xexpr? title) "(listof xexpr[HTML])" "1st" title)
(check-arg 'build-suspender (listof? xexpr? content) "(listof xexpr[HTML])" "2nd" content))
; : tst tst tst -> void
(define (check-suspender3 title content body-attributes)
(check-suspender2 title content)
(check-arg 'build-suspender (listof? attribute-pair? body-attributes)
"(listof (cons sym str))" "3rd" body-attributes))
; : tst tst tst tst -> void
(define (check-suspender4 title content body-attributes head-attributes)
(check-suspender3 title content body-attributes)
(check-arg 'build-suspender (listof? attribute-pair? head-attributes)
"(listof (cons sym str))" "4th" head-attributes))
; : tst -> bool
(define (attribute-pair? b)
(and (pair? b)
(symbol? (car b))
(string? (cdr b)))))
,@content)))))))

View File

@ -24,10 +24,10 @@ Reads (X)HTML from a port, producing an @scheme[html] instance.}
@defproc[(read-html-as-xml [port input-port?])
(listof content?)]{
(listof content/c)]{
Reads HTML from a port, producing an @xexpr compatible with the
@schememodname[xml] library (which defines @scheme[content?]).}
@schememodname[xml] library (which defines @scheme[content/c]).}
@defboolparam[read-html-comments v]{
If @scheme[v] is not @scheme[#f], then comments are read and returned. Defaults to @scheme[#f].

View File

@ -12,7 +12,7 @@
[use-html-spec (parameter/c boolean?)]
[read-html (() (input-port?) . ->* . html?)]
[read-xhtml (() (input-port?) . ->* . html?)]
[read-html-as-xml (() (input-port?) . ->* . (listof content?))])
[read-html-as-xml (() (input-port?) . ->* . (listof content/c))])
;; xml-single-content->html : Content (listof Html-content) -> (listof Html-content)
(define (xml-single-content->html x acc)

View File

@ -16,7 +16,7 @@
[read-html-comments (parameter/c boolean?)]
[trim-whitespace (parameter/c boolean?)]
[gen-may-contain (spec/c . -> . kid-lister/c)]
[gen-read-sgml (kid-lister/c (symbol? symbol? . -> . (or/c symbol? false/c)) . -> . (() (input-port?) . ->* . (listof content?)))])
[gen-read-sgml (kid-lister/c (symbol? symbol? . -> . (or/c symbol? false/c)) . -> . (() (input-port?) . ->* . (listof content/c)))])
;; Start-tag ::= (make-start-tag Location Location Symbol (listof Attribute))
(define-struct (start-tag source) (name attrs))

View File

@ -25,4 +25,6 @@
"syntax-color"
"typed-scheme"
"units"
"xml"
"html"
"web-server"))

View File

@ -48,6 +48,13 @@
(define (test-not-xexpr? xe)
(test-false (format "~S" xe) (xexpr? xe)))
(define (contract->predicate c)
(lambda (v)
(with-handlers ([exn:fail:contract?
(lambda (x) #f)])
(contract c v 'pos 'neg)
#t)))
(define xml-tests
(test-suite
"XML"
@ -79,7 +86,7 @@ END
"DOCTYPE dropping" result-string expected-string)))
(local
[(define a-pi (make-p-i #f #f "foo" "bar"))
[(define a-pi (make-p-i #f #f 'foo "bar"))
(define a-p (make-prolog empty #f empty))
(define a-p/pi (make-prolog (list a-pi) #f (list)))
(define a-d0
@ -137,11 +144,14 @@ END
(test-not-false "element" (element? (make-element #f #f 'br empty empty)))
(test-not-false "content? pcdata" (content? (make-pcdata #f #f "pcdata")))
(test-not-false "content? element" (content? (make-element #f #f 'br empty empty)))
(test-not-false "content? entity" (content? (make-entity #f #f 'nbsp)))
(test-not-false "content? comment" (content? (make-comment "string")))
(test-not-false "content? cdata" (content? (make-cdata #f #f "cdata")))
(local [(define content? (contract->predicate content/c))]
(test-suite
"content?"
(test-not-false "content? pcdata" (content? (make-pcdata #f #f "pcdata")))
(test-not-false "content? element" (content? (make-element #f #f 'br empty empty)))
(test-not-false "content? entity" (content? (make-entity #f #f 'nbsp)))
(test-not-false "content? comment" (content? (make-comment "string")))
(test-not-false "content? cdata" (content? (make-cdata #f #f "cdata")))))
(test-not-false "attribute" (attribute? (make-attribute #f #f 'name "value")))
@ -152,16 +162,14 @@ END
(test-not-false "cdata" (cdata? (make-cdata #f #f "string")))
(test-not-false "p-i" (p-i? (make-p-i #f #f "target" "instruction")))
(test-not-false "p-i" (p-i? (make-p-i #f #f 'target "instruction")))
(test-not-false "comment" (comment? (make-comment "text")))
(test-not-false "source" (source? (make-source 'start 'stop)))
(test-not-false "source" (source? (make-source (make-location 1 2 3) 'stop)))
(test-not-false "source" (source? (make-source 'start (make-location 1 2 3))))
(test-not-false "source" (source? (make-source (make-location 1 2 3) (make-location 4 5 6))))
(test-not-false "exn:invalid-xexpr" (exn:invalid-xexpr? (make-exn:invalid-xexpr "string" (current-continuation-marks) 'nbsp))))
(test-not-false "source" (source? (make-source (make-location 1 2 3) (make-location 4 5 6)))))
(test-suite
"Reading and Writing XML"
@ -543,7 +551,7 @@ END
(test-validate-xexpr (make-pcdata #f #f "pcdata"))
(test-validate-xexpr (make-cdata #f #f "cdata"))
(test-validate-xexpr (make-comment "comment"))
(test-validate-xexpr (make-p-i #f #f "s1" "s2"))
(test-validate-xexpr (make-p-i #f #f 's1 "s2"))
(test-validate-xexpr '(br))
(test-validate-xexpr '(br ()))
(test-validate-xexpr '(a ([href "#"]) "string"))
@ -561,7 +569,7 @@ END
"Non-permissive"
(lambda (exn)
(and (exn? exn)
(regexp-match #rx"Expected content," (exn-message exn))))
(regexp-match #rx"not in permissive mode" (exn-message exn))))
(lambda ()
(xml->xexpr #f)))
@ -596,7 +604,7 @@ END
(test-empty-tag-shorthand '(html) "<html>Hey</html>" "<html>Hey</html>")
(test-empty-tag-shorthand '(p) "<html></html>" "<html></html>")
(test-empty-tag-shorthand '(p) "<html>Hey</html>" "<html>Hey</html>"))
(test-equal? "html-empty-tags"
html-empty-tags
'(param meta link isindex input img hr frame col br basefont base area))
@ -615,7 +623,7 @@ END
"read-comments"
(test-read-comments #f "<html><!-- Foo --></html>" "<html />")
(test-read-comments #t "<html><!-- Foo --></html>" "<html><!-- Foo --></html>"))
(local
[(define (test-xexpr-drop-empty-attributes v istr xe)
(test-equal? (format "~S" (list v istr))

View File

@ -91,7 +91,7 @@
() #:rest (listof (formlet/c alpha))
. ->* . (formlet/c beta))]
[xml-forest (xexpr-forest/c . -> . (formlet/c procedure?))]
[xml (xexpr? . -> . (formlet/c procedure?))]
[xml (xexpr/c . -> . (formlet/c procedure?))]
[text (string? . -> . (formlet/c procedure?))]
[tag-xexpr (symbol? (listof (list/c symbol? string?)) (formlet/c alpha) . -> . (formlet/c alpha))]
[formlet-display ((formlet/c alpha) . -> . xexpr-forest/c)]

View File

@ -1,6 +1,6 @@
#lang scheme
(require scheme/pretty
xml/xml)
xml)
(provide/contract
[format-xexpr/errors (any/c . -> . string?)]
@ -26,7 +26,7 @@
(symbol? v)
(exact-nonnegative-integer? v)
(comment? v)
(pi? v)
(p-i? v)
(cdata? v)))
(define (format-xexpr v)

View File

@ -1,462 +1,463 @@
#lang scheme
(require "sig.ss")
(require "structures.ss")
(provide reader@)
(provide/contract
[read-xml (() (input-port?) . ->* . document?)]
[read-xml/element (() (input-port?) . ->* . element?)]
[read-comments (parameter/c boolean?)]
[collapse-whitespace (parameter/c boolean?)]
[exn:xml? (any/c . -> . boolean?)])
(define-unit reader@
(import xml-structs^)
(export reader^)
;; Start-tag ::= (make-start-tag Location Location Symbol (listof Attribute))
(define-struct (start-tag source) (name attrs))
;; End-tag ::= (make-end-tag Location Location Symbol)
(define-struct (end-tag source) (name))
;; Token ::= Contents | Start-tag | End-tag | Eof
(define read-comments (make-parameter #f))
(define collapse-whitespace (make-parameter #f))
;; read-xml : [Input-port] -> Document
(define read-xml
(lambda ([in (current-input-port)])
(let*-values ([(in pos) (positionify in)]
[(misc0 start) (read-misc in pos)])
(make-document (make-prolog misc0 #f empty)
(read-xml-element-helper pos in start)
(let ([loc-before (pos)])
(let-values ([(misc1 end-of-file) (read-misc in pos)])
(unless (eof-object? end-of-file)
(let ([loc-after (pos)])
(parse-error (list
(make-srcloc
(object-name in)
#f
#f
(location-offset loc-before)
(- (location-offset loc-after)
(location-offset loc-before))))
"extra stuff at end of document ~e"
end-of-file)))
misc1))))))
;; read-xml/element : [Input-port] -> Element
(define read-xml/element
(lambda ([in (current-input-port)])
(let-values ([(in pos) (positionify in)])
(skip-space in)
(read-xml-element-helper pos in (lex in pos)))))
;; read-xml-element-helper : Nat Iport Token -> Element
(define (read-xml-element-helper pos in start)
(cond
[(start-tag? start) (read-element start in pos)]
[(element? start) start]
[else (parse-error (list
(make-srcloc
(object-name in)
#f
#f
1
(- (location-offset (pos)) 1)))
"expected root element - received ~e"
(if (pcdata? start) (pcdata-string start) start))]))
;; read-misc : Input-port (-> Location) -> (listof Misc) Token
(define (read-misc in pos)
(let read-more ()
;; Start-tag ::= (make-start-tag Location Location Symbol (listof Attribute))
(define-struct (start-tag source) (name attrs))
;; End-tag ::= (make-end-tag Location Location Symbol)
(define-struct (end-tag source) (name))
;; Token ::= Contents | Start-tag | End-tag | Eof
(define read-comments (make-parameter #f))
(define collapse-whitespace (make-parameter #f))
;; read-xml : [Input-port] -> Document
(define read-xml
(lambda ([in (current-input-port)])
(let*-values ([(in pos) (positionify in)]
[(misc0 start) (read-misc in pos)])
(make-document (make-prolog misc0 #f empty)
(read-xml-element-helper pos in start)
(let ([loc-before (pos)])
(let-values ([(misc1 end-of-file) (read-misc in pos)])
(unless (eof-object? end-of-file)
(let ([loc-after (pos)])
(parse-error (list
(make-srcloc
(object-name in)
#f
#f
(location-offset loc-before)
(- (location-offset loc-after)
(location-offset loc-before))))
"extra stuff at end of document ~e"
end-of-file)))
misc1))))))
;; read-xml/element : [Input-port] -> Element
(define read-xml/element
(lambda ([in (current-input-port)])
(let-values ([(in pos) (positionify in)])
(skip-space in)
(read-xml-element-helper pos in (lex in pos)))))
;; read-xml-element-helper : Nat Iport Token -> Element
(define (read-xml-element-helper pos in start)
(cond
[(start-tag? start) (read-element start in pos)]
[(element? start) start]
[else (parse-error (list
(make-srcloc
(object-name in)
#f
#f
1
(- (location-offset (pos)) 1)))
"expected root element - received ~e"
(if (pcdata? start) (pcdata-string start) start))]))
;; read-misc : Input-port (-> Location) -> (listof Misc) Token
(define (read-misc in pos)
(let read-more ()
(let ([x (lex in pos)])
(cond
[(p-i? x)
(let-values ([(lst next) (read-more)])
(values (cons x lst) next))]
[(comment? x)
(let-values ([(lst next) (read-more)])
(if (read-comments)
(values (cons x lst) next)
(values lst next)))]
[(and (pcdata? x) (andmap char-whitespace? (string->list (pcdata-string x))))
(read-more)]
[else (values null x)]))))
;; read-element : Start-tag Input-port (-> Location) -> Element
(define (read-element start in pos)
(let ([name (start-tag-name start)]
[a (source-start start)]
[b (source-stop start)])
(let read-content ([k (lambda (body end-loc)
(make-element
a end-loc name (start-tag-attrs start)
body))])
(let ([x (lex in pos)])
(cond
[(p-i? x)
(let-values ([(lst next) (read-more)])
(values (cons x lst) next))]
[(comment? x)
(let-values ([(lst next) (read-more)])
(if (read-comments)
(values (cons x lst) next)
(values lst next)))]
[(and (pcdata? x) (andmap char-whitespace? (string->list (pcdata-string x))))
(read-more)]
[else (values null x)]))))
;; read-element : Start-tag Input-port (-> Location) -> Element
(define (read-element start in pos)
(let ([name (start-tag-name start)]
[a (source-start start)]
[b (source-stop start)])
(let read-content ([k (lambda (body end-loc)
(make-element
a end-loc name (start-tag-attrs start)
body))])
(let ([x (lex in pos)])
(cond
[(eof-object? x)
(parse-error (list
(make-srcloc
(object-name in)
#f
#f
(location-offset (source-start start))
(- (location-offset (source-stop start))
(location-offset (source-start start)))))
"unclosed `~a' tag at [~a ~a]"
name
(format-source a)
(format-source b))]
[(start-tag? x)
(let ([next-el (read-element x in pos)])
(read-content (lambda (body end-loc)
(k (cons next-el body)
end-loc))))]
[(end-tag? x)
(let ([end-loc (source-stop x)])
(unless (eq? name (end-tag-name x))
(parse-error
(list
(make-srcloc (object-name in)
#f
#f
(location-offset a)
(- (location-offset b) (location-offset a)))
(make-srcloc (object-name in)
#f
#f
(location-offset (source-start x))
(- (location-offset end-loc) (location-offset (source-start x)))))
"start tag `~a' at [~a ~a] doesn't match end tag `~a' at [~a ~a]"
name
(format-source a)
(format-source b)
(end-tag-name x)
(format-source (source-start x))
(format-source end-loc)))
(k null end-loc))]
[(entity? x) (read-content (lambda (body end-loc)
(k (cons (expand-entity x) body)
end-loc)))]
[(comment? x) (if (read-comments)
(read-content (lambda (body end-loc) (k (cons x body) end-loc)))
(read-content k))]
[else (read-content (lambda (body end-loc) (k (cons x body) end-loc)))])))))
;; expand-entity : Entity -> (U Entity Pcdata)
;; more here - allow expansion of user defined entities
(define (expand-entity x)
(let ([expanded (default-entity-table (entity-text x))])
(if expanded
(make-pcdata (source-start x) (source-stop x) expanded)
x)))
;; default-entity-table : Symbol -> (U #f String)
(define (default-entity-table name)
(case name
[(amp) "&"]
[(lt) "<"]
[(gt) ">"]
[(quot) "\""]
[(apos) "'"]
[else #f]))
;; lex : Input-port (-> Location) -> (U Token special)
(define (lex in pos)
[(eof-object? x)
(parse-error (list
(make-srcloc
(object-name in)
#f
#f
(location-offset (source-start start))
(- (location-offset (source-stop start))
(location-offset (source-start start)))))
"unclosed `~a' tag at [~a ~a]"
name
(format-source a)
(format-source b))]
[(start-tag? x)
(let ([next-el (read-element x in pos)])
(read-content (lambda (body end-loc)
(k (cons next-el body)
end-loc))))]
[(end-tag? x)
(let ([end-loc (source-stop x)])
(unless (eq? name (end-tag-name x))
(parse-error
(list
(make-srcloc (object-name in)
#f
#f
(location-offset a)
(- (location-offset b) (location-offset a)))
(make-srcloc (object-name in)
#f
#f
(location-offset (source-start x))
(- (location-offset end-loc) (location-offset (source-start x)))))
"start tag `~a' at [~a ~a] doesn't match end tag `~a' at [~a ~a]"
name
(format-source a)
(format-source b)
(end-tag-name x)
(format-source (source-start x))
(format-source end-loc)))
(k null end-loc))]
[(entity? x) (read-content (lambda (body end-loc)
(k (cons (expand-entity x) body)
end-loc)))]
[(comment? x) (if (read-comments)
(read-content (lambda (body end-loc) (k (cons x body) end-loc)))
(read-content k))]
[else (read-content (lambda (body end-loc) (k (cons x body) end-loc)))])))))
;; expand-entity : Entity -> (U Entity Pcdata)
;; more here - allow expansion of user defined entities
(define (expand-entity x)
(let ([expanded (default-entity-table (entity-text x))])
(if expanded
(make-pcdata (source-start x) (source-stop x) expanded)
x)))
;; default-entity-table : Symbol -> (U #f String)
(define (default-entity-table name)
(case name
[(amp) "&"]
[(lt) "<"]
[(gt) ">"]
[(quot) "\""]
[(apos) "'"]
[else #f]))
;; lex : Input-port (-> Location) -> (U Token special)
(define (lex in pos)
(let ([c (peek-char-or-special in)])
(cond
[(eof-object? c) c]
[(eq? c #\&) (lex-entity in pos)]
[(eq? c #\<) (lex-tag-cdata-pi-comment in pos)]
[(not (char? c)) (read-char-or-special in)]
[else (lex-pcdata in pos)])))
; lex-entity : Input-port (-> Location) -> Entity
; pre: the first char is a #\&
(define (lex-entity in pos)
(let ([start (pos)])
(read-char in)
(let ([data (case (peek-char in)
[(#\#)
(read-char in)
(let ([n (case (peek-char in)
[(#\x) (read-char in)
(string->number (read-until #\; in pos) 16)]
[else (string->number (read-until #\; in pos))])])
(unless (number? n)
(lex-error in pos "malformed numeric entity"))
n)]
[else
(begin0
(lex-name in pos)
(unless (eq? (read-char in) #\;)
(lex-error in pos "expected ; at the end of an entity")))])])
(make-entity start (pos) data))))
; lex-tag-cdata-pi-comment : Input-port (-> Location) -> Start-tag | Element | End-tag | Cdata | p-i | Comment
; pre: the first char is a #\<
(define (lex-tag-cdata-pi-comment in pos)
(let ([start (pos)])
(read-char in)
(case (non-eof peek-char-or-special in pos)
[(#\!)
(read-char in)
(case (non-eof peek-char in pos)
[(#\-) (read-char in)
(unless (eq? (read-char-or-special in) #\-)
(lex-error in pos "expected second - after <!-"))
(let ([data (lex-comment-contents in pos)])
(unless (eq? (read-char in) #\>)
(lex-error in pos "expected > to end comment (\"--\" can't appear in comments)"))
;(make-comment start (pos) data)
(make-comment data))]
[(#\[) (read-char in)
(unless (string=? (read-string 6 in) "CDATA[")
(lex-error in pos "expected CDATA following <["))
(let ([data (lex-cdata-contents in pos)])
(make-cdata start (pos) (format "<![CDATA[~a]]>" data)))]
[else (skip-dtd in pos)
(skip-space in)
(unless (eq? (peek-char-or-special in) #\<)
(lex-error in pos "expected p-i, comment, or element after doctype"))
(lex-tag-cdata-pi-comment in pos)])]
[(#\?) (read-char in)
(let ([name (lex-name in pos)])
(skip-space in)
(let ([data (lex-pi-data in pos)])
(make-p-i start (pos) name data)))]
[(#\/) (read-char in)
(let ([name (lex-name in pos)])
(skip-space in)
(unless (eq? (read-char-or-special in) #\>)
(lex-error in pos "expected > to close ~a's end tag" name))
(make-end-tag start (pos) name))]
[else ; includes 'special, but lex-name will fail in that case
(let ([name (lex-name in pos)]
[attrs (lex-attributes in pos)])
(skip-space in)
(case (read-char-or-special in)
[(#\/)
(unless (eq? (read-char in) #\>)
(lex-error in pos "expected > to close empty element ~a" name))
(make-element start (pos) name attrs null)]
[(#\>) (make-start-tag start (pos) name attrs)]
[else (lex-error in pos "expected / or > to close tag `~a'" name)]))])))
;; lex-attributes : Input-port (-> Location) -> (listof Attribute)
(define (lex-attributes in pos)
(sort (let loop ()
(skip-space in)
(cond [(name-start? (peek-char-or-special in))
(cons (lex-attribute in pos) (loop))]
[else null]))
(lambda (a b)
(let ([na (attribute-name a)]
[nb (attribute-name b)])
(cond
[(eq? na nb) (lex-error in pos "duplicated attribute name ~a" na)]
[else (string<? (symbol->string na) (symbol->string nb))])))))
;; lex-attribute : Input-port (-> Location) -> Attribute
(define (lex-attribute in pos)
(let ([start (pos)]
[name (lex-name in pos)])
(skip-space in)
(unless (eq? (read-char in) #\=)
(lex-error in pos "expected = in attribute ~a" name))
(skip-space in)
;; more here - handle entites and disallow "<"
(let* ([delimiter (read-char-or-special in)]
[value (case delimiter
[(#\' #\")
(list->string
(let read-more ()
(let ([c (non-eof peek-char-or-special in pos)])
(cond
[(eq? c 'special)
(lex-error in pos "attribute values cannot contain non-text values")]
[(eq? c delimiter) (read-char in) null]
[(eq? c #\&)
(let ([entity (expand-entity (lex-entity in pos))])
(if (pcdata? entity)
(append (string->list (pcdata-string entity)) (read-more))
;; more here - do something with user defined entites
(read-more)))]
[else (read-char in) (cons c (read-more))]))))]
[else (if (char? delimiter)
(lex-error in pos "attribute values must be in ''s or in \"\"s")
delimiter)])])
(make-attribute start (pos) name value))))
;; skip-space : Input-port -> Void
;; deviation - should sometimes insist on at least one space
(define (skip-space in)
(let loop ()
(let ([c (peek-char-or-special in)])
(cond
[(eof-object? c) c]
[(eq? c #\&) (lex-entity in pos)]
[(eq? c #\<) (lex-tag-cdata-pi-comment in pos)]
[(not (char? c)) (read-char-or-special in)]
[else (lex-pcdata in pos)])))
; lex-entity : Input-port (-> Location) -> Entity
; pre: the first char is a #\&
(define (lex-entity in pos)
(let ([start (pos)])
(read-char in)
(let ([data (case (peek-char in)
[(#\#)
(read-char in)
(let ([n (case (peek-char in)
[(#\x) (read-char in)
(string->number (read-until #\; in pos) 16)]
[else (string->number (read-until #\; in pos))])])
(unless (number? n)
(lex-error in pos "malformed numeric entity"))
n)]
[else
(begin0
(lex-name in pos)
(unless (eq? (read-char in) #\;)
(lex-error in pos "expected ; at the end of an entity")))])])
(make-entity start (pos) data))))
; lex-tag-cdata-pi-comment : Input-port (-> Location) -> Start-tag | Element | End-tag | Cdata | p-i | Comment
; pre: the first char is a #\<
(define (lex-tag-cdata-pi-comment in pos)
(let ([start (pos)])
(read-char in)
(case (non-eof peek-char-or-special in pos)
[(#\!)
(read-char in)
(case (non-eof peek-char in pos)
[(#\-) (read-char in)
(unless (eq? (read-char-or-special in) #\-)
(lex-error in pos "expected second - after <!-"))
(let ([data (lex-comment-contents in pos)])
(unless (eq? (read-char in) #\>)
(lex-error in pos "expected > to end comment (\"--\" can't appear in comments)"))
;(make-comment start (pos) data)
(make-comment data))]
[(#\[) (read-char in)
(unless (string=? (read-string 6 in) "CDATA[")
(lex-error in pos "expected CDATA following <["))
(let ([data (lex-cdata-contents in pos)])
(make-cdata start (pos) (format "<![CDATA[~a]]>" data)))]
[else (skip-dtd in pos)
(skip-space in)
(unless (eq? (peek-char-or-special in) #\<)
(lex-error in pos "expected p-i, comment, or element after doctype"))
(lex-tag-cdata-pi-comment in pos)])]
[(#\?) (read-char in)
(let ([name (lex-name in pos)])
(skip-space in)
(let ([data (lex-pi-data in pos)])
(make-p-i start (pos) name data)))]
[(#\/) (read-char in)
(let ([name (lex-name in pos)])
(skip-space in)
(unless (eq? (read-char-or-special in) #\>)
(lex-error in pos "expected > to close ~a's end tag" name))
(make-end-tag start (pos) name))]
[else ; includes 'special, but lex-name will fail in that case
(let ([name (lex-name in pos)]
[attrs (lex-attributes in pos)])
(skip-space in)
(case (read-char-or-special in)
[(#\/)
(unless (eq? (read-char in) #\>)
(lex-error in pos "expected > to close empty element ~a" name))
(make-element start (pos) name attrs null)]
[(#\>) (make-start-tag start (pos) name attrs)]
[else (lex-error in pos "expected / or > to close tag `~a'" name)]))])))
;; lex-attributes : Input-port (-> Location) -> (listof Attribute)
(define (lex-attributes in pos)
(sort (let loop ()
(skip-space in)
(cond [(name-start? (peek-char-or-special in))
(cons (lex-attribute in pos) (loop))]
[else null]))
(lambda (a b)
(let ([na (attribute-name a)]
[nb (attribute-name b)])
(cond
[(eq? na nb) (lex-error in pos "duplicated attribute name ~a" na)]
[else (string<? (symbol->string na) (symbol->string nb))])))))
;; lex-attribute : Input-port (-> Location) -> Attribute
(define (lex-attribute in pos)
(let ([start (pos)]
[name (lex-name in pos)])
(skip-space in)
(unless (eq? (read-char in) #\=)
(lex-error in pos "expected = in attribute ~a" name))
(skip-space in)
;; more here - handle entites and disallow "<"
(let* ([delimiter (read-char-or-special in)]
[value (case delimiter
[(#\' #\")
(list->string
(let read-more ()
(let ([c (non-eof peek-char-or-special in pos)])
(cond
[(eq? c 'special)
(lex-error in pos "attribute values cannot contain non-text values")]
[(eq? c delimiter) (read-char in) null]
[(eq? c #\&)
(let ([entity (expand-entity (lex-entity in pos))])
(if (pcdata? entity)
(append (string->list (pcdata-string entity)) (read-more))
;; more here - do something with user defined entites
(read-more)))]
[else (read-char in) (cons c (read-more))]))))]
[else (if (char? delimiter)
(lex-error in pos "attribute values must be in ''s or in \"\"s")
delimiter)])])
(make-attribute start (pos) name value))))
;; skip-space : Input-port -> Void
;; deviation - should sometimes insist on at least one space
(define (skip-space in)
(let loop ()
(let ([c (peek-char-or-special in)])
(when (and (char? c)
(char-whitespace? c))
(read-char in)
(loop)))))
;; lex-pcdata : Input-port (-> Location) -> Pcdata
;; deviation - disallow ]]> "for compatability" with SGML, sec 2.4 XML spec
(define (lex-pcdata in pos)
(let ([start (pos)]
[data (let loop ()
(let ([next (peek-char-or-special in)])
(cond
[(or (eof-object? next)
(not (char? next))
(eq? next #\&)
(eq? next #\<))
null]
[(and (char-whitespace? next) (collapse-whitespace))
(skip-space in)
(cons #\space (loop))]
[else (cons (read-char in) (loop))])))])
(make-pcdata start
(pos)
(list->string data))))
;; lex-name : Input-port (-> Location) -> Symbol
(define (lex-name in pos)
(let ([c (non-eof read-char-or-special in pos)])
(unless (name-start? c)
(lex-error in pos "expected name, received ~e" c))
(string->symbol
(list->string
(cons c (let lex-rest ()
(let ([c (non-eof peek-char-or-special in pos)])
(cond
[(eq? c 'special)
(lex-error in pos "names cannot contain non-text values")]
[(name-char? c)
(cons (read-char in) (lex-rest))]
[else null]))))))))
;; skip-dtd : Input-port (-> Location) -> Void
(define (skip-dtd in pos)
(let skip ()
(case (non-eof read-char in pos)
[(#\') (read-until #\' in pos) (skip)]
[(#\") (read-until #\" in pos) (skip)]
[(#\<)
(case (non-eof read-char in pos)
[(#\!) (case (non-eof read-char in pos)
[(#\-) (read-char in) (lex-comment-contents in pos) (read-char in) (skip)]
[else (skip) (skip)])]
[(#\?) (lex-pi-data in pos) (skip)]
[else (skip) (skip)])]
[(#\>) (void)]
[else (skip)])))
;; name-start? : Char -> Bool
(define (name-start? ch)
(and (char? ch)
(or (char-alphabetic? ch)
(eq? ch #\_)
(eq? ch #\:))))
;; name-char? : Char -> Bool
(define (name-char? ch)
(and (char? ch)
(or (name-start? ch)
(char-numeric? ch)
(eq? ch #\.)
(eq? ch #\-))))
;; read-until : Char Input-port (-> Location) -> String
;; discards the stop character, too
(define (read-until char in pos)
(list->string
(let read-more ()
(let ([c (non-eof read-char in pos)])
(cond
[(eq? c char) null]
[else (cons c (read-more))])))))
;; non-eof : (Input-port -> (U Char Eof)) Input-port (-> Location) -> Char
(define (non-eof f in pos)
(let ([c (f in)])
(cond
[(eof-object? c) (lex-error in pos "unexpected eof")]
[else c])))
;; gen-read-until-string : String -> Input-port (-> Location) -> String
;; uses Knuth-Morris-Pratt from
;; Introduction to Algorithms, Cormen, Leiserson, and Rivest, pages 869-876
;; discards stop from input
(define (gen-read-until-string stop)
(let* ([len (string-length stop)]
[prefix (make-vector len 0)]
[fall-back
(lambda (k c)
(let ([k (let loop ([k k])
(cond
[(and (> k 0) (not (eq? (string-ref stop k) c)))
(loop (vector-ref prefix (sub1 k)))]
[else k]))])
(if (eq? (string-ref stop k) c)
(add1 k)
k)))])
(let init ([k 0] [q 1])
(when (< q len)
(let ([k (fall-back k (string-ref stop q))])
(vector-set! prefix q k)
(init k (add1 q)))))
;; (vector-ref prefix x) = the longest suffix that matches a prefix of stop
(lambda (in pos)
(list->string
(let/ec out
(let loop ([matched 0] [out out])
(let* ([c (non-eof read-char in pos)]
[matched (fall-back matched c)])
(cond
[(= matched len) (out null)]
[(zero? matched) (cons c (let/ec out (loop matched out)))]
[else (cons c (loop matched out))]))))))))
;; "-->" makes more sense, but "--" follows the spec.
(define lex-comment-contents (gen-read-until-string "--"))
(define lex-pi-data (gen-read-until-string "?>"))
(define lex-cdata-contents (gen-read-until-string "]]>"))
;; positionify : Input-port -> Input-port (-> Location)
; This function predates port-count-lines! and port-next-location.
; Otherwise I would have used those directly at the call sites.
(define (positionify in)
(port-count-lines! in)
(values
in
(lambda ()
(let-values ([(line column offset) (port-next-location in)])
(make-location line column offset)))))
;; locs : (listof (list number number))
(define-struct (exn:xml exn:fail:read) ())
;; lex-error : Input-port String (-> Location) TST* -> alpha
;; raises a lexer error, using exn:xml
(define (lex-error in pos str . rest)
(let* ([the-pos (pos)]
[offset (location-offset the-pos)])
(raise
(make-exn:xml
(format "read-xml: lex-error: at position ~a: ~a"
(format-source the-pos)
(apply format str rest))
(current-continuation-marks)
(list
(make-srcloc (object-name in) #f #f offset 1))))))
;; parse-error : (listof srcloc) (listof TST) *-> alpha
;; raises a parsing error, using exn:xml
(define (parse-error src fmt . args)
(raise (make-exn:xml (string-append "read-xml: parse-error: "
(apply format fmt args))
(current-continuation-marks)
src)))
;; format-source : Location -> string
;; to format the source location for an error message
(define (format-source loc)
(if (location? loc)
(format "~a.~a/~a" (location-line loc) (location-char loc) (location-offset loc))
(format "~a" loc))))
(when (and (char? c)
(char-whitespace? c))
(read-char in)
(loop)))))
;; lex-pcdata : Input-port (-> Location) -> Pcdata
;; deviation - disallow ]]> "for compatability" with SGML, sec 2.4 XML spec
(define (lex-pcdata in pos)
(let ([start (pos)]
[data (let loop ()
(let ([next (peek-char-or-special in)])
(cond
[(or (eof-object? next)
(not (char? next))
(eq? next #\&)
(eq? next #\<))
null]
[(and (char-whitespace? next) (collapse-whitespace))
(skip-space in)
(cons #\space (loop))]
[else (cons (read-char in) (loop))])))])
(make-pcdata start
(pos)
(list->string data))))
;; lex-name : Input-port (-> Location) -> Symbol
(define (lex-name in pos)
(let ([c (non-eof read-char-or-special in pos)])
(unless (name-start? c)
(lex-error in pos "expected name, received ~e" c))
(string->symbol
(list->string
(cons c (let lex-rest ()
(let ([c (non-eof peek-char-or-special in pos)])
(cond
[(eq? c 'special)
(lex-error in pos "names cannot contain non-text values")]
[(name-char? c)
(cons (read-char in) (lex-rest))]
[else null]))))))))
;; skip-dtd : Input-port (-> Location) -> Void
(define (skip-dtd in pos)
(let skip ()
(case (non-eof read-char in pos)
[(#\') (read-until #\' in pos) (skip)]
[(#\") (read-until #\" in pos) (skip)]
[(#\<)
(case (non-eof read-char in pos)
[(#\!) (case (non-eof read-char in pos)
[(#\-) (read-char in) (lex-comment-contents in pos) (read-char in) (skip)]
[else (skip) (skip)])]
[(#\?) (lex-pi-data in pos) (skip)]
[else (skip) (skip)])]
[(#\>) (void)]
[else (skip)])))
;; name-start? : Char -> Bool
(define (name-start? ch)
(and (char? ch)
(or (char-alphabetic? ch)
(eq? ch #\_)
(eq? ch #\:))))
;; name-char? : Char -> Bool
(define (name-char? ch)
(and (char? ch)
(or (name-start? ch)
(char-numeric? ch)
(eq? ch #\.)
(eq? ch #\-))))
;; read-until : Char Input-port (-> Location) -> String
;; discards the stop character, too
(define (read-until char in pos)
(list->string
(let read-more ()
(let ([c (non-eof read-char in pos)])
(cond
[(eq? c char) null]
[else (cons c (read-more))])))))
;; non-eof : (Input-port -> (U Char Eof)) Input-port (-> Location) -> Char
(define (non-eof f in pos)
(let ([c (f in)])
(cond
[(eof-object? c) (lex-error in pos "unexpected eof")]
[else c])))
;; gen-read-until-string : String -> Input-port (-> Location) -> String
;; uses Knuth-Morris-Pratt from
;; Introduction to Algorithms, Cormen, Leiserson, and Rivest, pages 869-876
;; discards stop from input
(define (gen-read-until-string stop)
(let* ([len (string-length stop)]
[prefix (make-vector len 0)]
[fall-back
(lambda (k c)
(let ([k (let loop ([k k])
(cond
[(and (> k 0) (not (eq? (string-ref stop k) c)))
(loop (vector-ref prefix (sub1 k)))]
[else k]))])
(if (eq? (string-ref stop k) c)
(add1 k)
k)))])
(let init ([k 0] [q 1])
(when (< q len)
(let ([k (fall-back k (string-ref stop q))])
(vector-set! prefix q k)
(init k (add1 q)))))
;; (vector-ref prefix x) = the longest suffix that matches a prefix of stop
(lambda (in pos)
(list->string
(let/ec out
(let loop ([matched 0] [out out])
(let* ([c (non-eof read-char in pos)]
[matched (fall-back matched c)])
(cond
[(= matched len) (out null)]
[(zero? matched) (cons c (let/ec out (loop matched out)))]
[else (cons c (loop matched out))]))))))))
;; "-->" makes more sense, but "--" follows the spec.
(define lex-comment-contents (gen-read-until-string "--"))
(define lex-pi-data (gen-read-until-string "?>"))
(define lex-cdata-contents (gen-read-until-string "]]>"))
;; positionify : Input-port -> Input-port (-> Location)
; This function predates port-count-lines! and port-next-location.
; Otherwise I would have used those directly at the call sites.
(define (positionify in)
(port-count-lines! in)
(values
in
(lambda ()
(let-values ([(line column offset) (port-next-location in)])
(make-location line column offset)))))
;; locs : (listof (list number number))
(define-struct (exn:xml exn:fail:read) ())
;; lex-error : Input-port String (-> Location) TST* -> alpha
;; raises a lexer error, using exn:xml
(define (lex-error in pos str . rest)
(let* ([the-pos (pos)]
[offset (location-offset the-pos)])
(raise
(make-exn:xml
(format "read-xml: lex-error: at position ~a: ~a"
(format-source the-pos)
(apply format str rest))
(current-continuation-marks)
(list
(make-srcloc (object-name in) #f #f offset 1))))))
;; parse-error : (listof srcloc) (listof TST) *-> alpha
;; raises a parsing error, using exn:xml
(define (parse-error src fmt . args)
(raise (make-exn:xml (string-append "read-xml: parse-error: "
(apply format fmt args))
(current-continuation-marks)
src)))
;; format-source : Location -> string
;; to format the source location for an error message
(define (format-source loc)
(if (location? loc)
(format "~a.~a/~a" (location-line loc) (location-char loc) (location-offset loc))
(format "~a" loc)))

View File

@ -1,96 +0,0 @@
#lang scheme
(define-signature xml-structs^
((struct/ctc location ([line exact-nonnegative-integer?]
[char exact-nonnegative-integer?]
[offset exact-nonnegative-integer?]))
(struct/ctc source ([start (or/c location? symbol?)]
[stop (or/c location? symbol?)]))
(struct/ctc comment ([text string?]))
(struct pcdata (string)) ; XXX needs parent
(struct cdata (string)) ; XXX needs parent
(struct/ctc document-type ([name symbol?]
#;[external external-dtd?]
; XXX results in this error
; ->: expected contract or a value that can be coerced into one, got #<undefined>
; I presume that there is a letrec somewhere
[external any/c]
[inlined false/c]))
(struct/ctc document (#;[prolog prolog?] ; XXX same as above
[prolog any/c]
#;[element element?]
[element any/c]
#;[misc (listof (or/c comment? pi?))]
[misc (listof any/c)]))
(struct/ctc prolog (#;[misc (listof (or/c comment? pi?))] ; XXX same as above
[misc (listof any/c)]
#;[dtd document-type?]
[dtd any/c]
#;[misc2 (listof (or/c comment? pi?))]
[misc2 (listof any/c)]))
(struct/ctc external-dtd ([system string?]))
(struct external-dtd/public (public)) ; XXX needs parent
(struct external-dtd/system ()) ; XXX needs parent
(struct element (name attributes content)) ; XXX needs parent
(struct attribute (name value)) ; XXX needs parent
(struct p-i (target-name instruction)) ; XXX needs parent
(struct entity (text)) ; XXX needs parent
(contracted
[content? (any/c . -> . boolean?)])))
(define-signature writer^
((contracted
[write-xml ((any/c) (output-port?) . ->* . void?)]
[display-xml ((any/c) (output-port?) . ->* . void?)]
[write-xml/content ((any/c) (output-port?) . ->* . void?)]
[display-xml/content ((any/c) (output-port?) . ->* . void?)])
; XXX I can't contract the above (well), because they refer to structs from xml-structs^
(contracted
[empty-tag-shorthand (parameter/c (or/c (symbols 'always 'never) (listof symbol?)))]
[html-empty-tags (listof symbol?)])))
(define-signature reader^
((contracted
[read-xml (() (input-port?) . ->* . any/c)]
[read-xml/element (() (input-port?) . ->* . any/c)]
[read-comments (parameter/c boolean?)]
[collapse-whitespace (parameter/c boolean?)])
; XXX can't contract the above (well) because they refer to structs
; XXX can't contract exn:xml beacuse of parent
(struct exn:xml ())))
(define-signature xexpr^
((struct exn:invalid-xexpr (code)) ; XXX needs parent
(contracted
[xexpr/c contract?]
[xexpr? (any/c . -> . boolean?)]
[xexpr->string (xexpr/c . -> . string?)]
[xml->xexpr (any/c . -> . xexpr/c)] ; XXX bad because of struct
[xexpr->xml (xexpr/c . -> . any/c)] ; XXX bad because of struct
[xexpr-drop-empty-attributes (parameter/c boolean?)]
[permissive? (parameter/c boolean?)]
[validate-xexpr (any/c . -> . (one-of/c #t))]
[correct-xexpr? (any/c (-> any/c) (exn:invalid-xexpr? . -> . any/c) . -> . any/c)]
[xexpr-attribute? (any/c . -> . boolean?)]
[listof? ((any/c . -> . boolean?) any/c . -> . boolean?)]
[attribute->srep (any/c . -> . xexpr-attribute?)] ; XXX bad because of struct
[bcompose ((any/c any/c . -> . any/c) (any/c . -> . any/c) . -> . (any/c any/c . -> . any/c))]
[assoc-sort ((listof (list/c symbol? string?)) . -> . (listof (list/c symbol? string?)))])))
(define-signature space^
((contracted
; XXX bad because of struct
[eliminate-whitespace ((listof symbol?) (boolean? . -> . boolean?) . -> . (any/c . -> . any/c))])))
(define-signature xml-syntax^
((contracted
; XXX these should both actually return syntax? that is also xexpr/c
[syntax:read-xml (() (input-port?) . ->* . syntax?)]
[syntax:read-xml/element (() (input-port?) . ->* . syntax?)])))
(provide xml-structs^
writer^
reader^
xexpr^
space^
xml-syntax^)

View File

@ -1,34 +1,30 @@
#lang scheme
(require "sig.ss")
(require "structures.ss")
(provide/contract
[eliminate-whitespace ((listof symbol?) (boolean? . -> . boolean?) . -> . (element? . -> . element?))])
(provide space@)
;; eliminate-whitespace : (listof Symbol) (Bool -> Bool) -> Element -> Element
(define (eliminate-whitespace special eliminate-special?)
(letrec ([blank-it
(lambda (el)
(let ([name (element-name el)]
[content (map (lambda (x)
(if (element? x) (blank-it x) x))
(element-content el))])
(make-element
(source-start el)
(source-stop el)
name
(element-attributes el)
(cond
[(eliminate-special? (and (memq (element-name el) special) #t))
(filter (lambda (s)
(not (and (pcdata? s)
(or (all-blank (pcdata-string s))
(error 'eliminate-blanks "Element <~a> is not allowed to contain text ~e" name (pcdata-string s))))))
content)]
[else content]))))])
blank-it))
(define-unit space@
(import xml-structs^)
(export space^)
;; eliminate-whitespace : (listof Symbol) (Bool -> Bool) -> Element -> Element
(define (eliminate-whitespace special eliminate-special?)
(letrec ([blank-it
(lambda (el)
(let ([name (element-name el)]
[content (map (lambda (x)
(if (element? x) (blank-it x) x))
(element-content el))])
(make-element
(source-start el)
(source-stop el)
name
(element-attributes el)
(cond
[(eliminate-special? (and (memq (element-name el) special) #t))
(filter (lambda (s)
(not (and (pcdata? s)
(or (all-blank (pcdata-string s))
(error 'eliminate-blanks "Element <~a> is not allowed to contain text ~e" name (pcdata-string s))))))
content)]
[else content]))))])
blank-it))
;; all-blank : String -> Bool
(define (all-blank s) (andmap char-whitespace? (string->list s))))
;; all-blank : String -> Bool
(define (all-blank s) (andmap char-whitespace? (string->list s)))

View File

@ -1,71 +1,126 @@
#lang scheme
(require "sig.ss")
(provide xml-structs@)
; Location = (make-location Nat Nat Nat) | Symbol
(define-struct location (line char offset))
(define-unit xml-structs@
(import)
(export xml-structs^)
; Location = (make-location Nat Nat Nat) | Symbol
(define-struct location (line char offset))
; Source = (make-source Location Location)
(define-struct source (start stop))
; Document = (make-document Prolog Element (listof Misc))
(define-struct document (prolog element misc))
; Prolog = (make-prolog (listof Misc) Document-type (listof Misc))
(define-struct prolog (misc dtd misc2))
; Document-type = (make-document-type sym External-dtd #f)
; | #f
(define-struct document-type (name external inlined))
; External-dtd = (make-external-dtd/public str str)
; | (make-external-dtd/system str)
; | #f
(define-struct external-dtd (system))
(define-struct (external-dtd/public external-dtd) (public))
(define-struct (external-dtd/system external-dtd) ())
; Element = (make-element Location Location Symbol (listof Attribute) (listof Content))
(define-struct (element source) (name attributes content))
; Attribute = (make-attribute Location Location Symbol String)
(define-struct (attribute source) (name value))
; Pcdata = (make-pcdata Location Location String)
(define-struct (pcdata source) (string))
; Cdata = (make-cdata Location Location String)
(define-struct (cdata source) (string))
; Content = Pcdata
; | Element
; | Entity
; | Misc
; | Cdata
; Misc = Comment
; | Processing-instruction
; Entity = (make-entity Location Location (U Nat Symbol))
(define-struct (entity source) (text))
; Processing-instruction = (make-p-i Location Location String String)
; also represents XMLDecl
(define-struct (p-i source) (target-name instruction))
; Comment = (make-comment String)
(define-struct comment (text))
; content? : TST -> Bool
(define (content? x)
(or (pcdata? x)
(element? x)
(entity? x)
(comment? x)
(cdata? x)
(p-i? x))))
; Source = (make-source Location Location)
(define-struct source (start stop))
; Document = (make-document Prolog Element (listof Misc))
(define-struct document (prolog element misc))
; Prolog = (make-prolog (listof Misc) Document-type (listof Misc))
(define-struct prolog (misc dtd misc2))
; Document-type = (make-document-type sym External-dtd #f)
; | #f
(define-struct document-type (name external inlined))
; External-dtd = (make-external-dtd/public str str)
; | (make-external-dtd/system str)
; | #f
(define-struct external-dtd (system))
(define-struct (external-dtd/public external-dtd) (public))
(define-struct (external-dtd/system external-dtd) ())
; Element = (make-element Location Location Symbol (listof Attribute) (listof Content))
(define-struct (element source) (name attributes content))
; Attribute = (make-attribute Location Location Symbol String)
(define-struct (attribute source) (name value))
; Pcdata = (make-pcdata Location Location String)
(define-struct (pcdata source) (string))
; Cdata = (make-cdata Location Location String)
(define-struct (cdata source) (string))
; Content = Pcdata
; | Element
; | Entity
; | Misc
; | Cdata
; Misc = Comment
; | Processing-instruction
; Entity = (make-entity Location Location (U Nat Symbol))
(define-struct (entity source) (text))
; Processing-instruction = (make-p-i Location Location String String)
; also represents XMLDecl
(define-struct (p-i source) (target-name instruction))
; Comment = (make-comment String)
(define-struct comment (text))
; permissive? : parameter bool
(define permissive? (make-parameter #f))
(define permissive/c
(make-proj-contract 'permissive/c
(lambda (pos neg src-info name)
(lambda (v)
(if (permissive?)
v
(raise-contract-error
v src-info pos name "not in permissive mode"))))
(lambda (v)
(permissive?))))
; content? : TST -> Bool
(define content/c
(or/c pcdata? element? entity? comment? cdata? p-i? permissive/c))
(define misc/c
(or/c comment? p-i?))
(define location/c
(or/c location? symbol? false/c))
(provide/contract
(struct location ([line exact-nonnegative-integer?]
[char exact-nonnegative-integer?]
[offset exact-nonnegative-integer?]))
[location/c contract?]
(struct source ([start location/c]
[stop location/c]))
(struct external-dtd ([system string?]))
(struct (external-dtd/public external-dtd) ([system string?]
[public string?]))
(struct (external-dtd/system external-dtd) ([system string?]))
(struct document-type ([name symbol?]
[external external-dtd?]
[inlined false/c]))
(struct comment ([text string?]))
(struct (p-i source) ([start location/c]
[stop location/c]
[target-name symbol?]
[instruction string?]))
[misc/c contract?]
(struct prolog ([misc (listof misc/c)]
[dtd (or/c document-type? false/c)]
[misc2 (listof misc/c)]))
(struct document ([prolog prolog?]
[element element?]
[misc (listof misc/c)]))
(struct (element source) ([start location/c]
[stop location/c]
[name symbol?]
[attributes (listof attribute?)]
[content (listof content/c)]))
(struct (attribute source) ([start location/c]
[stop location/c]
[name symbol?]
[value string?]))
[permissive? (parameter/c boolean?)]
[permissive/c contract?]
[content/c contract?]
(struct (pcdata source) ([start location/c]
[stop location/c]
[string string?]))
(struct (cdata source) ([start location/c]
[stop location/c]
[string string?]))
(struct (entity source) ([start location/c]
[stop location/c]
[text (or/c symbol? exact-nonnegative-integer?)])))

View File

@ -1,51 +1,52 @@
#lang scheme
(require "sig.ss")
(require "structures.ss"
"reader.ss"
"xexpr.ss")
(provide native-xml-syntax@)
(provide/contract
; XXX these should both actually return syntax? that is also xexpr/c
[syntax:read-xml (() (input-port?) . ->* . syntax?)]
[syntax:read-xml/element (() (input-port?) . ->* . syntax?)])
(define-unit native-xml-syntax@
(import xml-structs^ reader^ xexpr^)
(export xml-syntax^)
(define (syntax:read-xml [in (current-input-port)])
(define the-xml (read-xml in))
(define the-xml-element (document-element the-xml))
(element->xexpr-syntax the-xml-element))
(define (syntax:read-xml/element [in (current-input-port)])
(define the-xml-element (read-xml/element in))
(element->xexpr-syntax the-xml-element))
(define (position from to)
(let ([start-offset (location-offset from)])
(list #f (location-line from) (location-char from) start-offset
(- (location-offset to) start-offset))))
(define (wrap s e)
(datum->syntax #f e (position (source-start s) (source-stop s))))
(define (attribute->syntax a)
(wrap a (list (attribute-name a) (attribute-value a))))
(define (non-dropping-combine atts body)
(list* (map attribute->syntax atts) body))
(define (combine atts body)
(if (xexpr-drop-empty-attributes)
(if (empty? atts)
body
(non-dropping-combine atts body))
(non-dropping-combine atts body)))
(define (element->xexpr-syntax e)
(wrap e
(list* (element-name e)
(combine (element-attributes e)
(map content->xexpr-syntax (element-content e))))))
(define (content->xexpr-syntax x)
(cond
[(element? x) (element->xexpr-syntax x)]
[(pcdata? x) (wrap x (pcdata-string x))]
[(entity? x) (wrap x (entity-text x))]
[else (wrap x x)])))
(define (syntax:read-xml [in (current-input-port)])
(define the-xml (read-xml in))
(define the-xml-element (document-element the-xml))
(element->xexpr-syntax the-xml-element))
(define (syntax:read-xml/element [in (current-input-port)])
(define the-xml-element (read-xml/element in))
(element->xexpr-syntax the-xml-element))
(define (position from to)
(let ([start-offset (location-offset from)])
(list #f (location-line from) (location-char from) start-offset
(- (location-offset to) start-offset))))
(define (wrap s e)
(datum->syntax #f e (position (source-start s) (source-stop s))))
(define (attribute->syntax a)
(wrap a (list (attribute-name a) (attribute-value a))))
(define (non-dropping-combine atts body)
(list* (map attribute->syntax atts) body))
(define (combine atts body)
(if (xexpr-drop-empty-attributes)
(if (empty? atts)
body
(non-dropping-combine atts body))
(non-dropping-combine atts body)))
(define (element->xexpr-syntax e)
(wrap e
(list* (element-name e)
(combine (element-attributes e)
(map content->xexpr-syntax (element-content e))))))
(define (content->xexpr-syntax x)
(cond
[(element? x) (element->xexpr-syntax x)]
[(pcdata? x) (wrap x (pcdata-string x))]
[(entity? x) (wrap x (entity-text x))]
[else (wrap x x)]))

View File

@ -1,167 +1,169 @@
#lang scheme
(require "sig.ss")
(require "structures.ss")
(provide writer@)
(provide/contract
[write-xml ((document?) (output-port?) . ->* . void?)]
[display-xml ((document?) (output-port?) . ->* . void?)]
[write-xml/content ((content/c) (output-port?) . ->* . void?)]
[display-xml/content ((content/c) (output-port?) . ->* . void?)]
[empty-tag-shorthand (parameter/c (or/c (symbols 'always 'never) (listof symbol?)))]
[html-empty-tags (listof symbol?)])
(define-unit writer@
(import xml-structs^)
(export writer^)
;; (empty-tag-shorthand) : (U 'always 'never (listof Symbol))
(define empty-tag-shorthand
(make-parameter 'always
(lambda (x)
(if (or (eq? x 'always) (eq? x 'never) (and (list? x) (andmap symbol? x)))
x
(error 'empty-tag-shorthand "expected 'always, 'never, or a list of symbols: received ~e" x)))))
(define html-empty-tags '(param meta link isindex input img hr frame col br basefont base area))
;; gen-write/display-xml/content : (Nat Output-port -> Void) -> Content [Output-Port]-> Void
(define (gen-write/display-xml/content dent)
(lambda (c [out (current-output-port)]) (write-xml-content c 0 dent out)))
;; indent : Nat Output-port -> Void
(define (indent n out)
(newline out)
(let loop ([n n])
(unless (zero? n)
(display #\space out)
(loop (sub1 n)))))
;; write-xml/content : Content [Output-port] -> Void
(define write-xml/content (gen-write/display-xml/content void))
;; display-xml/content : Content [Output-port] -> Void
(define display-xml/content (gen-write/display-xml/content indent))
;; gen-write/display-xml : (Content [Output-port] -> Void) -> Document [Output-port] -> Void
(define (gen-write/display-xml output-content)
(lambda (doc [out (current-output-port)])
(let ([prolog (document-prolog doc)])
(display-outside-misc (prolog-misc prolog) out)
(display-dtd (prolog-dtd prolog) out)
(display-outside-misc (prolog-misc2 prolog) out))
(output-content (document-element doc) out)
(display-outside-misc (document-misc doc) out)))
; display-dtd : document-type oport -> void
(define (display-dtd dtd out)
(when dtd
(fprintf out "<!DOCTYPE ~a" (document-type-name dtd))
(let ([external (document-type-external dtd)])
(cond
[(external-dtd/public? external)
(fprintf out " PUBLIC \"~a\" \"~a\""
(external-dtd/public-public external)
(external-dtd-system external))]
[(external-dtd/system? external)
(fprintf out " SYSTEM \"~a\"" (external-dtd-system external))]
[(not external) (void)]))
(display ">" out)
(newline out)))
;; write-xml : Document [Output-port] -> Void
(define write-xml (gen-write/display-xml write-xml/content))
;; display-xml : Document [Output-port] -> Void
(define display-xml (gen-write/display-xml display-xml/content))
;; display-outside-misc : (listof Misc) Output-port -> Void
(define (display-outside-misc misc out)
(for-each (lambda (x)
((cond
[(comment? x) write-xml-comment]
[(p-i? x) write-xml-p-i]) x 0 void out)
(newline out))
misc))
;; write-xml-content : Content Nat (Nat Output-Stream -> Void) Output-Stream -> Void
(define (write-xml-content el over dent out)
((cond
[(element? el) write-xml-element]
[(pcdata? el) write-xml-pcdata]
[(cdata? el) write-xml-cdata]
[(entity? el) write-xml-entity]
[(comment? el) write-xml-comment]
[(p-i? el) write-xml-p-i]
[else (error 'write-xml-content "received ~e" el)])
el over dent out))
;; write-xml-element : Element Nat (Nat Output-Stream -> Void) Output-Stream -> Void
(define (write-xml-element el over dent out)
(let* ([name (element-name el)]
[start (lambda (str)
(write-xml-base str over dent out)
(display name out))]
[content (element-content el)])
(start "<")
(for ([att (in-list (element-attributes el))])
(fprintf out " ~a=\"~a\"" (attribute-name att)
(escape (attribute-value att) escape-attribute-table)))
(if (and (null? content)
(let ([short (empty-tag-shorthand)])
(case short
[(always) #t]
[(never) #f]
[else (memq (lowercase-symbol name) short)])))
(display " />" out)
(begin
(display ">" out)
(for ([c (in-list content)])
(write-xml-content c (incr over) dent out))
(start "</")
(display ">" out)))))
; : sym -> sym
(define lowercases (make-weak-hash))
(define (lowercase-symbol x)
(or (hash-ref lowercases x #f)
(let ([s (symbol->string x)])
(let ([s (string->symbol (string-downcase s))])
(hash-set! lowercases x s)
s))))
;; write-xml-base : (U String Char Symbol) Nat (Nat Output-Stream -> Void) Output-Stream -> Void
(define (write-xml-base el over dent out)
(dent over out)
(display el out))
;; write-xml-pcdata : Pcdata Nat (Nat Output-Stream -> Void) Output-Stream -> Void
(define (write-xml-pcdata str over dent out)
(write-xml-base (escape (pcdata-string str) escape-table) over dent out))
;; write-xml-cdata : Cdata Nat (Nat Output-Stream -> Void) Output-Stream -> Void
(define (write-xml-cdata cdata over dent out)
;; XXX: Different kind of quote is needed, for assume the user includes the <![CDATA[...]]> with proper quoting
(write-xml-base (format "~a" (cdata-string cdata)) over dent out))
;; write-xml-p-i : Processing-instruction Nat (Nat Output-Stream -> Void) Output-Stream -> Void
(define (write-xml-p-i p-i over dent out)
(write-xml-base (format "<?~a ~a?>" (p-i-target-name p-i) (p-i-instruction p-i)) over dent out))
;; write-xml-comment : Comment Nat (Nat Output-Stream -> Void) Output-Stream -> Void
(define (write-xml-comment comment over dent out)
(write-xml-base (format "<!--~a-->" (comment-text comment)) over dent out))
;; write-xml-entity : Entity Nat (Nat Output-stream -> Void) Output-stream -> Void
(define (write-xml-entity entity over dent out)
(let ([n (entity-text entity)])
(fprintf out (if (number? n) "&#~a;" "&~a;") n)))
(define escape-table #rx"[<>&]")
(define escape-attribute-table #rx"[<>&\"]")
(define (replace-escaped s)
(case (string-ref s 0)
[(#\<) "&lt;"]
[(#\>) "&gt;"]
[(#\&) "&amp;"]
[(#\") "&quot;"]))
;; escape : String -> String
(define (escape x table)
(regexp-replace* table x replace-escaped))
;; incr : Nat -> Nat
(define (incr n) (+ n 2)))
;; (empty-tag-shorthand) : (U 'always 'never (listof Symbol))
(define empty-tag-shorthand
(make-parameter 'always
(lambda (x)
(if (or (eq? x 'always) (eq? x 'never) (and (list? x) (andmap symbol? x)))
x
(error 'empty-tag-shorthand "expected 'always, 'never, or a list of symbols: received ~e" x)))))
(define html-empty-tags '(param meta link isindex input img hr frame col br basefont base area))
;; gen-write/display-xml/content : (Nat Output-port -> Void) -> Content [Output-Port]-> Void
(define (gen-write/display-xml/content dent)
(lambda (c [out (current-output-port)]) (write-xml-content c 0 dent out)))
;; indent : Nat Output-port -> Void
(define (indent n out)
(newline out)
(let loop ([n n])
(unless (zero? n)
(display #\space out)
(loop (sub1 n)))))
;; write-xml/content : Content [Output-port] -> Void
(define write-xml/content (gen-write/display-xml/content void))
;; display-xml/content : Content [Output-port] -> Void
(define display-xml/content (gen-write/display-xml/content indent))
;; gen-write/display-xml : (Content [Output-port] -> Void) -> Document [Output-port] -> Void
(define (gen-write/display-xml output-content)
(lambda (doc [out (current-output-port)])
(let ([prolog (document-prolog doc)])
(display-outside-misc (prolog-misc prolog) out)
(display-dtd (prolog-dtd prolog) out)
(display-outside-misc (prolog-misc2 prolog) out))
(output-content (document-element doc) out)
(display-outside-misc (document-misc doc) out)))
; display-dtd : document-type oport -> void
(define (display-dtd dtd out)
(when dtd
(fprintf out "<!DOCTYPE ~a" (document-type-name dtd))
(let ([external (document-type-external dtd)])
(cond
[(external-dtd/public? external)
(fprintf out " PUBLIC \"~a\" \"~a\""
(external-dtd/public-public external)
(external-dtd-system external))]
[(external-dtd/system? external)
(fprintf out " SYSTEM \"~a\"" (external-dtd-system external))]
[(not external) (void)]))
(display ">" out)
(newline out)))
;; write-xml : Document [Output-port] -> Void
(define write-xml (gen-write/display-xml write-xml/content))
;; display-xml : Document [Output-port] -> Void
(define display-xml (gen-write/display-xml display-xml/content))
;; display-outside-misc : (listof Misc) Output-port -> Void
(define (display-outside-misc misc out)
(for-each (lambda (x)
((cond
[(comment? x) write-xml-comment]
[(p-i? x) write-xml-p-i]) x 0 void out)
(newline out))
misc))
;; write-xml-content : Content Nat (Nat Output-Stream -> Void) Output-Stream -> Void
(define (write-xml-content el over dent out)
((cond
[(element? el) write-xml-element]
[(pcdata? el) write-xml-pcdata]
[(cdata? el) write-xml-cdata]
[(entity? el) write-xml-entity]
[(comment? el) write-xml-comment]
[(p-i? el) write-xml-p-i]
[else (error 'write-xml-content "received ~e" el)])
el over dent out))
;; write-xml-element : Element Nat (Nat Output-Stream -> Void) Output-Stream -> Void
(define (write-xml-element el over dent out)
(let* ([name (element-name el)]
[start (lambda (str)
(write-xml-base str over dent out)
(display name out))]
[content (element-content el)])
(start "<")
(for ([att (in-list (element-attributes el))])
(fprintf out " ~a=\"~a\"" (attribute-name att)
(escape (attribute-value att) escape-attribute-table)))
(if (and (null? content)
(let ([short (empty-tag-shorthand)])
(case short
[(always) #t]
[(never) #f]
[else (memq (lowercase-symbol name) short)])))
(display " />" out)
(begin
(display ">" out)
(for ([c (in-list content)])
(write-xml-content c (incr over) dent out))
(start "</")
(display ">" out)))))
; : sym -> sym
(define lowercases (make-weak-hash))
(define (lowercase-symbol x)
(or (hash-ref lowercases x #f)
(let ([s (symbol->string x)])
(let ([s (string->symbol (string-downcase s))])
(hash-set! lowercases x s)
s))))
;; write-xml-base : (U String Char Symbol) Nat (Nat Output-Stream -> Void) Output-Stream -> Void
(define (write-xml-base el over dent out)
(dent over out)
(display el out))
;; write-xml-pcdata : Pcdata Nat (Nat Output-Stream -> Void) Output-Stream -> Void
(define (write-xml-pcdata str over dent out)
(write-xml-base (escape (pcdata-string str) escape-table) over dent out))
;; write-xml-cdata : Cdata Nat (Nat Output-Stream -> Void) Output-Stream -> Void
(define (write-xml-cdata cdata over dent out)
;; XXX: Different kind of quote is needed, for assume the user includes the <![CDATA[...]]> with proper quoting
(write-xml-base (format "~a" (cdata-string cdata)) over dent out))
;; write-xml-p-i : Processing-instruction Nat (Nat Output-Stream -> Void) Output-Stream -> Void
(define (write-xml-p-i p-i over dent out)
(write-xml-base (format "<?~a ~a?>" (p-i-target-name p-i) (p-i-instruction p-i)) over dent out))
;; write-xml-comment : Comment Nat (Nat Output-Stream -> Void) Output-Stream -> Void
(define (write-xml-comment comment over dent out)
(write-xml-base (format "<!--~a-->" (comment-text comment)) over dent out))
;; write-xml-entity : Entity Nat (Nat Output-stream -> Void) Output-stream -> Void
(define (write-xml-entity entity over dent out)
(let ([n (entity-text entity)])
(fprintf out (if (number? n) "&#~a;" "&~a;") n)))
(define escape-table #rx"[<>&]")
(define escape-attribute-table #rx"[<>&\"]")
(define (replace-escaped s)
(case (string-ref s 0)
[(#\<) "&lt;"]
[(#\>) "&gt;"]
[(#\&) "&amp;"]
[(#\") "&quot;"]))
;; escape : String -> String
(define (escape x table)
(regexp-replace* table x replace-escaped))
;; incr : Nat -> Nat
(define (incr n) (+ n 2))

View File

@ -1,228 +1,232 @@
#lang scheme
(require scheme/pretty)
(require "sig.ss")
(require scheme/pretty
"structures.ss"
"writer.ss")
(provide xexpr@)
;; Xexpr ::= String
;; | (list* Symbol (listof Attribute-srep) (listof Xexpr))
;; | (cons Symbol (listof Xexpr))
;; | Symbol
;; | Nat
;; | Comment
;; | Processing-instruction
;; | Cdata
;; Attribute-srep ::= (list Symbol String)
(define-unit xexpr@
(import xml-structs^ writer^)
(export xexpr^)
;; Xexpr ::= String
;; | (list* Symbol (listof Attribute-srep) (listof Xexpr))
;; | (cons Symbol (listof Xexpr))
;; | Symbol
;; | Nat
;; | Comment
;; | Processing-instruction
;; | Cdata
;; Attribute-srep ::= (list Symbol String)
;; sorting is no longer necessary, since xt3d uses xml->zxexpr, which sorts.
;; assoc-sort : (listof (list Symbol a)) -> (listof (list Symbol a))
(define (assoc-sort to-sort)
(sort to-sort (bcompose string<? (compose symbol->string car))))
(define xexpr-drop-empty-attributes (make-parameter #f))
(define xexpr/c
(make-proj-contract
'xexpr?
(lambda (pos neg src-info name)
(lambda (val)
(with-handlers ([exn:invalid-xexpr?
(lambda (exn)
(raise-contract-error
val
src-info
pos
name
"Not an Xexpr. ~a~n~nContext:~n~a"
(exn-message exn)
(pretty-format val)))])
(validate-xexpr val)
val)))
(lambda (v) #t)))
(define (xexpr? x)
(correct-xexpr? x (lambda () #t) (lambda (exn) #f)))
(define (validate-xexpr x)
(correct-xexpr? x (lambda () #t) (lambda (exn) (raise exn))))
;; ;; ;; ;; ;; ;; ;
;; ; xexpr? helpers
(define-struct (exn:invalid-xexpr exn:fail) (code))
;; correct-xexpr? : any (-> a) (exn -> a) -> a
(define (correct-xexpr? x true false)
(cond
((string? x) (true))
((symbol? x) (true))
((exact-nonnegative-integer? x) (true))
((comment? x) (true))
((p-i? x) (true))
((cdata? x) (true))
((pcdata? x) (true))
((list? x)
(or (null? x)
(if (symbol? (car x))
(if (has-attribute? x)
(and (attribute-pairs? (cadr x) true false)
(andmap (lambda (part)
(correct-xexpr? part true false))
(cddr x))
(true))
(andmap (lambda (part)
(correct-xexpr? part true false))
(cdr x)))
(false (make-exn:invalid-xexpr
(format
"Expected a symbol as the element name, given ~s"
(car x))
(current-continuation-marks)
x)))))
[(permissive?) (true)]
(else (false
;; sorting is no longer necessary, since xt3d uses xml->zxexpr, which sorts.
;; assoc-sort : (listof (list Symbol a)) -> (listof (list Symbol a))
(define (assoc-sort to-sort)
(sort to-sort (bcompose string<? (compose symbol->string car))))
(define xexpr-drop-empty-attributes (make-parameter #f))
(define xexpr/c
(make-proj-contract
'xexpr?
(lambda (pos neg src-info name)
(lambda (val)
(with-handlers ([exn:invalid-xexpr?
(lambda (exn)
(raise-contract-error
val
src-info
pos
name
"Not an Xexpr. ~a~n~nContext:~n~a"
(exn-message exn)
(pretty-format val)))])
(validate-xexpr val)
val)))
(lambda (v) #t)))
(define (xexpr? x)
(correct-xexpr? x (lambda () #t) (lambda (exn) #f)))
(define (validate-xexpr x)
(correct-xexpr? x (lambda () #t) (lambda (exn) (raise exn))))
;; ;; ;; ;; ;; ;; ;
;; ; xexpr? helpers
(define-struct (exn:invalid-xexpr exn:fail) (code))
;; correct-xexpr? : any (-> a) (exn -> a) -> a
(define (correct-xexpr? x true false)
(cond
((string? x) (true))
((symbol? x) (true))
((exact-nonnegative-integer? x) (true))
((comment? x) (true))
((p-i? x) (true))
((cdata? x) (true))
((pcdata? x) (true))
((list? x)
(or (null? x)
(if (symbol? (car x))
(if (has-attribute? x)
(and (attribute-pairs? (cadr x) true false)
(andmap (lambda (part)
(correct-xexpr? part true false))
(cddr x))
(true))
(andmap (lambda (part)
(correct-xexpr? part true false))
(cdr x)))
(false (make-exn:invalid-xexpr
(format
"Expected a symbol as the element name, given ~s"
(car x))
(current-continuation-marks)
x)))))
[(permissive?) (true)]
(else (false
(make-exn:invalid-xexpr
(format (string-append
"Expected a string, symbol, number, comment, "
"processing instruction, or list, given ~s")
x)
(current-continuation-marks)
x)))))
;; has-attribute? : List -> Boolean
;; True if the Xexpr provided has an attribute list.
(define (has-attribute? x)
(and (> (length x) 1)
(list? (cadr x))
(andmap (lambda (attr)
(pair? attr))
(cadr x))))
;; attribute-pairs? : List (-> a) (exn -> a) -> a
;; True if the list is a list of pairs.
(define (attribute-pairs? attrs true false)
(if (null? attrs)
(true)
(let ((attr (car attrs)))
(if (pair? attr)
(and (attribute-symbol-string? attr true false)
(attribute-pairs? (cdr attrs) true false )
(true))
(false
(make-exn:invalid-xexpr
(format (string-append
"Expected a string, symbol, number, comment, "
"processing instruction, or list, given ~s")
x)
(format "Expected a pair, given ~a" attr)
(current-continuation-marks)
x)))))
;; has-attribute? : List -> Boolean
;; True if the Xexpr provided has an attribute list.
(define (has-attribute? x)
(and (> (length x) 1)
(list? (cadr x))
(andmap (lambda (attr)
(pair? attr))
(cadr x))))
;; attribute-pairs? : List (-> a) (exn -> a) -> a
;; True if the list is a list of pairs.
(define (attribute-pairs? attrs true false)
(if (null? attrs)
(true)
(let ((attr (car attrs)))
(if (pair? attr)
(and (attribute-symbol-string? attr true false)
(attribute-pairs? (cdr attrs) true false )
(true))
(false
(make-exn:invalid-xexpr
(format "Expected a pair, given ~a" attr)
(current-continuation-marks)
attr))))))
;; attribute-symbol-string? : List (-> a) (exn -> a) -> a
;; True if the list is a list of String,Symbol pairs.
(define (attribute-symbol-string? attr true false)
(if (symbol? (car attr))
(if (string? (cadr attr))
(true)
(false (make-exn:invalid-xexpr
(format "Expected a string, given ~a" (cadr attr))
(current-continuation-marks)
(cadr attr))))
(false (make-exn:invalid-xexpr
(format "Expected a symbol, given ~a" (car attr))
(current-continuation-marks)
(cadr attr)))))
;; ; end xexpr? helpers
;; ;; ;; ;; ;; ;; ;; ;;
; : (a -> bool) tst -> bool
; To check if l is a (listof p?)
; Don't use (and (list? l) (andmap p? l)) because l may be improper.
(define (listof? p? l)
(let listof-p? ([l l])
(or (null? l)
(and (cons? l) (p? (car l)) (listof-p? (cdr l))))))
; : tst -> bool
(define (xexpr-attribute? b)
(and (pair? b)
(symbol? (car b))
(pair? (cdr b))
(string? (cadr b))
(null? (cddr b))))
; permissive? : parameter bool
(define permissive? (make-parameter #f))
;; xml->xexpr : Content -> Xexpr
(define (xml->xexpr x)
(let* ([non-dropping-combine
(lambda (atts body)
(cons (assoc-sort (map attribute->srep atts))
body))]
[combine (if (xexpr-drop-empty-attributes)
(lambda (atts body)
(if (null? atts)
body
(non-dropping-combine atts body)))
non-dropping-combine)])
(let loop ([x x])
(cond
[(element? x)
(let ([body (map loop (element-content x))]
[atts (element-attributes x)])
(cons (element-name x) (combine atts body)))]
[(pcdata? x) (pcdata-string x)]
[(entity? x) (entity-text x)]
[(or (comment? x) (p-i? x) (cdata? x)) x]
[(document? x) (error 'xml->xexpr "Expected content, given ~e\nUse document-element to extract the content." x)]
[(permissive?) x]
[else (error 'xml->xexpr "Expected content, given ~e" x)]))))
;; attribute->srep : Attribute -> Attribute-srep
(define (attribute->srep a)
(list (attribute-name a) (attribute-value a)))
;; srep->attribute : Attribute-srep -> Attribute
(define (srep->attribute a)
(unless (and (pair? a) (pair? (cdr a)) (null? (cddr a)) (symbol? (car a)) (string? (cadr a)))
(error 'srep->attribute "expected (list Symbol String) given ~e" a))
(make-attribute 'scheme 'scheme (car a) (cadr a)))
;; xexpr->xml : Xexpr -> Content
;; The contract is enforced.
(define (xexpr->xml x)
(cond
[(pair? x)
(let ([f (lambda (atts body)
(unless (list? body)
(error 'xexpr->xml
"expected a list of xexprs for the body in ~e"
x))
(make-element 'scheme 'scheme (car x)
atts
(map xexpr->xml body)))])
(if (and (pair? (cdr x))
(or (null? (cadr x))
(and (pair? (cadr x)) (pair? (caadr x)))))
(f (map srep->attribute (cadr x)) (cddr x))
(f null (cdr x))))]
[(string? x) (make-pcdata 'scheme 'scheme x)]
[(or (symbol? x) (exact-nonnegative-integer? x))
(make-entity 'scheme 'scheme x)]
[(or (comment? x) (p-i? x) (cdata? x) (pcdata? x)) x]
[else ;(error 'xexpr->xml "malformed xexpr ~e" x)
x]))
;; xexpr->string : Xexpression -> String
(define (xexpr->string xexpr)
(let ([port (open-output-string)])
(write-xml/content (xexpr->xml xexpr) port)
(get-output-string port)))
;; bcompose : (a a -> c) (b -> a) -> (b b -> c)
(define (bcompose f g)
(lambda (x y) (f (g x) (g y)))))
attr))))))
;; attribute-symbol-string? : List (-> a) (exn -> a) -> a
;; True if the list is a list of String,Symbol pairs.
(define (attribute-symbol-string? attr true false)
(if (symbol? (car attr))
(if (string? (cadr attr))
(true)
(false (make-exn:invalid-xexpr
(format "Expected a string, given ~a" (cadr attr))
(current-continuation-marks)
(cadr attr))))
(false (make-exn:invalid-xexpr
(format "Expected a symbol, given ~a" (car attr))
(current-continuation-marks)
(cadr attr)))))
;; ; end xexpr? helpers
;; ;; ;; ;; ;; ;; ;; ;;
; : (a -> bool) tst -> bool
; To check if l is a (listof p?)
; Don't use (and (list? l) (andmap p? l)) because l may be improper.
(define (listof? p? l)
(let listof-p? ([l l])
(or (null? l)
(and (cons? l) (p? (car l)) (listof-p? (cdr l))))))
; : tst -> bool
(define (xexpr-attribute? b)
(and (pair? b)
(symbol? (car b))
(pair? (cdr b))
(string? (cadr b))
(null? (cddr b))))
;; xml->xexpr : Content -> Xexpr
(define (xml->xexpr x)
(let* ([non-dropping-combine
(lambda (atts body)
(cons (assoc-sort (map attribute->srep atts))
body))]
[combine (if (xexpr-drop-empty-attributes)
(lambda (atts body)
(if (null? atts)
body
(non-dropping-combine atts body)))
non-dropping-combine)])
(let loop ([x x])
(cond
[(element? x)
(let ([body (map loop (element-content x))]
[atts (element-attributes x)])
(cons (element-name x) (combine atts body)))]
[(pcdata? x) (pcdata-string x)]
[(entity? x) (entity-text x)]
[(or (comment? x) (p-i? x) (cdata? x)) x]
[(document? x) (error 'xml->xexpr "Expected content, given ~e\nUse document-element to extract the content." x)]
[(permissive?) x]
[else (error 'xml->xexpr "Expected content, given ~e" x)]))))
;; attribute->srep : Attribute -> Attribute-srep
(define (attribute->srep a)
(list (attribute-name a) (attribute-value a)))
;; srep->attribute : Attribute-srep -> Attribute
(define (srep->attribute a)
(unless (and (pair? a) (pair? (cdr a)) (null? (cddr a)) (symbol? (car a)) (string? (cadr a)))
(error 'srep->attribute "expected (list Symbol String) given ~e" a))
(make-attribute 'scheme 'scheme (car a) (cadr a)))
;; xexpr->xml : Xexpr -> Content
;; The contract is enforced.
(define (xexpr->xml x)
(cond
[(pair? x)
(let ([f (lambda (atts body)
(unless (list? body)
(error 'xexpr->xml
"expected a list of xexprs for the body in ~e"
x))
(make-element 'scheme 'scheme (car x)
atts
(map xexpr->xml body)))])
(if (and (pair? (cdr x))
(or (null? (cadr x))
(and (pair? (cadr x)) (pair? (caadr x)))))
(f (map srep->attribute (cadr x)) (cddr x))
(f null (cdr x))))]
[(string? x) (make-pcdata 'scheme 'scheme x)]
[(or (symbol? x) (exact-nonnegative-integer? x))
(make-entity 'scheme 'scheme x)]
[(or (comment? x) (p-i? x) (cdata? x) (pcdata? x)) x]
[else ;(error 'xexpr->xml "malformed xexpr ~e" x)
x]))
;; xexpr->string : Xexpression -> String
(define (xexpr->string xexpr)
(let ([port (open-output-string)])
(write-xml/content (xexpr->xml xexpr) port)
(get-output-string port)))
;; bcompose : (a a -> c) (b -> a) -> (b b -> c)
(define (bcompose f g)
(lambda (x y) (f (g x) (g y))))
(provide/contract
[exn:invalid-xexpr? (any/c . -> . boolean?)]
[exn:invalid-xexpr-code (exn:invalid-xexpr? . -> . any/c)]
[xexpr/c contract?]
[xexpr? (any/c . -> . boolean?)]
[xexpr->string (xexpr/c . -> . string?)]
[xml->xexpr (content/c . -> . xexpr/c)]
[xexpr->xml (xexpr/c . -> . content/c)]
[xexpr-drop-empty-attributes (parameter/c boolean?)]
[validate-xexpr (any/c . -> . (one-of/c #t))]
[correct-xexpr? (any/c (-> any/c) (exn:invalid-xexpr? . -> . any/c) . -> . any/c)])

View File

@ -1,12 +0,0 @@
#lang scheme
(require "private/sig.ss")
(define-signature xml^
((open xml-structs^)
(open reader^)
(open writer^)
(open xexpr^)
(open space^)
(open xml-syntax^)))
(provide xml^)

View File

@ -1,16 +0,0 @@
#lang scheme
(require "xml-sig.ss"
"private/sig.ss"
"private/structures.ss"
"private/reader.ss"
"private/writer.ss"
"private/xexpr.ss"
"private/space.ss"
"private/syntax.ss")
(provide xml@)
(define-compound-unit/infer xml@
(import)
(export xml-structs^ reader^ xml-syntax^ writer^ xexpr^ space^)
(link xml-structs@ reader@ native-xml-syntax@ writer@ xexpr@ space@))

View File

@ -33,6 +33,107 @@ It also does not expand user-defined entities or read user-defined entities in a
@section{Datatypes}
@defstruct[location ([line exact-nonnegative-integer?]
[char exact-nonnegative-integer?]
[offset exact-nonnegative-integer?])]{
Represents a location in an input stream.}
@defthing[location/c contract?]{
Equivalent to @scheme[(or/c location? symbol? false/c)].
}
@defstruct[source ([start location/c]
[stop location/c])]{
Represents a source location. Other structure types extend @scheme[source].
When XML is generated from an input stream by @scheme[read-xml],
locations are represented by @scheme[location] instances. When XML
structures are generated by @scheme[xexpr->xml], then locations are
symbols.}
@deftogether[(
@defstruct[external-dtd ([system string?])]
@defstruct[(external-dtd/public external-dtd) ([public string?])]
@defstruct[(external-dtd/system external-dtd) ()]
)]{
Represents an externally defined DTD.}
@defstruct[document-type ([name symbol?]
[external external-dtd?]
[inlined false/c])]{
Represents a document type.}
@defstruct[comment ([text string?])]{
Represents a comment.}
@defstruct[(p-i source) ([target-name symbol?]
[instruction string?])]{
Represents a processing instruction.}
@defthing[misc/c contract?]{
Equivalent to @scheme[(or/c comment? p-i?)]
}
@defstruct[prolog ([misc (listof misc/c)]
[dtd (or/c document-type false/c)]
[misc2 (listof misc/c)])]{
Represents a document prolog.
}
@defstruct[document ([prolog prolog?]
[element element?]
[misc (listof misc/c)])]{
Represents a document.}
@defstruct[(element source) ([name symbol?]
[attributes (listof attribute?)]
[content (listof content/c)])]{
Represents an element.}
@defstruct[(attribute source) ([name symbol?] [value string?])]{
Represents an attribute within an element.}
@defthing[content/c contract?]{
Equivalent to @scheme[(or/c pcdata? element? entity? comment? cdata? p-i? permissive/c)].
}
@defthing[permissive/c contract?]{
If @scheme[(permissive?)] is @scheme[#t], then equivalent to @scheme[any/c], otherwise equivalent to @scheme[(make-none/c 'permissive)]}
@defstruct[(entity source) ([text (or/c symbol? exact-nonnegative-integer?)])]{
Represents a symbolic or numerical entity.}
@defstruct[(pcdata source) ([string string?])]{
Represents PCDATA content.}
@defstruct[(cdata source) ([string string?])]{
Represents CDATA content.
The @scheme[string] field is assumed to be of the form
@litchar{<![CDATA[}@nonterm{content}@litchar{]]>} with proper quoting
of @nonterm{content}. Otherwise, @scheme[write-xml] generates
incorrect output.}
@defstruct[(exn:invalid-xexpr exn:fail) ([code any/c])]{
Raised by @scheme[validate-xexpr] when passed an invalid
@tech{X-expression}. The @scheme[code] fields contains an invalid part
of the input to @scheme[validate-xexpr].}
@defstruct[(exn:xml exn:fail:read) ()]{
Raised by @scheme[read-xml] when an error in the XML input is found.
}
@defproc[(xexpr? [v any/c]) boolean?]{
Returns @scheme[#t] if @scheme[v] is a @tech{X-expression}, @scheme[#f] otherwise.
@ -65,108 +166,12 @@ An @scheme[_exact-nonnegative-integer] represents a numeric entity. For example,
A @scheme[_cdata] is an instance of the @scheme[cdata] structure type,
and a @scheme[_misc] is an instance of the @scheme[comment] or
@scheme[pcdata] structure types.}
@scheme[p-i] structure types.}
@defthing[xexpr/c contract?]{
A contract that is like @scheme[xexpr?] except produces a better error message when the value is not an @tech{X-expression}.
}
@defstruct[document ([prolog prolog?]
[element element?]
[misc (listof (or/c comment? p-i?))])]{
Represents a document.}
@defstruct[prolog ([misc (listof (or/c comment? p-i?))]
[dtd (or/c document-type false/c)]
[misc2 (listof (or/c comment? p-i?))])]{
Represents a document prolog.
}
@defstruct[document-type ([name symbol?]
[external external-dtd?]
[inlined false/c])]{
Represents a document type.}
@deftogether[(
@defstruct[external-dtd ([system string?])]
@defstruct[(external-dtd/public external-dtd) ([public string?])]
@defstruct[(external-dtd/system external-dtd) ()]
)]{
Represents an externally defined DTD.}
@defstruct[(element source) ([name symbol?]
[attributes (listof attribute?)]
[content (listof content?)])]{
Represents an element.}
@defproc[(content? [v any/c]) boolean?]{
Returns @scheme[#t] if @scheme[v] is a @scheme[pcdata] instance,
@scheme[element] instance, an @scheme[entity] instance,
@scheme[comment], or @scheme[cdata] instance.}
@defstruct[(attribute source) ([name symbol?] [value string?])]{
Represents an attribute within an element.}
@defstruct[(entity source) ([text (or/c symbol? exact-nonnegative-integer?)])]{
Represents a symbolic or numerical entity.}
@defstruct[(pcdata source) ([string string?])]{
Represents PCDATA content.}
@defstruct[(cdata source) ([string string?])]{
Represents CDATA content.
The @scheme[string] field is assumed to be of the form
@litchar{<![CDATA[}@nonterm{content}@litchar{]]>} with proper quoting
of @nonterm{content}. Otherwise, @scheme[write-xml] generates
incorrect output.}
@defstruct[(p-i source) ([target-name string?]
[instruction string?])]{
Represents a processing instruction.}
@defstruct[comment ([text string?])]{
Represents a comment.}
@defstruct[source ([start (or/c location? symbol?)]
[stop (or/c location? symbol?)])]{
Represents a source location. Other structure types extend @scheme[source].
When XML is generated from an input stream by @scheme[read-xml],
locations are represented by @scheme[location] instances. When XML
structures are generated by @scheme[xexpr->xml], then locations are
symbols.}
@defstruct[location ([line exact-nonnegative-integer?]
[char exact-nonnegative-integer?]
[offset exact-nonnegative-integer?])]{
Represents a location in an input stream.}
@defstruct[(exn:invalid-xexpr exn:fail) ([code any/c])]{
Raised by @scheme[validate-xexpr] when passed an invalid
@tech{X-expression}. The @scheme[code] fields contains an invalid part
of the input to @scheme[validate-xexpr].}
@; ----------------------------------------------------------------------
@section{Reading and Writing XML}
@ -217,7 +222,7 @@ Like @scheme[syntax:real-xml], but it reads an XML element like
Writes a document to the given output port, currently ignoring
everything except the document's root element.}
@defproc[(write-xml/content [content content?] [out output-port? (current-output-port)])
@defproc[(write-xml/content [content content/c] [out output-port? (current-output-port)])
void?]{
Writes document content to the given output port.}
@ -229,7 +234,7 @@ Like @scheme[write-xml], but newlines and indentation make the output
more readable, though less technically correct when whitespace is
significant.}
@defproc[(display-xml/content [content content?] [out output-port? (current-output-port)])
@defproc[(display-xml/content [content content/c] [out output-port? (current-output-port)])
void?]{
Like @scheme[write-xml/content], but with indentation and newlines
@ -246,12 +251,12 @@ like @scheme[display-xml].}
and leave them in place in the resulting ``@tech{X-expression}''.
}
@defproc[(xml->xexpr [content content?]) xexpr/c]{
@defproc[(xml->xexpr [content content/c]) xexpr/c]{
Converts document content into an @tech{X-expression}, using
@scheme[permissive?] to determine if foreign objects are allowed.}
@defproc[(xexpr->xml [xexpr xexpr/c]) content?]{
@defproc[(xexpr->xml [xexpr xexpr/c]) content/c]{
Converts an @tech{X-expression} into XML content.}

View File

@ -1,7 +1,14 @@
#lang scheme
(require "xml-sig.ss"
"xml-unit.ss")
(require "private/structures.ss"
"private/reader.ss"
"private/space.ss"
"private/writer.ss"
"private/xexpr.ss"
"private/syntax.ss")
(define-values/invoke-unit/infer xml@)
(provide-signature-elements xml^)
(provide (all-from-out "private/structures.ss"
"private/reader.ss"
"private/space.ss"
"private/writer.ss"
"private/xexpr.ss"
"private/syntax.ss"))