diff --git a/collects/browser/private/html.ss b/collects/browser/private/html.ss index 742b2f6869..318447aec1 100644 --- a/collects/browser/private/html.ss +++ b/collects/browser/private/html.ss @@ -6,7 +6,7 @@ mzlib/port net/url-sig (only-in html read-html-as-xml read-html-comments use-html-spec) - (except-in xml/xml read-comments) + (except-in xml read-comments) mzlib/class "bullet.ss" "option-snip.ss" @@ -492,7 +492,7 @@ (fixup-whitespace content leading-ok?)) (values "" leading-ok?))) (values "" leading-ok?)))] - [(pi? c) (values "" leading-ok?)] ;; processing instruction + [(p-i? c) (values "" leading-ok?)] ;; processing instruction [else (let ([tag (car c)]) (if (memq tag exact-whitespace-tags) (let-values ([(s done?) (remove-leading-newline c)]) @@ -879,7 +879,7 @@ (values void 0))))] [(number? e) (values (translate-number e) 0)] - [(or (comment? e) (pi? e)) (values void forced-lines)] + [(or (comment? e) (p-i? e)) (values void forced-lines)] [else (let* ([tag (car e)] [rest/base/depth/form/fl (lambda (para-base enum-depth form forced-lines) diff --git a/collects/htdp/servlet.ss b/collects/htdp/servlet.ss index f0847c2376..cd6485f11b 100644 --- a/collects/htdp/servlet.ss +++ b/collects/htdp/servlet.ss @@ -4,10 +4,20 @@ web-server/servlet htdp/error xml + scheme/contract mzlib/etc) (provide (all-from web-server/servlet-env) - (all-from web-server/servlet) - (rename wrapped-build-suspender build-suspender)) + (all-from web-server/servlet)) + (provide/contract + [build-suspender + (((listof xexpr/c) + (listof xexpr/c)) + ((listof (list/c symbol? string?)) + (listof (list/c symbol? string?))) + . ->* . + (string? + . -> . + xexpr/c))]) ; build-suspender : (listof html) (listof html) [(listof (cons sym str))] [(listof (cons sym str))] -> str -> response (define build-suspender @@ -20,39 +30,4 @@ (title . ,title)) (body ,body-attributes (form ([action ,k-url] [method "post"]) - ,@content)))))) - - (define wrapped-build-suspender - (case-lambda - [(title content) - (check-suspender2 title content) - (build-suspender title content)] - [(title content body-attributes) - (check-suspender3 title content body-attributes) - (build-suspender title content body-attributes)] - [(title content body-attributes head-attributes) - (check-suspender4 title content body-attributes head-attributes) - (build-suspender title content body-attributes head-attributes)])) - - ; : tst tst -> void - (define (check-suspender2 title content) - (check-arg 'build-suspender (listof? xexpr? title) "(listof xexpr[HTML])" "1st" title) - (check-arg 'build-suspender (listof? xexpr? content) "(listof xexpr[HTML])" "2nd" content)) - - ; : tst tst tst -> void - (define (check-suspender3 title content body-attributes) - (check-suspender2 title content) - (check-arg 'build-suspender (listof? attribute-pair? body-attributes) - "(listof (cons sym str))" "3rd" body-attributes)) - - ; : tst tst tst tst -> void - (define (check-suspender4 title content body-attributes head-attributes) - (check-suspender3 title content body-attributes) - (check-arg 'build-suspender (listof? attribute-pair? head-attributes) - "(listof (cons sym str))" "4th" head-attributes)) - - ; : tst -> bool - (define (attribute-pair? b) - (and (pair? b) - (symbol? (car b)) - (string? (cdr b))))) + ,@content))))))) diff --git a/collects/html/html.scrbl b/collects/html/html.scrbl index b61e267040..9b8d446116 100644 --- a/collects/html/html.scrbl +++ b/collects/html/html.scrbl @@ -24,10 +24,10 @@ Reads (X)HTML from a port, producing an @scheme[html] instance.} @defproc[(read-html-as-xml [port input-port?]) - (listof content?)]{ + (listof content/c)]{ Reads HTML from a port, producing an @xexpr compatible with the -@schememodname[xml] library (which defines @scheme[content?]).} +@schememodname[xml] library (which defines @scheme[content/c]).} @defboolparam[read-html-comments v]{ If @scheme[v] is not @scheme[#f], then comments are read and returned. Defaults to @scheme[#f]. diff --git a/collects/html/html.ss b/collects/html/html.ss index 98729ad09b..5e2aaabc02 100644 --- a/collects/html/html.ss +++ b/collects/html/html.ss @@ -12,7 +12,7 @@ [use-html-spec (parameter/c boolean?)] [read-html (() (input-port?) . ->* . html?)] [read-xhtml (() (input-port?) . ->* . html?)] - [read-html-as-xml (() (input-port?) . ->* . (listof content?))]) + [read-html-as-xml (() (input-port?) . ->* . (listof content/c))]) ;; xml-single-content->html : Content (listof Html-content) -> (listof Html-content) (define (xml-single-content->html x acc) diff --git a/collects/html/sgml-reader.ss b/collects/html/sgml-reader.ss index 7904b4b9cc..fbcf60f85a 100644 --- a/collects/html/sgml-reader.ss +++ b/collects/html/sgml-reader.ss @@ -16,7 +16,7 @@ [read-html-comments (parameter/c boolean?)] [trim-whitespace (parameter/c boolean?)] [gen-may-contain (spec/c . -> . kid-lister/c)] - [gen-read-sgml (kid-lister/c (symbol? symbol? . -> . (or/c symbol? false/c)) . -> . (() (input-port?) . ->* . (listof content?)))]) + [gen-read-sgml (kid-lister/c (symbol? symbol? . -> . (or/c symbol? false/c)) . -> . (() (input-port?) . ->* . (listof content/c)))]) ;; Start-tag ::= (make-start-tag Location Location Symbol (listof Attribute)) (define-struct (start-tag source) (name attrs)) diff --git a/collects/tests/info.ss b/collects/tests/info.ss index 0a825c9071..05abf1ffb9 100644 --- a/collects/tests/info.ss +++ b/collects/tests/info.ss @@ -25,4 +25,6 @@ "syntax-color" "typed-scheme" "units" + "xml" + "html" "web-server")) diff --git a/collects/tests/xml/test.ss b/collects/tests/xml/test.ss index 6fd84aadf1..0da31f526c 100644 --- a/collects/tests/xml/test.ss +++ b/collects/tests/xml/test.ss @@ -48,6 +48,13 @@ (define (test-not-xexpr? xe) (test-false (format "~S" xe) (xexpr? xe))) +(define (contract->predicate c) + (lambda (v) + (with-handlers ([exn:fail:contract? + (lambda (x) #f)]) + (contract c v 'pos 'neg) + #t))) + (define xml-tests (test-suite "XML" @@ -79,7 +86,7 @@ END "DOCTYPE dropping" result-string expected-string))) (local - [(define a-pi (make-p-i #f #f "foo" "bar")) + [(define a-pi (make-p-i #f #f 'foo "bar")) (define a-p (make-prolog empty #f empty)) (define a-p/pi (make-prolog (list a-pi) #f (list))) (define a-d0 @@ -137,11 +144,14 @@ END (test-not-false "element" (element? (make-element #f #f 'br empty empty))) - (test-not-false "content? pcdata" (content? (make-pcdata #f #f "pcdata"))) - (test-not-false "content? element" (content? (make-element #f #f 'br empty empty))) - (test-not-false "content? entity" (content? (make-entity #f #f 'nbsp))) - (test-not-false "content? comment" (content? (make-comment "string"))) - (test-not-false "content? cdata" (content? (make-cdata #f #f "cdata"))) + (local [(define content? (contract->predicate content/c))] + (test-suite + "content?" + (test-not-false "content? pcdata" (content? (make-pcdata #f #f "pcdata"))) + (test-not-false "content? element" (content? (make-element #f #f 'br empty empty))) + (test-not-false "content? entity" (content? (make-entity #f #f 'nbsp))) + (test-not-false "content? comment" (content? (make-comment "string"))) + (test-not-false "content? cdata" (content? (make-cdata #f #f "cdata"))))) (test-not-false "attribute" (attribute? (make-attribute #f #f 'name "value"))) @@ -152,16 +162,14 @@ END (test-not-false "cdata" (cdata? (make-cdata #f #f "string"))) - (test-not-false "p-i" (p-i? (make-p-i #f #f "target" "instruction"))) + (test-not-false "p-i" (p-i? (make-p-i #f #f 'target "instruction"))) (test-not-false "comment" (comment? (make-comment "text"))) (test-not-false "source" (source? (make-source 'start 'stop))) (test-not-false "source" (source? (make-source (make-location 1 2 3) 'stop))) (test-not-false "source" (source? (make-source 'start (make-location 1 2 3)))) - (test-not-false "source" (source? (make-source (make-location 1 2 3) (make-location 4 5 6)))) - - (test-not-false "exn:invalid-xexpr" (exn:invalid-xexpr? (make-exn:invalid-xexpr "string" (current-continuation-marks) 'nbsp)))) + (test-not-false "source" (source? (make-source (make-location 1 2 3) (make-location 4 5 6))))) (test-suite "Reading and Writing XML" @@ -543,7 +551,7 @@ END (test-validate-xexpr (make-pcdata #f #f "pcdata")) (test-validate-xexpr (make-cdata #f #f "cdata")) (test-validate-xexpr (make-comment "comment")) - (test-validate-xexpr (make-p-i #f #f "s1" "s2")) + (test-validate-xexpr (make-p-i #f #f 's1 "s2")) (test-validate-xexpr '(br)) (test-validate-xexpr '(br ())) (test-validate-xexpr '(a ([href "#"]) "string")) @@ -561,7 +569,7 @@ END "Non-permissive" (lambda (exn) (and (exn? exn) - (regexp-match #rx"Expected content," (exn-message exn)))) + (regexp-match #rx"not in permissive mode" (exn-message exn)))) (lambda () (xml->xexpr #f))) @@ -596,7 +604,7 @@ END (test-empty-tag-shorthand '(html) "Hey" "Hey") (test-empty-tag-shorthand '(p) "" "") (test-empty-tag-shorthand '(p) "Hey" "Hey")) - + (test-equal? "html-empty-tags" html-empty-tags '(param meta link isindex input img hr frame col br basefont base area)) @@ -615,7 +623,7 @@ END "read-comments" (test-read-comments #f "" "") (test-read-comments #t "" "")) - + (local [(define (test-xexpr-drop-empty-attributes v istr xe) (test-equal? (format "~S" (list v istr)) diff --git a/collects/web-server/formlets/lib.ss b/collects/web-server/formlets/lib.ss index 068a98b50b..cc7fe46b6e 100644 --- a/collects/web-server/formlets/lib.ss +++ b/collects/web-server/formlets/lib.ss @@ -91,7 +91,7 @@ () #:rest (listof (formlet/c alpha)) . ->* . (formlet/c beta))] [xml-forest (xexpr-forest/c . -> . (formlet/c procedure?))] - [xml (xexpr? . -> . (formlet/c procedure?))] + [xml (xexpr/c . -> . (formlet/c procedure?))] [text (string? . -> . (formlet/c procedure?))] [tag-xexpr (symbol? (listof (list/c symbol? string?)) (formlet/c alpha) . -> . (formlet/c alpha))] [formlet-display ((formlet/c alpha) . -> . xexpr-forest/c)] diff --git a/collects/web-server/private/xexpr.ss b/collects/web-server/private/xexpr.ss index 03eb5b1f5f..9cdf527848 100644 --- a/collects/web-server/private/xexpr.ss +++ b/collects/web-server/private/xexpr.ss @@ -1,6 +1,6 @@ #lang scheme (require scheme/pretty - xml/xml) + xml) (provide/contract [format-xexpr/errors (any/c . -> . string?)] @@ -26,7 +26,7 @@ (symbol? v) (exact-nonnegative-integer? v) (comment? v) - (pi? v) + (p-i? v) (cdata? v))) (define (format-xexpr v) diff --git a/collects/xml/private/reader.ss b/collects/xml/private/reader.ss index cc8e95db9e..300f0050ec 100644 --- a/collects/xml/private/reader.ss +++ b/collects/xml/private/reader.ss @@ -1,462 +1,463 @@ #lang scheme -(require "sig.ss") +(require "structures.ss") -(provide reader@) +(provide/contract + [read-xml (() (input-port?) . ->* . document?)] + [read-xml/element (() (input-port?) . ->* . element?)] + [read-comments (parameter/c boolean?)] + [collapse-whitespace (parameter/c boolean?)] + [exn:xml? (any/c . -> . boolean?)]) -(define-unit reader@ - (import xml-structs^) - (export reader^) - - ;; Start-tag ::= (make-start-tag Location Location Symbol (listof Attribute)) - (define-struct (start-tag source) (name attrs)) - - ;; End-tag ::= (make-end-tag Location Location Symbol) - (define-struct (end-tag source) (name)) - - ;; Token ::= Contents | Start-tag | End-tag | Eof - - (define read-comments (make-parameter #f)) - (define collapse-whitespace (make-parameter #f)) - - ;; read-xml : [Input-port] -> Document - (define read-xml - (lambda ([in (current-input-port)]) - (let*-values ([(in pos) (positionify in)] - [(misc0 start) (read-misc in pos)]) - (make-document (make-prolog misc0 #f empty) - (read-xml-element-helper pos in start) - (let ([loc-before (pos)]) - (let-values ([(misc1 end-of-file) (read-misc in pos)]) - (unless (eof-object? end-of-file) - (let ([loc-after (pos)]) - (parse-error (list - (make-srcloc - (object-name in) - #f - #f - (location-offset loc-before) - (- (location-offset loc-after) - (location-offset loc-before)))) - "extra stuff at end of document ~e" - end-of-file))) - misc1)))))) - - ;; read-xml/element : [Input-port] -> Element - (define read-xml/element - (lambda ([in (current-input-port)]) - (let-values ([(in pos) (positionify in)]) - (skip-space in) - (read-xml-element-helper pos in (lex in pos))))) - - ;; read-xml-element-helper : Nat Iport Token -> Element - (define (read-xml-element-helper pos in start) - (cond - [(start-tag? start) (read-element start in pos)] - [(element? start) start] - [else (parse-error (list - (make-srcloc - (object-name in) - #f - #f - 1 - (- (location-offset (pos)) 1))) - "expected root element - received ~e" - (if (pcdata? start) (pcdata-string start) start))])) - - ;; read-misc : Input-port (-> Location) -> (listof Misc) Token - (define (read-misc in pos) - (let read-more () +;; Start-tag ::= (make-start-tag Location Location Symbol (listof Attribute)) +(define-struct (start-tag source) (name attrs)) + +;; End-tag ::= (make-end-tag Location Location Symbol) +(define-struct (end-tag source) (name)) + +;; Token ::= Contents | Start-tag | End-tag | Eof + +(define read-comments (make-parameter #f)) +(define collapse-whitespace (make-parameter #f)) + +;; read-xml : [Input-port] -> Document +(define read-xml + (lambda ([in (current-input-port)]) + (let*-values ([(in pos) (positionify in)] + [(misc0 start) (read-misc in pos)]) + (make-document (make-prolog misc0 #f empty) + (read-xml-element-helper pos in start) + (let ([loc-before (pos)]) + (let-values ([(misc1 end-of-file) (read-misc in pos)]) + (unless (eof-object? end-of-file) + (let ([loc-after (pos)]) + (parse-error (list + (make-srcloc + (object-name in) + #f + #f + (location-offset loc-before) + (- (location-offset loc-after) + (location-offset loc-before)))) + "extra stuff at end of document ~e" + end-of-file))) + misc1)))))) + +;; read-xml/element : [Input-port] -> Element +(define read-xml/element + (lambda ([in (current-input-port)]) + (let-values ([(in pos) (positionify in)]) + (skip-space in) + (read-xml-element-helper pos in (lex in pos))))) + +;; read-xml-element-helper : Nat Iport Token -> Element +(define (read-xml-element-helper pos in start) + (cond + [(start-tag? start) (read-element start in pos)] + [(element? start) start] + [else (parse-error (list + (make-srcloc + (object-name in) + #f + #f + 1 + (- (location-offset (pos)) 1))) + "expected root element - received ~e" + (if (pcdata? start) (pcdata-string start) start))])) + +;; read-misc : Input-port (-> Location) -> (listof Misc) Token +(define (read-misc in pos) + (let read-more () + (let ([x (lex in pos)]) + (cond + [(p-i? x) + (let-values ([(lst next) (read-more)]) + (values (cons x lst) next))] + [(comment? x) + (let-values ([(lst next) (read-more)]) + (if (read-comments) + (values (cons x lst) next) + (values lst next)))] + [(and (pcdata? x) (andmap char-whitespace? (string->list (pcdata-string x)))) + (read-more)] + [else (values null x)])))) + +;; read-element : Start-tag Input-port (-> Location) -> Element +(define (read-element start in pos) + (let ([name (start-tag-name start)] + [a (source-start start)] + [b (source-stop start)]) + (let read-content ([k (lambda (body end-loc) + (make-element + a end-loc name (start-tag-attrs start) + body))]) (let ([x (lex in pos)]) (cond - [(p-i? x) - (let-values ([(lst next) (read-more)]) - (values (cons x lst) next))] - [(comment? x) - (let-values ([(lst next) (read-more)]) - (if (read-comments) - (values (cons x lst) next) - (values lst next)))] - [(and (pcdata? x) (andmap char-whitespace? (string->list (pcdata-string x)))) - (read-more)] - [else (values null x)])))) - - ;; read-element : Start-tag Input-port (-> Location) -> Element - (define (read-element start in pos) - (let ([name (start-tag-name start)] - [a (source-start start)] - [b (source-stop start)]) - (let read-content ([k (lambda (body end-loc) - (make-element - a end-loc name (start-tag-attrs start) - body))]) - (let ([x (lex in pos)]) - (cond - [(eof-object? x) - (parse-error (list - (make-srcloc - (object-name in) - #f - #f - (location-offset (source-start start)) - (- (location-offset (source-stop start)) - (location-offset (source-start start))))) - "unclosed `~a' tag at [~a ~a]" - name - (format-source a) - (format-source b))] - [(start-tag? x) - (let ([next-el (read-element x in pos)]) - (read-content (lambda (body end-loc) - (k (cons next-el body) - end-loc))))] - [(end-tag? x) - (let ([end-loc (source-stop x)]) - (unless (eq? name (end-tag-name x)) - (parse-error - (list - (make-srcloc (object-name in) - #f - #f - (location-offset a) - (- (location-offset b) (location-offset a))) - (make-srcloc (object-name in) - #f - #f - (location-offset (source-start x)) - (- (location-offset end-loc) (location-offset (source-start x))))) - "start tag `~a' at [~a ~a] doesn't match end tag `~a' at [~a ~a]" - name - (format-source a) - (format-source b) - (end-tag-name x) - (format-source (source-start x)) - (format-source end-loc))) - (k null end-loc))] - [(entity? x) (read-content (lambda (body end-loc) - (k (cons (expand-entity x) body) - end-loc)))] - [(comment? x) (if (read-comments) - (read-content (lambda (body end-loc) (k (cons x body) end-loc))) - (read-content k))] - [else (read-content (lambda (body end-loc) (k (cons x body) end-loc)))]))))) - - ;; expand-entity : Entity -> (U Entity Pcdata) - ;; more here - allow expansion of user defined entities - (define (expand-entity x) - (let ([expanded (default-entity-table (entity-text x))]) - (if expanded - (make-pcdata (source-start x) (source-stop x) expanded) - x))) - - ;; default-entity-table : Symbol -> (U #f String) - (define (default-entity-table name) - (case name - [(amp) "&"] - [(lt) "<"] - [(gt) ">"] - [(quot) "\""] - [(apos) "'"] - [else #f])) - - ;; lex : Input-port (-> Location) -> (U Token special) - (define (lex in pos) + [(eof-object? x) + (parse-error (list + (make-srcloc + (object-name in) + #f + #f + (location-offset (source-start start)) + (- (location-offset (source-stop start)) + (location-offset (source-start start))))) + "unclosed `~a' tag at [~a ~a]" + name + (format-source a) + (format-source b))] + [(start-tag? x) + (let ([next-el (read-element x in pos)]) + (read-content (lambda (body end-loc) + (k (cons next-el body) + end-loc))))] + [(end-tag? x) + (let ([end-loc (source-stop x)]) + (unless (eq? name (end-tag-name x)) + (parse-error + (list + (make-srcloc (object-name in) + #f + #f + (location-offset a) + (- (location-offset b) (location-offset a))) + (make-srcloc (object-name in) + #f + #f + (location-offset (source-start x)) + (- (location-offset end-loc) (location-offset (source-start x))))) + "start tag `~a' at [~a ~a] doesn't match end tag `~a' at [~a ~a]" + name + (format-source a) + (format-source b) + (end-tag-name x) + (format-source (source-start x)) + (format-source end-loc))) + (k null end-loc))] + [(entity? x) (read-content (lambda (body end-loc) + (k (cons (expand-entity x) body) + end-loc)))] + [(comment? x) (if (read-comments) + (read-content (lambda (body end-loc) (k (cons x body) end-loc))) + (read-content k))] + [else (read-content (lambda (body end-loc) (k (cons x body) end-loc)))]))))) + +;; expand-entity : Entity -> (U Entity Pcdata) +;; more here - allow expansion of user defined entities +(define (expand-entity x) + (let ([expanded (default-entity-table (entity-text x))]) + (if expanded + (make-pcdata (source-start x) (source-stop x) expanded) + x))) + +;; default-entity-table : Symbol -> (U #f String) +(define (default-entity-table name) + (case name + [(amp) "&"] + [(lt) "<"] + [(gt) ">"] + [(quot) "\""] + [(apos) "'"] + [else #f])) + +;; lex : Input-port (-> Location) -> (U Token special) +(define (lex in pos) + (let ([c (peek-char-or-special in)]) + (cond + [(eof-object? c) c] + [(eq? c #\&) (lex-entity in pos)] + [(eq? c #\<) (lex-tag-cdata-pi-comment in pos)] + [(not (char? c)) (read-char-or-special in)] + [else (lex-pcdata in pos)]))) + +; lex-entity : Input-port (-> Location) -> Entity +; pre: the first char is a #\& +(define (lex-entity in pos) + (let ([start (pos)]) + (read-char in) + (let ([data (case (peek-char in) + [(#\#) + (read-char in) + (let ([n (case (peek-char in) + [(#\x) (read-char in) + (string->number (read-until #\; in pos) 16)] + [else (string->number (read-until #\; in pos))])]) + (unless (number? n) + (lex-error in pos "malformed numeric entity")) + n)] + [else + (begin0 + (lex-name in pos) + (unless (eq? (read-char in) #\;) + (lex-error in pos "expected ; at the end of an entity")))])]) + (make-entity start (pos) data)))) + +; lex-tag-cdata-pi-comment : Input-port (-> Location) -> Start-tag | Element | End-tag | Cdata | p-i | Comment +; pre: the first char is a #\< +(define (lex-tag-cdata-pi-comment in pos) + (let ([start (pos)]) + (read-char in) + (case (non-eof peek-char-or-special in pos) + [(#\!) + (read-char in) + (case (non-eof peek-char in pos) + [(#\-) (read-char in) + (unless (eq? (read-char-or-special in) #\-) + (lex-error in pos "expected second - after ) + (lex-error in pos "expected > to end comment (\"--\" can't appear in comments)")) + ;(make-comment start (pos) data) + (make-comment data))] + [(#\[) (read-char in) + (unless (string=? (read-string 6 in) "CDATA[") + (lex-error in pos "expected CDATA following <[")) + (let ([data (lex-cdata-contents in pos)]) + (make-cdata start (pos) (format "" data)))] + [else (skip-dtd in pos) + (skip-space in) + (unless (eq? (peek-char-or-special in) #\<) + (lex-error in pos "expected p-i, comment, or element after doctype")) + (lex-tag-cdata-pi-comment in pos)])] + [(#\?) (read-char in) + (let ([name (lex-name in pos)]) + (skip-space in) + (let ([data (lex-pi-data in pos)]) + (make-p-i start (pos) name data)))] + [(#\/) (read-char in) + (let ([name (lex-name in pos)]) + (skip-space in) + (unless (eq? (read-char-or-special in) #\>) + (lex-error in pos "expected > to close ~a's end tag" name)) + (make-end-tag start (pos) name))] + [else ; includes 'special, but lex-name will fail in that case + (let ([name (lex-name in pos)] + [attrs (lex-attributes in pos)]) + (skip-space in) + (case (read-char-or-special in) + [(#\/) + (unless (eq? (read-char in) #\>) + (lex-error in pos "expected > to close empty element ~a" name)) + (make-element start (pos) name attrs null)] + [(#\>) (make-start-tag start (pos) name attrs)] + [else (lex-error in pos "expected / or > to close tag `~a'" name)]))]))) + +;; lex-attributes : Input-port (-> Location) -> (listof Attribute) +(define (lex-attributes in pos) + (sort (let loop () + (skip-space in) + (cond [(name-start? (peek-char-or-special in)) + (cons (lex-attribute in pos) (loop))] + [else null])) + (lambda (a b) + (let ([na (attribute-name a)] + [nb (attribute-name b)]) + (cond + [(eq? na nb) (lex-error in pos "duplicated attribute name ~a" na)] + [else (stringstring na) (symbol->string nb))]))))) + +;; lex-attribute : Input-port (-> Location) -> Attribute +(define (lex-attribute in pos) + (let ([start (pos)] + [name (lex-name in pos)]) + (skip-space in) + (unless (eq? (read-char in) #\=) + (lex-error in pos "expected = in attribute ~a" name)) + (skip-space in) + ;; more here - handle entites and disallow "<" + (let* ([delimiter (read-char-or-special in)] + [value (case delimiter + [(#\' #\") + (list->string + (let read-more () + (let ([c (non-eof peek-char-or-special in pos)]) + (cond + [(eq? c 'special) + (lex-error in pos "attribute values cannot contain non-text values")] + [(eq? c delimiter) (read-char in) null] + [(eq? c #\&) + (let ([entity (expand-entity (lex-entity in pos))]) + (if (pcdata? entity) + (append (string->list (pcdata-string entity)) (read-more)) + ;; more here - do something with user defined entites + (read-more)))] + [else (read-char in) (cons c (read-more))]))))] + [else (if (char? delimiter) + (lex-error in pos "attribute values must be in ''s or in \"\"s") + delimiter)])]) + (make-attribute start (pos) name value)))) + +;; skip-space : Input-port -> Void +;; deviation - should sometimes insist on at least one space +(define (skip-space in) + (let loop () (let ([c (peek-char-or-special in)]) - (cond - [(eof-object? c) c] - [(eq? c #\&) (lex-entity in pos)] - [(eq? c #\<) (lex-tag-cdata-pi-comment in pos)] - [(not (char? c)) (read-char-or-special in)] - [else (lex-pcdata in pos)]))) - - ; lex-entity : Input-port (-> Location) -> Entity - ; pre: the first char is a #\& - (define (lex-entity in pos) - (let ([start (pos)]) - (read-char in) - (let ([data (case (peek-char in) - [(#\#) - (read-char in) - (let ([n (case (peek-char in) - [(#\x) (read-char in) - (string->number (read-until #\; in pos) 16)] - [else (string->number (read-until #\; in pos))])]) - (unless (number? n) - (lex-error in pos "malformed numeric entity")) - n)] - [else - (begin0 - (lex-name in pos) - (unless (eq? (read-char in) #\;) - (lex-error in pos "expected ; at the end of an entity")))])]) - (make-entity start (pos) data)))) - - ; lex-tag-cdata-pi-comment : Input-port (-> Location) -> Start-tag | Element | End-tag | Cdata | p-i | Comment - ; pre: the first char is a #\< - (define (lex-tag-cdata-pi-comment in pos) - (let ([start (pos)]) - (read-char in) - (case (non-eof peek-char-or-special in pos) - [(#\!) - (read-char in) - (case (non-eof peek-char in pos) - [(#\-) (read-char in) - (unless (eq? (read-char-or-special in) #\-) - (lex-error in pos "expected second - after ) - (lex-error in pos "expected > to end comment (\"--\" can't appear in comments)")) - ;(make-comment start (pos) data) - (make-comment data))] - [(#\[) (read-char in) - (unless (string=? (read-string 6 in) "CDATA[") - (lex-error in pos "expected CDATA following <[")) - (let ([data (lex-cdata-contents in pos)]) - (make-cdata start (pos) (format "" data)))] - [else (skip-dtd in pos) - (skip-space in) - (unless (eq? (peek-char-or-special in) #\<) - (lex-error in pos "expected p-i, comment, or element after doctype")) - (lex-tag-cdata-pi-comment in pos)])] - [(#\?) (read-char in) - (let ([name (lex-name in pos)]) - (skip-space in) - (let ([data (lex-pi-data in pos)]) - (make-p-i start (pos) name data)))] - [(#\/) (read-char in) - (let ([name (lex-name in pos)]) - (skip-space in) - (unless (eq? (read-char-or-special in) #\>) - (lex-error in pos "expected > to close ~a's end tag" name)) - (make-end-tag start (pos) name))] - [else ; includes 'special, but lex-name will fail in that case - (let ([name (lex-name in pos)] - [attrs (lex-attributes in pos)]) - (skip-space in) - (case (read-char-or-special in) - [(#\/) - (unless (eq? (read-char in) #\>) - (lex-error in pos "expected > to close empty element ~a" name)) - (make-element start (pos) name attrs null)] - [(#\>) (make-start-tag start (pos) name attrs)] - [else (lex-error in pos "expected / or > to close tag `~a'" name)]))]))) - - ;; lex-attributes : Input-port (-> Location) -> (listof Attribute) - (define (lex-attributes in pos) - (sort (let loop () - (skip-space in) - (cond [(name-start? (peek-char-or-special in)) - (cons (lex-attribute in pos) (loop))] - [else null])) - (lambda (a b) - (let ([na (attribute-name a)] - [nb (attribute-name b)]) - (cond - [(eq? na nb) (lex-error in pos "duplicated attribute name ~a" na)] - [else (stringstring na) (symbol->string nb))]))))) - - ;; lex-attribute : Input-port (-> Location) -> Attribute - (define (lex-attribute in pos) - (let ([start (pos)] - [name (lex-name in pos)]) - (skip-space in) - (unless (eq? (read-char in) #\=) - (lex-error in pos "expected = in attribute ~a" name)) - (skip-space in) - ;; more here - handle entites and disallow "<" - (let* ([delimiter (read-char-or-special in)] - [value (case delimiter - [(#\' #\") - (list->string - (let read-more () - (let ([c (non-eof peek-char-or-special in pos)]) - (cond - [(eq? c 'special) - (lex-error in pos "attribute values cannot contain non-text values")] - [(eq? c delimiter) (read-char in) null] - [(eq? c #\&) - (let ([entity (expand-entity (lex-entity in pos))]) - (if (pcdata? entity) - (append (string->list (pcdata-string entity)) (read-more)) - ;; more here - do something with user defined entites - (read-more)))] - [else (read-char in) (cons c (read-more))]))))] - [else (if (char? delimiter) - (lex-error in pos "attribute values must be in ''s or in \"\"s") - delimiter)])]) - (make-attribute start (pos) name value)))) - - ;; skip-space : Input-port -> Void - ;; deviation - should sometimes insist on at least one space - (define (skip-space in) - (let loop () - (let ([c (peek-char-or-special in)]) - (when (and (char? c) - (char-whitespace? c)) - (read-char in) - (loop))))) - - ;; lex-pcdata : Input-port (-> Location) -> Pcdata - ;; deviation - disallow ]]> "for compatability" with SGML, sec 2.4 XML spec - (define (lex-pcdata in pos) - (let ([start (pos)] - [data (let loop () - (let ([next (peek-char-or-special in)]) - (cond - [(or (eof-object? next) - (not (char? next)) - (eq? next #\&) - (eq? next #\<)) - null] - [(and (char-whitespace? next) (collapse-whitespace)) - (skip-space in) - (cons #\space (loop))] - [else (cons (read-char in) (loop))])))]) - (make-pcdata start - (pos) - (list->string data)))) - - ;; lex-name : Input-port (-> Location) -> Symbol - (define (lex-name in pos) - (let ([c (non-eof read-char-or-special in pos)]) - (unless (name-start? c) - (lex-error in pos "expected name, received ~e" c)) - (string->symbol - (list->string - (cons c (let lex-rest () - (let ([c (non-eof peek-char-or-special in pos)]) - (cond - [(eq? c 'special) - (lex-error in pos "names cannot contain non-text values")] - [(name-char? c) - (cons (read-char in) (lex-rest))] - [else null])))))))) - - ;; skip-dtd : Input-port (-> Location) -> Void - (define (skip-dtd in pos) - (let skip () - (case (non-eof read-char in pos) - [(#\') (read-until #\' in pos) (skip)] - [(#\") (read-until #\" in pos) (skip)] - [(#\<) - (case (non-eof read-char in pos) - [(#\!) (case (non-eof read-char in pos) - [(#\-) (read-char in) (lex-comment-contents in pos) (read-char in) (skip)] - [else (skip) (skip)])] - [(#\?) (lex-pi-data in pos) (skip)] - [else (skip) (skip)])] - [(#\>) (void)] - [else (skip)]))) - - ;; name-start? : Char -> Bool - (define (name-start? ch) - (and (char? ch) - (or (char-alphabetic? ch) - (eq? ch #\_) - (eq? ch #\:)))) - - ;; name-char? : Char -> Bool - (define (name-char? ch) - (and (char? ch) - (or (name-start? ch) - (char-numeric? ch) - (eq? ch #\.) - (eq? ch #\-)))) - - ;; read-until : Char Input-port (-> Location) -> String - ;; discards the stop character, too - (define (read-until char in pos) - (list->string - (let read-more () - (let ([c (non-eof read-char in pos)]) - (cond - [(eq? c char) null] - [else (cons c (read-more))]))))) - - ;; non-eof : (Input-port -> (U Char Eof)) Input-port (-> Location) -> Char - (define (non-eof f in pos) - (let ([c (f in)]) - (cond - [(eof-object? c) (lex-error in pos "unexpected eof")] - [else c]))) - - ;; gen-read-until-string : String -> Input-port (-> Location) -> String - ;; uses Knuth-Morris-Pratt from - ;; Introduction to Algorithms, Cormen, Leiserson, and Rivest, pages 869-876 - ;; discards stop from input - (define (gen-read-until-string stop) - (let* ([len (string-length stop)] - [prefix (make-vector len 0)] - [fall-back - (lambda (k c) - (let ([k (let loop ([k k]) - (cond - [(and (> k 0) (not (eq? (string-ref stop k) c))) - (loop (vector-ref prefix (sub1 k)))] - [else k]))]) - (if (eq? (string-ref stop k) c) - (add1 k) - k)))]) - (let init ([k 0] [q 1]) - (when (< q len) - (let ([k (fall-back k (string-ref stop q))]) - (vector-set! prefix q k) - (init k (add1 q))))) - ;; (vector-ref prefix x) = the longest suffix that matches a prefix of stop - (lambda (in pos) - (list->string - (let/ec out - (let loop ([matched 0] [out out]) - (let* ([c (non-eof read-char in pos)] - [matched (fall-back matched c)]) - (cond - [(= matched len) (out null)] - [(zero? matched) (cons c (let/ec out (loop matched out)))] - [else (cons c (loop matched out))])))))))) - - ;; "-->" makes more sense, but "--" follows the spec. - (define lex-comment-contents (gen-read-until-string "--")) - (define lex-pi-data (gen-read-until-string "?>")) - (define lex-cdata-contents (gen-read-until-string "]]>")) - - ;; positionify : Input-port -> Input-port (-> Location) - ; This function predates port-count-lines! and port-next-location. - ; Otherwise I would have used those directly at the call sites. - (define (positionify in) - (port-count-lines! in) - (values - in - (lambda () - (let-values ([(line column offset) (port-next-location in)]) - (make-location line column offset))))) - - ;; locs : (listof (list number number)) - (define-struct (exn:xml exn:fail:read) ()) - - ;; lex-error : Input-port String (-> Location) TST* -> alpha - ;; raises a lexer error, using exn:xml - (define (lex-error in pos str . rest) - (let* ([the-pos (pos)] - [offset (location-offset the-pos)]) - (raise - (make-exn:xml - (format "read-xml: lex-error: at position ~a: ~a" - (format-source the-pos) - (apply format str rest)) - (current-continuation-marks) - (list - (make-srcloc (object-name in) #f #f offset 1)))))) - - ;; parse-error : (listof srcloc) (listof TST) *-> alpha - ;; raises a parsing error, using exn:xml - (define (parse-error src fmt . args) - (raise (make-exn:xml (string-append "read-xml: parse-error: " - (apply format fmt args)) - (current-continuation-marks) - src))) - - ;; format-source : Location -> string - ;; to format the source location for an error message - (define (format-source loc) - (if (location? loc) - (format "~a.~a/~a" (location-line loc) (location-char loc) (location-offset loc)) - (format "~a" loc)))) + (when (and (char? c) + (char-whitespace? c)) + (read-char in) + (loop))))) + +;; lex-pcdata : Input-port (-> Location) -> Pcdata +;; deviation - disallow ]]> "for compatability" with SGML, sec 2.4 XML spec +(define (lex-pcdata in pos) + (let ([start (pos)] + [data (let loop () + (let ([next (peek-char-or-special in)]) + (cond + [(or (eof-object? next) + (not (char? next)) + (eq? next #\&) + (eq? next #\<)) + null] + [(and (char-whitespace? next) (collapse-whitespace)) + (skip-space in) + (cons #\space (loop))] + [else (cons (read-char in) (loop))])))]) + (make-pcdata start + (pos) + (list->string data)))) + +;; lex-name : Input-port (-> Location) -> Symbol +(define (lex-name in pos) + (let ([c (non-eof read-char-or-special in pos)]) + (unless (name-start? c) + (lex-error in pos "expected name, received ~e" c)) + (string->symbol + (list->string + (cons c (let lex-rest () + (let ([c (non-eof peek-char-or-special in pos)]) + (cond + [(eq? c 'special) + (lex-error in pos "names cannot contain non-text values")] + [(name-char? c) + (cons (read-char in) (lex-rest))] + [else null])))))))) + +;; skip-dtd : Input-port (-> Location) -> Void +(define (skip-dtd in pos) + (let skip () + (case (non-eof read-char in pos) + [(#\') (read-until #\' in pos) (skip)] + [(#\") (read-until #\" in pos) (skip)] + [(#\<) + (case (non-eof read-char in pos) + [(#\!) (case (non-eof read-char in pos) + [(#\-) (read-char in) (lex-comment-contents in pos) (read-char in) (skip)] + [else (skip) (skip)])] + [(#\?) (lex-pi-data in pos) (skip)] + [else (skip) (skip)])] + [(#\>) (void)] + [else (skip)]))) + +;; name-start? : Char -> Bool +(define (name-start? ch) + (and (char? ch) + (or (char-alphabetic? ch) + (eq? ch #\_) + (eq? ch #\:)))) + +;; name-char? : Char -> Bool +(define (name-char? ch) + (and (char? ch) + (or (name-start? ch) + (char-numeric? ch) + (eq? ch #\.) + (eq? ch #\-)))) + +;; read-until : Char Input-port (-> Location) -> String +;; discards the stop character, too +(define (read-until char in pos) + (list->string + (let read-more () + (let ([c (non-eof read-char in pos)]) + (cond + [(eq? c char) null] + [else (cons c (read-more))]))))) + +;; non-eof : (Input-port -> (U Char Eof)) Input-port (-> Location) -> Char +(define (non-eof f in pos) + (let ([c (f in)]) + (cond + [(eof-object? c) (lex-error in pos "unexpected eof")] + [else c]))) + +;; gen-read-until-string : String -> Input-port (-> Location) -> String +;; uses Knuth-Morris-Pratt from +;; Introduction to Algorithms, Cormen, Leiserson, and Rivest, pages 869-876 +;; discards stop from input +(define (gen-read-until-string stop) + (let* ([len (string-length stop)] + [prefix (make-vector len 0)] + [fall-back + (lambda (k c) + (let ([k (let loop ([k k]) + (cond + [(and (> k 0) (not (eq? (string-ref stop k) c))) + (loop (vector-ref prefix (sub1 k)))] + [else k]))]) + (if (eq? (string-ref stop k) c) + (add1 k) + k)))]) + (let init ([k 0] [q 1]) + (when (< q len) + (let ([k (fall-back k (string-ref stop q))]) + (vector-set! prefix q k) + (init k (add1 q))))) + ;; (vector-ref prefix x) = the longest suffix that matches a prefix of stop + (lambda (in pos) + (list->string + (let/ec out + (let loop ([matched 0] [out out]) + (let* ([c (non-eof read-char in pos)] + [matched (fall-back matched c)]) + (cond + [(= matched len) (out null)] + [(zero? matched) (cons c (let/ec out (loop matched out)))] + [else (cons c (loop matched out))])))))))) + +;; "-->" makes more sense, but "--" follows the spec. +(define lex-comment-contents (gen-read-until-string "--")) +(define lex-pi-data (gen-read-until-string "?>")) +(define lex-cdata-contents (gen-read-until-string "]]>")) + +;; positionify : Input-port -> Input-port (-> Location) +; This function predates port-count-lines! and port-next-location. +; Otherwise I would have used those directly at the call sites. +(define (positionify in) + (port-count-lines! in) + (values + in + (lambda () + (let-values ([(line column offset) (port-next-location in)]) + (make-location line column offset))))) + +;; locs : (listof (list number number)) +(define-struct (exn:xml exn:fail:read) ()) + +;; lex-error : Input-port String (-> Location) TST* -> alpha +;; raises a lexer error, using exn:xml +(define (lex-error in pos str . rest) + (let* ([the-pos (pos)] + [offset (location-offset the-pos)]) + (raise + (make-exn:xml + (format "read-xml: lex-error: at position ~a: ~a" + (format-source the-pos) + (apply format str rest)) + (current-continuation-marks) + (list + (make-srcloc (object-name in) #f #f offset 1)))))) + +;; parse-error : (listof srcloc) (listof TST) *-> alpha +;; raises a parsing error, using exn:xml +(define (parse-error src fmt . args) + (raise (make-exn:xml (string-append "read-xml: parse-error: " + (apply format fmt args)) + (current-continuation-marks) + src))) + +;; format-source : Location -> string +;; to format the source location for an error message +(define (format-source loc) + (if (location? loc) + (format "~a.~a/~a" (location-line loc) (location-char loc) (location-offset loc)) + (format "~a" loc))) \ No newline at end of file diff --git a/collects/xml/private/sig.ss b/collects/xml/private/sig.ss deleted file mode 100644 index b317a64d93..0000000000 --- a/collects/xml/private/sig.ss +++ /dev/null @@ -1,96 +0,0 @@ -#lang scheme - -(define-signature xml-structs^ - ((struct/ctc location ([line exact-nonnegative-integer?] - [char exact-nonnegative-integer?] - [offset exact-nonnegative-integer?])) - (struct/ctc source ([start (or/c location? symbol?)] - [stop (or/c location? symbol?)])) - (struct/ctc comment ([text string?])) - (struct pcdata (string)) ; XXX needs parent - (struct cdata (string)) ; XXX needs parent - (struct/ctc document-type ([name symbol?] - #;[external external-dtd?] - ; XXX results in this error - ; ->: expected contract or a value that can be coerced into one, got # - ; I presume that there is a letrec somewhere - [external any/c] - [inlined false/c])) - (struct/ctc document (#;[prolog prolog?] ; XXX same as above - [prolog any/c] - #;[element element?] - [element any/c] - #;[misc (listof (or/c comment? pi?))] - [misc (listof any/c)])) - (struct/ctc prolog (#;[misc (listof (or/c comment? pi?))] ; XXX same as above - [misc (listof any/c)] - #;[dtd document-type?] - [dtd any/c] - #;[misc2 (listof (or/c comment? pi?))] - [misc2 (listof any/c)])) - (struct/ctc external-dtd ([system string?])) - (struct external-dtd/public (public)) ; XXX needs parent - (struct external-dtd/system ()) ; XXX needs parent - (struct element (name attributes content)) ; XXX needs parent - (struct attribute (name value)) ; XXX needs parent - (struct p-i (target-name instruction)) ; XXX needs parent - (struct entity (text)) ; XXX needs parent - (contracted - [content? (any/c . -> . boolean?)]))) - -(define-signature writer^ - ((contracted - [write-xml ((any/c) (output-port?) . ->* . void?)] - [display-xml ((any/c) (output-port?) . ->* . void?)] - [write-xml/content ((any/c) (output-port?) . ->* . void?)] - [display-xml/content ((any/c) (output-port?) . ->* . void?)]) - ; XXX I can't contract the above (well), because they refer to structs from xml-structs^ - (contracted - [empty-tag-shorthand (parameter/c (or/c (symbols 'always 'never) (listof symbol?)))] - [html-empty-tags (listof symbol?)]))) - -(define-signature reader^ - ((contracted - [read-xml (() (input-port?) . ->* . any/c)] - [read-xml/element (() (input-port?) . ->* . any/c)] - [read-comments (parameter/c boolean?)] - [collapse-whitespace (parameter/c boolean?)]) - ; XXX can't contract the above (well) because they refer to structs - ; XXX can't contract exn:xml beacuse of parent - (struct exn:xml ()))) - -(define-signature xexpr^ - ((struct exn:invalid-xexpr (code)) ; XXX needs parent - (contracted - [xexpr/c contract?] - [xexpr? (any/c . -> . boolean?)] - [xexpr->string (xexpr/c . -> . string?)] - [xml->xexpr (any/c . -> . xexpr/c)] ; XXX bad because of struct - [xexpr->xml (xexpr/c . -> . any/c)] ; XXX bad because of struct - [xexpr-drop-empty-attributes (parameter/c boolean?)] - [permissive? (parameter/c boolean?)] - [validate-xexpr (any/c . -> . (one-of/c #t))] - [correct-xexpr? (any/c (-> any/c) (exn:invalid-xexpr? . -> . any/c) . -> . any/c)] - [xexpr-attribute? (any/c . -> . boolean?)] - [listof? ((any/c . -> . boolean?) any/c . -> . boolean?)] - [attribute->srep (any/c . -> . xexpr-attribute?)] ; XXX bad because of struct - [bcompose ((any/c any/c . -> . any/c) (any/c . -> . any/c) . -> . (any/c any/c . -> . any/c))] - [assoc-sort ((listof (list/c symbol? string?)) . -> . (listof (list/c symbol? string?)))]))) - -(define-signature space^ - ((contracted - ; XXX bad because of struct - [eliminate-whitespace ((listof symbol?) (boolean? . -> . boolean?) . -> . (any/c . -> . any/c))]))) - -(define-signature xml-syntax^ - ((contracted - ; XXX these should both actually return syntax? that is also xexpr/c - [syntax:read-xml (() (input-port?) . ->* . syntax?)] - [syntax:read-xml/element (() (input-port?) . ->* . syntax?)]))) - -(provide xml-structs^ - writer^ - reader^ - xexpr^ - space^ - xml-syntax^) diff --git a/collects/xml/private/space.ss b/collects/xml/private/space.ss index 24e24b2891..2cacee1758 100644 --- a/collects/xml/private/space.ss +++ b/collects/xml/private/space.ss @@ -1,34 +1,30 @@ #lang scheme -(require "sig.ss") +(require "structures.ss") +(provide/contract + [eliminate-whitespace ((listof symbol?) (boolean? . -> . boolean?) . -> . (element? . -> . element?))]) -(provide space@) +;; eliminate-whitespace : (listof Symbol) (Bool -> Bool) -> Element -> Element +(define (eliminate-whitespace special eliminate-special?) + (letrec ([blank-it + (lambda (el) + (let ([name (element-name el)] + [content (map (lambda (x) + (if (element? x) (blank-it x) x)) + (element-content el))]) + (make-element + (source-start el) + (source-stop el) + name + (element-attributes el) + (cond + [(eliminate-special? (and (memq (element-name el) special) #t)) + (filter (lambda (s) + (not (and (pcdata? s) + (or (all-blank (pcdata-string s)) + (error 'eliminate-blanks "Element <~a> is not allowed to contain text ~e" name (pcdata-string s)))))) + content)] + [else content]))))]) + blank-it)) -(define-unit space@ - (import xml-structs^) - (export space^) - - ;; eliminate-whitespace : (listof Symbol) (Bool -> Bool) -> Element -> Element - (define (eliminate-whitespace special eliminate-special?) - (letrec ([blank-it - (lambda (el) - (let ([name (element-name el)] - [content (map (lambda (x) - (if (element? x) (blank-it x) x)) - (element-content el))]) - (make-element - (source-start el) - (source-stop el) - name - (element-attributes el) - (cond - [(eliminate-special? (and (memq (element-name el) special) #t)) - (filter (lambda (s) - (not (and (pcdata? s) - (or (all-blank (pcdata-string s)) - (error 'eliminate-blanks "Element <~a> is not allowed to contain text ~e" name (pcdata-string s)))))) - content)] - [else content]))))]) - blank-it)) - - ;; all-blank : String -> Bool - (define (all-blank s) (andmap char-whitespace? (string->list s)))) +;; all-blank : String -> Bool +(define (all-blank s) (andmap char-whitespace? (string->list s))) \ No newline at end of file diff --git a/collects/xml/private/structures.ss b/collects/xml/private/structures.ss index 949f5c8534..b211c2b8f4 100644 --- a/collects/xml/private/structures.ss +++ b/collects/xml/private/structures.ss @@ -1,71 +1,126 @@ #lang scheme -(require "sig.ss") -(provide xml-structs@) +; Location = (make-location Nat Nat Nat) | Symbol +(define-struct location (line char offset)) -(define-unit xml-structs@ - (import) - (export xml-structs^) - - ; Location = (make-location Nat Nat Nat) | Symbol - (define-struct location (line char offset)) - - ; Source = (make-source Location Location) - (define-struct source (start stop)) - - ; Document = (make-document Prolog Element (listof Misc)) - (define-struct document (prolog element misc)) - - ; Prolog = (make-prolog (listof Misc) Document-type (listof Misc)) - (define-struct prolog (misc dtd misc2)) - - ; Document-type = (make-document-type sym External-dtd #f) - ; | #f - (define-struct document-type (name external inlined)) - - ; External-dtd = (make-external-dtd/public str str) - ; | (make-external-dtd/system str) - ; | #f - (define-struct external-dtd (system)) - (define-struct (external-dtd/public external-dtd) (public)) - (define-struct (external-dtd/system external-dtd) ()) - - ; Element = (make-element Location Location Symbol (listof Attribute) (listof Content)) - (define-struct (element source) (name attributes content)) - - ; Attribute = (make-attribute Location Location Symbol String) - (define-struct (attribute source) (name value)) - - ; Pcdata = (make-pcdata Location Location String) - (define-struct (pcdata source) (string)) - - ; Cdata = (make-cdata Location Location String) - (define-struct (cdata source) (string)) - - ; Content = Pcdata - ; | Element - ; | Entity - ; | Misc - ; | Cdata - - ; Misc = Comment - ; | Processing-instruction - - ; Entity = (make-entity Location Location (U Nat Symbol)) - (define-struct (entity source) (text)) - - ; Processing-instruction = (make-p-i Location Location String String) - ; also represents XMLDecl - (define-struct (p-i source) (target-name instruction)) - - ; Comment = (make-comment String) - (define-struct comment (text)) - - ; content? : TST -> Bool - (define (content? x) - (or (pcdata? x) - (element? x) - (entity? x) - (comment? x) - (cdata? x) - (p-i? x)))) +; Source = (make-source Location Location) +(define-struct source (start stop)) + +; Document = (make-document Prolog Element (listof Misc)) +(define-struct document (prolog element misc)) + +; Prolog = (make-prolog (listof Misc) Document-type (listof Misc)) +(define-struct prolog (misc dtd misc2)) + +; Document-type = (make-document-type sym External-dtd #f) +; | #f +(define-struct document-type (name external inlined)) + +; External-dtd = (make-external-dtd/public str str) +; | (make-external-dtd/system str) +; | #f +(define-struct external-dtd (system)) +(define-struct (external-dtd/public external-dtd) (public)) +(define-struct (external-dtd/system external-dtd) ()) + +; Element = (make-element Location Location Symbol (listof Attribute) (listof Content)) +(define-struct (element source) (name attributes content)) + +; Attribute = (make-attribute Location Location Symbol String) +(define-struct (attribute source) (name value)) + +; Pcdata = (make-pcdata Location Location String) +(define-struct (pcdata source) (string)) + +; Cdata = (make-cdata Location Location String) +(define-struct (cdata source) (string)) + +; Content = Pcdata +; | Element +; | Entity +; | Misc +; | Cdata + +; Misc = Comment +; | Processing-instruction + +; Entity = (make-entity Location Location (U Nat Symbol)) +(define-struct (entity source) (text)) + +; Processing-instruction = (make-p-i Location Location String String) +; also represents XMLDecl +(define-struct (p-i source) (target-name instruction)) + +; Comment = (make-comment String) +(define-struct comment (text)) + +; permissive? : parameter bool +(define permissive? (make-parameter #f)) + +(define permissive/c + (make-proj-contract 'permissive/c + (lambda (pos neg src-info name) + (lambda (v) + (if (permissive?) + v + (raise-contract-error + v src-info pos name "not in permissive mode")))) + (lambda (v) + (permissive?)))) + +; content? : TST -> Bool +(define content/c + (or/c pcdata? element? entity? comment? cdata? p-i? permissive/c)) + +(define misc/c + (or/c comment? p-i?)) + +(define location/c + (or/c location? symbol? false/c)) +(provide/contract + (struct location ([line exact-nonnegative-integer?] + [char exact-nonnegative-integer?] + [offset exact-nonnegative-integer?])) + [location/c contract?] + (struct source ([start location/c] + [stop location/c])) + (struct external-dtd ([system string?])) + (struct (external-dtd/public external-dtd) ([system string?] + [public string?])) + (struct (external-dtd/system external-dtd) ([system string?])) + (struct document-type ([name symbol?] + [external external-dtd?] + [inlined false/c])) + (struct comment ([text string?])) + (struct (p-i source) ([start location/c] + [stop location/c] + [target-name symbol?] + [instruction string?])) + [misc/c contract?] + (struct prolog ([misc (listof misc/c)] + [dtd (or/c document-type? false/c)] + [misc2 (listof misc/c)])) + (struct document ([prolog prolog?] + [element element?] + [misc (listof misc/c)])) + (struct (element source) ([start location/c] + [stop location/c] + [name symbol?] + [attributes (listof attribute?)] + [content (listof content/c)])) + (struct (attribute source) ([start location/c] + [stop location/c] + [name symbol?] + [value string?])) + [permissive? (parameter/c boolean?)] + [permissive/c contract?] + [content/c contract?] + (struct (pcdata source) ([start location/c] + [stop location/c] + [string string?])) + (struct (cdata source) ([start location/c] + [stop location/c] + [string string?])) + (struct (entity source) ([start location/c] + [stop location/c] + [text (or/c symbol? exact-nonnegative-integer?)]))) \ No newline at end of file diff --git a/collects/xml/private/syntax.ss b/collects/xml/private/syntax.ss index 5c601eb693..bb22179d51 100644 --- a/collects/xml/private/syntax.ss +++ b/collects/xml/private/syntax.ss @@ -1,51 +1,52 @@ #lang scheme -(require "sig.ss") +(require "structures.ss" + "reader.ss" + "xexpr.ss") -(provide native-xml-syntax@) +(provide/contract + ; XXX these should both actually return syntax? that is also xexpr/c + [syntax:read-xml (() (input-port?) . ->* . syntax?)] + [syntax:read-xml/element (() (input-port?) . ->* . syntax?)]) -(define-unit native-xml-syntax@ - (import xml-structs^ reader^ xexpr^) - (export xml-syntax^) - - (define (syntax:read-xml [in (current-input-port)]) - (define the-xml (read-xml in)) - (define the-xml-element (document-element the-xml)) - (element->xexpr-syntax the-xml-element)) - - (define (syntax:read-xml/element [in (current-input-port)]) - (define the-xml-element (read-xml/element in)) - (element->xexpr-syntax the-xml-element)) - - (define (position from to) - (let ([start-offset (location-offset from)]) - (list #f (location-line from) (location-char from) start-offset - (- (location-offset to) start-offset)))) - - (define (wrap s e) - (datum->syntax #f e (position (source-start s) (source-stop s)))) - - (define (attribute->syntax a) - (wrap a (list (attribute-name a) (attribute-value a)))) - - (define (non-dropping-combine atts body) - (list* (map attribute->syntax atts) body)) - - (define (combine atts body) - (if (xexpr-drop-empty-attributes) - (if (empty? atts) - body - (non-dropping-combine atts body)) - (non-dropping-combine atts body))) - - (define (element->xexpr-syntax e) - (wrap e - (list* (element-name e) - (combine (element-attributes e) - (map content->xexpr-syntax (element-content e)))))) - - (define (content->xexpr-syntax x) - (cond - [(element? x) (element->xexpr-syntax x)] - [(pcdata? x) (wrap x (pcdata-string x))] - [(entity? x) (wrap x (entity-text x))] - [else (wrap x x)]))) +(define (syntax:read-xml [in (current-input-port)]) + (define the-xml (read-xml in)) + (define the-xml-element (document-element the-xml)) + (element->xexpr-syntax the-xml-element)) + +(define (syntax:read-xml/element [in (current-input-port)]) + (define the-xml-element (read-xml/element in)) + (element->xexpr-syntax the-xml-element)) + +(define (position from to) + (let ([start-offset (location-offset from)]) + (list #f (location-line from) (location-char from) start-offset + (- (location-offset to) start-offset)))) + +(define (wrap s e) + (datum->syntax #f e (position (source-start s) (source-stop s)))) + +(define (attribute->syntax a) + (wrap a (list (attribute-name a) (attribute-value a)))) + +(define (non-dropping-combine atts body) + (list* (map attribute->syntax atts) body)) + +(define (combine atts body) + (if (xexpr-drop-empty-attributes) + (if (empty? atts) + body + (non-dropping-combine atts body)) + (non-dropping-combine atts body))) + +(define (element->xexpr-syntax e) + (wrap e + (list* (element-name e) + (combine (element-attributes e) + (map content->xexpr-syntax (element-content e)))))) + +(define (content->xexpr-syntax x) + (cond + [(element? x) (element->xexpr-syntax x)] + [(pcdata? x) (wrap x (pcdata-string x))] + [(entity? x) (wrap x (entity-text x))] + [else (wrap x x)])) \ No newline at end of file diff --git a/collects/xml/private/writer.ss b/collects/xml/private/writer.ss index 94a9ffd9d0..d277ff2380 100644 --- a/collects/xml/private/writer.ss +++ b/collects/xml/private/writer.ss @@ -1,167 +1,169 @@ #lang scheme -(require "sig.ss") +(require "structures.ss") -(provide writer@) +(provide/contract + [write-xml ((document?) (output-port?) . ->* . void?)] + [display-xml ((document?) (output-port?) . ->* . void?)] + [write-xml/content ((content/c) (output-port?) . ->* . void?)] + [display-xml/content ((content/c) (output-port?) . ->* . void?)] + [empty-tag-shorthand (parameter/c (or/c (symbols 'always 'never) (listof symbol?)))] + [html-empty-tags (listof symbol?)]) -(define-unit writer@ - (import xml-structs^) - (export writer^) - - ;; (empty-tag-shorthand) : (U 'always 'never (listof Symbol)) - (define empty-tag-shorthand - (make-parameter 'always - (lambda (x) - (if (or (eq? x 'always) (eq? x 'never) (and (list? x) (andmap symbol? x))) - x - (error 'empty-tag-shorthand "expected 'always, 'never, or a list of symbols: received ~e" x))))) - - (define html-empty-tags '(param meta link isindex input img hr frame col br basefont base area)) - - ;; gen-write/display-xml/content : (Nat Output-port -> Void) -> Content [Output-Port]-> Void - (define (gen-write/display-xml/content dent) - (lambda (c [out (current-output-port)]) (write-xml-content c 0 dent out))) - - ;; indent : Nat Output-port -> Void - (define (indent n out) - (newline out) - (let loop ([n n]) - (unless (zero? n) - (display #\space out) - (loop (sub1 n))))) - - ;; write-xml/content : Content [Output-port] -> Void - (define write-xml/content (gen-write/display-xml/content void)) - - ;; display-xml/content : Content [Output-port] -> Void - (define display-xml/content (gen-write/display-xml/content indent)) - - ;; gen-write/display-xml : (Content [Output-port] -> Void) -> Document [Output-port] -> Void - (define (gen-write/display-xml output-content) - (lambda (doc [out (current-output-port)]) - (let ([prolog (document-prolog doc)]) - (display-outside-misc (prolog-misc prolog) out) - (display-dtd (prolog-dtd prolog) out) - (display-outside-misc (prolog-misc2 prolog) out)) - (output-content (document-element doc) out) - (display-outside-misc (document-misc doc) out))) - - ; display-dtd : document-type oport -> void - (define (display-dtd dtd out) - (when dtd - (fprintf out "" out) - (newline out))) - - ;; write-xml : Document [Output-port] -> Void - (define write-xml (gen-write/display-xml write-xml/content)) - - ;; display-xml : Document [Output-port] -> Void - (define display-xml (gen-write/display-xml display-xml/content)) - - ;; display-outside-misc : (listof Misc) Output-port -> Void - (define (display-outside-misc misc out) - (for-each (lambda (x) - ((cond - [(comment? x) write-xml-comment] - [(p-i? x) write-xml-p-i]) x 0 void out) - (newline out)) - misc)) - - ;; write-xml-content : Content Nat (Nat Output-Stream -> Void) Output-Stream -> Void - (define (write-xml-content el over dent out) - ((cond - [(element? el) write-xml-element] - [(pcdata? el) write-xml-pcdata] - [(cdata? el) write-xml-cdata] - [(entity? el) write-xml-entity] - [(comment? el) write-xml-comment] - [(p-i? el) write-xml-p-i] - [else (error 'write-xml-content "received ~e" el)]) - el over dent out)) - - ;; write-xml-element : Element Nat (Nat Output-Stream -> Void) Output-Stream -> Void - (define (write-xml-element el over dent out) - (let* ([name (element-name el)] - [start (lambda (str) - (write-xml-base str over dent out) - (display name out))] - [content (element-content el)]) - (start "<") - (for ([att (in-list (element-attributes el))]) - (fprintf out " ~a=\"~a\"" (attribute-name att) - (escape (attribute-value att) escape-attribute-table))) - (if (and (null? content) - (let ([short (empty-tag-shorthand)]) - (case short - [(always) #t] - [(never) #f] - [else (memq (lowercase-symbol name) short)]))) - (display " />" out) - (begin - (display ">" out) - (for ([c (in-list content)]) - (write-xml-content c (incr over) dent out)) - (start "" out))))) - - ; : sym -> sym - (define lowercases (make-weak-hash)) - (define (lowercase-symbol x) - (or (hash-ref lowercases x #f) - (let ([s (symbol->string x)]) - (let ([s (string->symbol (string-downcase s))]) - (hash-set! lowercases x s) - s)))) - - ;; write-xml-base : (U String Char Symbol) Nat (Nat Output-Stream -> Void) Output-Stream -> Void - (define (write-xml-base el over dent out) - (dent over out) - (display el out)) - - ;; write-xml-pcdata : Pcdata Nat (Nat Output-Stream -> Void) Output-Stream -> Void - (define (write-xml-pcdata str over dent out) - (write-xml-base (escape (pcdata-string str) escape-table) over dent out)) - - ;; write-xml-cdata : Cdata Nat (Nat Output-Stream -> Void) Output-Stream -> Void - (define (write-xml-cdata cdata over dent out) - ;; XXX: Different kind of quote is needed, for assume the user includes the with proper quoting - (write-xml-base (format "~a" (cdata-string cdata)) over dent out)) - - ;; write-xml-p-i : Processing-instruction Nat (Nat Output-Stream -> Void) Output-Stream -> Void - (define (write-xml-p-i p-i over dent out) - (write-xml-base (format "" (p-i-target-name p-i) (p-i-instruction p-i)) over dent out)) - - ;; write-xml-comment : Comment Nat (Nat Output-Stream -> Void) Output-Stream -> Void - (define (write-xml-comment comment over dent out) - (write-xml-base (format "" (comment-text comment)) over dent out)) - - ;; write-xml-entity : Entity Nat (Nat Output-stream -> Void) Output-stream -> Void - (define (write-xml-entity entity over dent out) - (let ([n (entity-text entity)]) - (fprintf out (if (number? n) "&#~a;" "&~a;") n))) - - (define escape-table #rx"[<>&]") - (define escape-attribute-table #rx"[<>&\"]") - - (define (replace-escaped s) - (case (string-ref s 0) - [(#\<) "<"] - [(#\>) ">"] - [(#\&) "&"] - [(#\") """])) - - ;; escape : String -> String - (define (escape x table) - (regexp-replace* table x replace-escaped)) - - ;; incr : Nat -> Nat - (define (incr n) (+ n 2))) +;; (empty-tag-shorthand) : (U 'always 'never (listof Symbol)) +(define empty-tag-shorthand + (make-parameter 'always + (lambda (x) + (if (or (eq? x 'always) (eq? x 'never) (and (list? x) (andmap symbol? x))) + x + (error 'empty-tag-shorthand "expected 'always, 'never, or a list of symbols: received ~e" x))))) + +(define html-empty-tags '(param meta link isindex input img hr frame col br basefont base area)) + +;; gen-write/display-xml/content : (Nat Output-port -> Void) -> Content [Output-Port]-> Void +(define (gen-write/display-xml/content dent) + (lambda (c [out (current-output-port)]) (write-xml-content c 0 dent out))) + +;; indent : Nat Output-port -> Void +(define (indent n out) + (newline out) + (let loop ([n n]) + (unless (zero? n) + (display #\space out) + (loop (sub1 n))))) + +;; write-xml/content : Content [Output-port] -> Void +(define write-xml/content (gen-write/display-xml/content void)) + +;; display-xml/content : Content [Output-port] -> Void +(define display-xml/content (gen-write/display-xml/content indent)) + +;; gen-write/display-xml : (Content [Output-port] -> Void) -> Document [Output-port] -> Void +(define (gen-write/display-xml output-content) + (lambda (doc [out (current-output-port)]) + (let ([prolog (document-prolog doc)]) + (display-outside-misc (prolog-misc prolog) out) + (display-dtd (prolog-dtd prolog) out) + (display-outside-misc (prolog-misc2 prolog) out)) + (output-content (document-element doc) out) + (display-outside-misc (document-misc doc) out))) + +; display-dtd : document-type oport -> void +(define (display-dtd dtd out) + (when dtd + (fprintf out "" out) + (newline out))) + +;; write-xml : Document [Output-port] -> Void +(define write-xml (gen-write/display-xml write-xml/content)) + +;; display-xml : Document [Output-port] -> Void +(define display-xml (gen-write/display-xml display-xml/content)) + +;; display-outside-misc : (listof Misc) Output-port -> Void +(define (display-outside-misc misc out) + (for-each (lambda (x) + ((cond + [(comment? x) write-xml-comment] + [(p-i? x) write-xml-p-i]) x 0 void out) + (newline out)) + misc)) + +;; write-xml-content : Content Nat (Nat Output-Stream -> Void) Output-Stream -> Void +(define (write-xml-content el over dent out) + ((cond + [(element? el) write-xml-element] + [(pcdata? el) write-xml-pcdata] + [(cdata? el) write-xml-cdata] + [(entity? el) write-xml-entity] + [(comment? el) write-xml-comment] + [(p-i? el) write-xml-p-i] + [else (error 'write-xml-content "received ~e" el)]) + el over dent out)) + +;; write-xml-element : Element Nat (Nat Output-Stream -> Void) Output-Stream -> Void +(define (write-xml-element el over dent out) + (let* ([name (element-name el)] + [start (lambda (str) + (write-xml-base str over dent out) + (display name out))] + [content (element-content el)]) + (start "<") + (for ([att (in-list (element-attributes el))]) + (fprintf out " ~a=\"~a\"" (attribute-name att) + (escape (attribute-value att) escape-attribute-table))) + (if (and (null? content) + (let ([short (empty-tag-shorthand)]) + (case short + [(always) #t] + [(never) #f] + [else (memq (lowercase-symbol name) short)]))) + (display " />" out) + (begin + (display ">" out) + (for ([c (in-list content)]) + (write-xml-content c (incr over) dent out)) + (start "" out))))) + +; : sym -> sym +(define lowercases (make-weak-hash)) +(define (lowercase-symbol x) + (or (hash-ref lowercases x #f) + (let ([s (symbol->string x)]) + (let ([s (string->symbol (string-downcase s))]) + (hash-set! lowercases x s) + s)))) + +;; write-xml-base : (U String Char Symbol) Nat (Nat Output-Stream -> Void) Output-Stream -> Void +(define (write-xml-base el over dent out) + (dent over out) + (display el out)) + +;; write-xml-pcdata : Pcdata Nat (Nat Output-Stream -> Void) Output-Stream -> Void +(define (write-xml-pcdata str over dent out) + (write-xml-base (escape (pcdata-string str) escape-table) over dent out)) + +;; write-xml-cdata : Cdata Nat (Nat Output-Stream -> Void) Output-Stream -> Void +(define (write-xml-cdata cdata over dent out) + ;; XXX: Different kind of quote is needed, for assume the user includes the with proper quoting + (write-xml-base (format "~a" (cdata-string cdata)) over dent out)) + +;; write-xml-p-i : Processing-instruction Nat (Nat Output-Stream -> Void) Output-Stream -> Void +(define (write-xml-p-i p-i over dent out) + (write-xml-base (format "" (p-i-target-name p-i) (p-i-instruction p-i)) over dent out)) + +;; write-xml-comment : Comment Nat (Nat Output-Stream -> Void) Output-Stream -> Void +(define (write-xml-comment comment over dent out) + (write-xml-base (format "" (comment-text comment)) over dent out)) + +;; write-xml-entity : Entity Nat (Nat Output-stream -> Void) Output-stream -> Void +(define (write-xml-entity entity over dent out) + (let ([n (entity-text entity)]) + (fprintf out (if (number? n) "&#~a;" "&~a;") n))) + +(define escape-table #rx"[<>&]") +(define escape-attribute-table #rx"[<>&\"]") + +(define (replace-escaped s) + (case (string-ref s 0) + [(#\<) "<"] + [(#\>) ">"] + [(#\&) "&"] + [(#\") """])) + +;; escape : String -> String +(define (escape x table) + (regexp-replace* table x replace-escaped)) + +;; incr : Nat -> Nat +(define (incr n) (+ n 2)) \ No newline at end of file diff --git a/collects/xml/private/xexpr.ss b/collects/xml/private/xexpr.ss index 4efba40596..aa9dc0cac2 100644 --- a/collects/xml/private/xexpr.ss +++ b/collects/xml/private/xexpr.ss @@ -1,228 +1,232 @@ #lang scheme -(require scheme/pretty) -(require "sig.ss") +(require scheme/pretty + "structures.ss" + "writer.ss") -(provide xexpr@) +;; Xexpr ::= String +;; | (list* Symbol (listof Attribute-srep) (listof Xexpr)) +;; | (cons Symbol (listof Xexpr)) +;; | Symbol +;; | Nat +;; | Comment +;; | Processing-instruction +;; | Cdata +;; Attribute-srep ::= (list Symbol String) -(define-unit xexpr@ - (import xml-structs^ writer^) - (export xexpr^) - ;; Xexpr ::= String - ;; | (list* Symbol (listof Attribute-srep) (listof Xexpr)) - ;; | (cons Symbol (listof Xexpr)) - ;; | Symbol - ;; | Nat - ;; | Comment - ;; | Processing-instruction - ;; | Cdata - ;; Attribute-srep ::= (list Symbol String) - - ;; sorting is no longer necessary, since xt3d uses xml->zxexpr, which sorts. - - ;; assoc-sort : (listof (list Symbol a)) -> (listof (list Symbol a)) - (define (assoc-sort to-sort) - (sort to-sort (bcompose stringstring car)))) - - (define xexpr-drop-empty-attributes (make-parameter #f)) - - (define xexpr/c - (make-proj-contract - 'xexpr? - (lambda (pos neg src-info name) - (lambda (val) - (with-handlers ([exn:invalid-xexpr? - (lambda (exn) - (raise-contract-error - val - src-info - pos - name - "Not an Xexpr. ~a~n~nContext:~n~a" - (exn-message exn) - (pretty-format val)))]) - (validate-xexpr val) - val))) - (lambda (v) #t))) - - (define (xexpr? x) - (correct-xexpr? x (lambda () #t) (lambda (exn) #f))) - - - (define (validate-xexpr x) - (correct-xexpr? x (lambda () #t) (lambda (exn) (raise exn)))) - - ;; ;; ;; ;; ;; ;; ; - ;; ; xexpr? helpers - - (define-struct (exn:invalid-xexpr exn:fail) (code)) - - ;; correct-xexpr? : any (-> a) (exn -> a) -> a - (define (correct-xexpr? x true false) - (cond - ((string? x) (true)) - ((symbol? x) (true)) - ((exact-nonnegative-integer? x) (true)) - ((comment? x) (true)) - ((p-i? x) (true)) - ((cdata? x) (true)) - ((pcdata? x) (true)) - ((list? x) - (or (null? x) - (if (symbol? (car x)) - (if (has-attribute? x) - (and (attribute-pairs? (cadr x) true false) - (andmap (lambda (part) - (correct-xexpr? part true false)) - (cddr x)) - (true)) - (andmap (lambda (part) - (correct-xexpr? part true false)) - (cdr x))) - (false (make-exn:invalid-xexpr - (format - "Expected a symbol as the element name, given ~s" - (car x)) - (current-continuation-marks) - x))))) - [(permissive?) (true)] - (else (false +;; sorting is no longer necessary, since xt3d uses xml->zxexpr, which sorts. + +;; assoc-sort : (listof (list Symbol a)) -> (listof (list Symbol a)) +(define (assoc-sort to-sort) + (sort to-sort (bcompose stringstring car)))) + +(define xexpr-drop-empty-attributes (make-parameter #f)) + +(define xexpr/c + (make-proj-contract + 'xexpr? + (lambda (pos neg src-info name) + (lambda (val) + (with-handlers ([exn:invalid-xexpr? + (lambda (exn) + (raise-contract-error + val + src-info + pos + name + "Not an Xexpr. ~a~n~nContext:~n~a" + (exn-message exn) + (pretty-format val)))]) + (validate-xexpr val) + val))) + (lambda (v) #t))) + +(define (xexpr? x) + (correct-xexpr? x (lambda () #t) (lambda (exn) #f))) + +(define (validate-xexpr x) + (correct-xexpr? x (lambda () #t) (lambda (exn) (raise exn)))) + +;; ;; ;; ;; ;; ;; ; +;; ; xexpr? helpers + +(define-struct (exn:invalid-xexpr exn:fail) (code)) + +;; correct-xexpr? : any (-> a) (exn -> a) -> a +(define (correct-xexpr? x true false) + (cond + ((string? x) (true)) + ((symbol? x) (true)) + ((exact-nonnegative-integer? x) (true)) + ((comment? x) (true)) + ((p-i? x) (true)) + ((cdata? x) (true)) + ((pcdata? x) (true)) + ((list? x) + (or (null? x) + (if (symbol? (car x)) + (if (has-attribute? x) + (and (attribute-pairs? (cadr x) true false) + (andmap (lambda (part) + (correct-xexpr? part true false)) + (cddr x)) + (true)) + (andmap (lambda (part) + (correct-xexpr? part true false)) + (cdr x))) + (false (make-exn:invalid-xexpr + (format + "Expected a symbol as the element name, given ~s" + (car x)) + (current-continuation-marks) + x))))) + [(permissive?) (true)] + (else (false + (make-exn:invalid-xexpr + (format (string-append + "Expected a string, symbol, number, comment, " + "processing instruction, or list, given ~s") + x) + (current-continuation-marks) + x))))) + +;; has-attribute? : List -> Boolean +;; True if the Xexpr provided has an attribute list. +(define (has-attribute? x) + (and (> (length x) 1) + (list? (cadr x)) + (andmap (lambda (attr) + (pair? attr)) + (cadr x)))) + +;; attribute-pairs? : List (-> a) (exn -> a) -> a +;; True if the list is a list of pairs. +(define (attribute-pairs? attrs true false) + (if (null? attrs) + (true) + (let ((attr (car attrs))) + (if (pair? attr) + (and (attribute-symbol-string? attr true false) + (attribute-pairs? (cdr attrs) true false ) + (true)) + (false (make-exn:invalid-xexpr - (format (string-append - "Expected a string, symbol, number, comment, " - "processing instruction, or list, given ~s") - x) + (format "Expected a pair, given ~a" attr) (current-continuation-marks) - x))))) - - ;; has-attribute? : List -> Boolean - ;; True if the Xexpr provided has an attribute list. - (define (has-attribute? x) - (and (> (length x) 1) - (list? (cadr x)) - (andmap (lambda (attr) - (pair? attr)) - (cadr x)))) - - ;; attribute-pairs? : List (-> a) (exn -> a) -> a - ;; True if the list is a list of pairs. - (define (attribute-pairs? attrs true false) - (if (null? attrs) - (true) - (let ((attr (car attrs))) - (if (pair? attr) - (and (attribute-symbol-string? attr true false) - (attribute-pairs? (cdr attrs) true false ) - (true)) - (false - (make-exn:invalid-xexpr - (format "Expected a pair, given ~a" attr) - (current-continuation-marks) - attr)))))) - - ;; attribute-symbol-string? : List (-> a) (exn -> a) -> a - ;; True if the list is a list of String,Symbol pairs. - (define (attribute-symbol-string? attr true false) - (if (symbol? (car attr)) - (if (string? (cadr attr)) - (true) - (false (make-exn:invalid-xexpr - (format "Expected a string, given ~a" (cadr attr)) - (current-continuation-marks) - (cadr attr)))) - (false (make-exn:invalid-xexpr - (format "Expected a symbol, given ~a" (car attr)) - (current-continuation-marks) - (cadr attr))))) - - ;; ; end xexpr? helpers - ;; ;; ;; ;; ;; ;; ;; ;; - - - ; : (a -> bool) tst -> bool - ; To check if l is a (listof p?) - ; Don't use (and (list? l) (andmap p? l)) because l may be improper. - (define (listof? p? l) - (let listof-p? ([l l]) - (or (null? l) - (and (cons? l) (p? (car l)) (listof-p? (cdr l)))))) - - ; : tst -> bool - (define (xexpr-attribute? b) - (and (pair? b) - (symbol? (car b)) - (pair? (cdr b)) - (string? (cadr b)) - (null? (cddr b)))) - - ; permissive? : parameter bool - (define permissive? (make-parameter #f)) - - ;; xml->xexpr : Content -> Xexpr - (define (xml->xexpr x) - (let* ([non-dropping-combine - (lambda (atts body) - (cons (assoc-sort (map attribute->srep atts)) - body))] - [combine (if (xexpr-drop-empty-attributes) - (lambda (atts body) - (if (null? atts) - body - (non-dropping-combine atts body))) - non-dropping-combine)]) - (let loop ([x x]) - (cond - [(element? x) - (let ([body (map loop (element-content x))] - [atts (element-attributes x)]) - (cons (element-name x) (combine atts body)))] - [(pcdata? x) (pcdata-string x)] - [(entity? x) (entity-text x)] - [(or (comment? x) (p-i? x) (cdata? x)) x] - [(document? x) (error 'xml->xexpr "Expected content, given ~e\nUse document-element to extract the content." x)] - [(permissive?) x] - [else (error 'xml->xexpr "Expected content, given ~e" x)])))) - - ;; attribute->srep : Attribute -> Attribute-srep - (define (attribute->srep a) - (list (attribute-name a) (attribute-value a))) - - ;; srep->attribute : Attribute-srep -> Attribute - (define (srep->attribute a) - (unless (and (pair? a) (pair? (cdr a)) (null? (cddr a)) (symbol? (car a)) (string? (cadr a))) - (error 'srep->attribute "expected (list Symbol String) given ~e" a)) - (make-attribute 'scheme 'scheme (car a) (cadr a))) - - ;; xexpr->xml : Xexpr -> Content - ;; The contract is enforced. - (define (xexpr->xml x) - (cond - [(pair? x) - (let ([f (lambda (atts body) - (unless (list? body) - (error 'xexpr->xml - "expected a list of xexprs for the body in ~e" - x)) - (make-element 'scheme 'scheme (car x) - atts - (map xexpr->xml body)))]) - (if (and (pair? (cdr x)) - (or (null? (cadr x)) - (and (pair? (cadr x)) (pair? (caadr x))))) - (f (map srep->attribute (cadr x)) (cddr x)) - (f null (cdr x))))] - [(string? x) (make-pcdata 'scheme 'scheme x)] - [(or (symbol? x) (exact-nonnegative-integer? x)) - (make-entity 'scheme 'scheme x)] - [(or (comment? x) (p-i? x) (cdata? x) (pcdata? x)) x] - [else ;(error 'xexpr->xml "malformed xexpr ~e" x) - x])) - - ;; xexpr->string : Xexpression -> String - (define (xexpr->string xexpr) - (let ([port (open-output-string)]) - (write-xml/content (xexpr->xml xexpr) port) - (get-output-string port))) - - ;; bcompose : (a a -> c) (b -> a) -> (b b -> c) - (define (bcompose f g) - (lambda (x y) (f (g x) (g y))))) + attr)))))) + +;; attribute-symbol-string? : List (-> a) (exn -> a) -> a +;; True if the list is a list of String,Symbol pairs. +(define (attribute-symbol-string? attr true false) + (if (symbol? (car attr)) + (if (string? (cadr attr)) + (true) + (false (make-exn:invalid-xexpr + (format "Expected a string, given ~a" (cadr attr)) + (current-continuation-marks) + (cadr attr)))) + (false (make-exn:invalid-xexpr + (format "Expected a symbol, given ~a" (car attr)) + (current-continuation-marks) + (cadr attr))))) + +;; ; end xexpr? helpers +;; ;; ;; ;; ;; ;; ;; ;; + + +; : (a -> bool) tst -> bool +; To check if l is a (listof p?) +; Don't use (and (list? l) (andmap p? l)) because l may be improper. +(define (listof? p? l) + (let listof-p? ([l l]) + (or (null? l) + (and (cons? l) (p? (car l)) (listof-p? (cdr l)))))) + +; : tst -> bool +(define (xexpr-attribute? b) + (and (pair? b) + (symbol? (car b)) + (pair? (cdr b)) + (string? (cadr b)) + (null? (cddr b)))) + +;; xml->xexpr : Content -> Xexpr +(define (xml->xexpr x) + (let* ([non-dropping-combine + (lambda (atts body) + (cons (assoc-sort (map attribute->srep atts)) + body))] + [combine (if (xexpr-drop-empty-attributes) + (lambda (atts body) + (if (null? atts) + body + (non-dropping-combine atts body))) + non-dropping-combine)]) + (let loop ([x x]) + (cond + [(element? x) + (let ([body (map loop (element-content x))] + [atts (element-attributes x)]) + (cons (element-name x) (combine atts body)))] + [(pcdata? x) (pcdata-string x)] + [(entity? x) (entity-text x)] + [(or (comment? x) (p-i? x) (cdata? x)) x] + [(document? x) (error 'xml->xexpr "Expected content, given ~e\nUse document-element to extract the content." x)] + [(permissive?) x] + [else (error 'xml->xexpr "Expected content, given ~e" x)])))) + +;; attribute->srep : Attribute -> Attribute-srep +(define (attribute->srep a) + (list (attribute-name a) (attribute-value a))) + +;; srep->attribute : Attribute-srep -> Attribute +(define (srep->attribute a) + (unless (and (pair? a) (pair? (cdr a)) (null? (cddr a)) (symbol? (car a)) (string? (cadr a))) + (error 'srep->attribute "expected (list Symbol String) given ~e" a)) + (make-attribute 'scheme 'scheme (car a) (cadr a))) + +;; xexpr->xml : Xexpr -> Content +;; The contract is enforced. +(define (xexpr->xml x) + (cond + [(pair? x) + (let ([f (lambda (atts body) + (unless (list? body) + (error 'xexpr->xml + "expected a list of xexprs for the body in ~e" + x)) + (make-element 'scheme 'scheme (car x) + atts + (map xexpr->xml body)))]) + (if (and (pair? (cdr x)) + (or (null? (cadr x)) + (and (pair? (cadr x)) (pair? (caadr x))))) + (f (map srep->attribute (cadr x)) (cddr x)) + (f null (cdr x))))] + [(string? x) (make-pcdata 'scheme 'scheme x)] + [(or (symbol? x) (exact-nonnegative-integer? x)) + (make-entity 'scheme 'scheme x)] + [(or (comment? x) (p-i? x) (cdata? x) (pcdata? x)) x] + [else ;(error 'xexpr->xml "malformed xexpr ~e" x) + x])) + +;; xexpr->string : Xexpression -> String +(define (xexpr->string xexpr) + (let ([port (open-output-string)]) + (write-xml/content (xexpr->xml xexpr) port) + (get-output-string port))) + +;; bcompose : (a a -> c) (b -> a) -> (b b -> c) +(define (bcompose f g) + (lambda (x y) (f (g x) (g y)))) + +(provide/contract + [exn:invalid-xexpr? (any/c . -> . boolean?)] + [exn:invalid-xexpr-code (exn:invalid-xexpr? . -> . any/c)] + [xexpr/c contract?] + [xexpr? (any/c . -> . boolean?)] + [xexpr->string (xexpr/c . -> . string?)] + [xml->xexpr (content/c . -> . xexpr/c)] + [xexpr->xml (xexpr/c . -> . content/c)] + [xexpr-drop-empty-attributes (parameter/c boolean?)] + [validate-xexpr (any/c . -> . (one-of/c #t))] + [correct-xexpr? (any/c (-> any/c) (exn:invalid-xexpr? . -> . any/c) . -> . any/c)]) \ No newline at end of file diff --git a/collects/xml/xml-sig.ss b/collects/xml/xml-sig.ss deleted file mode 100644 index 85a386f22f..0000000000 --- a/collects/xml/xml-sig.ss +++ /dev/null @@ -1,12 +0,0 @@ -#lang scheme -(require "private/sig.ss") - -(define-signature xml^ - ((open xml-structs^) - (open reader^) - (open writer^) - (open xexpr^) - (open space^) - (open xml-syntax^))) - -(provide xml^) diff --git a/collects/xml/xml-unit.ss b/collects/xml/xml-unit.ss deleted file mode 100644 index 1aff346605..0000000000 --- a/collects/xml/xml-unit.ss +++ /dev/null @@ -1,16 +0,0 @@ -#lang scheme -(require "xml-sig.ss" - "private/sig.ss" - "private/structures.ss" - "private/reader.ss" - "private/writer.ss" - "private/xexpr.ss" - "private/space.ss" - "private/syntax.ss") - -(provide xml@) - -(define-compound-unit/infer xml@ - (import) - (export xml-structs^ reader^ xml-syntax^ writer^ xexpr^ space^) - (link xml-structs@ reader@ native-xml-syntax@ writer@ xexpr@ space@)) diff --git a/collects/xml/xml.scrbl b/collects/xml/xml.scrbl index 155c8d3c9c..36d98fae47 100644 --- a/collects/xml/xml.scrbl +++ b/collects/xml/xml.scrbl @@ -33,6 +33,107 @@ It also does not expand user-defined entities or read user-defined entities in a @section{Datatypes} +@defstruct[location ([line exact-nonnegative-integer?] + [char exact-nonnegative-integer?] + [offset exact-nonnegative-integer?])]{ + +Represents a location in an input stream.} + +@defthing[location/c contract?]{ + Equivalent to @scheme[(or/c location? symbol? false/c)]. +} + +@defstruct[source ([start location/c] + [stop location/c])]{ + +Represents a source location. Other structure types extend @scheme[source]. + +When XML is generated from an input stream by @scheme[read-xml], +locations are represented by @scheme[location] instances. When XML +structures are generated by @scheme[xexpr->xml], then locations are +symbols.} + +@deftogether[( +@defstruct[external-dtd ([system string?])] +@defstruct[(external-dtd/public external-dtd) ([public string?])] +@defstruct[(external-dtd/system external-dtd) ()] +)]{ + +Represents an externally defined DTD.} + +@defstruct[document-type ([name symbol?] + [external external-dtd?] + [inlined false/c])]{ + +Represents a document type.} + +@defstruct[comment ([text string?])]{ + +Represents a comment.} + +@defstruct[(p-i source) ([target-name symbol?] + [instruction string?])]{ + +Represents a processing instruction.} + +@defthing[misc/c contract?]{ + Equivalent to @scheme[(or/c comment? p-i?)] +} + +@defstruct[prolog ([misc (listof misc/c)] + [dtd (or/c document-type false/c)] + [misc2 (listof misc/c)])]{ +Represents a document prolog. +} + +@defstruct[document ([prolog prolog?] + [element element?] + [misc (listof misc/c)])]{ +Represents a document.} + +@defstruct[(element source) ([name symbol?] + [attributes (listof attribute?)] + [content (listof content/c)])]{ +Represents an element.} + +@defstruct[(attribute source) ([name symbol?] [value string?])]{ + +Represents an attribute within an element.} + +@defthing[content/c contract?]{ + Equivalent to @scheme[(or/c pcdata? element? entity? comment? cdata? p-i? permissive/c)]. +} + +@defthing[permissive/c contract?]{ + If @scheme[(permissive?)] is @scheme[#t], then equivalent to @scheme[any/c], otherwise equivalent to @scheme[(make-none/c 'permissive)]} + +@defstruct[(entity source) ([text (or/c symbol? exact-nonnegative-integer?)])]{ + +Represents a symbolic or numerical entity.} + +@defstruct[(pcdata source) ([string string?])]{ + +Represents PCDATA content.} + +@defstruct[(cdata source) ([string string?])]{ + +Represents CDATA content. + +The @scheme[string] field is assumed to be of the form +@litchar{} with proper quoting +of @nonterm{content}. Otherwise, @scheme[write-xml] generates +incorrect output.} + +@defstruct[(exn:invalid-xexpr exn:fail) ([code any/c])]{ + +Raised by @scheme[validate-xexpr] when passed an invalid +@tech{X-expression}. The @scheme[code] fields contains an invalid part +of the input to @scheme[validate-xexpr].} + +@defstruct[(exn:xml exn:fail:read) ()]{ + Raised by @scheme[read-xml] when an error in the XML input is found. +} + @defproc[(xexpr? [v any/c]) boolean?]{ Returns @scheme[#t] if @scheme[v] is a @tech{X-expression}, @scheme[#f] otherwise. @@ -65,108 +166,12 @@ An @scheme[_exact-nonnegative-integer] represents a numeric entity. For example, A @scheme[_cdata] is an instance of the @scheme[cdata] structure type, and a @scheme[_misc] is an instance of the @scheme[comment] or -@scheme[pcdata] structure types.} +@scheme[p-i] structure types.} @defthing[xexpr/c contract?]{ A contract that is like @scheme[xexpr?] except produces a better error message when the value is not an @tech{X-expression}. } - -@defstruct[document ([prolog prolog?] - [element element?] - [misc (listof (or/c comment? p-i?))])]{ - -Represents a document.} - -@defstruct[prolog ([misc (listof (or/c comment? p-i?))] - [dtd (or/c document-type false/c)] - [misc2 (listof (or/c comment? p-i?))])]{ - -Represents a document prolog. -} - -@defstruct[document-type ([name symbol?] - [external external-dtd?] - [inlined false/c])]{ - -Represents a document type.} - -@deftogether[( -@defstruct[external-dtd ([system string?])] -@defstruct[(external-dtd/public external-dtd) ([public string?])] -@defstruct[(external-dtd/system external-dtd) ()] -)]{ - -Represents an externally defined DTD.} - -@defstruct[(element source) ([name symbol?] - [attributes (listof attribute?)] - [content (listof content?)])]{ - -Represents an element.} - -@defproc[(content? [v any/c]) boolean?]{ - -Returns @scheme[#t] if @scheme[v] is a @scheme[pcdata] instance, -@scheme[element] instance, an @scheme[entity] instance, -@scheme[comment], or @scheme[cdata] instance.} - -@defstruct[(attribute source) ([name symbol?] [value string?])]{ - -Represents an attribute within an element.} - -@defstruct[(entity source) ([text (or/c symbol? exact-nonnegative-integer?)])]{ - -Represents a symbolic or numerical entity.} - -@defstruct[(pcdata source) ([string string?])]{ - -Represents PCDATA content.} - - -@defstruct[(cdata source) ([string string?])]{ - -Represents CDATA content. - -The @scheme[string] field is assumed to be of the form -@litchar{} with proper quoting -of @nonterm{content}. Otherwise, @scheme[write-xml] generates -incorrect output.} - -@defstruct[(p-i source) ([target-name string?] - [instruction string?])]{ - -Represents a processing instruction.} - - -@defstruct[comment ([text string?])]{ - -Represents a comment.} - - -@defstruct[source ([start (or/c location? symbol?)] - [stop (or/c location? symbol?)])]{ - -Represents a source location. Other structure types extend @scheme[source]. - -When XML is generated from an input stream by @scheme[read-xml], -locations are represented by @scheme[location] instances. When XML -structures are generated by @scheme[xexpr->xml], then locations are -symbols.} - - -@defstruct[location ([line exact-nonnegative-integer?] - [char exact-nonnegative-integer?] - [offset exact-nonnegative-integer?])]{ - -Represents a location in an input stream.} - - -@defstruct[(exn:invalid-xexpr exn:fail) ([code any/c])]{ - -Raised by @scheme[validate-xexpr] when passed an invalid -@tech{X-expression}. The @scheme[code] fields contains an invalid part -of the input to @scheme[validate-xexpr].} - + @; ---------------------------------------------------------------------- @section{Reading and Writing XML} @@ -217,7 +222,7 @@ Like @scheme[syntax:real-xml], but it reads an XML element like Writes a document to the given output port, currently ignoring everything except the document's root element.} -@defproc[(write-xml/content [content content?] [out output-port? (current-output-port)]) +@defproc[(write-xml/content [content content/c] [out output-port? (current-output-port)]) void?]{ Writes document content to the given output port.} @@ -229,7 +234,7 @@ Like @scheme[write-xml], but newlines and indentation make the output more readable, though less technically correct when whitespace is significant.} -@defproc[(display-xml/content [content content?] [out output-port? (current-output-port)]) +@defproc[(display-xml/content [content content/c] [out output-port? (current-output-port)]) void?]{ Like @scheme[write-xml/content], but with indentation and newlines @@ -246,12 +251,12 @@ like @scheme[display-xml].} and leave them in place in the resulting ``@tech{X-expression}''. } -@defproc[(xml->xexpr [content content?]) xexpr/c]{ +@defproc[(xml->xexpr [content content/c]) xexpr/c]{ Converts document content into an @tech{X-expression}, using @scheme[permissive?] to determine if foreign objects are allowed.} -@defproc[(xexpr->xml [xexpr xexpr/c]) content?]{ +@defproc[(xexpr->xml [xexpr xexpr/c]) content/c]{ Converts an @tech{X-expression} into XML content.} diff --git a/collects/xml/xml.ss b/collects/xml/xml.ss index f4103edb9d..9b455f1ba5 100644 --- a/collects/xml/xml.ss +++ b/collects/xml/xml.ss @@ -1,7 +1,14 @@ #lang scheme -(require "xml-sig.ss" - "xml-unit.ss") +(require "private/structures.ss" + "private/reader.ss" + "private/space.ss" + "private/writer.ss" + "private/xexpr.ss" + "private/syntax.ss") -(define-values/invoke-unit/infer xml@) - -(provide-signature-elements xml^) \ No newline at end of file +(provide (all-from-out "private/structures.ss" + "private/reader.ss" + "private/space.ss" + "private/writer.ss" + "private/xexpr.ss" + "private/syntax.ss")) \ No newline at end of file