ssax is now a planet package
svn: r2117
This commit is contained in:
parent
7a5b2fb2c5
commit
c3091ca556
|
@ -1,47 +0,0 @@
|
|||
(define-syntax run-test (syntax-rules (define) ((run-test "scan-exp" (define vars body)) (define vars (run-test "scan-exp" body))) ((run-test "scan-exp" ?body) (letrec-syntax ((scan-exp (syntax-rules (quote quasiquote !) ((scan-exp (quote ()) (k-head ! . args)) (k-head (quote ()) . args)) ((scan-exp (quote (hd . tl)) k) (scan-lit-lst (hd . tl) (do-wrap ! quasiquote k))) ((scan-exp (quasiquote (hd . tl)) k) (scan-lit-lst (hd . tl) (do-wrap ! quasiquote k))) ((scan-exp (quote x) (k-head ! . args)) (k-head (if (string? (quote x)) (string->symbol (quote x)) (quote x)) . args)) ((scan-exp (hd . tl) k) (scan-exp hd (do-tl ! scan-exp tl k))) ((scan-exp x (k-head ! . args)) (k-head x . args)))) (do-tl (syntax-rules (!) ((do-tl processed-hd fn () (k-head ! . args)) (k-head (processed-hd) . args)) ((do-tl processed-hd fn old-tl k) (fn old-tl (do-cons ! processed-hd k))))) (do-cons (syntax-rules (!) ((do-cons processed-tl processed-hd (k-head ! . args)) (k-head (processed-hd . processed-tl) . args)))) (do-wrap (syntax-rules (!) ((do-wrap val fn (k-head ! . args)) (k-head (fn val) . args)))) (do-finish (syntax-rules () ((do-finish new-body) new-body))) (scan-lit-lst (syntax-rules (quote unquote unquote-splicing !) ((scan-lit-lst (quote ()) (k-head ! . args)) (k-head (quote ()) . args)) ((scan-lit-lst (quote (hd . tl)) k) (do-tl quote scan-lit-lst ((hd . tl)) k)) ((scan-lit-lst (unquote x) k) (scan-exp x (do-wrap ! unquote k))) ((scan-lit-lst (unquote-splicing x) k) (scan-exp x (do-wrap ! unquote-splicing k))) ((scan-lit-lst (quote x) (k-head ! . args)) (k-head (unquote (if (string? (quote x)) (string->symbol (quote x)) (quote x))) . args)) ((scan-lit-lst (hd . tl) k) (scan-lit-lst hd (do-tl ! scan-lit-lst tl k))) ((scan-lit-lst x (k-head ! . args)) (k-head x . args))))) (scan-exp ?body (do-finish !)))) ((run-test body ...) (begin (run-test "scan-exp" body) ...))))
|
||||
(define (make-xml-token kind head) (cons kind head))
|
||||
(define xml-token? pair?)
|
||||
(define-syntax xml-token-kind (syntax-rules () ((xml-token-kind token) (car token))))
|
||||
(define-syntax xml-token-head (syntax-rules () ((xml-token-head token) (cdr token))))
|
||||
(define (string-whitespace? str) (let ((len (string-length str))) (cond ((zero? len) #t) ((= 1 len) (char-whitespace? (string-ref str 0))) ((= 2 len) (and (char-whitespace? (string-ref str 0)) (char-whitespace? (string-ref str 1)))) (else (let loop ((i 0)) (or (>= i len) (and (char-whitespace? (string-ref str i)) (loop (inc i)))))))))
|
||||
(define (assq-values val alist) (let loop ((alist alist) (scanned (quote ()))) (cond ((null? alist) (values #f scanned)) ((equal? val (caar alist)) (values (car alist) (append scanned (cdr alist)))) (else (loop (cdr alist) (cons (car alist) scanned))))))
|
||||
(define (fold-right kons knil lis1) (let recur ((lis lis1)) (if (null? lis) knil (let ((head (car lis))) (kons head (recur (cdr lis)))))))
|
||||
(define (fold kons knil lis1) (let lp ((lis lis1) (ans knil)) (if (null? lis) ans (lp (cdr lis) (kons (car lis) ans)))))
|
||||
(define ssax:S-chars (map ascii->char (quote (32 10 9 13))))
|
||||
(define (ssax:skip-S port) (skip-while ssax:S-chars port))
|
||||
(define (ssax:ncname-starting-char? a-char) (and (char? a-char) (or (char-alphabetic? a-char) (char=? #\_ a-char))))
|
||||
(define (ssax:read-NCName port) (let ((first-char (peek-char port))) (or (ssax:ncname-starting-char? first-char) (parser-error port "XMLNS [4] for '" first-char "'"))) (string->symbol (next-token-of (lambda (c) (cond ((eof-object? c) #f) ((char-alphabetic? c) c) ((string-index "0123456789.-_" c) c) (else #f))) port)))
|
||||
(define (ssax:read-QName port) (let ((prefix-or-localpart (ssax:read-NCName port))) (case (peek-char port) ((#\:) (read-char port) (cons prefix-or-localpart (ssax:read-NCName port))) (else prefix-or-localpart))))
|
||||
(define ssax:Prefix-XML (string->symbol "xml"))
|
||||
(define name-compare (letrec ((symbol-compare (lambda (symb1 symb2) (cond ((eq? symb1 symb2) (quote =)) ((string<? (symbol->string symb1) (symbol->string symb2)) (quote <)) (else (quote >)))))) (lambda (name1 name2) (cond ((symbol? name1) (if (symbol? name2) (symbol-compare name1 name2) (quote <))) ((symbol? name2) (quote >)) ((eq? name2 ssax:largest-unres-name) (quote <)) ((eq? name1 ssax:largest-unres-name) (quote >)) ((eq? (car name1) (car name2)) (symbol-compare (cdr name1) (cdr name2))) (else (symbol-compare (car name1) (car name2)))))))
|
||||
(define ssax:largest-unres-name (cons (string->symbol "#LARGEST-SYMBOL") (string->symbol "#LARGEST-SYMBOL")))
|
||||
(define ssax:read-markup-token (let () (define (skip-comment port) (assert-curr-char (quote (#\-)) "XML [15], second dash" port) (if (not (find-string-from-port? "-->" port)) (parser-error port "XML [15], no -->")) (make-xml-token (quote COMMENT) #f)) (define (read-cdata port) (assert (string=? "CDATA[" (read-string 6 port))) (make-xml-token (quote CDSECT) #f)) (lambda (port) (assert-curr-char (quote (#\<)) "start of the token" port) (case (peek-char port) ((#\/) (read-char port) (begin0 (make-xml-token (quote END) (ssax:read-QName port)) (ssax:skip-S port) (assert-curr-char (quote (#\>)) "XML [42]" port))) ((#\?) (read-char port) (make-xml-token (quote PI) (ssax:read-NCName port))) ((#\!) (case (peek-next-char port) ((#\-) (read-char port) (skip-comment port)) ((#\[) (read-char port) (read-cdata port)) (else (make-xml-token (quote DECL) (ssax:read-NCName port))))) (else (make-xml-token (quote START) (ssax:read-QName port)))))))
|
||||
(define (ssax:skip-pi port) (if (not (find-string-from-port? "?>" port)) (parser-error port "Failed to find ?> terminating the PI")))
|
||||
(define (ssax:read-pi-body-as-string port) (ssax:skip-S port) (string-concatenate/shared (let loop () (let ((pi-fragment (next-token (quote ()) (quote (#\?)) "reading PI content" port))) (if (eqv? #\> (peek-next-char port)) (begin (read-char port) (cons pi-fragment (quote ()))) (cons* pi-fragment "?" (loop)))))))
|
||||
(define (ssax:skip-internal-dtd port) (if (not (find-string-from-port? "]>" port)) (parser-error port "Failed to find ]> terminating the internal DTD subset")))
|
||||
(define ssax:read-cdata-body (let ((cdata-delimiters (list char-return #\newline #\] #\&))) (lambda (port str-handler seed) (let loop ((seed seed)) (let ((fragment (next-token (quote ()) cdata-delimiters "reading CDATA" port))) (case (read-char port) ((#\newline) (loop (str-handler fragment nl seed))) ((#\]) (if (not (eqv? (peek-char port) #\])) (loop (str-handler fragment "]" seed)) (let check-after-second-braket ((seed (if (string-null? fragment) seed (str-handler fragment "" seed)))) (case (peek-next-char port) ((#\>) (read-char port) seed) ((#\]) (check-after-second-braket (str-handler "]" "" seed))) (else (loop (str-handler "]]" "" seed))))))) ((#\&) (let ((ent-ref (next-token-of (lambda (c) (and (not (eof-object? c)) (char-alphabetic? c) c)) port))) (cond ((and (string=? "gt" ent-ref) (eqv? (peek-char port) #\;)) (read-char port) (loop (str-handler fragment ">" seed))) (else (loop (str-handler ent-ref "" (str-handler fragment "&" seed))))))) (else (if (eqv? (peek-char port) #\newline) (read-char port)) (loop (str-handler fragment nl seed)))))))))
|
||||
(define (ssax:read-char-ref port) (let* ((base (cond ((eqv? (peek-char port) #\x) (read-char port) 16) (else 10))) (name (next-token (quote ()) (quote (#\;)) "XML [66]" port)) (char-code (string->number name base))) (read-char port) (if (integer? char-code) (ucscode->char char-code) (parser-error port "[wf-Legalchar] broken for '" name "'"))))
|
||||
(define ssax:predefined-parsed-entities (quasiquote (((unquote (string->symbol "amp")) . "&") ((unquote (string->symbol "lt")) . "<") ((unquote (string->symbol "gt")) . ">") ((unquote (string->symbol "apos")) . "'") ((unquote (string->symbol "quot")) . "\""))))
|
||||
(define (ssax:handle-parsed-entity port name entities content-handler str-handler seed) (cond ((assq name entities) => (lambda (decl-entity) (let ((ent-body (cdr decl-entity)) (new-entities (cons (cons name #f) entities))) (cond ((string? ent-body) (call-with-input-string ent-body (lambda (port) (content-handler port new-entities seed)))) ((procedure? ent-body) (let ((port (ent-body))) (begin0 (content-handler port new-entities seed) (close-input-port port)))) (else (parser-error port "[norecursion] broken for " name)))))) ((assq name ssax:predefined-parsed-entities) => (lambda (decl-entity) (str-handler (cdr decl-entity) "" seed))) (else (parser-error port "[wf-entdeclared] broken for " name))))
|
||||
(define (make-empty-attlist) (quote ()))
|
||||
(define (attlist-add attlist name-value) (if (null? attlist) (cons name-value attlist) (case (name-compare (car name-value) (caar attlist)) ((=) #f) ((<) (cons name-value attlist)) (else (cons (car attlist) (attlist-add (cdr attlist) name-value))))))
|
||||
(define attlist-null? null?)
|
||||
(define (attlist-remove-top attlist) (values (car attlist) (cdr attlist)))
|
||||
(define (attlist->alist attlist) attlist)
|
||||
(define attlist-fold fold)
|
||||
(define ssax:read-attributes (let ((value-delimeters (append ssax:S-chars (quote (#\< #\&))))) (define (read-attrib-value delimiter port entities prev-fragments) (let* ((new-fragments (cons (next-token (quote ()) (cons delimiter value-delimeters) "XML [10]" port) prev-fragments)) (cterm (read-char port))) (cond ((or (eof-object? cterm) (eqv? cterm delimiter)) new-fragments) ((eqv? cterm char-return) (if (eqv? (peek-char port) #\newline) (read-char port)) (read-attrib-value delimiter port entities (cons " " new-fragments))) ((memv cterm ssax:S-chars) (read-attrib-value delimiter port entities (cons " " new-fragments))) ((eqv? cterm #\&) (cond ((eqv? (peek-char port) #\#) (read-char port) (read-attrib-value delimiter port entities (cons (string (ssax:read-char-ref port)) new-fragments))) (else (read-attrib-value delimiter port entities (read-named-entity port entities new-fragments))))) (else (parser-error port "[CleanAttrVals] broken"))))) (define (read-named-entity port entities fragments) (let ((name (ssax:read-NCName port))) (assert-curr-char (quote (#\;)) "XML [68]" port) (ssax:handle-parsed-entity port name entities (lambda (port entities fragments) (read-attrib-value (quote *eof*) port entities fragments)) (lambda (str1 str2 fragments) (if (equal? "" str2) (cons str1 fragments) (cons* str2 str1 fragments))) fragments))) (lambda (port entities) (let loop ((attr-list (make-empty-attlist))) (if (not (ssax:ncname-starting-char? (ssax:skip-S port))) attr-list (let ((name (ssax:read-QName port))) (ssax:skip-S port) (assert-curr-char (quote (#\=)) "XML [25]" port) (ssax:skip-S port) (let ((delimiter (assert-curr-char (quote (#\' #\")) "XML [10]" port))) (loop (or (attlist-add attr-list (cons name (string-concatenate-reverse/shared (read-attrib-value delimiter port entities (quote ()))))) (parser-error port "[uniqattspec] broken for " name))))))))))
|
||||
(define (ssax:resolve-name port unres-name namespaces apply-default-ns?) (cond ((pair? unres-name) (cons (cond ((assq (car unres-name) namespaces) => cadr) ((eq? (car unres-name) ssax:Prefix-XML) ssax:Prefix-XML) (else (parser-error port "[nsc-NSDeclared] broken; prefix " (car unres-name)))) (cdr unres-name))) (apply-default-ns? (let ((default-ns (assq (quote *DEFAULT*) namespaces))) (if (and default-ns (cadr default-ns)) (cons (cadr default-ns) unres-name) unres-name))) (else unres-name)))
|
||||
(define (ssax:uri-string->symbol uri-str) (string->symbol uri-str))
|
||||
(define ssax:complete-start-tag (let ((xmlns (string->symbol "xmlns")) (largest-dummy-decl-attr (list ssax:largest-unres-name #f #f #f))) (define (validate-attrs port attlist decl-attrs) (define (add-default-decl decl-attr result) (let*-values (((attr-name content-type use-type default-value) (apply values decl-attr))) (and (eq? use-type (quote REQUIRED)) (parser-error port "[RequiredAttr] broken for" attr-name)) (if default-value (cons (cons attr-name default-value) result) result))) (let loop ((attlist attlist) (decl-attrs decl-attrs) (result (quote ()))) (if (attlist-null? attlist) (attlist-fold add-default-decl result decl-attrs) (let*-values (((attr attr-others) (attlist-remove-top attlist)) ((decl-attr other-decls) (if (attlist-null? decl-attrs) (values largest-dummy-decl-attr decl-attrs) (attlist-remove-top decl-attrs)))) (case (name-compare (car attr) (car decl-attr)) ((<) (if (or (eq? xmlns (car attr)) (and (pair? (car attr)) (eq? xmlns (caar attr)))) (loop attr-others decl-attrs (cons attr result)) (parser-error port "[ValueType] broken for " attr))) ((>) (loop attlist other-decls (add-default-decl decl-attr result))) (else (let*-values (((attr-name content-type use-type default-value) (apply values decl-attr))) (cond ((eq? use-type (quote FIXED)) (or (equal? (cdr attr) default-value) (parser-error port "[FixedAttr] broken for " attr-name))) ((eq? content-type (quote CDATA)) #t) ((pair? content-type) (or (member (cdr attr) content-type) (parser-error port "[enum] broken for " attr-name "=" (cdr attr)))) (else (ssax:warn port "declared content type " content-type " not verified yet"))) (loop attr-others other-decls (cons attr result))))))))) (define (add-ns port prefix uri-str namespaces) (and (equal? "" uri-str) (parser-error port "[dt-NSName] broken for " prefix)) (let ((uri-symbol (ssax:uri-string->symbol uri-str))) (let loop ((nss namespaces)) (cond ((null? nss) (cons (cons* prefix uri-symbol uri-symbol) namespaces)) ((eq? uri-symbol (cddar nss)) (cons (cons* prefix (cadar nss) uri-symbol) namespaces)) (else (loop (cdr nss))))))) (define (adjust-namespace-decl port attrs namespaces) (let loop ((attrs attrs) (proper-attrs (quote ())) (namespaces namespaces)) (cond ((null? attrs) (values proper-attrs namespaces)) ((eq? xmlns (caar attrs)) (loop (cdr attrs) proper-attrs (if (equal? "" (cdar attrs)) (cons (cons* (quote *DEFAULT*) #f #f) namespaces) (add-ns port (quote *DEFAULT*) (cdar attrs) namespaces)))) ((and (pair? (caar attrs)) (eq? xmlns (caaar attrs))) (loop (cdr attrs) proper-attrs (add-ns port (cdaar attrs) (cdar attrs) namespaces))) (else (loop (cdr attrs) (cons (car attrs) proper-attrs) namespaces))))) (lambda (tag-head port elems entities namespaces) (let*-values (((attlist) (ssax:read-attributes port entities)) ((empty-el-tag?) (begin (ssax:skip-S port) (and (eqv? #\/ (assert-curr-char (quote (#\> #\/)) "XML [40], XML [44], no '>'" port)) (assert-curr-char (quote (#\>)) "XML [44], no '>'" port)))) ((elem-content decl-attrs) (if elems (cond ((assoc tag-head elems) => (lambda (decl-elem) (values (if empty-el-tag? (quote EMPTY-TAG) (cadr decl-elem)) (caddr decl-elem)))) (else (parser-error port "[elementvalid] broken, no decl for " tag-head))) (values (if empty-el-tag? (quote EMPTY-TAG) (quote ANY)) #f))) ((merged-attrs) (if decl-attrs (validate-attrs port attlist decl-attrs) (attlist->alist attlist))) ((proper-attrs namespaces) (adjust-namespace-decl port merged-attrs namespaces))) (values (ssax:resolve-name port tag-head namespaces #t) (fold-right (lambda (name-value attlist) (or (attlist-add attlist (cons (ssax:resolve-name port (car name-value) namespaces #f) (cdr name-value))) (parser-error port "[uniqattspec] after NS expansion broken for " name-value))) (make-empty-attlist) proper-attrs) namespaces elem-content)))))
|
||||
(define (ssax:read-external-id port) (let ((discriminator (ssax:read-NCName port))) (assert-curr-char ssax:S-chars "space after SYSTEM or PUBLIC" port) (ssax:skip-S port) (let ((delimiter (assert-curr-char (quote (#\' #\")) "XML [11], XML [12]" port))) (cond ((eq? discriminator (string->symbol "SYSTEM")) (begin0 (next-token (quote ()) (list delimiter) "XML [11]" port) (read-char port))) ((eq? discriminator (string->symbol "PUBLIC")) (skip-until (list delimiter) port) (assert-curr-char ssax:S-chars "space after PubidLiteral" port) (ssax:skip-S port) (let* ((delimiter (assert-curr-char (quote (#\' #\")) "XML [11]" port)) (systemid (next-token (quote ()) (list delimiter) "XML [11]" port))) (read-char port) systemid)) (else (parser-error port "XML [75], " discriminator " rather than SYSTEM or PUBLIC"))))))
|
||||
(define (ssax:scan-Misc port) (let loop ((c (ssax:skip-S port))) (cond ((eof-object? c) c) ((not (char=? c #\<)) (parser-error port "XML [22], char '" c "' unexpected")) (else (let ((token (ssax:read-markup-token port))) (case (xml-token-kind token) ((COMMENT) (loop (ssax:skip-S port))) ((PI DECL START) token) (else (parser-error port "XML [22], unexpected token of kind " (xml-token-kind token)))))))))
|
||||
(define ssax:read-char-data (let ((terminators-usual (list #\< #\& char-return)) (terminators-usual-eof (list #\< (quote *eof*) #\& char-return)) (handle-fragment (lambda (fragment str-handler seed) (if (string-null? fragment) seed (str-handler fragment "" seed))))) (lambda (port expect-eof? str-handler seed) (if (eqv? #\< (peek-char port)) (let ((token (ssax:read-markup-token port))) (case (xml-token-kind token) ((START END) (values seed token)) ((CDSECT) (let ((seed (ssax:read-cdata-body port str-handler seed))) (ssax:read-char-data port expect-eof? str-handler seed))) ((COMMENT) (ssax:read-char-data port expect-eof? str-handler seed)) (else (values seed token)))) (let ((char-data-terminators (if expect-eof? terminators-usual-eof terminators-usual))) (let loop ((seed seed)) (let* ((fragment (next-token (quote ()) char-data-terminators "reading char data" port)) (term-char (peek-char port))) (if (eof-object? term-char) (values (handle-fragment fragment str-handler seed) term-char) (case term-char ((#\<) (let ((token (ssax:read-markup-token port))) (case (xml-token-kind token) ((CDSECT) (loop (ssax:read-cdata-body port str-handler (handle-fragment fragment str-handler seed)))) ((COMMENT) (loop (handle-fragment fragment str-handler seed))) (else (values (handle-fragment fragment str-handler seed) token))))) ((#\&) (case (peek-next-char port) ((#\#) (read-char port) (loop (str-handler fragment (string (ssax:read-char-ref port)) seed))) (else (let ((name (ssax:read-NCName port))) (assert-curr-char (quote (#\;)) "XML [68]" port) (values (handle-fragment fragment str-handler seed) (make-xml-token (quote ENTITY-REF) name)))))) (else (if (eqv? (peek-next-char port) #\newline) (read-char port)) (loop (str-handler fragment (string #\newline) seed))))))))))))
|
||||
(define (ssax:assert-token token kind gi error-cont) (or (and (xml-token? token) (eq? kind (xml-token-kind token)) (equal? gi (xml-token-head token))) (error-cont token kind gi)))
|
||||
(define-syntax ssax:make-pi-parser (syntax-rules () ((ssax:make-pi-parser orig-handlers) (letrec-syntax ((loop (syntax-rules (*DEFAULT*) ((loop () #f accum port target seed) (make-case ((else (ssax:warn port "Skipping PI: " target nl) (ssax:skip-pi port) seed) . accum) () target)) ((loop () default accum port target seed) (make-case ((else (default port target seed)) . accum) () target)) ((loop ((*DEFAULT* . default) . handlers) old-def accum port target seed) (loop handlers default accum port target seed)) ((loop ((tag . handler) . handlers) default accum port target seed) (loop handlers default (((tag) (handler port target seed)) . accum) port target seed)))) (make-case (syntax-rules () ((make-case () clauses target) (case target . clauses)) ((make-case (clause . clauses) accum target) (make-case clauses (clause . accum) target))))) (lambda (port target seed) (loop orig-handlers #f () port target seed))))))
|
||||
(define-syntax ssax:make-elem-parser (syntax-rules () ((ssax:make-elem-parser my-new-level-seed my-finish-element my-char-data-handler my-pi-handlers) (lambda (start-tag-head port elems entities namespaces preserve-ws? seed) (define xml-space-gi (cons ssax:Prefix-XML (string->symbol "space"))) (let handle-start-tag ((start-tag-head start-tag-head) (port port) (entities entities) (namespaces namespaces) (preserve-ws? preserve-ws?) (parent-seed seed)) (let*-values (((elem-gi attributes namespaces expected-content) (ssax:complete-start-tag start-tag-head port elems entities namespaces)) ((seed) (my-new-level-seed elem-gi attributes namespaces expected-content parent-seed))) (case expected-content ((EMPTY-TAG) (my-finish-element elem-gi attributes namespaces parent-seed seed)) ((EMPTY) (ssax:assert-token (and (eqv? #\< (ssax:skip-S port)) (ssax:read-markup-token port)) (quote END) start-tag-head (lambda (token exp-kind exp-head) (parser-error port "[elementvalid] broken for " token " while expecting " exp-kind exp-head))) (my-finish-element elem-gi attributes namespaces parent-seed seed)) (else (let ((preserve-ws? (cond ((assoc xml-space-gi attributes) => (lambda (name-value) (equal? "preserve" (cdr name-value)))) (else preserve-ws?)))) (let loop ((port port) (entities entities) (expect-eof? #f) (seed seed)) (let*-values (((seed term-token) (ssax:read-char-data port expect-eof? my-char-data-handler seed))) (if (eof-object? term-token) seed (case (xml-token-kind term-token) ((END) (ssax:assert-token term-token (quote END) start-tag-head (lambda (token exp-kind exp-head) (parser-error port "[GIMatch] broken for " term-token " while expecting " exp-kind exp-head))) (my-finish-element elem-gi attributes namespaces parent-seed seed)) ((PI) (let ((seed ((ssax:make-pi-parser my-pi-handlers) port (xml-token-head term-token) seed))) (loop port entities expect-eof? seed))) ((ENTITY-REF) (let ((seed (ssax:handle-parsed-entity port (xml-token-head term-token) entities (lambda (port entities seed) (loop port entities #t seed)) my-char-data-handler seed))) (loop port entities expect-eof? seed))) ((START) (if (eq? expected-content (quote PCDATA)) (parser-error port "[elementvalid] broken for " elem-gi " with char content only; unexpected token " term-token)) (let ((seed (handle-start-tag (xml-token-head term-token) port entities namespaces preserve-ws? seed))) (loop port entities expect-eof? seed))) (else (parser-error port "XML [43] broken for " term-token)))))))))))))))
|
||||
(define-syntax ssax:make-parser/positional-args (syntax-rules () ((ssax:make-parser/positional-args *handler-DOCTYPE *handler-UNDECL-ROOT *handler-DECL-ROOT *handler-NEW-LEVEL-SEED *handler-FINISH-ELEMENT *handler-CHAR-DATA-HANDLER *handler-PI) (lambda (port seed) (define (handle-decl port token-head seed) (or (eq? (string->symbol "DOCTYPE") token-head) (parser-error port "XML [22], expected DOCTYPE declaration, found " token-head)) (assert-curr-char ssax:S-chars "XML [28], space after DOCTYPE" port) (ssax:skip-S port) (let*-values (((docname) (ssax:read-QName port)) ((systemid) (and (ssax:ncname-starting-char? (ssax:skip-S port)) (ssax:read-external-id port))) ((internal-subset?) (begin (ssax:skip-S port) (eqv? #\[ (assert-curr-char (quote (#\> #\[)) "XML [28], end-of-DOCTYPE" port)))) ((elems entities namespaces seed) (*handler-DOCTYPE port docname systemid internal-subset? seed))) (scan-for-significant-prolog-token-2 port elems entities namespaces seed))) (define (scan-for-significant-prolog-token-1 port seed) (let ((token (ssax:scan-Misc port))) (if (eof-object? token) (parser-error port "XML [22], unexpected EOF") (case (xml-token-kind token) ((PI) (let ((seed ((ssax:make-pi-parser *handler-PI) port (xml-token-head token) seed))) (scan-for-significant-prolog-token-1 port seed))) ((DECL) (handle-decl port (xml-token-head token) seed)) ((START) (let*-values (((elems entities namespaces seed) (*handler-UNDECL-ROOT (xml-token-head token) seed))) (element-parser (xml-token-head token) port elems entities namespaces #f seed))) (else (parser-error port "XML [22], unexpected markup " token)))))) (define (scan-for-significant-prolog-token-2 port elems entities namespaces seed) (let ((token (ssax:scan-Misc port))) (if (eof-object? token) (parser-error port "XML [22], unexpected EOF") (case (xml-token-kind token) ((PI) (let ((seed ((ssax:make-pi-parser *handler-PI) port (xml-token-head token) seed))) (scan-for-significant-prolog-token-2 port elems entities namespaces seed))) ((START) (element-parser (xml-token-head token) port elems entities namespaces #f (*handler-DECL-ROOT (xml-token-head token) seed))) (else (parser-error port "XML [22], unexpected markup " token)))))) (define element-parser (ssax:make-elem-parser *handler-NEW-LEVEL-SEED *handler-FINISH-ELEMENT *handler-CHAR-DATA-HANDLER *handler-PI)) (scan-for-significant-prolog-token-1 port seed)))))
|
||||
(define-syntax ssax:define-labeled-arg-macro (syntax-rules () ((ssax:define-labeled-arg-macro labeled-arg-macro-name (positional-macro-name (arg-name . arg-def) ...)) (define-syntax labeled-arg-macro-name (syntax-rules () ((labeled-arg-macro-name . kw-val-pairs) (letrec-syntax ((find (syntax-rules (arg-name ...) ((find k-args (arg-name . default) arg-name val . others) (next val . k-args)) ... ((find k-args key arg-no-match-name val . others) (find k-args key . others)) ((find k-args (arg-name default)) (next default . k-args)) ...)) (next (syntax-rules () ((next val vals key . keys) (find ((val . vals) . keys) key . kw-val-pairs)) ((next val vals) (rev-apply (val) vals)))) (rev-apply (syntax-rules () ((rev-apply form (x . xs)) (rev-apply (x . form) xs)) ((rev-apply form ()) form)))) (next positional-macro-name () (arg-name . arg-def) ...))))))))
|
||||
(ssax:define-labeled-arg-macro ssax:make-parser (ssax:make-parser/positional-args (DOCTYPE (lambda (port docname systemid internal-subset? seed) (when internal-subset? (ssax:warn port "Internal DTD subset is not currently handled ") (ssax:skip-internal-dtd port)) (ssax:warn port "DOCTYPE DECL " docname " " systemid " found and skipped") (values #f (quote ()) (quote ()) seed))) (UNDECL-ROOT (lambda (elem-gi seed) (values #f (quote ()) (quote ()) seed))) (DECL-ROOT (lambda (elem-gi seed) seed)) (NEW-LEVEL-SEED) (FINISH-ELEMENT) (CHAR-DATA-HANDLER) (PI ())))
|
||||
(define (ssax:reverse-collect-str fragments) (cond ((null? fragments) (quote ())) ((null? (cdr fragments)) fragments) (else (let loop ((fragments fragments) (result (quote ())) (strs (quote ()))) (cond ((null? fragments) (if (null? strs) result (cons (string-concatenate/shared strs) result))) ((string? (car fragments)) (loop (cdr fragments) result (cons (car fragments) strs))) (else (loop (cdr fragments) (cons (car fragments) (if (null? strs) result (cons (string-concatenate/shared strs) result))) (quote ()))))))))
|
||||
(define (ssax:reverse-collect-str-drop-ws fragments) (cond ((null? fragments) (quote ())) ((null? (cdr fragments)) (if (and (string? (car fragments)) (string-whitespace? (car fragments))) (quote ()) fragments)) (else (let loop ((fragments fragments) (result (quote ())) (strs (quote ())) (all-whitespace? #t)) (cond ((null? fragments) (if all-whitespace? result (cons (string-concatenate/shared strs) result))) ((string? (car fragments)) (loop (cdr fragments) result (cons (car fragments) strs) (and all-whitespace? (string-whitespace? (car fragments))))) (else (loop (cdr fragments) (cons (car fragments) (if all-whitespace? result (cons (string-concatenate/shared strs) result))) (quote ()) #t)))))))
|
||||
(define (ssax:xml->sxml port namespace-prefix-assig) (letrec ((namespaces (map (lambda (el) (cons* #f (car el) (ssax:uri-string->symbol (cdr el)))) namespace-prefix-assig)) (RES-NAME->SXML (lambda (res-name) (string->symbol (string-append (symbol->string (car res-name)) ":" (symbol->string (cdr res-name))))))) (let ((result (reverse ((ssax:make-parser NEW-LEVEL-SEED (lambda (elem-gi attributes namespaces expected-content seed) (quote ())) FINISH-ELEMENT (lambda (elem-gi attributes namespaces parent-seed seed) (let ((seed (ssax:reverse-collect-str-drop-ws seed)) (attrs (attlist-fold (lambda (attr accum) (cons (list (if (symbol? (car attr)) (car attr) (RES-NAME->SXML (car attr))) (cdr attr)) accum)) (quote ()) attributes))) (cons (cons (if (symbol? elem-gi) elem-gi (RES-NAME->SXML elem-gi)) (if (null? attrs) seed (cons (cons (quote @) attrs) seed))) parent-seed))) CHAR-DATA-HANDLER (lambda (string1 string2 seed) (if (string-null? string2) (cons string1 seed) (cons* string2 string1 seed))) DOCTYPE (lambda (port docname systemid internal-subset? seed) (when internal-subset? (ssax:warn port "Internal DTD subset is not currently handled ") (ssax:skip-internal-dtd port)) (ssax:warn port "DOCTYPE DECL " docname " " systemid " found and skipped") (values #f (quote ()) namespaces seed)) UNDECL-ROOT (lambda (elem-gi seed) (values #f (quote ()) namespaces seed)) PI ((*DEFAULT* lambda (port pi-tag seed) (cons (list (quote *PI*) pi-tag (ssax:read-pi-body-as-string port)) seed)))) port (quote ()))))) (cons (quote *TOP*) (if (null? namespace-prefix-assig) result (cons (list (quote @) (cons (quote *NAMESPACES*) (map (lambda (ns) (list (car ns) (cdr ns))) namespace-prefix-assig))) result))))))
|
|
@ -1,420 +0,0 @@
|
|||
; HTML Authoring in SXML for my personal Web pages
|
||||
;
|
||||
; The present file defines several functions and higher-order
|
||||
; SXML "tags" that are used to compose HTML pages on my web site.
|
||||
; In LaTeX terms, this file is similar to article.cls.
|
||||
;
|
||||
; See http://pobox.com/~oleg/ftp/Scheme/xml.html#XML-authoring
|
||||
; for more examples and explanation.
|
||||
;
|
||||
; IMPORT
|
||||
; Approporiate Prelude: myenv.scm or myenv-bigloo.scm
|
||||
; srfi-13-local.scm or the appropriate native implementation of SRFI-13
|
||||
; util.scm
|
||||
; SXML-tree-trans.scm
|
||||
; SXML-to-HTML.scm
|
||||
; OS:file-length, unless it is included into the core system
|
||||
; (see myenv-bigloo.scm for example)
|
||||
;
|
||||
; $Id: SXML-to-HTML-ext.scm,v 1.2 2004/11/09 14:11:39 sperber Exp $
|
||||
|
||||
; skip the lst trough the first significant element
|
||||
; return the tail of lst such that (car result) is significant
|
||||
; Insignificant elems are '(), #f, and lists made of them
|
||||
; If all of the list is insignificant, return #f
|
||||
(define (signif-tail lst)
|
||||
(define (signif? obj)
|
||||
(and (not (null? obj)) obj
|
||||
(if (pair? obj)
|
||||
(or (signif? (car obj))
|
||||
(signif? (cdr obj)))
|
||||
obj)))
|
||||
(and (signif? lst)
|
||||
(assert (pair? lst))
|
||||
(if (signif? (car lst)) lst
|
||||
(signif-tail (cdr lst)))))
|
||||
|
||||
; Procedure make-header HEAD-PARMS
|
||||
; Create the 'head' SXML/HTML tag. HEAD-PARMS is an assoc list of
|
||||
; (h-key h-value), where h-value is a typically string;
|
||||
; h-key is a symbol:
|
||||
; title, description, AuthorAddress, keywords,
|
||||
; Date-Revision-yyyymmdd, Date-Creation-yyyymmdd,
|
||||
; long-title
|
||||
; One of the h-key can be Links.
|
||||
; In that case, h-value is a list of
|
||||
; (l-key l-href (attr value) ...)
|
||||
; where l-key is one of the following:
|
||||
; start, contents, prev, next, top, home
|
||||
|
||||
(define (make-header head-parms)
|
||||
`(head
|
||||
(title ,(lookup-def 'title head-parms))
|
||||
,(map
|
||||
(lambda (key)
|
||||
(let ((val (lookup-def key head-parms warn: #f)))
|
||||
(and val
|
||||
`(meta (@ (name ,(symbol->string key)) (content ,val))))))
|
||||
'(description AuthorAddress keywords
|
||||
Date-Revision-yyyymmdd Date-Creation-yyyymmdd))
|
||||
,(let ((links (lookup-def 'Links head-parms '())))
|
||||
(and (pair? links)
|
||||
(map
|
||||
(lambda (link-key)
|
||||
(let ((val (lookup-def link-key links #f)))
|
||||
(and val
|
||||
(let ((val (if (not (pair? val)) (list val) val)))
|
||||
`(link (@ (rel ,(symbol->string link-key))
|
||||
(href ,(car val))
|
||||
,@(cdr val)))))))
|
||||
'(start contents prev next)))))
|
||||
)
|
||||
|
||||
; Create a navigational bar. The argument head-parms is the same
|
||||
; as the one passed to make-header. We're only concerned with the
|
||||
; h-value Links
|
||||
(define (make-navbar head-parms)
|
||||
(let ((links (lookup-def 'Links head-parms '()))
|
||||
(nav-labels '((prev . "previous")
|
||||
(next . "next")
|
||||
(contents . "contents")
|
||||
(top . "top"))))
|
||||
(and (pair? links)
|
||||
`(div (@ (align "center") (class "navbar"))
|
||||
,(let loop ((nav-labels nav-labels) (first? #t))
|
||||
(if (null? nav-labels) '()
|
||||
(let ((val (lookup-def (caar nav-labels) links warn: #f)))
|
||||
(if (not val)
|
||||
(loop (cdr nav-labels) first?)
|
||||
(cons
|
||||
(list " " (if first? #f '(n_)) " "
|
||||
`(a (@ (href ,val)) ,(cdar nav-labels)))
|
||||
(loop (cdr nav-labels) #f))))))
|
||||
(hr)))
|
||||
))
|
||||
|
||||
|
||||
; Create a footer. The argument head-parms is the same
|
||||
; as passed to make-header.
|
||||
(define (make-footer head-parms)
|
||||
`((br)
|
||||
(div (hr))
|
||||
(h3 "Last updated "
|
||||
,(let* ((date-revised
|
||||
(lookup-def 'Date-Revision-yyyymmdd head-parms))
|
||||
(year (string->integer date-revised 0 4))
|
||||
(month (string->integer date-revised 4 6))
|
||||
(day (string->integer date-revised 6 8))
|
||||
(month-name
|
||||
(vector-ref
|
||||
'#("January" "February" "March" "April" "May" "June"
|
||||
"July" "August" "September" "October" "November"
|
||||
"December")
|
||||
(dec month))))
|
||||
(list month-name " " day ", " year)))
|
||||
,(let ((links (lookup-def 'Links head-parms '())))
|
||||
(and (pair? links)
|
||||
(let ((home (lookup-def 'home links warn: #f)))
|
||||
(and home
|
||||
`(p "This site's top page is "
|
||||
(a (@ (href ,home)) (strong ,home)))))))
|
||||
(div
|
||||
(address "oleg-at-pobox.com or oleg-at-acm.org or oleg-at-computer.org"
|
||||
(br)
|
||||
"Your comments, problem reports, questions are very welcome!"))
|
||||
(p (font (@ (size "-2")) "Converted from SXML by SXML->HTML"))
|
||||
,(let ((rcs-id (lookup-def 'rcs-id head-parms #f)))
|
||||
(and rcs-id `(h4 ,rcs-id)))
|
||||
))
|
||||
|
||||
; Bindings for the post-order function, which traverses the SXML tree
|
||||
; and converts it to a tree of fragments
|
||||
|
||||
; The universal transformation from SXML to HTML. The following rules
|
||||
; work for every HTML, present and future
|
||||
(define universal-conversion-rules
|
||||
`((@
|
||||
((*default* ; local override for attributes
|
||||
. ,(lambda (attr-key . value) (enattr attr-key value))))
|
||||
. ,(lambda (trigger . value) (cons '@ value)))
|
||||
(*default* . ,(lambda (tag . elems) (entag tag elems)))
|
||||
(*text* . ,(lambda (trigger str)
|
||||
(if (string? str) (string->goodHTML str) str)))
|
||||
(n_ ; a non-breaking space
|
||||
. ,(lambda (tag . elems)
|
||||
(cons " " elems)))))
|
||||
|
||||
; A variation of universal-conversion-rules which keeps '<', '>', '&'
|
||||
; and similar characters intact. The universal-protected-rules are
|
||||
; useful when the tree of fragments has to be traversed one more time.
|
||||
(define universal-protected-rules
|
||||
`((@
|
||||
((*default* ; local override for attributes
|
||||
. ,(lambda (attr-key . value) (enattr attr-key value))))
|
||||
. ,(lambda (trigger . value) (cons '@ value)))
|
||||
(*default* . ,(lambda (tag . elems) (entag tag elems)))
|
||||
(*text* . ,(lambda (trigger str)
|
||||
str))
|
||||
(n_ ; a non-breaking space
|
||||
. ,(lambda (tag . elems)
|
||||
(cons " " elems)))))
|
||||
|
||||
; The following rules define the identity transformation
|
||||
(define alist-conv-rules
|
||||
`((*default* . ,(lambda (tag . elems) (cons tag elems)))
|
||||
(*text* . ,(lambda (trigger str) str))))
|
||||
|
||||
|
||||
; Find the 'Header' node within the 'Content' SXML expression.
|
||||
; Currently this query is executed via a transformation, with
|
||||
; rules that drop out everything but the 'Header' node.
|
||||
; We use the _breadth-first_ traversal of the Content tree.
|
||||
(define (find-Header Content)
|
||||
(letrec
|
||||
((search-rules
|
||||
`((*default*
|
||||
*preorder*
|
||||
. ,(lambda (tag . elems)
|
||||
(let loop ((elems elems) (worklist '()))
|
||||
(cond
|
||||
((null? elems)
|
||||
(if (null? worklist) '()
|
||||
(pre-post-order worklist search-rules)))
|
||||
((not (pair? (car elems))) (loop (cdr elems) worklist))
|
||||
((eq? 'Header (caar elems)) (car elems)) ; found
|
||||
(else (loop (cdr elems) (cons (car elems) worklist)))))))
|
||||
)))
|
||||
(lookup-def 'Header
|
||||
(list (pre-post-order Content search-rules))
|
||||
)))
|
||||
|
||||
|
||||
; Transformation rules that define a number of higher-order tags,
|
||||
; which give "style" to all my pages.
|
||||
; Some of these rules require a pre-post-order iterator
|
||||
; See xml.scm or any other of my web page master files for an example
|
||||
; of using these stylesheet rules
|
||||
|
||||
(define (generic-web-rules Content additional-rules)
|
||||
(append
|
||||
additional-rules
|
||||
universal-conversion-rules
|
||||
`((html:begin
|
||||
. ,(lambda (tag . elems)
|
||||
(list
|
||||
"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\""
|
||||
nl
|
||||
"\"http://www.w3.org/TR/html4/loose.dtd\">" nl
|
||||
"<html>" nl
|
||||
elems
|
||||
"</html>" nl)))
|
||||
|
||||
(Header
|
||||
*preorder*
|
||||
. ,(lambda (tag . headers)
|
||||
(post-order (make-header headers) universal-conversion-rules)
|
||||
))
|
||||
|
||||
(body
|
||||
. ,(lambda (tag . elems)
|
||||
(list "<body bgcolor=\"#FFFFFF\">" nl elems "</body>")))
|
||||
|
||||
(navbar ; Find the Header in the Content
|
||||
. ,(lambda (tag) ; and create the navigation bar
|
||||
(let ((header-parms (find-Header Content)))
|
||||
(post-order (make-navbar header-parms)
|
||||
universal-conversion-rules))))
|
||||
|
||||
(footer ; Find the Header in the Content
|
||||
. ,(lambda (tag) ; and create the footer of the page
|
||||
(let ((header-parms (find-Header Content)))
|
||||
(post-order (make-footer header-parms)
|
||||
universal-conversion-rules))))
|
||||
|
||||
(page-title ; Find the Header in the Content
|
||||
. ,(lambda (tag) ; and create the page title rule
|
||||
(let ((header-parms (find-Header Content)))
|
||||
(list "<h1 align=center>"
|
||||
(lookup-def 'long-title header-parms) "</h1>" nl))))
|
||||
|
||||
|
||||
(Section ; (Section level "content ...")
|
||||
. ,(lambda (tag level head-word . elems)
|
||||
(list "<br> <a name=\"" head-word "\"> </a>" nl
|
||||
"<h" level ">" head-word elems "</h" level ">" nl)))
|
||||
|
||||
(TOC ; Re-scan the Content for "Section" tags and generate
|
||||
. ,(lambda (tag) ; the Hierarchical Table of contents
|
||||
(let ((sections
|
||||
(post-order Content
|
||||
`((Section ; (Section level "content ...")
|
||||
((*text* . ,(lambda (tag str) str)))
|
||||
. ,(lambda (tag level head-word . elems)
|
||||
(vector level
|
||||
(list "<li><a href=\"#" head-word
|
||||
"\">" head-word elems "</a>" nl))))
|
||||
(*default*
|
||||
. ,(lambda (attr-key . elems) elems))
|
||||
(*text* . ,(lambda (trigger str) '()))))))
|
||||
;(cerr sections)
|
||||
(list "<div>"
|
||||
(let loop ((curr-level 1) (sections sections))
|
||||
(cond
|
||||
((null? sections)
|
||||
(let fill ((curr-level curr-level))
|
||||
(if (> curr-level 1)
|
||||
(cons "</ol>" (fill (dec curr-level)))
|
||||
'())))
|
||||
((null? (car sections)) (loop curr-level (cdr sections)))
|
||||
((pair? (car sections)) (loop curr-level
|
||||
(append (car sections)
|
||||
(cdr sections))))
|
||||
((vector? (car sections))
|
||||
(let ((new-level (vector-ref (car sections) 0)))
|
||||
(cond
|
||||
((= new-level curr-level)
|
||||
(cons (vector-ref (car sections) 1)
|
||||
(loop curr-level (cdr sections))))
|
||||
((= (inc new-level) curr-level)
|
||||
(cons "</ol>"
|
||||
(cons (vector-ref (car sections) 1)
|
||||
(loop new-level (cdr sections)))))
|
||||
((= new-level (inc curr-level))
|
||||
(cons nl (cons "<ol>"
|
||||
(cons (vector-ref (car sections) 1)
|
||||
(loop new-level (cdr sections))))))
|
||||
(else
|
||||
(error "inconsistent levels: " curr-level new-level)))))
|
||||
(else "wrong item: " sections)))
|
||||
nl "</div>" nl))))
|
||||
|
||||
(bibitem *macro*
|
||||
. ,(lambda (tag label key . text)
|
||||
`(p (a (@ (name ,key)) "[" ,label "]") " " ,text)))
|
||||
|
||||
(cite ; ought to locate the label and use the label!
|
||||
. ,(lambda (tag key)
|
||||
(list "[<a href=\"#" key "\">" key "</a>]")))
|
||||
|
||||
|
||||
(trace ; A debugging aid
|
||||
. ,(lambda (tag . content)
|
||||
(cerr tag content nl)
|
||||
'()))
|
||||
|
||||
(URL *macro*
|
||||
. ,(lambda (tag url)
|
||||
`((br) "<" (a (@ (href ,url)) ,url) ">")))
|
||||
|
||||
|
||||
(verbatim ; set off pieces of code: one or several lines
|
||||
. ,(lambda (tag . lines)
|
||||
(list "<pre>"
|
||||
(map (lambda (line) (list " " line nl))
|
||||
lines)
|
||||
"</pre>")))
|
||||
; (note . text-strings)
|
||||
; A note (remark), similar to a footnote
|
||||
(note
|
||||
. ,(lambda (tag . text-strings)
|
||||
(list " <font size=\"-1\">[" text-strings "]</font>" nl)))
|
||||
|
||||
; A reference to a file
|
||||
(fileref
|
||||
. ,(lambda (tag pathname . descr-text)
|
||||
(list "<a href=\"" pathname "\">"
|
||||
(car (reverse (string-split pathname '(#\/))))
|
||||
"</a> ["
|
||||
(let ((file-size (OS:file-length pathname)))
|
||||
(if (not (positive? file-size))
|
||||
(error "File not found: " pathname))
|
||||
(cond
|
||||
((< file-size 1024) "<1K")
|
||||
(else (list (quotient (+ file-size 1023) 1024) "K"))))
|
||||
"]<br>"
|
||||
descr-text)))
|
||||
|
||||
; A reference to a plain-text file (article)
|
||||
(textref
|
||||
. ,(lambda (tag pathname title . descr)
|
||||
(let ((file-size (OS:file-length pathname)))
|
||||
(if (not (positive? file-size))
|
||||
(error "File not found: " pathname))
|
||||
(list "<a href=\"" pathname "\">" title
|
||||
"</a> <font size=\"-1\">[plain text file]</font><br>" nl
|
||||
descr))))
|
||||
|
||||
; A reference to an anchor in the present file
|
||||
; (local-ref target . title)
|
||||
; If title is given, generate a regular
|
||||
; <a href="#target">title</a>
|
||||
; Otherwise, transform the content so that a
|
||||
; construct that may generate an anchor 'target' (such
|
||||
; as Section or Description-unit) is re-written to the
|
||||
; title SXML. All other constructs re-write to
|
||||
; nothing.
|
||||
(local-ref
|
||||
*macro*
|
||||
. ,(lambda (tag target . title)
|
||||
(let
|
||||
((title
|
||||
(if (pair? title) title ; it is given explicitly
|
||||
(pre-post-order Content
|
||||
`((*text* . ,(lambda (trigger str) '()))
|
||||
(*default*
|
||||
. ,(lambda (tag . elems)
|
||||
(let ((first-sign (signif-tail elems)))
|
||||
(if first-sign
|
||||
(let ((second-sign
|
||||
(signif-tail (cdr first-sign))))
|
||||
(assert (not second-sign))
|
||||
(car first-sign))
|
||||
'()))))
|
||||
(Description-unit
|
||||
*preorder*
|
||||
. ,(lambda (tag key title . elems)
|
||||
(if (equal? key target)
|
||||
(list title)
|
||||
'()))))))))
|
||||
(assert (pair? title))
|
||||
(cerr "title: " title nl)
|
||||
`(a (@ (href #\# ,target)) ,title))))
|
||||
|
||||
; Unit of a description for a piece of code
|
||||
; (Description-unit key title . elems)
|
||||
; where elems is one of the following:
|
||||
; headline, body, platforms, version
|
||||
(Description-unit
|
||||
((headline
|
||||
. ,(lambda (tag . elems)
|
||||
(list "<dt>" elems "</dt>" nl)))
|
||||
(body
|
||||
. ,(lambda (tag . elems)
|
||||
(list "<dd>" elems "</dd>" nl)))
|
||||
(platforms
|
||||
. ,(lambda (tag . elems)
|
||||
(list "<dt><strong>Platforms</strong><dt><dd>"
|
||||
elems "</dd>" nl)))
|
||||
(version
|
||||
. ,(lambda (tag . elems)
|
||||
(list "<dt><strong>Version</strong><dt><dd>"
|
||||
"The current version is " elems ".</dd>" nl)))
|
||||
(references
|
||||
. ,(lambda (tag . elems)
|
||||
(list "<dt><strong>References</strong><dt><dd>"
|
||||
elems "</dd>" nl)))
|
||||
(requires
|
||||
. ,(lambda (tag . elems)
|
||||
(list "<dt><strong>Requires</strong><dt><dd>"
|
||||
elems "</dd>" nl)))
|
||||
)
|
||||
. ,(lambda (tag key title . elems)
|
||||
(post-order
|
||||
`((a (@ (name ,key)) (n_))
|
||||
(h2 ,title)
|
||||
(dl (insert-elems))
|
||||
)
|
||||
`(,@universal-protected-rules
|
||||
(insert-elems
|
||||
. ,(lambda (tag) elems))))))
|
||||
)))
|
|
@ -1,106 +0,0 @@
|
|||
; HTML Authoring in SXML
|
||||
;
|
||||
; The present file defines and demonstrates a function SXML->HTML, the
|
||||
; most generic transformation of SXML into the corresponding HTML
|
||||
; document. The SXML tree is traversed post-order and converted into
|
||||
; another tree, which, written in a depth-first fashion, results in a
|
||||
; HTML document. The function SXML->HTML can generate an arbitrary
|
||||
; HTML markup, for any existing or yet to be introduced HTML
|
||||
; tag. Furthermore, the function supports one higher-level tag,
|
||||
; 'html:begin'. As the source code below indicates, SXML->HTML can be
|
||||
; trivially extended to support other higher-level tags.
|
||||
;
|
||||
; The proper HTML markup is being created by a set of node
|
||||
; handlers. An iterator 'post-order' executes these functions while it
|
||||
; traverses an SXML tree.
|
||||
;
|
||||
; Each node handler takes a tag (the head of an SXML node) and the
|
||||
; list of children nodes, if any. A handler returns a fragment or a
|
||||
; list of HTML fragments -- which become arguments to a handler of a
|
||||
; parent SXML node. A function SRV:send-reply takes the resulting
|
||||
; tree of fragments and writes out the fragments in a depth-first
|
||||
; order. The output is an HTML document that corresponds to the
|
||||
; original SXML tree.
|
||||
;
|
||||
; This pretty-printing operation makes it possible to author and
|
||||
; compose HTML documents in their SXML form. SXML is more concise and
|
||||
; expressive than a raw markup language. SXML representing regular
|
||||
; Scheme code can be entered in any Scheme-sensitive editor. SXML as a
|
||||
; data structure -- a list -- can likewise be composed as a literal or
|
||||
; quasi-literal expression. Furthermore, SXML can be produced by regular
|
||||
; Scheme functions, which may make authoring more succinct, advanced,
|
||||
; and less tedious, as the code below illustrates.
|
||||
;
|
||||
; IMPORT
|
||||
; A prelude appropriate for your Scheme system
|
||||
; (myenv-bigloo.scm, myenv-mit.scm, etc.)
|
||||
; util.scm for make-char-quotator
|
||||
; SXML-tree-trans.scm for post-order
|
||||
;
|
||||
; EXPORT
|
||||
; SXML->HTML enattr entag string->goodHTML
|
||||
;
|
||||
; All these files are available in the same directory as this file.
|
||||
; See vSXML-to-HTML.scm for the validation code, which also
|
||||
; serves as usage examples.
|
||||
;
|
||||
; See http://pobox.com/~oleg/ftp/Scheme/xml.html#XML-authoring
|
||||
; for more examples and explanation.
|
||||
;
|
||||
; $Id: SXML-to-HTML.scm,v 1.2 2004/11/09 14:11:40 sperber Exp $
|
||||
|
||||
|
||||
; The following procedure is the most generic transformation of SXML
|
||||
; into the corresponding HTML document. The SXML tree is traversed
|
||||
; post-oder (depth-first) and transformed into another tree, which,
|
||||
; written in a depth-first fashion, results in an HTML document.
|
||||
|
||||
(define (SXML->HTML tree)
|
||||
(SRV:send-reply
|
||||
(pre-post-order tree
|
||||
; Universal transformation rules. Work for every HTML,
|
||||
; present and future
|
||||
`((@
|
||||
((*default* ; local override for attributes
|
||||
. ,(lambda (attr-key . value) (enattr attr-key value))))
|
||||
. ,(lambda (trigger . value) (cons '@ value)))
|
||||
(*default* . ,(lambda (tag . elems) (entag tag elems)))
|
||||
(*text* . ,(lambda (trigger str)
|
||||
(if (string? str) (string->goodHTML str) str)))
|
||||
|
||||
; Handle a nontraditional but convenient top-level element:
|
||||
; (html:begin title <html-body>) element
|
||||
(html:begin . ,(lambda (tag title . elems)
|
||||
(list "Content-type: text/html" ; HTTP headers
|
||||
nl nl ; two nl end the headers
|
||||
"<HTML><HEAD><TITLE>" title "</TITLE></HEAD>"
|
||||
elems
|
||||
"</HTML>"))))
|
||||
|
||||
)))
|
||||
|
||||
; The following two functions create the HTML markup for tags and attributes.
|
||||
; They are being used in the node handlers for the post-order function, see
|
||||
; above.
|
||||
|
||||
(define (entag tag elems)
|
||||
(if (and (pair? elems) (pair? (car elems)) (eq? '@ (caar elems)))
|
||||
(list #\newline #\< tag (cdar elems) #\>
|
||||
(and (pair? (cdr elems))
|
||||
(list (cdr elems) "</" tag #\>)))
|
||||
(list #\newline #\< tag #\> (and (pair? elems) (list elems "</" tag #\>))
|
||||
)))
|
||||
|
||||
(define (enattr attr-key value)
|
||||
(if (null? value) (list #\space attr-key)
|
||||
(list #\space attr-key "=\"" value #\")))
|
||||
|
||||
|
||||
; Given a string, check to make sure it does not contain characters
|
||||
; such as '<' or '&' that require encoding. Return either the original
|
||||
; string, or a list of string fragments with special characters
|
||||
; replaced by appropriate character entities.
|
||||
|
||||
(define string->goodHTML
|
||||
(make-char-quotator
|
||||
'((#\< . "<") (#\> . ">") (#\& . "&") (#\" . """))))
|
|
@ -1,249 +0,0 @@
|
|||
; XML/HTML processing in Scheme
|
||||
; SXML expression tree transformers
|
||||
;
|
||||
; IMPORT
|
||||
; A prelude appropriate for your Scheme system
|
||||
; (myenv-bigloo.scm, myenv-mit.scm, etc.)
|
||||
;
|
||||
; EXPORT
|
||||
; (provide SRV:send-reply
|
||||
; post-order pre-post-order replace-range)
|
||||
;
|
||||
; See vSXML-tree-trans.scm for the validation code, which also
|
||||
; serves as usage examples.
|
||||
;
|
||||
; $Id: SXML-tree-trans.scm,v 1.2 2004/11/09 14:11:40 sperber Exp $
|
||||
|
||||
|
||||
; Output the 'fragments'
|
||||
; The fragments are a list of strings, characters,
|
||||
; numbers, thunks, #f, #t -- and other fragments.
|
||||
; The function traverses the tree depth-first, writes out
|
||||
; strings and characters, executes thunks, and ignores
|
||||
; #f and '().
|
||||
; The function returns #t if anything was written at all;
|
||||
; otherwise the result is #f
|
||||
; If #t occurs among the fragments, it is not written out
|
||||
; but causes the result of SRV:send-reply to be #t
|
||||
|
||||
(define (SRV:send-reply . fragments)
|
||||
(let loop ((fragments fragments) (result #f))
|
||||
(cond
|
||||
((null? fragments) result)
|
||||
((not (car fragments)) (loop (cdr fragments) result))
|
||||
((null? (car fragments)) (loop (cdr fragments) result))
|
||||
((eq? #t (car fragments)) (loop (cdr fragments) #t))
|
||||
((pair? (car fragments))
|
||||
(loop (cdr fragments) (loop (car fragments) result)))
|
||||
((procedure? (car fragments))
|
||||
((car fragments))
|
||||
(loop (cdr fragments) #t))
|
||||
(else
|
||||
(display (car fragments))
|
||||
(loop (cdr fragments) #t)))))
|
||||
|
||||
|
||||
|
||||
;------------------------------------------------------------------------
|
||||
; Traversal of an SXML tree or a grove:
|
||||
; a <Node> or a <Nodelist>
|
||||
;
|
||||
; A <Node> and a <Nodelist> are mutually-recursive datatypes that
|
||||
; underlie the SXML tree:
|
||||
; <Node> ::= (name . <Nodelist>) | "text string"
|
||||
; An (ordered) set of nodes is just a list of the constituent nodes:
|
||||
; <Nodelist> ::= (<Node> ...)
|
||||
; Nodelists, and Nodes other than text strings are both lists. A
|
||||
; <Nodelist> however is either an empty list, or a list whose head is
|
||||
; not a symbol (an atom in general). A symbol at the head of a node is
|
||||
; either an XML name (in which case it's a tag of an XML element), or
|
||||
; an administrative name such as '@'.
|
||||
; See SXPath.scm and SSAX.scm for more information on SXML.
|
||||
|
||||
|
||||
; Pre-Post-order traversal of a tree and creation of a new tree:
|
||||
; pre-post-order:: <tree> x <bindings> -> <new-tree>
|
||||
; where
|
||||
; <bindings> ::= (<binding> ...)
|
||||
; <binding> ::= (<trigger-symbol> *preorder* . <handler>) |
|
||||
; (<trigger-symbol> *macro* . <handler>) |
|
||||
; (<trigger-symbol> <new-bindings> . <handler>) |
|
||||
; (<trigger-symbol> . <handler>)
|
||||
; <trigger-symbol> ::= XMLname | *text* | *default*
|
||||
; <handler> :: <trigger-symbol> x [<tree>] -> <new-tree>
|
||||
;
|
||||
; The pre-post-order function visits the nodes and nodelists
|
||||
; pre-post-order (depth-first). For each <Node> of the form (name
|
||||
; <Node> ...) it looks up an association with the given 'name' among
|
||||
; its <bindings>. If failed, pre-post-order tries to locate a
|
||||
; *default* binding. It's an error if the latter attempt fails as
|
||||
; well. Having found a binding, the pre-post-order function first
|
||||
; checks to see if the binding is of the form
|
||||
; (<trigger-symbol> *preorder* . <handler>)
|
||||
; If it is, the handler is 'applied' to the current node. Otherwise,
|
||||
; the pre-post-order function first calls itself recursively for each
|
||||
; child of the current node, with <new-bindings> prepended to the
|
||||
; <bindings> in effect. The result of these calls is passed to the
|
||||
; <handler> (along with the head of the current <Node>). To be more
|
||||
; precise, the handler is _applied_ to the head of the current node
|
||||
; and its processed children. The result of the handler, which should
|
||||
; also be a <tree>, replaces the current <Node>. If the current <Node>
|
||||
; is a text string or other atom, a special binding with a symbol
|
||||
; *text* is looked up.
|
||||
;
|
||||
; A binding can also be of a form
|
||||
; (<trigger-symbol> *macro* . <handler>)
|
||||
; This is equivalent to *preorder* described above. However, the result
|
||||
; is re-processed again, with the current stylesheet.
|
||||
|
||||
(define (pre-post-order tree bindings)
|
||||
(let* ((default-binding (assq '*default* bindings))
|
||||
(text-binding (or (assq '*text* bindings) default-binding))
|
||||
(text-handler ; Cache default and text bindings
|
||||
(and text-binding
|
||||
(if (procedure? (cdr text-binding))
|
||||
(cdr text-binding) (cddr text-binding)))))
|
||||
(let loop ((tree tree))
|
||||
(cond
|
||||
((null? tree) '())
|
||||
((not (pair? tree))
|
||||
(let ((trigger '*text*))
|
||||
(if text-handler (text-handler trigger tree)
|
||||
(error "Unknown binding for " trigger " and no default"))))
|
||||
((not (symbol? (car tree))) (map loop tree)) ; tree is a nodelist
|
||||
(else ; tree is an SXML node
|
||||
(let* ((trigger (car tree))
|
||||
(binding (or (assq trigger bindings) default-binding)))
|
||||
(cond
|
||||
((not binding)
|
||||
(error "Unknown binding for " trigger " and no default"))
|
||||
((not (pair? (cdr binding))) ; must be a procedure: handler
|
||||
(apply (cdr binding) trigger (map loop (cdr tree))))
|
||||
((eq? '*preorder* (cadr binding))
|
||||
(apply (cddr binding) tree))
|
||||
((eq? '*macro* (cadr binding))
|
||||
(loop (apply (cddr binding) tree)))
|
||||
(else ; (cadr binding) is a local binding
|
||||
(apply (cddr binding) trigger
|
||||
(pre-post-order (cdr tree) (append (cadr binding) bindings)))
|
||||
))))))))
|
||||
|
||||
; post-order is a strict subset of pre-post-order without *preorder*
|
||||
; (let alone *macro*) traversals.
|
||||
; Now pre-post-order is actually faster than the old post-order.
|
||||
; The function post-order is deprecated and is aliased below for
|
||||
; backward compatibility.
|
||||
(define post-order pre-post-order)
|
||||
|
||||
;------------------------------------------------------------------------
|
||||
; Extended tree fold
|
||||
; tree = atom | (node-name tree ...)
|
||||
;
|
||||
; foldts fdown fup fhere seed (Leaf str) = fhere seed str
|
||||
; foldts fdown fup fhere seed (Nd kids) =
|
||||
; fup seed $ foldl (foldts fdown fup fhere) (fdown seed) kids
|
||||
|
||||
; procedure fhere: seed -> atom -> seed
|
||||
; procedure fdown: seed -> node -> seed
|
||||
; procedure fup: parent-seed -> last-kid-seed -> node -> seed
|
||||
; foldts returns the final seed
|
||||
|
||||
(define (foldts fdown fup fhere seed tree)
|
||||
(cond
|
||||
((null? tree) seed)
|
||||
((not (pair? tree)) ; An atom
|
||||
(fhere seed tree))
|
||||
(else
|
||||
(let loop ((kid-seed (fdown seed tree)) (kids (cdr tree)))
|
||||
(if (null? kids)
|
||||
(fup seed kid-seed tree)
|
||||
(loop (foldts fdown fup fhere kid-seed (car kids))
|
||||
(cdr kids)))))))
|
||||
|
||||
;------------------------------------------------------------------------
|
||||
; Traverse a forest depth-first and cut/replace ranges of nodes.
|
||||
;
|
||||
; The nodes that define a range don't have to have the same immediate
|
||||
; parent, don't have to be on the same level, and the end node of a
|
||||
; range doesn't even have to exist. A replace-range procedure removes
|
||||
; nodes from the beginning node of the range up to (but not including)
|
||||
; the end node of the range. In addition, the beginning node of the
|
||||
; range can be replaced by a node or a list of nodes. The range of
|
||||
; nodes is cut while depth-first traversing the forest. If all
|
||||
; branches of the node are cut a node is cut as well. The procedure
|
||||
; can cut several non-overlapping ranges from a forest.
|
||||
|
||||
; replace-range:: BEG-PRED x END-PRED x FOREST -> FOREST
|
||||
; where
|
||||
; type FOREST = (NODE ...)
|
||||
; type NODE = Atom | (Name . FOREST) | FOREST
|
||||
;
|
||||
; The range of nodes is specified by two predicates, beg-pred and end-pred.
|
||||
; beg-pred:: NODE -> #f | FOREST
|
||||
; end-pred:: NODE -> #f | FOREST
|
||||
; The beg-pred predicate decides on the beginning of the range. The node
|
||||
; for which the predicate yields non-#f marks the beginning of the range
|
||||
; The non-#f value of the predicate replaces the node. The value can be a
|
||||
; list of nodes. The replace-range procedure then traverses the tree and skips
|
||||
; all the nodes, until the end-pred yields non-#f. The value of the end-pred
|
||||
; replaces the end-range node. The new end node and its brothers will be
|
||||
; re-scanned.
|
||||
; The predicates are evaluated pre-order. We do not descend into a node that
|
||||
; is marked as the beginning of the range.
|
||||
|
||||
(define (replace-range beg-pred end-pred forest)
|
||||
|
||||
; loop forest keep? new-forest
|
||||
; forest is the forest to traverse
|
||||
; new-forest accumulates the nodes we will keep, in the reverse
|
||||
; order
|
||||
; If keep? is #t, keep the curr node if atomic. If the node is not atomic,
|
||||
; traverse its children and keep those that are not in the skip range.
|
||||
; If keep? is #f, skip the current node if atomic. Otherwise,
|
||||
; traverse its children. If all children are skipped, skip the node
|
||||
; as well.
|
||||
|
||||
(define (loop forest keep? new-forest)
|
||||
(if (null? forest) (values (reverse new-forest) keep?)
|
||||
(let ((node (car forest)))
|
||||
(if keep?
|
||||
(cond ; accumulate mode
|
||||
((beg-pred node) => ; see if the node starts the skip range
|
||||
(lambda (repl-branches) ; if so, skip/replace the node
|
||||
(loop (cdr forest) #f
|
||||
(append (reverse repl-branches) new-forest))))
|
||||
((not (pair? node)) ; it's an atom, keep it
|
||||
(loop (cdr forest) keep? (cons node new-forest)))
|
||||
(else
|
||||
(let*-values
|
||||
(((node?) (symbol? (car node))) ; or is it a nodelist?
|
||||
((new-kids keep?) ; traverse its children
|
||||
(loop (if node? (cdr node) node) #t '())))
|
||||
(loop (cdr forest) keep?
|
||||
(cons
|
||||
(if node? (cons (car node) new-kids) new-kids)
|
||||
new-forest)))))
|
||||
; skip mode
|
||||
(cond
|
||||
((end-pred node) => ; end the skip range
|
||||
(lambda (repl-branches) ; repl-branches will be re-scanned
|
||||
(loop (append repl-branches (cdr forest)) #t
|
||||
new-forest)))
|
||||
((not (pair? node)) ; it's an atom, skip it
|
||||
(loop (cdr forest) keep? new-forest))
|
||||
(else
|
||||
(let*-values
|
||||
(((node?) (symbol? (car node))) ; or is it a nodelist?
|
||||
((new-kids keep?) ; traverse its children
|
||||
(loop (if node? (cdr node) node) #f '())))
|
||||
(loop (cdr forest) keep?
|
||||
(if (or keep? (pair? new-kids))
|
||||
(cons
|
||||
(if node? (cons (car node) new-kids) new-kids)
|
||||
new-forest)
|
||||
new-forest) ; if all kids are skipped
|
||||
)))))))) ; skip the node too
|
||||
|
||||
(let*-values (((new-forest keep?) (loop forest #t '())))
|
||||
new-forest))
|
||||
|
File diff suppressed because it is too large
Load Diff
|
@ -1,13 +0,0 @@
|
|||
(module ascii mzscheme
|
||||
|
||||
(provide ascii->char
|
||||
char->ascii
|
||||
ascii-limit
|
||||
ascii-whitespaces)
|
||||
|
||||
(define ascii->char integer->char)
|
||||
(define char->ascii char->integer)
|
||||
(define ascii-limit 256)
|
||||
(define ascii-whitespaces '(32 9 10 11 12 13)))
|
||||
|
||||
|
|
@ -1,38 +0,0 @@
|
|||
(module assertions mzscheme
|
||||
|
||||
(provide assert assure)
|
||||
;
|
||||
; syntax: assert ?expr ?expr ... [report: ?r-exp ?r-exp ...]
|
||||
;
|
||||
; If (and ?expr ?expr ...) evaluates to anything but #f, the result
|
||||
; is the value of that expression.
|
||||
; If (and ?expr ?expr ...) evaluates to #f, an error is reported.
|
||||
; The error message will show the failed expressions, as well
|
||||
; as the values of selected variables (or expressions, in general).
|
||||
; The user may explicitly specify the expressions whose
|
||||
; values are to be printed upon assertion failure -- as ?r-exp that
|
||||
; follow the identifier 'report:'
|
||||
; Typically, ?r-exp is either a variable or a string constant.
|
||||
; If the user specified no ?r-exp, the values of variables that are
|
||||
; referenced in ?expr will be printed upon the assertion failure.
|
||||
|
||||
(define-syntax assert
|
||||
(syntax-rules (report:)
|
||||
((assert "doit" (expr ...) (r-exp ...))
|
||||
(cond
|
||||
((and expr ...) => (lambda (x) x))
|
||||
(else
|
||||
(error 'error "assertion failure: ~a" (list '(and expr ...) r-exp ...)))))
|
||||
((assert "collect" (expr ...))
|
||||
(assert "doit" (expr ...) ()))
|
||||
((assert "collect" (expr ...) report: r-exp ...)
|
||||
(assert "doit" (expr ...) (r-exp ...)))
|
||||
((assert "collect" (expr ...) expr1 stuff ...)
|
||||
(assert "collect" (expr ... expr1) stuff ...))
|
||||
((assert stuff ...)
|
||||
(assert "collect" () stuff ...))))
|
||||
|
||||
(define-syntax assure
|
||||
(syntax-rules ()
|
||||
((assure exp error-msg)
|
||||
(assert exp report: error-msg)))))
|
|
@ -1,15 +0,0 @@
|
|||
(module catch-errors mzscheme
|
||||
|
||||
(provide failed?)
|
||||
|
||||
(define-syntax failed?
|
||||
(syntax-rules ()
|
||||
((failed? stmts ...)
|
||||
(thunk-failed? (lambda () stmts ...)))))
|
||||
(define (thunk-failed? thunk)
|
||||
(call-with-current-continuation
|
||||
(lambda (return)
|
||||
(with-handlers
|
||||
(((lambda (x) #t) (lambda (exn) #t)))
|
||||
(thunk)
|
||||
#f)))))
|
|
@ -1,11 +0,0 @@
|
|||
(module char-encodings mzscheme
|
||||
|
||||
(require "ascii.ss")
|
||||
|
||||
(provide ucscode->char
|
||||
char-return char-tab char-newline)
|
||||
|
||||
(define ucscode->char ascii->char)
|
||||
(define char-return (ascii->char 13))
|
||||
(define char-tab (ascii->char 9))
|
||||
(define char-newline (ascii->char 10)))
|
|
@ -1,5 +0,0 @@
|
|||
(module coutputs mzscheme
|
||||
(provide cout cerr nl)
|
||||
|
||||
(require (lib "include.ss"))
|
||||
(include "output.scm"))
|
|
@ -1,6 +0,0 @@
|
|||
(module crementing mzscheme
|
||||
|
||||
(provide dec inc)
|
||||
|
||||
(define (inc n) (+ n 1))
|
||||
(define (dec n) (- n 1)))
|
|
@ -1,38 +0,0 @@
|
|||
(define-syntax define-opt
|
||||
(syntax-rules (optional)
|
||||
((define-opt (name . bindings) . bodies)
|
||||
(define-opt "seek-optional" bindings () ((name . bindings) . bodies)))
|
||||
|
||||
((define-opt "seek-optional" ((optional . _opt-bindings))
|
||||
(reqd ...) ((name . _bindings) . _bodies))
|
||||
(define (name reqd ... . _rest)
|
||||
(letrec-syntax
|
||||
((handle-opts
|
||||
(syntax-rules ()
|
||||
((_ rest bodies (var init))
|
||||
(let ((var (if (null? rest) init
|
||||
(if (null? (cdr rest)) (car rest)
|
||||
(error "extra rest" rest)))))
|
||||
. bodies))
|
||||
((_ rest bodies var) (handle-opts rest bodies (var #f)))
|
||||
((_ rest bodies (var init) . other-vars)
|
||||
(let ((var (if (null? rest) init (car rest)))
|
||||
(new-rest (if (null? rest) '() (cdr rest))))
|
||||
(handle-opts new-rest bodies . other-vars)))
|
||||
((_ rest bodies var . other-vars)
|
||||
(handle-opts rest bodies (var #f) . other-vars))
|
||||
((_ rest bodies) ; no optional args, unlikely
|
||||
(let ((_ (or (null? rest) (error "extra rest" rest))))
|
||||
. bodies)))))
|
||||
(handle-opts _rest _bodies . _opt-bindings))))
|
||||
|
||||
((define-opt "seek-optional" (x . rest) (reqd ...) form)
|
||||
(define-opt "seek-optional" rest (reqd ... x) form))
|
||||
|
||||
((define-opt "seek-optional" not-a-pair reqd form)
|
||||
(define . form)) ; No optional found, regular define
|
||||
|
||||
((define-opt name body) ; Just the definition for 'name',
|
||||
(define name body)) ; for compatibilibility with define
|
||||
))
|
||||
|
|
@ -1,5 +0,0 @@
|
|||
(module define-opt mzscheme
|
||||
(provide define-opt)
|
||||
|
||||
(require (lib "include.ss"))
|
||||
(include "define-opt.scm"))
|
|
@ -1,696 +0,0 @@
|
|||
_SSAX_/_SXML_ Library
|
||||
=====================
|
||||
|
||||
These is Oleg Kiselyov's SSAX/SXML library. For more documentation
|
||||
and papers, refer to
|
||||
|
||||
http://ssax.sourceforge.net/
|
||||
|
||||
SSAX
|
||||
|
||||
================================================================
|
||||
_ssax.ss_
|
||||
================================================================
|
||||
|
||||
To load the module, do
|
||||
(require (lib "ssax.ss" "ssax"))
|
||||
|
||||
Besides the standard SSAX procedures described below, this defines:
|
||||
|
||||
> ssax:warn-parameter - Parameter for the procedure to display
|
||||
warnings. It must accept a port, a warning message, and any number
|
||||
of additional arguments.
|
||||
|
||||
> make-xml-token KIND HEAD
|
||||
This creates an XML token.
|
||||
|
||||
> xml-token? THING
|
||||
|
||||
> xml-token-kind XML-TOKEN
|
||||
|
||||
> xml-token-head XML-TOKEN
|
||||
|
||||
> ssax:read-markup-token PORT
|
||||
This procedure starts parsing of a markup token. The current position
|
||||
in the stream must be #\<. This procedure scans enough of the input stream
|
||||
to figure out what kind of a markup token it is seeing. The procedure returns
|
||||
an xml-token structure describing the token. Note, generally reading
|
||||
of the current markup is not finished! In particular, no attributes of
|
||||
the start-tag token are scanned.
|
||||
|
||||
Here's a detailed break out of the return values and the position in the PORT
|
||||
when that particular value is returned:
|
||||
PI-token: only PI-target is read.
|
||||
To finish the Processing Instruction and disregard it,
|
||||
call ssax:skip-pi. ssax:read-attributes may be useful
|
||||
as well (for PIs whose content is attribute-value
|
||||
pairs)
|
||||
END-token: The end tag is read completely; the current position
|
||||
is right after the terminating #\> character.
|
||||
COMMENT is read and skipped completely. The current position
|
||||
is right after "-->" that terminates the comment.
|
||||
CDSECT The current position is right after "<!CDATA["
|
||||
Use ssax:read-cdata-body to read the rest.
|
||||
DECL We have read the keyword (the one that follows "<!")
|
||||
identifying this declaration markup. The current
|
||||
position is after the keyword (usually a
|
||||
whitespace character)
|
||||
|
||||
START-token We have read the keyword (GI) of this start tag.
|
||||
No attributes are scanned yet. We don't know if this
|
||||
tag has an empty content either.
|
||||
Use ssax:complete-start-tag to finish parsing of
|
||||
the token.
|
||||
|
||||
|
||||
> ssax:read-pi-body-as-string PORT
|
||||
The current position is right after reading the PITarget. We read the
|
||||
body of PI and return is as a string. The port will point to the
|
||||
character right after '?>' combination that terminates PI.
|
||||
[16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
|
||||
|
||||
|
||||
> ssax:skip-internal-dtd PORT
|
||||
The current pos in the port is inside an internal DTD subset
|
||||
(e.g., after reading #\[ that begins an internal DTD subset)
|
||||
Skip until the "]>" combination that terminates this DTD
|
||||
|
||||
> ssax:read-cdata-body PORT STR-HANDLER SEED
|
||||
|
||||
This procedure must be called after we have read a string "<![CDATA["
|
||||
that begins a CDATA section. The current position must be the first
|
||||
position of the CDATA body. This function reads _lines_ of the CDATA
|
||||
body and passes them to a STR-HANDLER, a character data consumer.
|
||||
|
||||
The str-handler is a STR-HANDLER, a procedure STRING1 STRING2 SEED.
|
||||
The first STRING1 argument to STR-HANDLER never contains a newline.
|
||||
The second STRING2 argument often will. On the first invocation of
|
||||
the STR-HANDLER, the seed is the one passed to ssax:read-cdata-body
|
||||
as the third argument. The result of this first invocation will be
|
||||
passed as the seed argument to the second invocation of the line
|
||||
consumer, and so on. The result of the last invocation of the
|
||||
STR-HANDLER is returned by the ssax:read-cdata-body. Note a
|
||||
similarity to the fundamental 'fold' iterator.
|
||||
|
||||
Within a CDATA section all characters are taken at their face value,
|
||||
with only three exceptions:
|
||||
CR, LF, and CRLF are treated as line delimiters, and passed
|
||||
as a single #\newline to the STR-HANDLER
|
||||
"]]>" combination is the end of the CDATA section.
|
||||
> is treated as an embedded #\> character
|
||||
Note, < and & are not specially recognized (and are not expanded)!
|
||||
|
||||
|
||||
> ssax:read-char-ref PORT
|
||||
|
||||
[66] CharRef ::= '&#' [0-9]+ ';'
|
||||
| '&#x' [0-9a-fA-F]+ ';'
|
||||
|
||||
This procedure must be called after we we have read "&#"
|
||||
that introduces a char reference.
|
||||
The procedure reads this reference and returns the corresponding char
|
||||
The current position in PORT will be after ";" that terminates
|
||||
the char reference
|
||||
Faults detected:
|
||||
WFC: XML-Spec.html#wf-Legalchar
|
||||
|
||||
According to Section "4.1 Character and Entity References"
|
||||
of the XML Recommendation:
|
||||
"[Definition: A character reference refers to a specific character
|
||||
in the ISO/IEC 10646 character set, for example one not directly
|
||||
accessible from available input devices.]"
|
||||
Therefore, we use a ucscode->char function to convert a character
|
||||
code into the character -- *regardless* of the current character
|
||||
encoding of the input stream.
|
||||
|
||||
|
||||
> ssax:handle-parsed-entity PORT NAME ENTITIES
|
||||
CONTENT-HANDLER STR-HANDLER SEED
|
||||
|
||||
Expand and handle a parsed-entity reference
|
||||
port - a PORT
|
||||
name - the name of the parsed entity to expand, a symbol
|
||||
entities - see ENTITIES
|
||||
content-handler -- procedure PORT ENTITIES SEED
|
||||
that is supposed to return a SEED
|
||||
str-handler - a STR-HANDLER. It is called if the entity in question
|
||||
turns out to be a pre-declared entity
|
||||
|
||||
The result is the one returned by CONTENT-HANDLER or STR-HANDLER
|
||||
Faults detected:
|
||||
WFC: XML-Spec.html#wf-entdeclared
|
||||
WFC: XML-Spec.html#norecursion
|
||||
|
||||
|
||||
> make-empty-attlist
|
||||
The ATTLIST Abstract Data Type
|
||||
Currently is implemented as an assoc list sorted in the ascending
|
||||
order of NAMES.
|
||||
|
||||
|
||||
> attlist-add ATTLIST NAME-VALUE-PAIR
|
||||
Add a name-value pair to the existing attlist preserving the order
|
||||
Return the new list, in the sorted ascending order.
|
||||
Return #f if a pair with the same name already exists in the attlist
|
||||
|
||||
|
||||
> attlist-null? ATTLIST
|
||||
|
||||
> attlist-remove-top ATTLIST
|
||||
Given an non-null attlist, return a pair of values: the top and the rest
|
||||
|
||||
> attliast->alist
|
||||
|
||||
> attlist-fold
|
||||
|
||||
> ssax:read-attributes PORT ENTITIES
|
||||
|
||||
This procedure reads and parses a production Attribute*
|
||||
[41] Attribute ::= Name Eq AttValue
|
||||
[10] AttValue ::= '"' ([^<&"] | Reference)* '"'
|
||||
| "'" ([^<&'] | Reference)* "'"
|
||||
[25] Eq ::= S? '=' S?
|
||||
|
||||
|
||||
The procedure returns an ATTLIST, of Name (as UNRES-NAME), Value (as string)
|
||||
pairs. The current character on the PORT is a non-whitespace character
|
||||
that is not an ncname-starting character.
|
||||
|
||||
Note the following rules to keep in mind when reading an 'AttValue'
|
||||
"Before the value of an attribute is passed to the application
|
||||
or checked for validity, the XML processor must normalize it as follows:
|
||||
- a character reference is processed by appending the referenced
|
||||
character to the attribute value
|
||||
- an entity reference is processed by recursively processing the
|
||||
replacement text of the entity [see ENTITIES]
|
||||
[named entities amp lt gt quot apos are assumed pre-declared]
|
||||
- a whitespace character (#x20, #xD, #xA, #x9) is processed by appending #x20
|
||||
to the normalized value, except that only a single #x20 is appended for a
|
||||
"#xD#xA" sequence that is part of an external parsed entity or the
|
||||
literal entity value of an internal parsed entity
|
||||
- other characters are processed by appending them to the normalized value "
|
||||
|
||||
|
||||
Faults detected:
|
||||
WFC: XML-Spec.html#CleanAttrVals
|
||||
WFC: XML-Spec.html#uniqattspec
|
||||
|
||||
|
||||
> ssax:uri-string->symbol URI-STR
|
||||
Convert a URI-STR to an appropriate symbol
|
||||
|
||||
> ssax:complete-start-tag TAG PORT ELEMS ENTITIES NAMESPACES
|
||||
|
||||
This procedure is to complete parsing of a start-tag markup. The
|
||||
procedure must be called after the start tag token has been
|
||||
read. TAG is an UNRES-NAME. ELEMS is an instance of xml-decl::elems;
|
||||
it can be #f to tell the function to do _no_ validation of elements
|
||||
and their attributes.
|
||||
|
||||
This procedure returns several values:
|
||||
ELEM-GI: a RES-NAME.
|
||||
ATTRIBUTES: element's attributes, an ATTLIST of (RES-NAME . STRING)
|
||||
pairs. The list does NOT include xmlns attributes.
|
||||
NAMESPACES: the input list of namespaces amended with namespace
|
||||
(re-)declarations contained within the start-tag under parsing
|
||||
ELEM-CONTENT-MODEL
|
||||
|
||||
On exit, the current position in PORT will be the first character after
|
||||
#\> that terminates the start-tag markup.
|
||||
|
||||
Faults detected:
|
||||
VC: XML-Spec.html#enum
|
||||
VC: XML-Spec.html#RequiredAttr
|
||||
VC: XML-Spec.html#FixedAttr
|
||||
VC: XML-Spec.html#ValueType
|
||||
WFC: XML-Spec.html#uniqattspec (after namespaces prefixes are resolved)
|
||||
VC: XML-Spec.html#elementvalid
|
||||
WFC: REC-xml-names/#dt-NSName
|
||||
|
||||
Note, although XML Recommendation does not explicitly say it,
|
||||
xmlns and xmlns: attributes don't have to be declared (although they
|
||||
can be declared, to specify their default value)
|
||||
|
||||
Procedure: ssax:complete-start-tag tag-head port elems entities namespaces
|
||||
|
||||
> ssax:read-external-id PORT
|
||||
|
||||
This procedure parses an ExternalID production:
|
||||
[75] ExternalID ::= 'SYSTEM' S SystemLiteral
|
||||
| 'PUBLIC' S PubidLiteral S SystemLiteral
|
||||
[11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
|
||||
[12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
|
||||
[13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9]
|
||||
| [-'()+,./:=?;!*#@$_%]
|
||||
|
||||
This procedure is supposed to be called when an ExternalID is expected;
|
||||
that is, the current character must be either #\S or #\P that start
|
||||
correspondingly a SYSTEM or PUBLIC token. This procedure returns the
|
||||
SystemLiteral as a string. A PubidLiteral is disregarded if present.
|
||||
|
||||
> ssax:read-char-data PORT EXPECT-EOF? STR-HANDLER SEED
|
||||
|
||||
This procedure is to read the character content of an XML document
|
||||
or an XML element.
|
||||
[43] content ::=
|
||||
(element | CharData | Reference | CDSect | PI
|
||||
| Comment)*
|
||||
To be more precise, the procedure reads CharData, expands CDSect
|
||||
and character entities, and skips comments. The procedure stops
|
||||
at a named reference, EOF, at the beginning of a PI or a start/end tag.
|
||||
|
||||
port
|
||||
a PORT to read
|
||||
expect-eof?
|
||||
a boolean indicating if EOF is normal, i.e., the character
|
||||
data may be terminated by the EOF. EOF is normal
|
||||
while processing a parsed entity.
|
||||
str-handler
|
||||
a STR-HANDLER
|
||||
seed
|
||||
an argument passed to the first invocation of STR-HANDLER.
|
||||
|
||||
The procedure returns two results: SEED and TOKEN.
|
||||
The SEED is the result of the last invocation of STR-HANDLER, or the
|
||||
original seed if STR-HANDLER was never called.
|
||||
|
||||
TOKEN can be either an eof-object (this can happen only if
|
||||
expect-eof? was #t), or:
|
||||
- an xml-token describing a START tag or an END-tag;
|
||||
For a start token, the caller has to finish reading it.
|
||||
- an xml-token describing the beginning of a PI. It's up to an
|
||||
application to read or skip through the rest of this PI;
|
||||
- an xml-token describing a named entity reference.
|
||||
|
||||
CDATA sections and character references are expanded inline and
|
||||
never returned. Comments are silently disregarded.
|
||||
|
||||
As the XML Recommendation requires, all whitespace in character data
|
||||
must be preserved. However, a CR character (#xD) must be disregarded
|
||||
if it appears before a LF character (#xA), or replaced by a #xA character
|
||||
otherwise. See Secs. 2.10 and 2.11 of the XML Recommendation. See also
|
||||
the canonical XML Recommendation.
|
||||
|
||||
|
||||
> ssax:assert-token TOKEN KIND GI
|
||||
Make sure that TOKEN is of anticipated KIND and has anticipated GI
|
||||
Note GI argument may actually be a pair of two symbols, Namespace
|
||||
URI or the prefix, and of the localname.
|
||||
If the assertion fails, error-cont is evaluated by passing it
|
||||
three arguments: token kind gi. The result of error-cont is returned.
|
||||
|
||||
> ssax:make-pi-parser my-pi-handlers
|
||||
Create a parser to parse and process one Processing Element (PI).
|
||||
|
||||
my-pi-handlers
|
||||
An assoc list of pairs (PI-TAG . PI-HANDLER)
|
||||
where PI-TAG is an NCName symbol, the PI target, and
|
||||
PI-HANDLER is a procedure PORT PI-TAG SEED
|
||||
where PORT points to the first symbol after the PI target.
|
||||
The handler should read the rest of the PI up to and including
|
||||
the combination '?>' that terminates the PI. The handler should
|
||||
return a new seed.
|
||||
One of the PI-TAGs may be the symbol *DEFAULT*. The corresponding
|
||||
handler will handle PIs that no other handler will. If the
|
||||
*DEFAULT* PI-TAG is not specified, ssax:make-pi-parser will assume
|
||||
the default handler that skips the body of the PI
|
||||
|
||||
The output of the ssax:make-pi-parser is a procedure
|
||||
PORT PI-TAG SEED
|
||||
that will parse the current PI according to the user-specified handlers.
|
||||
|
||||
The previous version of ssax:make-pi-parser was a low-level macro:
|
||||
(define-macro ssax:make-pi-parser
|
||||
(lambda (my-pi-handlers)
|
||||
`(lambda (port target seed)
|
||||
(case target
|
||||
; Generate the body of the case statement
|
||||
,@(let loop ((pi-handlers my-pi-handlers) (default #f))
|
||||
(cond
|
||||
((null? pi-handlers)
|
||||
(if default `((else (,default port target seed)))
|
||||
'((else
|
||||
(ssax:warn port "Skipping PI: " target nl)
|
||||
(ssax:skip-pi port)
|
||||
seed))))
|
||||
((eq? '*DEFAULT* (caar pi-handlers))
|
||||
(loop (cdr pi-handlers) (cdar pi-handlers)))
|
||||
(else
|
||||
(cons
|
||||
`((,(caar pi-handlers)) (,(cdar pi-handlers) port target seed))
|
||||
(loop (cdr pi-handlers) default)))))))))
|
||||
|
||||
|
||||
> ssax:make-elem-parser my-new-level-seed my-finish-element
|
||||
my-char-data-handler my-pi-handlers
|
||||
|
||||
Create a parser to parse and process one element, including its
|
||||
character content or children elements. The parser is typically
|
||||
applied to the root element of a document.
|
||||
|
||||
my-new-level-seed
|
||||
procedure ELEM-GI ATTRIBUTES NAMESPACES EXPECTED-CONTENT SEED
|
||||
where ELEM-GI is a RES-NAME of the element
|
||||
about to be processed.
|
||||
This procedure is to generate the seed to be passed
|
||||
to handlers that process the content of the element.
|
||||
This is the function identified as 'fdown' in the denotational
|
||||
semantics of the XML parser given in the title comments to this
|
||||
file.
|
||||
|
||||
my-finish-element
|
||||
procedure ELEM-GI ATTRIBUTES NAMESPACES PARENT-SEED SEED
|
||||
This procedure is called when parsing of ELEM-GI is finished.
|
||||
The SEED is the result from the last content parser (or
|
||||
from my-new-level-seed if the element has the empty content).
|
||||
PARENT-SEED is the same seed as was passed to my-new-level-seed.
|
||||
The procedure is to generate a seed that will be the result
|
||||
of the element parser.
|
||||
This is the function identified as 'fup' in the denotational
|
||||
semantics of the XML parser given in the title comments to this
|
||||
file.
|
||||
|
||||
my-char-data-handler
|
||||
A STR-HANDLER
|
||||
|
||||
my-pi-handlers
|
||||
See ssax:make-pi-handler above
|
||||
|
||||
|
||||
The generated parser is a
|
||||
procedure START-TAG-HEAD PORT ELEMS ENTITIES
|
||||
NAMESPACES PRESERVE-WS? SEED
|
||||
The procedure must be called after the start tag token has been
|
||||
read. START-TAG-HEAD is an UNRES-NAME from the start-element tag.
|
||||
ELEMS is an instance of xml-decl::elems.
|
||||
See ssax:complete-start-tag::preserve-ws?
|
||||
|
||||
Faults detected:
|
||||
VC: XML-Spec.html#elementvalid
|
||||
WFC: XML-Spec.html#GIMatch
|
||||
|
||||
|
||||
|
||||
> ssax:make-parser user-handler-tag user-handler-proc ...
|
||||
|
||||
Create an XML parser, an instance of the XML parsing framework.
|
||||
This will be a SAX, a DOM, or a specialized parser depending
|
||||
on the supplied user-handlers.
|
||||
|
||||
user-handler-tag is a symbol that identifies a procedural expression
|
||||
that follows the tag. Given below are tags and signatures of the
|
||||
corresponding procedures. Not all tags have to be specified. If some
|
||||
are omitted, reasonable defaults will apply.
|
||||
|
||||
|
||||
tag: DOCTYPE
|
||||
handler-procedure: PORT DOCNAME SYSTEMID INTERNAL-SUBSET? SEED
|
||||
If internal-subset? is #t, the current position in the port
|
||||
is right after we have read #\[ that begins the internal DTD subset.
|
||||
We must finish reading of this subset before we return
|
||||
(or must call skip-internal-subset if we aren't interested in reading it).
|
||||
The port at exit must be at the first symbol after the whole
|
||||
DOCTYPE declaration.
|
||||
The handler-procedure must generate four values:
|
||||
ELEMS ENTITIES NAMESPACES SEED
|
||||
See xml-decl::elems for ELEMS. It may be #f to switch off the validation.
|
||||
NAMESPACES will typically contain USER-PREFIXes for selected URI-SYMBs.
|
||||
The default handler-procedure skips the internal subset,
|
||||
if any, and returns (values #f '() '() seed)
|
||||
|
||||
tag: UNDECL-ROOT
|
||||
handler-procedure: ELEM-GI SEED
|
||||
where ELEM-GI is an UNRES-NAME of the root element. This procedure
|
||||
is called when an XML document under parsing contains _no_ DOCTYPE
|
||||
declaration.
|
||||
The handler-procedure, as a DOCTYPE handler procedure above,
|
||||
must generate four values:
|
||||
ELEMS ENTITIES NAMESPACES SEED
|
||||
The default handler-procedure returns (values #f '() '() seed)
|
||||
|
||||
tag: DECL-ROOT
|
||||
handler-procedure: ELEM-GI SEED
|
||||
where ELEM-GI is an UNRES-NAME of the root element. This procedure
|
||||
is called when an XML document under parsing does contains the DOCTYPE
|
||||
declaration.
|
||||
The handler-procedure must generate a new SEED (and verify
|
||||
that the name of the root element matches the doctype, if the handler
|
||||
so wishes).
|
||||
The default handler-procedure is the identity function.
|
||||
|
||||
tag: NEW-LEVEL-SEED
|
||||
handler-procedure: see ssax:make-elem-parser, my-new-level-seed
|
||||
|
||||
tag: FINISH-ELEMENT
|
||||
handler-procedure: see ssax:make-elem-parser, my-finish-element
|
||||
|
||||
tag: CHAR-DATA-HANDLER
|
||||
handler-procedure: see ssax:make-elem-parser, my-char-data-handler
|
||||
|
||||
tag: PI
|
||||
handler-procedure: see ssax:make-pi-parser
|
||||
The default value is '()
|
||||
|
||||
> ssax:reverse-collect-str LIST-OF-FRAGS -> LIST-OF-FRAGS
|
||||
given the list of fragments (some of which are text strings)
|
||||
reverse the list and concatenate adjacent text strings.
|
||||
We can prove from the general case below that if LIST-OF-FRAGS
|
||||
has zero or one element, the result of the procedure is equal?
|
||||
to its argument. This fact justifies the shortcut evaluation below.
|
||||
|
||||
> ssax:xml->sxml PORT NAMESPACE-PREFIX-ASSIG
|
||||
|
||||
This is an instance of a SSAX parser above that returns an SXML
|
||||
representation of the XML document to be read from PORT.
|
||||
NAMESPACE-PREFIX-ASSIG is a list of (USER-PREFIX . URI-STRING)
|
||||
that assigns USER-PREFIXes to certain namespaces identified by
|
||||
particular URI-STRINGs. It may be an empty list.
|
||||
The procedure returns an SXML tree. The port points out to the
|
||||
first character after the root element.
|
||||
|
||||
|
||||
|
||||
|
||||
================================================================
|
||||
_input-parse.ss_
|
||||
================================================================
|
||||
|
||||
To load the module, do
|
||||
(require (lib "input-parse.ss" "ssax"))
|
||||
|
||||
This defines
|
||||
|
||||
> (define-struct (exn:ssax exn) (port stuff))
|
||||
|
||||
for parser errors raised by SSAX via RAISE. PORT contains the port on
|
||||
which the error happens, STUFF is a list of additional, but random
|
||||
information.
|
||||
|
||||
Note that this also exports a procedure called read-string, which
|
||||
conflicts with the procedure of the same name in the mzscheme
|
||||
language. The ssax collection comes with a module called
|
||||
restricted-mzscheme that is identical to mzscheme, except that it
|
||||
omits read-string.
|
||||
|
||||
> parser-error PORT MESSAGE SPECIALISING-MSG*
|
||||
Many procedures of this package call parser-error to report a parsing
|
||||
error. The first argument is a port, which typically points to the
|
||||
offending character or its neighborhood. Most of the Scheme systems
|
||||
let the user query a PORT for the current position. MESSAGE is the
|
||||
description of the error. Other arguments supply more details about
|
||||
the problem.
|
||||
|
||||
================================================================
|
||||
_sxml-tree-trans.ss_
|
||||
================================================================
|
||||
|
||||
To load the module, do
|
||||
(require (lib "sxml-tree-trans.ss" "ssax"))
|
||||
|
||||
> SRV:send-reply FRAGMENT ...
|
||||
|
||||
Output the 'fragments'
|
||||
The fragments are a list of strings, characters,
|
||||
numbers, thunks, #f, #t -- and other fragments.
|
||||
The function traverses the tree depth-first, writes out
|
||||
strings and characters, executes thunks, and ignores
|
||||
#f and '().
|
||||
The function returns #t if anything was written at all;
|
||||
otherwise the result is #f
|
||||
If #t occurs among the fragments, it is not written out
|
||||
but causes the result of SRV:send-reply to be #t
|
||||
|
||||
|
||||
> pre-post-order TREE BINDINGS
|
||||
|
||||
Traversal of an SXML tree or a grove:
|
||||
a <Node> or a <Nodelist>
|
||||
|
||||
A <Node> and a <Nodelist> are mutually-recursive datatypes that
|
||||
underlie the SXML tree:
|
||||
<Node> ::= (name . <Nodelist>) | "text string"
|
||||
An (ordered) set of nodes is just a list of the constituent nodes:
|
||||
<Nodelist> ::= (<Node> ...)
|
||||
Nodelists, and Nodes other than text strings are both lists. A
|
||||
<Nodelist> however is either an empty list, or a list whose head is
|
||||
not a symbol (an atom in general). A symbol at the head of a node is
|
||||
either an XML name (in which case it's a tag of an XML element), or
|
||||
an administrative name such as '@'.
|
||||
See SXPath.scm and SSAX.scm for more information on SXML.
|
||||
|
||||
|
||||
Pre-Post-order traversal of a tree and creation of a new tree:
|
||||
pre-post-order:: <tree> x <bindings> -> <new-tree>
|
||||
where
|
||||
<bindings> ::= (<binding> ...)
|
||||
<binding> ::= (<trigger-symbol> *preorder* . <handler>) |
|
||||
(<trigger-symbol> *macro* . <handler>) |
|
||||
(<trigger-symbol> <new-bindings> . <handler>) |
|
||||
(<trigger-symbol> . <handler>)
|
||||
<trigger-symbol> ::= XMLname | *text* | *default*
|
||||
<handler> :: <trigger-symbol> x [<tree>] -> <new-tree>
|
||||
|
||||
The pre-post-order function visits the nodes and nodelists
|
||||
pre-post-order (depth-first). For each <Node> of the form (name
|
||||
<Node> ...) it looks up an association with the given 'name' among
|
||||
its <bindings>. If failed, pre-post-order tries to locate a
|
||||
*default* binding. It's an error if the latter attempt fails as
|
||||
well. Having found a binding, the pre-post-order function first
|
||||
checks to see if the binding is of the form
|
||||
(<trigger-symbol> *preorder* . <handler>)
|
||||
If it is, the handler is 'applied' to the current node. Otherwise,
|
||||
the pre-post-order function first calls itself recursively for each
|
||||
child of the current node, with <new-bindings> prepended to the
|
||||
<bindings> in effect. The result of these calls is passed to the
|
||||
<handler> (along with the head of the current <Node>). To be more
|
||||
precise, the handler is _applied_ to the head of the current node
|
||||
and its processed children. The result of the handler, which should
|
||||
also be a <tree>, replaces the current <Node>. If the current <Node>
|
||||
is a text string or other atom, a special binding with a symbol
|
||||
*text* is looked up.
|
||||
|
||||
A binding can also be of a form
|
||||
(<trigger-symbol> *macro* . <handler>)
|
||||
This is equivalent to *preorder* described above. However, the result
|
||||
is re-processed again, with the current stylesheet.
|
||||
|
||||
|
||||
> post-order TREE BINDINGS
|
||||
post-order is a strict subset of pre-post-order without *preorder*
|
||||
(let alone *macro*) traversals.
|
||||
Now pre-post-order is actually faster than the old post-order.
|
||||
The function post-order is deprecated and is aliased below for
|
||||
backward compatibility.
|
||||
|
||||
> replace-range:: BEG-PRED x END-PRED x FOREST -> FOREST
|
||||
Traverse a forest depth-first and cut/replace ranges of nodes.
|
||||
|
||||
The nodes that define a range don't have to have the same immediate
|
||||
parent, don't have to be on the same level, and the end node of a
|
||||
range doesn't even have to exist. A replace-range procedure removes
|
||||
nodes from the beginning node of the range up to (but not including)
|
||||
the end node of the range. In addition, the beginning node of the
|
||||
range can be replaced by a node or a list of nodes. The range of
|
||||
nodes is cut while depth-first traversing the forest. If all
|
||||
branches of the node are cut a node is cut as well. The procedure
|
||||
can cut several non-overlapping ranges from a forest.
|
||||
|
||||
replace-range:: BEG-PRED x END-PRED x FOREST -> FOREST
|
||||
where
|
||||
type FOREST = (NODE ...)
|
||||
type NODE = Atom | (Name . FOREST) | FOREST
|
||||
|
||||
The range of nodes is specified by two predicates, beg-pred and end-pred.
|
||||
beg-pred:: NODE -> #f | FOREST
|
||||
end-pred:: NODE -> #f | FOREST
|
||||
The beg-pred predicate decides on the beginning of the range. The node
|
||||
for which the predicate yields non-#f marks the beginning of the range
|
||||
The non-#f value of the predicate replaces the node. The value can be a
|
||||
list of nodes. The replace-range procedure then traverses the tree and skips
|
||||
all the nodes, until the end-pred yields non-#f. The value of the end-pred
|
||||
replaces the end-range node. The new end node and its brothers will be
|
||||
re-scanned.
|
||||
The predicates are evaluated pre-order. We do not descend into a node that
|
||||
is marked as the beginning of the range.
|
||||
|
||||
|
||||
|
||||
|
||||
================================================================
|
||||
_sxml-to-html.ss_
|
||||
================================================================
|
||||
|
||||
To load the module, do
|
||||
(require (lib "sxml-to-html.ss" "ssax"))
|
||||
|
||||
> SXML->HTML TREE
|
||||
|
||||
The following procedure is the most generic transformation of SXML
|
||||
into the corresponding HTML document. The SXML tree is traversed
|
||||
post-oder (depth-first) and transformed into another tree, which,
|
||||
written in a depth-first fashion, results in an HTML document.
|
||||
|
||||
> entag TAG ELEMS
|
||||
Create the HTML markup for tags.
|
||||
This is used in the node handlers for the post-order function, see
|
||||
above.
|
||||
|
||||
|
||||
> enattr ATTR-KEY VALUE
|
||||
Create the HTML markup for attributes.
|
||||
This and entag are being used in the node handlers for the post-order function, see
|
||||
above.
|
||||
|
||||
> string->goodHTML STRING
|
||||
Given a string, check to make sure it does not contain characters
|
||||
such as '<' or '&' that require encoding. Return either the original
|
||||
string, or a list of string fragments with special characters
|
||||
replaced by appropriate character entities.
|
||||
|
||||
|
||||
|
||||
|
||||
================================================================
|
||||
_sxml-to-html-ext.ss_
|
||||
================================================================
|
||||
|
||||
To load the module, do
|
||||
(require (lib "sxml-to-html-ext.ss" "ssax"))
|
||||
|
||||
> make-header HEAD-PARMS
|
||||
Create the 'head' SXML/HTML tag. HEAD-PARMS is an assoc list of
|
||||
(h-key h-value), where h-value is a typically string;
|
||||
h-key is a symbol:
|
||||
title, description, AuthorAddress, keywords,
|
||||
Date-Revision-yyyymmdd, Date-Creation-yyyymmdd,
|
||||
long-title
|
||||
One of the h-key can be Links.
|
||||
In that case, h-value is a list of
|
||||
(l-key l-href (attr value) ...)
|
||||
where l-key is one of the following:
|
||||
start, contents, prev, next, top, home
|
||||
|
||||
|
||||
> make-navbar: HEAD-PARMS
|
||||
Create a navigational bar. The argument head-parms is the same
|
||||
as the one passed to make-header. We're only concerned with the
|
||||
h-value Links
|
||||
|
||||
> make-footer HEAD-PARMS
|
||||
Create a footer. The argument head-parms is the same
|
||||
as passed to make-header.
|
||||
|
||||
> universal-conversion-rules
|
||||
Bindings for the post-order function, which traverses the SXML tree
|
||||
and converts it to a tree of fragments
|
||||
|
||||
The universal transformation from SXML to HTML. The following rules
|
||||
work for every HTML, present and future
|
||||
|
||||
> universal-protected-rules
|
||||
A variation of universal-conversion-rules which keeps '<', '>', '&'
|
||||
and similar characters intact. The universal-protected-rules are
|
||||
useful when the tree of fragments has to be traversed one more time.
|
||||
|
||||
> alist-conv-rules
|
||||
The following rules define the identity transformation
|
|
@ -1,8 +0,0 @@
|
|||
(module find-strings mzscheme
|
||||
|
||||
(provide find-string-from-port?)
|
||||
|
||||
(require "crementing.ss")
|
||||
|
||||
(require (lib "include.ss"))
|
||||
(include "look-for-str.scm"))
|
|
@ -1,24 +0,0 @@
|
|||
(module info (lib "infotab.ss" "setup")
|
||||
(define name "SSAX/SXML")
|
||||
(define doc.txt "doc.txt")
|
||||
|
||||
(define help-desk-message "Mz/Mr: (require (lib \"ssax.ss\" \"ssax\"))")
|
||||
|
||||
(define blurb
|
||||
`("The SSAX/SXML collection provides functions for reading, writing, and manipulating XML documents."))
|
||||
|
||||
(define compile-omit-files
|
||||
'("SSAX-code.scm"
|
||||
"SXML-to-HTML-ext.scm"
|
||||
"SXML-to-HTML.scm"
|
||||
"SXML-tree-trans.scm"
|
||||
"SXPath-old.scm"
|
||||
"define-opt.scm"
|
||||
"input-parse.scm"
|
||||
"look-for-str.scm"
|
||||
"output.scm"
|
||||
"ppretty-prints.scm"
|
||||
"util.scm"
|
||||
"lookup-def.scm")))
|
||||
|
||||
|
|
@ -1,326 +0,0 @@
|
|||
;****************************************************************************
|
||||
; Simple Parsing of input
|
||||
;
|
||||
; The following simple functions surprisingly often suffice to parse
|
||||
; an input stream. They either skip, or build and return tokens,
|
||||
; according to inclusion or delimiting semantics. The list of
|
||||
; characters to expect, include, or to break at may vary from one
|
||||
; invocation of a function to another. This allows the functions to
|
||||
; easily parse even context-sensitive languages.
|
||||
;
|
||||
; EOF is generally frowned on, and thrown up upon if encountered.
|
||||
; Exceptions are mentioned specifically. The list of expected characters
|
||||
; (characters to skip until, or break-characters) may include an EOF
|
||||
; "character", which is to be coded as symbol *eof*
|
||||
;
|
||||
; The input stream to parse is specified as a PORT, which is usually
|
||||
; the last (and optional) argument. It defaults to the current input
|
||||
; port if omitted.
|
||||
;
|
||||
; IMPORT
|
||||
; This package relies on a function parser-error, which must be defined
|
||||
; by a user of the package. The function has the following signature:
|
||||
; parser-error PORT MESSAGE SPECIALISING-MSG*
|
||||
; Many procedures of this package call parser-error to report a parsing
|
||||
; error. The first argument is a port, which typically points to the
|
||||
; offending character or its neighborhood. Most of the Scheme systems
|
||||
; let the user query a PORT for the current position. MESSAGE is the
|
||||
; description of the error. Other arguments supply more details about
|
||||
; the problem.
|
||||
; myenv.scm, myenv-bigloo.scm or a similar prelude is assumed.
|
||||
; From SRFI-13, string-concatenate-reverse
|
||||
; If a particular implementation lacks SRFI-13 support, please
|
||||
; include the file srfi-13-local.scm
|
||||
;
|
||||
; $Id: input-parse.scm,v 1.2 2004/11/09 14:11:40 sperber Exp $
|
||||
|
||||
;------------------------------------------------------------------------
|
||||
|
||||
; -- procedure+: peek-next-char [PORT]
|
||||
; advances to the next character in the PORT and peeks at it.
|
||||
; This function is useful when parsing LR(1)-type languages
|
||||
; (one-char-read-ahead).
|
||||
; The optional argument PORT defaults to the current input port.
|
||||
|
||||
(define-opt (peek-next-char (optional (port (current-input-port))))
|
||||
(read-char port)
|
||||
(peek-char port))
|
||||
|
||||
|
||||
;------------------------------------------------------------------------
|
||||
|
||||
; -- procedure+: assert-curr-char CHAR-LIST STRING [PORT]
|
||||
; Reads a character from the PORT and looks it up
|
||||
; in the CHAR-LIST of expected characters
|
||||
; If the read character was found among expected, it is returned
|
||||
; Otherwise, the procedure writes a nasty message using STRING
|
||||
; as a comment, and quits.
|
||||
; The optional argument PORT defaults to the current input port.
|
||||
;
|
||||
(define-opt (assert-curr-char expected-chars comment
|
||||
(optional (port (current-input-port))))
|
||||
(let ((c (read-char port)))
|
||||
(if (memv c expected-chars) c
|
||||
(parser-error port "Wrong character " c
|
||||
" (0x" (if (eof-object? c) "*eof*"
|
||||
(number->string (char->integer c) 16)) ") "
|
||||
comment ". " expected-chars " expected"))))
|
||||
|
||||
|
||||
; -- procedure+: skip-until CHAR-LIST [PORT]
|
||||
; Reads and skips characters from the PORT until one of the break
|
||||
; characters is encountered. This break character is returned.
|
||||
; The break characters are specified as the CHAR-LIST. This list
|
||||
; may include EOF, which is to be coded as a symbol *eof*
|
||||
;
|
||||
; -- procedure+: skip-until NUMBER [PORT]
|
||||
; Skips the specified NUMBER of characters from the PORT and returns #f
|
||||
;
|
||||
; The optional argument PORT defaults to the current input port.
|
||||
|
||||
|
||||
(define-opt (skip-until arg (optional (port (current-input-port))) )
|
||||
(cond
|
||||
((number? arg) ; skip 'arg' characters
|
||||
(do ((i arg (dec i)))
|
||||
((not (positive? i)) #f)
|
||||
(if (eof-object? (read-char port))
|
||||
(parser-error port "Unexpected EOF while skipping "
|
||||
arg " characters"))))
|
||||
(else ; skip until break-chars (=arg)
|
||||
(let loop ((c (read-char port)))
|
||||
(cond
|
||||
((memv c arg) c)
|
||||
((eof-object? c)
|
||||
(if (memq '*eof* arg) c
|
||||
(parser-error port "Unexpected EOF while skipping until " arg)))
|
||||
(else (loop (read-char port))))))))
|
||||
|
||||
|
||||
; -- procedure+: skip-while CHAR-LIST [PORT]
|
||||
; Reads characters from the PORT and disregards them,
|
||||
; as long as they are mentioned in the CHAR-LIST.
|
||||
; The first character (which may be EOF) peeked from the stream
|
||||
; that is NOT a member of the CHAR-LIST is returned. This character
|
||||
; is left on the stream.
|
||||
; The optional argument PORT defaults to the current input port.
|
||||
|
||||
(define-opt (skip-while skip-chars (optional (port (current-input-port))) )
|
||||
(do ((c (peek-char port) (peek-char port)))
|
||||
((not (memv c skip-chars)) c)
|
||||
(read-char port)))
|
||||
|
||||
; whitespace const
|
||||
|
||||
;------------------------------------------------------------------------
|
||||
; Stream tokenizers
|
||||
|
||||
|
||||
; -- procedure+:
|
||||
; next-token PREFIX-CHAR-LIST BREAK-CHAR-LIST [COMMENT-STRING] [PORT]
|
||||
; skips any number of the prefix characters (members of the
|
||||
; PREFIX-CHAR-LIST), if any, and reads the sequence of characters
|
||||
; up to (but not including) a break character, one of the
|
||||
; BREAK-CHAR-LIST.
|
||||
; The string of characters thus read is returned.
|
||||
; The break character is left on the input stream
|
||||
; The list of break characters may include EOF, which is to be coded as
|
||||
; a symbol *eof*. Otherwise, EOF is fatal, generating an error message
|
||||
; including a specified COMMENT-STRING (if any)
|
||||
;
|
||||
; The optional argument PORT defaults to the current input port.
|
||||
;
|
||||
; Note: since we can't tell offhand how large the token being read is
|
||||
; going to be, we make a guess, pre-allocate a string, and grow it by
|
||||
; quanta if necessary. The quantum is always the length of the string
|
||||
; before it was extended the last time. Thus the algorithm does
|
||||
; a Fibonacci-type extension, which has been proven optimal.
|
||||
; Note, explicit port specification in read-char, peek-char helps.
|
||||
|
||||
; Procedure: input-parse:init-buffer
|
||||
; returns an initial buffer for next-token* procedures.
|
||||
; The input-parse:init-buffer may allocate a new buffer per each invocation:
|
||||
; (define (input-parse:init-buffer) (make-string 32))
|
||||
; Size 32 turns out to be fairly good, on average.
|
||||
; That policy is good only when a Scheme system is multi-threaded with
|
||||
; preemptive scheduling, or when a Scheme system supports shared substrings.
|
||||
; In all the other cases, it's better for input-parse:init-buffer to
|
||||
; return the same static buffer. next-token* functions return a copy
|
||||
; (a substring) of accumulated data, so the same buffer can be reused.
|
||||
; We shouldn't worry about an incoming token being too large:
|
||||
; next-token will use another chunk automatically. Still,
|
||||
; the best size for the static buffer is to allow most of the tokens to fit in.
|
||||
; Using a static buffer _dramatically_ reduces the amount of produced garbage
|
||||
; (e.g., during XML parsing).
|
||||
|
||||
(define input-parse:init-buffer
|
||||
(let ((buffer (make-string 512)))
|
||||
(lambda () buffer)))
|
||||
|
||||
|
||||
; See a better version below
|
||||
(define-opt (next-token-old prefix-skipped-chars break-chars
|
||||
(optional (comment "") (port (current-input-port))) )
|
||||
(let* ((buffer (input-parse:init-buffer))
|
||||
(curr-buf-len (string-length buffer))
|
||||
(quantum curr-buf-len))
|
||||
(let loop ((i 0) (c (skip-while prefix-skipped-chars port)))
|
||||
(cond
|
||||
((memv c break-chars) (substring buffer 0 i))
|
||||
((eof-object? c)
|
||||
(if (memq '*eof* break-chars)
|
||||
(substring buffer 0 i) ; was EOF expected?
|
||||
(parser-error port "EOF while reading a token " comment)))
|
||||
(else
|
||||
(if (>= i curr-buf-len) ; make space for i-th char in buffer
|
||||
(begin ; -> grow the buffer by the quantum
|
||||
(set! buffer (string-append buffer (make-string quantum)))
|
||||
(set! quantum curr-buf-len)
|
||||
(set! curr-buf-len (string-length buffer))))
|
||||
(string-set! buffer i c)
|
||||
(read-char port) ; move to the next char
|
||||
(loop (inc i) (peek-char port))
|
||||
)))))
|
||||
|
||||
|
||||
; A better version of next-token, which accumulates the characters
|
||||
; in chunks, and later on reverse-concatenates them, using
|
||||
; SRFI-13 if available.
|
||||
; The overhead of copying characters is only 100% (or even smaller: bulk
|
||||
; string copying might be well-optimised), compared to the (hypothetical)
|
||||
; circumstance if we had known the size of the token beforehand.
|
||||
; For small tokens, the code performs just as above. For large
|
||||
; tokens, we expect an improvement. Note, the code also has no
|
||||
; assignments.
|
||||
; See next-token-comp.scm
|
||||
|
||||
(define-opt (next-token prefix-skipped-chars break-chars
|
||||
(optional (comment "") (port (current-input-port))) )
|
||||
(let outer ((buffer (input-parse:init-buffer)) (filled-buffer-l '())
|
||||
(c (skip-while prefix-skipped-chars port)))
|
||||
(let ((curr-buf-len (string-length buffer)))
|
||||
(let loop ((i 0) (c c))
|
||||
(cond
|
||||
((memv c break-chars)
|
||||
(if (null? filled-buffer-l) (substring buffer 0 i)
|
||||
(string-concatenate-reverse filled-buffer-l buffer i)))
|
||||
((eof-object? c)
|
||||
(if (memq '*eof* break-chars) ; was EOF expected?
|
||||
(if (null? filled-buffer-l) (substring buffer 0 i)
|
||||
(string-concatenate-reverse filled-buffer-l buffer i))
|
||||
(parser-error port "EOF while reading a token " comment)))
|
||||
((>= i curr-buf-len)
|
||||
(outer (make-string curr-buf-len)
|
||||
(cons buffer filled-buffer-l) c))
|
||||
(else
|
||||
(string-set! buffer i c)
|
||||
(read-char port) ; move to the next char
|
||||
(loop (inc i) (peek-char port))))))))
|
||||
|
||||
; -- procedure+: next-token-of INC-CHARSET [PORT]
|
||||
; Reads characters from the PORT that belong to the list of characters
|
||||
; INC-CHARSET. The reading stops at the first character which is not
|
||||
; a member of the set. This character is left on the stream.
|
||||
; All the read characters are returned in a string.
|
||||
;
|
||||
; -- procedure+: next-token-of PRED [PORT]
|
||||
; Reads characters from the PORT for which PRED (a procedure of one
|
||||
; argument) returns non-#f. The reading stops at the first character
|
||||
; for which PRED returns #f. That character is left on the stream.
|
||||
; All the results of evaluating of PRED up to #f are returned in a
|
||||
; string.
|
||||
;
|
||||
; PRED is a procedure that takes one argument (a character
|
||||
; or the EOF object) and returns a character or #f. The returned
|
||||
; character does not have to be the same as the input argument
|
||||
; to the PRED. For example,
|
||||
; (next-token-of (lambda (c)
|
||||
; (cond ((eof-object? c) #f)
|
||||
; ((char-alphabetic? c) (char-downcase c))
|
||||
; (else #f))))
|
||||
; will try to read an alphabetic token from the current
|
||||
; input port, and return it in lower case.
|
||||
;
|
||||
; The optional argument PORT defaults to the current input port.
|
||||
;
|
||||
; This procedure is similar to next-token but only it implements
|
||||
; an inclusion rather than delimiting semantics.
|
||||
|
||||
(define-opt (next-token-of incl-list/pred
|
||||
(optional (port (current-input-port))) )
|
||||
(let* ((buffer (input-parse:init-buffer))
|
||||
(curr-buf-len (string-length buffer)))
|
||||
(if (procedure? incl-list/pred)
|
||||
(let outer ((buffer buffer) (filled-buffer-l '()))
|
||||
(let loop ((i 0))
|
||||
(if (>= i curr-buf-len) ; make sure we have space
|
||||
(outer (make-string curr-buf-len) (cons buffer filled-buffer-l))
|
||||
(let ((c (incl-list/pred (peek-char port))))
|
||||
(if c
|
||||
(begin
|
||||
(string-set! buffer i c)
|
||||
(read-char port) ; move to the next char
|
||||
(loop (inc i)))
|
||||
; incl-list/pred decided it had had enough
|
||||
(if (null? filled-buffer-l) (substring buffer 0 i)
|
||||
(string-concatenate-reverse filled-buffer-l buffer i)))))))
|
||||
|
||||
; incl-list/pred is a list of allowed characters
|
||||
(let outer ((buffer buffer) (filled-buffer-l '()))
|
||||
(let loop ((i 0))
|
||||
(if (>= i curr-buf-len) ; make sure we have space
|
||||
(outer (make-string curr-buf-len) (cons buffer filled-buffer-l))
|
||||
(let ((c (peek-char port)))
|
||||
(cond
|
||||
((not (memv c incl-list/pred))
|
||||
(if (null? filled-buffer-l) (substring buffer 0 i)
|
||||
(string-concatenate-reverse filled-buffer-l buffer i)))
|
||||
(else
|
||||
(string-set! buffer i c)
|
||||
(read-char port) ; move to the next char
|
||||
(loop (inc i))))))))
|
||||
)))
|
||||
|
||||
|
||||
; -- procedure+: read-text-line [PORT]
|
||||
; Reads one line of text from the PORT, and returns it as a string.
|
||||
; A line is a (possibly empty) sequence of characters terminated
|
||||
; by CR, CRLF or LF (or even the end of file).
|
||||
; The terminating character (or CRLF combination) is removed from
|
||||
; the input stream. The terminating character(s) is not a part
|
||||
; of the return string either.
|
||||
; If EOF is encountered before any character is read, the return
|
||||
; value is EOF.
|
||||
;
|
||||
; The optional argument PORT defaults to the current input port.
|
||||
|
||||
(define *read-line-breaks* (list char-newline char-return '*eof*))
|
||||
|
||||
(define-opt (read-text-line (optional (port (current-input-port))) )
|
||||
(if (eof-object? (peek-char port)) (peek-char port)
|
||||
(let* ((line
|
||||
(next-token '() *read-line-breaks*
|
||||
"reading a line" port))
|
||||
(c (read-char port))) ; must be either \n or \r or EOF
|
||||
(and (eqv? c char-return) (eqv? (peek-char port) #\newline)
|
||||
(read-char port)) ; skip \n that follows \r
|
||||
line)))
|
||||
|
||||
|
||||
; -- procedure+: read-string N [PORT]
|
||||
; Reads N characters from the PORT, and returns them in a string.
|
||||
; If EOF is encountered before N characters are read, a shorter string
|
||||
; will be returned.
|
||||
; If N is not positive, an empty string will be returned.
|
||||
; The optional argument PORT defaults to the current input port.
|
||||
|
||||
(define-opt (read-string n (optional (port (current-input-port))) )
|
||||
(if (not (positive? n)) ""
|
||||
(let ((buffer (make-string n)))
|
||||
(let loop ((i 0) (c (read-char port)))
|
||||
(if (eof-object? c) (substring buffer 0 i)
|
||||
(let ((i1 (inc i)))
|
||||
(string-set! buffer i c)
|
||||
(if (= i1 n) buffer
|
||||
(loop i1 (read-char port)))))))))
|
||||
|
|
@ -1,35 +0,0 @@
|
|||
(module input-parse "restricted-mzscheme.ss"
|
||||
|
||||
(provide peek-next-char
|
||||
assert-curr-char
|
||||
skip-until skip-while
|
||||
next-token next-token-of
|
||||
read-text-line
|
||||
read-string
|
||||
parser-error
|
||||
exn:ssax?
|
||||
exn:ssax-port)
|
||||
|
||||
(require (only (lib "13.ss" "srfi") string-concatenate-reverse))
|
||||
|
||||
(require "define-opt.ss")
|
||||
(require "ascii.ss")
|
||||
(require "char-encodings.ss")
|
||||
(require "crementing.ss")
|
||||
|
||||
(define-struct (exn:ssax exn) (port))
|
||||
|
||||
(define (format-list list)
|
||||
(apply string-append (map format-x list)))
|
||||
|
||||
(define (format-x thing)
|
||||
(format "~a" thing))
|
||||
|
||||
(define (parser-error port message . rest)
|
||||
(raise (make-exn:ssax (string->immutable-string
|
||||
(format-list (cons message rest)))
|
||||
(current-continuation-marks)
|
||||
port)))
|
||||
|
||||
(require (lib "include.ss"))
|
||||
(include "input-parse.scm"))
|
|
@ -1,100 +0,0 @@
|
|||
; -- Function: find-string-from-port? STR IN-PORT MAX-NO-CHARS
|
||||
; Looks for a string STR within the first MAX-NO-CHARS chars of the
|
||||
; input port IN-PORT
|
||||
; MAX-NO-CHARS may be omitted: in that case, the search span would be
|
||||
; limited only by the end of the input stream.
|
||||
; When the STR is found, the function returns the number of
|
||||
; characters it has read from the port, and the port is set
|
||||
; to read the first char after that (that is, after the STR)
|
||||
; The function returns #f when the string wasn't found
|
||||
; Note the function reads the port *STRICTLY* sequentially, and does not
|
||||
; perform any buffering. So the function can be used even if the port is open
|
||||
; on a pipe or other communication channel.
|
||||
;
|
||||
; Probably can be classified as misc-io.
|
||||
;
|
||||
; Notes on the algorithm.
|
||||
; A special care should be taken in a situation when one had achieved a partial
|
||||
; match with (a head of) STR, and then some unexpected character appeared in
|
||||
; the stream. It'll be rash to discard all already read characters. Consider
|
||||
; an example of string "acab" and the stream "bacacab...", specifically when
|
||||
; a c a _b_
|
||||
; b a c a c a b ...
|
||||
; that is, when 'aca' had matched, but then 'c' showed up in the stream
|
||||
; while we were looking for 'b'. In that case, discarding all already read
|
||||
; characters and starting the matching process from scratch, that is,
|
||||
; from 'c a b ...', would miss a certain match.
|
||||
; Note, we don't actually need to keep already read characters, or at least
|
||||
; strlen(str) characters in some kind of buffer. If there has been no match,
|
||||
; we can safely discard read characters. If there was some partial match,
|
||||
; we already know the characters before, they are in the STR itself, so
|
||||
; we don't need a special buffer for that.
|
||||
|
||||
;;; "MISCIO" Search for string from port.
|
||||
; Written 1995 by Oleg Kiselyov (oleg@ponder.csci.unt.edu)
|
||||
; Modified 1996 by A. Jaffer (jaffer@ai.mit.edu)
|
||||
;
|
||||
; This code is in the public domain.
|
||||
|
||||
(define (MISCIO:find-string-from-port? str <input-port> . max-no-char)
|
||||
(set! max-no-char (if (null? max-no-char) #f (car max-no-char)))
|
||||
(letrec
|
||||
((no-chars-read 0)
|
||||
(my-peek-char ; Return a peeked char or #f
|
||||
(lambda () (and (or (not max-no-char) (< no-chars-read max-no-char))
|
||||
(let ((c (peek-char <input-port>)))
|
||||
(if (eof-object? c) #f c)))))
|
||||
(next-char (lambda () (read-char <input-port>)
|
||||
(set! no-chars-read (inc no-chars-read))))
|
||||
(match-1st-char ; of the string str
|
||||
(lambda ()
|
||||
(let ((c (my-peek-char)))
|
||||
(if (not c) #f
|
||||
(begin (next-char)
|
||||
(if (char=? c (string-ref str 0))
|
||||
(match-other-chars 1)
|
||||
(match-1st-char)))))))
|
||||
;; There has been a partial match, up to the point pos-to-match
|
||||
;; (for example, str[0] has been found in the stream)
|
||||
;; Now look to see if str[pos-to-match] for would be found, too
|
||||
(match-other-chars
|
||||
(lambda (pos-to-match)
|
||||
(if (>= pos-to-match (string-length str))
|
||||
no-chars-read ; the entire string has matched
|
||||
(let ((c (my-peek-char)))
|
||||
(and c
|
||||
(if (not (char=? c (string-ref str pos-to-match)))
|
||||
(backtrack 1 pos-to-match)
|
||||
(begin (next-char)
|
||||
(match-other-chars (inc pos-to-match)))))))))
|
||||
|
||||
;; There had been a partial match, but then a wrong char showed up.
|
||||
;; Before discarding previously read (and matched) characters, we check
|
||||
;; to see if there was some smaller partial match. Note, characters read
|
||||
;; so far (which matter) are those of str[0..matched-substr-len - 1]
|
||||
;; In other words, we will check to see if there is such i>0 that
|
||||
;; substr(str,0,j) = substr(str,i,matched-substr-len)
|
||||
;; where j=matched-substr-len - i
|
||||
(backtrack
|
||||
(lambda (i matched-substr-len)
|
||||
(let ((j (- matched-substr-len i)))
|
||||
(if (<= j 0)
|
||||
(match-1st-char) ; backed off completely to the begining of str
|
||||
(let loop ((k 0))
|
||||
(if (>= k j)
|
||||
(match-other-chars j) ; there was indeed a shorter match
|
||||
(if (char=? (string-ref str k)
|
||||
(string-ref str (+ i k)))
|
||||
(loop (inc k))
|
||||
(backtrack (inc i) matched-substr-len))))))))
|
||||
)
|
||||
(match-1st-char)))
|
||||
|
||||
(define find-string-from-port? MISCIO:find-string-from-port?)
|
||||
|
||||
|
||||
;-----------------------------------------------------------------------------
|
||||
; This is a test driver for miscio:find-string-from-port?, to make sure it
|
||||
; really works as intended
|
||||
|
||||
; moved to vinput-parse.scm
|
|
@ -1,67 +0,0 @@
|
|||
; Look up a value associated with a symbolic key in alist
|
||||
; ((key value) ...) or ((key . value) ...)
|
||||
; and return the associated value.
|
||||
; If the association has the form
|
||||
; (key . value) where value is not a pair --> return value
|
||||
; (key value) --> return value
|
||||
; (key value1 value2 value3 ...) -> return (value1 value2 value3 ...)
|
||||
; that is, the procedure tries to do the right thing for
|
||||
; both kinds of associative lists.
|
||||
;
|
||||
; The form `lookup-def' is a special form rather than a regular
|
||||
; procedure. Its first two arguments are evaluated exactly once. The
|
||||
; default-value argument, if given, is evaluated only if the desired key
|
||||
; is not found. I have not seen any need to pass `lookup-def' as an
|
||||
; argument to other functions. If the latter is desired, it is not
|
||||
; difficult to accomplish by explicitly wrapping `lookup-def' into a
|
||||
; lambda form.
|
||||
;
|
||||
; We use a pseudo-keyword argument warn: as a modifier.
|
||||
; This is not really a keyword argument (although it may be,
|
||||
; if the Scheme system turns out DSSSL-compatible)
|
||||
;
|
||||
; (lookup-def key alist) -- lookup the key in the alist and return the
|
||||
; associated value. Raise an error if the key is not
|
||||
; found.
|
||||
; (lookup-def key alist default-exp)
|
||||
; -- lookup the key in the alist and return the associated
|
||||
; value. If the the key is not found, evaluate
|
||||
; the default-exp and return its result.
|
||||
; (lookup-def key alist warn: default-exp)
|
||||
; -- the same as above. In addition, write a warning
|
||||
; (using cerr above) if the key is not found.
|
||||
|
||||
(define-syntax lookup-def
|
||||
(syntax-rules (warn:)
|
||||
((lookup-def key alist)
|
||||
(let ((nkey key) (nalist alist)) ; evaluate them only once
|
||||
(let ((res (assq nkey nalist)))
|
||||
(if res
|
||||
(let ((res (cdr res)))
|
||||
(cond
|
||||
((not (pair? res)) res)
|
||||
((null? (cdr res)) (car res))
|
||||
(else res)))
|
||||
(error "Failed to find " nkey " in " nalist)))))
|
||||
((lookup-def key alist default-exp)
|
||||
(let ((res (assq key alist)))
|
||||
(if res
|
||||
(let ((res (cdr res)))
|
||||
(cond
|
||||
((not (pair? res)) res)
|
||||
((null? (cdr res)) (car res))
|
||||
(else res)))
|
||||
default-exp)))
|
||||
((lookup-def key alist warn: default-exp)
|
||||
(let ((nkey key) (nalist alist)) ; evaluate them only once
|
||||
(let ((res (assq nkey nalist)))
|
||||
(if res
|
||||
(let ((res (cdr res)))
|
||||
(cond
|
||||
((not (pair? res)) res)
|
||||
((null? (cdr res)) (car res))
|
||||
(else res)))
|
||||
(begin
|
||||
(cerr "Failed to find " nkey " in " nalist #\newline)
|
||||
default-exp)))))
|
||||
))
|
|
@ -1,10 +0,0 @@
|
|||
(module lookup-def mzscheme
|
||||
(provide lookup-def)
|
||||
|
||||
(require (lib "23.ss" "srfi")) ; ERROR
|
||||
(require "coutputs.ss")
|
||||
|
||||
(require (lib "include.ss"))
|
||||
(include "lookup-def.scm"))
|
||||
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
(module oleg-string-ports mzscheme
|
||||
|
||||
(provide with-output-to-string
|
||||
call-with-input-string
|
||||
with-input-from-string)
|
||||
|
||||
(begin
|
||||
(define (with-output-to-string thunk)
|
||||
(let ((port (open-output-string)))
|
||||
(parameterize ((current-output-port port))
|
||||
(thunk)
|
||||
(get-output-string port))))
|
||||
(define (call-with-input-string string proc)
|
||||
(proc (open-input-string string)))
|
||||
(define (with-input-from-string string thunk)
|
||||
(parameterize ((current-input-port (open-input-string string)))
|
||||
(thunk)))))
|
||||
|
|
@ -1,21 +0,0 @@
|
|||
(module oleg-utils mzscheme
|
||||
|
||||
(provide any?
|
||||
list-intersperse list-intersperse!
|
||||
list-tail-diff
|
||||
string-rindex
|
||||
substring?
|
||||
string->integer
|
||||
string-split
|
||||
make-char-quotator)
|
||||
|
||||
(require (only (lib "13.ss" "srfi")
|
||||
string-index-right string-contains string-null?))
|
||||
(require (lib "23.ss" "srfi"))
|
||||
(require "crementing.ss")
|
||||
|
||||
(require (lib "include.ss"))
|
||||
(include "util.scm"))
|
||||
|
||||
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
; like cout << arguments << args
|
||||
; where argument can be any Scheme object. If it's a procedure
|
||||
; (without args) it's executed rather than printed (like newline)
|
||||
|
||||
(define (cout . args)
|
||||
(for-each (lambda (x)
|
||||
(if (procedure? x) (x) (display x)))
|
||||
args))
|
||||
|
||||
(define (cerr . args)
|
||||
(for-each (lambda (x)
|
||||
(if (procedure? x)
|
||||
(x (current-error-port))
|
||||
(display x (current-error-port))))
|
||||
args))
|
||||
|
||||
;(##define-macro (nl) '(newline))
|
||||
(define nl (string #\newline))
|
|
@ -1,31 +0,0 @@
|
|||
; Pretty-printer for various Scheme systems
|
||||
;
|
||||
; It implements the structure ppretty-prints for Scheme systems
|
||||
; other than Scheme48 and PLT Scheme.
|
||||
; Also implement display-circle
|
||||
;
|
||||
; $Id: ppretty-prints.scm,v 1.1 2004/10/02 07:50:50 eli Exp $
|
||||
|
||||
; If the pretty-printer is available, use it. Otherwise, use 'display'
|
||||
; If display-circle is available or a regular pretty-printer can handle
|
||||
; circular lists, use them. Otherwise, refuse to display circular data
|
||||
; structures
|
||||
|
||||
(cond-expand
|
||||
(bigloo
|
||||
#f) ; pp and display-circle are natively
|
||||
; available
|
||||
((or scm gambit)
|
||||
; pp is natively available
|
||||
(define (display-circle x) ; display-circle is not
|
||||
(display "Cannot safely display circular datastructures. Use SRFI-38")
|
||||
(newline)))
|
||||
((or petite-chez)
|
||||
(define pp pretty-print)
|
||||
(define display-circle pp))
|
||||
(else
|
||||
(define pp display) ; Fall-back to display
|
||||
(define (display-circle x)
|
||||
(display "Cannot safely display circular datastructures. Use SRFI-38")
|
||||
(newline))))
|
||||
|
|
@ -1,6 +0,0 @@
|
|||
(module ppretty-prints mzscheme
|
||||
|
||||
(provide pp)
|
||||
|
||||
(require (lib "pretty.ss"))
|
||||
(define pp pretty-print))
|
|
@ -1,3 +0,0 @@
|
|||
(module restricted-mzscheme mzscheme
|
||||
(provide (all-from-except mzscheme
|
||||
read-string)))
|
|
@ -1,60 +0,0 @@
|
|||
(module ssax "restricted-mzscheme.ss"
|
||||
|
||||
(provide xml-token?
|
||||
xml-token-kind xml-token-head
|
||||
make-empty-attlist attlist-add
|
||||
attlist-null?
|
||||
attlist-remove-top
|
||||
attlist->alist attlist-fold
|
||||
ssax:uri-string->symbol
|
||||
ssax:skip-internal-dtd
|
||||
ssax:read-pi-body-as-string
|
||||
ssax:reverse-collect-str-drop-ws
|
||||
ssax:read-markup-token
|
||||
ssax:read-cdata-body
|
||||
ssax:read-char-ref
|
||||
ssax:read-attributes
|
||||
ssax:complete-start-tag
|
||||
ssax:read-external-id
|
||||
ssax:read-char-data
|
||||
ssax:make-parser ssax:make-pi-parser ssax:make-elem-parser
|
||||
ssax:xml->sxml
|
||||
ssax:warn-parameter)
|
||||
|
||||
(require (only (lib "1.ss" "srfi") cons*))
|
||||
(require (only (lib "13.ss" "srfi")
|
||||
string-null?
|
||||
string-index
|
||||
string-concatenate/shared
|
||||
string-concatenate-reverse/shared))
|
||||
|
||||
(require "crementing.ss")
|
||||
(require "input-parse.ss")
|
||||
(require "char-encodings.ss")
|
||||
(require "ascii.ss")
|
||||
(require "ppretty-prints.ss")
|
||||
(require "oleg-utils.ss")
|
||||
(require "find-strings.ss")
|
||||
(require "assertions.ss")
|
||||
(require "coutputs.ss")
|
||||
(require "catch-errors.ss")
|
||||
(require "oleg-string-ports.ss")
|
||||
|
||||
(define (SSAX:warn-standard port msg . other-msg)
|
||||
(apply cerr (cons (string-append (string #\newline) "Warning: ")
|
||||
(cons msg
|
||||
other-msg))))
|
||||
|
||||
|
||||
(define ssax:warn-parameter (make-parameter SSAX:warn-standard))
|
||||
|
||||
(define (ssax:warn port msg . other-msg)
|
||||
(apply (ssax:warn-parameter) port msg other-msg))
|
||||
|
||||
(require (lib "include.ss"))
|
||||
(include "SSAX-code.scm"))
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -1,25 +0,0 @@
|
|||
(module sxml-to-html-ext mzscheme
|
||||
|
||||
(provide make-header
|
||||
make-navbar
|
||||
make-footer
|
||||
universal-conversion-rules
|
||||
universal-protected-rules
|
||||
alist-conv-rules)
|
||||
|
||||
(require (lib "23.ss" "srfi"))
|
||||
(require "oleg-utils.ss")
|
||||
(require "coutputs.ss")
|
||||
(require "assertions.ss")
|
||||
(require "crementing.ss")
|
||||
(require "lookup-def.ss")
|
||||
(require "sxml-to-html.ss")
|
||||
(require "sxml-tree-trans.ss")
|
||||
|
||||
(define OS:file-length file-size)
|
||||
|
||||
(require (lib "include.ss"))
|
||||
(include "SXML-to-HTML-ext.scm"))
|
||||
|
||||
|
||||
|
|
@ -1,14 +0,0 @@
|
|||
(module sxml-to-html mzscheme
|
||||
|
||||
(provide SXML->HTML
|
||||
enattr
|
||||
entag
|
||||
string->goodHTML)
|
||||
|
||||
(require "coutputs.ss")
|
||||
(require "assertions.ss")
|
||||
(require "oleg-utils.ss")
|
||||
(require "sxml-tree-trans.ss")
|
||||
|
||||
(require (lib "include.ss"))
|
||||
(include "SXML-to-HTML.scm"))
|
|
@ -1,10 +0,0 @@
|
|||
(module sxml-tree-trans mzscheme
|
||||
|
||||
(provide SRV:send-reply
|
||||
post-order pre-post-order replace-range)
|
||||
|
||||
(require "assertions.ss")
|
||||
(require (lib "23.ss" "srfi"))
|
||||
|
||||
(require (lib "include.ss"))
|
||||
(include "SXML-tree-trans.scm"))
|
|
@ -1,18 +0,0 @@
|
|||
(module sxpath mzscheme
|
||||
|
||||
(provide nodeset?
|
||||
node-typeof?
|
||||
map-union
|
||||
sxpath)
|
||||
|
||||
(require (lib "pretty.ss"))
|
||||
(require (lib "23.ss" "srfi")) ; ERROR
|
||||
(require "crementing.ss")
|
||||
(require "assertions.ss")
|
||||
(require "coutputs.ss")
|
||||
|
||||
(require (lib "include.ss"))
|
||||
(include "SXPath-old.scm"))
|
||||
|
||||
|
||||
|
|
@ -1,292 +0,0 @@
|
|||
;****************************************************************************
|
||||
; My Scheme misc utility functions
|
||||
; (mainly dealing with string and list manipulations)
|
||||
;
|
||||
; myenv.scm, myenv-bigloo.scm or similar prelude is assumed.
|
||||
; From SRFI-13, import many functions
|
||||
; If a particular implementation lacks SRFI-13 support, please
|
||||
; include the file srfi-13-local.scm
|
||||
;
|
||||
; $Id: util.scm,v 1.2 2004/11/09 14:11:40 sperber Exp $
|
||||
|
||||
;------------------------------------------------------------------------
|
||||
; Iterator ANY?
|
||||
;
|
||||
; -- procedure+: any? PRED COLLECTION
|
||||
; Searches for the first element in the collection satisfying a
|
||||
; given predicate
|
||||
; That is, the procedure applies PRED to every element of the
|
||||
; COLLECTION in turn.
|
||||
; The first element for which PRED returns non-#f stops the iteration;
|
||||
; the value of the predicate is returned.
|
||||
; If none of the elements of the COLLECTION satisfy the predicate,
|
||||
; the return value from the procedure is #f
|
||||
; COLLECTION can be a list, a vector, a string, or an input port.
|
||||
; See vmyenv.scm for validation tests.
|
||||
|
||||
(define (any? <pred?> coll)
|
||||
(cond
|
||||
((list? coll)
|
||||
(let loop ((curr-l coll))
|
||||
(if (null? curr-l) #f
|
||||
(or (<pred?> (car curr-l)) (loop (cdr curr-l))))))
|
||||
|
||||
((vector? coll)
|
||||
(let ((len (vector-length coll)))
|
||||
(let loop ((i 0))
|
||||
(if (>= i len) #f
|
||||
(or (<pred?> (vector-ref coll i)) (loop (inc i)))))))
|
||||
|
||||
((string? coll)
|
||||
(let ((len (string-length coll)))
|
||||
(let loop ((i 0))
|
||||
(if (>= i len) #f
|
||||
(or (<pred?> (string-ref coll i)) (loop (inc i)))))))
|
||||
|
||||
((input-port? coll)
|
||||
(let loop ((c (read-char coll)))
|
||||
(if (eof-object? c) #f
|
||||
(or (<pred?> c) (loop (read-char coll))))))
|
||||
|
||||
(else (error "any? on an invalid collection"))))
|
||||
|
||||
|
||||
;------------------------------------------------------------------------
|
||||
; Some list manipulation functions
|
||||
|
||||
; -- procedure+: list-intersperse SRC-L ELEM
|
||||
; inserts ELEM between elements of the SRC-L, returning a freshly allocated
|
||||
; list (cells, that is)
|
||||
|
||||
(define (list-intersperse src-l elem)
|
||||
(if (null? src-l) src-l
|
||||
(let loop ((l (cdr src-l)) (dest (cons (car src-l) '())))
|
||||
(if (null? l) (reverse dest)
|
||||
(loop (cdr l) (cons (car l) (cons elem dest)))))))
|
||||
|
||||
|
||||
; -- procedure+: list-intersperse! SRC-L ELEM
|
||||
; inserts ELEM between elements of the SRC-L inplace
|
||||
|
||||
(define (list-intersperse! src-l elem)
|
||||
(if (null? src-l) src-l
|
||||
(let loop ((l src-l))
|
||||
(let ((next-l (cdr l)))
|
||||
(if (null? next-l) src-l
|
||||
(begin
|
||||
(set-cdr! l (cons elem next-l))
|
||||
(loop next-l)))))))
|
||||
|
||||
|
||||
; List-tail-difference: given two lists, list1 and list2 where
|
||||
; list2 is presumably a tail of list1, return
|
||||
; a (freshly allocated) list which is a difference between list1
|
||||
; and list2. If list2 is *not* a tail of list1, the entire list1
|
||||
; is returned.
|
||||
(define (list-tail-diff list1 list2)
|
||||
(let loop ((l1-curr list1) (difference '()))
|
||||
(cond
|
||||
((eq? l1-curr list2) (reverse difference))
|
||||
((null? l1-curr) (reverse difference))
|
||||
(else (loop (cdr l1-curr) (cons (car l1-curr) difference))))))
|
||||
|
||||
|
||||
;------------------------------------------------------------------------
|
||||
; String utilities
|
||||
; See SRFI-13 or srfi-13-local.scm
|
||||
|
||||
|
||||
; Return the index of the last occurence of a-char in str, or #f
|
||||
; See SRFI-13
|
||||
(define string-rindex string-index-right)
|
||||
|
||||
; -- procedure+: substring? PATTERN STRING
|
||||
; Searches STRING to see if it contains the substring PATTERN.
|
||||
; Returns the index of the first substring of STRING that is equal
|
||||
; to PATTERN; or `#f' if STRING does not contain PATTERN.
|
||||
;
|
||||
; (substring? "rat" "pirate") => 2
|
||||
; (substring? "rat" "outrage") => #f
|
||||
; (substring? "" any-string) => 0
|
||||
(define (substring? pattern str) (string-contains str pattern))
|
||||
|
||||
|
||||
; -- procedure+: string->integer STR START END
|
||||
;
|
||||
; Makes sure a substring of the STR from START (inclusive) till END
|
||||
; (exclusive) is a representation of a non-negative integer in decimal
|
||||
; notation. If so, this integer is returned. Otherwise -- when the
|
||||
; substring contains non-decimal characters, or when the range from
|
||||
; START till END is not within STR, the result is #f.
|
||||
;
|
||||
; This procedure is a simplification of the standard string->number.
|
||||
; The latter is far more generic: for example, it will try to read
|
||||
; strings like "1/2" "1S2" "1.34" and even "1/0" (the latter causing
|
||||
; a zero-divide error). Note that to string->number, "1S2" is a valid
|
||||
; representation of an _inexact_ integer (100 to be precise).
|
||||
; Oftentimes we want to be more restrictive about what we consider a
|
||||
; number; we want merely to read an integral label.
|
||||
|
||||
(define (string->integer str start end)
|
||||
(and (< -1 start end (inc (string-length str)))
|
||||
(let loop ((pos start) (accum 0))
|
||||
(cond
|
||||
((>= pos end) accum)
|
||||
((char-numeric? (string-ref str pos))
|
||||
(loop (inc pos) (+ (char->integer (string-ref str pos))
|
||||
(- (char->integer #\0)) (* 10 accum))))
|
||||
(else #f)))))
|
||||
|
||||
|
||||
;
|
||||
; -- procedure+: string-split STRING
|
||||
; -- procedure+: string-split STRING '()
|
||||
; -- procedure+: string-split STRING '() MAXSPLIT
|
||||
;
|
||||
; Returns a list of whitespace delimited words in STRING.
|
||||
; If STRING is empty or contains only whitespace, then the empty list
|
||||
; is returned. Leading and trailing whitespaces are trimmed.
|
||||
; If MAXSPLIT is specified and positive, the resulting list will
|
||||
; contain at most MAXSPLIT elements, the last of which is the string
|
||||
; remaining after (MAXSPLIT - 1) splits. If MAXSPLIT is specified and
|
||||
; non-positive, the empty list is returned. "In time critical
|
||||
; applications it behooves you not to split into more fields than you
|
||||
; really need."
|
||||
;
|
||||
; -- procedure+: string-split STRING CHARSET
|
||||
; -- procedure+: string-split STRING CHARSET MAXSPLIT
|
||||
;
|
||||
; Returns a list of words delimited by the characters in CHARSET in
|
||||
; STRING. CHARSET is a list of characters that are treated as delimiters.
|
||||
; Leading or trailing delimeters are NOT trimmed. That is, the resulting
|
||||
; list will have as many initial empty string elements as there are
|
||||
; leading delimiters in STRING.
|
||||
;
|
||||
; If MAXSPLIT is specified and positive, the resulting list will
|
||||
; contain at most MAXSPLIT elements, the last of which is the string
|
||||
; remaining after (MAXSPLIT - 1) splits. If MAXSPLIT is specified and
|
||||
; non-positive, the empty list is returned. "In time critical
|
||||
; applications it behooves you not to split into more fields than you
|
||||
; really need."
|
||||
;
|
||||
; This is based on the split function in Python/Perl
|
||||
;
|
||||
; (string-split " abc d e f ") ==> ("abc" "d" "e" "f")
|
||||
; (string-split " abc d e f " '() 1) ==> ("abc d e f ")
|
||||
; (string-split " abc d e f " '() 0) ==> ()
|
||||
; (string-split ":abc:d:e::f:" '(#\:)) ==> ("" "abc" "d" "e" "" "f" "")
|
||||
; (string-split ":" '(#\:)) ==> ("" "")
|
||||
; (string-split "root:x:0:0:Lord" '(#\:) 2) ==> ("root" "x:0:0:Lord")
|
||||
; (string-split "/usr/local/bin:/usr/bin:/usr/ucb/bin" '(#\:))
|
||||
; ==> ("/usr/local/bin" "/usr/bin" "/usr/ucb/bin")
|
||||
; (string-split "/usr/local/bin" '(#\/)) ==> ("" "usr" "local" "bin")
|
||||
|
||||
(define (string-split str . rest)
|
||||
; maxsplit is a positive number
|
||||
(define (split-by-whitespace str maxsplit)
|
||||
(define (skip-ws i yet-to-split-count)
|
||||
(cond
|
||||
((>= i (string-length str)) '())
|
||||
((char-whitespace? (string-ref str i))
|
||||
(skip-ws (inc i) yet-to-split-count))
|
||||
(else (scan-beg-word (inc i) i yet-to-split-count))))
|
||||
(define (scan-beg-word i from yet-to-split-count)
|
||||
(cond
|
||||
((zero? yet-to-split-count)
|
||||
(cons (substring str from (string-length str)) '()))
|
||||
(else (scan-word i from yet-to-split-count))))
|
||||
(define (scan-word i from yet-to-split-count)
|
||||
(cond
|
||||
((>= i (string-length str))
|
||||
(cons (substring str from i) '()))
|
||||
((char-whitespace? (string-ref str i))
|
||||
(cons (substring str from i)
|
||||
(skip-ws (inc i) (- yet-to-split-count 1))))
|
||||
(else (scan-word (inc i) from yet-to-split-count))))
|
||||
(skip-ws 0 (- maxsplit 1)))
|
||||
|
||||
; maxsplit is a positive number
|
||||
; str is not empty
|
||||
(define (split-by-charset str delimeters maxsplit)
|
||||
(define (scan-beg-word from yet-to-split-count)
|
||||
(cond
|
||||
((>= from (string-length str)) '(""))
|
||||
((zero? yet-to-split-count)
|
||||
(cons (substring str from (string-length str)) '()))
|
||||
(else (scan-word from from yet-to-split-count))))
|
||||
(define (scan-word i from yet-to-split-count)
|
||||
(cond
|
||||
((>= i (string-length str))
|
||||
(cons (substring str from i) '()))
|
||||
((memq (string-ref str i) delimeters)
|
||||
(cons (substring str from i)
|
||||
(scan-beg-word (inc i) (- yet-to-split-count 1))))
|
||||
(else (scan-word (inc i) from yet-to-split-count))))
|
||||
(scan-beg-word 0 (- maxsplit 1)))
|
||||
|
||||
; resolver of overloading...
|
||||
; if omitted, maxsplit defaults to
|
||||
; (inc (string-length str))
|
||||
(if (string-null? str) '()
|
||||
(if (null? rest)
|
||||
(split-by-whitespace str (inc (string-length str)))
|
||||
(let ((charset (car rest))
|
||||
(maxsplit
|
||||
(if (pair? (cdr rest)) (cadr rest) (inc (string-length str)))))
|
||||
(cond
|
||||
((not (positive? maxsplit)) '())
|
||||
((null? charset) (split-by-whitespace str maxsplit))
|
||||
(else (split-by-charset str charset maxsplit))))))
|
||||
)
|
||||
|
||||
|
||||
; make-char-quotator QUOT-RULES
|
||||
;
|
||||
; Given QUOT-RULES, an assoc list of (char . string) pairs, return
|
||||
; a quotation procedure. The returned quotation procedure takes a string
|
||||
; and returns either a string or a list of strings. The quotation procedure
|
||||
; check to see if its argument string contains any instance of a character
|
||||
; that needs to be encoded (quoted). If the argument string is "clean",
|
||||
; it is returned unchanged. Otherwise, the quotation procedure will
|
||||
; return a list of string fragments. The input straing will be broken
|
||||
; at the places where the special characters occur. The special character
|
||||
; will be replaced by the corresponding encoding strings.
|
||||
;
|
||||
; For example, to make a procedure that quotes special HTML characters,
|
||||
; do
|
||||
; (make-char-quotator
|
||||
; '((#\< . "<") (#\> . ">") (#\& . "&") (#\" . """)))
|
||||
|
||||
(define (make-char-quotator char-encoding)
|
||||
(let ((bad-chars (map car char-encoding)))
|
||||
|
||||
; Check to see if str contains one of the characters in charset,
|
||||
; from the position i onward. If so, return that character's index.
|
||||
; otherwise, return #f
|
||||
(define (index-cset str i charset)
|
||||
(let loop ((i i))
|
||||
(and (< i (string-length str))
|
||||
(if (memv (string-ref str i) charset) i
|
||||
(loop (inc i))))))
|
||||
|
||||
; The body of the function
|
||||
(lambda (str)
|
||||
(let ((bad-pos (index-cset str 0 bad-chars)))
|
||||
(if (not bad-pos) str ; str had all good chars
|
||||
(let loop ((from 0) (to bad-pos))
|
||||
(cond
|
||||
((>= from (string-length str)) '())
|
||||
((not to)
|
||||
(cons (substring str from (string-length str)) '()))
|
||||
(else
|
||||
(let ((quoted-char
|
||||
(cdr (assv (string-ref str to) char-encoding)))
|
||||
(new-to
|
||||
(index-cset str (inc to) bad-chars)))
|
||||
(if (< from to)
|
||||
(cons
|
||||
(substring str from to)
|
||||
(cons quoted-char (loop (inc to) new-to)))
|
||||
(cons quoted-char (loop (inc to) new-to))))))))))
|
||||
))
|
||||
|
Loading…
Reference in New Issue
Block a user