v4 progress

svn: r7802
This commit is contained in:
Jay McCarthy 2007-11-21 16:51:53 +00:00
parent 4646c34d1e
commit a4023f2ebe
63 changed files with 32295 additions and 1409 deletions

View File

@ -1,5 +1,6 @@
(module dispatch-passwords mzscheme
(require (lib "kw.ss")
(lib "list.ss")
(lib "url.ss" "net")
(lib "contract.ss"))
(require "dispatch.ss"
@ -22,12 +23,12 @@
(define password-cache (box #f))
(define (update-password-cache!)
(when (and (file-exists? password-file) (memq 'read (file-or-directory-permissions password-file)))
(let ([cur-mtime (file-or-directory-modify-seconds password-file)])
(when (or (not (unbox last-read-time))
(cur-mtime . > . (unbox last-read-time))
(not (unbox password-cache)))
(set-box! last-read-time cur-mtime)
(set-box! password-cache (read-passwords password-file))))))
(let ([cur-mtime (file-or-directory-modify-seconds password-file)])
(when (or (not (unbox last-read-time))
(cur-mtime . > . (unbox last-read-time))
(not (unbox password-cache)))
(set-box! last-read-time cur-mtime)
(set-box! password-cache (read-passwords password-file))))))
(define (read-password-cache)
(update-password-cache!)
(unbox password-cache))
@ -77,11 +78,14 @@
(format "could not load password file ~a" password-path)
(current-continuation-marks))))])
(let ([passwords
(let ([raw (load password-path)])
(unless (password-list? raw)
(raise "malformed passwords"))
(map (lambda (x) (make-pass-entry (car x) (regexp (cadr x)) (cddr x)))
raw))])
(with-input-from-file
password-path
(lambda ()
(let ([raw (second (read))])
(unless (password-list? raw)
(raise "malformed passwords"))
(map (lambda (x) (make-pass-entry (car x) (regexp (cadr x)) (cddr x)))
raw))))])
;; string symbol bytes -> (or/c #f string)
(lambda (request-path user-name password)

View File

@ -1,22 +1,24 @@
(module lang mzscheme
(require-for-syntax (lib "etc.ss")
(lib "list.ss")
"lang/labels.ss"
"lang/util.ss"
"lang/elim-letrec.ss"
"lang/anormal.ss"
"lang/elim-callcc.ss"
"lang/defun.ss")
(require "lang/lang-api.ss")
(provide (rename lang-module-begin #%module-begin))
(provide (all-from "lang/lang-api.ss"))
(define-syntax lang-module-begin
(make-lang-module-begin
make-labeling
(make-module-case/new-defs
(make-define-case/new-defs
(compose #;(lambda (stx) (values stx empty))
defun
elim-callcc
(make-anormal-term elim-letrec-term)))))))
#lang scheme/base
(require (for-syntax scheme/base)
(for-syntax (lib "etc.ss"))
(for-syntax (lib "list.ss"))
(for-syntax "lang/labels.ss")
(for-syntax "lang/util.ss")
(for-syntax "lang/elim-letrec.ss")
(for-syntax "lang/anormal.ss")
(for-syntax "lang/elim-callcc.ss")
(for-syntax "lang/defun.ss")
"lang/lang-api.ss")
(provide (rename-out [lang-module-begin #%plain-module-begin])
(all-from-out "lang/lang-api.ss"))
(define-syntax lang-module-begin
(make-lang-module-begin
make-labeling
(make-module-case/new-defs
(make-define-case/new-defs
(compose #;(lambda (stx) (values stx empty))
defun
elim-callcc
(make-anormal-term elim-letrec-term))))))

View File

@ -1,164 +1,164 @@
(module abort-resume mzscheme
(require (lib "list.ss")
(lib "plt-match.ss")
(lib "serialize.ss")
"../private/define-closure.ss"
"../lang/web-cells.ss")
(provide
;; AUXILLIARIES
abort
resume
the-cont-key
the-save-cm-key
safe-call?
the-undef
activation-record-list
current-saved-continuation-marks-and
;; "SERVLET" INTERFACE
send/suspend
;; "CLIENT" INTERFACE
dispatch-start
dispatch)
;; **********************************************************************
;; **********************************************************************
;; AUXILLIARIES
(define-struct mark-key ())
(define the-cont-key (make-mark-key))
(define the-save-cm-key (make-mark-key))
(define safe-call? (make-mark-key))
(define web-prompt (make-continuation-prompt-tag 'web))
(define (current-saved-continuation-marks-and key val)
(reverse
(list* (cons key val)
(let-values ([(current)
(continuation-mark-set->list (current-continuation-marks) the-save-cm-key)])
(if (empty? current)
empty
(first current))))))
;; current-continuation-as-list: -> (listof value)
;; check the safety marks and return the list of marks representing the continuation
(define (activation-record-list)
(let* ([cm (current-continuation-marks)]
[sl (reverse (continuation-mark-set->list cm safe-call?))])
(if (andmap (lambda (x)
(if (pair? x)
(car x)
x))
sl)
(begin #;(printf "Safe continuation capture from ~S with cm ~S~n" sl cm)
#;(printf "CMs: ~S~n" (continuation-mark-set->list* cm (list the-cont-key the-save-cm-key)))
(reverse (continuation-mark-set->list* cm (list the-cont-key the-save-cm-key))))
(error "Attempt to capture a continuation from within an unsafe context:" sl))))
;; abort: ( -> alpha) -> alpha
;; erase the stack and apply a thunk
(define (abort thunk)
#;(printf "abort ~S~n" thunk)
(abort-current-continuation web-prompt thunk))
;; resume: (listof (value -> value)) value -> value
;; resume a computation given a value and list of frame procedures
(define (resume frames val)
#;(printf "~S~n" `(resume ,frames ,val))
(match frames
[(list)
(apply values val)]
[(list-rest f fs)
(match f
[(vector #f #f)
(error 'resume "Empty frame")]
[(vector f #f)
(call-with-values (lambda () (with-continuation-mark the-cont-key f (resume fs val)))
f)]
[(vector #f (list))
(resume fs val)]
[(vector #f (list-rest (list-rest cm-key cm-val) cms))
(with-continuation-mark
the-save-cm-key
(current-saved-continuation-marks-and cm-key cm-val)
(with-continuation-mark cm-key cm-val
(begin
#;(printf "r: w-c-m ~S ~S~n" cm-key cm-val)
(resume (list* (vector #f cms) fs) val))))]
[(vector f cm)
(resume (list* (vector f #f) (vector #f cm) fs) val)])]))
;; rebuild-cms : frames (-> value) -> value
(define (rebuild-cms frames thunk)
#;(printf "~S~n" `(rebuild-cms ,frames ,thunk))
(match frames
[(list)
(thunk)]
[(list-rest f fs)
(match f
[(vector f #f)
(rebuild-cms fs thunk)]
[(vector f (list))
(rebuild-cms fs thunk)]
[(vector f (list-rest (list-rest cm-key cm-val) cms))
#lang scheme/base
(require (lib "list.ss")
(lib "plt-match.ss")
(lib "serialize.ss")
"../private/define-closure.ss"
"../lang/web-cells.ss")
(provide
;; AUXILLIARIES
abort
resume
the-cont-key
the-save-cm-key
safe-call?
the-undef
activation-record-list
current-saved-continuation-marks-and
;; "SERVLET" INTERFACE
send/suspend
;; "CLIENT" INTERFACE
dispatch-start
dispatch)
;; **********************************************************************
;; **********************************************************************
;; AUXILLIARIES
(define-struct mark-key ())
(define the-cont-key (make-mark-key))
(define the-save-cm-key (make-mark-key))
(define safe-call? (make-mark-key))
(define web-prompt (make-continuation-prompt-tag 'web))
(define (current-saved-continuation-marks-and key val)
(reverse
(list* (cons key val)
(let-values ([(current)
(continuation-mark-set->list (current-continuation-marks) the-save-cm-key)])
(if (empty? current)
empty
(first current))))))
;; current-continuation-as-list: -> (listof value)
;; check the safety marks and return the list of marks representing the continuation
(define (activation-record-list)
(let* ([cm (current-continuation-marks)]
[sl (reverse (continuation-mark-set->list cm safe-call?))])
(if (andmap (lambda (x)
(if (pair? x)
(car x)
x))
sl)
(begin #;(printf "Safe continuation capture from ~S with cm ~S~n" sl cm)
#;(printf "CMs: ~S~n" (continuation-mark-set->list* cm (list the-cont-key the-save-cm-key)))
(reverse (continuation-mark-set->list* cm (list the-cont-key the-save-cm-key))))
(error "Attempt to capture a continuation from within an unsafe context:" sl))))
;; abort: ( -> alpha) -> alpha
;; erase the stack and apply a thunk
(define (abort thunk)
#;(printf "abort ~S~n" thunk)
(abort-current-continuation web-prompt thunk))
;; resume: (listof (value -> value)) value -> value
;; resume a computation given a value and list of frame procedures
(define (resume frames val)
#;(printf "~S~n" `(resume ,frames ,val))
(match frames
[(list)
(apply values val)]
[(list-rest f fs)
(match f
[(vector #f #f)
(error 'resume "Empty frame")]
[(vector f #f)
(call-with-values (lambda () (with-continuation-mark the-cont-key f (resume fs val)))
f)]
[(vector #f (list))
(resume fs val)]
[(vector #f (list-rest (list-rest cm-key cm-val) cms))
(with-continuation-mark
the-save-cm-key
(current-saved-continuation-marks-and cm-key cm-val)
(with-continuation-mark cm-key cm-val
(begin
#;(printf "rcm: w-c-m ~S ~S~n" cm-key cm-val)
(rebuild-cms (list* (vector #f cms) fs) thunk)))])]))
(define (abort/cc thunk)
(call-with-continuation-prompt
thunk
web-prompt))
;; a serializable undefined value
(define-serializable-struct undef ())
(define the-undef (make-undef))
;; **********************************************************************
;; **********************************************************************
;; "SERVLET" INTERFACE
(define-closure kont x (wcs current-marks)
(abort (lambda ()
; Restoring the web-cells is separate from the continuation
(restore-web-cell-set! wcs)
(resume current-marks x))))
;; send/suspend: (continuation -> response) -> request
;; produce the current response and wait for the next request
(define (send/suspend response-maker)
(with-continuation-mark safe-call? '(#t send/suspend)
(let ([current-marks (activation-record-list)]
[wcs (capture-web-cell-set)])
((lambda (k)
(abort (lambda ()
; Since we escaped from the previous context, we need to re-install the user's continuation-marks
(rebuild-cms current-marks (lambda () (response-maker k))))))
(make-kont (lambda () (values wcs current-marks)))))))
;; **********************************************************************
;; **********************************************************************
;; "CLIENT" INTERFACE
;; dispatch-start: (request -> response) request -> reponse
;; pass the initial request to the starting interaction point
(define (dispatch-start start req0)
(abort/cc
(lambda ()
(with-continuation-mark safe-call? '(#t start)
(start
(with-continuation-mark the-cont-key start
req0))))))
;; dispatch: (request -> (request -> response)) request -> response
;; lookup the continuation for this request and invoke it
(define (dispatch decode-continuation req)
(abort/cc
(lambda ()
(cond
[(decode-continuation req)
=> (lambda (k) (k req))]
[else
(error "no continuation associated with the provided request")])))))
#;(printf "r: w-c-m ~S ~S~n" cm-key cm-val)
(resume (list* (vector #f cms) fs) val))))]
[(vector f cm)
(resume (list* (vector f #f) (vector #f cm) fs) val)])]))
;; rebuild-cms : frames (-> value) -> value
(define (rebuild-cms frames thunk)
#;(printf "~S~n" `(rebuild-cms ,frames ,thunk))
(match frames
[(list)
(thunk)]
[(list-rest f fs)
(match f
[(vector f #f)
(rebuild-cms fs thunk)]
[(vector f (list))
(rebuild-cms fs thunk)]
[(vector f (list-rest (list-rest cm-key cm-val) cms))
(with-continuation-mark cm-key cm-val
(begin
#;(printf "rcm: w-c-m ~S ~S~n" cm-key cm-val)
(rebuild-cms (list* (vector #f cms) fs) thunk)))])]))
(define (abort/cc thunk)
(call-with-continuation-prompt
thunk
web-prompt))
;; a serializable undefined value
(define-serializable-struct undef ())
(define the-undef (make-undef))
;; **********************************************************************
;; **********************************************************************
;; "SERVLET" INTERFACE
(define-closure kont x (wcs current-marks)
(abort (lambda ()
; Restoring the web-cells is separate from the continuation
(restore-web-cell-set! wcs)
(resume current-marks x))))
;; send/suspend: (continuation -> response) -> request
;; produce the current response and wait for the next request
(define (send/suspend response-maker)
(with-continuation-mark safe-call? '(#t send/suspend)
(let ([current-marks (activation-record-list)]
[wcs (capture-web-cell-set)])
((lambda (k)
(abort (lambda ()
; Since we escaped from the previous context, we need to re-install the user's continuation-marks
(rebuild-cms current-marks (lambda () (response-maker k))))))
(make-kont (lambda () (values wcs current-marks)))))))
;; **********************************************************************
;; **********************************************************************
;; "CLIENT" INTERFACE
;; dispatch-start: (request -> response) request -> reponse
;; pass the initial request to the starting interaction point
(define (dispatch-start start req0)
(abort/cc
(lambda ()
(with-continuation-mark safe-call? '(#t start)
(start
(with-continuation-mark the-cont-key start
req0))))))
;; dispatch: (request -> (request -> response)) request -> response
;; lookup the continuation for this request and invoke it
(define (dispatch decode-continuation req)
(abort/cc
(lambda ()
(cond
[(decode-continuation req)
=> (lambda (k) (k req))]
[else
(error "no continuation associated with the provided request")]))))

View File

@ -1,183 +1,180 @@
(module anormal mzscheme
(require-for-template mzscheme)
(require (lib "kerncase.ss" "syntax")
(lib "list.ss")
(lib "plt-match.ss")
"util.ss")
(provide make-anormal-term)
#lang scheme/base
(require (lib "kerncase.ss" "syntax")
(lib "list.ss")
(lib "plt-match.ss")
"util.ss")
(provide make-anormal-term)
; A-Normal Form
(define (id x) x)
;; a context is either
;; frame
;; (compose context frame)
;; a frame is either
;; w -> target-redex
;; (listof w) -> target-redex
;; compose: (w -> target-expr) (alpha -> target-redex) -> (alpha -> target-expr)
;; compose a context with a frame
(define (compose ctxt frame)
(if (eq? ctxt id)
frame
(lambda (val)
(let-values ([(x ref-to-x) (generate-formal 'x)])
#`(#%plain-app (#%plain-lambda (#,x) #,(ctxt ref-to-x)) #,(frame val))))))
(define (make-anormal-term elim-letrec-term)
(define (anormal-term stx)
(anormal id stx))
; A-Normal Form
(define (id x) x)
;; a context is either
;; frame
;; (compose context frame)
;; a frame is either
;; w -> target-redex
;; (listof w) -> target-redex
;; compose: (w -> target-expr) (alpha -> target-redex) -> (alpha -> target-expr)
;; compose a context with a frame
(define (compose ctxt frame)
(if (eq? ctxt id)
frame
(lambda (val)
(let-values ([(x ref-to-x) (generate-formal 'x)])
#`(#%app (lambda (#,x) #,(ctxt ref-to-x)) #,(frame val))))))
(define (make-anormal-term elim-letrec-term)
(define (anormal-term stx)
(anormal id stx))
(define (anormal ctxt stx)
(recertify
stx
(kernel-syntax-case
stx (transformer?)
[(begin)
(anormal ctxt (syntax/loc stx (#%app (#%top . void))))]
[(begin lbe)
(anormal ctxt (syntax/loc stx lbe))]
[(begin fbe be ...)
(define (anormal ctxt stx)
(recertify
stx
(kernel-syntax-case
stx (transformer?)
[(begin)
(anormal ctxt (syntax/loc stx (#%plain-app (#%top . void))))]
[(begin lbe)
(anormal ctxt (syntax/loc stx lbe))]
[(begin fbe be ...)
(anormal ctxt
(syntax/loc stx
(#%plain-app call-with-values
(#%plain-lambda () fbe)
(#%plain-lambda throw-away
(begin be ...)))))]
[(begin0)
(anormal ctxt (syntax/loc stx (#%plain-app (#%top . void))))]
[(begin0 lbe)
(anormal ctxt (syntax/loc stx lbe))]
[(begin0 fbe be ...)
(let-values ([(save ref-to-save) (generate-formal 'save)])
(anormal ctxt
(syntax/loc stx
(#%app call-with-values
(lambda () fbe)
(lambda throw-away
(begin be ...)))))]
[(begin0)
(anormal ctxt (syntax/loc stx (#%app (#%top . void))))]
[(begin0 lbe)
(anormal ctxt (syntax/loc stx lbe))]
[(begin0 fbe be ...)
(let-values ([(save ref-to-save) (generate-formal 'save)])
(anormal ctxt
(quasisyntax/loc stx
(#%app call-with-values
(lambda () fbe)
(lambda #,save
(begin be ...
(#%app apply values #,ref-to-save)))))))]
[(define-values (v ...) ve)
(with-syntax ([ve (anormal-term #'ve)])
(syntax/loc stx
(define-values (v ...) ve)))]
[(define-syntaxes (v ...) ve)
stx]
[(define-values-for-syntax (v ...) ve)
stx]
[(set! v ve)
(anormal
(compose ctxt
(lambda (val)
(quasisyntax/loc stx (set! v #,val))))
#'ve)]
[(let-values () be)
(anormal ctxt (syntax/loc stx be))]
[(let-values ([(v) ve]) be)
(anormal ctxt
(syntax/loc stx
(#%app (lambda (v) be)
ve)))]
[(let-values ([(v ...) ve]) be)
(anormal ctxt
(syntax/loc stx
(#%app call-with-values
(lambda () ve)
(lambda (v ...) be))))]
[(let-values ([(fv ...) fve] [(v ...) ve] ...) be)
(anormal ctxt
(syntax/loc stx
(let-values ([(fv ...) fve])
(let-values ([(v ...) ve] ...)
be))))]
[(let-values ([(v ...) ve] ...) be ...)
(anormal ctxt
(syntax/loc stx
(quasisyntax/loc stx
(#%plain-app call-with-values
(#%plain-lambda () fbe)
(#%plain-lambda #,save
(begin be ...
(#%plain-app apply values #,ref-to-save)))))))]
[(define-values (v ...) ve)
(with-syntax ([ve (anormal-term #'ve)])
(syntax/loc stx
(define-values (v ...) ve)))]
[(define-syntaxes (v ...) ve)
stx]
[(define-values-for-syntax (v ...) ve)
stx]
[(set! v ve)
(anormal
(compose ctxt
(lambda (val)
(quasisyntax/loc stx (set! v #,val))))
#'ve)]
[(let-values () be)
(anormal ctxt (syntax/loc stx be))]
[(let-values ([(v) ve]) be)
(anormal ctxt
(syntax/loc stx
(#%plain-app (#%plain-lambda (v) be)
ve)))]
[(let-values ([(v ...) ve]) be)
(anormal ctxt
(syntax/loc stx
(#%plain-app call-with-values
(#%plain-lambda () ve)
(#%plain-lambda (v ...) be))))]
[(let-values ([(fv ...) fve] [(v ...) ve] ...) be)
(anormal ctxt
(syntax/loc stx
(let-values ([(fv ...) fve])
(let-values ([(v ...) ve] ...)
(begin be ...))))]
[(letrec-values ([(v ...) ve] ...) be ...)
(anormal ctxt
(elim-letrec-term stx))]
[(#%plain-lambda formals be ...)
(with-syntax ([nbe (anormal-term (syntax/loc stx (begin be ...)))])
(ctxt (syntax/loc stx (#%plain-lambda formals nbe))))]
[(case-lambda [formals be] ...)
(with-syntax ([(be ...) (map anormal-term (syntax->list #'(be ...)))])
(ctxt (syntax/loc stx (case-lambda [formals be] ...))))]
[(case-lambda [formals be ...] ...)
(anormal ctxt
(syntax/loc stx (case-lambda [formals (begin be ...)] ...)))]
[(if te ce ae)
(anormal
(compose ctxt
(lambda (val)
(quasisyntax/loc stx
(if #,val
#,(anormal-term #'ce)
#,(anormal-term #'ae)))))
#'te)]
[(if te ce)
(anormal ctxt (syntax/loc stx (if te ce (#%app void))))]
[(quote datum)
(ctxt stx)]
[(quote-syntax datum)
(ctxt stx)]
[(letrec-syntaxes+values ([(sv ...) se] ...)
([(vv ...) ve] ...)
be ...)
(anormal ctxt
(elim-letrec-term stx))]
[(with-continuation-mark ke me be)
(anormal
(compose ctxt
(lambda (kev)
(anormal
(lambda (mev)
(quasisyntax/loc stx
(with-continuation-mark #,kev #,mev
#,(anormal-term #'be))))
#'me)))
#'ke)]
[(#%expression d)
(anormal
(compose ctxt
(lambda (d)
(quasisyntax/loc stx (#%expression #,d))))
#'d)]
[(#%plain-app fe e ...)
(anormal
(lambda (val0)
(anormal*
(compose ctxt
(lambda (rest-vals)
(quasisyntax/loc stx
(#%plain-app #,val0 #,@rest-vals))))
(syntax->list #'(e ...))))
#'fe)]
[(#%top . v)
(ctxt stx)]
[(#%variable-reference . v)
(ctxt stx)]
[id (identifier? #'id)
(ctxt #'id)]
[_
(raise-syntax-error 'anormal "Dropped through:" stx)])))
;; anormal*: ((listof w) -> target-expr) (listof source-expr) -> target-expr
;; normalize an expression given as a context and list of sub-expressions
(define (anormal* multi-ctxt exprs)
(match exprs
[(list)
(multi-ctxt '())]
[(list-rest fe re)
(anormal
(lambda (val)
(anormal*
(lambda (rest-vals)
(multi-ctxt (list* val rest-vals)))
re))
fe)]))
anormal-term))
be))))]
[(let-values ([(v ...) ve] ...) be ...)
(anormal ctxt
(syntax/loc stx
(let-values ([(v ...) ve] ...)
(begin be ...))))]
[(letrec-values ([(v ...) ve] ...) be ...)
(anormal ctxt
(elim-letrec-term stx))]
[(#%plain-lambda formals be ...)
(with-syntax ([nbe (anormal-term (syntax/loc stx (begin be ...)))])
(ctxt (syntax/loc stx (#%plain-lambda formals nbe))))]
[(case-lambda [formals be] ...)
(with-syntax ([(be ...) (map anormal-term (syntax->list #'(be ...)))])
(ctxt (syntax/loc stx (case-lambda [formals be] ...))))]
[(case-lambda [formals be ...] ...)
(anormal ctxt
(syntax/loc stx (case-lambda [formals (begin be ...)] ...)))]
[(if te ce ae)
(anormal
(compose ctxt
(lambda (val)
(quasisyntax/loc stx
(if #,val
#,(anormal-term #'ce)
#,(anormal-term #'ae)))))
#'te)]
[(quote datum)
(ctxt stx)]
[(quote-syntax datum)
(ctxt stx)]
[(letrec-syntaxes+values ([(sv ...) se] ...)
([(vv ...) ve] ...)
be ...)
(anormal ctxt
(elim-letrec-term stx))]
[(with-continuation-mark ke me be)
(anormal
(compose ctxt
(lambda (kev)
(anormal
(lambda (mev)
(quasisyntax/loc stx
(with-continuation-mark #,kev #,mev
#,(anormal-term #'be))))
#'me)))
#'ke)]
[(#%expression d)
(anormal
(compose ctxt
(lambda (d)
(quasisyntax/loc stx (#%expression #,d))))
#'d)]
[(#%plain-app fe e ...)
(anormal
(lambda (val0)
(anormal*
(compose ctxt
(lambda (rest-vals)
(quasisyntax/loc stx
(#%plain-app #,val0 #,@rest-vals))))
(syntax->list #'(e ...))))
#'fe)]
[(#%top . v)
(ctxt stx)]
[(#%variable-reference . v)
(ctxt stx)]
[id (identifier? #'id)
(ctxt #'id)]
[_
(raise-syntax-error 'anormal "Dropped through:" stx)])))
;; anormal*: ((listof w) -> target-expr) (listof source-expr) -> target-expr
;; normalize an expression given as a context and list of sub-expressions
(define (anormal* multi-ctxt exprs)
(match exprs
[(list)
(multi-ctxt '())]
[(list-rest fe re)
(anormal
(lambda (val)
(anormal*
(lambda (rest-vals)
(multi-ctxt (list* val rest-vals)))
re))
fe)]))
anormal-term)

View File

@ -1,151 +1,148 @@
(module defun mzscheme
(require-for-template mzscheme)
(require (lib "kerncase.ss" "syntax")
(lib "list.ss")
(lib "plt-match.ss")
"util.ss"
"freevars.ss"
"../private/closure.ss")
(provide defun)
; make-new-clouse-label : (syntax -> syntax) syntax -> syntax
(define (make-new-closure-label labeling stx)
(labeling stx))
; defun : syntax[1] -> (values syntax?[2] (listof syntax?)[3])
; defunctionalizes the first syntax, returning the second and the lifted lambdas [3]
(define (defun stx)
(recertify/new-defs
stx
(lambda ()
(kernel-syntax-case
stx (transformer?)
[(begin be ...)
(let-values ([(nbes defs) (defun* (syntax->list #'(be ...)))])
(values (quasisyntax/loc stx (begin #,@nbes))
defs))]
[(begin0 be ...)
(let-values ([(nbes defs) (defun* (syntax->list #'(be ...)))])
(values (quasisyntax/loc stx (begin0 #,@nbes))
defs))]
[(define-values (v ...) ve)
(let-values ([(nve defs) (defun #'ve)])
(values (quasisyntax/loc stx (define-values (v ...) #,nve))
defs))]
[(define-syntaxes (v ...) ve)
(values stx
empty)]
[(define-values-for-syntax (v ...) ve)
(values stx
empty)]
[(set! v ve)
(let-values ([(nve defs) (defun #'ve)])
(values (quasisyntax/loc stx (set! v #,nve))
defs))]
[(let-values ([(v ...) ve] ...) be ...)
(let-values ([(nves ve-defs) (defun* (syntax->list #'(ve ...)))]
[(nbes be-defs) (defun* (syntax->list #'(be ...)))])
(with-syntax ([(nve ...) nves]
[(nbe ...) nbes])
(values (syntax/loc stx (let-values ([(v ...) nve] ...) nbe ...))
(append ve-defs be-defs))))]
[(letrec-values ([(v ...) ve] ...) be ...)
(let-values ([(nves ve-defs) (defun* (syntax->list #'(ve ...)))]
[(nbes be-defs) (defun* (syntax->list #'(be ...)))])
(with-syntax ([(nve ...) nves]
[(nbe ...) nbes])
(values (syntax/loc stx (letrec-values ([(v ...) nve] ...) nbe ...))
(append ve-defs be-defs))))]
[(#%plain-lambda formals be ...)
(let-values ([(nbes be-defs) (defun* (syntax->list #'(be ...)))])
(with-syntax ([(nbe ...) nbes])
(let ([fvars (free-vars stx)])
(let-values ([(make-CLOSURE new-defs)
(make-closure-definition-syntax
(make-new-closure-label (current-code-labeling) stx)
fvars
(syntax/loc stx (#%plain-lambda formals nbe ...)))])
(values (if (empty? fvars)
(quasisyntax/loc stx (#,make-CLOSURE))
(quasisyntax/loc stx (#,make-CLOSURE (#%plain-lambda () (values #,@fvars)))))
(append be-defs new-defs))))))]
[(case-lambda [formals be ...] ...)
(let-values ([(nbes be-defs) (defun** (syntax->list #'((be ...) ...)))])
(with-syntax ([((nbe ...) ...) nbes])
(let ([fvars (free-vars stx)])
(let-values ([(make-CLOSURE new-defs)
(make-closure-definition-syntax
(make-new-closure-label (current-code-labeling) stx)
fvars
(syntax/loc stx (case-lambda [formals nbe ...] ...)))])
(values (if (empty? fvars)
(quasisyntax/loc stx (#,make-CLOSURE))
(quasisyntax/loc stx (#,make-CLOSURE (lambda () (values #,@fvars)))))
(append be-defs new-defs))))))]
[(if te ce ae)
(let-values ([(es defs) (defun* (syntax->list #'(te ce ae)))])
(values (quasisyntax/loc stx (if #,@es))
defs))]
[(if te ce)
(defun (quasisyntax/loc stx (if te ce (#%app void))))]
[(quote datum)
(values stx
empty)]
[(quote-syntax datum)
(values stx
empty)]
[(letrec-syntaxes+values ([(sv ...) se] ...)
([(vv ...) ve] ...)
be ...)
(let-values ([(nses se-defs) (defun* (syntax->list #'(se ...)))]
[(nves ve-defs) (defun* (syntax->list #'(ve ...)))]
[(nbes be-defs) (defun* (syntax->list #'(be ...)))])
(with-syntax ([(nse ...) nses]
[(nve ...) nves]
[(nbe ...) nbes])
(values (syntax/loc stx
(letrec-syntaxes+values ([(sv ...) nse] ...)
([(vv ...) nve] ...)
nbe ...))
(append se-defs ve-defs be-defs))))]
[(with-continuation-mark ke me be)
(let-values ([(es defs) (defun* (list #'ke #'me #'be))])
(values (quasisyntax/loc stx (with-continuation-mark #,@es))
defs))]
[(#%expression d)
(let-values ([(nd d-defs) (defun #'d)])
(values (quasisyntax/loc stx (#%expression #,nd))
d-defs))]
[(#%plain-app e ...)
(let-values ([(es defs) (defun* (syntax->list #'(e ...)))])
(values (quasisyntax/loc stx (#%plain-app #,@es))
defs))]
[(#%top . v)
(values stx
empty)]
[(#%variable-reference . v)
(values stx
empty)]
[id (identifier? #'id)
(values stx
empty)]
[_
(raise-syntax-error 'defun "Dropped through:" stx)]))))
; lift defun to list of syntaxes
(define (lift-defun defun)
(lambda (stxs)
(match
(foldl (lambda (stx acc)
(let-values ([(nstx stx-defs) (defun stx)])
(match acc
[(list-rest nstxs defs)
(cons (list* nstx nstxs)
(append stx-defs defs))])))
(cons empty empty)
stxs)
[(list-rest nstxs defs)
(values (reverse nstxs)
defs)])))
(define defun* (lift-defun defun))
(define defun** (lift-defun (lambda (stx) (defun* (syntax->list stx))))))
#lang scheme/base
(require (lib "kerncase.ss" "syntax")
(lib "list.ss")
(lib "plt-match.ss")
"util.ss"
"freevars.ss"
"../private/closure.ss")
(provide defun)
; make-new-clouse-label : (syntax -> syntax) syntax -> syntax
(define (make-new-closure-label labeling stx)
(labeling stx))
; defun : syntax[1] -> (values syntax?[2] (listof syntax?)[3])
; defunctionalizes the first syntax, returning the second and the lifted lambdas [3]
(define (defun stx)
(recertify/new-defs
stx
(lambda ()
(kernel-syntax-case
stx (transformer?)
[(begin be ...)
(let-values ([(nbes defs) (defun* (syntax->list #'(be ...)))])
(values (quasisyntax/loc stx (begin #,@nbes))
defs))]
[(begin0 be ...)
(let-values ([(nbes defs) (defun* (syntax->list #'(be ...)))])
(values (quasisyntax/loc stx (begin0 #,@nbes))
defs))]
[(define-values (v ...) ve)
(let-values ([(nve defs) (defun #'ve)])
(values (quasisyntax/loc stx (define-values (v ...) #,nve))
defs))]
[(define-syntaxes (v ...) ve)
(values stx
empty)]
[(define-values-for-syntax (v ...) ve)
(values stx
empty)]
[(set! v ve)
(let-values ([(nve defs) (defun #'ve)])
(values (quasisyntax/loc stx (set! v #,nve))
defs))]
[(let-values ([(v ...) ve] ...) be ...)
(let-values ([(nves ve-defs) (defun* (syntax->list #'(ve ...)))]
[(nbes be-defs) (defun* (syntax->list #'(be ...)))])
(with-syntax ([(nve ...) nves]
[(nbe ...) nbes])
(values (syntax/loc stx (let-values ([(v ...) nve] ...) nbe ...))
(append ve-defs be-defs))))]
[(letrec-values ([(v ...) ve] ...) be ...)
(let-values ([(nves ve-defs) (defun* (syntax->list #'(ve ...)))]
[(nbes be-defs) (defun* (syntax->list #'(be ...)))])
(with-syntax ([(nve ...) nves]
[(nbe ...) nbes])
(values (syntax/loc stx (letrec-values ([(v ...) nve] ...) nbe ...))
(append ve-defs be-defs))))]
[(#%plain-lambda formals be ...)
(let-values ([(nbes be-defs) (defun* (syntax->list #'(be ...)))])
(with-syntax ([(nbe ...) nbes])
(let ([fvars (free-vars stx)])
(let-values ([(make-CLOSURE new-defs)
(make-closure-definition-syntax
(make-new-closure-label (current-code-labeling) stx)
fvars
(syntax/loc stx (#%plain-lambda formals nbe ...)))])
(values (if (empty? fvars)
(quasisyntax/loc stx (#,make-CLOSURE))
(quasisyntax/loc stx (#,make-CLOSURE (#%plain-lambda () (values #,@fvars)))))
(append be-defs new-defs))))))]
[(case-lambda [formals be ...] ...)
(let-values ([(nbes be-defs) (defun** (syntax->list #'((be ...) ...)))])
(with-syntax ([((nbe ...) ...) nbes])
(let ([fvars (free-vars stx)])
(let-values ([(make-CLOSURE new-defs)
(make-closure-definition-syntax
(make-new-closure-label (current-code-labeling) stx)
fvars
(syntax/loc stx (case-lambda [formals nbe ...] ...)))])
(values (if (empty? fvars)
(quasisyntax/loc stx (#,make-CLOSURE))
(quasisyntax/loc stx (#,make-CLOSURE (lambda () (values #,@fvars)))))
(append be-defs new-defs))))))]
[(if te ce ae)
(let-values ([(es defs) (defun* (syntax->list #'(te ce ae)))])
(values (quasisyntax/loc stx (if #,@es))
defs))]
[(quote datum)
(values stx
empty)]
[(quote-syntax datum)
(values stx
empty)]
[(letrec-syntaxes+values ([(sv ...) se] ...)
([(vv ...) ve] ...)
be ...)
(let-values ([(nses se-defs) (defun* (syntax->list #'(se ...)))]
[(nves ve-defs) (defun* (syntax->list #'(ve ...)))]
[(nbes be-defs) (defun* (syntax->list #'(be ...)))])
(with-syntax ([(nse ...) nses]
[(nve ...) nves]
[(nbe ...) nbes])
(values (syntax/loc stx
(letrec-syntaxes+values ([(sv ...) nse] ...)
([(vv ...) nve] ...)
nbe ...))
(append se-defs ve-defs be-defs))))]
[(with-continuation-mark ke me be)
(let-values ([(es defs) (defun* (list #'ke #'me #'be))])
(values (quasisyntax/loc stx (with-continuation-mark #,@es))
defs))]
[(#%expression d)
(let-values ([(nd d-defs) (defun #'d)])
(values (quasisyntax/loc stx (#%expression #,nd))
d-defs))]
[(#%plain-app e ...)
(let-values ([(es defs) (defun* (syntax->list #'(e ...)))])
(values (quasisyntax/loc stx (#%plain-app #,@es))
defs))]
[(#%top . v)
(values stx
empty)]
[(#%variable-reference . v)
(values stx
empty)]
[id (identifier? #'id)
(values stx
empty)]
[_
(raise-syntax-error 'defun "Dropped through:" stx)]))))
; lift defun to list of syntaxes
(define (lift-defun defun)
(lambda (stxs)
(match
(foldl (lambda (stx acc)
(let-values ([(nstx stx-defs) (defun stx)])
(match acc
[(list-rest nstxs defs)
(cons (list* nstx nstxs)
(append stx-defs defs))])))
(cons empty empty)
stxs)
[(list-rest nstxs defs)
(values (reverse nstxs)
defs)])))
(define defun* (lift-defun defun))
(define defun** (lift-defun (lambda (stx) (defun* (syntax->list stx)))))

View File

@ -1,174 +1,170 @@
(module elim-callcc mzscheme
(require-for-template mzscheme
"../lang/abort-resume.ss")
(require-for-syntax "../lang/abort-resume.ss")
(require (lib "kerncase.ss" "syntax")
"util.ss")
(provide elim-callcc)
(define (id x) x)
;; mark-lambda-as-safe: w -> w
;; If w is a lambda-expression then add #t to the safety mark, otherwise no mark
(define (mark-lambda-as-safe w)
(recertify
w
(syntax-case w (lambda case-lambda)
[(lambda formals be ...)
(syntax/loc w
(lambda formals
(with-continuation-mark safe-call? '(#t (lambda formals))
be ...)))]
[(case-lambda [formals be ...] ...)
(syntax/loc w
(case-lambda [formals
(with-continuation-mark safe-call? '(#t (case-lambda formals ...))
be ...)] ...))]
[_else w])))
(define (elim-callcc stx)
(elim-callcc/mark id stx))
(define (elim-callcc/mark markit stx)
(recertify
stx
(kernel-syntax-case*
stx (transformer?) (call/cc call-with-values)
[(begin be ...)
(raise-syntax-error 'elim-callcc/mark "Not in ANF" stx)]
[(begin0 be ...)
(raise-syntax-error 'elim-callcc/mark "Not in ANF" stx)]
[(define-values (v ...) ve)
(with-syntax ([ve (mark-lambda-as-safe (elim-callcc #'ve))])
(syntax/loc stx
(define-values (v ...) ve)))]
[(define-syntaxes (v ...) ve)
stx]
[(define-values-for-syntax (v ...) ve)
stx]
[(set! v ve)
(with-syntax ([ve (elim-callcc #'ve)])
(syntax/loc stx (set! v ve)))]
[(let-values ([(v ...) ve] ...) be ...)
(raise-syntax-error 'elim-callcc/mark "Not in ANF" stx)]
[(letrec-values ([(v ...) ve] ...) be ...)
(raise-syntax-error 'elim-callcc/mark "Not in ANF" stx)]
[(#%plain-lambda formals be)
(with-syntax ([be (elim-callcc #'be)])
(syntax/loc stx
(#%plain-lambda formals be)))]
[(case-lambda [formals be] ...)
(with-syntax ([(be ...) (map elim-callcc (syntax->list #'(be ...)))])
(syntax/loc stx
(case-lambda [formals be] ...)))]
[(if te ce ae)
(with-syntax ([te (elim-callcc #'te)]
[ce (elim-callcc #'ce)]
[ae (elim-callcc #'ae)])
(markit (syntax/loc stx (if te ce ae))))]
[(if te ce)
(raise-syntax-error 'elim-callcc/mark "Not in ANF" stx)]
[(quote datum)
stx]
[(quote-syntax datum)
stx]
[(letrec-syntaxes+values ([(sv ...) se] ...)
([(vv ...) ve] ...)
be ...)
(raise-syntax-error 'elim-callcc/mark "Not in ANF" stx)]
[(with-continuation-mark ke me be)
(let* ([ke-prime (elim-callcc #'ke)]
[me-prime (elim-callcc #'me)]
[be-prime (elim-callcc #'be)])
; XXX Could be dangerous to evaluate ke-prime and me-prime twice
(markit
(quasisyntax/loc stx
(with-continuation-mark #,ke-prime #,me-prime
(with-continuation-mark
the-save-cm-key
(#%plain-app current-saved-continuation-marks-and #,ke-prime #,me-prime)
#,be-prime)))))]
[(#%expression d)
(markit (quasisyntax/loc stx (#%expression #,(elim-callcc #'d))))]
[(#%plain-app call/cc w)
(let-values ([(cm ref-to-cm) (generate-formal 'current-marks)]
[(x ref-to-x) (generate-formal 'x)])
(markit
(quasisyntax/loc stx
(#%plain-app #,(elim-callcc #'w)
(#%plain-app (#%plain-lambda (#,cm)
(#%plain-lambda #,x
(#%plain-app abort
(#%plain-lambda () (#%plain-app resume #,ref-to-cm #,ref-to-x)))))
(#%plain-app activation-record-list))))))]
[(#%plain-app call-with-values (#%plain-lambda () prod) cons)
(let ([cons-prime (datum->syntax-object #f (gensym 'cons))])
(quasisyntax/loc stx
(let-values ([(#,cons-prime) #,(mark-lambda-as-safe (elim-callcc #'cons))])
#,(markit
(quasisyntax/loc stx
(#%plain-app call-with-values
#,(mark-lambda-as-safe
(quasisyntax/loc stx
(#%plain-lambda ()
#,(elim-callcc/mark
(lambda (x)
(quasisyntax/loc stx
(with-continuation-mark the-cont-key #,cons-prime #,x)))
#'prod))))
#,cons-prime))))))]
[(#%plain-app w (#%plain-app . stuff))
(with-syntax ([e #'(#%plain-app . stuff)])
(syntax-case #'w (#%plain-lambda case-lambda)
[(#%plain-lambda formals body)
(let ([w-prime (datum->syntax-object #f (gensym 'l))])
(quasisyntax/loc stx
(let-values ([(#,w-prime) #,(elim-callcc #'w)])
#,(markit
(quasisyntax/loc stx
(#%plain-app #,w-prime
#,(elim-callcc/mark
(lambda (x)
(quasisyntax/loc stx
(with-continuation-mark the-cont-key #,w-prime #,x)))
#'e)))))))]
[(case-lambda [formals body] ...)
(let ([w-prime (datum->syntax-object #f (gensym 'cl))])
(quasisyntax/loc stx
(let-values ([(#,w-prime) #,(elim-callcc #'w)])
#,(markit
(quasisyntax/loc stx
(#%plain-app #,w-prime
#,(elim-callcc/mark
(lambda (x)
(quasisyntax/loc stx
(with-continuation-mark the-cont-key #,w-prime #,x)))
#'e)))))))]
[_else
(let ([w-prime (elim-callcc #'w)])
(markit
(quasisyntax/loc stx
(#%plain-app #,w-prime
#,(elim-callcc/mark
(lambda (x)
#`(with-continuation-mark the-cont-key #,w-prime #,x))
#'e)))))]))]
[(#%plain-app w rest ...)
(markit
#lang scheme/base
(require (lib "kerncase.ss" "syntax")
(for-syntax "../lang/abort-resume.ss")
"util.ss")
(provide elim-callcc)
(define (id x) x)
;; mark-lambda-as-safe: w -> w
;; If w is a lambda-expression then add #t to the safety mark, otherwise no mark
(define (mark-lambda-as-safe w)
(recertify
w
(syntax-case w (#%plain-lambda case-lambda)
[(#%plain-lambda formals be ...)
(syntax/loc w
(#%plain-lambda formals
(with-continuation-mark safe-call? '(#t (lambda formals))
be ...)))]
[(case-lambda [formals be ...] ...)
(syntax/loc w
(case-lambda [formals
(with-continuation-mark safe-call? '(#t (case-lambda formals ...))
be ...)] ...))]
[_else w])))
(define (elim-callcc stx)
(elim-callcc/mark id stx))
(define (elim-callcc/mark markit stx)
(recertify
stx
(kernel-syntax-case*
stx (transformer?) (call/cc call-with-values)
[(begin be ...)
(raise-syntax-error 'elim-callcc/mark "Not in ANF" stx)]
[(begin0 be ...)
(raise-syntax-error 'elim-callcc/mark "Not in ANF" stx)]
[(define-values (v ...) ve)
(with-syntax ([ve (mark-lambda-as-safe (elim-callcc #'ve))])
(syntax/loc stx
(define-values (v ...) ve)))]
[(define-syntaxes (v ...) ve)
stx]
[(define-values-for-syntax (v ...) ve)
stx]
[(set! v ve)
(with-syntax ([ve (elim-callcc #'ve)])
(syntax/loc stx (set! v ve)))]
[(let-values ([(v ...) ve] ...) be ...)
(raise-syntax-error 'elim-callcc/mark "Not in ANF" stx)]
[(letrec-values ([(v ...) ve] ...) be ...)
(raise-syntax-error 'elim-callcc/mark "Not in ANF" stx)]
[(#%plain-lambda formals be)
(with-syntax ([be (elim-callcc #'be)])
(syntax/loc stx
(#%plain-lambda formals be)))]
[(case-lambda [formals be] ...)
(with-syntax ([(be ...) (map elim-callcc (syntax->list #'(be ...)))])
(syntax/loc stx
(case-lambda [formals be] ...)))]
[(if te ce ae)
(with-syntax ([te (elim-callcc #'te)]
[ce (elim-callcc #'ce)]
[ae (elim-callcc #'ae)])
(markit (syntax/loc stx (if te ce ae))))]
[(quote datum)
stx]
[(quote-syntax datum)
stx]
[(letrec-syntaxes+values ([(sv ...) se] ...)
([(vv ...) ve] ...)
be ...)
(raise-syntax-error 'elim-callcc/mark "Not in ANF" stx)]
[(with-continuation-mark ke me be)
(let* ([ke-prime (elim-callcc #'ke)]
[me-prime (elim-callcc #'me)]
[be-prime (elim-callcc #'be)])
; XXX Could be dangerous to evaluate ke-prime and me-prime twice
(markit
(quasisyntax/loc stx
(with-continuation-mark safe-call? '(#f stx)
(#%plain-app #,(mark-lambda-as-safe (elim-callcc #'w))
#,@(map
(lambda (an-expr)
(mark-lambda-as-safe
(elim-callcc
an-expr)))
(syntax->list #'(rest ...)))))))]
[(#%top . v)
stx]
[(#%variable-reference . v)
stx]
[id (identifier? #'id)
stx]
[_
(raise-syntax-error 'elim-callcc "Dropped through:" stx)]))))
(with-continuation-mark #,ke-prime #,me-prime
(with-continuation-mark
the-save-cm-key
(#%plain-app current-saved-continuation-marks-and #,ke-prime #,me-prime)
#,be-prime)))))]
[(#%expression d)
(markit (quasisyntax/loc stx (#%expression #,(elim-callcc #'d))))]
[(#%plain-app call/cc w)
(let-values ([(cm ref-to-cm) (generate-formal 'current-marks)]
[(x ref-to-x) (generate-formal 'x)])
(markit
(quasisyntax/loc stx
(#%plain-app #,(elim-callcc #'w)
(#%plain-app (#%plain-lambda (#,cm)
(#%plain-lambda #,x
(#%plain-app abort
(#%plain-lambda () (#%plain-app resume #,ref-to-cm #,ref-to-x)))))
(#%plain-app activation-record-list))))))]
[(#%plain-app call-with-values (#%plain-lambda () prod) cons)
(let ([cons-prime (datum->syntax #f (gensym 'cons))])
(quasisyntax/loc stx
(let-values ([(#,cons-prime) #,(mark-lambda-as-safe (elim-callcc #'cons))])
#,(markit
(quasisyntax/loc stx
(#%plain-app call-with-values
#,(mark-lambda-as-safe
(quasisyntax/loc stx
(#%plain-lambda ()
#,(elim-callcc/mark
(lambda (x)
(quasisyntax/loc stx
(with-continuation-mark the-cont-key #,cons-prime #,x)))
#'prod))))
#,cons-prime))))))]
[(#%plain-app w (#%plain-app . stuff))
(with-syntax ([e #'(#%plain-app . stuff)])
(syntax-case #'w (#%plain-lambda case-lambda)
[(#%plain-lambda formals body)
(let ([w-prime (datum->syntax #f (gensym 'l))])
(quasisyntax/loc stx
(let-values ([(#,w-prime) #,(elim-callcc #'w)])
#,(markit
(quasisyntax/loc stx
(#%plain-app #,w-prime
#,(elim-callcc/mark
(lambda (x)
(quasisyntax/loc stx
(with-continuation-mark the-cont-key #,w-prime #,x)))
#'e)))))))]
[(case-lambda [formals body] ...)
(let ([w-prime (datum->syntax #f (gensym 'cl))])
(quasisyntax/loc stx
(let-values ([(#,w-prime) #,(elim-callcc #'w)])
#,(markit
(quasisyntax/loc stx
(#%plain-app #,w-prime
#,(elim-callcc/mark
(lambda (x)
(quasisyntax/loc stx
(with-continuation-mark the-cont-key #,w-prime #,x)))
#'e)))))))]
[_else
(let ([w-prime (elim-callcc #'w)])
(markit
(quasisyntax/loc stx
(#%plain-app #,w-prime
#,(elim-callcc/mark
(lambda (x)
#`(with-continuation-mark the-cont-key #,w-prime #,x))
#'e)))))]))]
[(#%plain-app w rest ...)
(markit
(quasisyntax/loc stx
(with-continuation-mark safe-call? '(#f stx)
(#%plain-app #,(mark-lambda-as-safe (elim-callcc #'w))
#,@(map
(lambda (an-expr)
(mark-lambda-as-safe
(elim-callcc
an-expr)))
(syntax->list #'(rest ...)))))))]
[(#%top . v)
stx]
[(#%variable-reference . v)
stx]
[id (identifier? #'id)
stx]
[_
(raise-syntax-error 'elim-callcc "Dropped through:" stx)])))

View File

@ -1,134 +1,128 @@
(module elim-letrec mzscheme
(require-for-template mzscheme
"../lang/abort-resume.ss")
(require-for-syntax "../lang/abort-resume.ss")
(require (lib "kerncase.ss" "syntax")
(lib "etc.ss")
(lib "list.ss")
"util.ss")
(provide (all-defined))
; elim-letrec : (listof identifier-syntax?)[3] -> syntax?[2] -> syntax?[3]
; Eliminates letrec-values from syntax[2] and correctly handles references to
; letrec-bound variables [3] therein.
(define ((elim-letrec ids) stx)
(recertify
stx
(kernel-syntax-case
stx (transformer?)
[(begin be ...)
(with-syntax ([(be ...) (map (elim-letrec ids) (syntax->list #'(be ...)))])
#lang scheme/base
(require (lib "kerncase.ss" "syntax")
(lib "etc.ss")
(lib "list.ss")
(for-syntax "../lang/abort-resume.ss")
"util.ss")
(provide (all-defined-out))
; elim-letrec : (listof identifier-syntax?)[3] -> syntax?[2] -> syntax?[3]
; Eliminates letrec-values from syntax[2] and correctly handles references to
; letrec-bound variables [3] therein.
(define ((elim-letrec ids) stx)
(recertify
stx
(kernel-syntax-case
stx (transformer?)
[(begin be ...)
(with-syntax ([(be ...) (map (elim-letrec ids) (syntax->list #'(be ...)))])
(syntax/loc stx
(begin be ...)))]
[(begin0 be ...)
(with-syntax ([(be ...) (map (elim-letrec ids) (syntax->list #'(be ...)))])
(syntax/loc stx
(begin0 be ...)))]
[(define-values (v ...) ve)
(with-syntax ([ve ((elim-letrec ids) #'ve)])
(syntax/loc stx
(define-values (v ...) ve)))]
[(define-syntaxes (v ...) ve)
stx]
[(define-values-for-syntax (v ...) ve)
stx]
[(set! v ve)
(with-syntax ([ve ((elim-letrec ids) #'ve)])
(if (bound-identifier-member? #'id ids)
(syntax/loc stx (#%plain-app set-box! id ve))
(syntax/loc stx (set! id ve))))]
[(let-values ([(v ...) ve] ...) be ...)
(with-syntax ([(ve ...) (map (elim-letrec ids) (syntax->list #'(ve ...)))]
[(be ...) (map (elim-letrec ids) (syntax->list #'(be ...)))])
(syntax/loc stx
(let-values ([(v ...) ve] ...) be ...)))]
[(letrec-values ([(v ...) ve] ...) be ...)
(let ([new-ids (apply append ids (map syntax->list (syntax->list #'((v ...) ...))))])
(with-syntax ([((nv ...) ...) (map (compose generate-temporaries syntax->list) (syntax->list #'((v ...) ...)))]
[((nv-box ...) ...) (map (lambda (nvs)
(map (lambda (x) (syntax/loc x (#%plain-app box the-undef)))
(syntax->list nvs)))
(syntax->list #`((v ...) ...)))]
[(ve ...) (map (elim-letrec new-ids) (syntax->list #'(ve ...)))]
[(be ...) (map (elim-letrec new-ids) (syntax->list #'(be ...)))])
; XXX Optimize special case of one nv
(syntax/loc stx
(begin be ...)))]
[(begin0 be ...)
(with-syntax ([(be ...) (map (elim-letrec ids) (syntax->list #'(be ...)))])
(let-values ([(v ...)
(#%plain-app values nv-box ...)] ...)
(begin (#%plain-app call-with-values
(#%plain-lambda () ve)
(#%plain-lambda (nv ...)
(#%plain-app set-box! v nv) ...))
...
be ...)))))]
[(#%plain-lambda formals be ...)
(with-syntax ([(be ...) (map (elim-letrec ids) (syntax->list #'(be ...)))])
(syntax/loc stx
(#%plain-lambda formals be ...)))]
[(case-lambda [formals be ...] ...)
(with-syntax ([((be ...) ...) (map (elim-letrec ids) (syntax->list #'((be ...) ...)))])
(syntax/loc stx
(case-lambda [formals be ...] ...)))]
[(if te ce ae)
(with-syntax ([te ((elim-letrec ids) #'te)]
[ce ((elim-letrec ids) #'ce)]
[ae ((elim-letrec ids) #'ae)])
(syntax/loc stx
(if te ce ae)))]
[(quote datum)
stx]
[(quote-syntax datum)
stx]
[(letrec-syntaxes+values ([(sv ...) se] ...)
([(vv ...) ve] ...)
be ...)
(let ([new-ids (apply append ids (map syntax->list (syntax->list #'((vv ...) ...))))])
(with-syntax ([((nvv ...) ...) (map (compose generate-temporaries syntax->list) (syntax->list #'((vv ...) ...)))]
[((nvv-box ...) ...) (map (lambda (nvs)
(map (lambda (x) (syntax/loc x (#%plain-app box the-undef)))
(syntax->list nvs)))
(syntax->list #`((vv ...) ...)))]
[(se ...) (map (elim-letrec new-ids) (syntax->list #'(se ...)))]
[(ve ...) (map (elim-letrec new-ids) (syntax->list #'(ve ...)))]
[(be ...) (map (elim-letrec new-ids) (syntax->list #'(be ...)))])
; XXX Optimize special case of one nv
(syntax/loc stx
(begin0 be ...)))]
[(define-values (v ...) ve)
(with-syntax ([ve ((elim-letrec ids) #'ve)])
(syntax/loc stx
(define-values (v ...) ve)))]
[(define-syntaxes (v ...) ve)
stx]
[(define-values-for-syntax (v ...) ve)
stx]
[(set! v ve)
(with-syntax ([ve ((elim-letrec ids) #'ve)])
(if (bound-identifier-member? #'id ids)
(syntax/loc stx (#%plain-app set-box! id ve))
(syntax/loc stx (set! id ve))))]
[(let-values ([(v ...) ve] ...) be ...)
(with-syntax ([(ve ...) (map (elim-letrec ids) (syntax->list #'(ve ...)))]
[(be ...) (map (elim-letrec ids) (syntax->list #'(be ...)))])
(syntax/loc stx
(let-values ([(v ...) ve] ...) be ...)))]
[(letrec-values ([(v ...) ve] ...) be ...)
(let ([new-ids (apply append ids (map syntax->list (syntax->list #'((v ...) ...))))])
(with-syntax ([((nv ...) ...) (map (compose generate-temporaries syntax->list) (syntax->list #'((v ...) ...)))]
[((nv-box ...) ...) (map (lambda (nvs)
(map (lambda (x) (syntax/loc x (#%plain-app box the-undef)))
(syntax->list nvs)))
(syntax->list #`((v ...) ...)))]
[(ve ...) (map (elim-letrec new-ids) (syntax->list #'(ve ...)))]
[(be ...) (map (elim-letrec new-ids) (syntax->list #'(be ...)))])
; XXX Optimize special case of one nv
(syntax/loc stx
(let-values ([(v ...)
(#%plain-app values nv-box ...)] ...)
(begin (#%plain-app call-with-values
(#%plain-lambda () ve)
(#%plain-lambda (nv ...)
(#%plain-app set-box! v nv) ...))
...
be ...)))))]
[(#%plain-lambda formals be ...)
(with-syntax ([(be ...) (map (elim-letrec ids) (syntax->list #'(be ...)))])
(syntax/loc stx
(#%plain-lambda formals be ...)))]
[(case-lambda [formals be ...] ...)
(with-syntax ([((be ...) ...) (map (elim-letrec ids) (syntax->list #'((be ...) ...)))])
(syntax/loc stx
(case-lambda [formals be ...] ...)))]
[(if te ce ae)
(with-syntax ([te ((elim-letrec ids) #'te)]
[ce ((elim-letrec ids) #'ce)]
[ae ((elim-letrec ids) #'ae)])
(syntax/loc stx
(if te ce ae)))]
[(if te ce)
((elim-letrec ids)
(syntax/loc stx
(if te ce (#%plain-app (#%top . void)))))]
[(quote datum)
stx]
[(quote-syntax datum)
stx]
[(letrec-syntaxes+values ([(sv ...) se] ...)
([(vv ...) ve] ...)
be ...)
(let ([new-ids (apply append ids (map syntax->list (syntax->list #'((vv ...) ...))))])
(with-syntax ([((nvv ...) ...) (map (compose generate-temporaries syntax->list) (syntax->list #'((vv ...) ...)))]
[((nvv-box ...) ...) (map (lambda (nvs)
(map (lambda (x) (syntax/loc x (#%plain-app box the-undef)))
(syntax->list nvs)))
(syntax->list #`((vv ...) ...)))]
[(se ...) (map (elim-letrec new-ids) (syntax->list #'(se ...)))]
[(ve ...) (map (elim-letrec new-ids) (syntax->list #'(ve ...)))]
[(be ...) (map (elim-letrec new-ids) (syntax->list #'(be ...)))])
; XXX Optimize special case of one nv
(syntax/loc stx
(let-values ([(vv ...)
(#%plain-app values nvv-box ...)] ...)
; This is okay, because we've already expanded the syntax.
(let-syntaxes
([(sv ...) se] ...)
(begin (#%plain-app call-with-values
(#%plain-lambda () ve)
(#%plain-lambda (nvv ...)
(#%plain-app set-box! vv nvv) ...))
...
be ...))))))]
[(with-continuation-mark ke me be)
(with-syntax ([ke ((elim-letrec ids) #'ke)]
[me ((elim-letrec ids) #'me)]
[be ((elim-letrec ids) #'be)])
(syntax/loc stx
(with-continuation-mark ke me be)))]
[(#%expression d)
(quasisyntax/loc stx (#%expression #,((elim-letrec ids) #'d)))]
[(#%plain-app e ...)
(with-syntax ([(e ...) (map (elim-letrec ids) (syntax->list #'(e ...)))])
(syntax/loc stx
(#%plain-app e ...)))]
[(#%top . v)
stx]
[(#%variable-reference . v)
stx]
[id (identifier? #'id)
(if (bound-identifier-member? #'id ids)
(syntax/loc stx (#%plain-app unbox id))
#'id)]
[_
(raise-syntax-error 'elim-letrec "Dropped through:" stx)])))
(define elim-letrec-term (elim-letrec empty)))
(let-values ([(vv ...)
(#%plain-app values nvv-box ...)] ...)
; This is okay, because we've already expanded the syntax.
(let-syntaxes
([(sv ...) se] ...)
(begin (#%plain-app call-with-values
(#%plain-lambda () ve)
(#%plain-lambda (nvv ...)
(#%plain-app set-box! vv nvv) ...))
...
be ...))))))]
[(with-continuation-mark ke me be)
(with-syntax ([ke ((elim-letrec ids) #'ke)]
[me ((elim-letrec ids) #'me)]
[be ((elim-letrec ids) #'be)])
(syntax/loc stx
(with-continuation-mark ke me be)))]
[(#%expression d)
(quasisyntax/loc stx (#%expression #,((elim-letrec ids) #'d)))]
[(#%plain-app e ...)
(with-syntax ([(e ...) (map (elim-letrec ids) (syntax->list #'(e ...)))])
(syntax/loc stx
(#%plain-app e ...)))]
[(#%top . v)
stx]
[(#%variable-reference . v)
stx]
[id (identifier? #'id)
(if (bound-identifier-member? #'id ids)
(syntax/loc stx (#%plain-app unbox id))
#'id)]
[_
(raise-syntax-error 'elim-letrec "Dropped through:" stx)])))
(define elim-letrec-term (elim-letrec empty))

View File

@ -1,144 +1,141 @@
(module freevars mzscheme
(require-for-template mzscheme)
(require (lib "kerncase.ss" "syntax")
(lib "list.ss")
(lib "toplevel.ss" "syntax")
(lib "plt-match.ss")
(lib "stx.ss" "syntax")
"util.ss")
(provide free-vars)
;; free-vars: syntax -> (listof identifier)
;; Find the free variables in an expression
(define (free-vars stx)
(kernel-syntax-case
stx (transformer?)
[(begin be ...)
(free-vars* (syntax->list #'(be ...)))]
[(begin0 be ...)
(free-vars* (syntax->list #'(be ...)))]
[(define-values (v ...) ve)
#lang scheme/base
(require (lib "kerncase.ss" "syntax")
(lib "list.ss")
(lib "toplevel.ss" "syntax")
(lib "plt-match.ss")
(lib "stx.ss" "syntax")
"util.ss")
(provide free-vars)
;; free-vars: syntax -> (listof identifier)
;; Find the free variables in an expression
(define (free-vars stx)
(kernel-syntax-case
stx (transformer?)
[(begin be ...)
(free-vars* (syntax->list #'(be ...)))]
[(begin0 be ...)
(free-vars* (syntax->list #'(be ...)))]
[(define-values (v ...) ve)
(set-diff (free-vars #'ve)
(syntax->list #'(v ...)))]
[(define-syntaxes (v ...) ve)
(parameterize ([transformer? #t])
(set-diff (free-vars #'ve)
(syntax->list #'(v ...)))]
[(define-syntaxes (v ...) ve)
(parameterize ([transformer? #t])
(set-diff (free-vars #'ve)
(syntax->list #'(v ...))))]
[(define-values-for-syntax (v ...) ve)
(parameterize ([transformer? #t])
(set-diff (free-vars #'ve)
(syntax->list #'(v ...))))]
[(set! v ve)
(union (free-vars #'v)
(free-vars #'ve))]
[(let-values ([(v ...) ve] ...) be ...)
(union (free-vars* (syntax->list #'(ve ...)))
(set-diff (free-vars* (syntax->list #'(be ...)))
(apply append (map syntax->list (syntax->list #'((v ...) ...))))))]
[(letrec-values ([(v ...) ve] ...) be ...)
(set-diff (union (free-vars* (syntax->list #'(ve ...)))
(free-vars* (syntax->list #'(be ...))))
(apply append (map syntax->list (syntax->list #'((v ...) ...)))))]
[(#%plain-lambda formals be ...)
(set-diff (free-vars* (syntax->list #'(be ...)))
(formals-list #'formals))]
[(case-lambda [formals be ...] ...)
(apply union*
(map (lambda (fs bes)
(set-diff (free-vars* (syntax->list bes))
(formals-list fs)))
(syntax->list #'(formals ...))
(syntax->list #'((be ...) ...))))]
[(if te ce ae)
(free-vars* (syntax->list #'(te ce ae)))]
[(if te ce)
(free-vars #`(if te ce (#%app void)))]
[(quote datum)
empty]
[(quote-syntax datum)
empty]
[(letrec-syntaxes+values ([(sv ...) se] ...)
([(vv ...) ve] ...)
be ...)
(set-diff (union* (free-vars* (syntax->list #'(se ...)))
(free-vars* (syntax->list #'(ve ...)))
(free-vars* (syntax->list #'(be ...))))
(append (apply append (map syntax->list (syntax->list #'((sv ...) ...))))
(apply append (map syntax->list (syntax->list #'((vv ...) ...))))))]
[(with-continuation-mark ke me be)
(free-vars* (syntax->list #'(ke me be)))]
[(#%expression d)
(free-vars #'d)]
[(#%plain-app e ...)
(free-vars* (syntax->list #'(e ...)))]
[(#%top . v)
#;(printf "Not including top ~S in freevars~n" (syntax-object->datum #'v))
empty]
[(#%variable-reference . id)
(let ([i-bdg (identifier-binding #'id)])
(cond
[(eqv? 'lexical (identifier-binding #'id))
(list #'id)]
[else
#;(printf "Not including var-reference ~S with binding ~S in freevars~n" (syntax-object->datum #'id) i-bdg)
empty]))]
[id (identifier? #'id)
(let ([i-bdg (identifier-binding #'id)])
(cond
[(eqv? 'lexical i-bdg)
(list #'id)]
[(not i-bdg)
(list #'id)]
[else
#;(printf "Not including id ~S with binding ~S in freevars~n" (syntax-object->datum #'id) i-bdg)
empty]))]
[_
(raise-syntax-error 'freevars "Dropped through:" stx)]))
;; free-vars*: (listof expr) -> (listof identifier)
;; union the free variables that occur in several expressions
(define (free-vars* exprs)
(foldl
(lambda (expr acc) (union (free-vars expr) acc))
empty exprs))
;; union: (listof identifier) (listof identifier) -> (listof identifier)
;; produce the set-theoretic union of two lists
(define (union l1 l2)
(cond
[(null? l1) l2]
[else (insert (car l1) (union (cdr l1) l2))]))
(define (union* . ll)
(foldl union
empty
ll))
;; insert: symbol (listof identifier) -> (listof symbol)
;; insert a symbol into a list without creating a duplicate
(define (insert sym into)
(unless (identifier? sym)
(raise-syntax-error 'insert "Not identifier" sym))
(cond
[(null? into) (list sym)]
[(bound-identifier=? sym (car into)) into]
[else (cons (car into) (insert sym (cdr into)))]))
;; set-diff: (listof identifier) (listof identifier) -> (listof identifier)
;; produce the set-theoretic difference of two lists
(define (set-diff s1 s2)
(cond
[(null? s2) s1]
[else (set-diff (sans s1 (car s2)) (cdr s2))]))
;; sans: (listof identifier) symbol -> (listof identifier)
;; produce the list sans the symbol
(define (sans s elt)
(unless (identifier? elt)
(raise-syntax-error 'sans "Not identifier" elt))
(cond
[(null? s) empty]
[(bound-identifier=? (car s) elt)
(cdr s)] ;; if we maintain the no-dupe invariant then we don't need to recur
[else (cons (car s)
(sans (cdr s) elt))])))
(syntax->list #'(v ...))))]
[(define-values-for-syntax (v ...) ve)
(parameterize ([transformer? #t])
(set-diff (free-vars #'ve)
(syntax->list #'(v ...))))]
[(set! v ve)
(union (free-vars #'v)
(free-vars #'ve))]
[(let-values ([(v ...) ve] ...) be ...)
(union (free-vars* (syntax->list #'(ve ...)))
(set-diff (free-vars* (syntax->list #'(be ...)))
(apply append (map syntax->list (syntax->list #'((v ...) ...))))))]
[(letrec-values ([(v ...) ve] ...) be ...)
(set-diff (union (free-vars* (syntax->list #'(ve ...)))
(free-vars* (syntax->list #'(be ...))))
(apply append (map syntax->list (syntax->list #'((v ...) ...)))))]
[(#%plain-lambda formals be ...)
(set-diff (free-vars* (syntax->list #'(be ...)))
(formals-list #'formals))]
[(case-lambda [formals be ...] ...)
(apply union*
(map (lambda (fs bes)
(set-diff (free-vars* (syntax->list bes))
(formals-list fs)))
(syntax->list #'(formals ...))
(syntax->list #'((be ...) ...))))]
[(if te ce ae)
(free-vars* (syntax->list #'(te ce ae)))]
[(quote datum)
empty]
[(quote-syntax datum)
empty]
[(letrec-syntaxes+values ([(sv ...) se] ...)
([(vv ...) ve] ...)
be ...)
(set-diff (union* (free-vars* (syntax->list #'(se ...)))
(free-vars* (syntax->list #'(ve ...)))
(free-vars* (syntax->list #'(be ...))))
(append (apply append (map syntax->list (syntax->list #'((sv ...) ...))))
(apply append (map syntax->list (syntax->list #'((vv ...) ...))))))]
[(with-continuation-mark ke me be)
(free-vars* (syntax->list #'(ke me be)))]
[(#%expression d)
(free-vars #'d)]
[(#%plain-app e ...)
(free-vars* (syntax->list #'(e ...)))]
[(#%top . v)
#;(printf "Not including top ~S in freevars~n" (syntax-object->datum #'v))
empty]
[(#%variable-reference . id)
(let ([i-bdg (identifier-binding #'id)])
(cond
[(eqv? 'lexical (identifier-binding #'id))
(list #'id)]
[else
#;(printf "Not including var-reference ~S with binding ~S in freevars~n" (syntax-object->datum #'id) i-bdg)
empty]))]
[id (identifier? #'id)
(let ([i-bdg (identifier-binding #'id)])
(cond
[(eqv? 'lexical i-bdg)
(list #'id)]
[(not i-bdg)
(list #'id)]
[else
#;(printf "Not including id ~S with binding ~S in freevars~n" (syntax-object->datum #'id) i-bdg)
empty]))]
[_
(raise-syntax-error 'freevars "Dropped through:" stx)]))
;; free-vars*: (listof expr) -> (listof identifier)
;; union the free variables that occur in several expressions
(define (free-vars* exprs)
(foldl
(lambda (expr acc) (union (free-vars expr) acc))
empty exprs))
;; union: (listof identifier) (listof identifier) -> (listof identifier)
;; produce the set-theoretic union of two lists
(define (union l1 l2)
(cond
[(null? l1) l2]
[else (insert (car l1) (union (cdr l1) l2))]))
(define (union* . ll)
(foldl union
empty
ll))
;; insert: symbol (listof identifier) -> (listof symbol)
;; insert a symbol into a list without creating a duplicate
(define (insert sym into)
(unless (identifier? sym)
(raise-syntax-error 'insert "Not identifier" sym))
(cond
[(null? into) (list sym)]
[(bound-identifier=? sym (car into)) into]
[else (cons (car into) (insert sym (cdr into)))]))
;; set-diff: (listof identifier) (listof identifier) -> (listof identifier)
;; produce the set-theoretic difference of two lists
(define (set-diff s1 s2)
(cond
[(null? s2) s1]
[else (set-diff (sans s1 (car s2)) (cdr s2))]))
;; sans: (listof identifier) symbol -> (listof identifier)
;; produce the list sans the symbol
(define (sans s elt)
(unless (identifier? elt)
(raise-syntax-error 'sans "Not identifier" elt))
(cond
[(null? s) empty]
[(bound-identifier=? (car s) elt)
(cdr s)] ;; if we maintain the no-dupe invariant then we don't need to recur
[else (cons (car s)
(sans (cdr s) elt))]))

View File

@ -1,23 +1,23 @@
(module lang-api mzscheme
(require (lib "url.ss" "net")
"../private/request-structs.ss"
"../private/response-structs.ss"
"../servlet/helpers.ss"
"abort-resume.ss"
"web.ss"
"web-cells.ss"
"web-param.ss"
"file-box.ss"
"web-extras.ss")
(provide (all-from-except mzscheme #%module-begin)
(all-from (lib "url.ss" "net"))
(all-from "../private/request-structs.ss")
(all-from "../private/response-structs.ss")
(all-from "../servlet/helpers.ss")
; XXX Try to remove, or only provide send/suspend
(all-from "abort-resume.ss")
(all-from "web.ss")
(all-from "web-cells.ss")
(all-from "web-param.ss")
(all-from "file-box.ss")
(all-from "web-extras.ss")))
#lang scheme/base
(require (lib "url.ss" "net")
"../private/request-structs.ss"
"../private/response-structs.ss"
"../servlet/helpers.ss"
"abort-resume.ss"
"web.ss"
"web-cells.ss"
"web-param.ss"
"file-box.ss"
"web-extras.ss")
(provide (except-out (all-from-out scheme/base) #%plain-module-begin)
(all-from-out (lib "url.ss" "net"))
(all-from-out "../private/request-structs.ss")
(all-from-out "../private/response-structs.ss")
(all-from-out "../servlet/helpers.ss")
; XXX Try to remove, or only provide send/suspend
(all-from-out "abort-resume.ss")
(all-from-out "web.ss")
(all-from-out "web-cells.ss")
(all-from-out "web-param.ss")
(all-from-out "file-box.ss")
(all-from-out "web-extras.ss"))

View File

@ -1,219 +1,209 @@
(module util mzscheme
(require-for-template mzscheme)
(require (lib "kerncase.ss" "syntax")
(lib "list.ss"))
(provide (all-defined-except template))
(define transformer? (make-parameter #f))
(define (recertify old-expr expr)
(syntax-recertify expr old-expr (current-code-inspector) #f))
(define (recertify* old-expr exprs)
(map (lambda (expr)
(syntax-recertify expr old-expr (current-code-inspector) #f))
exprs))
(define (recertify/new-defs old-expr thunk)
(call-with-values
thunk
(lambda (expr new-defs)
(values (recertify old-expr expr)
(recertify* old-expr new-defs)))))
(define current-code-labeling
(make-parameter
(lambda (stx)
(datum->syntax-object stx 'error))))
(define (generate-formal sym-name)
(let ([name (datum->syntax-object #f (gensym sym-name))])
(with-syntax ([(lambda (formal) ref-to-formal)
(if (syntax-transforming?)
(local-expand #`(lambda (#,name) #,name) 'expression empty)
#`(lambda (#,name) #,name))])
(values #'formal #'ref-to-formal))))
(define (formals-list stx)
(syntax-case stx ()
[v (identifier? #'v)
(list #'v)]
[(v ...)
(syntax->list #'(v ...))]
[(v ... . rv)
(list* #'rv (syntax->list #'(v ...)))]))
(define ((make-define-case inner) stx)
(recertify
stx
(syntax-case stx (define-values define-syntaxes define-values-for-syntax)
[(define-values (v ...) ve)
#lang scheme/base
(require (lib "kerncase.ss" "syntax")
(lib "list.ss"))
(provide (except-out (all-defined-out) template))
(define transformer? (make-parameter #f))
(define (recertify old-expr expr)
(syntax-recertify expr old-expr (current-code-inspector) #f))
(define (recertify* old-expr exprs)
(map (lambda (expr)
(syntax-recertify expr old-expr (current-code-inspector) #f))
exprs))
(define (recertify/new-defs old-expr thunk)
(call-with-values
thunk
(lambda (expr new-defs)
(values (recertify old-expr expr)
(recertify* old-expr new-defs)))))
(define current-code-labeling
(make-parameter
(lambda (stx)
(datum->syntax stx 'error))))
(define (generate-formal sym-name)
(let ([name (datum->syntax #f (gensym sym-name))])
(with-syntax ([(lambda (formal) ref-to-formal)
(if (syntax-transforming?)
(local-expand #`(lambda (#,name) #,name) 'expression empty)
#`(lambda (#,name) #,name))])
(values #'formal #'ref-to-formal))))
(define (formals-list stx)
(syntax-case stx ()
[v (identifier? #'v)
(list #'v)]
[(v ...)
(syntax->list #'(v ...))]
[(v ... . rv)
(list* #'rv (syntax->list #'(v ...)))]))
(define ((make-define-case inner) stx)
(recertify
stx
(syntax-case stx (define-values define-syntaxes define-values-for-syntax)
[(define-values (v ...) ve)
(with-syntax ([ve (inner #'ve)])
(syntax/loc stx
(define-values (v ...) ve)))]
[(define-syntaxes (v ...) ve)
(parameterize ([transformer? #t])
(with-syntax ([ve (inner #'ve)])
(syntax/loc stx
(define-values (v ...) ve)))]
[(define-syntaxes (v ...) ve)
(parameterize ([transformer? #t])
(with-syntax ([ve (inner #'ve)])
(syntax/loc stx
(define-syntaxes (v ...) ve))))]
[(define-values-for-syntax (v ...) ve)
(parameterize ([transformer? #t])
(with-syntax ([ve (inner #'ve)])
(syntax/loc stx
(define-values-for-syntax (v ...) ve))))]
[_
(raise-syntax-error 'define-case "Dropped through:" stx)])))
(define ((make-define-case/new-defs inner) stx)
(let-values ([(nstx defs) (inner stx)])
(append defs (list nstx))))
(define ((make-module-case/new-defs inner) stx)
(recertify*
stx
(syntax-case* stx (require provide require-for-syntax require-for-template) module-identifier=?
[(require spec ...)
(list stx)]
[(provide spec ...)
(list stx)]
[(require-for-syntax spec ...)
(list stx)]
[(require-for-template spec ...)
(list stx)]
[_
(inner stx)])))
(define ((make-module-case inner) stx)
(recertify
stx
(syntax-case* stx (require provide require-for-syntax require-for-template) module-identifier=?
[(require spec ...)
stx]
[(provide spec ...)
stx]
[(require-for-syntax spec ...)
stx]
[(require-for-template spec ...)
stx]
[_
(inner stx)])))
(define ((make-lang-module-begin make-labeling transform) stx)
(recertify
stx
(syntax-case stx ()
((mb forms ...)
(with-syntax ([(pmb rfs0 body ...)
(local-expand (quasisyntax/loc stx
(#%plain-module-begin
#,(syntax-local-introduce #'(require-for-syntax mzscheme))
forms ...))
'module-begin
empty)])
(let ([base-labeling (make-labeling (string->bytes/utf-8 (format "~a" (syntax-object->datum stx))))])
(parameterize ([current-code-labeling
(lambda (stx)
(datum->syntax-object stx (base-labeling)))])
(let ([new-defs (apply append (map transform (syntax->list #'(body ...))))])
(quasisyntax/loc stx
(pmb rfs0
#,@new-defs))))))))))
(define (bound-identifier-member? id ids)
(ormap
(lambda (an-id)
(bound-identifier=? id an-id))
ids))
;; Kernel Case Template
(define (template stx)
(recertify
stx
(kernel-syntax-case
stx (transformer?)
[(begin be ...)
(with-syntax ([(be ...) (map template (syntax->list #'(be ...)))])
(define-syntaxes (v ...) ve))))]
[(define-values-for-syntax (v ...) ve)
(parameterize ([transformer? #t])
(with-syntax ([ve (inner #'ve)])
(syntax/loc stx
(begin be ...)))]
[(begin0 be ...)
(with-syntax ([(be ...) (map template (syntax->list #'(be ...)))])
(syntax/loc stx
(begin0 be ...)))]
[(define-values (v ...) ve)
(define-values-for-syntax (v ...) ve))))]
[_
(raise-syntax-error 'define-case "Dropped through:" stx)])))
(define ((make-define-case/new-defs inner) stx)
(let-values ([(nstx defs) (inner stx)])
(append defs (list nstx))))
(define ((make-module-case/new-defs inner) stx)
(recertify*
stx
(syntax-case* stx (#%require #%provide) free-identifier=?
[(#%require spec ...)
(list stx)]
[(#%provide spec ...)
(list stx)]
[_
(inner stx)])))
(define ((make-module-case inner) stx)
(recertify
stx
(syntax-case* stx (#%require #%provide) free-identifier=?
[(#%require spec ...)
stx]
[(#%provide spec ...)
stx]
[_
(inner stx)])))
(define ((make-lang-module-begin make-labeling transform) stx)
(recertify
stx
(syntax-case stx ()
((mb forms ...)
(with-syntax ([(pmb rfs0 body ...)
(local-expand (quasisyntax/loc stx
(#%plain-module-begin
#,(syntax-local-introduce
#'(require (for-syntax scheme/base)))
forms ...))
'module-begin
empty)])
(let ([base-labeling (make-labeling (string->bytes/utf-8 (format "~a" (syntax->datum stx))))])
(parameterize ([current-code-labeling
(lambda (stx)
(datum->syntax stx (base-labeling)))])
(let ([new-defs (apply append (map transform (syntax->list #'(body ...))))])
(quasisyntax/loc stx
(pmb rfs0
#,@new-defs))))))))))
(define (bound-identifier-member? id ids)
(ormap
(lambda (an-id)
(bound-identifier=? id an-id))
ids))
;; Kernel Case Template
(define (template stx)
(recertify
stx
(kernel-syntax-case
stx (transformer?)
[(begin be ...)
(with-syntax ([(be ...) (map template (syntax->list #'(be ...)))])
(syntax/loc stx
(begin be ...)))]
[(begin0 be ...)
(with-syntax ([(be ...) (map template (syntax->list #'(be ...)))])
(syntax/loc stx
(begin0 be ...)))]
[(define-values (v ...) ve)
(with-syntax ([ve (template #'ve)])
(syntax/loc stx
(define-values (v ...) ve)))]
[(define-syntaxes (v ...) ve)
(parameterize ([transformer? #t])
(with-syntax ([ve (template #'ve)])
(syntax/loc stx
(define-values (v ...) ve)))]
[(define-syntaxes (v ...) ve)
(parameterize ([transformer? #t])
(with-syntax ([ve (template #'ve)])
(syntax/loc stx
(define-syntaxes (v ...) ve))))]
[(define-values-for-syntax (v ...) ve)
(parameterize ([transformer? #t])
(with-syntax ([ve (template #'ve)])
(syntax/loc stx
(define-values-for-syntax (v ...) ve))))]
[(set! v ve)
(define-syntaxes (v ...) ve))))]
[(define-values-for-syntax (v ...) ve)
(parameterize ([transformer? #t])
(with-syntax ([ve (template #'ve)])
(syntax/loc stx
(set! v ve)))]
[(let-values ([(v ...) ve] ...) be ...)
(with-syntax ([(ve ...) (map template (syntax->list #'(ve ...)))]
[(be ...) (map template (syntax->list #'(be ...)))])
(syntax/loc stx
(let-values ([(v ...) ve] ...) be ...)))]
[(letrec-values ([(v ...) ve] ...) be ...)
(with-syntax ([(ve ...) (map template (syntax->list #'(ve ...)))]
[(be ...) (map template (syntax->list #'(be ...)))])
(syntax/loc stx
(letrec-values ([(v ...) ve] ...) be ...)))]
[(#%plain-lambda formals be ...)
(with-syntax ([(be ...) (map template (syntax->list #'(be ...)))])
(syntax/loc stx
(#%plain-lambda formals be ...)))]
[(case-lambda [formals be ...] ...)
(with-syntax ([((be ...) ...) (map template (syntax->list #'((be ...) ...)))])
(syntax/loc stx
(case-lambda [formals be ...] ...)))]
[(if te ce ae)
(with-syntax ([te (template #'te)]
[ce (template #'ce)]
[ae (template #'ae)])
(syntax/loc stx
(if te ce ae)))]
[(if te ce)
(template (syntax/loc stx (if te ce (#%plain-app void))))]
[(quote datum)
stx]
[(quote-syntax datum)
stx]
[(letrec-syntaxes+values ([(sv ...) se] ...)
([(vv ...) ve] ...)
be ...)
(with-syntax ([(se ...) (map template (syntax->list #'(se ...)))]
[(ve ...) (map template (syntax->list #'(ve ...)))]
[(be ...) (map template (syntax->list #'(be ...)))])
(syntax/loc stx
(letrec-syntaxes+values ([(sv ...) se] ...)
([(vv ...) ve] ...)
be ...)))]
[(with-continuation-mark ke me be)
(with-syntax ([ke (template #'ke)]
[me (template #'me)]
[be (template #'be)])
(syntax/loc stx
(with-continuation-mark ke me be)))]
[(#%expression . d)
stx]
[(#%plain-app e ...)
(with-syntax ([(e ...) (map template (syntax->list #'(e ...)))])
(syntax/loc stx
(#%plain-app e ...)))]
[(#%top . v)
stx]
[(#%variable-reference . v)
stx]
[id (identifier? #'id)
stx]
[_
(raise-syntax-error 'kerncase "Dropped through:" stx)]))))
(syntax/loc stx
(define-values-for-syntax (v ...) ve))))]
[(set! v ve)
(with-syntax ([ve (template #'ve)])
(syntax/loc stx
(set! v ve)))]
[(let-values ([(v ...) ve] ...) be ...)
(with-syntax ([(ve ...) (map template (syntax->list #'(ve ...)))]
[(be ...) (map template (syntax->list #'(be ...)))])
(syntax/loc stx
(let-values ([(v ...) ve] ...) be ...)))]
[(letrec-values ([(v ...) ve] ...) be ...)
(with-syntax ([(ve ...) (map template (syntax->list #'(ve ...)))]
[(be ...) (map template (syntax->list #'(be ...)))])
(syntax/loc stx
(letrec-values ([(v ...) ve] ...) be ...)))]
[(#%plain-lambda formals be ...)
(with-syntax ([(be ...) (map template (syntax->list #'(be ...)))])
(syntax/loc stx
(#%plain-lambda formals be ...)))]
[(case-lambda [formals be ...] ...)
(with-syntax ([((be ...) ...) (map template (syntax->list #'((be ...) ...)))])
(syntax/loc stx
(case-lambda [formals be ...] ...)))]
[(if te ce ae)
(with-syntax ([te (template #'te)]
[ce (template #'ce)]
[ae (template #'ae)])
(syntax/loc stx
(if te ce ae)))]
[(quote datum)
stx]
[(quote-syntax datum)
stx]
[(letrec-syntaxes+values ([(sv ...) se] ...)
([(vv ...) ve] ...)
be ...)
(with-syntax ([(se ...) (map template (syntax->list #'(se ...)))]
[(ve ...) (map template (syntax->list #'(ve ...)))]
[(be ...) (map template (syntax->list #'(be ...)))])
(syntax/loc stx
(letrec-syntaxes+values ([(sv ...) se] ...)
([(vv ...) ve] ...)
be ...)))]
[(with-continuation-mark ke me be)
(with-syntax ([ke (template #'ke)]
[me (template #'me)]
[be (template #'be)])
(syntax/loc stx
(with-continuation-mark ke me be)))]
[(#%expression . d)
stx]
[(#%plain-app e ...)
(with-syntax ([(e ...) (map template (syntax->list #'(e ...)))])
(syntax/loc stx
(#%plain-app e ...)))]
[(#%top . v)
stx]
[(#%variable-reference . v)
stx]
[id (identifier? #'id)
stx]
[_
(raise-syntax-error 'kerncase "Dropped through:" stx)])))

View File

@ -1,55 +1,57 @@
(module web-param mzscheme
(require "../private/closure.ss"
(lib "list.ss"))
; XXX Add contract
(provide make-web-parameter
web-parameter?
web-parameterize)
(define (web-parameter? any)
(and (procedure? any)
(procedure-arity-includes? any 0)
(procedure-arity-includes? any 2)))
(define next-web-parameter-id
(let ([i (box 0)])
(lambda ()
(begin0 (unbox i)
(set-box! i (add1 (unbox i)))))))
; This is syntax so that the web-language transformations can occur.
(define-syntax make-web-parameter
(syntax-rules ()
[(_ default)
; Key is a lambda, the defunctionalization process will turn it into a serializable value with the module's label embedded in it, that way the parameters are not guessable AND sensitive to changes in the source
; I don't like the assumption of deserialization though, but I have to do this grossness because w-c-m uses equal? and post-deserialization, the two lambdas are not equal.
(let* ([id (next-web-parameter-id)]
[label (closure->deserialize-name (lambda () 'web-param))]
[key (string->symbol (format "~a-~a" label id))])
(case-lambda
[()
(let ([cur
(continuation-mark-set->list
(current-continuation-marks)
key)])
(if (empty? cur)
default
(first cur)))]
[(v thunk)
(with-continuation-mark key v (thunk))]))]))
(define-syntax web-parameterize/values
(syntax-rules ()
[(_ () e ...)
(begin e ...)]
[(_ ([wp v]) e ...)
(wp v (lambda () e ...))]
[(_ ([fwp fv] [wp v] ...) e ...)
(web-parameterize/values ([fwp fv]) (web-parameterize/values ([wp v] ...) e ...))]))
(define-syntax (web-parameterize stx)
(syntax-case stx ()
[(_ ([wp ve] ...) e ...)
(with-syntax ([(v ...) (generate-temporaries (syntax->list #'(ve ...)))])
#'(let ([v ve] ...)
(web-parameterize/values ([wp v] ...) e ...)))])))
#lang scheme/base
(require (for-syntax scheme/base)
"../private/closure.ss"
(lib "list.ss"))
; XXX Add contract
(provide make-web-parameter
web-parameter?
web-parameterize)
(define (web-parameter? any)
(and (procedure? any)
(procedure-arity-includes? any 0)
(procedure-arity-includes? any 2)))
(define next-web-parameter-id
(let ([i (box 0)])
(lambda ()
(begin0 (unbox i)
(set-box! i (add1 (unbox i)))))))
; This is syntax so that the web-language transformations can occur.
(define-syntax make-web-parameter
(syntax-rules ()
[(_ default)
; Key is a lambda, the defunctionalization process will turn it into a serializable value with the module's label embedded in it, that way the parameters are not guessable AND sensitive to changes in the source
; I don't like the assumption of deserialization though, but I have to do this grossness because w-c-m uses equal? and post-deserialization, the two lambdas are not equal.
(let* ([id (next-web-parameter-id)]
[label (closure->deserialize-name (lambda () 'web-param))]
[key (string->symbol (format "~a-~a" label id))])
(case-lambda
[()
(let ([cur
(continuation-mark-set->list
(current-continuation-marks)
key)])
(if (empty? cur)
default
(first cur)))]
[(v thunk)
(with-continuation-mark key v (thunk))]))]))
(define-syntax web-parameterize/values
(syntax-rules ()
[(_ () e ...)
(begin e ...)]
[(_ ([wp v]) e ...)
(wp v (lambda () e ...))]
[(_ ([fwp fv] [wp v] ...) e ...)
(web-parameterize/values ([fwp fv]) (web-parameterize/values ([wp v] ...) e ...))]))
(define-syntax (web-parameterize stx)
(syntax-case stx ()
[(_ ([wp ve] ...) e ...)
(with-syntax ([(v ...) (generate-temporaries (syntax->list #'(ve ...)))])
#'(let ([v ve] ...)
(web-parameterize/values ([wp v] ...) e ...)))]))

View File

@ -1,128 +1,126 @@
(module closure mzscheme
(require-for-template mzscheme
(lib "serialize.ss")
(lib "etc.ss"))
(require (lib "list.ss")
(lib "serialize.ss"))
(provide make-closure-definition-syntax
closure->deserialize-name)
(define (closure->deserialize-name proc)
(cdr (first (second (serialize proc)))))
;; borrowed this from Matthew's code
;; creates the deserialize-info identifier
(define (make-deserialize-name id)
(datum->syntax-object
id
(string->symbol
(format "web-deserialize-info:~a" (syntax-e id)))
id))
(define (make-closure-definition-syntax tag fvars proc)
(let ([make-id (lambda (str)
(datum->syntax-object
tag (string->symbol (format str (syntax-object->datum tag)))))])
(let ([deserialize-info:CLOSURE (make-deserialize-name tag)])
(with-syntax ([CLOSURE:serialize-info (make-id "~a:serialize-info")]
[make-CLOSURE (make-id "make-~a")]
[CLOSURE? (make-id "~a?")]
[CLOSURE-ref (make-id "~a-ref")]
[CLOSURE-set! (make-id "~a-set!")]
[CLOSURE-env (make-id "~a-env")]
[set-CLOSURE-env! (make-id "set-~a-env!")]
[struct:CLOSURE (make-id "struct:~a")])
(values
(syntax/loc proc make-CLOSURE)
(list
(quasisyntax/loc proc
(define #,deserialize-info:CLOSURE
(make-deserialize-info
;; make-proc: value ... -> CLOSURE
(lambda args
(apply #,(if (null? fvars)
(syntax/loc proc (lambda () (make-CLOSURE)))
(quasisyntax/loc proc (lambda #,fvars (make-CLOSURE (lambda () (values #,@fvars))))))
args))
;; cycle-make-proc: -> (values CLOSURE (CLOSURE -> void))
(lambda ()
(let ([new-closure
#,(if (null? fvars)
(syntax/loc proc (make-CLOSURE))
(syntax/loc proc (make-CLOSURE (lambda () (error "closure not initialized")))))])
(values
new-closure
#,(if (null? fvars)
(syntax/loc proc void)
(syntax/loc proc
(lambda (clsr)
(set-CLOSURE-env! new-closure (CLOSURE-env clsr)))))))))))
(quasisyntax/loc proc
(provide #,deserialize-info:CLOSURE))
(quasisyntax/loc proc
(define CLOSURE:serialize-info
(make-serialize-info
;; to-vector: CLOSURE -> vector
#lang scheme/base
(require (for-template scheme/base)
(for-template (lib "serialize.ss"))
(lib "list.ss")
(lib "serialize.ss"))
(provide make-closure-definition-syntax
closure->deserialize-name)
(define (closure->deserialize-name proc)
(cdr (first (second (serialize proc)))))
(define (make-closure-definition-syntax tag fvars proc)
(define (make-id str)
(datum->syntax tag (string->symbol (format str (syntax->datum tag)))))
(with-syntax ([CLOSURE:deserialize-info (make-id "~a:deserialize-info")]
[CLOSURE:serialize-info (make-id "~a:serialize-info")]
[make-CLOSURE (make-id "make-~a")]
[CLOSURE? (make-id "~a?")]
[CLOSURE-ref (make-id "~a-ref")]
[CLOSURE-set! (make-id "~a-set!")]
[CLOSURE-env (make-id "~a-env")]
[set-CLOSURE-env! (make-id "set-~a-env!")]
[struct:CLOSURE (make-id "struct:~a")])
(values
(syntax/loc proc make-CLOSURE)
(list
(quasisyntax/loc proc
(define CLOSURE:deserialize-info
(make-deserialize-info
;; make-proc: value ... -> CLOSURE
(lambda args
(apply #,(if (null? fvars)
(syntax/loc proc
(#%plain-lambda () (#%plain-app make-CLOSURE)))
(quasisyntax/loc proc
(#%plain-lambda #,fvars
(#%plain-app make-CLOSURE
(#%plain-lambda ()
(#%plain-app values #,@fvars))))))
args))
;; cycle-make-proc: -> (values CLOSURE (CLOSURE -> void))
(lambda ()
(let ([new-closure
#,(if (null? fvars)
(syntax/loc proc (#%plain-app make-CLOSURE))
(syntax/loc proc
(#%plain-app make-CLOSURE
(#%plain-lambda () (#%plain-app error "closure not initialized")))))])
(values
new-closure
#,(if (null? fvars)
(syntax/loc proc void)
(syntax/loc proc
(#%plain-lambda (clsr)
(#%plain-app set-CLOSURE-env! new-closure (#%plain-app CLOSURE-env clsr)))))))))))
(quasisyntax/loc proc
(provide CLOSURE:deserialize-info))
(quasisyntax/loc proc
(define CLOSURE:serialize-info
(make-serialize-info
;; to-vector: CLOSURE -> vector
#,(if (null? fvars)
(syntax/loc proc (#%plain-lambda (clsr) (#%plain-app vector)))
(syntax/loc proc
(#%plain-lambda (clsr)
(#%plain-app call-with-values
(#%plain-lambda () (#%plain-app (#%plain-app CLOSURE-env clsr)))
vector))))
;; The serializer id: --------------------
;(syntax deserialize-info:CLOSURE)
;; I still don't know what to put here.
;; oh well.
;(quote-syntax #,(syntax deserialize-info:CLOSURE))
(let ([b (identifier-binding (quote-syntax CLOSURE:deserialize-info))])
(if (list? b)
(cons 'CLOSURE:deserialize-info (caddr b))
'CLOSURE:deserialize-info))
;; can-cycle?
#t
;; Directory for last-ditch resolution --------------------
(or (current-load-relative-directory) (current-directory))
)))
(quasisyntax/loc proc
(define-values (struct:CLOSURE make-CLOSURE CLOSURE?
#,@(if (null? fvars)
(syntax/loc proc ())
(syntax/loc proc (CLOSURE-env set-CLOSURE-env!))))
(let ([struct-apply
#,(if (null? fvars)
(syntax/loc proc (lambda (clsr) (vector)))
(syntax/loc proc
(lambda (clsr)
(call-with-values
(lambda () ((CLOSURE-env clsr)))
vector))))
;; The serializer id: --------------------
;(syntax deserialize-info:CLOSURE)
;; I still don't know what to put here.
;; oh well.
;(quote-syntax #,(syntax deserialize-info:CLOSURE))
(let ([b (identifier-binding (quote-syntax #,deserialize-info:CLOSURE))])
(if (list? b)
(cons '#,deserialize-info:CLOSURE (caddr b))
'#,deserialize-info:CLOSURE))
;; can-cycle?
#t
;; Directory for last-ditch resolution --------------------
(or (current-load-relative-directory) (current-directory))
)))
(quasisyntax/loc proc
(define-values (struct:CLOSURE make-CLOSURE CLOSURE?
#,@(if (null? fvars)
(syntax/loc proc ())
(syntax/loc proc (CLOSURE-env set-CLOSURE-env!))))
(let-values ([(struct:CLOSURE make-CLOSURE CLOSURE? CLOSURE-ref CLOSURE-set!)
(make-struct-type '#,tag ;; the tag goes here
#f ; no super type
#,(if (null? fvars) 0 1)
0 ; number of auto-fields
#f ; auto-v
; prop-vals:
(list (cons prop:serializable CLOSURE:serialize-info))
#f ; inspector
;; the struct apply proc:
#,(if (null? fvars)
(quasisyntax/loc proc
(lambda (clsr . args)
(apply #,proc args)))
(quasisyntax/loc proc
(lambda (clsr . args)
(let-values ([#,fvars ((CLOSURE-env clsr))])
(apply #,proc args)))))
)])
(values struct:CLOSURE make-CLOSURE CLOSURE?
#,@(if (null? fvars)
(syntax/loc proc ())
(syntax/loc proc
((lambda (clsr) (CLOSURE-ref clsr 0))
(lambda (clsr new-env) (CLOSURE-set! clsr 0 new-env))))))))))))))))
(quasisyntax/loc proc
(#%plain-lambda (clsr . args)
(#%plain-app apply #,proc args)))
(quasisyntax/loc proc
(#%plain-lambda (clsr . args)
(let-values ([#,fvars (#%plain-app (#%plain-app CLOSURE-env clsr))])
(#%plain-app apply #,proc args)))))])
(let-values ([(struct:CLOSURE make-CLOSURE CLOSURE? CLOSURE-ref CLOSURE-set!)
(make-struct-type '#,tag ;; the tag goes here
#f ; no super type
#,(if (null? fvars) 0 1)
0 ; number of auto-fields
#f ; auto-v
; prop-vals:
(list (cons prop:serializable CLOSURE:serialize-info)
(cons prop:procedure struct-apply))
#f ; inspector
;; the struct apply proc:
#f)])
(values struct:CLOSURE make-CLOSURE CLOSURE?
#,@(if (null? fvars)
(syntax/loc proc ())
(syntax/loc proc
((#%plain-lambda (clsr) (#%plain-app CLOSURE-ref clsr 0))
(#%plain-lambda (clsr new-env) (#%plain-app CLOSURE-set! clsr 0 new-env))))))))))))))

View File

@ -48,11 +48,11 @@
; compress-serial : serial -> serial (with compressed mod-map)
(define compress-serial
(match-lambda
[(list e0 mm e2 e3 e4 e5)
(list e0 (compress-mod-map mm) e2 e3 e4 e5)]))
[(list vs e0 mm e2 e3 e4 e5)
(list vs e0 (compress-mod-map mm) e2 e3 e4 e5)]))
; decompress-serial : serial (with compressed mod-map) -> serial
(define decompress-serial
(match-lambda
[(list e0 cmm e2 e3 e4 e5)
(list e0 (decompress-mod-map cmm) e2 e3 e4 e5)])))
[(list vs e0 cmm e2 e3 e4 e5)
(list vs e0 (decompress-mod-map cmm) e2 e3 e4 e5)])))

View File

@ -1,6 +1,7 @@
(module dispatch-lang-test mzscheme
(require (planet "test.ss" ("schematics" "schemeunit.plt" 2))
#;(planet "sxml.ss" ("lizorkin" "sxml.plt" 1 4))
; XXX Replace with real
(lib "sxml.ss" "web-server/tmp/sxml")
(lib "etc.ss")
(lib "list.ss")
(lib "dispatch.ss" "web-server" "dispatchers")
@ -10,9 +11,6 @@
"../util.ss")
(provide dispatch-lang-tests)
; XXX Sxpath broken
(define sxpath (lambda _ (lambda _ (error 'sxpath))))
(define (mkd p)
(lang:make #:url->path (lambda _ (values p (list p)))
#:make-servlet-namespace

View File

@ -1,6 +1,7 @@
(module dispatch-servlets-test mzscheme
(require (planet "test.ss" ("schematics" "schemeunit.plt" 2))
#;(planet "sxml.ss" ("lizorkin" "sxml.plt" 1 4))
; XXX Replace with real
(lib "sxml.ss" "web-server/tmp/sxml")
(lib "etc.ss")
(lib "list.ss")
(lib "request-structs.ss" "web-server" "private")
@ -10,10 +11,7 @@
(prefix servlets: (lib "dispatch-servlets.ss" "web-server" "dispatchers"))
"../util.ss")
(provide dispatch-servlets-tests)
; XXX Sxpath broken
(define sxpath (lambda _ (lambda _ (error 'sxpath))))
(current-server-custodian (current-custodian))
(define (mkd p)

View File

@ -1,6 +1,7 @@
(module util mzscheme
(require (lib "connection-manager.ss" "web-server" "private")
#;(only (planet "ssax.ss" ("lizorkin" "ssax.plt" 1 3))
; XXX Replace with real
(only (lib "ssax.ss" "web-server/tmp/ssax")
ssax:xml->sxml)
(lib "request-structs.ss" "web-server" "private")
(lib "web-server-structs.ss" "web-server" "private")
@ -19,9 +20,7 @@
(define (call d u bs)
(htxml (collect d (make-request 'get (string->url u) empty bs #"" "127.0.0.1" 80 "127.0.0.1"))))
(define (htxml bs)
; XXX SSAX is broken
#;(define sx (ssax:xml->sxml (open-input-bytes (second (regexp-match #"^.+\r\n\r\n(.+)$" bs))) empty))
(define sx empty)
(define sx (ssax:xml->sxml (open-input-bytes (second (regexp-match #"^.+\r\n\r\n(.+)$" bs))) empty))
(pretty-print sx)
sx)

View File

@ -0,0 +1,422 @@
HtmlPrag: Pragmatic Parsing and Emitting of HTML using SXML and SHTML
*********************************************************************
Version 0.16, 2005-12-18, `http://www.neilvandyke.org/htmlprag/'
by Neil W. Van Dyke <neil@neilvandyke.org>
Copyright (C) 2003 - 2005 Neil W. Van Dyke. This program is Free
Software; you can redistribute it and/or modify it under the terms
of the GNU Lesser General Public License as published by the Free
Software Foundation; either version 2.1 of the License, or (at
your option) any later version. This program is distributed in
the hope that it will be useful, but without any warranty; without
even the implied warranty of merchantability or fitness for a
particular purpose. See <http://www.gnu.org/copyleft/lesser.html>
for details. For other license options and consulting, contact
the author.
Introduction
************
HtmlPrag provides permissive HTML parsing and emitting capability to
Scheme programs. The parser is useful for software agent extraction of
information from Web pages, for programmatically transforming HTML
files, and for implementing interactive Web browsers. HtmlPrag emits
"SHTML," which is an encoding of HTML in SXML
(http://pobox.com/~oleg/ftp/Scheme/SXML.html), so that conventional
HTML may be processed with XML tools such as SXPath
(http://pair.com/lisovsky/query/sxpath/). Like Oleg Kiselyov's
SSAX-based HTML parser
(http://pobox.com/~oleg/ftp/Scheme/xml.html#HTML-parser), HtmlPrag
provides a permissive tokenizer, but also attempts to recover
structure. HtmlPrag also includes procedures for encoding SHTML in
HTML syntax.
The HtmlPrag parsing behavior is permissive in that it accepts
erroneous HTML, handling several classes of HTML syntax errors
gracefully, without yielding a parse error. This is crucial for
parsing arbitrary real-world Web pages, since many pages actually
contain syntax errors that would defeat a strict or validating parser.
HtmlPrag's handling of errors is intended to generally emulate popular
Web browsers' interpretation of the structure of erroneous HTML. We
euphemistically term this kind of parse "pragmatic."
HtmlPrag also has some support for XHTML, although XML namespace
qualifiers are currently accepted but stripped from the resulting
SHTML. Note that valid XHTML input is of course better handled by a
validating XML parser like Kiselyov's SSAX
(http://pobox.com/~oleg/ftp/Scheme/xml.html#XML-parser).
HtmlPrag requires R5RS, SRFI-6, and SRFI-23.
SHTML and SXML
**************
SHTML is a variant of SXML, with two minor but useful extensions:
1. The SXML keyword symbols, such as `*TOP*', are defined to be in all
uppercase, regardless of the case-sensitivity of the reader of the
hosting Scheme implementation in any context. This avoids several
pitfalls.
2. Since not all character entity references used in HTML can be
converted to Scheme characters in all R5RS Scheme implementations,
nor represented in conventional text files or other common
external text formats to which one might wish to write SHTML,
SHTML adds a special `&' syntax for non-ASCII (or
non-Extended-ASCII) characters. The syntax is `(& VAL)', where
VAL is a symbol or string naming with the symbolic name of the
character, or an integer with the numeric value of the character.
> shtml-comment-symbol
> shtml-decl-symbol
> shtml-empty-symbol
> shtml-end-symbol
> shtml-entity-symbol
> shtml-pi-symbol
> shtml-start-symbol
> shtml-text-symbol
> shtml-top-symbol
These variables are bound to the following case-sensitive symbols
used in SHTML, respectively: `*COMMENT*', `*DECL*', `*EMPTY*',
`*END*', `*ENTITY*', `*PI*', `*START*', `*TEXT*', and `*TOP*'.
These can be used in lieu of the literal symbols in programs read
by a case-insensitive Scheme reader.(1)
> shtml-named-char-id
> shtml-numeric-char-id
These variables are bound to the SHTML entity public identifier
strings used in SHTML `*ENTITY*' named and numeric character entity
references.
> (make-shtml-entity val)
Yields an SHTML character entity reference for VAL. For example:
(make-shtml-entity "rArr") => (& rArr)
(make-shtml-entity (string->symbol "rArr")) => (& rArr)
(make-shtml-entity 151) => (& 151)
> (shtml-entity-value obj)
Yields the value for the SHTML entity OBJ, or `#f' if OBJ is not a
recognized entity. Values of named entities are symbols, and
values of numeric entities are numbers. An error may raised if OBJ
is an entity with system ID inconsistent with its public ID. For
example:
(define (f s) (shtml-entity-value (cadr (html->shtml s))))
(f "&nbsp;") => nbsp
(f "&#2000;") => 2000
Tokenizing
**********
The tokenizer is used by the higher-level structural parser, but can
also be called directly for debugging purposes or unusual applications.
Some of the list structure of tokens, such as for start tag tokens, is
mutated and incorporated into the SHTML list structure emitted by the
parser.
> (make-html-tokenizer in normalized?)
Constructs an HTML tokenizer procedure on input port IN. If
boolean NORMALIZED? is true, then tokens will be in a format
conducive to use with a parser emitting normalized SXML. Each
call to the resulting procedure yields a successive token from the
input. When the tokens have been exhausted, the procedure returns
the null list. For example:
(define input (open-input-string "<a href=\"foo\">bar</a>"))
(define next (make-html-tokenizer input #f))
(next) => (a (@ (href "foo")))
(next) => "bar"
(next) => (*END* a)
(next) => ()
(next) => ()
> (tokenize-html in normalized?)
Returns a list of tokens from input port IN, normalizing according
to boolean NORMALIZED?. This is probably most useful as a
debugging convenience. For example:
(tokenize-html (open-input-string "<a href=\"foo\">bar</a>") #f)
=> ((a (@ (href "foo"))) "bar" (*END* a))
> (shtml-token-kind token)
Returns a symbol indicating the kind of tokenizer TOKEN:
`*COMMENT*', `*DECL*', `*EMPTY*', `*END*', `*ENTITY*', `*PI*',
`*START*', `*TEXT*'. This is used by higher-level parsing code.
For example:
(map shtml-token-kind
(tokenize-html (open-input-string "<a<b>><c</</c") #f))
=> (*START* *START* *TEXT* *START* *END* *END*)
Parsing
*******
Most applications will call a parser procedure such as `html->shtml'
rather than calling the tokenizer directly.
> (parse-html/tokenizer tokenizer normalized?)
Emits a parse tree like `html->shtml' and related procedures,
except using TOKENIZER as a source of tokens, rather than
tokenizing from an input port. This procedure is used internally,
and generally should not be called directly.
> (html->sxml-0nf input)
> (html->sxml-1nf input)
> (html->sxml-2nf input)
> (html->sxml input)
> (html->shtml input)
Permissively parse HTML from INPUT, which is either an input port
or a string, and emit an SHTML equivalent or approximation. To
borrow and slightly modify an example from Kiselyov's discussion
of his HTML parser:
(html->shtml
"<html><head><title></title><title>whatever</title></head><body>
<a href=\"url\">link</a><p align=center><ul compact style=\"aa\">
<p>BLah<!-- comment <comment> --> <i> italic <b> bold <tt> ened</i>
still &lt; bold </b></body><P> But not done yet...")
=>
(*TOP* (html (head (title) (title "whatever"))
(body "\n"
(a (@ (href "url")) "link")
(p (@ (align "center"))
(ul (@ (compact) (style "aa")) "\n"))
(p "BLah"
(*COMMENT* " comment <comment> ")
" "
(i " italic " (b " bold " (tt " ened")))
"\n"
"still < bold "))
(p " But not done yet...")))
Note that in the emitted SHTML the text token `"still < bold"' is
_not_ inside the `b' element, which represents an unfortunate
failure to emulate all the quirks-handling behavior of some
popular Web browsers.
The procedures `html->sxml-Nnf' for N 0 through 2 correspond to
0th through 2nd normal forms of SXML as specified in SXML, and
indicate the minimal requirements of the emitted SXML.
`html->sxml' and `html->shtml' are currently aliases for
`html->sxml-0nf', and can be used in scripts and interactively,
when terseness is important and any normal form of SXML would
suffice.
Emitting HTML
*************
Two procedures encoding the SHTML representation as conventional HTML,
`write-shtml-as-html' and `shtml->html'. These are perhaps most useful
for emitting the result of parsed and transformed input HTML. They can
also be used for emitting HTML from generated or handwritten SHTML.
> (write-shtml-as-html shtml [out [foreign-filter]])
Writes a conventional HTML transliteration of the SHTML SHTML to
output port OUT. If OUT is not specified, the default is the
current output port. HTML elements of types that are always empty
are written using HTML4-compatible XHTML tag syntax.
If FOREIGN-FILTER is specified, it is a procedure of two argument
that is applied to any non-SHTML ("foreign") object encountered in
SHTML, and should yield SHTML. The first argument is the object,
and the second argument is a boolean for whether or not the object
is part of an attribute value.
No inter-tag whitespace or line breaks not explicit in SHTML is
emitted. The SHTML should normally include a newline at the end of
the document. For example:
(write-shtml-as-html
'((html (head (title "My Title"))
(body (@ (bgcolor "white"))
(h1 "My Heading")
(p "This is a paragraph.")
(p "This is another paragraph.")))))
-| <html><head><title>My Title</title></head><body bgcolor="whi
-| te"><h1>My Heading</h1><p>This is a paragraph.</p><p>This is
-| another paragraph.</p></body></html>
> (shtml->html shtml)
Yields an HTML encoding of SHTML SHTML as a string. For example:
(shtml->html
(html->shtml
"<P>This is<br<b<I>bold </foo>italic</ b > text.</p>"))
=> "<p>This is<br /><b><i>bold italic</i></b> text.</p>"
Note that, since this procedure constructs a string, it should
normally only be used when the HTML is relatively small. When
encoding HTML documents of conventional size and larger,
`write-shtml-as-html' is much more efficient.
Tests
*****
The HtmlPrag test suite can be enabled by editing the source code file
and loading Testeez (http://www.neilvandyke.org/testeez/).
History
*******
Version 0.16 -- 2005-12-18
Documentation fix.
Version 0.15 -- 2005-12-18
In the HTML parent element constraints that are used for structure
recovery, `div' is now always permitted as a parent, as a stopgap
measure until substantial time can be spent reworking the
algorithm to better support `div' (bug reported by Corey Sweeney
and Jepri). Also no longer convert to Scheme character any HTML
numeric character reference with value above 126, to avoid Unicode
problem with PLT 299/300 (bug reported by Corey Sweeney).
Version 0.14 -- 2005-06-16
XML CDATA sections are now tokenized. Thanks to Alejandro Forero
Cuervo for suggesting this feature. The deprecated procedures
`sxml->html' and `write-sxml-html' have been removed. Minor
documentation changes.
Version 0.13 -- 2005-02-23
HtmlPrag now requires `syntax-rules', and a reader that can read
`@' as a symbol. SHTML now has a special `&' element for
character entities, and it is emitted by the parser rather than
the old `*ENTITY*' kludge. `shtml-entity-value' supports both the
new and the old character entity representations.
`shtml-entity-value' now yields `#f' on invalid SHTML entity,
rather than raising an error. `write-shtml-as-html' now has a
third argument, `foreign-filter'. `write-shtml-as-html' now emits
SHTML `&' entity references. Changed `shtml-named-char-id' and
`shtml-numeric-char-id', as previously warned. Testeez is now
used for the test suite. Test procedure is now the internal
`%htmlprag:test'. Documentation changes. Notably, much
documentation about using HtmlPrag under various particular Scheme
implementations has been removed.
Version 0.12 -- 2004-07-12
Forward-slash in an unquoted attribute value is now considered a
value constituent rather than an unconsumed terminator of the
value (thanks to Maurice Davis for reporting and a suggested fix).
`xml:' is now preserved as a namespace qualifier (thanks to Peter
Barabas for reporting). Output port term of `write-shtml-as-html'
is now optional. Began documenting loading for particular
implementation-specific packagings.
Version 0.11 -- 2004-05-13
To reduce likely namespace collisions with SXML tools, and in
anticipation of a forthcoming set of new features, introduced the
concept of "SHTML," which will be elaborated upon in a future
version of HtmlPrag. Renamed `sxml-X-symbol' to `shtml-X-symbol',
`sxml-html-X' to `shtml-X', and `sxml-token-kind' to
`shtml-token-kind'. `html->shtml', `shtml->html', and
`write-shtml-as-html' have been added as names. Considered
deprecated but still defined (see the "Deprecated" section of this
documentation) are `sxml->html' and `write-sxml-html'. The
growing pains should now be all but over. Internally,
`htmlprag-internal:error' introduced for Bigloo portability. SISC
returned to the test list; thanks to Scott G. Miller for his
help. Fixed a new character `eq?' bug, thanks to SISC.
Version 0.10 -- 2004-05-11
All public identifiers have been renamed to drop the "`htmlprag:'"
prefix. The portability identifiers have been renamed to begin
with an `htmlprag-internal:' prefix, are now considered strictly
internal-use-only, and have otherwise been changed. `parse-html'
and `always-empty-html-elements' are no longer public.
`test-htmlprag' now tests `html->sxml' rather than `parse-html'.
SISC temporarily removed from the test list, until an open source
Java that works correctly is found.
Version 0.9 -- 2004-05-07
HTML encoding procedures added. Added
`htmlprag:sxml-html-entity-value'. Upper-case `X' in hexadecimal
character entities is now parsed, in addition to lower-case `x'.
Added `htmlprag:always-empty-html-elements'. Added additional
portability bindings. Added more test cases.
Version 0.8 -- 2004-04-27
Entity references (symbolic, decimal numeric, hexadecimal numeric)
are now parsed into `*ENTITY*' SXML. SXML symbols like `*TOP*'
are now always upper-case, regardless of the Scheme
implementation. Identifiers such as `htmlprag:sxml-top-symbol'
are bound to the upper-case symbols. Procedures
`htmlprag:html->sxml-0nf', `htmlprag:html->sxml-1nf', and
`htmlprag:html->sxml-2nf' have been added. `htmlprag:html->sxml'
now an alias for `htmlprag:html->sxml-0nf'. `htmlprag:parse' has
been refashioned as `htmlprag:parse-html' and should no longer be
directly. A number of identifiers have been renamed to be more
appropriate when the `htmlprag:' prefix is dropped in some
implementation-specific packagings of HtmlPrag:
`htmlprag:make-tokenizer' to `htmlprag:make-html-tokenizer',
`htmlprag:parse/tokenizer' to `htmlprag:parse-html/tokenizer',
`htmlprag:html->token-list' to `htmlprag:tokenize-html',
`htmlprag:token-kind' to `htmlprag:sxml-token-kind', and
`htmlprag:test' to `htmlprag:test-htmlprag'. Verbatim elements
with empty-element tag syntax are handled correctly. New versions
of Bigloo and RScheme tested.
Version 0.7 -- 2004-03-10
Verbatim pair elements like `script' and `xmp' are now parsed
correctly. Two Scheme implementations have temporarily been
dropped from regression testing: Kawa, due to a Java bytecode
verifier error likely due to a Java installation problem on the
test machine; and SXM 1.1, due to hitting a limit on the number of
literals late in the test suite code. Tested newer versions of
Bigloo, Chicken, Gauche, Guile, MIT Scheme, PLT MzScheme, RScheme,
SISC, and STklos. RScheme no longer requires the "`(define
get-output-string close-output-port)'" workaround.
Version 0.6 -- 2003-07-03
Fixed uses of `eq?' in character comparisons, thanks to Scott G.
Miller. Added `htmlprag:html->normalized-sxml' and
`htmlprag:html->nonnormalized-sxml'. Started to add
`close-output-port' to uses of output strings, then reverted due to
bug in one of the supported dialects. Tested newer versions of
Bigloo, Gauche, PLT MzScheme, RScheme.
Version 0.5 -- 2003-02-26
Removed uses of `call-with-values'. Re-ordered top-level
definitions, for portability. Now tests under Kawa 1.6.99,
RScheme 0.7.3.2, Scheme 48 0.57, SISC 1.7.4, STklos 0.54, and SXM
1.1.
Version 0.4 -- 2003-02-19
Apostrophe-quoted element attribute values are now handled. A bug
that incorrectly assumed left-to-right term evaluation order has
been fixed (thanks to MIT Scheme for confronting us with this).
Now also tests OK under Gauche 0.6.6 and MIT Scheme 7.7.1.
Portability improvement for implementations (e.g., RScheme
0.7.3.2.b6, Stalin 0.9) that cannot read `@' as a symbol (although
those implementations tend to present other portability issues, as
yet unresolved).
Version 0.3 -- 2003-02-05
A test suite with 66 cases has been added, and necessary changes
have been made for the suite to pass on five popular Scheme
implementations. XML processing instructions are now parsed.
Parent constraints have been added for `colgroup', `tbody', and
`thead' elements. Erroneous input, including invalid hexadecimal
entity reference syntax and extraneous double quotes in element
tags, is now parsed better. `htmlprag:token-kind' emits symbols
more consistent with SXML.
Version 0.2 -- 2003-02-02
Portability improvements.
Version 0.1 -- 2003-01-31
Dusted off old Guile-specific code from April 2001, converted to
emit SXML, mostly ported to R5RS and SRFI-6, added some XHTML
support and documentation. A little preliminary testing has been
done, and the package is already useful for some applications, but
this release should be considered a preview to invite comments.
---------- Footnotes ----------
(1) Scheme implementators who have not yet made `read'
case-sensitive by default are encouraged to do so.

View File

@ -0,0 +1,458 @@
<html lang="en">
<head>
<title>HtmlPrag: Pragmatic Parsing and Emitting of HTML using SXML and SHTML</title>
<meta http-equiv="Content-Type" content="text/html">
<meta name="description" content="HtmlPrag: Pragmatic Parsing and Emitting of HTML using SXML and SHTML">
<meta name="generator" content="makeinfo 4.7">
<link title="Top" rel="top" href="#Top">
<link href="http://www.gnu.org/software/texinfo/" rel="generator-home" title="Texinfo Homepage">
<meta http-equiv="Content-Style-Type" content="text/css">
<style type="text/css"><!--
pre.display { font-family:inherit }
pre.format { font-family:inherit }
pre.smalldisplay { font-family:inherit; font-size:smaller }
pre.smallformat { font-family:inherit; font-size:smaller }
pre.smallexample { font-size:smaller }
pre.smalllisp { font-size:smaller }
span.sc { font-variant:small-caps }
span.roman { font-family: serif; font-weight: normal; }
--></style>
</head>
<body>
<a name="Top"></a>
<h2 class="unnumbered">HtmlPrag: Pragmatic Parsing and Emitting of HTML using SXML and SHTML</h2>
<p class="noindent">Version <b>0.16</b>, 2005-12-18, <a href="http://www.neilvandyke.org/htmlprag/">http://www.neilvandyke.org/htmlprag/</a>
<p class="noindent">by
Neil W. Van Dyke
&lt;<code>neil@neilvandyke.org</code>&gt;
<blockquote>
Copyright &copy; 2003 - 2005 Neil W. Van Dyke. This program is Free
Software; you can redistribute it and/or modify it under the terms of the
GNU Lesser General Public License as published by the Free Software
Foundation; either version 2.1 of the License, or (at your option) any
later version. This program is distributed in the hope that it will be
useful, but without any warranty; without even the implied warranty of
merchantability or fitness for a particular purpose. See
&lt;<code>http://www.gnu.org/copyleft/lesser.html</code>&gt; for details. For
other license options and consulting, contact the author.
</blockquote>
<h2 class="chapter">Introduction</h2>
<p>HtmlPrag provides permissive HTML parsing and emitting capability to Scheme
programs. The parser is useful for software agent extraction of
information from Web pages, for programmatically transforming HTML files,
and for implementing interactive Web browsers. HtmlPrag emits &ldquo;SHTML,&rdquo;
which is an encoding of HTML in
<a href="http://pobox.com/~oleg/ftp/Scheme/SXML.html">SXML</a>, so that
conventional HTML may be processed with XML tools such as
<a href="http://pair.com/lisovsky/query/sxpath/">SXPath</a>. Like Oleg
Kiselyov's <a href="http://pobox.com/~oleg/ftp/Scheme/xml.html#HTML-parser">SSAX-based HTML parser</a>, HtmlPrag provides a permissive tokenizer, but also
attempts to recover structure. HtmlPrag also includes procedures for
encoding SHTML in HTML syntax.
<p>The HtmlPrag parsing behavior is permissive in that it accepts erroneous
HTML, handling several classes of HTML syntax errors gracefully, without
yielding a parse error. This is crucial for parsing arbitrary real-world
Web pages, since many pages actually contain syntax errors that would
defeat a strict or validating parser. HtmlPrag's handling of errors is
intended to generally emulate popular Web browsers' interpretation of the
structure of erroneous HTML. We euphemistically term this kind of parse
&ldquo;pragmatic.&rdquo;
<p>HtmlPrag also has some support for XHTML, although XML namespace qualifiers
are currently accepted but stripped from the resulting SHTML. Note that
valid XHTML input is of course better handled by a validating XML parser
like Kiselyov's
<a href="http://pobox.com/~oleg/ftp/Scheme/xml.html#XML-parser">SSAX</a>.
<p>HtmlPrag requires R5RS, SRFI-6, and SRFI-23.
<h2 class="chapter">SHTML and SXML</h2>
<p>SHTML is a variant of SXML, with two minor but useful extensions:
<ol type=1 start=1>
<li>The SXML keyword symbols, such as <code>*TOP*</code>, are defined to be in all
uppercase, regardless of the case-sensitivity of the reader of the hosting
Scheme implementation in any context. This avoids several pitfalls.
<li>Since not all character entity references used in HTML can be converted to
Scheme characters in all R5RS Scheme implementations, nor represented in
conventional text files or other common external text formats to which one
might wish to write SHTML, SHTML adds a special <code>&amp;</code> syntax for
non-ASCII (or non-Extended-ASCII) characters. The syntax is <code>(&amp;
</code><var>val</var><code>)</code>, where <var>val</var> is a symbol or string naming with the symbolic
name of the character, or an integer with the numeric value of the
character.
</ol>
<div class="defun">
&mdash; Variable: <b>shtml-comment-symbol</b><var><a name="index-shtml_002dcomment_002dsymbol-1"></a></var><br>
&mdash; Variable: <b>shtml-decl-symbol</b><var><a name="index-shtml_002ddecl_002dsymbol-2"></a></var><br>
&mdash; Variable: <b>shtml-empty-symbol</b><var><a name="index-shtml_002dempty_002dsymbol-3"></a></var><br>
&mdash; Variable: <b>shtml-end-symbol</b><var><a name="index-shtml_002dend_002dsymbol-4"></a></var><br>
&mdash; Variable: <b>shtml-entity-symbol</b><var><a name="index-shtml_002dentity_002dsymbol-5"></a></var><br>
&mdash; Variable: <b>shtml-pi-symbol</b><var><a name="index-shtml_002dpi_002dsymbol-6"></a></var><br>
&mdash; Variable: <b>shtml-start-symbol</b><var><a name="index-shtml_002dstart_002dsymbol-7"></a></var><br>
&mdash; Variable: <b>shtml-text-symbol</b><var><a name="index-shtml_002dtext_002dsymbol-8"></a></var><br>
&mdash; Variable: <b>shtml-top-symbol</b><var><a name="index-shtml_002dtop_002dsymbol-9"></a></var><br>
<blockquote>
<p>These variables are bound to the following case-sensitive symbols used in
SHTML, respectively: <code>*COMMENT*</code>, <code>*DECL*</code>, <code>*EMPTY*</code>,
<code>*END*</code>, <code>*ENTITY*</code>, <code>*PI*</code>, <code>*START*</code>, <code>*TEXT*</code>,
and <code>*TOP*</code>. These can be used in lieu of the literal symbols in
programs read by a case-insensitive Scheme reader.<a rel="footnote" href="#fn-1" name="fnd-1"><sup>1</sup></a>
</p></blockquote></div>
<div class="defun">
&mdash; Variable: <b>shtml-named-char-id</b><var><a name="index-shtml_002dnamed_002dchar_002did-10"></a></var><br>
&mdash; Variable: <b>shtml-numeric-char-id</b><var><a name="index-shtml_002dnumeric_002dchar_002did-11"></a></var><br>
<blockquote>
<p>These variables are bound to the SHTML entity public identifier strings
used in SHTML <code>*ENTITY*</code> named and numeric character entity
references.
</p></blockquote></div>
<div class="defun">
&mdash; Procedure: <b>make-shtml-entity</b><var> val<a name="index-make_002dshtml_002dentity-12"></a></var><br>
<blockquote>
<p>Yields an SHTML character entity reference for <var>val</var>. For example:
<pre class="lisp"> (make-shtml-entity "rArr") =&gt; (&amp; rArr)
(make-shtml-entity (string-&gt;symbol "rArr")) =&gt; (&amp; rArr)
(make-shtml-entity 151) =&gt; (&amp; 151)
</pre>
</blockquote></div>
<div class="defun">
&mdash; Procedure: <b>shtml-entity-value</b><var> obj<a name="index-shtml_002dentity_002dvalue-13"></a></var><br>
<blockquote>
<p>Yields the value for the SHTML entity <var>obj</var>, or <code>#f</code> if <var>obj</var>
is not a recognized entity. Values of named entities are symbols, and
values of numeric entities are numbers. An error may raised if <var>obj</var>
is an entity with system ID inconsistent with its public ID. For example:
<pre class="lisp"> (define (f s) (shtml-entity-value (cadr (html-&gt;shtml s))))
(f "&amp;nbsp;") =&gt; nbsp
(f "&amp;#2000;") =&gt; 2000
</pre>
</blockquote></div>
<h2 class="chapter">Tokenizing</h2>
<p>The tokenizer is used by the higher-level structural parser, but can also
be called directly for debugging purposes or unusual applications. Some of
the list structure of tokens, such as for start tag tokens, is mutated and
incorporated into the SHTML list structure emitted by the parser.
<div class="defun">
&mdash; Procedure: <b>make-html-tokenizer</b><var> in normalized?<a name="index-make_002dhtml_002dtokenizer-14"></a></var><br>
<blockquote>
<p>Constructs an HTML tokenizer procedure on input port <var>in</var>. If boolean
<var>normalized?</var> is true, then tokens will be in a format conducive to use
with a parser emitting normalized SXML. Each call to the resulting
procedure yields a successive token from the input. When the tokens have
been exhausted, the procedure returns the null list. For example:
<pre class="lisp"> (define input (open-input-string "&lt;a href=\"foo\"&gt;bar&lt;/a&gt;"))
(define next (make-html-tokenizer input #f))
(next) =&gt; (a (@ (href "foo")))
(next) =&gt; "bar"
(next) =&gt; (*END* a)
(next) =&gt; ()
(next) =&gt; ()
</pre>
</blockquote></div>
<div class="defun">
&mdash; Procedure: <b>tokenize-html</b><var> in normalized?<a name="index-tokenize_002dhtml-15"></a></var><br>
<blockquote>
<p>Returns a list of tokens from input port <var>in</var>, normalizing according to
boolean <var>normalized?</var>. This is probably most useful as a debugging
convenience. For example:
<pre class="lisp"> (tokenize-html (open-input-string "&lt;a href=\"foo\"&gt;bar&lt;/a&gt;") #f)
=&gt; ((a (@ (href "foo"))) "bar" (*END* a))
</pre>
</blockquote></div>
<div class="defun">
&mdash; Procedure: <b>shtml-token-kind</b><var> token<a name="index-shtml_002dtoken_002dkind-16"></a></var><br>
<blockquote>
<p>Returns a symbol indicating the kind of tokenizer <var>token</var>:
<code>*COMMENT*</code>, <code>*DECL*</code>, <code>*EMPTY*</code>, <code>*END*</code>,
<code>*ENTITY*</code>, <code>*PI*</code>, <code>*START*</code>, <code>*TEXT*</code>.
This is used by higher-level parsing code. For example:
<pre class="lisp"> (map shtml-token-kind
(tokenize-html (open-input-string "&lt;a&lt;b&gt;&gt;&lt;c&lt;/&lt;/c") #f))
=&gt; (*START* *START* *TEXT* *START* *END* *END*)
</pre>
</blockquote></div>
<h2 class="chapter">Parsing</h2>
<p>Most applications will call a parser procedure such as
<code>html-&gt;shtml</code> rather than calling the tokenizer directly.
<div class="defun">
&mdash; Procedure: <b>parse-html/tokenizer</b><var> tokenizer normalized?<a name="index-parse_002dhtml_002ftokenizer-17"></a></var><br>
<blockquote>
<p>Emits a parse tree like <code>html-&gt;shtml</code> and related procedures, except
using <var>tokenizer</var> as a source of tokens, rather than tokenizing from an
input port. This procedure is used internally, and generally should not be
called directly.
</p></blockquote></div>
<div class="defun">
&mdash; Procedure: <b>html-&gt;sxml-0nf</b><var> input<a name="index-html_002d_003esxml_002d0nf-18"></a></var><br>
&mdash; Procedure: <b>html-&gt;sxml-1nf</b><var> input<a name="index-html_002d_003esxml_002d1nf-19"></a></var><br>
&mdash; Procedure: <b>html-&gt;sxml-2nf</b><var> input<a name="index-html_002d_003esxml_002d2nf-20"></a></var><br>
&mdash; Procedure: <b>html-&gt;sxml</b><var> input<a name="index-html_002d_003esxml-21"></a></var><br>
&mdash; Procedure: <b>html-&gt;shtml</b><var> input<a name="index-html_002d_003eshtml-22"></a></var><br>
<blockquote>
<p>Permissively parse HTML from <var>input</var>, which is either an input port or
a string, and emit an SHTML equivalent or approximation. To borrow and
slightly modify an example from Kiselyov's discussion of his HTML parser:
<pre class="lisp"> (html-&gt;shtml
"&lt;html&gt;&lt;head&gt;&lt;title&gt;&lt;/title&gt;&lt;title&gt;whatever&lt;/title&gt;&lt;/head&gt;&lt;body&gt;
&lt;a href=\"url\"&gt;link&lt;/a&gt;&lt;p align=center&gt;&lt;ul compact style=\"aa\"&gt;
&lt;p&gt;BLah&lt;!-- comment &lt;comment&gt; --&gt; &lt;i&gt; italic &lt;b&gt; bold &lt;tt&gt; ened&lt;/i&gt;
still &amp;lt; bold &lt;/b&gt;&lt;/body&gt;&lt;P&gt; But not done yet...")
=&gt;
(*TOP* (html (head (title) (title "whatever"))
(body "\n"
(a (@ (href "url")) "link")
(p (@ (align "center"))
(ul (@ (compact) (style "aa")) "\n"))
(p "BLah"
(*COMMENT* " comment &lt;comment&gt; ")
" "
(i " italic " (b " bold " (tt " ened")))
"\n"
"still &lt; bold "))
(p " But not done yet...")))
</pre>
<p>Note that in the emitted SHTML the text token <code>"still &lt; bold"</code> is
<em>not</em> inside the <code>b</code> element, which represents an unfortunate
failure to emulate all the quirks-handling behavior of some popular Web
browsers.
<p>The procedures <code>html-&gt;sxml-</code><var>n</var><code>nf</code> for <var>n</var> 0 through 2
correspond to 0th through 2nd normal forms of SXML as specified in SXML,
and indicate the minimal requirements of the emitted SXML.
<p><code>html-&gt;sxml</code> and <code>html-&gt;shtml</code> are currently aliases for
<code>html-&gt;sxml-0nf</code>, and can be used in scripts and interactively, when
terseness is important and any normal form of SXML would suffice.
</p></blockquote></div>
<h2 class="chapter">Emitting HTML</h2>
<p>Two procedures encoding the SHTML representation as conventional HTML,
<code>write-shtml-as-html</code> and <code>shtml-&gt;html</code>. These are perhaps most
useful for emitting the result of parsed and transformed input HTML. They
can also be used for emitting HTML from generated or handwritten SHTML.
<div class="defun">
&mdash; Procedure: <b>write-shtml-as-html</b><var> shtml </var>[<var>out </var>[<var>foreign-filter</var>]]<var><a name="index-write_002dshtml_002das_002dhtml-23"></a></var><br>
<blockquote>
<p>Writes a conventional HTML transliteration of the SHTML <var>shtml</var> to
output port <var>out</var>. If <var>out</var> is not specified, the default is the
current output port. HTML elements of types that are always empty are
written using HTML4-compatible XHTML tag syntax.
<p>If <var>foreign-filter</var> is specified, it is a procedure of two argument
that is applied to any non-SHTML (&ldquo;foreign&rdquo;) object encountered in
<var>shtml</var>, and should yield SHTML. The first argument is the object, and
the second argument is a boolean for whether or not the object is part of
an attribute value.
<p>No inter-tag whitespace or line breaks not explicit in <var>shtml</var> is
emitted. The <var>shtml</var> should normally include a newline at the end of
the document. For example:
<pre class="lisp"> (write-shtml-as-html
'((html (head (title "My Title"))
(body (@ (bgcolor "white"))
(h1 "My Heading")
(p "This is a paragraph.")
(p "This is another paragraph.")))))
-| &lt;html&gt;&lt;head&gt;&lt;title&gt;My Title&lt;/title&gt;&lt;/head&gt;&lt;body bgcolor="whi
-| te"&gt;&lt;h1&gt;My Heading&lt;/h1&gt;&lt;p&gt;This is a paragraph.&lt;/p&gt;&lt;p&gt;This is
-| another paragraph.&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;
</pre>
</blockquote></div>
<div class="defun">
&mdash; Procedure: <b>shtml-&gt;html</b><var> shtml<a name="index-shtml_002d_003ehtml-24"></a></var><br>
<blockquote>
<p>Yields an HTML encoding of SHTML <var>shtml</var> as a string. For example:
<pre class="lisp"> (shtml-&gt;html
(html-&gt;shtml
"&lt;P&gt;This is&lt;br&lt;b&lt;I&gt;bold &lt;/foo&gt;italic&lt;/ b &gt; text.&lt;/p&gt;"))
=&gt; "&lt;p&gt;This is&lt;br /&gt;&lt;b&gt;&lt;i&gt;bold italic&lt;/i&gt;&lt;/b&gt; text.&lt;/p&gt;"
</pre>
<p>Note that, since this procedure constructs a string, it should normally
only be used when the HTML is relatively small. When encoding HTML
documents of conventional size and larger, <code>write-shtml-as-html</code> is
much more efficient.
</p></blockquote></div>
<h2 class="chapter">Tests</h2>
<p>The HtmlPrag test suite can be enabled by editing the source code file and
loading <a href="http://www.neilvandyke.org/testeez/">Testeez</a>.
<h2 class="unnumbered">History</h2>
<dl>
<dt>Version 0.16 &mdash; 2005-12-18<dd>Documentation fix.
<br><dt>Version 0.15 &mdash; 2005-12-18<dd>In the HTML parent element constraints that are used for structure
recovery, <code>div</code> is now always permitted as a parent, as a stopgap
measure until substantial time can be spent reworking the algorithm to
better support <code>div</code> (bug reported by Corey Sweeney and Jepri). Also
no longer convert to Scheme character any HTML numeric character reference
with value above 126, to avoid Unicode problem with PLT 299/300 (bug
reported by Corey Sweeney).
<br><dt>Version 0.14 &mdash; 2005-06-16<dd>XML CDATA sections are now tokenized. Thanks to Alejandro Forero Cuervo
for suggesting this feature. The deprecated procedures <code>sxml-&gt;html</code>
and <code>write-sxml-html</code> have been removed. Minor documentation changes.
<br><dt>Version 0.13 &mdash; 2005-02-23<dd>HtmlPrag now requires <code>syntax-rules</code>, and a reader that can read
<code>@</code> as a symbol. SHTML now has a special <code>&amp;</code> element for
character entities, and it is emitted by the parser rather than the old
<code>*ENTITY*</code> kludge. <code>shtml-entity-value</code> supports both the new
and the old character entity representations. <code>shtml-entity-value</code>
now yields <code>#f</code> on invalid SHTML entity, rather than raising an error.
<code>write-shtml-as-html</code> now has a third argument, <code>foreign-filter</code>.
<code>write-shtml-as-html</code> now emits SHTML <code>&amp;</code> entity references.
Changed <code>shtml-named-char-id</code> and <code>shtml-numeric-char-id</code>, as
previously warned. Testeez is now used for the test suite. Test procedure
is now the internal <code>%htmlprag:test</code>. Documentation changes.
Notably, much documentation about using HtmlPrag under various particular
Scheme implementations has been removed.
<br><dt>Version 0.12 &mdash; 2004-07-12<dd>Forward-slash in an unquoted attribute value is now considered a value
constituent rather than an unconsumed terminator of the value (thanks to
Maurice Davis for reporting and a suggested fix). <code>xml:</code> is now
preserved as a namespace qualifier (thanks to Peter Barabas for
reporting). Output port term of <code>write-shtml-as-html</code> is now
optional. Began documenting loading for particular implementation-specific
packagings.
<br><dt>Version 0.11 &mdash; 2004-05-13<dd>To reduce likely namespace collisions with SXML tools, and in anticipation
of a forthcoming set of new features, introduced the concept of &ldquo;SHTML,&rdquo;
which will be elaborated upon in a future version of HtmlPrag. Renamed
<code>sxml-</code><var>x</var><code>-symbol</code> to <code>shtml-</code><var>x</var><code>-symbol</code>,
<code>sxml-html-</code><var>x</var> to <code>shtml-</code><var>x</var>, and
<code>sxml-token-kind</code> to <code>shtml-token-kind</code>. <code>html-&gt;shtml</code>,
<code>shtml-&gt;html</code>, and <code>write-shtml-as-html</code> have been added as
names. Considered deprecated but still defined (see the &ldquo;Deprecated&rdquo;
section of this documentation) are <code>sxml-&gt;html</code> and
<code>write-sxml-html</code>. The growing pains should now be all but over.
Internally, <code>htmlprag-internal:error</code> introduced for Bigloo
portability. SISC returned to the test list; thanks to Scott G. Miller
for his help. Fixed a new character <code>eq?</code> bug, thanks to SISC.
<br><dt>Version 0.10 &mdash; 2004-05-11<dd>All public identifiers have been renamed to drop the &ldquo;<code>htmlprag:</code>&rdquo;
prefix. The portability identifiers have been renamed to begin with an
<code>htmlprag-internal:</code> prefix, are now considered strictly
internal-use-only, and have otherwise been changed. <code>parse-html</code> and
<code>always-empty-html-elements</code> are no longer public.
<code>test-htmlprag</code> now tests <code>html-&gt;sxml</code> rather than
<code>parse-html</code>. SISC temporarily removed from the test list, until an
open source Java that works correctly is found.
<br><dt>Version 0.9 &mdash; 2004-05-07<dd>HTML encoding procedures added. Added
<code>htmlprag:sxml-html-entity-value</code>. Upper-case <code>X</code> in hexadecimal
character entities is now parsed, in addition to lower-case <code>x</code>.
Added <code>htmlprag:always-empty-html-elements</code>. Added additional
portability bindings. Added more test cases.
<br><dt>Version 0.8 &mdash; 2004-04-27<dd>Entity references (symbolic, decimal numeric, hexadecimal numeric) are now
parsed into <code>*ENTITY*</code> SXML. SXML symbols like <code>*TOP*</code> are now
always upper-case, regardless of the Scheme implementation. Identifiers
such as <code>htmlprag:sxml-top-symbol</code> are bound to the upper-case
symbols. Procedures <code>htmlprag:html-&gt;sxml-0nf</code>,
<code>htmlprag:html-&gt;sxml-1nf</code>, and <code>htmlprag:html-&gt;sxml-2nf</code> have
been added. <code>htmlprag:html-&gt;sxml</code> now an alias for
<code>htmlprag:html-&gt;sxml-0nf</code>. <code>htmlprag:parse</code> has been refashioned
as <code>htmlprag:parse-html</code> and should no longer be directly. A number
of identifiers have been renamed to be more appropriate when the
<code>htmlprag:</code> prefix is dropped in some implementation-specific
packagings of HtmlPrag: <code>htmlprag:make-tokenizer</code> to
<code>htmlprag:make-html-tokenizer</code>, <code>htmlprag:parse/tokenizer</code> to
<code>htmlprag:parse-html/tokenizer</code>, <code>htmlprag:html-&gt;token-list</code> to
<code>htmlprag:tokenize-html</code>, <code>htmlprag:token-kind</code> to
<code>htmlprag:sxml-token-kind</code>, and <code>htmlprag:test</code> to
<code>htmlprag:test-htmlprag</code>. Verbatim elements with empty-element tag
syntax are handled correctly. New versions of Bigloo and RScheme tested.
<br><dt>Version 0.7 &mdash; 2004-03-10<dd>Verbatim pair elements like <code>script</code> and <code>xmp</code> are now parsed
correctly. Two Scheme implementations have temporarily been dropped from
regression testing: Kawa, due to a Java bytecode verifier error likely due
to a Java installation problem on the test machine; and SXM 1.1, due to
hitting a limit on the number of literals late in the test suite code.
Tested newer versions of Bigloo, Chicken, Gauche, Guile, MIT Scheme, PLT
MzScheme, RScheme, SISC, and STklos. RScheme no longer requires the
&ldquo;<code>(define get-output-string close-output-port)</code>&rdquo; workaround.
<br><dt>Version 0.6 &mdash; 2003-07-03<dd>Fixed uses of <code>eq?</code> in character comparisons, thanks to Scott G.
Miller. Added <code>htmlprag:html-&gt;normalized-sxml</code> and
<code>htmlprag:html-&gt;nonnormalized-sxml</code>. Started to add
<code>close-output-port</code> to uses of output strings, then reverted due to
bug in one of the supported dialects. Tested newer versions of Bigloo,
Gauche, PLT MzScheme, RScheme.
<br><dt>Version 0.5 &mdash; 2003-02-26<dd>Removed uses of <code>call-with-values</code>. Re-ordered top-level definitions,
for portability. Now tests under Kawa 1.6.99, RScheme 0.7.3.2, Scheme 48
0.57, SISC 1.7.4, STklos 0.54, and SXM 1.1.
<br><dt>Version 0.4 &mdash; 2003-02-19<dd>Apostrophe-quoted element attribute values are now handled. A bug that
incorrectly assumed left-to-right term evaluation order has been fixed
(thanks to MIT Scheme for confronting us with this). Now also tests OK
under Gauche 0.6.6 and MIT Scheme 7.7.1. Portability improvement for
implementations (e.g., RScheme 0.7.3.2.b6, Stalin 0.9) that cannot read
<code>@</code> as a symbol (although those implementations tend to present other
portability issues, as yet unresolved).
<br><dt>Version 0.3 &mdash; 2003-02-05<dd>A test suite with 66 cases has been added, and necessary changes have been
made for the suite to pass on five popular Scheme implementations. XML
processing instructions are now parsed. Parent constraints have been added
for <code>colgroup</code>, <code>tbody</code>, and <code>thead</code> elements. Erroneous
input, including invalid hexadecimal entity reference syntax and extraneous
double quotes in element tags, is now parsed better.
<code>htmlprag:token-kind</code> emits symbols more consistent with SXML.
<br><dt>Version 0.2 &mdash; 2003-02-02<dd>Portability improvements.
<br><dt>Version 0.1 &mdash; 2003-01-31<dd>Dusted off old Guile-specific code from April 2001, converted to emit SXML,
mostly ported to R5RS and SRFI-6, added some XHTML support and
documentation. A little preliminary testing has been done, and the package
is already useful for some applications, but this release should be
considered a preview to invite comments.
</dl>
<div class="footnote">
<hr>
<a name="texinfo-footnotes-in-document"></a><h4>Footnotes</h4><p class="footnote"><small>[<a name="fn-1" href="#fnd-1">1</a>]</small> Scheme
implementators who have not yet made <code>read</code> case-sensitive by default
are encouraged to do so.</p>
<p><hr></div>
</body></html>

View File

@ -0,0 +1,27 @@
;; THIS FILE IS GENERATED
(
("shtml-comment-symbol" "shtml-comment-symbol" "index.html" "index-shtml_002dcomment_002dsymbol-1" "")
("shtml-decl-symbol" "shtml-decl-symbol" "index.html" "index-shtml_002ddecl_002dsymbol-2" "")
("shtml-empty-symbol" "shtml-empty-symbol" "index.html" "index-shtml_002dempty_002dsymbol-3" "")
("shtml-end-symbol" "shtml-end-symbol" "index.html" "index-shtml_002dend_002dsymbol-4" "")
("shtml-entity-symbol" "shtml-entity-symbol" "index.html" "index-shtml_002dentity_002dsymbol-5" "")
("shtml-pi-symbol" "shtml-pi-symbol" "index.html" "index-shtml_002dpi_002dsymbol-6" "")
("shtml-start-symbol" "shtml-start-symbol" "index.html" "index-shtml_002dstart_002dsymbol-7" "")
("shtml-text-symbol" "shtml-text-symbol" "index.html" "index-shtml_002dtext_002dsymbol-8" "")
("shtml-top-symbol" "shtml-top-symbol" "index.html" "index-shtml_002dtop_002dsymbol-9" "")
("shtml-named-char-id" "shtml-named-char-id" "index.html" "index-shtml_002dnamed_002dchar_002did-10" "")
("shtml-numeric-char-id" "shtml-numeric-char-id" "index.html" "index-shtml_002dnumeric_002dchar_002did-11" "")
("make-shtml-entity" "(make-shtml-entity val)" "index.html" "index-make_002dshtml_002dentity-12" "")
("shtml-entity-value" "(shtml-entity-value obj)" "index.html" "index-shtml_002dentity_002dvalue-13" "")
("make-html-tokenizer" "(make-html-tokenizer in normalized?)" "index.html" "index-make_002dhtml_002dtokenizer-14" "")
("tokenize-html" "(tokenize-html in normalized?)" "index.html" "index-tokenize_002dhtml-15" "")
("shtml-token-kind" "(shtml-token-kind token)" "index.html" "index-shtml_002dtoken_002dkind-16" "")
("parse-html/tokenizer" "(parse-html/tokenizer tokenizer normalized?)" "index.html" "index-parse_002dhtml_002ftokenizer-17" "")
("html->sxml-0nf" "(html->sxml-0nf input)" "index.html" "index-html_002d_003esxml_002d0nf-18" "")
("html->sxml-1nf" "(html->sxml-1nf input)" "index.html" "index-html_002d_003esxml_002d1nf-19" "")
("html->sxml-2nf" "(html->sxml-2nf input)" "index.html" "index-html_002d_003esxml_002d2nf-20" "")
("html->sxml" "(html->sxml input)" "index.html" "index-html_002d_003esxml-21" "")
("html->shtml" "(html->shtml input)" "index.html" "index-html_002d_003eshtml-22" "")
("write-shtml-as-html" "(write-shtml-as-html shtml [out [foreign-filter]])" "index.html" "index-write_002dshtml_002das_002dhtml-23" "")
("shtml->html" "(shtml->html shtml)" "index.html" "index-shtml_002d_003ehtml-24" "")
)

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,12 @@
;; THIS FILE IS GENERATED
(module info (lib "infotab.ss" "setup")
(define name "HtmlPrag")
(define blurb
'("HtmlPrag: Pragmatic Parsing and Emitting of HTML using SXML and SHTML"))
(define categories '(net xml))
(define doc.txt "doc.txt")
(define html-docs '("doc"))
(define homepage "http://www.neilvandyke.org/htmlprag/")
(define primary-file "htmlprag.ss")
(define version "0.16")
(define compile-omit-files '("htmlprag.scm")))

View File

@ -0,0 +1,265 @@
; Module header is generated automatically
#cs(module SXML-tree-trans mzscheme
(require "myenv.ss")
; XML/HTML processing in Scheme
; SXML expression tree transformers
;
; IMPORT
; A prelude appropriate for your Scheme system
; (myenv-bigloo.scm, myenv-mit.scm, etc.)
;
; EXPORT
; (provide SRV:send-reply
; post-order pre-post-order replace-range)
;
; See vSXML-tree-trans.scm for the validation code, which also
; serves as usage examples.
;
; $Id: SXML-tree-trans.scm,v 1.7 2004/11/09 20:22:26 sperber Exp $
; procedure: SRV:send-reply FRAGMENT ...
;
; Output the 'fragments'
; The fragments are a list of strings, characters,
; numbers, thunks, #f, #t -- and other fragments.
; The function traverses the tree depth-first, writes out
; strings and characters, executes thunks, and ignores
; #f and '().
; The function returns #t if anything was written at all;
; otherwise the result is #f
; If #t occurs among the fragments, it is not written out
; but causes the result of SRV:send-reply to be #t
(define (SRV:send-reply . fragments)
(let loop ((fragments fragments) (result #f))
(cond
((null? fragments) result)
((not (car fragments)) (loop (cdr fragments) result))
((null? (car fragments)) (loop (cdr fragments) result))
((eq? #t (car fragments)) (loop (cdr fragments) #t))
((pair? (car fragments))
(loop (cdr fragments) (loop (car fragments) result)))
((procedure? (car fragments))
((car fragments))
(loop (cdr fragments) #t))
(else
(display (car fragments))
(loop (cdr fragments) #t)))))
; procedure: pre-post-order TREE BINDINGS
;
; Traversal of an SXML tree or a grove:
; a <Node> or a <Nodelist>
;
; A <Node> and a <Nodelist> are mutually-recursive datatypes that
; underlie the SXML tree:
; <Node> ::= (name . <Nodelist>) | "text string"
; An (ordered) set of nodes is just a list of the constituent nodes:
; <Nodelist> ::= (<Node> ...)
; Nodelists, and Nodes other than text strings are both lists. A
; <Nodelist> however is either an empty list, or a list whose head is
; not a symbol (an atom in general). A symbol at the head of a node is
; either an XML name (in which case it's a tag of an XML element), or
; an administrative name such as '@'.
; See SXPath.scm and SSAX.scm for more information on SXML.
;
;
; Pre-Post-order traversal of a tree and creation of a new tree:
; pre-post-order:: <tree> x <bindings> -> <new-tree>
; where
; <bindings> ::= (<binding> ...)
; <binding> ::= (<trigger-symbol> *preorder* . <handler>) |
; (<trigger-symbol> *macro* . <handler>) |
; (<trigger-symbol> <new-bindings> . <handler>) |
; (<trigger-symbol> . <handler>)
; <trigger-symbol> ::= XMLname | *text* | *default*
; <handler> :: <trigger-symbol> x [<tree>] -> <new-tree>
;
; The pre-post-order function visits the nodes and nodelists
; pre-post-order (depth-first). For each <Node> of the form (name
; <Node> ...) it looks up an association with the given 'name' among
; its <bindings>. If failed, pre-post-order tries to locate a
; *default* binding. It's an error if the latter attempt fails as
; well. Having found a binding, the pre-post-order function first
; checks to see if the binding is of the form
; (<trigger-symbol> *preorder* . <handler>)
; If it is, the handler is 'applied' to the current node. Otherwise,
; the pre-post-order function first calls itself recursively for each
; child of the current node, with <new-bindings> prepended to the
; <bindings> in effect. The result of these calls is passed to the
; <handler> (along with the head of the current <Node>). To be more
; precise, the handler is _applied_ to the head of the current node
; and its processed children. The result of the handler, which should
; also be a <tree>, replaces the current <Node>. If the current <Node>
; is a text string or other atom, a special binding with a symbol
; *text* is looked up.
;
; A binding can also be of a form
; (<trigger-symbol> *macro* . <handler>)
; This is equivalent to *preorder* described above. However, the result
; is re-processed again, with the current stylesheet.
;
(define (pre-post-order tree bindings)
(let* ((default-binding (assq '*default* bindings))
(text-binding (or (assq '*text* bindings) default-binding))
(text-handler ; Cache default and text bindings
(and text-binding
(if (procedure? (cdr text-binding))
(cdr text-binding) (cddr text-binding)))))
(let loop ((tree tree))
(cond
((null? tree) '())
((not (pair? tree))
(let ((trigger '*text*))
(if text-handler (text-handler trigger tree)
(error "Unknown binding for " trigger " and no default"))))
((not (symbol? (car tree))) (map loop tree)) ; tree is a nodelist
(else ; tree is an SXML node
(let* ((trigger (car tree))
(binding (or (assq trigger bindings) default-binding)))
(cond
((not binding)
(error "Unknown binding for " trigger " and no default"))
((not (pair? (cdr binding))) ; must be a procedure: handler
(apply (cdr binding) trigger (map loop (cdr tree))))
((eq? '*preorder* (cadr binding))
(apply (cddr binding) tree))
((eq? '*macro* (cadr binding))
(loop (apply (cddr binding) tree)))
(else ; (cadr binding) is a local binding
(apply (cddr binding) trigger
(pre-post-order (cdr tree) (append (cadr binding) bindings)))
))))))))
; procedure: post-order TREE BINDINGS
; post-order is a strict subset of pre-post-order without *preorder*
; (let alone *macro*) traversals.
; Now pre-post-order is actually faster than the old post-order.
; The function post-order is deprecated and is aliased below for
; backward compatibility.
(define post-order pre-post-order)
;------------------------------------------------------------------------
; Extended tree fold
; tree = atom | (node-name tree ...)
;
; foldts fdown fup fhere seed (Leaf str) = fhere seed str
; foldts fdown fup fhere seed (Nd kids) =
; fup seed $ foldl (foldts fdown fup fhere) (fdown seed) kids
; procedure fhere: seed -> atom -> seed
; procedure fdown: seed -> node -> seed
; procedure fup: parent-seed -> last-kid-seed -> node -> seed
; foldts returns the final seed
(define (foldts fdown fup fhere seed tree)
(cond
((null? tree) seed)
((not (pair? tree)) ; An atom
(fhere seed tree))
(else
(let loop ((kid-seed (fdown seed tree)) (kids (cdr tree)))
(if (null? kids)
(fup seed kid-seed tree)
(loop (foldts fdown fup fhere kid-seed (car kids))
(cdr kids)))))))
; procedure: replace-range:: BEG-PRED x END-PRED x FOREST -> FOREST
; Traverse a forest depth-first and cut/replace ranges of nodes.
;
; The nodes that define a range don't have to have the same immediate
; parent, don't have to be on the same level, and the end node of a
; range doesn't even have to exist. A replace-range procedure removes
; nodes from the beginning node of the range up to (but not including)
; the end node of the range. In addition, the beginning node of the
; range can be replaced by a node or a list of nodes. The range of
; nodes is cut while depth-first traversing the forest. If all
; branches of the node are cut a node is cut as well. The procedure
; can cut several non-overlapping ranges from a forest.
; replace-range:: BEG-PRED x END-PRED x FOREST -> FOREST
; where
; type FOREST = (NODE ...)
; type NODE = Atom | (Name . FOREST) | FOREST
;
; The range of nodes is specified by two predicates, beg-pred and end-pred.
; beg-pred:: NODE -> #f | FOREST
; end-pred:: NODE -> #f | FOREST
; The beg-pred predicate decides on the beginning of the range. The node
; for which the predicate yields non-#f marks the beginning of the range
; The non-#f value of the predicate replaces the node. The value can be a
; list of nodes. The replace-range procedure then traverses the tree and skips
; all the nodes, until the end-pred yields non-#f. The value of the end-pred
; replaces the end-range node. The new end node and its brothers will be
; re-scanned.
; The predicates are evaluated pre-order. We do not descend into a node that
; is marked as the beginning of the range.
(define (replace-range beg-pred end-pred forest)
; loop forest keep? new-forest
; forest is the forest to traverse
; new-forest accumulates the nodes we will keep, in the reverse
; order
; If keep? is #t, keep the curr node if atomic. If the node is not atomic,
; traverse its children and keep those that are not in the skip range.
; If keep? is #f, skip the current node if atomic. Otherwise,
; traverse its children. If all children are skipped, skip the node
; as well.
(define (loop forest keep? new-forest)
(if (null? forest) (values (reverse new-forest) keep?)
(let ((node (car forest)))
(if keep?
(cond ; accumulate mode
((beg-pred node) => ; see if the node starts the skip range
(lambda (repl-branches) ; if so, skip/replace the node
(loop (cdr forest) #f
(append (reverse repl-branches) new-forest))))
((not (pair? node)) ; it's an atom, keep it
(loop (cdr forest) keep? (cons node new-forest)))
(else
(let ((node?
(symbol? (car node)))) ; or is it a nodelist?
(call-with-values
; traverse its children
(lambda () (loop (if node? (cdr node) node) #t '()))
(lambda (new-kids keep?)
(loop (cdr forest) keep?
(cons
(if node? (cons (car node) new-kids) new-kids)
new-forest)))))))
; skip mode
(cond
((end-pred node) => ; end the skip range
(lambda (repl-branches) ; repl-branches will be re-scanned
(loop (append repl-branches (cdr forest)) #t
new-forest)))
((not (pair? node)) ; it's an atom, skip it
(loop (cdr forest) keep? new-forest))
(else
(let ((node?
(symbol? (car node)))) ; or is it a nodelist?
; traverse its children
(call-with-values
(lambda () (loop (if node? (cdr node) node) #f '()))
(lambda (new-kids keep?)
(loop
(cdr forest) keep?
(if (or keep? (pair? new-kids))
(cons
(if node? (cons (car node) new-kids) new-kids)
new-forest)
new-forest) ; if all kids are skipped
)))))))))) ; skip the node too
(call-with-values
(lambda () (loop forest #t '()))
(lambda (new-forest keep?)
new-forest)))
(provide (all-defined)))

View File

@ -0,0 +1,314 @@
; Module header is generated automatically
#cs(module access-remote mzscheme
(require "common.ss")
(require "myenv.ss")
(require "http.ss")
(require "srfi-12.ss")
(require "util.ss")
(require (lib "string.ss" "srfi/13"))
;; Uniform access to local and remote resources
;; Resolution for relative URIs in accordance with RFC 2396
;
; This software is in Public Domain.
; IT IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.
;
; Please send bug reports and comments to:
; lizorkin@hotbox.ru Dmitry Lizorkin
;=========================================================================
; Accessing (remote) resources
; Whether the resource exists (generalization of FILE-EXISTS? predicate)
; REQ-URI - a string representing a URI of the resource
; This predicate doesn't have any side effects
(define (resource-exists? req-uri)
(cond
((string-prefix? "http://" req-uri) ; HTTP scheme is used in REQ-URI
(with-exception-handler
(lambda (x) #f) ; an uncaught exception occured during http transaction
(lambda ()
(http-transaction
"HEAD"
req-uri
(list (cons 'logger (lambda (port message . other-messages) #t)))
(lambda (resp-code resp-headers resp-port)
(close-input-port resp-port)
(and (>= resp-code 200) (< resp-code 400)))))))
(else ; a local file
(file-exists? req-uri))))
; Opens an input port for a resource
; REQ-URI - a string representing a URI of the resource
; An input port is returned if there were no errors. In case of an error,
; the function returns #f and displays an error message as a side effect.
; Doesn't raise any exceptions.
(define (open-input-resource req-uri)
(with-exception-handler
(lambda (x)
(cerr nl req-uri ": " ((condition-property-accessor 'exn 'message) x) nl)
#f)
(lambda ()
(cond
((string-prefix? "http://" req-uri) ; HTTP scheme is used in REQ-URI
(http-transaction
"GET"
req-uri
(list (cons 'logger (lambda (port message . other-messages) #t)))
(lambda (resp-code resp-headers resp-port)
(cond
((and (>= resp-code 200) (< resp-code 400)) resp-port)
(else
(close-input-port resp-port)
(cerr nl req-uri ": resource not available: " resp-code nl)
#f)))))
(else ; a local file
(open-input-file req-uri))))))
;=========================================================================
; Determining resource type
; Returns a file extenstion
; filename - a string
; File extension is returned in the form of a string
(define (ar:file-extension filename)
(let loop ((src (reverse (string->list filename)))
(res '()))
(cond
((null? src) ; no dot encountered => no extension
"")
((char=? (car src) #\.)
(list->string res))
(else
(loop (cdr src) (cons (car src) res))))))
; Determines the type of a resource
; REQ-URI - a string representing a URI of the resource
; For a local resource, its type is determined by its file extension
; One of the following is returned:
; #f - if the requested resource doesn't exist
; 'xml - for a resource that is an XML document
; 'html - for a resource that is an HTML document
; 'unknown - for any other resource type
(define (ar:resource-type req-uri)
(cond
((string-prefix? "http://" req-uri) ; HTTP scheme is used in REQ-URI
(with-exception-handler
(lambda (x) #f) ; an uncaught exception occured during http transaction
(lambda ()
(http-transaction
"HEAD"
req-uri
(list (cons 'logger (lambda (port message . other-messages) #t)))
(lambda (resp-code resp-headers resp-port)
(close-input-port resp-port)
(if
(or (< resp-code 200) (>= resp-code 400))
#f ; Resource doesn't exist
(let ((content-type (assq 'CONTENT-TYPE resp-headers)))
(cond
((not content-type) ; no content type specified
'unknown)
((string-prefix? "text/xml" (cdr content-type))
'xml)
((string-prefix? "text/html" (cdr content-type))
'html)
((string-prefix? "text/plain" (cdr content-type))
'plain)
(else
'unknown)))))))))
(else ; a local file
(cond
((not (file-exists? req-uri)) ; file doesn't exist
#f)
((assoc (ar:file-extension req-uri)
'(("xml" . xml) ("html" . html) ("htm" . html)))
=> cdr)
(else 'unknown)))))
;=========================================================================
; Working on absolute/relative URIs
; This section is based on RFC 2396
;-------------------------------------------------
; The URI and its components
; URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
; genericURI = <scheme>://<authority><path>?<query>
; For a sertain subset of URI schemes, absoluteURI = genericURI
; We will suppose this condition valid in this implementation
; Returns: (values scheme authority path query fragment)
; If some component is not presented in the given URI, #f is returned for this
; component. Note that the path component is always presented in the URI
(define (ar:uri->components uri)
(call-with-values
(lambda () (cond
((string-rindex uri #\#)
=> (lambda (pos)
(values
(substring uri (+ pos 1) (string-length uri))
(substring uri 0 pos))))
(else
(values #f uri))))
(lambda (fragment uri)
(call-with-values
(lambda () (cond
((string-rindex uri #\?)
=> (lambda (pos)
(values
(substring uri (+ pos 1) (string-length uri))
(substring uri 0 pos))))
(else
(values #f uri))))
(lambda (query uri)
(call-with-values
(lambda ()
(cond
((substring? "://" uri)
=> (lambda (pos)
(values
(substring uri 0 (+ pos 3))
(substring uri (+ pos 3) (string-length uri)))))
((string-index uri #\:)
=> (lambda (pos)
(values
(substring uri 0 (+ pos 1))
(substring uri (+ pos 1) (string-length uri)))))
(else
(values #f uri))))
(lambda (scheme uri)
(call-with-values
(lambda ()
(cond
((not scheme)
(values #f uri))
((string-index uri #\/)
=> (lambda (pos)
(values
(substring uri 0 pos)
(substring uri pos (string-length uri)))))
(else
(values #f uri))))
(lambda (authority path)
(values scheme authority path query fragment))))))))))
; Combines components into the URI
(define (ar:components->uri scheme authority path query fragment)
(apply string-append
(append
(if scheme (list scheme) '())
(if authority (list authority) '())
(list path)
(if query (list "?" query) '())
(if fragment (list "#" fragment) '()))))
;-------------------------------------------------
; Path and its path_segments
; abs_path = "/" path_segments
; path_segments = segment *( "/" segment )
; Splits the given path into segments
; Returns: (values root dir-lst filename)
; dir-lst ::= (listof directory-name)
; root - either an empty string, or "/" or drive-name (for Windows filesystems)
(define (ar:path->segments path)
(call-with-values
(lambda ()
(let ((lng (string-length path)))
(cond
((and (> lng 0) (char=? (string-ref path 0) #\/))
(values "/" (substring path 1 lng)))
((and (> lng 1)
(char=? (string-ref path 1) #\:)
(member (string-ref path 2) (list #\/ #\\)))
(values (substring path 0 3)
(substring path 3 lng)))
(else (values "" path)))))
(lambda (root rel-path)
(let ((lst (string-split rel-path (list #\/ #\\))))
(if (null? lst) ; the relative path is empty
(values root '() "")
(let ((lst (reverse lst)))
(values root (reverse (cdr lst)) (car lst))))))))
; Combines path_segments into the path
; backslash? - a boolean value: whether the backslach shall be used as a
; delimiter between path_segments. If #f, straight slash is used
(define (ar:segments->path root dir-lst filename backslash?)
(let ((delim (if backslash? "\\" "/")))
(apply string-append
(append
(list root)
(apply append
(map
(lambda (directory-name)
(list directory-name delim))
dir-lst))
(list filename)))))
; Removes redundant segment combinations from the dir-lst
; '("smth" "..") --> removed
; '(".") --> removed
; The algorithm is formally specified in RFC 2396, 5.2, step 6)
(define (ar:normalize-dir-lst dir-lst)
(cond
((null? dir-lst) dir-lst)
((string=? (car dir-lst) ".")
(ar:normalize-dir-lst (cdr dir-lst)))
((string=? (car dir-lst) "..")
(cons (car dir-lst) (ar:normalize-dir-lst (cdr dir-lst))))
(else
(let ((processed (ar:normalize-dir-lst (cdr dir-lst))))
(cond
((null? processed)
(list (car dir-lst)))
((string=? (car processed) "..")
(cdr processed))
(else
(cons (car dir-lst) processed)))))))
;-------------------------------------------------
; Resolves a relative URI with respect to the base URI
; base-uri - base URI for the requiested one
; Returns the resolved URI
(define (ar:resolve-uri-according-base base-uri req-uri)
(call-with-values
(lambda () (ar:uri->components req-uri))
(lambda (req-scheme req-authority req-path req-query req-fragment)
(if
(or req-scheme req-authority) ; it is the absolute URI
req-uri
(call-with-values
(lambda () (ar:path->segments req-path))
(lambda (req-root req-dir-lst req-filename)
(if
(> (string-length req-root) 1) ; absolute path from the disc drive
req-uri
(call-with-values
(lambda () (ar:uri->components base-uri))
(lambda
(base-scheme base-authority base-path base-query base-fragment)
(if
(string=? req-root "/") ; absolute path from server
(ar:components->uri base-scheme base-authority
req-path req-query req-fragment)
; else the requested URI is the relative URI
(call-with-values
(lambda () (ar:path->segments base-path))
(lambda (base-root base-dir-lst base-filename)
(ar:components->uri
base-scheme
base-authority
(ar:segments->path
base-root
(ar:normalize-dir-lst (append base-dir-lst req-dir-lst))
req-filename
(and (not (string-index base-path #\/))
(string-index req-path #\\)))
req-query
req-fragment)))))))))))))
(provide (all-defined)))

View File

@ -0,0 +1,66 @@
; Module header is generated automatically
#cs(module char-encoding mzscheme
(require "common.ss")
(require "myenv.ss")
; Character-encoding module
;
; This module deals with particular character-encoding issues such as
; conversions between characters and their ASCII or UCS2 codes, Scheme
; representations of "Carriage Return" (CR), "tabulation" (TAB) and
; other control characters.
;
; This module by necessity is platform-specific as character encoding
; issues are hardly addressed in R5RS. For example, the result of
; char->integer is generally not an ASCII code of an integer (although
; it is, on many Scheme systems, with the important exception of
; Scheme48 and SCSH). The level of support for character sets other
; than ASCII varies widely among Scheme systems.
;
; This file collects various character-encoding functions that are
; necessary for the SSAX XML parser. The functions are of general use
; and scope.
;
; $Id: char-encoding.scm,v 1.1 2003/04/09 20:34:28 oleg Exp $
; ascii->char INT -> CHAR
; return a character whose ASCII code is INT
; Note, because ascii->char is injective (there are more characters than
; ASCII characters), the inverse transformation is not defined.
(cond-expand
(scheme48 #f) ; ascii->char is built into Scheme48
(scsh #f) ; ascii->char is built into Scheme48
(else
(define ascii->char integer->char)
)
)
; ucscode->char INT -> CHAR
; Return a character whose UCS (ISO/IEC 10646) code is INT
; Note
; This function is required for processing of XML character entities:
; According to Section "4.1 Character and Entity References"
; of the XML Recommendation:
; "[Definition: A character reference refers to a specific character
; in the ISO/IEC 10646 character set, for example one not directly
; accessible from available input devices.]"
(define (ucscode->char code)
(cond-expand
(bigloo
(ucs2->char (integer->ucs2 code)))
((or scheme48 scsh) ; Scheme48 has no support for UCS
(ascii->char code))
(else
(integer->char code))))
; Commonly used control characters
(define char-return (ascii->char 13))
(define char-tab (ascii->char 9))
(define char-newline (ascii->char 10)) ; a.k.a. #\newline, per R5RS
(define char-space (ascii->char 32))
(provide (all-defined)))

View File

@ -0,0 +1,49 @@
; Module header is generated automatically
#cs(module common mzscheme
;; For PLT Schemes v.200
(require
(lib "defmacro.ss")
(lib "string.ss")
(rename (lib "pretty.ss") pp pretty-print))
(define (command-line)
(cons "plt" (vector->list (current-command-line-arguments)
; argv
)))
;(define (call-with-input-string str fun)
; (fun (open-input-string str)))
;
;(define (call-with-output-string fun)
; (let ((outs (open-output-string)))
; (fun outs)
; (get-output-string outs)))
(define close-output-string get-output-string)
;
(define (filter pred lis)
(let rpt ((l lis))
(if (null? l)
'()
(if (pred (car l))
(cons (car l) (rpt (cdr l)))
(rpt (cdr l))))))
(define-syntax and-let*
(syntax-rules ()
((and-let* () body ...)
(begin body ...))
((and-let* ((var expr) clauses ...) body ...)
(let ((var expr))
(if var (and-let* (clauses ...) body ...) #f)))
((and-let* ((expr) clauses ...) body ...)
(if expr (and-let* (clauses ...) body ...) #f))
((and-let* (var clauses ...) body ...)
(if var (and-let* (clauses ...) body ...) #f))
))
(provide (all-defined)))

View File

@ -0,0 +1,114 @@
SSAX Package
============
A SSAX functional XML parsing framework consists of a DOM/SXML parser, a SAX
parser, and a supporting library of lexing and parsing procedures. The
procedures in the package can be used separately to tokenize or parse various
pieces of XML documents. The framework supports XML Namespaces, character,
internal and external parsed entities, attribute value normalization,
processing instructions and CDATA sections. The package includes a
semi-validating SXML parser: a DOM-mode parser that is an instantiation of
a SAX parser (called SSAX).
SSAX is a full-featured, algorithmically optimal, pure-functional parser,
which can act as a stream processor. SSAX is an efficient SAX parser that is
easy to use. SSAX minimizes the amount of application-specific state that has
to be shared among user-supplied event handlers. SSAX makes the maintenance
of an application-specific element stack unnecessary, which eliminates several
classes of common bugs. SSAX is written in a pure-functional subset of Scheme.
Therefore, the event handlers are referentially transparent, which makes them
easier for a programmer to write and to reason about. The more expressive,
reliable and easier to use application interface for the event-driven XML
parsing is the outcome of implementing the parsing engine as an enhanced tree
fold combinator, which fully captures the control pattern of the depth-first
tree traversal.
-------------------------------------------------
Quick start
; procedure: ssax:xml->sxml PORT NAMESPACE-PREFIX-ASSIG
;
; This is an instance of a SSAX parser that returns an SXML
; representation of the XML document to be read from PORT.
; NAMESPACE-PREFIX-ASSIG is a list of (USER-PREFIX . URI-STRING)
; that assigns USER-PREFIXes to certain namespaces identified by
; particular URI-STRINGs. It may be an empty list.
; The procedure returns an SXML tree. The port points out to the
; first character after the root element.
(define (ssax:xml->sxml port namespace-prefix-assig) ...)
; procedure: pre-post-order TREE BINDINGS
;
; Traversal of an SXML tree or a grove:
; a <Node> or a <Nodelist>
;
; A <Node> and a <Nodelist> are mutually-recursive datatypes that
; underlie the SXML tree:
; <Node> ::= (name . <Nodelist>) | "text string"
; An (ordered) set of nodes is just a list of the constituent nodes:
; <Nodelist> ::= (<Node> ...)
; Nodelists, and Nodes other than text strings are both lists. A
; <Nodelist> however is either an empty list, or a list whose head is
; not a symbol (an atom in general). A symbol at the head of a node is
; either an XML name (in which case it's a tag of an XML element), or
; an administrative name such as '@'.
; See SXPath.scm and SSAX.scm for more information on SXML.
;
;
; Pre-Post-order traversal of a tree and creation of a new tree:
; pre-post-order:: <tree> x <bindings> -> <new-tree>
; where
; <bindings> ::= (<binding> ...)
; <binding> ::= (<trigger-symbol> *preorder* . <handler>) |
; (<trigger-symbol> *macro* . <handler>) |
; (<trigger-symbol> <new-bindings> . <handler>) |
; (<trigger-symbol> . <handler>)
; <trigger-symbol> ::= XMLname | *text* | *default*
; <handler> :: <trigger-symbol> x [<tree>] -> <new-tree>
;
; The pre-post-order function visits the nodes and nodelists
; pre-post-order (depth-first). For each <Node> of the form (name
; <Node> ...) it looks up an association with the given 'name' among
; its <bindings>. If failed, pre-post-order tries to locate a
; *default* binding. It's an error if the latter attempt fails as
; well. Having found a binding, the pre-post-order function first
; checks to see if the binding is of the form
; (<trigger-symbol> *preorder* . <handler>)
; If it is, the handler is 'applied' to the current node. Otherwise,
; the pre-post-order function first calls itself recursively for each
; child of the current node, with <new-bindings> prepended to the
; <bindings> in effect. The result of these calls is passed to the
; <handler> (along with the head of the current <Node>). To be more
; precise, the handler is _applied_ to the head of the current node
; and its processed children. The result of the handler, which should
; also be a <tree>, replaces the current <Node>. If the current <Node>
; is a text string or other atom, a special binding with a symbol
; *text* is looked up.
;
; A binding can also be of a form
; (<trigger-symbol> *macro* . <handler>)
; This is equivalent to *preorder* described above. However, the result
; is re-processed again, with the current stylesheet.
;
(define (pre-post-order tree bindings) ...)
-------------------------------------------------
Additional tools included into the package
1. "access-remote.ss"
Uniform access to local and remote resources
Resolution for relative URIs in accordance with RFC 2396
2. "id.ss"
Creation and manipulation of the ID-index for a faster access to SXML elements
by their unique ID
Provides the DTD parser for extracting ID attribute declarations
3. "xlink-parser.ss"
Parser for XML documents that contain XLink elements
4. "multi-parser.ss"
SSAX multi parser: combines several specialized parsers into one
Provides creation of parent pointers to SXML document constructed

View File

@ -0,0 +1,327 @@
; Module header is generated automatically
#cs(module http mzscheme
(require (lib "defmacro.ss"))
(require "common.ss")
(require "myenv.ss")
(require "mime.ss")
(require "srfi-12.ss")
(require "util.ss")
(require (lib "string.ss" "srfi/13"))
;************************************************************************
;
; HyperText Transport Protocol (HTTP) support
;
; This code implements the basic flow of a HTTP transaction, as defined in
; a HTTP 1.1 document [RFC 2068]. That is, this code performs:
; - opening of an HTTP connection (directly or via a proxy),
; - sending of a request,
; - listening to a reply,
; - analyzing of the return code,
; - parsing of the response headers,
; - dispatching to handle reply's data,
; - closing of the connection.
;
; INTERFACE
; http-transaction REQ-METHOD REQ-URL REQ-PARMS RESPONSE-HANDLER
;
; REQ-METHOD: a string, typically "GET" or "POST", although many others
; may be allowed. It's up to a particular server to accept or reject
; a request.
;
; REQ-URL: an absolute URL of the HTTP server
;
; REQ-PARMS: an associative list, a list of (name . value) pairs. The list
; may be regarded as "keyword arguments" of the http-transaction
; procedure. The following enumerates the supported "keyword parameters".
; All of them are optional: if omitted or specified with a value #f,
; a suitable default value will be used.
; http-proxy: a string of the form "proxyname" or "proxyname:proxyport"
; or (#f or omitted) if no proxy is needed.
; Here "proxyname" is the name or the IP address of an HTTP
; proxy
; user-agent: a string identifying the user agent
; http-req: a list or a procedure
; If it is a list, it should be a list of pairs
; (http-header-name . http-header-value)
; for additional HTTP headers to include in the request.
; If http-req is a procedure, it is invoked with one
; argument, the communication port to the HTTP server.
; The procedure is expected to write as many HTTP headers as it
; wishes, _followed by an empty line_ and optionally the
; request body.
; logger: a procedure PORT MESSAGE OTHER-MESSAGES*
; The procedure is called on several occasions to tell
; the progress of the transaction
;
; RESPONSE-HANDLER: a procedure RESP-CODE RESP-HEADERS RESP-PORT
; RESP-CODE is a number, which is one of the HTTP codes, e.g.,
; 200, 304, 404, or 500, etc.
; RESP-HEADERS: HTTP headers from the server response,
; a list of pairs (http-header-name . http-header-val).
; http-header-name is an upper-cased symbol.
; In addition to the standard header names defined in the
; HTTP recommendation, a special pair
; (HTTP-RESPONSE . the-whole-response-line)
; contains the entire HTTP response line.
; RESP-PORT: an input port from which to read the body of the reply,
; if any.
; RESPONSE-HANDLER should close the RESP-PORT. The result of the
; RESPONSE-HANDLER becomes the result of the HTTP transaction.
;
; EXCEPTIONS
; The function http-transaction may abort with the following condition:
; make-property-condition 'HTTP-TRANSACTION 'REASON reason 'HEADERS headers
; where reason is a symbol: 'NO-REPLY, 'BAD-REQ-URL, 'BAD-RESP-LINE,
; 'headers' is the list of the headers read so far or '(),
; In addition, I/O conditions (such as i/o error, connection-refused, etc.)
; may be raised by the runtime system.
;
; The procedure http-transaction establishes the connection to a HTTP server
; or a proxy, sends the request line and the mandatory headers (Host: and
; Connection:) as well as User-Agent: and other headers as specified in the
; REQ-PARMS. Afterwards, we flush the stream and wait for the reply.
; Upon receiving the reply, we parse the response line, the response
; headers, and then invoke the RESPONSE-HANDLER to handle the rest.
;
; IMPORT
; The standard prelude: myenv.scm or its variations for particular Scheme
; systems.
; Functions declared in files util.scm and mime.scm
; SRFI-12 exception handling SRFI is assumed
; EXPORT
; http-transaction
;
; This code is rather similar to HTTP.cc
;
; See vhttp.scm for the validation tests, which can also serve as
; use cases.
;
; $Id: http.scm,v 2.0 2002/08/23 19:36:25 oleg Exp oleg $
;^^^^^^^^^^^^^^^^^^^^^
;;(include "myenv.scm")
; The standard prelude and SRFI-12 are assumed
; See http://pobox.com/~oleg/ftp/Scheme/
; for myenv.scm and other input parsing functions used
; in the present code. Again, see vhttp.scm how to run this code
;-------
; A system-dependent part
; Opening, closing and shutting down TCP connections and flushing the
; ports
; open-tcp-connection hostname::string port-number::int -> (i-port . o-port)
; flush-output-port port -> void
; shutdown-sender port -> void ; shutdown the sending part of the connection
;
; These functions are necessarily platform- and system-specific
(cond-expand
(gambit
; For Gambit 4
(define (open-tcp-connection host port-number)
(assert (integer? port-number) (positive? port-number))
(let ((p (open-tcp-client
(list server-address: host
port-number: port-number))))
(cons p p)))
(define flush-output-port force-output)
(define close-tcp-connection close-port)
; DL: by analogue with Gambit 3
(define shutdown-sender force-output)
; Previous version for Gambit 3
; ; The Gambit implementation relies on internal Gambit procedures,
; ; whose names start with ##
; ; Such identifiers cannot be _read_ on many other systems
; ; The following macro constructs Gambit-specific ids on the fly
; (define-macro (_gid id)
; (string->symbol (string-append "##" (symbol->string id))))
; (define (open-tcp-connection host port-number)
; (assert (integer? port-number) (positive? port-number))
; (let ((io-port ((_gid open-input-output-file)
; (string-append "tcp://" host ":"
; (number->string port-number)))))
; (cons io-port io-port)))
; (define flush-output-port flush-output)
; (define shutdown-sender flush-output)
)
(bigloo
(define (open-tcp-connection host port-number)
(let ((sock (make-client-socket host port-number)))
(cons (socket-input sock) (socket-output sock))))
; flush-output-port is built-in
(define shutdown-sender close-output-port)
)
((or plt chicken)
(define (open-tcp-connection host port-number)
(call-with-values
(lambda () (tcp-connect host port-number))
(lambda (input-port output-port)
(cons input-port output-port))))
(define flush-output-port flush-output)
(define shutdown-sender close-output-port)
)
)
;^^^^^^^
; syntax: define-def ident assoc-list defaultvalue
; Bind a variable ident to a value found in an assoc list.
; assoc-list is a list of pairs (symbol . value)
; We look up 'ident' in the assoc-list, and bind it to the found value, unless
; the latter is #f.
; If the lookup fails, the defaultvalue is used.
(define-macro (define-def ident assoc-list defaultvalue)
`(define ,ident
(or
(cond
((assq ',ident ,assoc-list) => cdr)
(else #f))
,defaultvalue)))
; The body of the function.
; The function is written as a collection of mutually-recursive
; procedures that implement a transactional FSM.
(define (http-transaction req-method req-url req-parms response-handler)
; expected keyword arguments and their default values
(define-def http-proxy req-parms #f)
(define-def user-agent req-parms "Scheme-HTTP/1.0")
(define-def http-req req-parms '())
(define-def logger req-parms
(lambda (port msg . other-msgs) (cerr msg other-msgs nl)))
(define CRLF (string (integer->char 13) (integer->char 10)))
(define (die reason headers port)
(if port (close-output-port port))
(abort (make-property-condition 'HTTP-TRANSACTION
'REASON reason 'HEADERS headers)))
; re-throw the exception exc as a HTTP-TRANSACTION exception
(define (die-again exc reason headers port)
(if port (close-output-port port))
(abort (make-composite-condition
(make-property-condition
'HTTP-TRANSACTION 'REASON reason 'HEADERS headers)
exc)))
; Open a connection, send the request, and if successful,
; invoke the read-resp-status-line on the opened http-port.
(define (make-req schema dummy host resource)
(let* ((target-host (or http-proxy host))
(target-addr-lst (string-split target-host '(#\:)))
(target-host-proper (car target-addr-lst))
(target-port
(if (pair? (cdr target-addr-lst))
(string->integer (cadr target-addr-lst) 0
(string-length (cadr target-addr-lst)))
80))
(dummy (logger #f "Connecting to " target-host-proper ":"
target-port))
; prevent hacking
(dummy (if (string-index target-host-proper #\|)
(error "Bad target addr: " target-host-proper)))
(http-ports (open-tcp-connection target-host-proper target-port))
(http-i-port (car http-ports))
(http-o-port (cdr http-ports))
)
(for-each
(lambda (str) (display str http-o-port))
`(,req-method " "
; if the proxy is set, request the full REQ-URL; otherwise,
; send only the relative URL
,@(if http-proxy (list req-url) (list "/" resource))
" HTTP/1.0" ,CRLF
"Host: " ,host ,CRLF
"User-agent: " ,user-agent ,CRLF
"Connection: close" ,CRLF))
(if (procedure? http-req)
(http-req http-o-port) ; let the user write other headers
(begin
(for-each (lambda (header-name-value)
(display (car header-name-value) http-o-port)
(write-char #\: http-o-port)
(display (cdr header-name-value) http-o-port)
(display CRLF http-o-port))
http-req)
(display CRLF http-o-port) ; An empty line ends headers
))
(flush-output-port http-o-port)
(shutdown-sender http-o-port)
(logger http-o-port "sent request. Now listening for the response...")
(read-resp-status-line http-i-port)))
; Read the first line of the server's response, something like
; HTTP/1.x 200 OK
; and extract the response code
; Invoke
; read-headers http-i-port resp-code
; '(HTTP-RESPONSE . the-whole-response-line)
; or raise an exception if the response line is absent or invalid
(define (read-resp-status-line http-port)
(let* ((resp-line (read-line http-port))
(dummy (logger http-port "Got response :" resp-line))
(resp-headers (list (cons 'HTTP-RESPONSE resp-line))))
(cond
((eof-object? resp-line)
(die 'NO-REPLY '() http-port))
((not (string-prefix? "HTTP/1." resp-line))
(die 'BAD-RESP-LINE resp-headers http-port))
(else
(let* ((resp-line-parts (string-split resp-line '() 3))
(resp-code
(and (pair? resp-line-parts)
(pair? (cdr resp-line-parts))
(string->integer (cadr resp-line-parts) 0
(string-length (cadr resp-line-parts)))))
)
(if resp-code
(read-headers http-port resp-code resp-headers)
(die 'BAD-RESP-LINE resp-headers http-port)))))))
; read-headers http-port resp-code init-resp-headers
; The http-port is positioned after the response line.
; The procedure reads HTTP response headers and adds them to
; init-resp-headers.
; On success, the procedure exits to response-handler, passing
; it the response code, the read headers and the http-port. The
; port is positioned after the empty line that terminates the headers.
(define (read-headers http-port resp-code init-resp-headers)
(let ((headers
(with-exception-handler
(lambda (exc)
(die-again exc 'BAD-HEADER init-resp-headers http-port))
(lambda ()
(MIME:read-headers http-port)))))
(response-handler resp-code (append init-resp-headers headers)
http-port)))
; parse the req-url and exit either to make-req, or to
; the response-handler to handle the error
(let ((url-parts (string-split req-url '(#\/) 4)))
; this stub is added by Dmitry Lizorkin for handling URIs consisting of
; just a schema and a host, say, "http://www.plt-scheme.org"
(let ((url-parts
(if (and (string=? "http:" (car url-parts))
(= 3 (length url-parts)))
(append url-parts '(""))
url-parts)))
(cond
((not (= 4 (length url-parts)))
(die 'BAD-REQ-URL '() #f))
((string=? "http:" (car url-parts))
(apply make-req url-parts))
(else
(die 'UNSUPPORTED-SCHEMA '() #f)
))))
)
(provide (all-defined)))

View File

@ -0,0 +1,562 @@
; Module header is generated automatically
#cs(module id mzscheme
(require "common.ss")
(require "myenv.ss")
(require "access-remote.ss")
(require "sxpathlib.ss")
;; Creation and manipulation of the ID-index
;; Provides the DTD parser for extracting ID attribute declarations
;
; This software is in Public Domain.
; IT IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.
;
; Please send bug reports and comments to:
; lizorkin@hotbox.ru Dmitry Lizorkin
;
; ID-index provides fast access to XML elements by their unique ID.
; ID-index has the following structure:
; id-index = ( (id . element) (id . element) ... )
; i.e.
; id-index = (list
; (cons id element)
; (cons id element)
; ...)
; where
; id - (a string) element's unique ID
; element - an SXML presentation of an element
;
; Creation of an id-index generally consists of two steps.
; On the first step, a document declaration (internal and external DTD)
; is read and information of ID attributes is extracted.
; This is presented in a following form:
; id-attrs = ( (elem-name attr-name attr-name attr-name ...)
; (elem-name attr-name attr-name attr-name ...) ... )
; i.e.
; id-attrs = (list
; (cons
; elem-name
; (list attr-name attr-name attr-name ...)
; (cons
; elem-name
; (list attr-name attr-name attr-name ...)
; ...)
; where
; elem-name - (a symbol) a name of the element
; attr-name - (a symbol) element's attribute having an ID type
;
; On the second step, if an SXML presentation of the document is available,
; 'id-attrs' are used for forming an 'id-index'.
; If there is no SXML presentation for a document yet, both steps are
; performed as a single function call - to a specialized SSAX parser.
; This parser constructs an SXML presentation and an 'id-index'
; in a single pass
;
; ATTENTION:
; 1. Only non-qualified 'elem-name' and 'attr-name' are correctly supported
; 2. Parameter entity reference (PEReference) is NOT supported
;=========================================================================
; Functions which read XML document declaration
;------------------------------------------------
; Trivial functions that ignore symbols
; Function reads a whitespace (S production)
(define (id:process-s port)
(let ((symb (peek-char port)))
(cond((eof-object? symb) symb)
((char=? symb #\space) (read-char port)
(id:process-s port))
((char=? symb #\return) (read-char port)
(id:process-s port))
((char=? symb #\newline)(read-char port)
(id:process-s port))
((char=? symb #\tab)(read-char port)
(id:process-s port))
(else symb))))
; Ignores all symbols until template-symbol
(define (id:ignore-until templ-sym port)
(let loop ((symb (peek-char port)))
(cond((eof-object? symb) symb)
((equal? symb templ-sym) (read-char port)
symb)
(else (read-char port)
(loop (peek-char port))))))
;------------------------------------------------
; These functions perform reading from a file
; Read N symbols from a port
(define (id:read-n num port)
(id:process-s port)
(let loop ((num num) (res '()))
(if(= num 0)
(list->string (reverse res))
(let((symb (peek-char port)))
(cond((eof-object? symb) symb)
(else (read-char port)
(loop (- num 1) (cons symb res))))))))
; This function reads a name - a sequence of characters ending with
; a whitespace or '<'. '>', '(', ')', '[', ']', '|'
(define (id:read-name port)
(id:process-s port)
(let loop ((res ""))
(let ((symb (peek-char port)))
(cond((eof-object? symb) res)
((member symb '(#\space #\tab #\return #\newline
#\< #\> #\( #\) #\[ #\] #\|))
res)
(else (loop (string-append res (string (read-char port)))))))))
; This function reads a literal
; literal ::= ('"' [^"]* '"') | ("'" [^']* "'")
; A string is returned
(define (id:process-literal port)
(id:process-s port)
(let((quot (peek-char port)))
(if(eof-object? quot) ; an incorrect situaltion
""
(let((quot (if (char=? (read-char port) #\") #\" #\')))
(let loop ((res '()))
(let((symb (peek-char port)))
(cond
((eof-object? symb) (list->string (reverse res)))
((char=? symb quot) ; end of the string
(read-char port)
(list->string (reverse res)))
(else
(read-char port)
(loop (cons symb res))))))))))
;------------------------------------------------
; Miscellaneous
; Converts a string into small letters
(define (id:to-small str)
(let loop ((arg (string->list str)) (res '()))
(cond((null? arg) (list->string (reverse res)))
((char-upper-case? (car arg))
(loop (cdr arg) (cons (char-downcase (car arg)) res)))
(else (loop (cdr arg) (cons (car arg) res))))))
; Takes an 'id-attrs' which can contain equal element names
; Returns a new 'id-attrs' where all element names are unique
(define (id:unite-id-attrs id-attrs)
(let loop ((id-attrs id-attrs)
(new '()))
(if
(null? id-attrs)
new
(let rpt ((elem-name (caar id-attrs))
(atts (cdar id-attrs))
(rest (cdr id-attrs))
(id-attrs '()))
(cond
((null? rest)
(loop id-attrs (cons (cons elem-name atts) new)))
((equal? (caar rest) elem-name)
(rpt elem-name
(append atts (cdar rest))
(cdr rest)
id-attrs))
(else
(rpt elem-name atts (cdr rest) (cons (car rest) id-attrs))))))))
;------------------------------------------------
; Parsing XML productions concerning document declaration
; These functions are not intendes for error detection, they assume that
; the document is correct
; This function ignores information related to a PI production [16]
; [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
; It looks for an ending '?>' template
(define (id:ignore-PI port)
(id:ignore-until #\? port)
(let ((symb (peek-char port)))
(cond((eof-object? symb) symb)
((equal? symb #\>) (read-char port)
symb)
(else (id:ignore-PI port)))))
; This function ignores information related to a Comment production [15]
; [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
; The starting '<!' has been already processed
; The function looks for an ending '-->' template
(define (id:ignore-comment port)
(read-char port) ; it is '-'
(read-char port) ; it is '-'
(id:ignore-until #\- port)
(let((sym1 (peek-char port)))
(cond((eof-object? sym1) sym1)
((char=? sym1 #\-) (read-char port)
(let((sym2 (read-char port))) ; must be '>'
sym2))
(else (id:ignore-comment port)))))
; This function processes AttType production ([54]-[59] in XML specification)
; [54] AttType ::= StringType | TokenizedType | EnumeratedType
; [55] StringType ::= 'CDATA'
; [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY'
; | 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
; [57] EnumeratedType ::= NotationType | Enumeration
; [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
; [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
; The function returnd #t if the attribute has an ID type and #f otherwise
(define (id:AttType-ID? port)
(let((type (id:to-small (id:read-name port))))
(cond((string=? type "id") #t)
((string=? type "notation")
(id:process-s port)
(read-char port) ; it is #\(
(id:ignore-until #\) port)
#f)
((and (string=? type "") (char=? (peek-char port) #\()) ; see [59]
(id:ignore-until #\) port)
#f)
(else #f))))
; This function processes DefaultDecl production ([60] in XML specification)
; [60] DefaultDecl ::= '#REQUIRED'
; | '#IMPLIED'
; | (('#FIXED' S)? AttValue)
; The result is always #t
(define (id:process-DefaultDecl port)
(let((type (id:to-small (id:read-name port))))
(cond((string=? type "#fixed")
(id:read-name port) ; reads a default value
#t)
(else #t))))
; This function processes AttDef production ([53] in XML specification)
; [53] AttDef ::= S Name S AttType S DefaultDecl
; If an attribute has an ID type, (list attribule-name) is returned
; (a list of one element). Otherwise, function returns an empty list
(define (id:process-AttDef port)
(let((att-name (string->symbol (id:read-name port))))
(let((bool (id:AttType-ID? port)))
(id:process-DefaultDecl port)
(if bool (list att-name) '()))))
; The function processes AttlistDecl production ([52] in XML specification)
; [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
; The starting '<!ATTLIST' has been already processed
; 'id-attrs' are returned as a result
(define (id:process-AttlistDecl port)
(let((element-name (string->symbol (id:read-name port))))
(let loop ((atts '()))
(id:process-s port)
(cond((char=? (peek-char port) #\>) ; no more attributes will be declared
(read-char port)
(if(null? atts)
'()
(list (cons element-name atts))))
(else
(loop (append (id:process-AttDef port) atts)))))))
; This function processes a multiple markupdecl production [29]
; [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl
; | NotationDecl | PI | Comment
; 'id-attrs' are returned as a result
(define (id:process-markupdecl* port)
(let loop ((id-attrs '()))
(let((beg (id:read-n 2 port)))
(cond((eof-object? beg) id-attrs) ; the file is over
((string=? beg "]>") id-attrs) ; the end of the markupdecl
((string=? beg "<?") ; processing instruction
(id:ignore-PI port)
(loop id-attrs))
((and (string=? beg "<!") (char=? (peek-char port) #\-)) ; a comment
(id:ignore-comment port)
(loop id-attrs))
((string=? beg "<!") ; AttlistDecl or any other declarations
(let ((name (id:to-small (id:read-name port))))
(cond((string=? name "attlist")
(loop (append (id:process-AttlistDecl port) id-attrs)))
(else
(id:ignore-until #\> port)
(loop id-attrs)))))
(else ; an error condition
(cerr "Error in markupdecl production: unexpected " beg nl)
(id:ignore-until #\> port)
id-attrs)))))
; This function processes a doctypedecl production ([75] in XML specification)
; [75] ExternalID ::= 'SYSTEM' S SystemLiteral
; | 'PUBLIC' S PubidLiteral S SystemLiteral
; The function ignores a PubidLiteral
; 'id-attrs' are returned as a result
(define (id:process-ExternalID port)
(let((system-literal
(let((name (id:to-small (id:read-name port))))
(cond
((string=? name "system")
(id:process-literal port))
((string=? name "public")
(id:process-literal port)
(id:process-literal port))
(else #f)))))
(if(not system-literal)
'() ; an incorrect situation
(let((external-port (open-input-resource system-literal)))
(if(not external-port)
'() ; a failure
(let((id-attrs (id:process-markupdecl* external-port)))
(close-input-port external-port)
id-attrs))))))
; This function processes a doctypedecl production ([28] in XML specification)
; [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)?
; S? ('[' (markupdecl | DeclSep)* ']' S?)? '>'
; The function doesn't process a DeclSep (this is a PEReference which
; this programme doesn't support)
; The starting '<!DOCTYPE' has been already processed
; 'id-attrs' are returned as a result
(define (id:process-doctypedecl port)
(let((name (id:read-name port))) ; root element's name
(id:process-s port)
(let((symb (peek-char port)))
(cond
((eof-object? symb) '()) ; an incorrect situation
((char=? symb #\[)
(read-char port)
(id:process-markupdecl* port))
(else
(let((id-attrs (id:process-ExternalID port)))
(id:process-s port)
(let((symb (peek-char port)))
(cond
((eof-object? symb) id-attrs) ; an incorrect situation
((char=? symb #\[)
(read-char port)
(append id-attrs (id:process-markupdecl* port)))
(else ; an incorrect situation
id-attrs)))))))))
; This function processes a prolog production ([22] in XML specification)
; [1] document ::= prolog element Misc*
; [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
; [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
; [27] Misc ::= Comment | PI | S
; 'id-attrs' are returned as a result
(define (id:process-prolog port)
(let((beg (id:read-n 2 port)))
(cond((eof-object? beg) '()) ; a file is over - strange...
((string=? beg "<?") ; PI or XMLDecl
(id:ignore-PI port)
(id:process-prolog port))
((and (string=? beg "<!") (char=? (peek-char port) #\-)) ; a comment
(id:ignore-comment port)
(id:process-prolog port))
((string=? beg "<!") ; doctypedecl expected
(let ((name (id:to-small (id:read-name port))))
(cond((string=? name "doctype")
(id:process-doctypedecl port))
(else
(cerr "doctypedecl production expected" nl)
'()))))
(else ; element begins, there was no doctypedecl
'()))))
;=========================================================================
; Two-step id-index creation (user level functions)
; We use this variant when we already have an SXML presentation of the
; document
;------------------------------------------------
; The first step - creating 'id-attrs'
; Read the DTD
; uri-string - a URI for the DTD location (a string)
; 'id-attrs' are returned as a result
(define (id:read-external-dtd uri-string)
(let((port (open-input-resource uri-string)))
(if(not port)
'() ; a situation of an error
(let((id-attrs (id:unite-id-attrs (id:process-markupdecl* port))))
(close-input-port port)
id-attrs))))
; Read the declaration from the document's prolog.
; If prolog contains a reference to an external DTD, it is processed either
; uri-string - a URI for the document location (a string)
; 'id-attrs' are returned as a result
(define (id:read-document-declaration uri-string)
(let((port (open-input-resource uri-string)))
(if(not port)
'() ; a situation of an error
(let((id-attrs (id:unite-id-attrs (id:process-prolog port))))
(close-input-port port)
id-attrs))))
;------------------------------------------------
; The second step - creating an 'id-index' using 'id-attrs'
; This function forms an 'id-index' and inserts it in the document
; document - a root node of the document (SXML presentation)
; id-attrs - the result of the previous step
; A new SXML document is returned. It contains an auxiliary list with an
; 'id-index subtree. If the source document already contains such a
; subtree, it will be replaced. Other subtrees in an auxiliary list will
; remain unchanged.
(define (SXML->SXML+id document id-attrs)
(let((aux-subtrees
(let((aux ((select-kids (ntype?? '@@)) document)))
(if(null? aux)
'()
(let rpt ((res '())
(to-see (cdar aux)))
(cond
((null? to-see) (reverse res))
((equal? (caar to-see) 'id-index) (rpt res (cdr to-see)))
(else (rpt (cons (car to-see) res)
(cdr to-see)))))))))
(let loop ((nodeset (list document))
(id-index '()))
(if(null? nodeset)
(let((kids ((select-kids
(lambda (node)
(not (and (pair? node) (equal? (car node) '@@)))))
document)))
(cons* '*TOP*
(cons* '@@
(cons 'id-index id-index)
aux-subtrees)
kids))
(let((cur-node (car nodeset)))
(cond
((not (pair? cur-node)) ; a text node
(loop (cdr nodeset) id-index))
((assoc (car cur-node) id-attrs)
=>
(lambda (lst)
(let((id-values
((select-kids (lambda (x) #t))
((sxml:filter (lambda (x) (member (car x) (cdr lst))))
((select-kids (lambda (x) #t))
((select-kids (ntype?? '@)) cur-node))))))
(loop
(append
((select-kids (ntype?? '*)) (car nodeset))
(cdr nodeset))
(append
id-index
(map
(lambda (x) (cons x cur-node))
id-values))))))
(else
(loop
(append ((select-kids (ntype?? '*)) (car nodeset)) (cdr nodeset))
id-index))))))))
;=========================================================================
; Some stuff for a SSAX multi parser
;------------------------------------------------
; Id-related part of the seed
; id:seed = (list id-attrs id-index)
; id-attrs, id-index - see a head comment
; Mutator
(define (id:make-seed id-attrs id-index)
(list id-attrs id-index))
; Accessors
(define (id:seed-attrs id:seed)
(car id:seed))
(define (id:seed-index id:seed)
(cadr id:seed))
;------------------------------------------------
; Handler units
; This function is called by the NEW-LEVEL-SEED handler
; A new 'id:seed' is returned
(define (id:new-level-seed-handler id:seed)
id:seed)
; This function is called by the FINISH-ELEMENT handler
; A new 'id:seed' is returned
(define (id:finish-element-handler elem-gi attributes id:seed element)
(cond
((assoc elem-gi (id:seed-attrs id:seed))
=>
(lambda (lst)
(let loop ((atts attributes)
(id-index (id:seed-index id:seed)))
(if
(null? atts)
(id:make-seed (id:seed-attrs id:seed) id-index)
(let((att (car atts)))
(cond
((pair? (car att)) ; namespace aware
(loop (cdr atts) id-index))
((member (car att) (cdr lst))
(loop (cdr atts)
(cons (cons (cdr att) element) id-index)))
(else
(loop (cdr atts) id-index))))))))
(else
id:seed)))
; This function is called by the DOCTYPE handler
; A new 'id:seed' is returned
(define (id:doctype-handler port systemid internal-subset?)
(let((id-attrs
(if
(not systemid)
'() ; systemid not supplied
(let((external-port (open-input-resource systemid)))
(if
(not external-port)
'() ; a failure
(let((id-attrs (id:process-markupdecl* external-port)))
(close-input-port external-port)
id-attrs))))))
(let((id-attrs
(if
internal-subset?
(id:unite-id-attrs
(append id-attrs (id:process-markupdecl* port)))
(id:unite-id-attrs id-attrs))))
(id:make-seed id-attrs '()))))
; This function constructs the member of an axuiliary list
(define (id:ending-action id:seed)
(let((id-index (id:seed-index id:seed)))
(cons 'id-index id-index)))
(provide (all-defined)))

View File

@ -0,0 +1,10 @@
(module info (lib "infotab.ss" "setup")
(define name "ssax")
(define blurb
(list "SSAX functional XML parsing framework "
"to inter-convert between an angular-bracket and "
"an S-expression-based notations for markup documents"))
(define primary-file "ssax.ss")
(define doc.txt "doc.txt")
(define categories '(xml))
)

View File

@ -0,0 +1,482 @@
; Module header is generated automatically
#cs(module input-parse mzscheme
(require (lib "defmacro.ss"))
(require "common.ss")
(require "myenv.ss")
(require "parse-error.ss")
;****************************************************************************
; Simple Parsing of input
;
; The following simple functions surprisingly often suffice to parse
; an input stream. They either skip, or build and return tokens,
; according to inclusion or delimiting semantics. The list of
; characters to expect, include, or to break at may vary from one
; invocation of a function to another. This allows the functions to
; easily parse even context-sensitive languages.
;
; EOF is generally frowned on, and thrown up upon if encountered.
; Exceptions are mentioned specifically. The list of expected characters
; (characters to skip until, or break-characters) may include an EOF
; "character", which is to be coded as symbol *eof*
;
; The input stream to parse is specified as a PORT, which is usually
; the last (and optional) argument. It defaults to the current input
; port if omitted.
;
; IMPORT
; This package relies on a function parser-error, which must be defined
; by a user of the package. The function has the following signature:
; parser-error PORT MESSAGE SPECIALISING-MSG*
; Many procedures of this package call parser-error to report a parsing
; error. The first argument is a port, which typically points to the
; offending character or its neighborhood. Most of the Scheme systems
; let the user query a PORT for the current position. MESSAGE is the
; description of the error. Other arguments supply more details about
; the problem.
;
; $Id: input-parse.scm,v 1.1.1.1 2001/07/11 19:33:43 oleg Exp $
;(declare ; Gambit-compiler optimization options
; (block)
; (standard-bindings)
; (extended-bindings) ; Needed for #!optional arguments, DSSSL-style
; (fixnum) ; optional, keyword and rest arguments
;)
;(include "myenv.scm") ; include target dependent stuff
;------------------------------------------------------------------------
; Preparation and tuning section
; This package is heavily used. Therefore, we take time to tune it in,
; in particular for Gambit.
; Concise and efficient definition of a function that takes one or two
; optional arguments, e.g.,
;
; (define-opt (foo arg1 arg2 (optional (arg3 init3) (arg4 init4))) body)
;
; define-opt is identical to a regular define, with one exception: the
; last argument may have a form
; (optional (binding init) ... )
(cond-expand
((or bigloo gambit)
; For Gambit and Bigloo, which support DSSSL extended lambdas,
; define-opt like the one in the example above is re-written into
; (define-opt (foo arg1 arg2 #!optional (arg3 init3) (arg4 init4)) body)
(define-macro (define-opt bindings body . body-rest)
(let* ((rev-bindings (reverse bindings))
(opt-bindings
(and (pair? rev-bindings) (pair? (car rev-bindings))
(eq? 'optional (caar rev-bindings))
(cdar rev-bindings))))
(if opt-bindings
`(define ,(append (reverse
(cons (with-input-from-string "#!optional" read)
(cdr rev-bindings)))
opt-bindings)
,body ,@body-rest)
`(define ,bindings ,body ,@body-rest))))
)
(plt ; DL: borrowed from "define-opt.scm"
(define-syntax define-opt
(syntax-rules (optional)
((define-opt (name . bindings) . bodies)
(define-opt "seek-optional" bindings () ((name . bindings) . bodies)))
((define-opt "seek-optional" ((optional . _opt-bindings))
(reqd ...) ((name . _bindings) . _bodies))
(define (name reqd ... . _rest)
(letrec-syntax
((handle-opts
(syntax-rules ()
((_ rest bodies (var init))
(let ((var (if (null? rest) init
(if (null? (cdr rest)) (car rest)
(error "extra rest" rest)))))
. bodies))
((_ rest bodies var) (handle-opts rest bodies (var #f)))
((_ rest bodies (var init) . other-vars)
(let ((var (if (null? rest) init (car rest)))
(new-rest (if (null? rest) '() (cdr rest))))
(handle-opts new-rest bodies . other-vars)))
((_ rest bodies var . other-vars)
(handle-opts rest bodies (var #f) . other-vars))
((_ rest bodies) ; no optional args, unlikely
(let ((_ (or (null? rest) (error "extra rest" rest))))
. bodies)))))
(handle-opts _rest _bodies . _opt-bindings))))
((define-opt "seek-optional" (x . rest) (reqd ...) form)
(define-opt "seek-optional" rest (reqd ... x) form))
((define-opt "seek-optional" not-a-pair reqd form)
(define . form)) ; No optional found, regular define
((define-opt name body) ; Just the definition for 'name',
(define name body)) ; for compatibilibility with define
))
)
(else
; For Scheme systems without DSSSL extensions, we rewrite the definition
; of foo of the example above into the following:
; (define (foo arg1 arg2 . rest)
; (let* ((arg3 (if (null? rest) init3 (car rest)))
; (arg4 (if (or (null? rest) (null? (cdr rest))) init4
; (cadr rest)))
; body))
; We won't handle more than two optional arguments
(define-macro define-opt (lambda (bindings body . body-rest)
(let* ((rev-bindings (reverse bindings))
(opt-bindings
(and (pair? rev-bindings) (pair? (car rev-bindings))
(eq? 'optional (caar rev-bindings))
(cdar rev-bindings))))
(cond
((not opt-bindings) ; No optional arguments
`(define ,bindings ,body ,@body-rest))
((null? opt-bindings)
`(define ,bindings ,body ,@body-rest))
((or (null? (cdr opt-bindings)) (null? (cddr opt-bindings)))
(let* ((rest (gensym)) ; One or two optional args
(first-opt (car opt-bindings))
(second-opt (and (pair? (cdr opt-bindings))
(cadr opt-bindings))))
`(define ,(let loop ((bindings bindings))
(if (null? (cdr bindings)) rest
(cons (car bindings) (loop (cdr bindings)))))
(let* ((,(car first-opt) (if (null? ,rest)
,(cadr first-opt)
(car ,rest)))
,@(if second-opt
`((,(car second-opt)
(if (or (null? ,rest) (null? (cdr ,rest)))
,(cadr second-opt)
(cadr ,rest))))
'()))
,body ,@body-rest))))
(else
'(error "At most two options are supported"))))))
))
(cond-expand
(gambit
; The following macro makes a macro that turns (read-char port)
; into (##read-char port). We can't enter such a macro-converter
; directly as readers of SCM and Bigloo, for ones, don't like
; identifiers with two leading # characters
(define-macro (gambitize clause)
`(define-macro ,clause
,(list 'quasiquote
(cons
(string->symbol (string-append "##"
(symbol->string (car clause))))
(map (lambda (id) (list 'unquote id)) (cdr clause))))))
(gambitize (read-char port))
(gambitize (peek-char port))
(gambitize (eof-object? port))
;(gambitize (string-append a b))
)
(else #t))
;------------------------------------------------------------------------
; -- procedure+: peek-next-char [PORT]
; advances to the next character in the PORT and peeks at it.
; This function is useful when parsing LR(1)-type languages
; (one-char-read-ahead).
; The optional argument PORT defaults to the current input port.
(define-opt (peek-next-char (optional (port (current-input-port))))
(read-char port)
(peek-char port))
;------------------------------------------------------------------------
; -- procedure+: assert-curr-char CHAR-LIST STRING [PORT]
; Reads a character from the PORT and looks it up
; in the CHAR-LIST of expected characters
; If the read character was found among expected, it is returned
; Otherwise, the procedure writes a nasty message using STRING
; as a comment, and quits.
; The optional argument PORT defaults to the current input port.
;
(define-opt (assert-curr-char expected-chars comment
(optional (port (current-input-port))))
(let ((c (read-char port)))
(if (memq c expected-chars) c
(parser-error port "Wrong character " c
" (0x" (if (eof-object? c) "*eof*"
(number->string (char->integer c) 16)) ") "
comment ". " expected-chars " expected"))))
; -- procedure+: skip-until CHAR-LIST [PORT]
; Reads and skips characters from the PORT until one of the break
; characters is encountered. This break character is returned.
; The break characters are specified as the CHAR-LIST. This list
; may include EOF, which is to be coded as a symbol *eof*
;
; -- procedure+: skip-until NUMBER [PORT]
; Skips the specified NUMBER of characters from the PORT and returns #f
;
; The optional argument PORT defaults to the current input port.
(define-opt (skip-until arg (optional (port (current-input-port))) )
(cond
((number? arg) ; skip 'arg' characters
(do ((i arg (-- i)))
((<= i 0) #f)
(if (eof-object? (read-char port))
(parser-error port "Unexpected EOF while skipping "
arg " characters"))))
(else ; skip until break-chars (=arg)
(let loop ((c (read-char port)))
(cond
((memv c arg) c)
((eof-object? c)
(if (memv '*eof* arg) c
(parser-error port "Unexpected EOF while skipping until " arg)))
(else (loop (read-char port))))))))
; -- procedure+: skip-while CHAR-LIST [PORT]
; Reads characters from the PORT and disregards them,
; as long as they are mentioned in the CHAR-LIST.
; The first character (which may be EOF) peeked from the stream
; that is NOT a member of the CHAR-LIST is returned. This character
; is left on the stream.
; The optional argument PORT defaults to the current input port.
(define-opt (skip-while skip-chars (optional (port (current-input-port))) )
(do ((c (peek-char port) (peek-char port)))
((not (memv c skip-chars)) c)
(read-char port)))
; whitespace const
;------------------------------------------------------------------------
; Stream tokenizers
; -- procedure+:
; next-token PREFIX-CHAR-LIST BREAK-CHAR-LIST [COMMENT-STRING] [PORT]
; skips any number of the prefix characters (members of the
; PREFIX-CHAR-LIST), if any, and reads the sequence of characters
; up to (but not including) a break character, one of the
; BREAK-CHAR-LIST.
; The string of characters thus read is returned.
; The break character is left on the input stream
; The list of break characters may include EOF, which is to be coded as
; a symbol *eof*. Otherwise, EOF is fatal, generating an error message
; including a specified COMMENT-STRING (if any)
;
; The optional argument PORT defaults to the current input port.
;
; Note: since we can't tell offhand how large the token being read is
; going to be, we make a guess, pre-allocate a string, and grow it by
; quanta if necessary. The quantum is always the length of the string
; before it was extended the last time. Thus the algorithm does
; a Fibonacci-type extension, which has been proven optimal.
; Note, explicit port specification in read-char, peek-char helps.
; Procedure input-parse:init-buffer
; returns an initial buffer for next-token* procedures.
; The input-parse:init-buffer may allocate a new buffer per each invocation:
; (define (input-parse:init-buffer) (make-string 32))
; Size 32 turns out to be fairly good, on average.
; That policy is good only when a Scheme system is multi-threaded with
; preemptive scheduling, or when a Scheme system supports shared substrings.
; In all the other cases, it's better for input-parse:init-buffer to
; return the same static buffer. next-token* functions return a copy
; (a substring) of accumulated data, so the same buffer can be reused.
; We shouldn't worry about new token being too large: next-token will use
; a larger buffer automatically. Still, the best size for the static buffer
; is to allow most of the tokens to fit in.
; Using a static buffer _dramatically_ reduces the amount of produced garbage
; (e.g., during XML parsing).
(define input-parse:init-buffer
(let ((buffer (make-string 512)))
(lambda () buffer)))
(define-opt (next-token prefix-skipped-chars break-chars
(optional (comment "") (port (current-input-port))) )
(let* ((buffer (input-parse:init-buffer))
(curr-buf-len (string-length buffer)) (quantum 16))
(let loop ((i 0) (c (skip-while prefix-skipped-chars port)))
(cond
((memq c break-chars) (substring buffer 0 i))
((eof-object? c)
(if (memq '*eof* break-chars)
(substring buffer 0 i) ; was EOF expected?
(parser-error port "EOF while reading a token " comment)))
(else
(if (>= i curr-buf-len) ; make space for i-th char in buffer
(begin ; -> grow the buffer by the quantum
(set! buffer (string-append buffer (make-string quantum)))
(set! quantum curr-buf-len)
(set! curr-buf-len (string-length buffer))))
(string-set! buffer i c)
(read-char port) ; move to the next char
(loop (++ i) (peek-char port))
)))))
; Another version of next-token, accumulating characters in a list rather
; than in a string buffer. I heard that it tends to work faster.
; In reality, it works just as fast as the string buffer version above,
; but it allocates 50% more memory and thus has to run garbage collection
; 50% as many times. See next-token-comp.scm
(define-opt (next-token-list-based prefix-skipped-chars break-chars
(optional (comment "") (port (current-input-port))) )
(let* ((first-char (skip-while prefix-skipped-chars port))
(accum-chars (cons first-char '())))
(cond
((eof-object? first-char)
(if (memq '*eof* break-chars) ""
(parser-error port "EOF while skipping before reading token "
comment)))
((memq first-char break-chars) "")
(else
(read-char port) ; consume the first-char
(let loop ((tail accum-chars) (c (peek-char port)))
(cond
((memq c break-chars) (list->string (reverse tail)))
((eof-object? c)
(if (memq '*eof* break-chars)
(list->string (reverse tail)) ; was EOF expected?
(parser-error port "EOF while reading a token " comment)))
(else
(read-char port) ; move to the next char
(loop (list* c tail) (peek-char port))
)))))))
; -- procedure+: next-token-of INC-CHARSET [PORT]
; Reads characters from the PORT that belong to the list of characters
; INC-CHARSET. The reading stops at the first character which is not
; a member of the set. This character is left on the stream.
; All the read characters are returned in a string.
;
; -- procedure+: next-token-of PRED [PORT]
; Reads characters from the PORT for which PRED (a procedure of one
; argument) returns non-#f. The reading stops at the first character
; for which PRED returns #f. That character is left on the stream.
; All the results of evaluating of PRED up to #f are returned in a
; string.
;
; PRED is a procedure that takes one argument (a character
; or the EOF object) and returns a character or #f. The returned
; character does not have to be the same as the input argument
; to the PRED. For example,
; (next-token-of (lambda (c)
; (cond ((eof-object? c) #f)
; ((char-alphabetic? c) (char-downcase c))
; (else #f))))
; will try to read an alphabetic token from the current
; input port, and return it in lower case.
;
; The optional argument PORT defaults to the current input port.
;
; Note: since we can't tell offhand how large the token being read is
; going to be, we make a guess, pre-allocate a string, and grow it by
; quanta if necessary. The quantum is always the length of the string
; before it was extended the last time. Thus the algorithm does
; a Fibonacci-type extension, which has been proven optimal.
;
; This procedure is similar to next-token but only it implements
; an inclusion rather than delimiting semantics.
(define-opt (next-token-of incl-list/pred
(optional (port (current-input-port))) )
(let* ((buffer (input-parse:init-buffer))
(curr-buf-len (string-length buffer)) (quantum 16))
(if (procedure? incl-list/pred)
(let loop ((i 0) (c (peek-char port)))
(cond
((incl-list/pred c) =>
(lambda (c)
(if (>= i curr-buf-len) ; make space for i-th char in buffer
(begin ; -> grow the buffer by the quantum
(set! buffer (string-append buffer (make-string quantum)))
(set! quantum curr-buf-len)
(set! curr-buf-len (string-length buffer))))
(string-set! buffer i c)
(read-char port) ; move to the next char
(loop (++ i) (peek-char port))))
(else (substring buffer 0 i))))
; incl-list/pred is a list of allowed characters
(let loop ((i 0) (c (peek-char port)))
(cond
((not (memq c incl-list/pred)) (substring buffer 0 i))
(else
(if (>= i curr-buf-len) ; make space for i-th char in buffer
(begin ; -> grow the buffer by the quantum
(set! buffer (string-append buffer (make-string quantum)))
(set! quantum curr-buf-len)
(set! curr-buf-len (string-length buffer))))
(string-set! buffer i c)
(read-char port) ; move to the next char
(loop (++ i) (peek-char port))
))))))
(cond-expand
(plt
#t ; DL: already available in PLT
)
(else
; -- procedure+: read-line [PORT]
; Reads one line of text from the PORT, and returns it as a string.
; A line is a (possibly empty) sequence of characters terminated
; by CR, CRLF or LF (or even the end of file).
; The terminating character (or CRLF combination) is removed from
; the input stream. The terminating character(s) is not a part
; of the return string either.
; If EOF is encountered before any character is read, the return
; value is EOF.
;
; The optional argument PORT defaults to the current input port.
(define-opt (read-line (optional (port (current-input-port))) )
(if (eof-object? (peek-char port)) (peek-char port)
(let* ((line
(next-token '() '(#\newline #\return *eof*)
"reading a line" port))
(c (read-char port))) ; must be either \n or \r or EOF
(and (eq? c #\return) (eq? (peek-char port) #\newline)
(read-char port)) ; skip \n that follows \r
line)))
; -- procedure+: read-string N [PORT]
; Reads N characters from the PORT, and returns them in a string.
; If EOF is encountered before N characters are read, a shorter string
; will be returned.
; If N is not positive, an empty string will be returned.
; The optional argument PORT defaults to the current input port.
(define-opt (read-string n (optional (port (current-input-port))) )
(if (not (positive? n)) ""
(let ((buffer (make-string n)))
(let loop ((i 0) (c (read-char port)))
(if (eof-object? c) (substring buffer 0 i)
(let ((i1 (++ i)))
(string-set! buffer i c)
(if (= i1 n) buffer
(loop i1 (read-char port)))))))))
))
(provide (all-defined)))

View File

@ -0,0 +1,107 @@
; Module header is generated automatically
#cs(module look-for-str mzscheme
(require "common.ss")
(require "myenv.ss")
; -- Function: find-string-from-port? STR IN-PORT MAX-NO-CHARS
; Looks for a string STR within the first MAX-NO-CHARS chars of the
; input port IN-PORT
; MAX-NO-CHARS may be omitted: in that case, the search span would be
; limited only by the end of the input stream.
; When the STR is found, the function returns the number of
; characters it has read from the port, and the port is set
; to read the first char after that (that is, after the STR)
; The function returns #f when the string wasn't found
; Note the function reads the port *STRICTLY* sequentially, and does not
; perform any buffering. So the function can be used even if the port is open
; on a pipe or other communication channel.
;
; Probably can be classified as misc-io.
;
; Notes on the algorithm.
; A special care should be taken in a situation when one had achieved a partial
; match with (a head of) STR, and then some unexpected character appeared in
; the stream. It'll be rash to discard all already read characters. Consider
; an example of string "acab" and the stream "bacacab...", specifically when
; a c a _b_
; b a c a c a b ...
; that is, when 'aca' had matched, but then 'c' showed up in the stream
; while we were looking for 'b'. In that case, discarding all already read
; characters and starting the matching process from scratch, that is,
; from 'c a b ...', would miss a certain match.
; Note, we don't actually need to keep already read characters, or at least
; strlen(str) characters in some kind of buffer. If there has been no match,
; we can safely discard read characters. If there was some partial match,
; we already know the characters before, they are in the STR itself, so
; we don't need a special buffer for that.
;;; "MISCIO" Search for string from port.
; Written 1995 by Oleg Kiselyov (oleg@ponder.csci.unt.edu)
; Modified 1996 by A. Jaffer (jaffer@ai.mit.edu)
;
; This code is in the public domain.
(define (MISCIO:find-string-from-port? str <input-port> . max-no-char)
(set! max-no-char (if (null? max-no-char) #f (car max-no-char)))
(letrec
((no-chars-read 0)
(my-peek-char ; Return a peeked char or #f
(lambda () (and (or (not max-no-char) (< no-chars-read max-no-char))
(let ((c (peek-char <input-port>)))
(if (eof-object? c) #f c)))))
(next-char (lambda () (read-char <input-port>)
(set! no-chars-read (inc no-chars-read))))
(match-1st-char ; of the string str
(lambda ()
(let ((c (my-peek-char)))
(if (not c) #f
(begin (next-char)
(if (char=? c (string-ref str 0))
(match-other-chars 1)
(match-1st-char)))))))
;; There has been a partial match, up to the point pos-to-match
;; (for example, str[0] has been found in the stream)
;; Now look to see if str[pos-to-match] for would be found, too
(match-other-chars
(lambda (pos-to-match)
(if (>= pos-to-match (string-length str))
no-chars-read ; the entire string has matched
(let ((c (my-peek-char)))
(and c
(if (not (char=? c (string-ref str pos-to-match)))
(backtrack 1 pos-to-match)
(begin (next-char)
(match-other-chars (inc pos-to-match)))))))))
;; There had been a partial match, but then a wrong char showed up.
;; Before discarding previously read (and matched) characters, we check
;; to see if there was some smaller partial match. Note, characters read
;; so far (which matter) are those of str[0..matched-substr-len - 1]
;; In other words, we will check to see if there is such i>0 that
;; substr(str,0,j) = substr(str,i,matched-substr-len)
;; where j=matched-substr-len - i
(backtrack
(lambda (i matched-substr-len)
(let ((j (- matched-substr-len i)))
(if (<= j 0)
(match-1st-char) ; backed off completely to the begining of str
(let loop ((k 0))
(if (>= k j)
(match-other-chars j) ; there was indeed a shorter match
(if (char=? (string-ref str k)
(string-ref str (+ i k)))
(loop (inc k))
(backtrack (inc i) matched-substr-len))))))))
)
(match-1st-char)))
(define find-string-from-port? MISCIO:find-string-from-port?)
;-----------------------------------------------------------------------------
; This is a test driver for miscio:find-string-from-port?, to make sure it
; really works as intended
; moved to vinput-parse.scm
(provide (all-defined)))

View File

@ -0,0 +1,165 @@
; Module header is generated automatically
#cs(module mime mzscheme
(require "common.ss")
(require "myenv.ss")
(require "input-parse.ss")
(require (lib "string.ss" "srfi/13"))
; Handling of MIME Entities and their parts
;
; According to RFC 2045, "Multipurpose Internet Mail Extensions (MIME)
; Part One, Format of Internet Message Bodies",
;
; "The term 'entity', refers specifically to the MIME-defined header
; fields and contents of either a message or one of the parts in the
; body of a multipart entity. The specification of such entities is
; the essence of MIME. Since the contents of an entity are often
; called the 'body', it makes sense to speak about the body of an
; entity. Any sort of field may be present in the header of an entity,
; but only those fields whose names begin with "content-" actually have
; any MIME-related meaning."
;
; Specifically, the MIME standard (RFC 2045) defines the following
; MIME-related headers (header fields)
; Content-type
; Content-Transfer-Encoding
; Content-ID
; Content-Description
;
; Generally we leave content interpretation and processing to a
; user-supplied handler. However, if the MIME entity turns out to
; be composite (multipart), this file provides code to disassemble
; it into separate discrete parts, and have them handled, in turn.
; Composite entities are distinguished by their Content-type (media type)
; of multipart/mixed, multipart/alternative, multipart/parallel,
; multipart/digest, or some other multipart type.
; At present, all of them are handled the same way.
; HTTP character types
; Section "2.2 Basic Rules" of the HTTP 1.1 document
(define (http-token-char? x)
(or (char-alphabetic? x)
(char-numeric? x)
(string-index "!#$%&'*+-.^_`|~" x)))
;------------------------------------------------------------------------
; Parse the Content-type string
;
; Given a Content-Type string:
; media-type [; attr=value]*
; return the list of associations (attr . value)
; where attr is a symbol and value is a string.
; The media-type is returned as an association with the type
; '=mime-type'
; See Sections 2.2 and 3.6 of rfc2616 (HTTP/1.1) for syntax of the
; Content-Type string
(define (MIME:parse-content-type ctype-str)
(call-with-input-string ctype-str
(lambda (port)
(let loop ((attrs
(list (cons '=mime-type
(next-token '() '(#\space #\; *eof* #\tab)
"reading media type" port)))))
(skip-while '(#\space #\tab) port)
(if (not (eqv? #\; (read-char port))) ; must be EOF
attrs ; return the attributes
(let ((attr-name
(string->symbol (next-token '(#\space #\tab) '(#\=)
"reading attr-name" port))))
(read-char port) ; skip the #\= separator
; loading attr-value, which is (section 2.2 of HTTP1.1):
; attr-value = token | quoted-string
; quoted-string = ( <"> *(qdtext | quoted-pair ) <"> )
; qdtext = <any TEXT except <">>
; quoted-pair = "\" CHAR
(cond
((eq? #\" (peek-char port)) ; we're reading a quoted-string
(read-char port) ; skip the opening quote
(let qsloop ((old-fragments '()))
(let ((fragments
(cons
(next-token '() '(#\" #\\)
"reading quoted-string" port)
old-fragments)))
(if (char=? #\" (read-char port))
(loop ; finished reading the quoted-string
(cons
(cons
attr-name
(apply string-append (reverse fragments)))
attrs))
; we've read a backslash. Read the next char literally
(qsloop (cons (string (read-char port)) fragments))
))))
(else ; reading token
(assert (char? (peek-char port))
(http-token-char? (peek-char port)))
(loop
(cons
(cons attr-name
(next-token '() '(#\space #\; *eof* #\tab)
"reading token" port))
attrs))))
))))))
; read-headers port
; The procedure reads MIME headers from the port.
; The port will be positioned after the empty line that
; separates the headers.
; Later on, make a separate procedure: read-a-header
(define MIME:read-headers
(let ()
(define (read-new-header http-port resp-headers)
(let ((c (peek-char http-port)))
(cond
((eqv? c #\return) ; An empty line, the end of headers
(if (eqv? #\newline (peek-next-char http-port))
(read-char http-port)) ; skip the following \n if any
resp-headers)
((eqv? c #\newline) ; #\return should have been appeared before
(read-char http-port) ; but not all servers are compliant
resp-headers)
((char-alphabetic? c) ; beginning of the new header
(let* ((header-name
(string->symbol
(string-upcase
(next-token '() '(#\: #\space #\tab *eof*) ""
http-port))))
(delim (skip-while '(#\space #\tab) http-port))
(header-value
(if (eqv? delim #\:)
(begin (read-char http-port)
(skip-while '(#\space #\tab) http-port)
(read-line http-port))
#f)))
(if (string? header-value)
(check-cont http-port resp-headers
header-name header-value)
(error "BAD-HEADER: " resp-headers))))
(else
(error "BAD-HEADER: " resp-headers)))))
; check to see if the value of the header continues on the next line
(define (check-cont http-port resp-headers
header-name header-value)
(let ((c (peek-char http-port)))
(cond
((or (eqv? c #\space) (eqv? c #\tab)) ; it continues
(let ((cont-value (read-line http-port)))
(check-cont http-port resp-headers
header-name (string-append header-value cont-value))))
(else
(read-new-header http-port
(cons (cons header-name header-value)
resp-headers))))))
(lambda (http-port)
(read-new-header http-port '()))
))
(provide (all-defined)))

View File

@ -0,0 +1,458 @@
; Module header is generated automatically
#cs(module multi-parser mzscheme
(require "myenv.ss")
(require (lib "string.ss" "srfi/13"))
(require "input-parse.ss")
(require "parse-error.ss")
(require "ssax-code.ss")
(require "ssax-prim.ss")
(require "id.ss")
(require "xlink-parser.ss")
;; SSAX multi parser
;; Provides ID-index creation, SXML parent pointers and XLink grammar parsing
;
; This software is in Public Domain.
; IT IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.
;
; Please send bug reports and comments to:
; lisovsky@acm.org Kirill Lisovsky
; lizorkin@hotbox.ru Dmitry Lizorkin
;
; Primary features:
; '()
; '(parent)
; '(id)
; '(parent id)
; '(id xlink)
; '(parent id xlink)
;=========================================================================
; Parent seed
;------------------------------------------------
; Parent-related part of the seed
; It is a list of one element:
; a function of no arguments which returns a pointer to element's parent
; or '*TOP-PTR* symbol for a root SXML element
; Duuring an element construction it may be just a pointer to parents head,
; because a parent itself may be under construction at the moment.
; This function is called by the NEW-LEVEL-SEED handler
; elem-name = (if(symbol? elem-gi) elem-gi (RES-NAME->SXML elem-gi)
; A new 'parent:seed' is returned
(define (parent:new-level-seed-handler elem-name)
(let
((head (list elem-name)))
(list (lambda () head))))
; A function which constructs an element from its attributes, children
; and delayed parent information
; parent:seed - contains a delayed pointer to element's parent
; attrs - element's attributes
; children - a list of child elements
(define (parent:construct-element parent:parent-seed parent:seed
attrs children)
; car gets the only element of parent seed - a pointer to a parent
(let((parent-ptr (car parent:parent-seed))
(head ((car parent:seed))))
(cons (car head)
(cons* (cons '@ attrs)
`(@@ (*PARENT* ,parent-ptr))
children))))
;=========================================================================
; A seed
; seed = (list original-seed parent:seed id:seed xlink:seed)
; original-seed - the seed of the original 'SSAX:XML->SXML' function. It
; contains an SXML tree being constructed.
; parent:seed - parent-related part
; id:seed - id-related part
; xlink:seed - xlink-related part
;------------------------------------------------------------------------------
; Accessors
; (mul:seed-original seed)
(define get-sxml-seed car)
; Renamed:
; mul:seed-parent get-pptr-seed
; mul:seed-id get-id-seed
; mul:seed-xlink get-xlink-seed
; Handler for attempts to access an absent seed.
(define (bad-accessor type)
(lambda x
(cerr nl "MURDER!!! -> " type nl x nl) (exit -1)))
; Seed constructor. #f seeds will be omitted.
(define (make-seed . seeds)
(let rpt
((s (cdr seeds)) (rzt (list (car seeds))))
(cond
((null? s) (reverse rzt))
((car s) (rpt (cdr s)
(cons (car s) rzt)))
(else (rpt (cdr s) rzt)))))
;=========================================================================
; This is a multi parser constructor function
; parent, id, xlink - boolean parameters. #t means that we construct the
; corresponding feature, #f - otherwise
; ns - for future development. Is not used anywhere in the function
(define (ssax:multi-parser . req-features)
(let ((ns-assig '())
(with-parent? (memq 'parent req-features))
(with-id? (memq 'id req-features))
(with-xlink? (memq 'xlink req-features)))
(call-with-values
(lambda () (values
(if with-parent?
cadr (bad-accessor 'par))
(if with-id?
(if with-parent? caddr cadr)
(bad-accessor 'id))
(if with-xlink?
(cond
((and with-parent? with-id?)
cadddr)
((or with-parent? with-id?)
caddr)
(else cadr))
(bad-accessor 'xlink))))
(lambda (get-pptr-seed get-id-seed get-xlink-seed)
(let ((initial-seed ; Initial values for specialized seeds
(make-seed
'()
(and with-parent? (list '*TOP-PTR*))
(and with-id? (id:make-seed '() '()))
(and with-xlink?
(xlink:make-small-seed 'general '() '(1) '())))))
(letrec
(
; Making a special function, which, if applyed to the final seed,
; will construct a document
(ending-actions
(cond
((not (or with-id? with-xlink?))
(lambda (seed)
(let ((result (reverse (get-sxml-seed seed))))
(cons '*TOP* result))))
((and with-id? (not with-xlink?)) ; with-id?
(lambda (seed)
(let((result (reverse (get-sxml-seed seed)))
(aux (list (id:ending-action (get-id-seed seed)))))
(cons* '*TOP*
(cons '@@ aux)
result))))
((and with-id? with-xlink?) ; with-id, with-xlink
(lambda (seed)
(let((result (reverse (get-sxml-seed seed)))
(aux (list (xlink:ending-action (get-xlink-seed seed))
(id:ending-action (get-id-seed seed)))))
(cons* '*TOP*
(cons '@@ aux)
result))))
(else
(cerr "ending-actions NIY: " with-parent? with-id? with-xlink? nl)
(exit))))
;------------------------------------
; Some handlers
; A special function
; When given an input port, it becomes a handler for a NEW-LEVEL-SEED
(new-level-seed-handler
(cond
((not (or with-parent? with-id? with-xlink?))
(lambda(port)
(lambda (elem-gi attributes namespaces expected-content seed)
(list '()))))
((and with-parent? (not (or with-id? with-xlink?))) ; with-parent
(lambda(port)
(lambda (elem-gi attributes namespaces expected-content seed)
(make-seed
'()
(and with-parent?
(parent:new-level-seed-handler
(if (symbol? elem-gi)
elem-gi
(RES-NAME->SXML elem-gi))))
))))
((and with-id? (not (or with-parent? with-xlink?))) ; with-id
(lambda(port)
(lambda (elem-gi attributes namespaces expected-content seed)
(list ; make-seed
'()
(id:new-level-seed-handler (get-id-seed seed))))))
((and with-parent? with-id? (not with-xlink?)) ; parent, id
(lambda(port)
(lambda (elem-gi attributes namespaces expected-content seed)
(list ; make-seed
'()
(parent:new-level-seed-handler
(if(symbol? elem-gi) elem-gi (RES-NAME->SXML elem-gi)))
(id:new-level-seed-handler (get-id-seed seed))))))
((and with-id? with-xlink? (not with-parent?)) ; id, xlink
(lambda(port)
(lambda (elem-gi attributes namespaces expected-content seed)
(list ; make-seed
'()
(id:new-level-seed-handler (get-id-seed seed))
(xlink:new-level-seed-handler
port attributes namespaces (get-xlink-seed seed))))))
((and with-parent? with-id? with-xlink?) ; parent, id, xlink
(lambda(port)
(lambda (elem-gi attributes namespaces expected-content seed)
(list ; make-seed
'()
(parent:new-level-seed-handler
(if(symbol? elem-gi) elem-gi (RES-NAME->SXML elem-gi)))
(id:new-level-seed-handler (get-id-seed seed))
(xlink:new-level-seed-handler
port attributes namespaces (get-xlink-seed seed))))))
(else (cerr "new-level NIY: " with-parent? with-id? with-xlink? nl)
(exit))))
; A special handler function for a FINISH-ELEMENT
(finish-element-handler
(cond
((not (or with-parent? with-id? with-xlink?))
(lambda (elem-gi attributes namespaces parent-seed seed)
(let ((children (reverse-collect-str-drop-ws (get-sxml-seed seed)))
(attrs
(attlist-fold
(lambda (attr accum)
(cons (list
(if (symbol? (car attr)) (car attr)
(RES-NAME->SXML (car attr)))
(cdr attr)) accum))
'() attributes)))
(list ; make-seed
(cons
(cons
(if (symbol? elem-gi) elem-gi
(RES-NAME->SXML elem-gi))
(if (null? attrs) children
(cons (cons '@ attrs) children)))
(get-sxml-seed parent-seed))))))
((and with-parent? (not (or with-id? with-xlink?))) ; parent
(lambda (elem-gi attributes namespaces parent-seed seed)
(let((children (reverse-collect-str-drop-ws (get-sxml-seed seed)))
(attrs
(attlist-fold
(lambda (attr accum)
(cons (list
(if (symbol? (car attr)) (car attr)
(RES-NAME->SXML (car attr)))
(cdr attr)) accum))
'() attributes)))
(list ; make-seed
(cons
(parent:construct-element
(get-pptr-seed parent-seed)
(get-pptr-seed seed)
attrs children)
(get-sxml-seed parent-seed))
; pptr- seed from parent seed is not modified:
(get-pptr-seed parent-seed)
))))
((and with-id? (not (or with-parent? with-xlink?))) ; id
(lambda (elem-gi attributes namespaces parent-seed seed)
(let((children (reverse-collect-str-drop-ws (get-sxml-seed seed)))
(attrs
(attlist-fold
(lambda (attr accum)
(cons (list
(if (symbol? (car attr)) (car attr)
(RES-NAME->SXML (car attr)))
(cdr attr)) accum))
'() attributes)))
(let((element
(cons
(if(symbol? elem-gi)
elem-gi
(RES-NAME->SXML elem-gi))
(if(null? attrs)
children
(cons (cons '@ attrs) children)))))
(list ; make-seed
(cons element (get-sxml-seed parent-seed))
(id:finish-element-handler
elem-gi attributes (get-id-seed seed) element))))))
((and with-parent? with-id? (not with-xlink?)) ; parent, id
(lambda (elem-gi attributes namespaces parent-seed seed)
(let((children (reverse-collect-str-drop-ws (get-sxml-seed seed)))
(attrs
(attlist-fold
(lambda (attr accum)
(cons (list
(if (symbol? (car attr)) (car attr)
(RES-NAME->SXML (car attr)))
(cdr attr)) accum))
'() attributes)))
(let((element
(parent:construct-element
(get-pptr-seed parent-seed) (get-pptr-seed seed)
attrs children)))
(list ; make-seed
(cons element (get-sxml-seed parent-seed))
; pptr- seed from parent seed is not modified:
(get-pptr-seed parent-seed)
(id:finish-element-handler
elem-gi attributes (get-id-seed seed) element))))))
((and with-id? with-xlink? (not with-parent?)) ; id, xlink
(lambda (elem-gi attributes namespaces parent-seed seed)
(let((children (reverse-collect-str-drop-ws (get-sxml-seed seed)))
(attrs
(attlist-fold
(lambda (attr accum)
(cons (list
(if (symbol? (car attr)) (car attr)
(RES-NAME->SXML (car attr)))
(cdr attr)) accum))
'() attributes)))
(let((element
(cons
(if(symbol? elem-gi)
elem-gi
(RES-NAME->SXML elem-gi))
(if(null? attrs)
children
(cons (cons '@ attrs) children)))))
(list ; make-seed
(cons element (get-sxml-seed parent-seed))
(id:finish-element-handler
elem-gi attributes (get-id-seed seed) element)
(xlink:finish-element-handler
(get-xlink-seed parent-seed)
(get-xlink-seed seed) element))))))
((and with-parent? with-id? with-xlink?) ; parent, id, xlink
(lambda (elem-gi attributes namespaces parent-seed seed)
(let((children (reverse-collect-str-drop-ws (get-sxml-seed seed)))
(attrs
(attlist-fold
(lambda (attr accum)
(cons (list
(if (symbol? (car attr)) (car attr)
(RES-NAME->SXML (car attr)))
(cdr attr)) accum))
'() attributes)))
(let((element
(parent:construct-element
(get-pptr-seed parent-seed) (get-pptr-seed seed)
attrs children)))
(list ; make-seed
(cons element (get-sxml-seed parent-seed))
; pptr- seed from parent seed is not modified:
(get-pptr-seed parent-seed)
(id:finish-element-handler
elem-gi attributes (get-id-seed seed) element)
(xlink:finish-element-handler
(get-xlink-seed parent-seed)
(get-xlink-seed seed) element))))))
(else (cerr "finish-element: NIY" nl) (exit))))
; A special function
; Given 'namespaces', it becomes a handler for a DOCTYPE
(doctype-handler
(if
(not with-id?)
(lambda (namespaces)
(lambda (port docname systemid internal-subset? seed)
(when internal-subset?
(ssax:warn port
"Internal DTD subset is not currently handled ")
(ssax:skip-internal-dtd port))
(ssax:warn port "DOCTYPE DECL " docname " "
systemid " found and skipped")
(values #f '() namespaces seed)))
(cond
((not (or with-parent? with-xlink?)) ; with-id
(lambda (namespaces)
(lambda (port docname systemid internal-subset? seed)
(values
#f '() namespaces
(list ; make-seed
(get-sxml-seed seed)
(id:doctype-handler port systemid internal-subset?))))))
((and with-parent? (not with-xlink?)) ; with-parent, with-id
(lambda (namespaces)
(lambda (port docname systemid internal-subset? seed)
(values
#f '() namespaces
(list ; make-seed
(get-sxml-seed seed)
(get-pptr-seed seed)
(id:doctype-handler port systemid internal-subset?))))))
((and (not with-parent?) with-xlink?) ; with-id, with-xlink
(lambda (namespaces)
(lambda (port docname systemid internal-subset? seed)
(values
#f '() namespaces
(list ; make-seed
(get-sxml-seed seed)
(id:doctype-handler port systemid internal-subset?)
(get-xlink-seed seed))))))
(else ; with-parent, with-id, with-xlink
(lambda (namespaces)
(lambda (port docname systemid internal-subset? seed)
(values
#f '() namespaces
(list ; make-seed
(get-sxml-seed seed)
(get-pptr-seed seed)
(id:doctype-handler port systemid internal-subset?)
(get-xlink-seed seed)))))))))
) ; end of letrec
; Constructing a special parser function
(lambda (port)
(let
((namespaces
(map (lambda (el)
(cons* #f (car el) (ssax:uri-string->symbol (cdr el))))
ns-assig)))
(ending-actions
((ssax:make-parser
NEW-LEVEL-SEED
(new-level-seed-handler port)
FINISH-ELEMENT
finish-element-handler
CHAR-DATA-HANDLER
(lambda (string1 string2 seed)
(cons
(if(string-null? string2)
(cons string1 (car seed))
(cons* string2 string1 (car seed)))
(cdr seed)))
DOCTYPE
(doctype-handler namespaces)
UNDECL-ROOT
(lambda (elem-gi seed)
(values #f '() namespaces seed))
PI
((*DEFAULT* . (lambda (port pi-tag seed)
(cons
(cons
(list '*PI* pi-tag
(ssax:read-pi-body-as-string port))
(car seed))
(cdr seed)))))
)
port
initial-seed))))))
))))
(provide (all-defined)))

View File

@ -0,0 +1,380 @@
; Module header is generated automatically
#cs(module myenv mzscheme
(require (lib "defmacro.ss"))
;; $Id: myenv.ss,v 1.14 2002/03/28 22:23:06 nwv Exp $
;; $Source: /home/nwv/cvsroot/projects/ssax-plt/myenv.ss,v $
;; [ssax-plt] This is a modified version of "official/lib/myenv.scm".
;(module myenv mzscheme
; (require (lib "defmacro.ss"))
; (require (rename (lib "pretty.ss") pp pretty-print))
; My Standard Scheme "Prelude"
;
; This version of the prelude contains several forms and procedures
; that are specific to a Gambit-C 3.0 system.
; See myenv-scm.scm, myenv-bigloo.scm, etc. for versions
; of this prelude that are tuned to other Scheme systems.
;
; Id: myenv.scm,v 1.2 2001/09/21 19:53:30 oleg Exp
; assert the truth of an expression (or of a sequence of expressions)
;
; syntax: assert ?expr ?expr ... [report: ?r-exp ?r-exp ...]
;
; If (and ?expr ?expr ...) evaluates to anything but #f, the result
; is the value of that expression.
; If (and ?expr ?expr ...) evaluates to #f, an error is reported.
; The error message will show the failed expressions, as well
; as the values of selected variables (or expressions, in general).
; The user may explicitly specify the expressions whose
; values are to be printed upon assertion failure -- as ?r-exp that
; follow the identifier 'report:'
; Typically, ?r-exp is either a variable or a string constant.
; If the user specified no ?r-exp, the values of variables that are
; referenced in ?expr will be printed upon the assertion failure.
(define-macro (assert expr . others)
; given the list of expressions or vars,
; make the list appropriate for cerr
(define (make-print-list prefix lst)
(cond
((null? lst) '())
((symbol? (car lst))
(cons #\newline
(cons (list 'quote (car lst))
(cons ": " (cons (car lst) (make-print-list #\newline (cdr lst)))))))
(else
(cons prefix (cons (car lst) (make-print-list "" (cdr lst)))))))
; return the list of all unique "interesting"
; variables in the expr. Variables that are certain
; to be bound to procedures are not interesting.
(define (vars-of expr)
(let loop ((expr expr) (vars '()))
(cond
((not (pair? expr)) vars) ; not an application -- ignore
((memq (car expr)
'(quote let let* letrec let-values* lambda cond quasiquote
case define do assert))
vars) ; won't go there
(else ; ignore the head of the application
(let inner ((expr (cdr expr)) (vars vars))
(cond
((null? expr) vars)
((symbol? (car expr))
(inner (cdr expr)
(if (memq (car expr) vars) vars (cons (car expr) vars))))
(else
(inner (cdr expr) (loop (car expr) vars)))))))))
(cond
((null? others) ; the most common case
`(or ,expr (begin (cerr "failed assertion: " ',expr nl "bindings"
,@(make-print-list #\newline (vars-of expr)) nl)
(error "assertion failure"))))
((eq? (car others) 'report:) ; another common case
`(or ,expr (begin (cerr "failed assertion: " ',expr
,@(make-print-list #\newline (cdr others)) nl)
(error "assertion failure"))))
((not (memq 'report: others))
`(or (and ,expr ,@others)
(begin (cerr "failed assertion: " '(,expr ,@others) nl "bindings"
,@(make-print-list #\newline
(vars-of (cons 'and (cons expr others)))) nl)
(error "assertion failure"))))
(else ; report: occurs somewhere in 'others'
(let loop ((exprs (list expr)) (reported others))
(cond
((eq? (car reported) 'report:)
`(or (and ,@(reverse exprs))
(begin (cerr "failed assertion: " ',(reverse exprs)
,@(make-print-list #\newline (cdr reported)) nl)
(error "assertion failure"))))
(else (loop (cons (car reported) exprs) (cdr reported)))))))
)
(define-macro (assure exp error-msg) `(assert ,exp report: ,error-msg))
;; [ssax-plt] Different definition of `identify-error'.
;(define (identify-error msg args . disposition-msgs)
; (##identify-error "ERROR" #f #f msg args disposition-msgs))
(define identify-error
(let ((display-list (lambda (lst)
(for-each (lambda (arg)
(display " ")
(display arg))
lst))))
(lambda (msg args . disposition-msgs)
(parameterize ((current-output-port (current-error-port)))
(newline)
(display "ERROR: ")
(display msg)
(display-list args)
(unless (null? disposition-msgs)
(newline)
(display "ERROR DISPOSITION:")
(display-list disposition-msgs))
(newline)))))
; like cout << arguments << args
; where argument can be any Scheme object. If it's a procedure
; (without args) it's executed rather than printed (like newline)
(define (cout . args)
(for-each (lambda (x)
(if (procedure? x) (x) (display x)))
args))
;; [ssax-plt] In `cerr', `##stderr' replaced with `(current-error-port)'.
(define (cerr . args)
(for-each (lambda (x)
(if (procedure? x)
(x (current-error-port))
(display x (current-error-port))))
args))
;(##define-macro (nl) '(newline))
(define nl (string #\newline))
;; [ssax-plt] `##fixnum.' prefix removed.
; Some useful increment/decrement operators
; Note, ##fixnum prefix is Gambit-specific, it means that the
; operands assumed FIXNUM (as they ought to be anyway).
; This perfix could be safely removed: it'll leave the code just as
; correct, but more portable (and less efficient)
; Mutable increment
(define-macro (++! x) `(set! ,x (+ 1 ,x)))
; Read-only increment
(define-macro (++ x) `(+ 1 ,x))
; Mutable decrement
(define-macro (--! x) `(set! ,x (- ,x 1)))
; Read-only decrement
(define-macro (-- x) `(- ,x 1))
; Some useful control operators
;; [ssax-plt] PLT defines `when'.
; if condition is true, execute stmts in turn
; and return the result of the last statement
; otherwise, return #f
;(define-macro (when condition . stmts)
; `(and ,condition (begin ,@stmts)))
; if condition is false execute stmts in turn
; and return the result of the last statement
; otherwise, return #t
; This primitive is often called 'unless'
(define-macro (whennot condition . stmts)
`(or ,condition (begin ,@stmts)))
;; [ssax-plt] PLT defines `begin0'.
; Execute a sequence of forms and return the
; result of the _first_ one. Like PROG1 in Lisp.
; Typically used to evaluate one or more forms with
; side effects and return a value that must be
; computed before some or all of the side effects
; happen.
;(define-macro (begin0 form . forms)
; (let ((var (gensym)))
; `(let ((,var ,form)) ,@forms ,var)))
; Prepend an ITEM to a LIST, like a Lisp macro PUSH
; an ITEM can be an expression, but ls must be a VAR
(define-macro (push! item ls)
`(set! ,ls (cons ,item ,ls)))
; DL: Defined in SRFI-13
; ; Is str the empty string?
; ; string-null? str -> bool
; ; See Olin Shiver's Underground String functions
;(define-macro (string-null? str) `(zero? (string-length ,str)))
; Support for multiple-values and let-values* form
; Multiple values are not present natively in Gambit.
; What follows is an _approximation_: it is not very good in case
; of continuations captured while evaluating an argument expression of
; values. Note that the only meaningful way to use 'values' procedure is
; in conjunction with call-with-values or let-values*
;; [ssax-plt] PLT defines `values' and `call-with-values'.
; (define values list)
; (define (call-with-values producer consumer)
; (apply consumer (producer)))
;; [ssax-plt] We use the `let-values*' from Kirill Lisovsky's port of SSAX 4.9
;; instead of the current SSAX one.
; Like let* but allowing for multiple-value bindings
;(define-macro (let-values* bindings . body)
; (if (null? bindings) (cons 'begin body)
; (apply (lambda (vars initializer)
; (let ((cont
; (cons 'let-values*
; (cons (cdr bindings) body))))
; (cond
; ((not (pair? vars)) ; regular let case, a single var
; `(let ((,vars ,initializer)) ,cont))
; ((null? (cdr vars)) ; single var, see the prev case
; `(let ((,(car vars) ,initializer)) ,cont))
; ((null? (cddr vars)) ; two variables
; (let ((val (gensym)))
; `(let* ((,val ,initializer)
; (,(car vars) (car ,val))
; (,(cadr vars) (cadr ,val))) ,cont)))
; (else ; the most generic case
; `(apply (lambda ,vars ,cont) ,initializer)))))
; (car bindings))))
(define-macro let-values* (lambda (bindings . body)
(if (null? bindings) (cons 'begin body)
(apply (lambda (vars initializer)
(let ((cont
(cons 'let-values*
(cons (cdr bindings) body))))
(cond
((not (pair? vars)) ; regular let case, a single var
`(let ((,vars ,initializer)) ,cont))
((null? (cdr vars)) ; single var, see the prev case
`(let ((,(car vars) ,initializer)) ,cont))
(else ; the most generic case
`(call-with-values (lambda () ,initializer)
(lambda ,vars ,cont))))))
(car bindings)))))
; assoc-primitives with a default clause
; If the search in the assoc list fails, the
; default action argument is returned. If this
; default action turns out to be a thunk,
; the result of its evaluation is returned.
; If the default action is not given, an error
; is signaled
(define-macro (assq-def key alist . default-action-arg)
(let ((default-action
(if (null? default-action-arg)
`(error "failed to assq key '" ,key "' in a list " ,alist)
(let ((defact-symb (gensym)))
`(let ((,defact-symb ,(car default-action-arg)))
(if (procedure? ,defact-symb) (,defact-symb) ,defact-symb))))))
`(or (assq ,key ,alist) ,default-action)))
(define-macro (assv-def key alist . default-action-arg)
(let ((default-action
(if (null? default-action-arg)
`(error "failed to assv key '" ,key "' in a list " ,alist)
(let ((defact-symb (gensym)))
`(let ((,defact-symb ,(car default-action-arg)))
(if (procedure? ,defact-symb) (,defact-symb) ,defact-symb))))))
`(or (assv ,key ,alist) ,default-action)))
(define-macro (assoc-def key alist . default-action-arg)
(let ((default-action
(if (null? default-action-arg)
`(error "failed to assoc key '" ,key "' in a list " ,alist)
(let ((defact-symb (gensym)))
`(let ((,defact-symb ,(car default-action-arg)))
(if (procedure? ,defact-symb) (,defact-symb) ,defact-symb))))))
`(or (assoc ,key ,alist) ,default-action)))
; Convenience macros to avoid quoting of symbols
; being deposited/looked up in the environment
(define-macro (env.find key) `(%%env.find ',key))
(define-macro (env.demand key) `(%%env.demand ',key))
(define-macro (env.bind key value) `(%%env.bind ',key ,value))
; Implementation of SRFI-0
; Only feature-identifiers srfi-0 and gambit
; assumed predefined
(define-macro (cond-expand . clauses)
(define feature-ids '(plt srfi-0))
(define (feature-req-satisfies? fr) ; does feature-request satisfies?
(cond
((memq fr feature-ids) #t)
((not (pair? fr)) #f)
((eq? 'and (car fr))
(let loop ((clauses (cdr fr)))
(or (null? clauses)
(and (feature-req-satisfies? (car clauses))
(loop (cdr clauses))))))
((eq? 'or (car fr))
(let loop ((clauses (cdr fr)))
(and (pair? clauses)
(or (feature-req-satisfies? (car clauses))
(loop (cdr clauses))))))
((eq? 'not (car fr))
(not (feature-req-satisfies? (and (pair? (cdr fr)) (cadr fr)))))
(else #f)))
(let loop ((clauses clauses))
(if (null? clauses) '(error "Unfulfilled cond-expand")
(let* ((feature-req (if (pair? (car clauses)) (caar clauses)
(error "<cond-expand clause> is not a list")))
(cmd-or-defs* (cons 'begin (cdar clauses))))
(cond
((and (eq? 'else feature-req) (null? (cdr clauses)))
cmd-or-defs*)
((feature-req-satisfies? feature-req)
cmd-or-defs*)
(else (loop (cdr clauses))))))))
;; [ssax-plt] Begin misc. other definitions needed by other modules.
(define (call-with-input-string str proc)
(proc (open-input-string str)))
; I guess there's only one way to write this... :)
;(define (string-index str chr)
; (let ((len (string-length str)))
; (let search ((i 0))
; (cond ((= i len) #f)
; ((char=? chr (string-ref str i)) i)
; (else (search (+ i 1)))))))
(define (with-input-from-string str thunk)
(parameterize ((current-input-port (open-input-string str)))
(thunk)))
;; [ssax-plt] End misc. other definitions needed by other modules.
;; [ssax-plt] Finish module.
;(provide (all-defined) pp))
;==============================================================================
; DL: this piece of code is taken from the previous version of "myenv.scm"
; Stubs
(define-macro (inc x) `(+ 1 ,x))
(define-macro (dec x) `(- ,x 1))
(define (cons* a1 a2 . rest)
(if (null? rest)
(cons a1 a2)
(cons a1 (apply cons* (cons a2 rest)))))
;; Gambit's include and declare are disabled
(define-macro include (lambda (file) #f))
(define-macro declare (lambda x #f))
(provide (all-defined)))

View File

@ -0,0 +1,48 @@
; Module header is generated automatically
#cs(module parse-error mzscheme
(require "myenv.ss")
; This code provides informative error messages
; for SSAX (S)XML parser.
;
;
; NOTE: PLT-specific !
; It was tested with SSAX version 4.6 / PLT 103
;
;==============================================================================
; Error handler
; According to the SSAX convention this function
; accepts the port as its first argument which is used for
; location of the error in input file.
; Other parameters are considered as error messages,
; they are printed to stderr as is.
(define parser-error
(lambda args
(if
(port? (car args))
(cerr nl "Error at position "
(file-position (car args)) nl
(cdr args))
(cerr nl "Error in error handler: its first parameter is not a port"
nl args))
(cerr nl)
; (exit -1) ; this exit makes me completely insane!
(raise -1)
))
(define SSAX:warn
(lambda args
(if
(port? (car args))
(cerr nl "Warning at position "
(file-position (car args)) nl
(cdr args) nl)
#f)
))
; Alias
(define ssax:warn SSAX:warn)
(provide (all-defined)))

View File

@ -0,0 +1,323 @@
; Module header is generated automatically
#cs(module srfi-12 mzscheme
(require (lib "defmacro.ss"))
(require "myenv.ss")
;************************************************************************
; srfi-12.scm
; This file is the part of SSAX package (http://ssax.sourceforge.net),
; which is in public domain.
;************************************************************************
; Implementation of SRFI-12
;
; Most of the generic code and the comments are taken from
;
; SRFI-12: Exception Handling
; By William Clinger, R. Kent Dybvig, Matthew Flatt, and Marc Feeley
; http://srfi.schemers.org/srfi-12/
; The SRFI-12 Reference implementation has been amended where needed with
; a platform-specific code
;
;------------------------------------------------------------------------
; Catching exceptions
; The platform-specific part
; Procedure: with-exception-handler HANDLER THUNK
; Returns the result(s) of invoking thunk. The handler procedure is
; installed as the current exception handler in the dynamic context of
; invoking thunk.
; Procedure: abort OBJ
; Raises a non-continuable exception represented by OBJ.
; The abort procedure does not ensure that its argument is a
; condition. If its argument is a condition, abort does not ensure that
; the condition indicates a non-continuable exception.
; Procedure: exc:signal OBJ
; Raises a continuable exception represented by OBJ.
; In SRFI-12, this procedure is named 'signal'. However, this name
; clashes with the name of an internal Bigloo procedure. In a compiled
; code, this clash leads to a Bus error.
; Procedure: current-exception-handler
; Returns the current exception handler.
(cond-expand
(gambit
; The Gambit implementation relies on internal Gambit procedures,
; whose names start with ##
; Such identifiers cannot be _read_ on many other systems
; The following macro constructs Gambit-specific ids on the fly
(define-macro (_gid id)
(string->symbol (string-append "##" (symbol->string id))))
; `with-exception-handler` is built-in
; `abort` is built-in
(define (exc:signal obj) ; Encapsulate the object into a cell
(raise (list obj))) ; to let Gambit know it's our object
(define gambit-error error) ; Save the native Gambit 'error' function
(define (error msg . args)
(abort (make-property-condition
'exn
'message (cons msg args))))
; `current-exception-handler` is built-in
)
(bigloo
(define (with-exception-handler handler thunk)
(try (thunk)
; If we raised the condition explicitly, the proc
; is a pair, whose car is the
; argument that was passed to 'abort' or 'exc:signal'.
; The cdr part of the pair is the
; continuation (for a continuable exception)
(lambda (escape proc mes obj)
;(cerr "exn! " proc mes obj nl)
(if (pair? proc) ; We've caught the exception thrown
(let ((cont (cdr proc))) ; by abort or exc:signal
(if (not (null? cont))
(cont (handler (car proc))) ; continue after the handler
(handler (car proc))) ; Let Bigloo handle the return
) ; from the handler
; If (pair? proc) is false, we caught the exception
; raised by Bigloo's runtime system
; Let Bigloo handle the return from the handler
(handler
(make-property-condition
'exn ; condition kind required by SRFI-12
'message
(list proc mes obj)))))))
; An "at hock" implementation
(define-macro (handle-exceptions var handle-expr expr . more-exprs)
`(try
,(cons `begin (cons expr more-exprs))
(lambda (escape proc mes obj)
(let((,var
(if (pair? proc) ; by abort or exc:signal
(car proc)
(make-property-condition ; required by SRFI-12
'exn
'message
(list proc mes obj)))))
,handle-expr))))
(define (abort obj) ; Encapsulate the object into a cell
(the_failure (list obj) "" "") ; to let Bigloo know it's our object
(exit 4)) ; In case the exc:signal handler returns
; Encapsulate the object into a cell
; to let Bigloo know it's our object.
; In addition, we capture the continuation:
; 'exc:signal' generates a continuable
; exception
(define (exc:signal obj)
(bind-exit (escape)
(the_failure (cons obj escape) "" "")))
; When the current-exception-handler is applied, we encapsulate the
; argument (the exception) into a cell to let the framework know
; it's our exception
; We need to capture the continuation at the point current-exception-handler
; is invoked, so we can come back to that point and issue 'abort'
; in the dynamic context where current-exception-handler is invoked.
; We assume that a call to the current-exception-handler is
; equivalent to the throwing of a non-continuable exception
; (SRFI-12 does not preclude such an assumption).
; DL: had to comment it out, because Bigloo compiler dislikes
; CALL-WITH-CURRENT-CONTINUATION. A temporary solution.
;(define (current-exception-handler)
; (let ((result
; (call-with-current-continuation
; (lambda (k)
; (lambda (exn) (k (list exn)))))))
; (if (procedure? result) result
; (abort (car result))))) ; re-entrance after k was invoked
; A simplified version (which is far more efficient on bigloo)
; If this function is invoked in the context of an exception handler,
; the function invokes a _parent_ exception handler.
(define (parent-exception-handler)
(lambda (exn) (exc:signal exn)))
)
(chicken ; Chicken supports SRFI-12 natively
(define exc:signal signal)
)
(plt
; DL: supported in PLT natively
; ; Borrowed from Bigloo's cond-expand branch
; (define (current-exception-handler)
; (let ((result
; (call-with-current-continuation
; (lambda (k)
; (lambda (exn) (k (list exn)))))))
; (if (procedure? result) result
; (abort (car result)))))
; A helper function which converts an exception (PLT internal exception
; or SRFI-12 exception) into CONDITION
(define (exn:exception->condition obj)
(cond
((exn? obj) ; PLT internal exception
(make-property-condition
'exn ; condition kind required by SRFI-12
'message
(exn-message obj)))
((pair? obj) ; exception generated by ABORT or EXN:SIGNAL
(car obj))
(else ; some more conditions should be added, I guess
obj)))
(define-macro (with-exception-handler handler thunk)
`(with-handlers
(((lambda (x) #t)
(lambda (x)
(,handler (exn:exception->condition x)))))
(,thunk)))
; Evaluates the body expressions expr1, expr2, ... in sequence with an
; exception handler constructed from var and handle-expr. Assuming no
; exception is raised, the result(s) of the last body expression is(are)
; the result(s) of the HANDLE-EXCEPTIONS expression.
; The exception handler created by HANDLE-EXCEPTIONS restores the dynamic
; context (continuation, exception handler, etc.) of the HANDLE-EXCEPTIONS
; expression, and then evaluates handle-expr with var bound to the value
; provided to the handler.
(define-macro (handle-exceptions var handle-expr expr . more-exprs)
(cons
`with-handlers
(cons
`(((lambda (x) #t)
(lambda (x)
(let ((,var (exn:exception->condition x)))
,handle-expr))))
(cons expr more-exprs))))
; This implementation was borrowed from Gambit's cond-expand branch
(define (abort obj)
(raise (list obj))
(exit 4))
(define (exc:signal obj)
(raise (list obj)))
(define (signal obj)
(raise (list obj)))
) ; end of PLT branch
)
; (define (with-exception-handler handler thunk)
; (let ((old #f))
; (dynamic-wind
; (lambda ()
; (set! old *current-exn-handler*)
; (set! *current-exn-handler* handler))
; thunk
; (lambda ()
; (set! *current-exn-handler* old)))))
; (define (abort obj)
; ((CURRENT-EXCEPTION-HANDLER) obj)
; (ABORT (make-property-condition
; 'exn
; 'message
; "Exception handler returned")))
; (define (exc:signal exn)
; ((CURRENT-EXCEPTION-HANDLER) exn))
;------------------------------------------------------------------------
; Exception conditions
; The following is an approximate implementation of conditions that
; uses lists, instead of a disjoint class of values
; The code below is basically the reference SRFI-12 implementation,
; with a few types fixed.
; A condition is represented as a pair where the first value of the
; pair is this function. A program could forge conditions, and they're
; not disjoint from Scheme pairs.
; Exception conditions are disjoint from any other Scheme values
; (or so should appear).
(define (condition? obj)
(and (pair? obj)
(eq? condition? (car obj))))
; Procedure: make-property-condition KIND-KEY PROP-KEY VALUE ...
; This procedure accepts any even number of arguments after kind-key,
; which are regarded as a sequence of alternating prop-key and value
; objects. Each prop-key is regarded as the name of a property, and
; each value is regarded as the value associated with the key that
; precedes it. Returns a kind-key condition that associates the given
; prop-keys with the given values.
(define (make-property-condition kind-key . prop-vals)
(cons condition? (list (cons kind-key prop-vals))))
; Procedure: make-composite-condition CONDITION ...
; Returns a newly-allocated condition whose components correspond to
; the the given conditions. A predicate created by CONDITION-PREDICATE
; returns true for the new condition if and only if it returns true
; for one or more of its component conditions.
(define (make-composite-condition . conditions)
(cons condition? (apply append (map cdr conditions))))
; Procedure: condition-predicate KIND-KEY
; Returns a predicate that can be called with any object as its
; argument. Given a condition that was created by
; make-property-condition, the predicate returns #t if and only if
; kind-key is EQV? to the kind key that was passed to
; make-property-condition. Given a composite condition created with
; make-composite-condition, the predicate returns #t if and only if
; the predicate returns #t for at least one of its components.
(define (condition-predicate kind-key)
(lambda (exn)
(and (condition? exn) (assv kind-key (cdr exn)))))
; Procedure: condition-property-accessor KIND-KEY PROP-KEY
; Returns a procedure that can be called with any condition that satisfies
; (condition-predicate KIND-KEY). Given a condition that was created by
; make-property-condition and KIND-KEY, the procedure returns the value
; that is associated with prop-key. Given a composite condition created with
; make-composite-condition, the procedure returns the value that is
; associated with prop-key in one of the components that
; satisfies (condition-predicate KIND-KEY).
; Otherwise, the result will be #f
(define (condition-property-accessor kind-key prop-key)
(lambda (exn)
(let* ((p ((condition-predicate kind-key) exn))
(prop-lst (and p (pair? p) (memq prop-key (cdr p)))))
(and prop-lst (pair? (cdr prop-lst)) (cadr prop-lst)))))
(provide (all-defined)))

View File

@ -0,0 +1,61 @@
; Module header is generated automatically
#cs(module ssax-code mzscheme
(require (lib "defmacro.ss"))
(require "common.ss")
(require "myenv.ss")
(require (lib "string.ss" "srfi/13"))
(require "util.ss")
(require "parse-error.ss")
(require "input-parse.ss")
(require "look-for-str.ss")
(require "char-encoding.ss")
(define-syntax run-test (syntax-rules (define) ((run-test "scan-exp" (define vars body)) (define vars (run-test "scan-exp" body))) ((run-test "scan-exp" ?body) (letrec-syntax ((scan-exp (syntax-rules (quote quasiquote !) ((scan-exp (quote ()) (k-head ! . args)) (k-head (quote ()) . args)) ((scan-exp (quote (hd . tl)) k) (scan-lit-lst (hd . tl) (do-wrap ! quasiquote k))) ((scan-exp (quasiquote (hd . tl)) k) (scan-lit-lst (hd . tl) (do-wrap ! quasiquote k))) ((scan-exp (quote x) (k-head ! . args)) (k-head (if (string? (quote x)) (string->symbol (quote x)) (quote x)) . args)) ((scan-exp (hd . tl) k) (scan-exp hd (do-tl ! scan-exp tl k))) ((scan-exp x (k-head ! . args)) (k-head x . args)))) (do-tl (syntax-rules (!) ((do-tl processed-hd fn () (k-head ! . args)) (k-head (processed-hd) . args)) ((do-tl processed-hd fn old-tl k) (fn old-tl (do-cons ! processed-hd k))))) (do-cons (syntax-rules (!) ((do-cons processed-tl processed-hd (k-head ! . args)) (k-head (processed-hd . processed-tl) . args)))) (do-wrap (syntax-rules (!) ((do-wrap val fn (k-head ! . args)) (k-head (fn val) . args)))) (do-finish (syntax-rules () ((do-finish new-body) new-body))) (scan-lit-lst (syntax-rules (quote unquote unquote-splicing !) ((scan-lit-lst (quote ()) (k-head ! . args)) (k-head (quote ()) . args)) ((scan-lit-lst (quote (hd . tl)) k) (do-tl quote scan-lit-lst ((hd . tl)) k)) ((scan-lit-lst (unquote x) k) (scan-exp x (do-wrap ! unquote k))) ((scan-lit-lst (unquote-splicing x) k) (scan-exp x (do-wrap ! unquote-splicing k))) ((scan-lit-lst (quote x) (k-head ! . args)) (k-head (unquote (if (string? (quote x)) (string->symbol (quote x)) (quote x))) . args)) ((scan-lit-lst (hd . tl) k) (scan-lit-lst hd (do-tl ! scan-lit-lst tl k))) ((scan-lit-lst x (k-head ! . args)) (k-head x . args))))) (scan-exp ?body (do-finish !)))) ((run-test body ...) (begin (run-test "scan-exp" body) ...))))
(define (make-xml-token kind head) (cons kind head))
(define xml-token? pair?)
(define-syntax xml-token-kind (syntax-rules () ((xml-token-kind token) (car token))))
(define-syntax xml-token-head (syntax-rules () ((xml-token-head token) (cdr token))))
(define (string-whitespace? str) (let ((len (string-length str))) (cond ((zero? len) #t) ((= 1 len) (char-whitespace? (string-ref str 0))) ((= 2 len) (and (char-whitespace? (string-ref str 0)) (char-whitespace? (string-ref str 1)))) (else (let loop ((i 0)) (or (>= i len) (and (char-whitespace? (string-ref str i)) (loop (inc i)))))))))
(define (assq-values val alist) (let loop ((alist alist) (scanned (quote ()))) (cond ((null? alist) (values #f scanned)) ((equal? val (caar alist)) (values (car alist) (append scanned (cdr alist)))) (else (loop (cdr alist) (cons (car alist) scanned))))))
(define (fold-right kons knil lis1) (let recur ((lis lis1)) (if (null? lis) knil (let ((head (car lis))) (kons head (recur (cdr lis)))))))
(define (fold kons knil lis1) (let lp ((lis lis1) (ans knil)) (if (null? lis) ans (lp (cdr lis) (kons (car lis) ans)))))
(define ssax:S-chars (map ascii->char (quote (32 10 9 13))))
(define (ssax:skip-S port) (skip-while ssax:S-chars port))
(define (ssax:ncname-starting-char? a-char) (and (char? a-char) (or (char-alphabetic? a-char) (char=? #\_ a-char))))
(define (ssax:read-NCName port) (let ((first-char (peek-char port))) (or (ssax:ncname-starting-char? first-char) (parser-error port "XMLNS [4] for '" first-char "'"))) (string->symbol (next-token-of (lambda (c) (cond ((eof-object? c) #f) ((char-alphabetic? c) c) ((string-index "0123456789.-_" c) c) (else #f))) port)))
(define (ssax:read-QName port) (let ((prefix-or-localpart (ssax:read-NCName port))) (case (peek-char port) ((#\:) (read-char port) (cons prefix-or-localpart (ssax:read-NCName port))) (else prefix-or-localpart))))
(define ssax:Prefix-XML (string->symbol "xml"))
(define name-compare (letrec ((symbol-compare (lambda (symb1 symb2) (cond ((eq? symb1 symb2) (quote =)) ((string<? (symbol->string symb1) (symbol->string symb2)) (quote <)) (else (quote >)))))) (lambda (name1 name2) (cond ((symbol? name1) (if (symbol? name2) (symbol-compare name1 name2) (quote <))) ((symbol? name2) (quote >)) ((eq? name2 ssax:largest-unres-name) (quote <)) ((eq? name1 ssax:largest-unres-name) (quote >)) ((eq? (car name1) (car name2)) (symbol-compare (cdr name1) (cdr name2))) (else (symbol-compare (car name1) (car name2)))))))
(define ssax:largest-unres-name (cons (string->symbol "#LARGEST-SYMBOL") (string->symbol "#LARGEST-SYMBOL")))
(define ssax:read-markup-token (let () (define (skip-comment port) (assert-curr-char (quote (#\-)) "XML [15], second dash" port) (if (not (find-string-from-port? "-->" port)) (parser-error port "XML [15], no -->")) (make-xml-token (quote COMMENT) #f)) (define (read-cdata port) (assert (string=? "CDATA[" (read-string 6 port))) (make-xml-token (quote CDSECT) #f)) (lambda (port) (assert-curr-char (quote (#\<)) "start of the token" port) (case (peek-char port) ((#\/) (read-char port) (begin0 (make-xml-token (quote END) (ssax:read-QName port)) (ssax:skip-S port) (assert-curr-char (quote (#\>)) "XML [42]" port))) ((#\?) (read-char port) (make-xml-token (quote PI) (ssax:read-NCName port))) ((#\!) (case (peek-next-char port) ((#\-) (read-char port) (skip-comment port)) ((#\[) (read-char port) (read-cdata port)) (else (make-xml-token (quote DECL) (ssax:read-NCName port))))) (else (make-xml-token (quote START) (ssax:read-QName port)))))))
(define (ssax:skip-pi port) (if (not (find-string-from-port? "?>" port)) (parser-error port "Failed to find ?> terminating the PI")))
(define (ssax:read-pi-body-as-string port) (ssax:skip-S port) (string-concatenate/shared (let loop () (let ((pi-fragment (next-token (quote ()) (quote (#\?)) "reading PI content" port))) (if (eqv? #\> (peek-next-char port)) (begin (read-char port) (cons pi-fragment (quote ()))) (cons* pi-fragment "?" (loop)))))))
(define (ssax:skip-internal-dtd port) (if (not (find-string-from-port? "]>" port)) (parser-error port "Failed to find ]> terminating the internal DTD subset")))
(define ssax:read-cdata-body (let ((cdata-delimiters (list char-return #\newline #\] #\&))) (lambda (port str-handler seed) (let loop ((seed seed)) (let ((fragment (next-token (quote ()) cdata-delimiters "reading CDATA" port))) (case (read-char port) ((#\newline) (loop (str-handler fragment nl seed))) ((#\]) (if (not (eqv? (peek-char port) #\])) (loop (str-handler fragment "]" seed)) (let check-after-second-braket ((seed (if (string-null? fragment) seed (str-handler fragment "" seed)))) (case (peek-next-char port) ((#\>) (read-char port) seed) ((#\]) (check-after-second-braket (str-handler "]" "" seed))) (else (loop (str-handler "]]" "" seed))))))) ((#\&) (let ((ent-ref (next-token-of (lambda (c) (and (not (eof-object? c)) (char-alphabetic? c) c)) port))) (cond ((and (string=? "gt" ent-ref) (eqv? (peek-char port) #\;)) (read-char port) (loop (str-handler fragment ">" seed))) (else (loop (str-handler ent-ref "" (str-handler fragment "&" seed))))))) (else (if (eqv? (peek-char port) #\newline) (read-char port)) (loop (str-handler fragment nl seed)))))))))
(define (ssax:read-char-ref port) (let* ((base (cond ((eqv? (peek-char port) #\x) (read-char port) 16) (else 10))) (name (next-token (quote ()) (quote (#\;)) "XML [66]" port)) (char-code (string->number name base))) (read-char port) (if (integer? char-code) (ucscode->char char-code) (parser-error port "[wf-Legalchar] broken for '" name "'"))))
(define ssax:predefined-parsed-entities (quasiquote (((unquote (string->symbol "amp")) . "&") ((unquote (string->symbol "lt")) . "<") ((unquote (string->symbol "gt")) . ">") ((unquote (string->symbol "apos")) . "'") ((unquote (string->symbol "quot")) . "\""))))
(define (ssax:handle-parsed-entity port name entities content-handler str-handler seed) (cond ((assq name entities) => (lambda (decl-entity) (let ((ent-body (cdr decl-entity)) (new-entities (cons (cons name #f) entities))) (cond ((string? ent-body) (call-with-input-string ent-body (lambda (port) (content-handler port new-entities seed)))) ((procedure? ent-body) (let ((port (ent-body))) (begin0 (content-handler port new-entities seed) (close-input-port port)))) (else (parser-error port "[norecursion] broken for " name)))))) ((assq name ssax:predefined-parsed-entities) => (lambda (decl-entity) (str-handler (cdr decl-entity) "" seed))) (else (parser-error port "[wf-entdeclared] broken for " name))))
(define (make-empty-attlist) (quote ()))
(define (attlist-add attlist name-value) (if (null? attlist) (cons name-value attlist) (case (name-compare (car name-value) (caar attlist)) ((=) #f) ((<) (cons name-value attlist)) (else (cons (car attlist) (attlist-add (cdr attlist) name-value))))))
(define attlist-null? null?)
(define (attlist-remove-top attlist) (values (car attlist) (cdr attlist)))
(define (attlist->alist attlist) attlist)
(define attlist-fold fold)
(define ssax:read-attributes (let ((value-delimeters (append ssax:S-chars (quote (#\< #\&))))) (define (read-attrib-value delimiter port entities prev-fragments) (let* ((new-fragments (cons (next-token (quote ()) (cons delimiter value-delimeters) "XML [10]" port) prev-fragments)) (cterm (read-char port))) (cond ((or (eof-object? cterm) (eqv? cterm delimiter)) new-fragments) ((eqv? cterm char-return) (if (eqv? (peek-char port) #\newline) (read-char port)) (read-attrib-value delimiter port entities (cons " " new-fragments))) ((memv cterm ssax:S-chars) (read-attrib-value delimiter port entities (cons " " new-fragments))) ((eqv? cterm #\&) (cond ((eqv? (peek-char port) #\#) (read-char port) (read-attrib-value delimiter port entities (cons (string (ssax:read-char-ref port)) new-fragments))) (else (read-attrib-value delimiter port entities (read-named-entity port entities new-fragments))))) (else (parser-error port "[CleanAttrVals] broken"))))) (define (read-named-entity port entities fragments) (let ((name (ssax:read-NCName port))) (assert-curr-char (quote (#\;)) "XML [68]" port) (ssax:handle-parsed-entity port name entities (lambda (port entities fragments) (read-attrib-value (quote *eof*) port entities fragments)) (lambda (str1 str2 fragments) (if (equal? "" str2) (cons str1 fragments) (cons* str2 str1 fragments))) fragments))) (lambda (port entities) (let loop ((attr-list (make-empty-attlist))) (if (not (ssax:ncname-starting-char? (ssax:skip-S port))) attr-list (let ((name (ssax:read-QName port))) (ssax:skip-S port) (assert-curr-char (quote (#\=)) "XML [25]" port) (ssax:skip-S port) (let ((delimiter (assert-curr-char (quote (#\' #\")) "XML [10]" port))) (loop (or (attlist-add attr-list (cons name (string-concatenate-reverse/shared (read-attrib-value delimiter port entities (quote ()))))) (parser-error port "[uniqattspec] broken for " name))))))))))
(define (ssax:resolve-name port unres-name namespaces apply-default-ns?) (cond ((pair? unres-name) (cons (cond ((assq (car unres-name) namespaces) => cadr) ((eq? (car unres-name) ssax:Prefix-XML) ssax:Prefix-XML) (else (parser-error port "[nsc-NSDeclared] broken; prefix " (car unres-name)))) (cdr unres-name))) (apply-default-ns? (let ((default-ns (assq (quote *DEFAULT*) namespaces))) (if (and default-ns (cadr default-ns)) (cons (cadr default-ns) unres-name) unres-name))) (else unres-name)))
(define (ssax:uri-string->symbol uri-str) (string->symbol uri-str))
(define ssax:complete-start-tag (let ((xmlns (string->symbol "xmlns")) (largest-dummy-decl-attr (list ssax:largest-unres-name #f #f #f))) (define (validate-attrs port attlist decl-attrs) (define (add-default-decl decl-attr result) (let*-values (((attr-name content-type use-type default-value) (apply values decl-attr))) (and (eq? use-type (quote REQUIRED)) (parser-error port "[RequiredAttr] broken for" attr-name)) (if default-value (cons (cons attr-name default-value) result) result))) (let loop ((attlist attlist) (decl-attrs decl-attrs) (result (quote ()))) (if (attlist-null? attlist) (attlist-fold add-default-decl result decl-attrs) (let*-values (((attr attr-others) (attlist-remove-top attlist)) ((decl-attr other-decls) (if (attlist-null? decl-attrs) (values largest-dummy-decl-attr decl-attrs) (attlist-remove-top decl-attrs)))) (case (name-compare (car attr) (car decl-attr)) ((<) (if (or (eq? xmlns (car attr)) (and (pair? (car attr)) (eq? xmlns (caar attr)))) (loop attr-others decl-attrs (cons attr result)) (parser-error port "[ValueType] broken for " attr))) ((>) (loop attlist other-decls (add-default-decl decl-attr result))) (else (let*-values (((attr-name content-type use-type default-value) (apply values decl-attr))) (cond ((eq? use-type (quote FIXED)) (or (equal? (cdr attr) default-value) (parser-error port "[FixedAttr] broken for " attr-name))) ((eq? content-type (quote CDATA)) #t) ((pair? content-type) (or (member (cdr attr) content-type) (parser-error port "[enum] broken for " attr-name "=" (cdr attr)))) (else (ssax:warn port "declared content type " content-type " not verified yet"))) (loop attr-others other-decls (cons attr result))))))))) (define (add-ns port prefix uri-str namespaces) (and (equal? "" uri-str) (parser-error port "[dt-NSName] broken for " prefix)) (let ((uri-symbol (ssax:uri-string->symbol uri-str))) (let loop ((nss namespaces)) (cond ((null? nss) (cons (cons* prefix uri-symbol uri-symbol) namespaces)) ((eq? uri-symbol (cddar nss)) (cons (cons* prefix (cadar nss) uri-symbol) namespaces)) (else (loop (cdr nss))))))) (define (adjust-namespace-decl port attrs namespaces) (let loop ((attrs attrs) (proper-attrs (quote ())) (namespaces namespaces)) (cond ((null? attrs) (values proper-attrs namespaces)) ((eq? xmlns (caar attrs)) (loop (cdr attrs) proper-attrs (if (equal? "" (cdar attrs)) (cons (cons* (quote *DEFAULT*) #f #f) namespaces) (add-ns port (quote *DEFAULT*) (cdar attrs) namespaces)))) ((and (pair? (caar attrs)) (eq? xmlns (caaar attrs))) (loop (cdr attrs) proper-attrs (add-ns port (cdaar attrs) (cdar attrs) namespaces))) (else (loop (cdr attrs) (cons (car attrs) proper-attrs) namespaces))))) (lambda (tag-head port elems entities namespaces) (let*-values (((attlist) (ssax:read-attributes port entities)) ((empty-el-tag?) (begin (ssax:skip-S port) (and (eqv? #\/ (assert-curr-char (quote (#\> #\/)) "XML [40], XML [44], no '>'" port)) (assert-curr-char (quote (#\>)) "XML [44], no '>'" port)))) ((elem-content decl-attrs) (if elems (cond ((assoc tag-head elems) => (lambda (decl-elem) (values (if empty-el-tag? (quote EMPTY-TAG) (cadr decl-elem)) (caddr decl-elem)))) (else (parser-error port "[elementvalid] broken, no decl for " tag-head))) (values (if empty-el-tag? (quote EMPTY-TAG) (quote ANY)) #f))) ((merged-attrs) (if decl-attrs (validate-attrs port attlist decl-attrs) (attlist->alist attlist))) ((proper-attrs namespaces) (adjust-namespace-decl port merged-attrs namespaces))) (values (ssax:resolve-name port tag-head namespaces #t) (fold-right (lambda (name-value attlist) (or (attlist-add attlist (cons (ssax:resolve-name port (car name-value) namespaces #f) (cdr name-value))) (parser-error port "[uniqattspec] after NS expansion broken for " name-value))) (make-empty-attlist) proper-attrs) namespaces elem-content)))))
(define (ssax:read-external-id port) (let ((discriminator (ssax:read-NCName port))) (assert-curr-char ssax:S-chars "space after SYSTEM or PUBLIC" port) (ssax:skip-S port) (let ((delimiter (assert-curr-char (quote (#\' #\")) "XML [11], XML [12]" port))) (cond ((eq? discriminator (string->symbol "SYSTEM")) (begin0 (next-token (quote ()) (list delimiter) "XML [11]" port) (read-char port))) ((eq? discriminator (string->symbol "PUBLIC")) (skip-until (list delimiter) port) (assert-curr-char ssax:S-chars "space after PubidLiteral" port) (ssax:skip-S port) (let* ((delimiter (assert-curr-char (quote (#\' #\")) "XML [11]" port)) (systemid (next-token (quote ()) (list delimiter) "XML [11]" port))) (read-char port) systemid)) (else (parser-error port "XML [75], " discriminator " rather than SYSTEM or PUBLIC"))))))
(define (ssax:scan-Misc port) (let loop ((c (ssax:skip-S port))) (cond ((eof-object? c) c) ((not (char=? c #\<)) (parser-error port "XML [22], char '" c "' unexpected")) (else (let ((token (ssax:read-markup-token port))) (case (xml-token-kind token) ((COMMENT) (loop (ssax:skip-S port))) ((PI DECL START) token) (else (parser-error port "XML [22], unexpected token of kind " (xml-token-kind token)))))))))
(define ssax:read-char-data (let ((terminators-usual (list #\< #\& char-return)) (terminators-usual-eof (list #\< (quote *eof*) #\& char-return)) (handle-fragment (lambda (fragment str-handler seed) (if (string-null? fragment) seed (str-handler fragment "" seed))))) (lambda (port expect-eof? str-handler seed) (if (eqv? #\< (peek-char port)) (let ((token (ssax:read-markup-token port))) (case (xml-token-kind token) ((START END) (values seed token)) ((CDSECT) (let ((seed (ssax:read-cdata-body port str-handler seed))) (ssax:read-char-data port expect-eof? str-handler seed))) ((COMMENT) (ssax:read-char-data port expect-eof? str-handler seed)) (else (values seed token)))) (let ((char-data-terminators (if expect-eof? terminators-usual-eof terminators-usual))) (let loop ((seed seed)) (let* ((fragment (next-token (quote ()) char-data-terminators "reading char data" port)) (term-char (peek-char port))) (if (eof-object? term-char) (values (handle-fragment fragment str-handler seed) term-char) (case term-char ((#\<) (let ((token (ssax:read-markup-token port))) (case (xml-token-kind token) ((CDSECT) (loop (ssax:read-cdata-body port str-handler (handle-fragment fragment str-handler seed)))) ((COMMENT) (loop (handle-fragment fragment str-handler seed))) (else (values (handle-fragment fragment str-handler seed) token))))) ((#\&) (case (peek-next-char port) ((#\#) (read-char port) (loop (str-handler fragment (string (ssax:read-char-ref port)) seed))) (else (let ((name (ssax:read-NCName port))) (assert-curr-char (quote (#\;)) "XML [68]" port) (values (handle-fragment fragment str-handler seed) (make-xml-token (quote ENTITY-REF) name)))))) (else (if (eqv? (peek-next-char port) #\newline) (read-char port)) (loop (str-handler fragment (string #\newline) seed))))))))))))
(define (ssax:assert-token token kind gi error-cont) (or (and (xml-token? token) (eq? kind (xml-token-kind token)) (equal? gi (xml-token-head token))) (error-cont token kind gi)))
(define-syntax ssax:make-pi-parser (syntax-rules () ((ssax:make-pi-parser orig-handlers) (letrec-syntax ((loop (syntax-rules (*DEFAULT*) ((loop () #f accum port target seed) (make-case ((else (ssax:warn port "Skipping PI: " target nl) (ssax:skip-pi port) seed) . accum) () target)) ((loop () default accum port target seed) (make-case ((else (default port target seed)) . accum) () target)) ((loop ((*DEFAULT* . default) . handlers) old-def accum port target seed) (loop handlers default accum port target seed)) ((loop ((tag . handler) . handlers) default accum port target seed) (loop handlers default (((tag) (handler port target seed)) . accum) port target seed)))) (make-case (syntax-rules () ((make-case () clauses target) (case target . clauses)) ((make-case (clause . clauses) accum target) (make-case clauses (clause . accum) target))))) (lambda (port target seed) (loop orig-handlers #f () port target seed))))))
(define-syntax ssax:make-elem-parser (syntax-rules () ((ssax:make-elem-parser my-new-level-seed my-finish-element my-char-data-handler my-pi-handlers) (lambda (start-tag-head port elems entities namespaces preserve-ws? seed) (define xml-space-gi (cons ssax:Prefix-XML (string->symbol "space"))) (let handle-start-tag ((start-tag-head start-tag-head) (port port) (entities entities) (namespaces namespaces) (preserve-ws? preserve-ws?) (parent-seed seed)) (let*-values (((elem-gi attributes namespaces expected-content) (ssax:complete-start-tag start-tag-head port elems entities namespaces)) ((seed) (my-new-level-seed elem-gi attributes namespaces expected-content parent-seed))) (case expected-content ((EMPTY-TAG) (my-finish-element elem-gi attributes namespaces parent-seed seed)) ((EMPTY) (ssax:assert-token (and (eqv? #\< (ssax:skip-S port)) (ssax:read-markup-token port)) (quote END) start-tag-head (lambda (token exp-kind exp-head) (parser-error port "[elementvalid] broken for " token " while expecting " exp-kind exp-head))) (my-finish-element elem-gi attributes namespaces parent-seed seed)) (else (let ((preserve-ws? (cond ((assoc xml-space-gi attributes) => (lambda (name-value) (equal? "preserve" (cdr name-value)))) (else preserve-ws?)))) (let loop ((port port) (entities entities) (expect-eof? #f) (seed seed)) (let*-values (((seed term-token) (ssax:read-char-data port expect-eof? my-char-data-handler seed))) (if (eof-object? term-token) seed (case (xml-token-kind term-token) ((END) (ssax:assert-token term-token (quote END) start-tag-head (lambda (token exp-kind exp-head) (parser-error port "[GIMatch] broken for " term-token " while expecting " exp-kind exp-head))) (my-finish-element elem-gi attributes namespaces parent-seed seed)) ((PI) (let ((seed ((ssax:make-pi-parser my-pi-handlers) port (xml-token-head term-token) seed))) (loop port entities expect-eof? seed))) ((ENTITY-REF) (let ((seed (ssax:handle-parsed-entity port (xml-token-head term-token) entities (lambda (port entities seed) (loop port entities #t seed)) my-char-data-handler seed))) (loop port entities expect-eof? seed))) ((START) (if (eq? expected-content (quote PCDATA)) (parser-error port "[elementvalid] broken for " elem-gi " with char content only; unexpected token " term-token)) (let ((seed (handle-start-tag (xml-token-head term-token) port entities namespaces preserve-ws? seed))) (loop port entities expect-eof? seed))) (else (parser-error port "XML [43] broken for " term-token)))))))))))))))
(define-syntax ssax:make-parser/positional-args (syntax-rules () ((ssax:make-parser/positional-args *handler-DOCTYPE *handler-UNDECL-ROOT *handler-DECL-ROOT *handler-NEW-LEVEL-SEED *handler-FINISH-ELEMENT *handler-CHAR-DATA-HANDLER *handler-PI) (lambda (port seed) (define (handle-decl port token-head seed) (or (eq? (string->symbol "DOCTYPE") token-head) (parser-error port "XML [22], expected DOCTYPE declaration, found " token-head)) (assert-curr-char ssax:S-chars "XML [28], space after DOCTYPE" port) (ssax:skip-S port) (let*-values (((docname) (ssax:read-QName port)) ((systemid) (and (ssax:ncname-starting-char? (ssax:skip-S port)) (ssax:read-external-id port))) ((internal-subset?) (begin (ssax:skip-S port) (eqv? #\[ (assert-curr-char (quote (#\> #\[)) "XML [28], end-of-DOCTYPE" port)))) ((elems entities namespaces seed) (*handler-DOCTYPE port docname systemid internal-subset? seed))) (scan-for-significant-prolog-token-2 port elems entities namespaces seed))) (define (scan-for-significant-prolog-token-1 port seed) (let ((token (ssax:scan-Misc port))) (if (eof-object? token) (parser-error port "XML [22], unexpected EOF") (case (xml-token-kind token) ((PI) (let ((seed ((ssax:make-pi-parser *handler-PI) port (xml-token-head token) seed))) (scan-for-significant-prolog-token-1 port seed))) ((DECL) (handle-decl port (xml-token-head token) seed)) ((START) (let*-values (((elems entities namespaces seed) (*handler-UNDECL-ROOT (xml-token-head token) seed))) (element-parser (xml-token-head token) port elems entities namespaces #f seed))) (else (parser-error port "XML [22], unexpected markup " token)))))) (define (scan-for-significant-prolog-token-2 port elems entities namespaces seed) (let ((token (ssax:scan-Misc port))) (if (eof-object? token) (parser-error port "XML [22], unexpected EOF") (case (xml-token-kind token) ((PI) (let ((seed ((ssax:make-pi-parser *handler-PI) port (xml-token-head token) seed))) (scan-for-significant-prolog-token-2 port elems entities namespaces seed))) ((START) (element-parser (xml-token-head token) port elems entities namespaces #f (*handler-DECL-ROOT (xml-token-head token) seed))) (else (parser-error port "XML [22], unexpected markup " token)))))) (define element-parser (ssax:make-elem-parser *handler-NEW-LEVEL-SEED *handler-FINISH-ELEMENT *handler-CHAR-DATA-HANDLER *handler-PI)) (scan-for-significant-prolog-token-1 port seed)))))
(define-syntax ssax:define-labeled-arg-macro (syntax-rules () ((ssax:define-labeled-arg-macro labeled-arg-macro-name (positional-macro-name (arg-name . arg-def) ...)) (define-syntax labeled-arg-macro-name (syntax-rules () ((labeled-arg-macro-name . kw-val-pairs) (letrec-syntax ((find (syntax-rules (arg-name ...) ((find k-args (arg-name . default) arg-name val . others) (next val . k-args)) ... ((find k-args key arg-no-match-name val . others) (find k-args key . others)) ((find k-args (arg-name default)) (next default . k-args)) ...)) (next (syntax-rules () ((next val vals key . keys) (find ((val . vals) . keys) key . kw-val-pairs)) ((next val vals) (rev-apply (val) vals)))) (rev-apply (syntax-rules () ((rev-apply form (x . xs)) (rev-apply (x . form) xs)) ((rev-apply form ()) form)))) (next positional-macro-name () (arg-name . arg-def) ...))))))))
(ssax:define-labeled-arg-macro ssax:make-parser (ssax:make-parser/positional-args (DOCTYPE (lambda (port docname systemid internal-subset? seed) (when internal-subset? (ssax:warn port "Internal DTD subset is not currently handled ") (ssax:skip-internal-dtd port)) (ssax:warn port "DOCTYPE DECL " docname " " systemid " found and skipped") (values #f (quote ()) (quote ()) seed))) (UNDECL-ROOT (lambda (elem-gi seed) (values #f (quote ()) (quote ()) seed))) (DECL-ROOT (lambda (elem-gi seed) seed)) (NEW-LEVEL-SEED) (FINISH-ELEMENT) (CHAR-DATA-HANDLER) (PI ())))
(define (ssax:reverse-collect-str fragments) (cond ((null? fragments) (quote ())) ((null? (cdr fragments)) fragments) (else (let loop ((fragments fragments) (result (quote ())) (strs (quote ()))) (cond ((null? fragments) (if (null? strs) result (cons (string-concatenate/shared strs) result))) ((string? (car fragments)) (loop (cdr fragments) result (cons (car fragments) strs))) (else (loop (cdr fragments) (cons (car fragments) (if (null? strs) result (cons (string-concatenate/shared strs) result))) (quote ()))))))))
(define (ssax:reverse-collect-str-drop-ws fragments) (cond ((null? fragments) (quote ())) ((null? (cdr fragments)) (if (and (string? (car fragments)) (string-whitespace? (car fragments))) (quote ()) fragments)) (else (let loop ((fragments fragments) (result (quote ())) (strs (quote ())) (all-whitespace? #t)) (cond ((null? fragments) (if all-whitespace? result (cons (string-concatenate/shared strs) result))) ((string? (car fragments)) (loop (cdr fragments) result (cons (car fragments) strs) (and all-whitespace? (string-whitespace? (car fragments))))) (else (loop (cdr fragments) (cons (car fragments) (if all-whitespace? result (cons (string-concatenate/shared strs) result))) (quote ()) #t)))))))
(define (ssax:xml->sxml port namespace-prefix-assig) (letrec ((namespaces (map (lambda (el) (cons* #f (car el) (ssax:uri-string->symbol (cdr el)))) namespace-prefix-assig)) (RES-NAME->SXML (lambda (res-name) (string->symbol (string-append (symbol->string (car res-name)) ":" (symbol->string (cdr res-name))))))) (let ((result (reverse ((ssax:make-parser NEW-LEVEL-SEED (lambda (elem-gi attributes namespaces expected-content seed) (quote ())) FINISH-ELEMENT (lambda (elem-gi attributes namespaces parent-seed seed) (let ((seed (ssax:reverse-collect-str-drop-ws seed)) (attrs (attlist-fold (lambda (attr accum) (cons (list (if (symbol? (car attr)) (car attr) (RES-NAME->SXML (car attr))) (cdr attr)) accum)) (quote ()) attributes))) (cons (cons (if (symbol? elem-gi) elem-gi (RES-NAME->SXML elem-gi)) (if (null? attrs) seed (cons (cons (quote @) attrs) seed))) parent-seed))) CHAR-DATA-HANDLER (lambda (string1 string2 seed) (if (string-null? string2) (cons string1 seed) (cons* string2 string1 seed))) DOCTYPE (lambda (port docname systemid internal-subset? seed) (when internal-subset? (ssax:warn port "Internal DTD subset is not currently handled ") (ssax:skip-internal-dtd port)) (ssax:warn port "DOCTYPE DECL " docname " " systemid " found and skipped") (values #f (quote ()) namespaces seed)) UNDECL-ROOT (lambda (elem-gi seed) (values #f (quote ()) namespaces seed)) PI ((*DEFAULT* lambda (port pi-tag seed) (cons (list (quote *PI*) pi-tag (ssax:read-pi-body-as-string port)) seed)))) port (quote ()))))) (cons (quote *TOP*) (if (null? namespace-prefix-assig) result (cons (list (quote @) (cons (quote *NAMESPACES*) (map (lambda (ns) (list (car ns) (cdr ns))) namespace-prefix-assig))) result))))))
(provide (all-defined)))

View File

@ -0,0 +1,71 @@
; Module header is generated automatically
#cs(module ssax-prim mzscheme
(require "ssax-code.ss")
;=========================================================================
; This is a multi parser constructor function
;------------------------------------------------
; Some Oleg Kiselyov's features from SSAX:XML->SXML
; Returns
(define (RES-NAME->SXML res-name)
(string->symbol
(string-append
(symbol->string (car res-name))
":"
(symbol->string (cdr res-name)))))
; given the list of fragments (some of which are text strings)
; reverse the list and concatenate adjacent text strings
(define (reverse-collect-str fragments)
(if (null? fragments) '() ; a shortcut
(let loop ((fragments fragments) (result '()) (strs '()))
(cond
((null? fragments)
(if (null? strs) result
(cons (apply string-append strs) result)))
((string? (car fragments))
(loop (cdr fragments) result (cons (car fragments) strs)))
(else
(loop (cdr fragments)
(cons
(car fragments)
(if (null? strs) result
(cons (apply string-append strs) result)))
'()))))))
; given the list of fragments (some of which are text strings)
; reverse the list and concatenate adjacent text strings
; We also drop "unsignificant" whitespace, that is, whitespace
; in front, behind and between elements. The whitespace that
; is included in character data is not affected.
(define (reverse-collect-str-drop-ws fragments)
(cond
((null? fragments) '()) ; a shortcut
((and (string? (car fragments)) ; another shortcut
(null? (cdr fragments)) ; remove trailing ws
(string-whitespace? (car fragments))) '())
(else
(let loop ((fragments fragments) (result '()) (strs '())
(all-whitespace? #t))
(cond
((null? fragments)
(if all-whitespace? result ; remove leading ws
(cons (apply string-append strs) result)))
((string? (car fragments))
(loop (cdr fragments) result (cons (car fragments) strs)
(and all-whitespace?
(string-whitespace? (car fragments)))))
(else
(loop (cdr fragments)
(cons
(car fragments)
(if all-whitespace? result
(cons (apply string-append strs) result)))
'() #t)))))))
(provide (all-defined)))

View File

@ -0,0 +1,37 @@
#cs(module ssax mzscheme
(require "common.ss")
(require "myenv.ss")
(require "util.ss")
(require "parse-error.ss")
(require "input-parse.ss")
(require "look-for-str.ss")
(require "char-encoding.ss")
(require "ssax-code.ss")
(require "SXML-tree-trans.ss")
(require "sxpathlib.ss")
(require "srfi-12.ss")
(require "mime.ss")
(require "http.ss")
(require "access-remote.ss")
(require "id.ss")
(require "xlink-parser.ss")
(require "ssax-prim.ss")
(require "multi-parser.ss")
(provide (all-from "common.ss"))
(provide (all-from "myenv.ss"))
(provide (all-from "util.ss"))
(provide (all-from "parse-error.ss"))
(provide (all-from "input-parse.ss"))
(provide (all-from "look-for-str.ss"))
(provide (all-from "char-encoding.ss"))
(provide (all-from "ssax-code.ss"))
(provide (all-from "SXML-tree-trans.ss"))
;(provide (all-from "sxpathlib.ss"))
(provide (all-from "srfi-12.ss"))
(provide (all-from "mime.ss"))
(provide (all-from "http.ss"))
(provide (all-from "access-remote.ss"))
(provide (all-from "id.ss"))
(provide (all-from "xlink-parser.ss"))
(provide (all-from "ssax-prim.ss"))
(provide (all-from "multi-parser.ss")))

View File

@ -0,0 +1,538 @@
; Module header is generated automatically
#cs(module sxpathlib mzscheme
(require (rename (lib "pretty.ss") pp pretty-print))
(require "common.ss")
(require "myenv.ss")
(require (lib "string.ss" "srfi/13"))
(require "util.ss")
;; XML processing in Scheme
; SXPath -- SXML Query Language
;
; $Id: sxpathlib.scm,v 3.918 2004/02/05 22:52:33 kl Exp kl $
;
; This code is in Public Domain
; It's based on SXPath by Oleg Kiselyov, and multiple improvements
; implemented by Dmitry Lizorkin.
;
; The list of differences from original SXPath.scm my be found in changelog.txt
;
; Kirill Lisovsky lisovsky@acm.org
;
; * * *
;
; SXPath is a query language for SXML, an instance of XML Information
; set (Infoset) in the form of s-expressions. See SSAX.scm for the
; definition of SXML and more details. SXPath is also a translation into
; Scheme of an XML Path Language, XPath:
; http://www.w3.org/TR/xpath
; XPath and SXPath describe means of selecting a set of Infoset's items
; or their properties.
;
; To facilitate queries, XPath maps the XML Infoset into an explicit
; tree, and introduces important notions of a location path and a
; current, context node. A location path denotes a selection of a set of
; nodes relative to a context node. Any XPath tree has a distinguished,
; root node -- which serves as the context node for absolute location
; paths. Location path is recursively defined as a location step joined
; with a location path. A location step is a simple query of the
; database relative to a context node. A step may include expressions
; that further filter the selected set. Each node in the resulting set
; is used as a context node for the adjoining location path. The result
; of the step is a union of the sets returned by the latter location
; paths.
;
; The SXML representation of the XML Infoset (see SSAX.scm) is rather
; suitable for querying as it is. Bowing to the XPath specification,
; we will refer to SXML information items as 'Nodes':
; <Node> ::= <Element> | <attributes-coll> | <attrib>
; | "text string" | <PI>
; This production can also be described as
; <Node> ::= (name . <Nodelist>) | "text string"
; An (ordered) set of nodes is just a list of the constituent nodes:
; <Nodelist> ::= (<Node> ...)
; Nodelists, and Nodes other than text strings are both lists. A
; <Nodelist> however is either an empty list, or a list whose head is not
; a symbol. A symbol at the head of a node is either an XML name (in
; which case it's a tag of an XML element), or an administrative name
; such as '@'. This uniform list representation makes processing rather
; simple and elegant, while avoiding confusion. The multi-branch tree
; structure formed by the mutually-recursive datatypes <Node> and
; <Nodelist> lends itself well to processing by functional languages.
;
; A location path is in fact a composite query over an XPath tree or
; its branch. A singe step is a combination of a projection, selection
; or a transitive closure. Multiple steps are combined via join and
; union operations. This insight allows us to _elegantly_ implement
; XPath as a sequence of projection and filtering primitives --
; converters -- joined by _combinators_. Each converter takes a node
; and returns a nodelist which is the result of the corresponding query
; relative to that node. A converter can also be called on a set of
; nodes. In that case it returns a union of the corresponding queries over
; each node in the set. The union is easily implemented as a list
; append operation as all nodes in a SXML tree are considered
; distinct, by XPath conventions. We also preserve the order of the
; members in the union. Query combinators are high-order functions:
; they take converter(s) (which is a Node|Nodelist -> Nodelist function)
; and compose or otherwise combine them. We will be concerned with
; only relative location paths [XPath]: an absolute location path is a
; relative path applied to the root node.
;
; Similarly to XPath, SXPath defines full and abbreviated notations
; for location paths. In both cases, the abbreviated notation can be
; mechanically expanded into the full form by simple rewriting
; rules. In case of SXPath the corresponding rules are given as
; comments to a sxpath function, below. The regression test suite at
; the end of this file shows a representative sample of SXPaths in
; both notations, juxtaposed with the corresponding XPath
; expressions. Most of the samples are borrowed literally from the
; XPath specification, while the others are adjusted for our running
; example, tree1.
;
;=============================================================================
; Basic converters and applicators
; A converter is a function
; type Converter = Node|Nodelist -> Nodelist
; A converter can also play a role of a predicate: in that case, if a
; converter, applied to a node or a nodelist, yields a non-empty
; nodelist, the converter-predicate is deemed satisfied. Throughout
; this file a nil nodelist is equivalent to #f in denoting a failure.
; Returns #t if given object is a nodelist
(define (nodeset? x)
(or (and (pair? x) (not (symbol? (car x)))) (null? x)))
; If x is a nodelist - returns it as is, otherwise wrap it in a list.
(define (as-nodeset x)
(if (nodeset? x) x (list x)))
;-----------------------------------------------------------------------------
; Node test
; The following functions implement 'Node test's as defined in
; Sec. 2.3 of XPath document. A node test is one of the components of a
; location step. It is also a converter-predicate in SXPath.
; Predicate which returns #t if <obj> is SXML element, otherwise returns #f.
(define (sxml:element? obj)
(and (pair? obj)
(symbol? (car obj))
(not (memq (car obj)
; '(@ @@ *PI* *COMMENT* *ENTITY* *NAMESPACES*)
; the line above is a workaround for old SXML
'(@ @@ *PI* *COMMENT* *ENTITY*)))))
; The function ntype-names?? takes a list of acceptable node names as a
; criterion and returns a function, which, when applied to a node,
; will return #t if the node name is present in criterion list and #f
; othervise.
; ntype-names?? :: ListOfNames -> Node -> Boolean
(define (ntype-names?? crit)
(lambda(node)
(and (pair? node)
(memq (car node) crit))))
; The function ntype?? takes a type criterion and returns
; a function, which, when applied to a node, will tell if the node satisfies
; the test.
; ntype?? :: Crit -> Node -> Boolean
;
; The criterion 'crit' is
; one of the following symbols:
; id - tests if the Node has the right name (id)
; @ - tests if the Node is an <attributes-list>
; * - tests if the Node is an <Element>
; *text* - tests if the Node is a text node
; *data* - tests if the Node is a data node
; (text, number, boolean, etc., but not pair)
; *PI* - tests if the Node is a PI node
; *COMMENT* - tests if the Node is a COMMENT node
; *ENTITY* - tests if the Node is a ENTITY node
; *any* - #t for any type of Node
(define (ntype?? crit)
(case crit
((*) sxml:element?)
((*any*) (lambda (node) #t))
((*text*) (lambda (node) (string? node)))
((*data*) (lambda (node) (not (pair? node))))
(else (lambda (node) (and (pair? node) (eq? crit (car node)))))
))
; This function takes a namespace-id, and returns a predicate
; Node -> Boolean, which is #t for nodes with this very namespace-id.
; ns-id is a string
; (ntype-namespace-id?? #f) will be #t for nodes with non-qualified names.
(define (ntype-namespace-id?? ns-id)
(lambda (node)
(and (pair? node)
(not (memq (car node)
'(@ @@ *PI* *COMMENT* *ENTITY*)))
(let ((nm (symbol->string (car node))))
(cond
((string-rindex nm #\:)
=> (lambda (pos)
(and
(= pos (string-length ns-id))
(string-prefix? ns-id nm))))
(else (not ns-id)))))))
;^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
; This function takes a predicate and returns it complemented
; That is if the given predicate yelds #f or '() the complemented one
; yields the given node (#t) and vice versa.
(define (sxml:complement pred)
(lambda(node)
(case (pred node)
((#f '()) node)
(else #f))))
; Curried equivalence converter-predicates
(define (node-eq? other)
(lambda (node)
(eq? other node)))
(define (node-equal? other)
(lambda (node)
(equal? other node)))
; node-pos:: N -> Nodelist -> Nodelist, or
; node-pos:: N -> Converter
; Select the N'th element of a Nodelist and return as a singular Nodelist;
; Return an empty nodelist if the Nth element does not exist.
; ((node-pos 1) Nodelist) selects the node at the head of the Nodelist,
; if exists; ((node-pos 2) Nodelist) selects the Node after that, if
; exists.
; N can also be a negative number: in that case the node is picked from
; the tail of the list.
; ((node-pos -1) Nodelist) selects the last node of a non-empty nodelist;
; ((node-pos -2) Nodelist) selects the last but one node, if exists.
(define (node-pos n)
(lambda (nodelist)
(cond
((not (nodeset? nodelist)) '())
((null? nodelist) nodelist)
((eqv? n 1) (list (car nodelist)))
((negative? n) ((node-pos (+ n 1 (length nodelist))) nodelist))
(else
(assert (positive? n))
((node-pos (-- n)) (cdr nodelist))))))
; filter:: Converter -> Converter
; A filter applicator, which introduces a filtering context. The argument
; converter is considered a predicate, with either #f or nil result meaning
; failure.
(define (sxml:filter pred?)
(lambda (lst) ; a nodelist or a node (will be converted to a singleton nset)
(let loop ((lst (as-nodeset lst))
(res '()))
(if (null? lst)
(reverse res)
(let ((pred-result (pred? (car lst))))
(loop (cdr lst)
(if (and pred-result (not (null? pred-result)))
(cons (car lst) res)
res)))))))
; take-until:: Converter -> Converter, or
; take-until:: Pred -> Node|Nodelist -> Nodelist
; Given a converter-predicate and a nodelist, apply the predicate to
; each element of the nodelist, until the predicate yields anything but #f or
; nil. Return the elements of the input nodelist that have been processed
; till that moment (that is, which fail the predicate).
; take-until is a variation of the filter above: take-until passes
; elements of an ordered input set till (but not including) the first
; element that satisfies the predicate.
; The nodelist returned by ((take-until (not pred)) nset) is a subset --
; to be more precise, a prefix -- of the nodelist returned by
; ((filter pred) nset)
(define (take-until pred?)
(lambda (lst) ; a nodelist or a node (will be converted to a singleton nset)
(let loop ((lst (as-nodeset lst)))
(if (null? lst) lst
(let ((pred-result (pred? (car lst))))
(if (and pred-result (not (null? pred-result)))
'()
(cons (car lst) (loop (cdr lst)))))
))))
; take-after:: Converter -> Converter, or
; take-after:: Pred -> Node|Nodelist -> Nodelist
; Given a converter-predicate and a nodelist, apply the predicate to
; each element of the nodelist, until the predicate yields anything but #f or
; nil. Return the elements of the input nodelist that have not been processed:
; that is, return the elements of the input nodelist that follow the first
; element that satisfied the predicate.
; take-after along with take-until partition an input nodelist into three
; parts: the first element that satisfies a predicate, all preceding
; elements and all following elements.
(define (take-after pred?)
(lambda (lst) ; a nodelist or a node (will be converted to a singleton nset)
(let loop ((lst (as-nodeset lst)))
(if (null? lst) lst
(let ((pred-result (pred? (car lst))))
(if (and pred-result (not (null? pred-result)))
(cdr lst)
(loop (cdr lst))))
))))
; Apply proc to each element of lst and return the list of results.
; if proc returns a nodelist, splice it into the result
;
; From another point of view, map-union is a function Converter->Converter,
; which places an argument-converter in a joining context.
(define (map-union proc lst)
(if (null? lst) lst
(let ((proc-res (proc (car lst))))
((if (nodeset? proc-res) append cons)
proc-res (map-union proc (cdr lst))))))
; node-reverse :: Converter, or
; node-reverse:: Node|Nodelist -> Nodelist
; Reverses the order of nodes in the nodelist
; This basic converter is needed to implement a reverse document order
; (see the XPath Recommendation).
(define node-reverse
(lambda (node-or-nodelist)
(if (not (nodeset? node-or-nodelist)) (list node-or-nodelist)
(reverse node-or-nodelist))))
; node-trace:: String -> Converter
; (node-trace title) is an identity converter. In addition it prints out
; a node or nodelist it is applied to, prefixed with the 'title'.
; This converter is very useful for debugging.
(define (node-trace title)
(lambda (node-or-nodelist)
(cout nl "-->" title " :")
(pp node-or-nodelist)
node-or-nodelist))
;------------------------------------------------------------------------------
; Converter combinators
;
; Combinators are higher-order functions that transmogrify a converter
; or glue a sequence of converters into a single, non-trivial
; converter. The goal is to arrive at converters that correspond to
; XPath location paths.
;
; From a different point of view, a combinator is a fixed, named
; _pattern_ of applying converters. Given below is a complete set of
; such patterns that together implement XPath location path
; specification. As it turns out, all these combinators can be built
; from a small number of basic blocks: regular functional composition,
; map-union and filter applicators, and the nodelist union.
; select-kids:: Pred -> Node -> Nodelist
; Given a Node, return an (ordered) subset its children that satisfy
; the Pred (a converter, actually)
; select-kids:: Pred -> Nodelist -> Nodelist
; The same as above, but select among children of all the nodes in
; the Nodelist
;
; More succinctly, the signature of this function is
; select-kids:: Converter -> Converter
(define (select-kids test-pred?)
(lambda (node) ; node or node-set
(cond
((null? node) node)
((not (pair? node)) '()) ; No children
((symbol? (car node))
((sxml:filter test-pred?) (cdr node))) ; it's a single node
(else (map-union (select-kids test-pred?) node)))))
; node-self:: Pred -> Node -> Nodelist, or
; node-self:: Converter -> Converter
; Similar to select-kids but apply to the Node itself rather
; than to its children. The resulting Nodelist will contain either one
; component, or will be empty (if the Node failed the Pred).
(define node-self sxml:filter)
; node-join:: [LocPath] -> Node|Nodelist -> Nodelist, or
; node-join:: [Converter] -> Converter
; join the sequence of location steps or paths as described
; in the title comments above.
(define (node-join . selectors)
(lambda (nodelist) ; Nodelist or node
(let loop ((nodelist nodelist) (selectors selectors))
(if (null? selectors) nodelist
(loop
(if (nodeset? nodelist)
(map-union (car selectors) nodelist)
((car selectors) nodelist))
(cdr selectors))))))
; node-reduce:: [LocPath] -> Node|Nodelist -> Nodelist, or
; node-reduce:: [Converter] -> Converter
; A regular functional composition of converters.
; From a different point of view,
; ((apply node-reduce converters) nodelist)
; is equivalent to
; (foldl apply nodelist converters)
; i.e., folding, or reducing, a list of converters with the nodelist
; as a seed.
(define (node-reduce . converters)
(lambda (nodelist) ; Nodelist or node
(let loop ((nodelist nodelist) (converters converters))
(if (null? converters) nodelist
(loop ((car converters) nodelist) (cdr converters))))))
; node-or:: [Converter] -> Converter
; This combinator applies all converters to a given node and
; produces the union of their results.
; This combinator corresponds to a union, '|' operation for XPath
; location paths.
(define (node-or . converters)
(lambda (node-or-nodelist)
(let loop ((result '()) (converters converters))
(if (null? converters) result
(loop (append result (or ((car converters) node-or-nodelist) '()))
(cdr converters))))))
; node-closure:: Converter -> Converter
; Select all _descendants_ of a node that satisfy a converter-predicate.
; This combinator is similar to select-kids but applies to
; grand... children as well.
; This combinator implements the "descendant::" XPath axis
; Conceptually, this combinator can be expressed as
; (define (node-closure f)
; (node-or
; (select-kids f)
; (node-reduce (select-kids (ntype?? '*)) (node-closure f))))
; This definition, as written, looks somewhat like a fixpoint, and it
; will run forever. It is obvious however that sooner or later
; (select-kids (ntype?? '*)) will return an empty nodelist. At
; this point further iterations will no longer affect the result and
; can be stopped.
(define (node-closure test-pred?)
(let ((kid-selector (select-kids test-pred?)))
(lambda (node) ; Nodelist or node
(let loop ((parent node) (result '()))
(if (null? parent) result
(loop (sxml:child-elements parent)
(append result
(kid-selector parent)))
)))))
;=============================================================================
; Unified with sxpath-ext and sxml-tools
; According to XPath specification 2.3, this test is true for any
; XPath node.
; For SXML auxiliary lists and lists of attributes has to be excluded.
(define (sxml:node? node)
(not (and
(pair? node)
(memq (car node) '(@ @@)))))
; Returns the list of attributes for a given SXML node
; Empty list is returned if the given node os not an element,
; or if it has no list of attributes
(define (sxml:attr-list obj)
(if (and (sxml:element? obj)
(not (null? (cdr obj)))
(pair? (cadr obj))
(eq? '@ (caadr obj)))
(cdadr obj)
'()))
; Attribute axis
(define (sxml:attribute test-pred?)
(let ((fltr (sxml:filter test-pred?)))
(lambda (node)
(fltr
(apply append
(map
sxml:attr-list
(as-nodeset node)))))))
; Child axis
; This function is similar to 'select-kids', but it returns an empty
; child-list for PI, Comment and Entity nodes
(define (sxml:child test-pred?)
(lambda (node) ; node or node-set
(cond
((null? node) node)
((not (pair? node)) '()) ; No children
((memq (car node) '(*PI* *COMMENT* *ENTITY*)) ; PI, Comment or Entity
'()) ; No children
((symbol? (car node)) ; it's a single node
((sxml:filter test-pred?) (cdr node)))
(else (map-union (sxml:child test-pred?) node)))))
; Parent axis
; Given a predicate, it returns a function
; RootNode -> Converter
; which which yields a
; node -> parent
; converter then applied to a rootnode.
; Thus, such a converter may be constructed using
; ((sxml:parent test-pred) rootnode)
; and returns a parent of a node it is applied to.
; If applied to a nodelist, it returns the
; list of parents of nodes in the nodelist. The rootnode does not have
; to be the root node of the whole SXML tree -- it may be a root node
; of a branch of interest.
; The parent:: axis can be used with any SXML node.
(define (sxml:parent test-pred?)
(lambda (root-node) ; node or nodelist
(lambda (node) ; node or nodelist
(if (nodeset? node)
(map-union ((sxml:parent test-pred?) root-node) node)
(let rpt ((pairs
(apply append
(map
(lambda (root-n)
(map
(lambda (arg) (cons arg root-n))
(append
(sxml:attr-list root-n)
(sxml:child-nodes root-n))))
(as-nodeset root-node)))
))
(if (null? pairs)
'()
(let ((pair (car pairs)))
(if (eq? (car pair) node)
((sxml:filter test-pred?) (list (cdr pair)))
(rpt (append
(map
(lambda (arg) (cons arg (car pair)))
(append
(sxml:attr-list (car pair))
(sxml:child-nodes (car pair))))
(cdr pairs)
))))))))))
;=============================================================================
; Popular short cuts
; node-parent:: RootNode -> Converter
; (node-parent rootnode) yields a converter that returns a parent of a
; node it is applied to. If applied to a nodelist, it returns the list
; of parents of nodes in the nodelist.
; Given the notation of Philip Wadler's paper on semantics of XSLT,
; parent(x) = { y | y=subnode*(root), x=subnode(y) }
; Therefore, node-parent is not the fundamental converter: it can be
; expressed through the existing ones. Yet node-parent is a rather
; convenient converter. It corresponds to a parent:: axis of SXPath.
;
; Please note: this function is provided for backward compatibility
; with SXPath/SXPathlib ver. 3.5.x.x and earlier.
; Now it's a particular case of 'sxml:parent' application:
(define node-parent (sxml:parent (ntype?? '*any*)))
(define sxml:child-nodes (sxml:child sxml:node?))
(define sxml:child-elements (select-kids sxml:element?))
(provide (all-defined)))

View File

@ -0,0 +1,293 @@
; Module header is generated automatically
#cs(module util mzscheme
(require "common.ss")
(require "myenv.ss")
(require (lib "string.ss" "srfi/13"))
;****************************************************************************
; My Scheme misc utility functions
; (mainly dealing with string and list manipulations)
;
; myenv.scm, myenv-bigloo.scm or similar prelude is assumed.
; From SRFI-13, import many functions
; If a particular implementation lacks SRFI-13 support, please
; include the file srfi-13-local.scm
;
; $Id: util.scm,v 1.5 2004/07/07 16:02:31 sperber Exp $
;------------------------------------------------------------------------
; Iterator ANY?
;
; -- procedure+: any? PRED COLLECTION
; Searches for the first element in the collection satisfying a
; given predicate
; That is, the procedure applies PRED to every element of the
; COLLECTION in turn.
; The first element for which PRED returns non-#f stops the iteration;
; the value of the predicate is returned.
; If none of the elements of the COLLECTION satisfy the predicate,
; the return value from the procedure is #f
; COLLECTION can be a list, a vector, a string, or an input port.
; See vmyenv.scm for validation tests.
(define (any? <pred?> coll)
(cond
((list? coll)
(let loop ((curr-l coll))
(if (null? curr-l) #f
(or (<pred?> (car curr-l)) (loop (cdr curr-l))))))
((vector? coll)
(let ((len (vector-length coll)))
(let loop ((i 0))
(if (>= i len) #f
(or (<pred?> (vector-ref coll i)) (loop (inc i)))))))
((string? coll)
(let ((len (string-length coll)))
(let loop ((i 0))
(if (>= i len) #f
(or (<pred?> (string-ref coll i)) (loop (inc i)))))))
((input-port? coll)
(let loop ((c (read-char coll)))
(if (eof-object? c) #f
(or (<pred?> c) (loop (read-char coll))))))
(else (error "any? on an invalid collection"))))
;------------------------------------------------------------------------
; Some list manipulation functions
; -- procedure+: list-intersperse SRC-L ELEM
; inserts ELEM between elements of the SRC-L, returning a freshly allocated
; list (cells, that is)
(define (list-intersperse src-l elem)
(if (null? src-l) src-l
(let loop ((l (cdr src-l)) (dest (cons (car src-l) '())))
(if (null? l) (reverse dest)
(loop (cdr l) (cons (car l) (cons elem dest)))))))
; -- procedure+: list-intersperse! SRC-L ELEM
; inserts ELEM between elements of the SRC-L inplace
(define (list-intersperse! src-l elem)
(list-intersperse src-l elem))
; List-tail-difference: given two lists, list1 and list2 where
; list2 is presumably a tail of list1, return
; a (freshly allocated) list which is a difference between list1
; and list2. If list2 is *not* a tail of list1, the entire list1
; is returned.
(define (list-tail-diff list1 list2)
(let loop ((l1-curr list1) (difference '()))
(cond
((eq? l1-curr list2) (reverse difference))
((null? l1-curr) (reverse difference))
(else (loop (cdr l1-curr) (cons (car l1-curr) difference))))))
;------------------------------------------------------------------------
; String utilities
; See SRFI-13 or srfi-13-local.scm
; Return the index of the last occurence of a-char in str, or #f
; See SRFI-13
(define string-rindex string-index-right)
; -- procedure+: substring? PATTERN STRING
; Searches STRING to see if it contains the substring PATTERN.
; Returns the index of the first substring of STRING that is equal
; to PATTERN; or `#f' if STRING does not contain PATTERN.
;
; (substring? "rat" "pirate") => 2
; (substring? "rat" "outrage") => #f
; (substring? "" any-string) => 0
(define (substring? pattern str) (string-contains str pattern))
; -- procedure+: string->integer STR START END
;
; Makes sure a substring of the STR from START (inclusive) till END
; (exclusive) is a representation of a non-negative integer in decimal
; notation. If so, this integer is returned. Otherwise -- when the
; substring contains non-decimal characters, or when the range from
; START till END is not within STR, the result is #f.
;
; This procedure is a simplification of the standard string->number.
; The latter is far more generic: for example, it will try to read
; strings like "1/2" "1S2" "1.34" and even "1/0" (the latter causing
; a zero-divide error). Note that to string->number, "1S2" is a valid
; representation of an _inexact_ integer (100 to be precise).
; Oftentimes we want to be more restrictive about what we consider a
; number; we want merely to read an integral label.
(define (string->integer str start end)
(and (< -1 start end (inc (string-length str)))
(let loop ((pos start) (accum 0))
(cond
((>= pos end) accum)
((char-numeric? (string-ref str pos))
(loop (inc pos) (+ (char->integer (string-ref str pos))
(- (char->integer #\0)) (* 10 accum))))
(else #f)))))
;
; -- procedure+: string-split STRING
; -- procedure+: string-split STRING '()
; -- procedure+: string-split STRING '() MAXSPLIT
;
; Returns a list of whitespace delimited words in STRING.
; If STRING is empty or contains only whitespace, then the empty list
; is returned. Leading and trailing whitespaces are trimmed.
; If MAXSPLIT is specified and positive, the resulting list will
; contain at most MAXSPLIT elements, the last of which is the string
; remaining after (MAXSPLIT - 1) splits. If MAXSPLIT is specified and
; non-positive, the empty list is returned. "In time critical
; applications it behooves you not to split into more fields than you
; really need."
;
; -- procedure+: string-split STRING CHARSET
; -- procedure+: string-split STRING CHARSET MAXSPLIT
;
; Returns a list of words delimited by the characters in CHARSET in
; STRING. CHARSET is a list of characters that are treated as delimiters.
; Leading or trailing delimeters are NOT trimmed. That is, the resulting
; list will have as many initial empty string elements as there are
; leading delimiters in STRING.
;
; If MAXSPLIT is specified and positive, the resulting list will
; contain at most MAXSPLIT elements, the last of which is the string
; remaining after (MAXSPLIT - 1) splits. If MAXSPLIT is specified and
; non-positive, the empty list is returned. "In time critical
; applications it behooves you not to split into more fields than you
; really need."
;
; This is based on the split function in Python/Perl
;
; (string-split " abc d e f ") ==> ("abc" "d" "e" "f")
; (string-split " abc d e f " '() 1) ==> ("abc d e f ")
; (string-split " abc d e f " '() 0) ==> ()
; (string-split ":abc:d:e::f:" '(#\:)) ==> ("" "abc" "d" "e" "" "f" "")
; (string-split ":" '(#\:)) ==> ("" "")
; (string-split "root:x:0:0:Lord" '(#\:) 2) ==> ("root" "x:0:0:Lord")
; (string-split "/usr/local/bin:/usr/bin:/usr/ucb/bin" '(#\:))
; ==> ("/usr/local/bin" "/usr/bin" "/usr/ucb/bin")
; (string-split "/usr/local/bin" '(#\/)) ==> ("" "usr" "local" "bin")
(define (string-split str . rest)
; maxsplit is a positive number
(define (split-by-whitespace str maxsplit)
(define (skip-ws i yet-to-split-count)
(cond
((>= i (string-length str)) '())
((char-whitespace? (string-ref str i))
(skip-ws (inc i) yet-to-split-count))
(else (scan-beg-word (inc i) i yet-to-split-count))))
(define (scan-beg-word i from yet-to-split-count)
(cond
((zero? yet-to-split-count)
(cons (substring str from (string-length str)) '()))
(else (scan-word i from yet-to-split-count))))
(define (scan-word i from yet-to-split-count)
(cond
((>= i (string-length str))
(cons (substring str from i) '()))
((char-whitespace? (string-ref str i))
(cons (substring str from i)
(skip-ws (inc i) (- yet-to-split-count 1))))
(else (scan-word (inc i) from yet-to-split-count))))
(skip-ws 0 (- maxsplit 1)))
; maxsplit is a positive number
; str is not empty
(define (split-by-charset str delimeters maxsplit)
(define (scan-beg-word from yet-to-split-count)
(cond
((>= from (string-length str)) '(""))
((zero? yet-to-split-count)
(cons (substring str from (string-length str)) '()))
(else (scan-word from from yet-to-split-count))))
(define (scan-word i from yet-to-split-count)
(cond
((>= i (string-length str))
(cons (substring str from i) '()))
((memq (string-ref str i) delimeters)
(cons (substring str from i)
(scan-beg-word (inc i) (- yet-to-split-count 1))))
(else (scan-word (inc i) from yet-to-split-count))))
(scan-beg-word 0 (- maxsplit 1)))
; resolver of overloading...
; if omitted, maxsplit defaults to
; (inc (string-length str))
(if (string-null? str) '()
(if (null? rest)
(split-by-whitespace str (inc (string-length str)))
(let ((charset (car rest))
(maxsplit
(if (pair? (cdr rest)) (cadr rest) (inc (string-length str)))))
(cond
((not (positive? maxsplit)) '())
((null? charset) (split-by-whitespace str maxsplit))
(else (split-by-charset str charset maxsplit))))))
)
; make-char-quotator QUOT-RULES
;
; Given QUOT-RULES, an assoc list of (char . string) pairs, return
; a quotation procedure. The returned quotation procedure takes a string
; and returns either a string or a list of strings. The quotation procedure
; check to see if its argument string contains any instance of a character
; that needs to be encoded (quoted). If the argument string is "clean",
; it is returned unchanged. Otherwise, the quotation procedure will
; return a list of string fragments. The input straing will be broken
; at the places where the special characters occur. The special character
; will be replaced by the corresponding encoding strings.
;
; For example, to make a procedure that quotes special HTML characters,
; do
; (make-char-quotator
; '((#\< . "&lt;") (#\> . "&gt;") (#\& . "&amp;") (#\" . "&quot;")))
(define (make-char-quotator char-encoding)
(let ((bad-chars (map car char-encoding)))
; Check to see if str contains one of the characters in charset,
; from the position i onward. If so, return that character's index.
; otherwise, return #f
(define (index-cset str i charset)
(let loop ((i i))
(and (< i (string-length str))
(if (memv (string-ref str i) charset) i
(loop (inc i))))))
; The body of the function
(lambda (str)
(let ((bad-pos (index-cset str 0 bad-chars)))
(if (not bad-pos) str ; str had all good chars
(let loop ((from 0) (to bad-pos))
(cond
((>= from (string-length str)) '())
((not to)
(cons (substring str from (string-length str)) '()))
(else
(let ((quoted-char
(cdr (assv (string-ref str to) char-encoding)))
(new-to
(index-cset str (inc to) bad-chars)))
(if (< from to)
(cons
(substring str from to)
(cons quoted-char (loop (inc to) new-to)))
(cons quoted-char (loop (inc to) new-to))))))))))
))
(provide (all-defined)))

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,376 @@
SXML Package
============
SXML package contains a collection of tools for processing markup documents
(XML, XHTML, HTML) in the form of S-expressions (SXML, SHTML)
You can find the API documentation in:
http://modis.ispras.ru/Lizorkin/Apidoc/index.html
SXML tools tutorial (under construction):
http://modis.ispras.ru/Lizorkin/sxml-tutorial.html
==========================================================================
Description of the main high-level package components
-----------------------------------------------------
1. SXML-tools
2. SXPath - SXML Query Language
3. SXPath with context
4. DDO SXPath
5. Functional-style modification tool for SXML
6. STX - Scheme-enabled XSLT processor
7. XPathLink - query language for a set of linked documents
-------------------------------------------------
1. SXML-tools
XML is XML Infoset represented as native Scheme data - S-expressions.
Any Scheme programm can manipulate SXML data directly, and DOM-like API is not
necessary for SXML/Scheme applications.
SXML-tools (former DOMS) is just a set of handy functions which may be
convenient for some popular operations on SXML data.
library file: Bigloo, Chicken, Gambit: "sxml/sxml-tools.scm"
PLT: "sxml-tools.ss"
http://www.pair.com/lisovsky/xml/sxmltools/
-------------------------------------------------
2. SXPath - SXML Query Language
SXPath is a query language for SXML. It treats a location path as a composite
query over an XPath tree or its branch. A single step is a combination of a
projection, selection or a transitive closure. Multiple steps are combined via
join and union operations.
Lower-level SXPath consists of a set of predicates, filters, selectors and
combinators, and higher-level abbreviated SXPath functions which are
implemented in terms of lower-level functions.
Higher level SXPath functions are dealing with XPath expressions which may be
represented as a list of steps in the location path ("native" SXPath):
(sxpath '(table (tr 3) td @ align))
or as a textual representation of XPath expressions which is compatible with
W3C XPath recommendation ("textual" SXPath):
(sxpath "table/tr[3]/td/@align")
An arbitrary converter implemented as a Scheme function may be used as a step
in location path of "native" SXPath, which makes it extremely powerful and
flexible tool. On other hand, a lot of W3C Recommendations such as XSLT,
XPointer, XLink depends on a textual XPath expressions.
It is possible to combine "native" and "textual" location paths and location
step functions in one query, constructing an arbitrary XML query far beyond
capabilities of XPath. For example, the query
(sxpath `("document/chapter[3]" ,relevant-links @ author)
makes a use of location step function relevant-links which implements an
arbitrary algorithm in Scheme.
SXPath may be considered as a compiler from abbreviated XPath (extended with
native SXPath and location step functions) to SXPath primitives.
library file: Bigloo, Chicken, Gambit: "sxml/sxpath.scm"
PLT: "sxpath.ss"
http://www.pair.com/lisovsky/query/sxpath/
-------------------------------------------------
3. SXPath with context
SXPath with context provides the effective implementation for XPath reverse
axes ("parent::", "ancestor::" and such) on SXML documents.
The limitation of SXML is the absense of an upward link from a child to its
parent, which makes the straightforward evaluation of XPath reverse axes
ineffective. The previous approach for evaluating reverse axes in SXPath was
searching for a parent from the root of the SXML tree.
SXPath with context provides the fast reverse axes, which is achieved by
storing previously visited ancestors of the context node in the context.
With a special static analysis of an XPath expression, only the minimal
required number of ancestors is stored in the context on each location step.
library file: Bigloo, Chicken, Gambit: "sxml/xpath-context.scm"
PLT: "xpath-context_xlink.ss"
-------------------------------------------------
4. DDO SXPath
The optimized SXPath that implements distinct document order (DDO) of the
nodeset produced.
Unlike conventional SXPath and SXPath with context, DDO SXPath guarantees that
the execution time is at worst polynomial of the XPath expression size and of
the SXML document size.
The API of DDO SXPath is compatible of that in conventional SXPath. The main
following kinds of optimization methods are designed and implemented in DDO
SXPath:
- All XPath axes are implemented to keep a nodeset in distinct document
order (DDO). An axis can now be considered as a converter:
nodeset_in_DDO --> nodeset_in_DDO
- Type inference for XPath expressions allows determining whether a
predicate involves context-position implicitly;
- Faster evaluation for particular kinds of XPath predicates that involve
context-position, like: [position() > number] or [number];
- Sort-merge join algorithm implemented for XPath EqualityComparison of
two nodesets;
- Deeply nested XPath predicates are evaluated at the very beginning of the
evaluation phase, to guarantee that evaluation of deeply nested predicates
is performed no more than once for each combination of
(context-node, context-position, context-size)
library file: Bigloo, Chicken, Gambit: "sxml/ddo-txpath.scm"
PLT: "ddo-txpath.ss"
http://modis.ispras.ru/Lizorkin/ddo.html
-------------------------------------------------
5. Functional-style modification tool for SXML
A tool for making functional-style modifications to SXML documents
The basics of modification language design was inspired by Patrick Lehti and
his data manipulation processor for XML Query Language:
http://www.ipsi.fraunhofer.de/~lehti/
However, with functional techniques we can do this better...
library file: Bigloo, Chicken, Gambit: "sxml/modif.scm"
PLT: "modif.ss"
-------------------------------------------------
6. STX - Scheme-enabled XSLT processor
STX is an XML transformation tool based on XSLT and Scheme which combines
a processor for most common XSLT stylesheets and a framework for their
extension in Scheme and provides an environment for a general-purpose
transformation of XML data. It integrates two functional languages - Scheme
and XSLT-like transformation language on the basis of the common data model -
SXML.
library file: Bigloo, Chicken, Gambit: "stx/stx-engine.scm"
PLT: "stx-engine.ss"
http://www.pair.com/lisovsky/transform/stx/
-------------------------------------------------
7. XPathLink - query language for a set of linked documents
XLink is a language for describing links between resources using XML attributes
and namespaces. XLink provides expressive means for linking information in
different XML documents. With XLink, practical XML application data can be
expressed as several linked XML documents, rather than a single complicated XML
document. Such a design makes it very attractive to have a query language that
would inherently recognize XLink links and provide a natural navigation
mechanism over them.
Such a query language has been designed and implemented in Scheme. This
language is an extension to XPath with 3 additional axes. The implementation
is naturally an extended SXPath. We call this language XPath with XLink
support, or XPathLink.
Additionally, an HTML <A> hyperlink can be considered as a particular case of
an XLink link. This observation makes it possible to query HTML documents with
XPathLink as well. Neil W. Van Dyke <neil@neilvandyke.org> and his permissive
HTML parser HtmlPrag have made this feature possible.
library file: Bigloo, Chicken, Gambit: "sxml/xlink.scm"
PLT: "xpath-context_xlink.ss"
http://modis.ispras.ru/Lizorkin/xpathlink.html
==========================================================================
Examples and expected results
-----------------------------
Obtaining an SXML document from XML
(sxml:document "http://modis.ispras.ru/Lizorkin/XML/poem.xml")
==>
(*TOP*
(*PI* xml "version='1.0'")
(poem
(@ (title "The Lovesong of J. Alfred Prufrock") (poet "T. S. Eliot"))
(stanza
(line "Let us go then, you and I,")
(line "When the evening is spread out against the sky")
(line "Like a patient etherized upon a table:"))
(stanza
(line "In the room the women come and go")
(line "Talking of Michaelangelo."))))
Accessing parts of the document with SXPath
((sxpath "poem/stanza[2]/line/text()")
(sxml:document "http://modis.ispras.ru/Lizorkin/XML/poem.xml"))
==>
("In the room the women come and go" "Talking of Michaelangelo.")
Obtaining/querying HTML documents
((sxpath "html/head/title")
(sxml:document "http://modis.ispras.ru/Lizorkin/index.html"))
==>
((title "Dmitry Lizorkin homepage"))
-------------------------------------
SXML Transformations
Transforming the document according to XSLT stylesheet
(apply
string-append
(sxml:clean-feed
(stx:transform-dynamic
(sxml:add-parents
(sxml:document "http://modis.ispras.ru/Lizorkin/XML/poem.xml"))
(stx:make-stx-stylesheet
(sxml:document
"http://modis.ispras.ru/Lizorkin/XML/poem2html.xsl"
'((xsl . "http://www.w3.org/1999/XSL/Transform")))))))
==>
"<html><head><title>The Lovesong of J. Alfred Prufrock</title></head>
<body><h1>The Lovesong of J. Alfred Prufrock</h1>
<p>Let us go then, you and I,<br/>
When the evening is spread out against the sky<br/>
Like a patient etherized upon a table:<br/></p>
<p>In the room the women come and go<br/>Talking of Michaelangelo.<br/></p>
<i>T. S. Eliot</i></body></html>"
Expressing the same transformation in pre-post-order (requires SSAX package)
(pre-post-order
(sxml:document "http://modis.ispras.ru/Lizorkin/XML/poem.xml")
`((*TOP* *macro* . ,(lambda top (car ((sxpath '(*)) top))))
(poem
unquote
(lambda elem
`(html
(head
(title ,((sxpath "string(@title)") elem)))
(body
(h1 ,((sxpath "string(@title)") elem))
,@((sxpath "node()") elem)
(i ,((sxpath "string(@poet)") elem))))))
(@ *preorder* . ,(lambda x x))
(stanza . ,(lambda (tag . content)
`(p ,@(map-union (lambda (x) x) content))))
(line . ,(lambda (tag . content) (append content '((br)))))
(*text* . ,(lambda (tag text) text))))
==>
(html
(head (title "The Lovesong of J. Alfred Prufrock"))
(body
(h1 "The Lovesong of J. Alfred Prufrock")
(p
"Let us go then, you and I,"
(br)
"When the evening is spread out against the sky"
(br)
"Like a patient etherized upon a table:"
(br))
(p "In the room the women come and go" (br)
"Talking of Michaelangelo." (br))
(i "T. S. Eliot")))
-------------------------------------
XPathLink: a query language with XLink support
Returning a chapter element that is linked with the first item
in the table of contents
((sxpath/c "doc/item[1]/traverse::chapter")
(xlink:documents "http://modis.ispras.ru/Lizorkin/XML/doc.xml"))
==>
((chapter (@ (id "chap1"))
(title "Abstract")
(p "This document describes about XLink Engine...")))
Traversing between documents with XPathLink
((sxpath/c "descendant::a[.='XPathLink']/traverse::html/
descendant::blockquote[1]/node()")
(xlink:documents "http://modis.ispras.ru/Lizorkin/index.html"))
==>
((b "Abstract: ")
"\r\n"
"XPathLink is a query language for XML documents linked with XLink links.\r\n"
"XPathLink is based on XPath and extends it with transparent XLink support.\r\n"
"The implementation of XPathLink in Scheme is provided.\r\n")
-------------------------------------
SXML Modifications
Modifying the SXML representation of the document
((sxml:modify '("/poem/stanza[2]" move-preceding "preceding-sibling::stanza"))
(sxml:document "http://modis.ispras.ru/Lizorkin/XML/poem.xml"))
==>
(*TOP*
(*PI* xml "version='1.0'")
(poem
(@ (title "The Lovesong of J. Alfred Prufrock") (poet "T. S. Eliot"))
(stanza
(line "In the room the women come and go")
(line "Talking of Michaelangelo."))
(stanza
(line "Let us go then, you and I,")
(line "When the evening is spread out against the sky")
(line "Like a patient etherized upon a table:"))))
-------------------------------------
DDO SXPath: the optimized XPath implementation
Return all text nodes that follow the keyword ``XPointer'' and
that are not descendants of the element appendix
((ddo:sxpath "//text()[contains(., 'XPointer')]/
following::text()[not(./ancestor::appendix)]")
(sxml:document "http://modis.ispras.ru/Lizorkin/XML/doc.xml"))
==>
("XPointer is the fragment identifier of documents having the mime-type..."
"Models for using XLink/XPointer "
"There are important keywords."
"samples"
"Conclusion"
"Thanks a lot.")
-------------------------------------
Lazy XML processing
Lazy XML-to-SXML conversion
(define doc
(lazy:xml->sxml
(open-input-resource "http://modis.ispras.ru/Lizorkin/XML/poem.xml")
'()))
doc
==>
(*TOP*
(*PI* xml "version='1.0'")
(poem
(@ (title "The Lovesong of J. Alfred Prufrock") (poet "T. S. Eliot"))
(stanza (line "Let us go then, you and I,") #<struct:promise>)
#<struct:promise>))
Querying a lazy SXML document, lazyly
(define res ((lazy:sxpath "poem/stanza/line[1]") doc))
res
==>
((line "Let us go then, you and I,") #<struct:promise>)
Obtain the next portion of the result
(force (cadr res))
==>
((line "In the room the women come and go") #<struct:promise>)
Converting the lazy result to a conventional SXML nodeset
(lazy:result->list res)
==>
((line "Let us go then, you and I,")
(line "In the room the women come and go"))

View File

@ -0,0 +1,166 @@
; Module header is generated automatically
#cs(module guides mzscheme
(require (lib "ssax.ss" "web-server/tmp/ssax"))
;; $Id: guides.scm,v 2.4 2003/12/08 02:07:23 kl Exp kl $
;; DataGuide is a "structural summary" for semistructured data and may be
;; considered as analog of traditional database schema in context of
;; semistructured data management.
;==============================================================================
; Auxilliary
(define dgs:version
(string-append " $Revision: 2.4 $" nl " $Date: 2003/12/08 02:07:23 $"))
;------------------------------------------------------------------------------
; Customized versions of SRFI-1 functions
; right fold
(define (dgs:fold op init seq)
(if (null? seq)
init
(op (car seq)
(dgs:fold op init (cdr seq)))))
; find from SRFI-1 optimized for speed
(define (dgs:find pred seq)
(cond
((let lp ((seq seq))
(and (pair? seq)
(if (pred (car seq))
seq
(lp (cdr seq)))))
=> car)
(else #f)))
;------------------------------------------------------------------------------
; DataGuide management
; Add location path <lp> in <tree>
; If location path is already present - then it is skipped.
(define (add-lp tree lp)
(cond
; lp is empty : return the tree as it is
((null? lp) tree)
; lp is present in the tree: traverse the tree recursively
((dgs:find (lambda(x)
(and (pair? x)
(eq? (car lp) (car x))))
tree)
=> (lambda(x)
(add-lp x (cdr lp))
tree))
; lp is absent in the tree: add it and return modified tree
(else
(if (null? tree)
; if the tree is empty
(set! tree (list lp))
(set-cdr! tree (cons lp (cdr tree))))
tree
))
)
;==============================================================================
; DataGuides for SXML tree (DOM-style)
; This functions build DataGuides for semistructured data represented
; data represented as SXML tree.
; Flat DataGuide for given node or nodeset <obj>
; Flat DataGuide is a list of all the unique location paths found in the
; source data. It contains no location paths which are absent in the source
; data.
; Location paths with trailing @ (which are LPs of attributes-lists) are
; excluded.
(define (sxml-guide-flat obj . ignore)
(define (helper lp)
(lambda (x y)
(if (and (pair? x)
(not (memq (car x)
(if (null? ignore)
'(*PI* *COMMENT* *NAMESPACES* *ENTITY*)
(car ignore)))))
(let ((this-lp (cons (car x) lp)))
; (cerr nl y)
(dgs:fold
(helper this-lp)
(if (or (eq? '@ (car x)) ; excludes @-ended location paths
(member this-lp y))
y
(cons this-lp y))
(cdr x)))
y)))
(map reverse
(dgs:fold (helper '()) '() obj)
))
; Strong DataGuide for given node or nodeset <obj>
; Strong DataGuide is a tree which contains one instance of every location
; path found in the source data, and which contains no location paths which
; are absent in the source data
(define (sxml-guide obj . ignore)
(define (helper lp)
(lambda (x y)
(if (and (pair? x)
(not (memq (car x)
(if (null? ignore)
'(*PI* *COMMENT* *NAMESPACES* *ENTITY*)
(car ignore)))))
(let ((this-lp (cons (car x) lp)))
(dgs:fold (helper this-lp)
(add-lp y (reverse this-lp))
(cdr x)))
y)))
(dgs:fold (helper '()) '() obj)
)
;==============================================================================
; DataGuides (SSAX-style)
; This functions build DataGuides for while parsing XML data.
; Flat data guide
; The seed is pair whose car is current location path (reversed) and
; whose cdr is DataGuide accumulated.
(define (xml-guide-flat xml-port)
(cdr
(map reverse
((ssax:make-parser
NEW-LEVEL-SEED
(lambda (elem-gi attributes namespaces
expected-content seed)
(cons (cons elem-gi (car seed)) ; Add element name to current LP
(cdr seed)))
FINISH-ELEMENT
(lambda (elem-gi attributes namespaces parent-seed seed)
(let ((attr-lps (map
(lambda(attr)
`(,(car attr) @ ,@(car seed)))
attributes)))
(cons (car parent-seed)
; Add LP to DataGuide, if unique
(if
(member (car seed) (cdr seed))
; If elements LP is already in DG - then add its attributes
; which are not in DG already
(append
(filter
(lambda(lp) (not (member lp (cdr seed))))
attr-lps)
(cdr seed))
; If elements LP is unique - then add to DataGuide
; it and all its attributes
(append attr-lps (cons (car seed) (cdr seed)))
))))
CHAR-DATA-HANDLER
(lambda (string1 string2 seed)
seed))
xml-port
(cons '() '()) ; Initial seed
))))
(provide (all-defined)))

View File

@ -0,0 +1,10 @@
(module info (lib "infotab.ss" "setup")
(define name "sxml")
(define blurb
(list "Collection of tools for processing markup documents "
"in the form of S-expressions"))
(define primary-file "sxml.ss")
(define doc.txt "doc.txt")
(define homepage "http://modis.ispras.ru/Lizorkin/sxml-tutorial.html")
(define categories '(xml))
)

View File

@ -0,0 +1,217 @@
; Module header is generated automatically
#cs(module lazy-ssax mzscheme
(require (lib "string.ss" "srfi/13"))
(require (lib "ssax.ss" "web-server/tmp/ssax"))
(require "sxpathlib.ss")
(require "sxml-tools.ss")
(require "sxpath-ext.ss")
(require "xpath-parser.ss")
(require "txpath.ss")
(require "xpath-ast.ss")
(require "xpath-context_xlink.ss")
(require "lazy-xpath.ss")
;; A specialized lazy XML->SXML parser
; Is heavily based on continuations
;-------------------------------------------------
; Preliminary helper functions
; A helper that forces all descendants of a given node or a nodeset
(define (lazy:force-descendants node)
(cond
((lazy:promise? node) ; force it
(lazy:force-descendants (force node)))
((pair? node) ; not null
(for-each lazy:force-descendants node))
(else ; null or not pair
#t ; nothing to be done
)))
; Returns the list containing of all members of the argument list except
; for the last member
(define (lazy:except-last lst)
(if
(or (null? lst) ; this shouldn't happen
(null? (cdr lst)))
'()
(cons (car lst) (lazy:except-last (cdr lst)))))
;-------------------------------------------------
;
; Returns the common part of the seed
(define (lazy:seed-common seed)
((if (null? (cdr seed)) ; a short seed
car caddr)
seed))
; A monad-like handler
; Replaces the common part of the seed
(define (lazy:replace-common seed new-common)
(if (null? (cdr seed)) ; a short seed
(list new-common)
(list (car seed)
(cadr seed)
new-common
(cadddr seed))))
; Produces a lazy SXML document, which corresponds to reading a source
; document in a stream-wise fashion
(define (lazy:xml->sxml port namespace-prefix-assig)
(let ((namespaces
(map (lambda (el)
(cons* #f (car el) (ssax:uri-string->symbol (cdr el))))
namespace-prefix-assig))
(RES-NAME->SXML
(lambda (res-name)
(string->symbol
(string-append
(symbol->string (car res-name))
":"
(symbol->string (cdr res-name)))))))
((lambda (result)
; We assume that nobody follows the document element
(if (null? namespace-prefix-assig)
(cons '*TOP* (lazy:except-last result))
(cons
'*TOP*
(cons
`(@@ (*NAMESPACES*
,@(map
(lambda (ns) (list (car ns) (cdr ns)))
namespace-prefix-assig)))
(lazy:except-last result)))))
(call-with-current-continuation ; we grab the continuation to escape from parsing
(lambda (result-k)
; seed ::= (list result-k state-k common-seed level)
; result-k - continuation on what to do with the current result portion
; state-k - continuation to return to SSAX state on this level of XML
; tree hierarchy
; common-seed - general seed information
; level - level of a current node in a tree hierarchy
((ssax:make-parser
NEW-LEVEL-SEED
(lambda (elem-gi attributes namespaces expected-content seed)
;(pp (cons elem-gi (cadddr seed)))
(if
(or (null? (cdr seed)) ; short seed
(> (cadddr seed) 3)) ; deep level
(list '()) ; work like a conventional SSAX parser
(let ((attrs
(attlist-fold
(lambda (attr accum)
(cons (list
(if (symbol? (car attr)) (car attr)
(RES-NAME->SXML (car attr)))
(cdr attr)) accum))
'() attributes)))
(call-with-current-continuation
(lambda (new-level-k) ; how to parse next
((car seed) ; return the result
(let ((elem-content
; A promise to continue parsing
(call-with-current-continuation ; where to put the result
(lambda (elem-k)
(new-level-k
(list ; now form a seed
elem-k ; what to do with result
new-level-k ; SSAX state on this level
'() ; common-seed is empty
(+ (cadddr seed) 1) ; increase level
))))))
(append
; Previous string content
(ssax:reverse-collect-str-drop-ws (caddr seed))
(list
(cons
(if (symbol? elem-gi) elem-gi
(RES-NAME->SXML elem-gi))
(if (null? attrs) elem-content
(cons (cons '@ attrs) elem-content)))
; The following siblings of this element
(delay
(call-with-current-continuation ; where to put the result
(lambda (foll-k)
; First we force the parsing of the current element
(lazy:force-descendants elem-content)
; Than continue parsing
((cadr seed) ; recover the parent level of nesting
(list
foll-k ; what to do with result
(cadr seed)
'() ; common-seed is empty
(cadddr seed) ; the same level for siblings
))))))))))))))
FINISH-ELEMENT
(lambda (elem-gi attributes namespaces parent-seed seed)
(if
(null? (cdr seed)) ; a short seed
(let ((common (ssax:reverse-collect-str-drop-ws
(lazy:seed-common seed)))
(attrs
(attlist-fold
(lambda (attr accum)
(cons (list
(if (symbol? (car attr)) (car attr)
(RES-NAME->SXML (car attr)))
(cdr attr)) accum))
'() attributes)))
(lazy:replace-common
parent-seed
(cons
(cons
(if (symbol? elem-gi) elem-gi
(RES-NAME->SXML elem-gi))
(if (null? attrs) common
(cons (cons '@ attrs) common)))
(lazy:seed-common parent-seed))))
; Otherwise - just return the remaining character content
((car seed) ; continuation
(ssax:reverse-collect-str-drop-ws
(lazy:seed-common seed)))))
CHAR-DATA-HANDLER
(lambda (string1 string2 seed)
;(pp (list string1 string2 seed))
(lazy:replace-common
seed
(if (string-null? string2)
(cons string1 (lazy:seed-common seed))
(cons* string2 string1 (lazy:seed-common seed)))))
DOCTYPE
(lambda (port docname systemid internal-subset? seed)
(when internal-subset?
(ssax:warn port
"Internal DTD subset is not currently handled ")
(ssax:skip-internal-dtd port))
(ssax:warn port "DOCTYPE DECL " docname " "
systemid " found and skipped")
(values #f '() namespaces seed))
UNDECL-ROOT
(lambda (elem-gi seed)
(values #f '() namespaces seed))
PI
((*DEFAULT* .
(lambda (port pi-tag seed)
(lazy:replace-common
seed
(cons
(list '*PI* pi-tag (ssax:read-pi-body-as-string port))
(lazy:seed-common seed))))))
)
port
(list ; form initial seed
result-k ; put the result
(lambda (seed) ; dummy top-level parser state that produces '()
((car seed) ; where to put the result nodeset
'()))
'()
1 ; level for the document element
)))))))
(provide (all-defined)))

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,348 @@
; Module header is generated automatically
#cs(module libmisc mzscheme
(require (rename (lib "pretty.ss") pp pretty-print))
(require (lib "string.ss" "srfi/13"))
(require (lib "ssax.ss" "web-server/tmp/ssax"))
;; Portable Library of Miscellaneous Functions
;; $Id: libmisc.scm,v 1.7 2002/10/08 15:47:21 kl Exp kl $
;==============================================================================
; Miscellaneous
; Identity function
(define (self x) x)
;==============================================================================
; Lists
; Returns #f if given list is empty and the list itself otherwise
; It is intended for emulation of MIT-style empty list treatment
; (not-null? <list>) may be considered as a counterpart to MIT-style <list>
(define (not-null? l)
(if (null? l)
#f
l))
;------------------------------------------------------------------------------
; Converters
; Transform a list of characters to a symbol
(define (list->symbol lst)
(string->symbol (apply string lst)))
; Elements if given list <l>, which are supposed to be strings,
; are returned as a string separated by sep-str
; or space separated if <sep-str> is omitted
(define (list-to-string l . sep-str)
(let ((sp-st (if (null? sep-str) " " (car sep-str))))
(if (not (null? l))
(let rpt ((x l) (rez ""))
(if (null? (cdr x))
(string-append rez (car x))
(rpt (cdr x) (string-append rez (car x) sp-st))))
""
)))
; Convert a string separated by (car rest) to a list of lines
; If the rest is omitted, then #\space is used
(define (string-to-list str . rest)
(let ((lngth (string-length str))
(sep-char (if (null? rest)
#\space
(car rest))))
(let rpt ((indx 0) (rzt '()))
(let seek ((i 0))
(cond
((= lngth (+ i indx))
(reverse (cons (substring str indx lngth) rzt))
)
((char=? (string-ref str (+ i indx)) sep-char)
(rpt (+ indx i 1)
(cons (substring str indx (+ indx i)) rzt)))
(else (seek (+ i 1))))))))
;==============================================================================
; Strings
; Return a string where every line of given <text> is commented out
; using <comment-string>
(define (comment-out text comment-string)
(let rpt ((txt (reverse (string-to-list text #\newline))) (rzt ""))
(if (null? txt)
rzt
(rpt (cdr txt) (string-append comment-string (car txt) "\n" rzt)))))
; Reads all the characters up to the end of the line and put
; them in a string.
; Returns a string containing all the characters read, including
; the end-of-line character
; If the line read is eof-object terminated, then it is returned
; with eof-object replaced by #\newline
; If the eof-object is the only one character read,
; then it is returned as is
(define (read-whole-line . port)
(let ((p (if (null? port)
(current-input-port)
(car port))))
(let rpt ((l '())
(c (read-char p)))
(cond
((and (eof-object? c) (null? l)) c)
((or (eof-object? c) (char=? c #\newline))
(list->string (reverse (cons #\newline l))))
(else
(rpt (cons c l) (read-char p)))))))
; Skip all the leading characters of a given string <str> which are members
; of <skip-chars> list and return the substring remaining
(define (skip-prefix skip-chars str)
(let ((strl (string-length str)))
(do ((i 0 (+ i 1)))
((or (>= i strl)
(not (memq (string-ref str i)
skip-chars)))
(substring str i strl))
)))
;==============================================================================
; System
; Default operating system
(define *OPERATING-SYSTEM* 'unix)
;==============================================================================
; IO related
; Newline string
(define (nl-string . op-system)
(case (if (null? op-system)
*OPERATING-SYSTEM*
(car op-system))
((UNIX) (string (integer->char 10)))
((WIN) (string (integer->char 13) (integer->char 10)))
((MAC) (string (integer->char 13)))
(else (cerr nl "Unsupported operating system: " op-system nl)
(exit))))
; cout redirection to a file with the given "fname"
(define (make-cout fname)
(let ((o-port
(open-output-file fname)))
(lambda args
(for-each (lambda (x)
(if (procedure? x)
(display (x) o-port)
(display x o-port)))
args))))
; Like pp, but symbols are quoted
(define (ppw obj . port)
(let ((port (if (null? port) (current-output-port) (car port))))
(begin
(and (symbol? obj)
(display "'" port))
(pp obj port))))
;------------------------------------------------------------------------------
; "Controlled verbosity" messages
(define (tee tag x)
(cerr tag x nl)
x)
(define (tee-1 tag x)
x)
(define (tee-2 tag x)
x)
(define (tee-3 tag x)
x)
(define (tee-4 tag x)
x)
(define (verb-1 . x)
#f)
(define (verb-2 . x)
#f)
(define (verb-3 . x)
#f)
(define (verb-4 . x)
#f)
; DL: commented this non-functional acrobatics out
;(define (set-verbosity-4)
; (set-verbosity-3)
; (set! verb-4 (lambda mes (apply cerr mes) (cerr nl)))
; (set! tee-4 (lambda (tag x) (cerr tag x nl) x)))
;
;(define (set-verbosity-3)
; (set-verbosity-2)
; (set! verb-3 (lambda mes (apply cerr mes) (cerr nl)))
; (set! tee-3 (lambda (tag x) (cerr tag x nl) x)))
;
;(define (set-verbosity-2)
; (set-verbosity-1)
; (set! verb-2 (lambda mes (apply cerr mes) (cerr nl)))
; (set! tee-2 (lambda (tag x) (cerr tag x nl) x)))
;
;(define (set-verbosity-1)
; (set! verb-1 (lambda mes (apply cerr mes) (cerr nl)))
; (set! tee-1 (lambda (tag x) (cerr tag x nl) x)))
;==============================================================================
; Command line parameters parsing
;@requires util.scm string-prefix? substring?
;@requires myenv.scm cerr ++
; NOTE: This function doesn't require any SXML software, but SXPath is
; a natural way to operate on its result.
; The function accepts a command line as a list, parse it and returns
; SXML element:
; (command-line
; (arg 'arg-value')* ; one per argument
; ('opt-name' ; one per option
; (@ (type { "--" | "-" }))?
; 'opt-value'?)*
; )
;
; The function obtains options and their arguments from a list of
; parameters that follows the standard POSIX.2 option syntax.
; It recognizes a subset of POSIX.2 options syntax wich may be unambiguously
; parsed without explicit description.
; Supported types of options are:
; Short without arguments: -o
; Short combined: -abc
; which is equal to: -a -b -c
; Long without arguments: --opt
; Long with argument: --opt=val
;
; The function may accept an optional second argument - a list of
; possible options. Each option in this list has to be represented as a string.
; Short options are represented without leading dash, while long option
; are represented with both leading dashes presented.
; Example '("v" "--update").
; If the list of acceptable options was given, and command line contains
; an option not included in this list, then the function will print an
; "Invalid option" error message and (exit -1).
;
; The function doesn't use any global variables.
(define (argv->sxml argv . options)
(let* ((vopt (if (null? options) #f (car options)))
(test-valid (lambda(opt . fopt)
(and vopt
(not (member opt vopt))
(begin (cerr nl "Invalid option: " opt " "
(if (pair? fopt) fopt "") nl)
(exit -1))))))
(cons
'command-line
(let rpt ((cl argv)
(rez '()))
(cond
((null? cl)
(reverse rez))
((string=? (car cl) "--")
(append (reverse rez) (map
(lambda(x)
`(arg ,x))
(cdr cl))))
(else (rpt
(cdr cl)
(append
(cond
; Long option
((string-prefix? "--" (car cl))
(cond
; with argument
((substring? "=" (car cl))
=>(lambda(pos)
(test-valid
(substring (car cl) 0 pos)
(car cl))
`((,(string->symbol
(substring (car cl) 2 pos) ) ; option
(@ (type "--"))
,(substring (car cl) (++ pos) ; argument
(string-length (car cl))))
)))
; without argument
(else
(test-valid (car cl))
`((,(string->symbol
(substring (car cl) 2
(string-length (car cl))))
(@ (type "--")))
))))
; short option
((string-prefix? "-" (car cl))
(map
(lambda (x)
(let ((opt (string x)))
(test-valid opt (car cl))
`(,(string->symbol opt)
(@ (type "-")))))
(cdr (string->list (car cl)))))
; non-option
(else `((argument ,(car cl)))))
rez))))
))))
;==============================================================================
; A minimalistic and pure functional record type.
; A record constructor, which returns record as a function.
; This returned function may be used as:
; a field accessor
; -- returns value of a specified field
; if applyed to an only parameter of type symbol (field name)
; -- returns a list of record fields as a list of (<name> <value>) lists
; if called without parameters
; a modifier for some elements of the record
; -- if its parameters are lists whose CARs are names of record fields
; (alteration descriptors). This function doesn't modify the original
; record but returns the record modified.
; Two forms of alteration descriptors are supported:
; 1. (<field-name> <new-value>)
; Specifies new value for the field <field-name>.
; 2. (<field-name> => <expression>)
; The <expression> must be a procedure that accepts one argument;
; this procedure is then called on the value of the <field-name> field
; and the value returned by this procedure is the new value of this field.
; Both <field-name> and => has to be symbols.
; Note: a type of record constructed with "lambda-tuple" is not distinct
; from "procedure" type.
(define (lambda-tuple . elts)
(lambda param
(cond
((null? param) elts)
((symbol? (car param))
(cond
((assq (car param) elts)
=> cadr)
((eq? '*LT-ADD* (car param))
(apply lambda-tuple (append elts (cdr param))))
(else (verb-4 nl "Lambda-tuple field name not found: " (car param)
nl "Valid names are: " (map car elts) nl)
'*LT-NOT-FOUND*
)))
(else (apply lambda-tuple
(map
(lambda(e)
(cond
((assq (car e) param)
=> (lambda(mut)
(list (car e)
(if (eq? '=> (cadr mut))
((caddr mut) (cadr e))
(cadr mut)))))
(else e)))
elts))))))
(provide (all-defined)))

View File

@ -0,0 +1,852 @@
; Module header is generated automatically
#cs(module modif mzscheme
(require (lib "string.ss" "srfi/13"))
(require (lib "ssax.ss" "web-server/tmp/ssax"))
(require "sxpathlib.ss")
(require "sxml-tools.ss")
(require "xpath-context_xlink.ss")
(require "xpath-ast.ss")
(require "ddo-txpath.ss")
;; A tool for making functional-style modifications to SXML documents
;
; This software is in Public Domain.
; IT IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.
;
; Please send bug reports and comments to:
; lizorkin@hotbox.ru Dmitry Lizorkin
;
; The basics of modification language design was inspired by Patrick Lehti
; and his data manipulation processor for XML Query Language:
; http://www.ipsi.fraunhofer.de/~lehti/
; However, with functional techniques we can do this better...
;==========================================================================
; Modification core
; Displays an error to stderr and returns #f
(define (sxml:modification-error . text)
(cerr "Modification error: ")
(apply cerr text)
(cerr nl)
#f)
; Separates the list into two lists with respect to the predicate
; Returns: (values res-lst1 res-lst2)
; res-lst1 - contains all members from the input lst that satisfy the pred?
; res-lst2 - contains the remaining members of the input lst
(define (sxml:separate-list pred? lst)
(let loop ((lst lst)
(satisfy '())
(rest '()))
(cond
((null? lst)
(values (reverse satisfy) (reverse rest)))
((pred? (car lst)) ; the first member satisfies the predicate
(loop (cdr lst)
(cons (car lst) satisfy) rest))
(else
(loop (cdr lst)
satisfy (cons (car lst) rest))))))
;-------------------------------------------------
; Miscellaneous helpers
; Asserts that the given obj is a proper attribute node.
; If this is the case, returns #t. Otherwise, calls sxml:modification-error
; with the appropriate error message.
; Handles singular attributes correctly. In accordance with SXML 3.0, accepts
; aux lists as attribute nodes
(define (sxml:assert-proper-attribute obj)
(if
(or (and (pair? obj) ; aux node - any content is acceptable
(not (null? obj))
(eq? (car obj) '@))
(and (list? obj) ; '() is not a list
(symbol? (car obj))
(or (null? (cdr obj)) ; singular attribute
(null? (cddr obj)))))
#t
(sxml:modification-error
"improper attribute node - " obj)))
; Unites a list of annot-attributes into a single annot-attributes.
; Ensures that every attribute is a proper one, and that there is no duplicate
; attributes
; annot-attributes-lst ::= (listof annot-attributes)
; In accordance with SXML specification, version 3.0:
; [3] <annot-attributes> ::= (@ <attribute>* <annotations>? )
; In case of an error, returns #f.
; In the correct case, returns: annot-attributes
(define (sxml:unite-annot-attributes-lists . annot-attributes-lst)
(if
(null? annot-attributes-lst) ; nothing to do
'()
(let iter-lst ((src annot-attributes-lst)
(attrs '())
(annotations '()))
(if
(null? src) ; Recursion finished
(if (null? annotations)
(cons '@ (reverse attrs))
`(@ ,@(reverse attrs) (@ ,@annotations)))
(let iter-annot-attrs ((annot-attrs (cdar src))
(attrs attrs)
(annotations annotations))
(if
(null? annot-attrs) ; proceed with the outer loop
(iter-lst (cdr src) attrs annotations)
(let ((curr (car annot-attrs)))
(cond
((and (pair? curr)
(not (null? curr))
(eq? (car curr) '@))
; an annotation node
(iter-annot-attrs (cdr annot-attrs)
attrs
(append annotations (cdr curr))))
((sxml:assert-proper-attribute curr)
(if
(assq (car curr) attrs) ; duplicate attribute detected
(sxml:modification-error
"duplicate attribute - " (car curr))
(iter-annot-attrs (cdr annot-attrs)
(cons curr attrs)
annotations)))
(else ; improper attribute
#f)))))))))
;-------------------------------------------------
; The core function of document transformation into a new document
; Recursive SXML tree transformation
; curr-node - the node to be transformed
; targets-alist ::= (listof (cons node-chain update-target))
; node-chain ::= (listof node)
; node-chain - the chain of nodes, starting from the `curr-node' and proceeding
; with its decsednants until the update target
; Returns the transformed node
(define (sxml:tree-trans curr-node targets-alist)
(call-with-values
(lambda () (sxml:separate-list
(lambda (pair) (null? (car pair)))
targets-alist))
(lambda (matched ; handlers which match this node
targets-alist ; the rest
)
(and-let*
((after-subnodes ; curr-node after its subnodes are processed
(if
(or (not (pair? curr-node)) ; leaf node
(null? targets-alist) ; no more handlers
)
curr-node
(let process-attrs ((targets-alist targets-alist)
(src-attrs (sxml:attr-list curr-node))
(res-attrs '()))
(if
(null? src-attrs) ; all attributes processed
; Go to proceed child elements
(if
(null? targets-alist) ; children don't need to be processed
(cons ; Constructing the result node
(car curr-node) ; node name
((lambda (kids)
(if (null? res-attrs) ; no attributes
kids
(cons (cons '@ (reverse res-attrs))
kids)))
((if (and (not (null? (cdr curr-node)))
(pair? (cadr curr-node))
(eq? (caadr curr-node) '@))
cddr cdr)
curr-node)))
(let process-kids ((targets-alist targets-alist)
(src-kids (cdr curr-node))
(res-kids '()))
(cond
((null? src-kids) ; all kids processed
(call-with-values
(lambda () (sxml:separate-list
(lambda (obj)
(and (pair? obj) (eq? (car obj) '@)))
res-kids))
(lambda (more-attrs kids)
(if
(and (null? res-attrs) (null? more-attrs))
(cons ; Constructing the result node
(car curr-node) ; node name
kids)
(and-let*
((overall-attrs
(apply
sxml:unite-annot-attributes-lists
(cons
(cons '@ (reverse res-attrs))
more-attrs))))
(cons (car curr-node) ; node name
(cons overall-attrs kids)))))))
((and (pair? (car src-kids))
(eq? (caar src-kids) '@))
; attribute node - already processed
(process-kids
targets-alist (cdr src-kids) res-kids))
(else
(let ((kid-templates
(filter
(lambda (pair)
(eq? (caar pair) (car src-kids)))
targets-alist)))
(if
(null? kid-templates)
; this child node remains as is
(process-kids
targets-alist
(cdr src-kids)
(append res-kids (list (car src-kids))))
(and-let*
((new-kid
(sxml:tree-trans
(car src-kids)
(map
(lambda (pair)
(cons (cdar pair) (cdr pair)))
kid-templates))))
(process-kids
(filter
(lambda (pair)
(not (eq? (caar pair) (car src-kids))))
targets-alist)
(cdr src-kids)
(append
res-kids
(if (nodeset? new-kid)
new-kid
(list new-kid)))))))))))
(let* ((curr-attr (car src-attrs))
(attr-templates
(filter
(lambda (pair)
(eq? (caar pair) curr-attr))
targets-alist)))
(if
(null? attr-templates)
; this attribute remains as is
(process-attrs targets-alist
(cdr src-attrs)
(cons curr-attr res-attrs))
(let ((new-attr ; cannot produce error for attrs
(sxml:tree-trans
curr-attr
(map
(lambda (pair)
(cons (cdar pair) (cdr pair)))
attr-templates))))
(process-attrs
(filter
(lambda (pair)
(not (eq? (caar pair) curr-attr)))
targets-alist)
(cdr src-attrs)
(if (nodeset? new-attr)
(append (reverse new-attr) res-attrs)
(cons new-attr res-attrs)))))))))))
(let process-this ((new-curr-node after-subnodes)
(curr-handlers (map cdr matched)))
(if
(null? curr-handlers)
(if ; all handlers processed
(not (pair? new-curr-node))
new-curr-node ; atomic node
(call-with-values ; otherwise - unite attr lists
(lambda () (sxml:separate-list
(lambda (obj) (and (pair? obj) (eq? (car obj) '@)))
(cdr new-curr-node)))
(lambda (attrs kids)
(if (null? attrs)
new-curr-node ; node remains unchanged
(and-let*
((overall-attrs
(apply sxml:unite-annot-attributes-lists attrs)))
(cons
(car new-curr-node) ; node name
(cons overall-attrs kids)))))))
(process-this
((cadar curr-handlers) ; lambda
new-curr-node
(caar curr-handlers) ; context
(caddar curr-handlers) ; base-node
)
(cdr curr-handlers))))))))
; doc - a source SXML document
; update-targets ::= (listof update-target)
; update-target ::= (list context handler base-node)
; context - context of the node selected by the location path
; handler ::= (lambda (node context base-node) ...)
; handler - specifies the required transformation over the node selected
; base-node - the node with respect to which the location path was evaluated
;
; Returns the new document. In case of a transformation that results to a
; non-well-formed document, returns #f and the error message is displayed to
; stderr as a side effect
(define (sxml:transform-document doc update-targets)
(let ((targets-alist
(map-union
(lambda (triple)
(let ((node-path (reverse (sxml:context->content (car triple)))))
(if
(eq? (car node-path) doc)
(list (cons (cdr node-path) triple))
'())))
update-targets)))
(if (null? targets-alist) ; nothing to do
doc
(sxml:tree-trans doc targets-alist))))
;==========================================================================
; Processing update-specifiers
; Evaluates lambda-upd-specifiers for the SXML document doc
; Returns:
; update-targets ::= (listof update-target)
; update-target ::= (list context handler base-node)
; context - context of the node selected by the location path
; handler ::= (lambda (node context base-node) ...)
; handler - specifies the required transformation over the node selected
; base-node - the node with respect to which the location path was evaluated
(define (sxml:lambdas-upd-specifiers->targets doc lambdas-upd-specifiers)
(let ((doc-list (list doc)))
(letrec
((construct-targets
; base-cntxtset - base context set for the current upd-specifier
; lambdas-upd-specifiers - is assumed to be non-null?
(lambda (base-cntxtset lambdas-upd-specifiers)
(let ((triple (car lambdas-upd-specifiers)))
; Iterates members of the base context-set
; new-base ::= (listof context-set)
; Each context-set is obtained by applying the txpath-lambda
; to the each member of base-cntxtset
(let iter-base ((base-cntxtset base-cntxtset)
(res '())
(new-base '()))
(if
(null? base-cntxtset) ; finished scanning base context-set
(if
(null? (cdr lambdas-upd-specifiers)) ; no more members
res
(append
res
(construct-targets
(if
(cadadr lambdas-upd-specifiers) ; following is relative
(apply ddo:unite-multiple-context-sets new-base)
doc-list)
(cdr lambdas-upd-specifiers))))
(let* ((curr-base-context (car base-cntxtset))
(context-set ((car triple)
(list curr-base-context)
(cons 1 1)
'() ; dummy var-binding
)))
(iter-base
(cdr base-cntxtset)
(append res
(map
(lambda (context)
(list context
(caddr triple) ; handler
(sxml:context->node curr-base-context)))
context-set))
(cons context-set new-base)))))))))
(if
(null? lambdas-upd-specifiers) ; no transformation rules
'()
(construct-targets doc-list lambdas-upd-specifiers)))))
; "Precompiles" each of update-specifiers, by transforming location paths and
; update actions into lambdas.
; Returns:
; lambdas-upd-specifiers ::= (listof lambdas-upd-specifier)
; lambdas-upd-specifier ::= (list txpath-lambda relative? handler)
; txpath-lambda ::= (lambda (nodeset position+size var-binding) ...)
; txpath-lambda - full-argument implementation of a location path
; relative? - whether the txpath lambda is to be evaluated relatively to the
; node selected by the previous lambdas-upd-specifier, or with respect to
; the root of the document. For relative?=#t the base-node is the node
; selected by the previous lambdas-upd-specifier, otherwise the base node is
; the root of the document being transformed
; handler ::= (lambda (node context base-node) ...)
(define (sxml:update-specifiers->lambdas update-specifiers)
(let iter ((src update-specifiers)
(res '()))
(if
(null? src) ; every specifier processed
(reverse res)
(let ((curr (car src)))
(if
(or (not (list? curr))
(null? (cdr curr)))
(sxml:modification-error "improper update-specifier: " curr)
(and-let*
; Convert Location path to XPath AST
((ast (txp:xpath->ast (car curr))))
(call-with-values
(lambda ()
(if
(eq? (car ast) 'absolute-location-path)
(values
(ddo:ast-relative-location-path
(cons 'relative-location-path (cdr ast))
#f ; keep all ancestors
#t ; on a single level, since a single node
0 ; zero predicate nesting
'(0) ; initial var-mapping
)
#f)
(values
(ddo:ast-relative-location-path ast #f #t 0 '(0))
(not (null? res)) ; absolute for the first rule
)))
(lambda (txpath-pair relative?)
(if
(not txpath-pair) ; semantic error
txpath-pair ; propagate the error
(let ((txpath-lambda (car txpath-pair))
(action (cadr curr)))
(if
(procedure? action) ; user-supplied handler
(iter (cdr src)
(cons
(list txpath-lambda relative? action)
res))
(case action
((delete delete-undeep)
(iter (cdr src)
(cons
(list
txpath-lambda
relative?
(cdr
(assq action
`((delete . ,modif:delete)
(delete-undeep . ,modif:delete-undeep)))))
res)))
((insert-into insert-following insert-preceding)
(let ((params (cddr curr)))
(iter (cdr src)
(cons
(list
txpath-lambda
relative?
((cdr
(assq
action
`((insert-into . ,modif:insert-into)
(insert-following . ,modif:insert-following)
(insert-preceding . ,modif:insert-preceding))))
(lambda (context base-node) params)))
res))))
((replace)
(let ((params (cddr curr)))
(iter (cdr src)
(cons
(list txpath-lambda relative?
(lambda (node context base-node) params))
res))))
((rename)
(if
(or (null? (cddr curr)) ; no parameter supplied
(not (symbol? (caddr curr))))
(sxml:modification-error
"improper new name for the node to be renamed: "
curr)
(iter
(cdr src)
(cons
(let ((new-name (caddr curr)))
(list txpath-lambda relative? (modif:rename new-name)))
res))))
((move-into move-following move-preceding)
(if
(or (null? (cddr curr)) ; no lpath supplied
(not (string? (caddr curr))))
(sxml:modification-error
"improper destination location path for move action: "
curr)
(and-let*
((ast (txp:xpath->ast (caddr curr)))
(txpath-pair (ddo:ast-location-path ast #f #t 0 '(0))))
(iter (cdr src)
(cons
(list
(car txpath-pair)
#t
((cdr
(assq
action
`((move-into . ,modif:insert-into)
(move-following . ,modif:insert-following)
(move-preceding . ,modif:insert-preceding))))
(lambda (context base-node) base-node)))
(cons
(list txpath-lambda relative? modif:delete)
res))))))
(else
(sxml:modification-error "unknown action: " curr))))))))))))))
;==========================================================================
; Several popular handlers
; Node insertion
; node-specifier ::= (lambda (context base-node) ...)
; The lambda specifies the node to be inserted
(define (modif:insert-following node-specifier)
(lambda (node context base-node)
((if (nodeset? node) append cons)
node
(as-nodeset (node-specifier context base-node)))))
(define (modif:insert-preceding node-specifier)
(lambda (node context base-node)
(let ((new (node-specifier context base-node)))
((if (nodeset? new) append cons)
new
(as-nodeset node)))))
(define (modif:insert-into node-specifier)
(lambda (node context base-node)
(let* ((to-insert (as-nodeset (node-specifier context base-node)))
(insert-into-single ; inserts into single node
(lambda (node)
(if (not (pair? node)) ; can't insert into
node
(append node to-insert)))))
(if (nodeset? node)
(map insert-into-single node)
(insert-into-single node)))))
; Rename
(define (modif:rename new-name)
(let ((rename-single ; renames a single node
(lambda (node)
(if (pair? node) ; named node
(cons new-name (cdr node))
node))))
(lambda (node context base-node)
(if (nodeset? node)
(map rename-single node)
(rename-single node)))))
; Delete
(define modif:delete
(lambda (node context base-node) '()))
(define modif:delete-undeep
(let ((delete-undeep-single
(lambda (node)
(if (pair? node) (cdr node) '()))))
(lambda (node context base-node)
(if (nodeset? node)
(map delete-undeep-single node)
(delete-undeep-single node)))))
;==========================================================================
; Highest-level API function
; update-specifiers ::= (listof update-specifier)
; update-specifier ::= (list xpath-location-path action [action-parametes])
; xpath-location-path - addresses the node(s) to be transformed, in the form of
; XPath location path. If the location path is absolute, it addresses the
; node(s) with respect to the root of the document being transformed. If the
; location path is relative, it addresses the node(s) with respect to the
; node selected by the previous update-specifier. The location path in the
; first update-specifier always addresses the node(s) with respect to the
; root of the document. We'll further refer to the node with respect of which
; the location path is evaluated as to the base-node for this location path.
; action - specifies the modification to be made over each of the node(s)
; addressed by the location path. Possible actions are described below.
; action-parameters - additional parameters supplied for the action. The number
; of parameters and their semantics depend on the definite action.
;
; action ::= 'delete | 'delete-undeep |
; 'insert-into | 'insert-following | 'insert-preceding |
; 'replace |
; 'move-into | 'move-following | 'move-preceding |
; handler
; 'delete - deletes the node. Expects no action-parameters
; 'delete-undeep - deletes the node, but keeps all its content (which thus
; moves to one level upwards in the document tree). Expects no
; action-parameters
; 'insert-into - inserts the new node(s) as the last children of the given
; node. The new node(s) are specified in SXML as action-parameters
; 'insert-following, 'insert-preceding - inserts the new node(s) after (before)
; the given node. Action-parameters are the same as for 'insert-into
; 'replace - replaces the given node with the new node(s). Action-parameters
; are the same as for 'insert-into
; 'rename - renames the given node. The node to be renamed must be a pair (i.e.
; not a text node). A single action-parameter is expected, which is to be
; a Scheme symbol to specify the new name of the given node
; 'move-into - moves the given node to a new location. The single
; action-parameter is the location path, which addresses the new location
; with respect to the given node as the base node. The given node becomes
; the last child of the node selected by the parameter location path.
; 'move-following, 'move-preceding - the given node is moved to the location
; respectively after (before) the node selected by the parameter location
; path
; handler ::= (lambda (node context base-node) ...)
; handler - specifies the required transformation. It is an arbitrary lambda
; that consumes the node and its context (the latter can be used for addressing
; the other node of the source document relative to the given node). The hander
; can return one of the following 2 things: a node or a nodeset.
; 1. If a node is returned, than it replaces the source node in the result
; document
; 2. If a nodeset is returned, than the source node is replaced by (multiple)
; nodes from this nodeset, in the same order in which they appear in the
; nodeset. In particular, if the empty nodeset is returned by the handler, the
; source node is removed from the result document and nothing is inserted
; instead.
;
; Returns either (lambda (doc) ...) or #f
; The latter signals of an error, an the error message is printed into stderr
; as a side effect. In the former case, the lambda can be applied to an SXML
; document and produces the new SXML document being the result of the
; modification specified.
(define (sxml:modify . update-specifiers)
(and-let*
((lambdas-upd-specifiers
(sxml:update-specifiers->lambdas update-specifiers)))
(lambda (doc)
(sxml:transform-document
doc
(sxml:lambdas-upd-specifiers->targets doc lambdas-upd-specifiers)))))
;==========================================================================
; Destructive modifications
;-------------------------------------------------
; Helper cloning facilities
; These are required to avoid circular structures and such as the result of
; destructive modifications
; Clones the given SXML node
(define (sxml:clone node)
(letrec
((clone-nodeset ; clones nodeset
(lambda (nset)
(if (null? nset)
nset
(cons (sxml:clone (car nset)) (cdr nset))))))
(cond
((pair? node)
(cons (car node) (clone-nodeset (cdr node))))
; Atomic node
((string? node)
(string-copy node))
((number? node)
(string->number (number->string node)))
(else ; unknown node type - do not clone it
node))))
; Clones all members of the `nodeset', except for the `node', which is not
; cloned
(define (sxml:clone-nset-except nodeset node)
(letrec
((iter-nset
; encountered? - a boolean value: whether `node' already encountered
; in the head of the nodeset being processed
(lambda (nset encountered?)
(cond
((null? nset) nset)
((eq? (car nset) node)
(cons
(if encountered? ; already encountered before
(sxml:clone (car nset)) ; is to be now cloned
(car nset))
(iter-nset (cdr nset) #t)))
(else
(cons (sxml:clone (car nset))
(iter-nset (cdr nset) encountered?)))))))
(iter-nset nodeset #f)))
;-------------------------------------------------
; Facilities for mutation
; Destructively replaces the next list member for `prev' with the new `lst'
(define (sxml:replace-next-with-lst! prev lst)
(let ((next (cddr prev)))
(if
(null? lst) ; the member is to be just removed
(set-cdr! prev next)
(begin
(set-cdr! prev lst)
(let loop ((lst lst)) ; the lst is non-null
(if
(null? (cdr lst))
(set-cdr! lst next)
(loop (cdr lst))))))))
; Destructively updates the SXML document
; Returns the modified doc
; mutation-lst ::= (listof (cons context new-value)),
; new-value - a nodeset: the new value to be set to the node
(define (sxml:mutate-doc! doc mutation-lst)
(letrec
((tree-walk
(lambda (curr-node targets-alist)
(if
(not (pair? curr-node)) ; an atom
#t ; nothing to do
; Otherwise, the `curr-node' is a pair
(let loop ((lst curr-node)
(targets targets-alist))
(if
(null? targets)
#t ; nothing more to do
(begin
(if ((ntype?? '@) (car lst)) ; attribute node
(tree-walk (car lst) targets-alist)
#t ; dummy else-branch
)
(if
(null? (cdr lst)) ; this is the last member
#t ; nothing more to be done
(let ((next (cadr lst)))
(call-with-values
(lambda ()
(sxml:separate-list
(lambda (pair) (eq? (caar pair) next))
targets))
(lambda (matched ; handlers which match `next'
targets ; the rest
)
(if
(null? matched) ; nothing matched the next node
(loop (cdr lst) targets)
(let ((matched
(map
(lambda (pair) (cons (cdar pair) (cdr pair)))
matched)))
(cond
((assv '() matched) ; the `next' is to be mutated
=> (lambda (pair)
(let ((k (length (cdr pair))))
(sxml:replace-next-with-lst! lst (cdr pair))
(loop (list-tail lst k) targets))))
(else
(tree-walk next matched)
(loop (cdr lst) targets))))))))))))))))
(let ((targets-alist
(map-union
(lambda (pair)
(let ((node-path (reverse (sxml:context->content (car pair)))))
(if
(eq? (car node-path) doc)
(list (cons (cdr node-path) (cdr pair)))
'())))
mutation-lst)))
(cond
((null? targets-alist) ; nothing to do
#t)
((assv '() targets-alist) ; assv is specified for empty lists
; The root of the document itself is to be modified
=> (lambda (pair)
(set! doc (cadr pair))))
(else
(tree-walk doc targets-alist)))
doc)))
;-------------------------------------------------
; Selects the nodes to be mutated (by a subsequent destructive modification)
; This function is the close analog of `sxml:transform-document'
;
; Returns:
; mutation-lst ::= (listof (cons context new-value)),
; new-value - a nodeset: the new value to be set to the node;
; or #f in case of semantic error during tree processing (e.g. not a
; well-formed document after modification)
;
; doc - a source SXML document
; update-targets ::= (listof update-target)
; update-target ::= (list context handler base-node)
; context - context of the node selected by the location path
; handler ::= (lambda (node context base-node) ...)
; handler - specifies the required transformation over the node selected
; base-node - the node with respect to which the location path was evaluated
(define (sxml:nodes-to-mutate doc update-targets)
(letrec
(; targets-alist ::= (listof (cons node-chain update-target))
; node-chain - the chain of nodes, starting from the current node
; anc-upd? - whether an ancestor of the current node us updated
(tree-walk
(lambda (curr-node targets-alist)
(call-with-values
(lambda () (sxml:separate-list
(lambda (pair) (null? (car pair)))
targets-alist))
(lambda (matched ; handlers which match this node
targets ; the rest
)
(if
; No updates both on this level and on ancestor's level
(null? matched)
(let loop ((targets targets-alist)
(subnodes (append (sxml:attr-list curr-node)
((sxml:child sxml:node?) curr-node)))
(res '()))
(if
(or (null? targets) (null? subnodes))
res
(call-with-values
(lambda ()
(sxml:separate-list
(lambda (pair) (eq? (caar pair) (car subnodes)))
targets))
(lambda (matched targets)
(loop targets
(cdr subnodes)
(if
(null? matched)
res
(append res
(tree-walk
(car subnodes)
(map
(lambda (pair) (cons (cdar pair) (cdr pair)))
matched)))))))))
(list
(cons (cadar matched) ; context
(sxml:clone-nset-except
(as-nodeset
(sxml:tree-trans curr-node targets-alist))
curr-node)))))))))
(let ((targets-alist
(map-union
(lambda (triple)
(let ((node-path (reverse (sxml:context->content (car triple)))))
(if
(eq? (car node-path) doc)
(list (cons (cdr node-path) triple))
'())))
update-targets)))
(if (null? targets-alist) ; nothing to do
'()
(tree-walk doc targets-alist)))))
; A highest-level function
(define (sxml:modify! . update-specifiers)
(and-let*
((lambdas-upd-specifiers
(sxml:update-specifiers->lambdas update-specifiers)))
(lambda (doc)
(sxml:mutate-doc!
doc
(sxml:nodes-to-mutate
doc
(sxml:lambdas-upd-specifiers->targets doc lambdas-upd-specifiers))))))
(provide (all-defined)))

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,662 @@
; Module header is generated automatically
#cs(module stx-engine mzscheme
(require (lib "defmacro.ss"))
(require (rename (lib "pretty.ss") pp pretty-print))
(require (lib "string.ss" "srfi/13"))
(require "sxml-tools.ss")
(require "sxpathlib.ss")
(require "sxpath-ext.ss")
(require "txpath.ss")
(require "sxpath.ss")
(require "libmisc.ss")
(require (lib "ssax.ss" "web-server/tmp/ssax"))
;; $Id: stx-engine.scm,v 1.9403 2002/12/25 19:33:48 kl Exp kl $
; DL: if you are not using "access-remote.scm", uncomment the following line
;(define open-input-resource open-input-file)
;=============================================================================
; Auxilliary
(define stx:version
(string-append " $Revision: 1.9403 $" nl " $Date: 2002/12/25 19:33:48 $"))
(define (stx:error . messages)
(cerr nl "STX: ")
(apply cerr messages)
(cerr nl)
(exit -1))
; Reads content of a given SXML element 'obj' using Scheme reader.
; The content has to be a list of strings (first of them will be read).
; If the content is empty, "" is returned.
(define (stx:read-content obj objname)
(let ((ct (sxml:content obj)))
(cond
((null? ct) "")
((string? (car ct))
(with-exception-handler
(lambda(mes)
(apply stx:error
`("Error " ,nl ,mes ,nl "reading " ,objname " code:" ,nl
,(car ct) ,nl "from element" ,nl
,@(sxml:clean-feed (sxml:sxml->xml obj)) ,nl))
(exit))
(lambda()
(call-with-input-string (car ct) read))))
(else
(stx:error "Invalid " objname " element:" nl obj)))
))
(define (stx:clean-feed . fragments)
(reverse
(let loop ((fragments fragments) (result '()))
(cond
((null? fragments) result)
((not (car fragments)) (loop (cdr fragments) result))
((null? (car fragments)) (loop (cdr fragments) result))
(else
(loop (cdr fragments)
(cons (car fragments) result)))))))
; DL: Borrowed from the older version of SXML Tools
; Filter the 'fragments'
; The fragments are a list of strings, characters,
; numbers, thunks, #f -- and other fragments.
; The function traverses the tree depth-first, and returns a list
; of strings, characters and executed thunks, and ignores #f and '().
;
; If all the meaningful fragments are strings, then
; (apply string-append ... )
; to a result of this function will return its string-value
;
; It may be considered as a variant of Oleg Kiselyov's SRV:send-reply:
; While SRV:send-reply displays fragments, this function returns the list
; of meaningful fragments and filter out the garbage.
(define (sxml:clean-feed . fragments)
(reverse
(let loop ((fragments fragments) (result '()))
(cond
((null? fragments) result)
((not (car fragments)) (loop (cdr fragments) result))
((null? (car fragments)) (loop (cdr fragments) result))
((pair? (car fragments))
(loop (cdr fragments)
(loop (car fragments) result)))
; ((procedure? (car fragments))
; (loop (cdr fragments)
; (cons ((car fragments))
; result)))
(else
(loop (cdr fragments)
(cons (car fragments) result)))))))
;-----------------------------------------------------------------------------
; This functions will be probably moved to sxml-tools
; Transforms top-level *NAMESPACES* in SXML document
; parsed using SSAX 4.9 to aux-list representation compatible to
; SXML-spec. ver. 2.5
(define (sxml:refactor-ns tree)
(if (and (pair? tree) (pair? (cdr tree)) (pair? (cadr tree))
(eq? '*NAMESPACES* (caadr tree)))
`(,(car tree) (@@ (*NAMESPACES* ,@(cdadr tree))) ,@(cddr tree))
tree))
; Reads XML document as SXML tree. NS prefixes declared in XML document
; are used as namespace-id's.
(define (sxml:xml->sxml-autoprefix name)
(sxml:refactor-ns ; workaround for SSAX 4.9
(let ((ns-list (sxml:extract-prefix-assigs name)))
(ssax:xml->sxml
(open-input-resource name)
ns-list))))
; Extracts a value of attribute with given name from attr-list
;(define (sxml:attr-from-list attr-list name)
; (cond
; ((assq name attr-list)
; => cadr)
; (else #f)))
; Reads a root element of given XML file and returns a list
; of NS-prefix/URIs declared as a list of pairs.
(define (sxml:extract-prefix-assigs file)
(call-with-input-file
file
(lambda (p)
(ssax:skip-S p)
(let loop ((lst (ssax:read-markup-token p)))
(case (car lst)
((PI) ; Processing instruction
(ssax:skip-pi p) ; ignore until the end
(ssax:skip-S p)
(loop (ssax:read-markup-token p)))
((START)
(filter-and-map
(lambda(x)
(and (pair? (car x)) (eq? 'xmlns (caar x))))
(lambda(x)
(cons (cdar x) (cdr x)))
(ssax:read-attributes p '())))
(else
(display "Unknown token type: ")
(display (car lst))
(exit)))))))
;=============================================================================
; Tree transformation
; stx:apply-templates:: <tree> x <templates> x <root> x <environment> -> <new-tree>
; where
; <templates> ::= <default-template> <text-template> <template>*
; <default-template> ::= (*default* . <handler>)
; <text-template> ::= (*text* . <handler>)
; <template> ::= (<matcher> <handler>) | ( XMLname <handler>)
; <root> ::= <document-root>
; <environment> ::= <lambda-tuple>
; <matcher> ::= <node> <root> -> <nodeset>
; <handler> :: <node> <templates> <root> <environment> -> <new-node>
;
; The stx:apply-templates function visits top-level nodes of a given tree and
; process them in accordance with a list of templates given.
; If a node is a textual one then it is processed usind 'text-template',
; which has to be second element in given list of templates.
; If a node is a pair then stx:apply-templates looks up a corresponding template
; among given <templates> using stx:find-template function.
; If failed, stx:apply-templates tries to locate a *default* template,
; which has to be first element in given list of templates. It's an
; error if this latter attempt fails as well.
; Having found a template, its handler is applied to the current node.
; The result of the handler application, which should
; also be a <tree>, replaces the current node in output tree.
;
; This function is slightly similar to Oleg Kiselyov's "pre-post-order" function
; with *preorder* bindings.
(define (stx:apply-templates tree templates root environment)
(cond
((nodeset? tree)
(map (lambda (a-tree)
(stx:apply-templates a-tree templates root environment))
tree))
((pair? tree)
(cond
((tee-4 "Template: "
(stx:find-template tree
(cddr templates) ; *default* and *text* skipped
root))
=> (lambda (template)
((cadr template) tree templates root environment)))
(else
(if (eq? '*default* (caar templates))
((cadar templates) tree templates root environment)
(stx:error "stx:apply-templates: There is no template in: " templates
; nl "for: " tree
))
)))
((string? tree) ; *text*
(if (eq? '*text* (caadr templates))
((cadadr templates) tree)
(stx:error "stx:apply-templates: There is no *text* templates for: "
templates)))
(else (stx:error "Unexpected type of node: " tree))))
; stx:find-template: <node> x <templates> x <root> -> <template>
; This function returns first template in <templates> whouse <matcher>
; matches given <node>
; <matcher> matches node if:
; - if it is a symbol and its the same as the name of the node matched
; - if it is a procedure (sxpath/txpath generated one) then it is
; applyed (with respect to given <root>) sequentially to the matched node
; and its parents until the matched node is a member of a resulting nodeset
; or root node is reached. In the first case the node matches successfuly,
; in the second case it does not.
(define (stx:find-template node templates root)
(let ((pattern-matches?
(lambda (node pattern-test)
(let rpt ((context-node node))
(cond
((null? context-node) #f)
((memq node (pattern-test context-node
`((*root* ,root))))
#t)
(else ; try PARENT
(rpt ((sxml:node-parent root) context-node))))))))
(let rpt ((bnd templates))
(cond ((null? bnd) #f)
((and (symbol? (caar bnd)) (eq? (caar bnd) (car node)))
(car bnd))
((and (procedure? (caar bnd)) ; redundant?
(pattern-matches? node (caar bnd)))
(car bnd))
(else (rpt (cdr bnd)))))))
; Returns SXML tree for a given link.
; A link is a lambda-tuple of its attributes.
(define (stx:load-sst link)
((cond
((equal? (link 'type) "stx")
(lambda(x)
(call-with-input-file x read)))
((equal? (link 'type) "sxml")
(stx:make-stx-stylesheet
(lambda(x)
(call-with-input-file x read))))
; default is xml
(else
(lambda(x)
(stx:make-stx-stylesheet
(sxml:xml->sxml-autoprefix x)))))
(link 'href)))
; Transform top-level objects of stx:stylesheet
; to a list whose car is corresponding template (#f no templates generated)
; and whose cadr is corresponding environment binding (#f if no bindings),
(define (stx:stx->tmpl+env stx-objects)
(let rpt ((objs stx-objects)
(templts '())
(envrt '()))
(cond
((null? objs) (list (reverse templts)
envrt))
; templates
((eq? (caar objs) 'stx:template)
(let* ((obj (car objs))
(handler (caddr obj)))
(rpt
(cdr objs)
(cond
((sxml:attr obj 'match)
=> (lambda (x)
(cons
(list `(sxpath ,x) handler)
;(list `(sxp:xpath+root ,x) handler)
templts)))
((sxml:attr obj 'match-lambda)
=> (lambda (x)
(cons (list x handler) templts)))
(else
(verb-2 nl "NO match for: " (cadr obj))
templts))
(cond
((sxml:attr obj 'name)
=> (lambda (x)
(cons
(list (string->symbol x) handler) envrt)))
(else
(verb-2 nl "NO name for: " (cadr obj)
"==" (sxml:attr obj 'name))
envrt)))))
((eq? (caar objs) 'stx:variable)
(let* ((obj (car objs))
(name (sxml:attr obj 'name))
(code (caddr obj))) ; (sxml:content obj)
(rpt
(cdr objs)
templts
(cons (list (string->symbol name) code) envrt))))
(else
(verb-2 nl "Unrecognized object: " (caar objs) nl)
(rpt (cdr objs) templts envrt)))))
;
(define (stx:write-ss t+e fname)
(let* ((of
; DL: replacing this non-R5RS call with the following piece of code
;(open-output-file fname 'replace)
(begin
(when (file-exists? fname) (delete-file fname))
(open-output-file fname)))
(wrt (lambda x
(for-each (lambda(y) (display y of)) x))))
(wrt "#cs(module transform mzscheme" nl
"(require" nl
"(rename (lib \"list.ss\") sort mergesort)" nl
"(lib \"stx-engine.ss\" \"sxml\")" nl
"(lib \"util.ss\" \"ssax\")" nl
"(lib \"txpath.ss\" \"sxml\")" nl
"(lib \"sxpath-ext.ss\" \"sxml\")" nl
"(lib \"sxml-tools.ss\" \"sxml\")" nl
"(lib \"sxpathlib.ss\" \"sxml\")" nl
"(lib \"libmisc.ss\" \"sxml\")" nl
"(lib \"myenv.ss\" \"ssax\")" nl
"(lib \"common.ss\" \"sxml\"))" nl
"(provide stylesheet)" nl)
(wrt nl "(define stylesheet (list " nl "(list ; templates:")
(for-each
(lambda(x)
(wrt nl "(list ")
(pp (car x) of)
(wrt "")
(pp (cadr x) of)
(wrt ")" nl))
(car t+e))
(wrt ") ; end templates" nl nl "( list ; environment:")
(for-each
(lambda(x)
(wrt nl "(list '" (car x) nl)
(pp (cadr x) of)
(wrt ") ; end of `" (car x) "'" nl))
(cadr t+e))
(wrt ") ; end environment" nl)
(wrt ")) ; end stylesheet" nl)
(wrt ")" nl) ; end module
))
; transformate given SXML document <doc> using stylesheet <sst> in SXML
; format
(define (stx:transform-dynamic doc sst-sxml)
(stx:transform
doc
(stx:eval-transformer
(stx:translate sst-sxml))))
; transformate given SXML document <doc> loading prepared stylesheet from
; a file <sst-file>
; DL: commented out because of DYNAMIC-REQUIRE
;(define (stx:transform-static doc sst-file)
; (stx:transform
; doc
; (dynamic-require sst-file 'stylesheet)))
; writes to a file <sst-file> prepared stylesheet given in SXML format <sst>
(define (stx:write-transformer sst file)
(stx:write-ss (stx:translate sst) file))
; evalutes components of given (in Scheme) STX stylesheet and returns a "prepared"
; stylesheet where all the necessary S-expressions are evaluated
(define (stx:eval-transformer sst-scm)
(list
(map
(lambda(x)
(list (eval (car x))
(eval (cadr x))))
(car sst-scm))
(map
(lambda(x)
(list (car x)
(eval (cadr x))))
(cadr sst-scm))))
; transforms given SXML document <doc> using prepared (compiled or eval'uated)
; stylesheet <sst-lambda>
(define (stx:transform doc sst-lambda)
(let ((string-out sxml:sxml->html))
(stx:apply-templates doc
; bindings
(tee-3
"Templates: "
(append
`((*default*
,(lambda (node bindings root environment)
(stx:apply-templates (sxml:content node)
bindings
root environment)))
(*text*
,string-out))
(car sst-lambda)))
;root
doc
; environment
(apply
lambda-tuple
(tee-3
"Environment: "
(append
`((stx:version ,stx:version))
(cadr sst-lambda)
))))))
; translate given STX stylesheet <sst> from SXML to Scheme
(define (stx:translate sst)
(let*
(
; (output-attr
; ; lambda tuple of 'xsl:output' attributes
; (apply lambda-tuple
; (cond
; (((if-car-sxpath '(xsl:stylesheet xsl:output @)) sst)
; => cdr)
; (else '((method "html"))))))
; ((string-out)
; (case (string->symbol (output-attr 'method))
; ((text) self)
; ((xml) sxml:string->xml)
; (else sxml:sxml->html)))
(stx-sst
(tee-2
"STX stylesheets: "
(append sst
(apply append
(map
(lambda(x)
(tee-2
"IMPORTED: "
; just stx:template and stx:variable elements
; are used from imported _STX_ stylesheets
((sxpath '((*or* stx:template stx:variable)))
(stx:load-sst (apply lambda-tuple
(sxml:attr-list x))))))
((sxpath '((*or* xsl:import stx:import))) sst))))))
(templates+env
(tee-2
"templates+env"
(stx:stx->tmpl+env
((sxpath '(*)) stx-sst))))
)
templates+env))
; Generates an stx:stylesheet from a stylesheet represented as <stx-tree>
; in SXML format
(define (stx:make-stx-stylesheet stx-tree)
(let*
((output-attr
; lambda tuple of 'xsl:output' attributes
(apply lambda-tuple
(cond
(((if-car-sxpath '(xsl:stylesheet xsl:output @)) stx-tree)
=> cdr)
(else '((method "html"))))))
(string-out
(case (string->symbol (output-attr 'method))
((text) 'self)
((xml) 'sxml:string->xml)
(else 'sxml:sxml->html)))
(custom-prefixes
(map string->symbol
((sxpath '(xsl:stylesheet stx:import @ prefix *text*))
stx-tree))))
(cons 'stx:stylesheet
(map
(lambda(x)
(cond
; xsl:template
((eq? (car x) 'xsl:template)
(stx:xsl->stx x output-attr string-out custom-prefixes
stx-tree))
; stx:template
((eq? (car x) 'stx:template)
(stx:scm->stx x))
; stx:variable
((eq? (car x) 'stx:variable)
(stx:stx-var->stx x))
; xsl:variable
((eq? (car x) 'xsl:variable)
(stx:xsl-var->stx x))
(else x)))
((sxpath `(xsl:stylesheet *)) stx-tree)))
))
;-----------------------------------------------------------------------------
; Transformers for XSL stylesheet elements
; Transforms element stx:template (extracted from XSL stylesheet and
; represented in SXML format) to stx:template
(define (stx:scm->stx tmplt)
`(stx:template (@ ,@(sxml:attr-list tmplt))
(lambda (current-node stx:templates current-root $)
,(stx:read-content tmplt "<stx:template@match>"))))
; Transforms STX element stx:var to stx:variable
(define (stx:stx-var->stx var)
`(stx:variable (@ ,@(sxml:attr-list var))
,(stx:read-content var "<stx:variable")))
; Transforms XSL element xsl:var to stx:variable
(define (stx:xsl-var->stx var)
`(stx:variable (@ ,@(sxml:attr-list var))
',(sxml:content var)))
(define (stx:attr->html attr)
(if (equal? "" (cadr attr))
`(list " " ,(sxml:ncname attr))
`(list " " ,(sxml:ncname attr) "='" ,(cadr attr) "'")))
; transforms an xsl:template to stx:template
(define (stx:xsl->stx tmplt output-attr doc-string-out
custom-prefixes c-root)
(let*
; list of template's attributes
((attr-list (sxml:attr-list tmplt))
(sst-method (cond
((sxml:attr-from-list attr-list 'stx:method)
=> string->symbol)
; xml is default
(else 'xml)))
; output string for _template_ (not document!) conversion
(sst-string-out
(case sst-method
((text) self)
((html) sxml:string->html)
(else sxml:sxml->xml))))
`(stx:template (@ ,@attr-list)
(lambda (current-node stx:templates current-root $)
,(cons 'list
;(stx:clean-feed
(stx:apply-templates
(sxml:content tmplt)
`((*default*
,(lambda (tt-node bindings root environment)
(if (cond ((sxml:name->ns-id (car tt-node))
=> (lambda(x)
(member (string->symbol x)
custom-prefixes)))
(else #f))
; user-defined tag
`(stx:call-function ,(sxml:ncname tt-node)
"Custom Tag"
,tt-node
$)
; XHTML tag
(let ((nm (sxml:ncname tt-node))
(content (sxml:content tt-node)))
(if (null? content)
`(list "<" ,nm ,@(map stx:attr->html
(sxml:attr-list tt-node)) "/>")
`(list "<" ,nm ,@(map stx:attr->html
(sxml:attr-list tt-node)) ">"
,@(stx:apply-templates content
bindings root environment)
"</" ,nm ">" ))))
))
(*text* ; text string in template
,sst-string-out)
(xsl:apply-templates ; t-node is <xsl:apply-templates/>
,(lambda (t-node bindings root environment)
`(stx:apply-templates
,(cond
((sxml:attr t-node 'select)
=> (lambda (lp)
`((sxpath ,lp) current-node current-root)
;`((sxp:xpath+root ,lp) current-node current-root)
))
(else '(sxml:content current-node)))
stx:templates current-root $)))
(xsl:if
,(lambda (t-node bindings root environment)
``(stx:apply-templates
,(sxml:content ,t-node)
bindings
root
environment)
))
(xsl:call-template ; t-node is <xsl:call-template/>
,(lambda (t-node bindings root environment)
`(stx:call-function ,(sxml:attr t-node 'name)
"Named Template"
,t-node
$)))
(xsl:value-of ; t-node is <xsl:value-of/>
,(lambda (t-node bindings root environment)
`(,(if (equal? "yes"
(sxml:attr t-node 'disable-output-escaping))
'self
doc-string-out)
(sxml:string
((sxpath ,(sxml:attr t-node 'select))
;((sxp:xpath ,(sxml:attr t-node 'select))
current-node)))))
; <xsl:copy-of/>
(xsl:copy-of
,(lambda (t-node bindings root environment)
`((sxpath ,(sxml:attr t-node 'select)) current-node)))
;`((sxp:xpath ,(sxml:attr t-node 'select)) current-node)))
(stx:eval ; t-node is <stx:eval/>
,(lambda (t-node bindings root environment)
(let ((content
(stx:read-content t-node "<stx:eval>")))
`(call-with-err-handler
(lambda()
(eval ,content))
(lambda(mes)
(apply stx:error `("Error " ,nl ,mes ,nl
"while evaluating code:" ,nl
,,content
,nl "from element" ,nl
,@(sxml:clean-feed
(sxml:sxml->xml
',t-node)))))))))
) c-root
(lambda-tuple) ; compile-time environment
))))))
; A helper for stx:xsl->stx
(define-macro stx:call-function
(lambda (name type tpl-node $-env)
`(let ((fn (,$-env
(string->symbol ,name))))
(if
(eq? fn '*LT-NOT-FOUND*)
(apply stx:error
(append
(list "Undefined " ,type " with name " ,name " is called by:" nl)
(sxml:clean-feed (sxml:sxml->xml ',tpl-node))
(list nl "Valid names: ") (map car (,$-env))
))
(call-with-err-handler
(lambda()
(sxml:clean-feed
(fn current-node stx:templates current-root (,$-env '*LT-ADD*
`(stx:param ,',tpl-node)))))
(lambda (mes)
(apply stx:error
(list ,type " evaluation ERROR"
nl mes nl "for:" nl
(sxml:clean-feed
(sxml:sxml->xml ',tpl-node))))))))))
;==============================================================================
; Wrappers
;(define stx:eval eval)
;(define txpath sxp:xpath+root)
; NOTE: namespace for vars and templates is shared!
(provide (all-defined)))

View File

@ -0,0 +1,871 @@
; Module header is generated automatically
#cs(module sxml-tools mzscheme
(require (lib "defmacro.ss"))
(require (lib "string.ss" "srfi/13"))
(require (lib "ssax.ss" "web-server/tmp/ssax"))
(require "sxpathlib.ss")
;; S X M L T o o l s
; $Revision: 3.14 $ from $Date: 2003/12/23 05:39:31 $:
;
; This software is in Public Domain.
; IT IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.
;
; Please send bug reports and comments to lisovsky@acm.org
; Kirill Lisovsky
;
; SXML normal form used for normalization-dependent functions:
; If attr-list is present it's always the second in SXML element.
; If aux-list is present - then list of attributes is always
; included, and aux-list is always the third.
; Minimized form is just the same, but all the empty aux-lists are
; absent, and empty attr-lists are present only in elements with aux-lists
; present.
;==============================================================================
; Auxiliary functions.
; unlike filter-map from SRFI-1 this function uses separate predicate
; and mapping functions.
; Applies proc to all the elements of source list that satisfy the predicate
; and return the list of the results.
(define (filter-and-map pred proc lis)
(let rpt ((l lis))
(if (null? l)
'()
(if (pred (car l))
(cons (proc (car l)) (rpt (cdr l)))
(rpt (cdr l))))))
; Applies pred to every member of lst and yields #t if all the results
; are #t
(define (check-list pred lst)
(cond
((null? lst) #t)
((pred (car lst))
(check-list pred (cdr lst)))
(else #f)))
; Returns attr-list node for a given obj
; or #f if it is absent
(define (sxml:attr-list-node obj)
(if (and (not (null? (cdr obj)))
(pair? (cadr obj))
(eq? '@ (caadr obj)))
(cadr obj)
#f))
; Returns attr-list wrapped in list
; or '((@)) if it is absent and aux-list is present
; or '() if both lists are absent
(define (sxml:attr-as-list obj)
(cond
((sxml:attr-list-node obj)
=> list)
((sxml:aux-list-node obj)
'((@)))
(else '())))
; Returns aux-list node for a given obj
; or #f if it is absent
(define (sxml:aux-list-node obj)
(if
(or (null? (cdr obj))
(null? (cddr obj))
(not (pair? (caddr obj)))
(not (eq? (caaddr obj) '@@)))
#f
(caddr obj)))
; Returns aux-list wrapped in list
; or '() if it is absent
(define (sxml:aux-as-list obj)
(cond
((sxml:aux-list-node obj)
=> list)
(else '())))
; optimized (string-rindex name #\:)
; returns position of a separator between namespace-id and LocalName
(define-macro (sxml:find-name-separator len)
`(let rpt ((pos (-- ,len)))
(cond
((negative? pos) #f)
((char=? #\: (string-ref name pos)) pos)
(else (rpt (-- pos))))))
; sxml error message
(define (sxml:error . messages)
(cerr nl "SXML ERROR: ")
(apply cerr messages)
(cerr nl)
(exit -1))
;==============================================================================
; Predicates
; Predicate which returns #t if given element <obj> is empty.
; Empty element has no nested elements, text nodes, PIs, Comments or entities
; but it may contain attributes or namespace-id.
; It is a SXML counterpart of XML empty-element.
(define (sxml:empty-element? obj)
(not
((select-first-kid
(lambda(x)
(or ((ntype-names?? '(*PI* *COMMENT* *ENTITY*)) x)
((ntype?? '*) x)
(string? x)))) obj)))
; Returns #t if the given <obj> is shallow-normalized SXML element.
; The element itself has to be normalised but its nested elements are not tested.
(define (sxml:shallow-normalized? obj)
(or
(null? (cdr obj))
(and (or
(and
(pair? (cadr obj))
(eq? (caadr obj) '@))
(not ((select-first-kid (ntype-names?? '(@ @@))) obj)))
(or (null? (cddr obj))
(and (pair? (caddr obj))
(eq? (caaddr obj) '@@))
(not ((select-first-kid (ntype?? '@@)) obj))))))
; Returns #t if the given <obj> is normalized SXML element.
; The element itself and all its nested elements have to be normalised.
(define (sxml:normalized? obj)
(and
(sxml:shallow-normalized? obj)
(check-list
(lambda(x)
(if
(sxml:element? x)
(sxml:normalized? x)
#t))
(sxml:content obj))
))
; Returns #t if the given <obj> is shallow-minimized SXML element.
; The element itself has to be minimised but its nested elements are not tested.
(define (sxml:shallow-minimized? obj)
(and
(sxml:shallow-normalized? obj)
(not (and (sxml:aux-list-node obj)
(null? (sxml:aux-list obj))))
(not (and (sxml:attr-list-node obj)
(null? (sxml:attr-list obj))
(not (sxml:aux-list-node obj))))))
; Returns #t if the given <obj> is minimized SXML element.
; The element itself and all its nested elements have to be minimised.
(define (sxml:minimized? obj)
(and
(sxml:shallow-minimized? obj)
(check-list
(lambda(x)
(if
(sxml:element? x)
(sxml:minimized? x)
#t))
(sxml:content obj))
))
;==============================================================================
; Accessors
; Returns a name of a given SXML node
; It is introduced for the sake of encapsulation.
(define sxml:name car)
; A version of sxml:name, which returns #f if the given <obj> is
; not a SXML element.
; Otherwise returns its name.
(define (sxml:element-name obj)
(and ((ntype?? '*) obj)
(car obj)))
; Safe version of sxml:name, which returns #f if the given <obj> is
; not a SXML node.
; Otherwise returns its name.
(define (sxml:node-name obj)
(and (pair? obj)
(symbol? (car obj))
(car obj)))
; Returns Local Part of Qualified Name (Namespaces in XML production [6])
; for given obj, which is ":"-separated suffix of its Qualified Name
; If a name of a node given is NCName (Namespaces in XML production [4]), then
; it is returned as is.
; Please note that while SXML name is a symbol this function returns a string.
(define (sxml:ncname obj)
(let* ((name (symbol->string (car obj)))
(len (string-length name)))
(cond
((sxml:find-name-separator len)
=> (lambda (pos)
(substring name (+ pos 1) len)))
(else name))))
; Returns namespace-id part of given name, or #f if it's LocalName
(define (sxml:name->ns-id sxml-name)
(let* ((name (symbol->string sxml-name)))
(cond
((sxml:find-name-separator (string-length name))
=> (lambda (pos)
(substring name 0 pos)))
(else #f))))
; Returns the content of given SXML element or nodeset (just text and element
; nodes) representing it as a list of strings and nested elements in document
; order. This list is empty if <obj> is empty element or empty list.
(define (sxml:content obj)
(((if (nodeset? obj)
sxml:filter
select-kids)
(lambda(x)
(or
(string? x) ; ((ntype?? '*text*) x)
((ntype?? '*) x))))
obj))
; Returns a string which combines all the character data
; from text node childrens of the given SXML element
; or "" if there is no text node children
(define (sxml:text obj)
(let ((tnodes
((select-kids
string?)
obj)))
(cond
((null? tnodes) "")
((null? (cdr tnodes))
(car tnodes))
(else (apply string-append tnodes)))))
;------------------------------------------------------------------------------
; Normalization-dependent accessors
;
;
; "Universal" accessors are less effective but may be used for non-normalized SXML
; Safe accessors are named with suffix '-u'
;
; "Fast" accessors are optimized for normalized SXML data.
; They are not applicable to arbitrary non-normalized SXML data
; Their names has no specific suffixes
; Returns all the content of normalized SXML element except attr-list and
; aux-list.
; Thus it includes PI, COMMENT and ENTITY nodes as well as TEXT and ELEMENT nodes
; returned by sxml:content.
; Returns a list of nodes in document order or empty list if <obj> is empty
; element or empty list.
; This function is faster than sxml:content
(define (sxml:content-raw obj)
((if (and (not (null? (cdr obj)))
(pair? (cadr obj)) (eq? (caadr obj) '@))
(if (and (not (null? (cddr obj)))
(pair? (caddr obj)) (eq? (caaddr obj) '@@))
cdddr
cddr)
cdr) obj))
; Returns the list of attributes for given element or nodeset.
; Analog of ((sxpath '(@ *)) obj)
; Empty list is returned if there is no list of attributes.
(define (sxml:attr-list-u obj)
(cond (((select-first-kid (ntype?? '@)) obj)
=> cdr)
(else '())))
; Returns the list of auxiliary nodes for given element or nodeset.
; Analog of ((sxpath '(@@ *)) obj)
; Empty list is returned if a list of auxiliary nodes is absent.
(define (sxml:aux-list obj)
(if
(or (null? (cdr obj))
(null? (cddr obj))
(not (pair? (caddr obj)))
(not (eq? (caaddr obj) '@@)))
'()
(cdaddr obj)))
; Returns the list of auxiliary nodes for given element or nodeset.
; Analog of ((sxpath '(@@ *)) obj)
; Empty list is returned if a list of auxiliary nodes is absent.
(define (sxml:aux-list-u obj)
(cond (((select-first-kid (ntype?? '@@)) obj)
=> cdr)
(else '())))
; Return the first aux-node with <aux-name> given in SXML element <obj>
; or #f is such a node is absent.
; NOTE: it returns just the FIRST node found even if multiple nodes are
; present, so it's mostly intended for nodes with unique names
(define (sxml:aux-node obj aux-name)
(cond
((assq aux-name (sxml:aux-list obj)))
(else #f)))
; Return a list of aux-node with <aux-name> given in SXML element <obj>
; or '() if such a node is absent.
(define (sxml:aux-nodes obj aux-name)
(filter
(lambda(x) (eq? aux-name (car x)))
(sxml:aux-list obj)))
; Accessor for an attribute <attr-name> of given SXML element <obj> which
; It returns:
; the value of the attribute if the attribute is present
; #f if there is no such an attribute in the given element
(define (sxml:attr obj attr-name)
(cond
((assq attr-name (sxml:attr-list obj))
=> cadr)
(else #f)))
; Extracts a value of attribute with given name from attr-list
(define (sxml:attr-from-list attr-list name)
(cond
((assq name attr-list)
=> cadr)
(else #f)))
; Accessor for a numerical attribute <attr-name> of given SXML element <obj>
; which It returns:
; a value of the attribute as the attribute as a number if the attribute
; is present and its value may be converted to number using string->number
; #f if there is no such an attribute in the given element or
; its value can't be converted to a number
(define (sxml:num-attr obj attr-name)
(cond
((assq attr-name (sxml:attr-list obj))
=> (lambda(x) (string->number (cadr x))))
(else #f)))
; Accessor for an attribute <attr-name> of given SXML element <obj> which
; may also be an attributes-list or nodeset (usually content of SXML element)
;
; It returns:
; the value of the attribute if the attribute is present
; #f if there is no such an attribute in the given element
(define (sxml:attr-u obj attr-name)
(cond
((assq attr-name
; the list of attributes is computed below
(cond
((and (not (null? (cdr obj)))
(pair? (cadr obj))
(eq? '@ (caadr obj)))
(cdadr obj)) ; fast track for normalized elements
((eq? '@ (car obj))
(cdr obj)) ; if applied to attr-list
(else (sxml:attr-list-u obj))))
=> cadr)
(else #f)))
; Returns the list of namespaces for given element.
; Analog of ((sxpath '(@@ *NAMESPACES* *)) obj)
; Empty list is returned if there is no list of namespaces.
(define (sxml:ns-list obj)
(cond ((assv '*NAMESPACES* (sxml:aux-list obj))
=> cdr)
(else '())))
; Returns the list of namespace-assoc's for given namespace-id in
; SXML element <obj>.
; Analog of ((sxpath '(@@ *NAMESPACES* namespace-id)) obj)
; Empty list is returned if there is no namespace-assoc with namespace-id
; given.
(define (sxml:ns-id->nodes obj namespace-id)
(filter
(lambda(x)
(eq? (car x) namespace-id))
(sxml:ns-list obj)))
; It returns:
; A URI's for namespace-id given
; #f if there is no namespace-assoc with namespace-id given
(define (sxml:ns-id->uri obj namespace-id)
(cond
((assq namespace-id (sxml:ns-list obj))
=> cadr)
(else #f)))
; Returns a list of namespace-assocs nodes for NS URI given
(define (sxml:ns-uri->nodes obj URI)
(filter
(lambda (ns-assoc)
(string=? (cadr ns-assoc) URI))
(sxml:ns-list obj)))
; Returns a namespace-id for NS URI given
(define (sxml:ns-uri->id obj URI)
(let rpt ((ns-assocs (sxml:ns-list obj)))
(cond
((null? ns-assocs) #f)
((string=? (cadar ns-assocs) URI)
(caar ns-assocs))
(else (rpt (cdr ns-assocs)))
)))
; Returns namespace-id for given namespace-assoc list
(define sxml:ns-id car)
; Returns URI for given namespace-assoc list
(define sxml:ns-uri cadr)
; It returns namespace prefix for given namespace-assoc list
; Original (as in XML document) prefix for namespace-id given
; has to be strored as the third element in namespace-assoc list
; if it is different from namespace-id.
; If original prefix is omitted in namespace-assoc then
; namespace-id is used instead
(define (sxml:ns-prefix ns-assoc)
(if (> (length ns-assoc) 2)
(caddr ns-assoc)
(car ns-assoc)))
;==============================================================================
; Data modification functions
; Constructors and mutators for normalized SXML data
;
; This functions are optimized for normalized SXML data.
; They are not applicable to arbitrary non-normalized SXML data
;
; Most of the functions are provided in two variants:
; 1. side-effect intended functions for linear update of given elements.
; Their names are ended with exclamation mark.
; An example:
; sxml:change-content!
; 2. pure functions without side-effects which return modified elements.
; An example:
; sxml:change-content
; Change the content of given SXML element to <new-content>
; If <new-content> is an empty list then the <obj> is transformed
; The resulting SXML element is normalized
; Former name sxml:content!
#;(define (sxml:change-content! obj new-content)
(set-cdr! obj
`(
,@(sxml:attr-as-list obj)
,@(sxml:aux-as-list obj)
,@new-content)))
; Change the content of given SXML element to <new-content>
; If <new-content> is an empty list then the <obj> is transformed
; to an empty element
; The resulting SXML element is normalized
(define (sxml:change-content obj new-content)
`(,(sxml:name obj)
,@(sxml:attr-as-list obj)
,@(sxml:aux-as-list obj)
,@new-content))
; The resulting SXML element is normalized, if <new-attrlist> is empty,
; the cadr of <obj> is (@)
(define (sxml:change-attrlist obj new-attrlist)
`(,(sxml:name obj)
,@(cond
(new-attrlist
`((@ ,@new-attrlist)))
((sxml:aux-list-node obj)
'((@)))
(else `()))
,@(sxml:aux-as-list obj)
,@(sxml:content obj)))
; The resulting SXML element is normalized, if <new-attrlist> is empty,
; the cadr of <obj> is (@)
; Former name sxml:attrlist!
#;(define (sxml:change-attrlist! obj new-attrlist)
(set-cdr! obj
`(
,@(cond
(new-attrlist
`((@ ,@new-attrlist)))
((sxml:aux-list-node obj)
'((@)))
(else `()))
,@(sxml:aux-as-list obj)
,@(sxml:content obj))))
; Change a name of SXML element destructively
; Former name was 'sxml:name!'
#;(define (sxml:change-name! obj new-name)
(set-car! obj new-name))
; Returns SXML element with its name changed
(define (sxml:change-name obj new-name)
(cons new-name (cdr obj)))
; Returns SXML element <obj> with attribute <attr> added or #f
; if the attribute with given name already exists,
; <attr> is (<attr-name> <attr-value>)
; Pure functional counterpart to sxml:add-attr!
(define (sxml:add-attr obj attr)
(let ((attr-list (sxml:attr-list obj)))
(if (assq (car attr) attr-list)
#f
`(,(sxml:name obj)
(@ ,@(cons attr attr-list))
,@(sxml:aux-as-list obj)
,@(sxml:content obj)))))
; Add an attribute <attr> for an element <obj>
; Returns #f if the attribute with given name already exists.
; The resulting SXML node is normalized.
; Linear update counterpart to sxml:add-attr
#;(define (sxml:add-attr! obj attr)
(let ((attr-list (sxml:attr-list obj)))
(if (assq (car attr) attr-list)
#f
(begin
(set-cdr! obj
`(
(@ ,@(cons attr attr-list))
,@(sxml:aux-as-list obj)
,@(sxml:content obj)))
obj))))
; Returns SXML element <obj> with changed value of attribute <attr> or #f
; if where is no attribute with given name.
; <attr> is (<attr-name> <attr-value>)
(define (sxml:change-attr obj attr)
(let ((attr-list (sxml:attr-list obj)))
(if (null? attr-list)
#f
(cond
((assv (car attr) attr-list)
=> (lambda (y)
`(,(sxml:name obj)
(@ ,@(map
(lambda(at)
(if
(eq? at y)
attr
at))
attr-list))
,@(sxml:aux-as-list obj)
,@(sxml:content obj)
)))
(else #f)))))
; Change value of the attribute for element <obj>
; <attr> is (<attr-name> <attr-value>)
; Returns #f if where is no such attribute
#;(define (sxml:change-attr! obj attr)
(let ((x (sxml:attr-list obj)))
(if (null? x)
#f
(cond
((assv (car attr) x) => (lambda (y)
(set-cdr! y (cdr attr)) obj))
(else #f)))))
; Set attribute <attr> of element <obj>
; If there is no such attribute the new one is added
(define (sxml:set-attr obj attr)
(let ((attr-list (sxml:attr-list obj)))
(cond
((assv (car attr) attr-list)
=> (lambda (y)
`(,(sxml:name obj)
(@ ,@(map
(lambda(at)
(if
(eq? at y)
attr
at))
attr-list))
,@(sxml:aux-as-list obj)
,@(sxml:content obj)
)))
(else
`(,(sxml:name obj)
(@ ,@(cons attr attr-list))
,@(sxml:aux-as-list obj)
,@(sxml:content obj))))
))
; Set attribute <attr> of element <obj>
; If there is no such attribute the new one is added
#;(define (sxml:set-attr! obj attr)
(let ((attr-list (sxml:attr-list obj)))
(cond
((assv (car attr) attr-list)
=> (lambda (x) (set-cdr! x (cdr attr))))
(else (set-cdr! obj
`((@ ,@(cons attr attr-list))
,@(sxml:aux-as-list obj)
,@(sxml:content obj))))
)))
; Returns SXML element <obj> with an auxiliary node <aux-node> added
(define (sxml:add-aux obj aux-node)
`(,(sxml:name obj)
(@ ,@(sxml:attr-list obj))
(@@ ,@(cons aux-node (sxml:aux-list obj)))
,@(sxml:content obj)))
; Add an auxiliary node <aux-node> for an element <obj>
#;(define (sxml:add-aux! obj aux-node)
(set-cdr! obj
`(
(@ ,@(sxml:attr-list obj))
(@@ ,@(cons aux-node (sxml:aux-list obj)))
,@(sxml:content obj)))
obj)
; Eliminates empty lists of attributes and aux-lists for given SXML element
; <obj> and its descendants ("minimize" it)
; Returns: minimized and normalized SXML element
#;(define (sxml:squeeze! obj)
(set-cdr! obj
`(,@(cond
((sxml:attr-list-node obj)
=> (lambda (atl)
(if (and (null? (cdr atl))
(null? (sxml:aux-list obj)))
'()
(list atl))))
(else '()))
,@(cond ((sxml:aux-list-node obj)
=> (lambda (axl)
(if (null? (cdr axl))
'()
(list axl))))
(else '()))
,@(map
(lambda(x)
(cond
(((ntype?? '*) x)
(sxml:squeeze! x)
x)
(else x)))
(sxml:content obj))
))
)
; Eliminates empty lists of attributes and aux-lists for given SXML element
; <obj> and its descendants ("minimize" it)
; Returns: minimized and normalized SXML element
(define (sxml:squeeze obj)
`(,(sxml:name obj)
,@(cond
((sxml:attr-list-node obj)
=> (lambda (atl)
(if (and (null? (cdr atl))
(null? (sxml:aux-list obj)))
'()
(list atl))))
(else '()))
,@(cond ((sxml:aux-list-node obj)
=> (lambda (axl)
(if (null? (cdr axl))
'()
(list axl))))
(else '()))
,@(map
(lambda(x)
(cond
(((ntype?? '*) x)
(sxml:squeeze x))
(else x)))
(sxml:content obj))))
; Eliminates empty lists of attributes and ALL aux-lists for given SXML element
; <obj> and its descendants
; Returns: minimized and normalized SXML element
(define (sxml:clean obj)
`(,(sxml:name obj)
,@(cond
((sxml:attr-list-node obj)
=> (lambda (atl)
(if (null? (cdr atl))
'()
(list atl))))
(else '()))
,@(map
(lambda(x)
(cond
(((ntype?? '*) x)
(sxml:clean x))
(else x)))
(sxml:content obj))))
;==============================================================================
; SXPath-related
;------------------------------------------------------------------------------
; Extensions
; select-first-kid:: Pred -> Node -> Node
; Given a Node, return its first child that satisfy
; the test-pred?
; Returns #f if there is no such a child
; select-first-kid:: Pred -> Nodeset -> Node
; The same as above, but select among children of all the nodes in
; the Nodeset
(define (select-first-kid test-pred?)
(lambda(obj)
(let rpt ((lst (if (symbol? (car obj))
(cdr obj)
obj)))
(cond
((null? lst) #f)
((and (pair? (car lst))
(test-pred? (car lst)))
(car lst))
(else (rpt (cdr lst))))
)))
;------------------------------------------------------------------------------
; Fast node-parent
; Returns a function of one argument - SXML element - which returns its parent
; node using *PARENT* pointer in aux-list
; '*TOP-PTR* may be used as a pointer to root node
; It return an empty list when applyed to root node
(define (sxml:node-parent rootnode)
(lambda(obj)
(cond
((sxml:aux-node obj '*PARENT*)
=> (lambda(x)
(if
(eq? '*TOP-PTR* (cadr x))
rootnode
((cadr x)))))
((and (pair? obj)
(eq? (car obj) '*TOP* ))
'())
(else (sxml:error nl "PARENT pointer is absent in: " obj nl)
))))
(define (sxml:add-parents obj . top-ptr)
(let rpt
((elt obj)
(p '*TOP*)
(at-aux (if (eq? (sxml:name obj) '*TOP*)
(list (cons '@@ (sxml:aux-list-u obj)))
(list
(cons '@ (sxml:attr-list obj))
(cons '@@ (cons `(*PARENT* ,(lambda() (car top-ptr)))
(sxml:aux-list obj))))))
) ; *TOP* is a parent for top-level element
(let* ((h (list (sxml:name elt)))
(b (append
at-aux
(map
(lambda(x)
(cond
(((ntype?? '*) x)
(rpt x h
(list
(cons '@ (sxml:attr-list x))
(cons '@@ (cons `(*PARENT* ,(lambda() h))
(sxml:aux-list x))))
))
(else x)))
(sxml:content elt)))))
(cons (car h) b))))
; Lookup an element using its ID
(define (sxml:lookup id index)
(cond
((assoc id index)
=> cdr)
(else #f)))
;==============================================================================
; Markup generation
;------------------------------------------------------------------------------
; XML
; Creates the XML markup for attributes.
(define (sxml:attr->xml attr)
(list " " (sxml:ncname attr)
"='" (cadr attr) "'"))
; Return a string or a list of strings where all the occurences of
; characters < > & " ' in a given string are replaced by corresponding
; character entity references. See also: sxml:string->html
(define sxml:string->xml
(make-char-quotator
'((#\< . "&lt;") (#\> . "&gt;") (#\& . "&amp;")
(#\" . "&quot;") (#\' . "&apos;"))))
; A version of dispatch-node specialized and optimized for SXML->XML
; transformation.
(define (sxml:sxml->xml tree)
(cond
((nodeset? tree)
(map (lambda (a-tree)
(sxml:sxml->xml a-tree))
tree))
((pair? tree)
(let* ((name (sxml:name tree)) ; NS (URI-prefixed) not supported
(nm (symbol->string name))
(content (sxml:content-raw tree)))
`("<" ,nm ,@(map sxml:attr->xml (sxml:attr-list tree))
,@(if (null? content) '("/>")
`(">" ,@(sxml:sxml->xml content) "</" ,nm ">")))))
((string? tree) (sxml:string->xml tree)) ; *text*
(else (sxml:error "sxml->html - unexpected type of node: " tree))))
;------------------------------------------------------------------------------
; HTML
; Creates the HTML markup for attributes.
(define (sxml:attr->html attr)
(if (equal? "" (cadr attr))
(list " " (sxml:ncname attr))
(list " " (sxml:ncname attr) "='" (cadr attr) "'")))
; Given a string, check to make sure it does not contain characters
; < > & " that require encoding. Return either the original
; string, or a list of string fragments with special characters
; replaced by appropriate character entities.
; Borrowed from Oleg Kiselyov's XML-to-HTML.scm (where its name is
; string->goodHTML)
(define sxml:string->html
(make-char-quotator
'((#\< . "&lt;") (#\> . "&gt;") (#\& . "&amp;") (#\" . "&quot;"))))
; This predicate yields #t for "unterminated" HTML 4.0 tags
(define (sxml:non-terminated-html-tag? tag)
(memq tag
'(area base basefont br col frame hr img input isindex link meta param)))
; A version of dispatch-node specialized and optimized for SXML->HTML
; transformation.
(define (sxml:sxml->html tree)
(cond
((nodeset? tree)
(map (lambda (a-tree)
(sxml:sxml->html a-tree))
tree))
((pair? tree)
(let* ((name (sxml:name tree))
(nm (symbol->string name))
(content (sxml:content-raw tree)))
`("<" ,nm ,@(map sxml:attr->html (sxml:attr-list tree))
,@(if (null? content)
(if (sxml:non-terminated-html-tag? name) '(">") '("/>"))
`(">" ,@(sxml:sxml->html content) "</" ,nm ">")))))
((string? tree) (sxml:string->html tree)) ; *text*
(else (sxml:error "sxml->html - unexpected type of node: " tree))))
(provide (all-defined)))

View File

@ -0,0 +1,35 @@
#cs(module sxml mzscheme
(require "sxpathlib.ss")
(require "sxml-tools.ss")
(require "sxpath-ext.ss")
(require "xpath-parser.ss")
(require "txpath.ss")
(require "sxpath.ss")
(require "xpath-ast.ss")
(require "xpath-context_xlink.ss")
(require "ddo-axes.ss")
(require "ddo-txpath.ss")
(require "lazy-xpath.ss")
(require "lazy-ssax.ss")
#;(require "modif.ss")
(require "serializer.ss")
#;(require "guides.ss")
(require "libmisc.ss")
(require "stx-engine.ss")
(provide (all-from "sxpathlib.ss"))
(provide (all-from "sxml-tools.ss"))
(provide (all-from "sxpath-ext.ss"))
(provide (all-from "xpath-parser.ss"))
(provide (all-from "txpath.ss"))
(provide (all-from "sxpath.ss"))
(provide (all-from "xpath-ast.ss"))
(provide (all-from "xpath-context_xlink.ss"))
(provide (all-from "ddo-axes.ss"))
(provide (all-from "ddo-txpath.ss"))
(provide (all-from "lazy-xpath.ss"))
(provide (all-from "lazy-ssax.ss"))
#;(provide (all-from "modif.ss"))
(provide (all-from "serializer.ss"))
#;(provide (all-from "guides.ss"))
(provide (all-from "libmisc.ss"))
(provide (all-from "stx-engine.ss")))

View File

@ -0,0 +1,626 @@
; Module header is generated automatically
#cs(module sxpath-ext mzscheme
(require (lib "string.ss" "srfi/13"))
(require (lib "ssax.ss" "web-server/tmp/ssax"))
(require "sxpathlib.ss")
(require "sxml-tools.ss")
;; W3C compliant extensions to SXPathlib
; $Id: sxpath-ext.scm,v 1.911 2002/12/06 22:10:53 kl Exp kl $:
;
; This software is in Public Domain.
; IT IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.
;
; Please send bug reports and comments to:
; lisovsky@acm.org Kirill Lisovsky
; lizorkin@hotbox.ru Dmitry Lizorkin
;=========================================================================
; SXML counterparts to W3C XPath Core Functions Library
; The counterpart to XPath 'string' function (section 4.2 XPath Rec.)
; Converts a given object to a string
; NOTE:
; 1. When converting a nodeset - a document order is not preserved
; 2. number->string function returns the result in a form which is slightly
; different from XPath Rec. specification
(define (sxml:string object)
(cond
((string? object) object)
((nodeset? object) (if (null? object)
""
(sxml:string-value (car object))))
((number? object)
(if (and (rational? object) (not (integer? object))) ; like 1/2
(number->string (exact->inexact object))
(number->string object)))
((boolean? object) (if object "true" "false"))
(else ""))) ; Unknown type -> empty string.
; Option: write its value to string port?
; The counterpart to XPath 'boolean' function (section 4.3 XPath Rec.)
; Converts its argument to a boolean
(define (sxml:boolean object)
(cond
((boolean? object) object)
((number? object) (not (= object 0)))
((string? object) (> (string-length object) 0))
((nodeset? object) (not (null? object)))
(else #f))) ; Not specified in XPath Rec.
; The counterpart to XPath 'number' function (section 4.4 XPath Rec.)
; Converts its argument to a number
; NOTE:
; 1. The argument is not optional (yet?)
; 2. string->number conversion is not IEEE 754 round-to-nearest
; 3. NaN is represented as 0
(define (sxml:number obj)
(cond
((number? obj) obj)
((string? obj)
(let ((nmb (call-with-input-string obj read)))
(if (number? nmb)
nmb
0))) ; NaN
((boolean? obj) (if obj 1 0))
((nodeset? obj) (sxml:number (sxml:string obj)))
(else 0))) ; unknown datatype
; Returns a string value for a given node in accordance to
; XPath Rec. 5.1 - 5.7
(define (sxml:string-value node)
(cond
((not (pair? node)) ; a text node or data node
(sxml:string node))
((null? (cdr node))
"")
(else
(apply string-append ; a list of arguments is always non-null
(map
(lambda (node)
(if (sxml:node? node) ; not annot-attr node or aux list node
(sxml:string-value node) ""))
(cdr node))))))
; Select SXML element by its unique IDs
; XPath Rec. 4.1
; object - a nodeset or a datatype which can be converted to a string by means
; of a 'string' function
; id-index = ( (id-value . element) (id-value . element) ... )
; This index is used for selection of an element by its unique ID.
; The result is a nodeset
(define (sxml:id id-index)
(lambda(object)
(if (nodeset? object)
(let loop ((str-lst (map sxml:string-value object))
(res '()))
(if (null? str-lst)
(reverse res)
(let ((node (sxml:lookup (car str-lst) id-index)))
(if (not node) ; no such element
(loop (cdr str-lst) res)
(loop (cdr str-lst) (cons node res))))))
(let rpt ((lst (string->list (sxml:string object)))
(tmp '())
(res '()))
(cond
((null? lst)
(if (null? tmp)
(reverse res)
(let ((node (sxml:lookup (list->string (reverse tmp)) id-index)))
(if (not node)
(reverse res)
(reverse (cons node res))))))
((member (car lst) '(#\space #\return #\newline #\tab))
(if (null? tmp)
(rpt (cdr lst) tmp res)
(let ((node (sxml:lookup (list->string (reverse tmp)) id-index)))
(if (not node)
(rpt (cdr lst) '() res)
(rpt (cdr lst) '() (cons node res))))))
(else (rpt (cdr lst) (cons (car lst) tmp) res)))))))
;=========================================================================
; Comparators for XPath objects
; Implements XPath equality comparison in a straightforward nested loop manner
(define (sxml:nested-loop-join string-set1 string-set2 string-op)
(let first ((str-set1 string-set1)
(str-set2 string-set2))
(cond
((null? str-set1) #f)
((let second ((elem (car str-set1))
(set2 str-set2))
(cond
((null? set2) #f)
((string-op elem (car set2)) #t)
(else (second elem (cdr set2))))) #t)
(else
(first (cdr str-set1) str-set2)))))
;-------------------------------------------------
; Merge-sort for speeding up equality comparison of two nodesets
; Similar to R5RS 'list-tail' but returns the new list consisting of the first
; 'k' members of 'lst'
(define (sxml:list-head lst k)
(if (or (null? lst) (zero? k))
'()
(cons (car lst) (sxml:list-head (cdr lst) (- k 1)))))
; Implements merge-sort of the given lst
; Returns the sorted list, the smallest member first
; less-than?-pred ::= (lambda (obj1 obj2) ...)
; less-than?-pred returns #t if obj1<obj2 with respect to the given ordering
(define (sxml:merge-sort less-than?-pred lst)
(letrec
((merge-sorted-lists
; Merges 2 sorted lists into one sorted list
(lambda (lst1 lst2)
(cond
((null? lst1) lst2)
((null? lst2) lst1)
; both lists are non-null here
((less-than?-pred (car lst1) (car lst2))
(cons (car lst1)
(merge-sorted-lists (cdr lst1) lst2)))
(else
(cons (car lst2)
(merge-sorted-lists lst1 (cdr lst2))))))))
(if
(or (null? lst) (null? (cdr lst))) ; already sorted
lst
(let ((middle (inexact->exact (round (/ (length lst) 2)))))
(merge-sorted-lists
(sxml:merge-sort less-than?-pred (sxml:list-head lst middle))
(sxml:merge-sort less-than?-pred (list-tail lst middle)))))))
; Implementation of XPath equality comparison for 2 string-sets with
; merge-sort join algorithm
(define (sxml:merge-sort-join string-set1 string-set2 string-op)
(let loop ((str-set1 (sxml:merge-sort string<? string-set1))
(str-set2 (sxml:merge-sort string<? string-set2)))
(cond
((or (null? str-set1) (null? str-set2))
#f)
((string-op (car str-set1) (car str-set2))
; comparison condition fulfilled for a pair of nodes
#t)
((string<? (car str-set1) (car str-set2))
; we can remove (car str-set1) from our further consideration
(loop (cdr str-set1) str-set2))
(else ; vice versa
(loop str-set1 (cdr str-set2))))))
;-------------------------------------------------
; Radix-sort join for equality comparison of 2 nodesets
; The running time of the algorithm is proportional to the nodeset size and
; to node string-value length
;
; Since each nodeset contains O(n) nodes and string-value for each node is
; O(n) in length, radix-sort join algorithm evaluates in O(n^2) time. By
; comparison, nested loop join requires O(n^3) time, merge-sort join
; implemented above requires O(n^2*log(n)).
;
; On the other hand, radix-sort join is time-ineffective for relatively small
; nodesets being joined. For small nodesets, the above implemented sort-merge
; join runs more effectively. Radix-sort join is promising for large nodesets.
; Represents a list of chars as a branch in the string-tree
; The list of chars must be non-empty
(define (sxml:charlst->branch lst)
(if (null? (cdr lst)) ; this is the last character in the lst
`(,(car lst) #t)
`(,(car lst) #f ,(sxml:charlst->branch (cdr lst)))))
; Converts a string to a string-tree
(define (sxml:string->tree str)
(let ((lst (string->list str)))
(if (null? lst) ; an empty string is given
'(*top* #t)
`(*top* #f ,(sxml:charlst->branch lst)))))
; Adds a new string to string-tree
; In a special case, tree257 may be #f. The function than creates a new tree,
; which contains just the representation for str
(define (sxml:add-string-to-tree str tree)
(letrec
((add-lst-to-tree ; adds the list of chars to tree
(lambda (lst tree)
(if
(null? lst) ; the lst is over
(if
(cadr tree) ; whether it is already in the tree
tree
(cons (car tree)
(cons #t (cddr tree))))
(let ((curr-char (car lst)))
(let iter-alist ((alist (cddr tree))
(res (list (cadr tree) (car tree))))
(cond
((null? alist) ; branch not in a tree
(reverse
(cons
(sxml:charlst->branch lst)
res)))
((char=? (caar alist) curr-char) ; entry found
(if
(null? (cdr alist)) ; nothing more in the alist
(reverse
(cons
(add-lst-to-tree (cdr lst) (car alist))
res))
(append
(reverse
(cons
(add-lst-to-tree (cdr lst) (car alist))
res))
(cdr alist))))
((char>? (caar alist) curr-char)
(if
(null? (cdr alist)) ; nothing more in the alist
(reverse
(cons (car alist)
(cons (sxml:charlst->branch lst) res)))
(append
(reverse
(cons
(sxml:charlst->branch lst)
res))
alist)))
(else
(iter-alist (cdr alist)
(cons (car alist) res))))))))))
(add-lst-to-tree (string->list str) tree)))
; Whether a given string is presented in the string-tree
(define (sxml:string-in-tree? str tree)
(let loop ((lst (string->list str))
(tree tree))
(cond
((null? lst) ; the string is over
(cadr tree))
((assv (car lst) (cddr tree))
=> (lambda (new-tree)
(loop (cdr lst) new-tree)))
(else #f))))
; XPath equality comparison for 2 string-sets
; bool-op - comparison function for 2 boolean values
(define (sxml:radix-sort-join string-set1 string-set2 bool-op)
(if
(null? string-set1) ; always #f
#f
(let ((tree
(let iter-1 ((set1 (cdr string-set1))
(tree (sxml:string->tree (car string-set1))))
(if (null? set1)
tree
(iter-1 (cdr set1)
(sxml:add-string-to-tree (car set1) tree))))))
(let iter-2 ((set2 string-set2))
(cond
((null? set2) ; equality not found
#f)
((bool-op (sxml:string-in-tree? (car set2) tree) #t)
#t)
(else
(iter-2 (cdr set2))))))))
;-------------------------------------------------
; Equality comparison
; A helper for XPath equality operations: = , !=
; 'bool-op', 'number-op' and 'string-op' are comparison operations for
; a pair of booleans, numbers and strings respectively
(define (sxml:equality-cmp bool-op number-op string-op)
(lambda (obj1 obj2)
(cond
((and (not (nodeset? obj1)) (not (nodeset? obj2)))
; neither object is a nodeset
(cond
((boolean? obj1) (bool-op obj1 (sxml:boolean obj2)))
((boolean? obj2) (bool-op (sxml:boolean obj1) obj2))
((number? obj1) (number-op obj1 (sxml:number obj2)))
((number? obj2) (number-op (sxml:number obj1) obj2))
(else ; both objects are strings
(string-op obj1 obj2))))
((and (nodeset? obj1) (nodeset? obj2)) ; both objects are nodesets
(let ((lng1 (length obj1))
(lng2 (length obj2)))
(cond
((and (< lng1 100000) (< lng2 100000))
((if ; either nodeset is a short one
(or (<= lng1 2) (<= lng2 2))
sxml:nested-loop-join
sxml:merge-sort-join)
(map sxml:string-value obj1)
(map sxml:string-value obj2)
string-op))
((< lng1 lng2)
(sxml:radix-sort-join (map sxml:string-value obj1)
(map sxml:string-value obj2)
bool-op))
(else ; lng2 < lng1
(sxml:radix-sort-join (map sxml:string-value obj2)
(map sxml:string-value obj1)
bool-op)))))
(else ; one of the objects is a nodeset, another is not
(call-with-values
(lambda () ; Equality operations are commutative
(if (nodeset? obj1) (values obj1 obj2) (values obj2 obj1)))
(lambda (nset elem)
(cond
((boolean? elem) (bool-op elem (sxml:boolean nset)))
((number? elem)
(let loop ((nset
(map
(lambda (node) (sxml:number (sxml:string-value node)))
nset)))
(cond
((null? nset) #f)
((number-op elem (car nset)) #t)
(else (loop (cdr nset))))))
((string? elem)
(let loop ((nset (map sxml:string-value nset)))
(cond
((null? nset) #f)
((string-op elem (car nset)) #t)
(else (loop (cdr nset))))))
(else ; unknown datatype
(cerr "Unknown datatype: " elem nl)
#f))))))))
(define sxml:equal? (sxml:equality-cmp eq? = string=?))
(define sxml:not-equal?
(sxml:equality-cmp
(lambda (bool1 bool2) (not (eq? bool1 bool2)))
(lambda (num1 num2) (not (= num1 num2)))
(lambda (str1 str2) (not (string=? str1 str2)))))
;-------------------------------------------------
; Relational comparison
; Relational operation ( < , > , <= , >= ) for two XPath objects
; op is comparison procedure: < , > , <= or >=
(define (sxml:relational-cmp op)
(lambda (obj1 obj2)
(cond
((not (or (nodeset? obj1) (nodeset? obj2))) ; neither obj is a nodeset
(op (sxml:number obj1) (sxml:number obj2)))
((boolean? obj1) ; 'obj1' is a boolean, 'obj2' is a nodeset
(op (sxml:number obj1) (sxml:number (sxml:boolean obj2))))
((boolean? obj2) ; 'obj1' is a nodeset, 'obj2' is a boolean
(op (sxml:number (sxml:boolean obj1)) (sxml:number obj2)))
((or (null? obj1) (null? obj2)) ; one of the objects is an empty nodeset
#f)
(else ; at least one object is a nodeset
(op
(cond
((nodeset? obj1) ; 'obj1' is a (non-empty) nodeset
(let ((nset1 (map
(lambda (node) (sxml:number (sxml:string-value node)))
obj1)))
(let first ((num1 (car nset1))
(nset1 (cdr nset1)))
(cond
((null? nset1) num1)
((op num1 (car nset1)) (first num1 (cdr nset1)))
(else (first (car nset1) (cdr nset1)))))))
((string? obj1) (sxml:number obj1))
(else ; 'obj1' is a number
obj1))
(cond
((nodeset? obj2) ; 'obj2' is a (non-empty) nodeset
(let ((nset2 (map
(lambda (node) (sxml:number (sxml:string-value node)))
obj2)))
(let second ((num2 (car nset2))
(nset2 (cdr nset2)))
(cond
((null? nset2) num2)
((op num2 (car nset2)) (second (car nset2) (cdr nset2)))
(else (second num2 (cdr nset2)))))))
((string? obj2) (sxml:number obj2))
(else ; 'obj2' is a number
obj2)))))))
;=========================================================================
; XPath axes
; An order in resulting nodeset is preserved
; Ancestor axis
(define (sxml:ancestor test-pred?)
(lambda (root-node) ; node or nodeset
(lambda (node) ; node or nodeset
(if (nodeset? node)
(map-union ((sxml:ancestor test-pred?) root-node) node)
(let rpt ((paths (if (nodeset? root-node)
(map list root-node)
(list (list root-node)))))
(if (null? paths)
'()
(let ((path (car paths)))
(if (eq? (car path) node)
((sxml:filter test-pred?) (cdr path))
(rpt (append
(map
(lambda (arg) (cons arg path))
(append
((sxml:attribute (ntype?? '*)) (car path))
((sxml:child sxml:node?) (car path))))
(cdr paths)))))))))))
; Ancestor-or-self axis
(define (sxml:ancestor-or-self test-pred?)
(lambda (root-node) ; node or nodeset
(lambda (node) ; node or nodeset
(if (nodeset? node)
(map-union ((sxml:ancestor-or-self test-pred?) root-node) node)
(let rpt ((paths (if (nodeset? root-node)
(map list root-node)
(list (list root-node)))))
(if (null? paths)
((sxml:filter test-pred?) (list node))
(let ((path (car paths)))
(if (eq? (car path) node)
((sxml:filter test-pred?) path)
(rpt (append
(map
(lambda (arg) (cons arg path))
(append
((sxml:attribute (ntype?? '*)) (car path))
((sxml:child sxml:node?) (car path))))
(cdr paths)))))))))))
; Descendant axis
; It's similar to original 'node-closure' a resulting nodeset is
; in depth-first order rather than breadth-first
; Fix: din't descend in non-element nodes!
(define (sxml:descendant test-pred?)
(lambda (node) ; node or nodeset
(if (nodeset? node)
(map-union (sxml:descendant test-pred?) node)
(let rpt ((res '())
(more ((sxml:child sxml:node?) node)))
(if (null? more)
(reverse res)
(rpt (if (test-pred? (car more))
(cons (car more) res)
res)
(append ((sxml:child sxml:node?) (car more))
(cdr more))))))))
; Descendant-or-self axis
(define (sxml:descendant-or-self test-pred?)
(lambda (node) ; node or nodeset
(if (nodeset? node)
(map-union (sxml:descendant-or-self test-pred?) node)
(let rpt ((res '())
(more (list node)))
(if (null? more)
(reverse res)
(rpt (if (test-pred? (car more))
(cons (car more) res)
res)
(append ((sxml:child sxml:node?) (car more))
; sxml:node?
(cdr more))))))))
; Following axis
(define (sxml:following test-pred?)
(lambda (root-node) ; node or nodeset
(lambda (node) ; node or nodeset
(if (nodeset? node)
(map-union ((sxml:following test-pred?) root-node) node)
(let loop ((seq (if (nodeset? root-node)
(list root-node)
(list (list root-node)))))
(cond
((null? seq) '())
((null? (car seq)) (loop (cdr seq)))
((eq? (caar seq) node)
(let rpt ((seq (cdr (apply append seq)))
(res '()))
(if (null? seq)
res
(rpt (cdr seq)
(append
res
((sxml:descendant-or-self test-pred?) (car seq)))))))
((and (sxml:element? (caar seq))
(memq node (sxml:attr-list (caar seq))))
(let rpt ((sq (cdr (apply append seq)))
(res ((sxml:descendant test-pred?) (caar seq))))
(if (null? sq)
res
(rpt (cdr sq)
(append res
((sxml:descendant-or-self test-pred?) (car sq)))))))
(else
(loop (cons
((sxml:child sxml:node?) (caar seq))
(cons (cdar seq) (cdr seq)))))))))))
; Following-sibling axis
(define (sxml:following-sibling test-pred?)
(lambda (root-node) ; node or nodeset
(lambda (node) ; node or nodeset
(if (nodeset? node)
(map-union ((sxml:following-sibling test-pred?) root-node) node)
(let loop ((seqs (if (nodeset? root-node)
(list root-node)
(list (list root-node)))))
(if (null? seqs)
'()
(let rpt ((seq (car seqs)))
(cond
((null? seq)
(loop (append
(map (sxml:child sxml:node?)
(car seqs))
(cdr seqs))))
((eq? (car seq) node) ((sxml:filter test-pred?) (cdr seq)))
(else (rpt (cdr seq)))))))))))
; Namespace axis
(define (sxml:namespace test-pred?)
(lambda (node) ; node or nodeset
((sxml:filter test-pred?)
(sxml:ns-list node))))
; Preceding axis
(define (sxml:preceding test-pred?)
(lambda (root-node) ; node or nodeset
(lambda (node) ; node or nodeset
(if (nodeset? node)
(map-union ((sxml:preceding test-pred?) root-node) node)
(let loop ((seq (if (nodeset? root-node)
(list (reverse root-node))
(list (list root-node)))))
(cond
((null? seq) '())
((null? (car seq)) (loop (cdr seq)))
((or (eq? (caar seq) node)
(not (null? ((sxml:attribute
(lambda (n)
(eq? n node)))
(caar seq)))))
(let rpt ((seq (cdr (apply append seq)))
(res '()))
(if (null? seq)
res
(rpt (cdr seq)
(append res
(reverse ((sxml:descendant-or-self test-pred?)
(car seq))))))))
(else (loop (cons (reverse ((sxml:child sxml:node?) (caar seq)))
(cons (cdar seq) (cdr seq)))))))))))
; Preceding-sibling axis
(define (sxml:preceding-sibling test-pred?)
(lambda (root-node) ; node or nodeset
(lambda (node) ; node or nodeset
(if(nodeset? node)
(map-union ((sxml:preceding-sibling test-pred?) root-node) node)
(let loop ((seqs (if (nodeset? root-node)
(list root-node)
(list (list root-node)))))
(if (null? seqs)
'()
(let rpt ((seq (car seqs)))
(cond
((null? seq)
(loop (append
(map
(lambda (n)
(reverse ((sxml:child sxml:node?) n)))
(car seqs))
(cdr seqs))))
((eq? (car seq) node) ((sxml:filter test-pred?) (cdr seq)))
(else (rpt (cdr seq)))))))))))
(provide (all-defined)))

View File

@ -0,0 +1,226 @@
; Module header is generated automatically
#cs(module sxpath mzscheme
(require (lib "string.ss" "srfi/13"))
(require (lib "ssax.ss" "web-server/tmp/ssax"))
(require "sxml-tools.ss")
(require "sxpathlib.ss")
(require "sxpath-ext.ss")
(require "txpath.ss")
(require "xpath-parser.ss")
;; $Id: sxpath.scm,v 1.5 2005/09/07 09:27:34 lizorkin Exp $
;; Highghest level SXPath
;; Refactored from sxml-tools.scm and sxpathlib.scm
;==============================================================================
; Abbreviated SXPath
; Evaluate an abbreviated SXPath
; sxpath:: AbbrPath -> Converter, or
; sxpath:: AbbrPath -> Node|Nodeset -> Nodeset
; AbbrPath is a list. It is translated to the full SXPath according
; to the following rewriting rules
; (sxpath '()) -> (node-join)
; (sxpath '(path-component ...)) ->
; (node-join (sxpath1 path-component) (sxpath '(...)))
; (sxpath1 '//) -> (sxml:descendant-or-self sxml:node?)
; (sxpath1 '(equal? x)) -> (select-kids (node-equal? x))
; (sxpath1 '(eq? x)) -> (select-kids (node-eq? x))
; (sxpath1 '(*or* ...)) -> (select-kids (ntype-names??
; (cdr '(*or* ...))))
; (sxpath1 '(*not* ...)) -> (select-kids (sxml:complement
; (ntype-names??
; (cdr '(*not* ...)))))
; (sxpath1 '(ns-id:* x)) -> (select-kids
; (ntype-namespace-id?? x))
; (sxpath1 ?symbol) -> (select-kids (ntype?? ?symbol))
; (sxpath1 ?string) -> (txpath ?string)
; (sxpath1 procedure) -> procedure
; (sxpath1 '(?symbol ...)) -> (sxpath1 '((?symbol) ...))
; (sxpath1 '(path reducer ...)) ->
; (node-reduce (sxpath path) (sxpathr reducer) ...)
; (sxpathr number) -> (node-pos number)
; (sxpathr path-filter) -> (filter (sxpath path-filter))
(define (sxpath path . ns-binding)
(let ((ns-binding (if (null? ns-binding) ns-binding (car ns-binding))))
(let loop ((converters '())
(root-vars '()) ; a list of booleans, one per location step:
; #t - location step function is binary
; #f - location step function is unary
(path (if (string? path) (list path) path)))
(cond
((null? path) ; parsing is finished
(lambda (node . var-binding)
(let ((var-binding
(if (null? var-binding) var-binding (car var-binding))))
(let rpt ((nodeset (as-nodeset node))
(conv (reverse converters))
(r-v (reverse root-vars)))
(if
(null? conv) ; the path is over
nodeset
(rpt
(if (car r-v) ; the current converter consumes 2 arguments
((car conv) nodeset var-binding)
((car conv) nodeset))
(cdr conv)
(cdr r-v)))))))
; *or* handler
((and (pair? (car path))
(not (null? (car path)))
(eq? '*or* (caar path)))
(loop (cons (select-kids (ntype-names?? (cdar path))) converters)
(cons #f root-vars)
(cdr path)))
; *not* handler
((and (pair? (car path))
(not (null? (car path)))
(eq? '*not* (caar path)))
(loop (cons
(select-kids (sxml:complement (ntype-names?? (cdar path))))
converters)
(cons #f root-vars)
(cdr path)))
((procedure? (car path))
(loop (cons (car path) converters)
(cons #t root-vars)
(cdr path)))
((eq? '// (car path))
(if (or (null? (cdr path))
(not (symbol? (cadr path)))
(eq? (cadr path) '@))
(loop (cons (sxml:descendant-or-self sxml:node?)
converters)
(cons #f root-vars)
(cdr path))
(loop (cons (sxml:descendant (ntype?? (cadr path)))
converters)
(cons #f root-vars)
(cddr path))))
((symbol? (car path))
(loop (cons (select-kids (ntype?? (car path))) converters)
(cons #f root-vars)
(cdr path)))
((string? (car path))
(and-let*
((f (sxml:xpath-expr (car path) ns-binding))) ; DL: was: txpath
(loop (cons f converters)
(cons #t root-vars)
(cdr path))))
((and (pair? (car path)) (eq? 'equal? (caar path)))
(loop (cons (select-kids (apply node-equal? (cdar path))) converters)
(cons #f root-vars)
(cdr path)))
; ns-id:* handler
((and (pair? (car path)) (eq? 'ns-id:* (caar path)))
(loop
(cons (select-kids (ntype-namespace-id?? (cadar path))) converters)
(cons #f root-vars)
(cdr path)))
((and (pair? (car path)) (eq? 'eq? (caar path)))
(loop (cons (select-kids (apply node-eq? (cdar path))) converters)
(cons #f root-vars)
(cdr path)))
((pair? (car path))
(and-let*
((select
(if
(symbol? (caar path))
(lambda (node . var-binding)
((select-kids (ntype?? (caar path))) node))
(sxpath (caar path) ns-binding))))
(let reducer ((reducing-path (cdar path))
(filters '()))
(cond
((null? reducing-path)
(loop
(cons
(lambda (node var-binding)
(map-union
(lambda (node)
(let label ((nodeset (select node var-binding))
(fs (reverse filters)))
(if
(null? fs)
nodeset
(label
((car fs) nodeset var-binding)
(cdr fs)))))
(if (nodeset? node) node (list node))))
converters)
(cons #t root-vars)
(cdr path)))
((number? (car reducing-path))
(reducer
(cdr reducing-path)
(cons
(lambda (node var-binding)
((node-pos (car reducing-path)) node))
filters)))
(else
(and-let*
((func (sxpath (car reducing-path) ns-binding)))
(reducer
(cdr reducing-path)
(cons
(lambda (node var-binding)
((sxml:filter
(lambda (n) (func n var-binding)))
node))
filters))))))))
(else
(cerr "Invalid path step: " (car path))
#f)))))
;==============================================================================
; Wrappers
; sxpath always returns a list, which is #t in Scheme
; if-sxpath returns #f instead of empty list
(define (if-sxpath path)
(lambda (obj)
(let ((x ((sxpath path) obj)))
(if (null? x) #f x))))
; Returns first node found, if any.
; Otherwise returns #f.
(define (if-car-sxpath path)
(lambda (obj)
(let ((x ((sxpath path) obj)))
(if (null? x) #f (car x)))))
; Returns first node found, if any.
; Otherwise returns empty list.
(define (car-sxpath path)
(lambda (obj)
(let ((x ((sxpath path) obj)))
(if (null? x) '() (car x)))))
;==============================================================================
; lookup by a value of ID type attribute
; See also sxml:lookup in sxml-tools
; Built an index as a list of (ID_value . element) pairs for given
; node. lpaths are location paths for attributes of type ID.
(define (sxml:id-alist node . lpaths)
(apply
append
(map
(lambda(lp)
(let ((lpr (reverse lp)))
(map
(lambda (nd)
(cons (sxml:attr nd (car lpr))
nd))
; Selects elements with ID attributes
; using (lpath ,(node-self (sxpath '(@ attrname))))
((sxpath (reverse (cons
(lambda(n r+v)
((node-self (sxpath `(@ ,(car lpr)))) n))
(cddr lpr)))) node))
))
lpaths)))
(provide (all-defined)))

View File

@ -0,0 +1,536 @@
; Module header is generated automatically
#cs(module sxpathlib mzscheme
(require (rename (lib "pretty.ss") pp pretty-print))
(require (lib "string.ss" "srfi/13"))
(require (lib "ssax.ss" "web-server/tmp/ssax"))
;; XML processing in Scheme
; SXPath -- SXML Query Language
;
; $Id: sxpathlib.scm,v 3.918 2004/02/05 22:52:33 kl Exp kl $
;
; This code is in Public Domain
; It's based on SXPath by Oleg Kiselyov, and multiple improvements
; implemented by Dmitry Lizorkin.
;
; The list of differences from original SXPath.scm my be found in changelog.txt
;
; Kirill Lisovsky lisovsky@acm.org
;
; * * *
;
; SXPath is a query language for SXML, an instance of XML Information
; set (Infoset) in the form of s-expressions. See SSAX.scm for the
; definition of SXML and more details. SXPath is also a translation into
; Scheme of an XML Path Language, XPath:
; http://www.w3.org/TR/xpath
; XPath and SXPath describe means of selecting a set of Infoset's items
; or their properties.
;
; To facilitate queries, XPath maps the XML Infoset into an explicit
; tree, and introduces important notions of a location path and a
; current, context node. A location path denotes a selection of a set of
; nodes relative to a context node. Any XPath tree has a distinguished,
; root node -- which serves as the context node for absolute location
; paths. Location path is recursively defined as a location step joined
; with a location path. A location step is a simple query of the
; database relative to a context node. A step may include expressions
; that further filter the selected set. Each node in the resulting set
; is used as a context node for the adjoining location path. The result
; of the step is a union of the sets returned by the latter location
; paths.
;
; The SXML representation of the XML Infoset (see SSAX.scm) is rather
; suitable for querying as it is. Bowing to the XPath specification,
; we will refer to SXML information items as 'Nodes':
; <Node> ::= <Element> | <attributes-coll> | <attrib>
; | "text string" | <PI>
; This production can also be described as
; <Node> ::= (name . <Nodelist>) | "text string"
; An (ordered) set of nodes is just a list of the constituent nodes:
; <Nodelist> ::= (<Node> ...)
; Nodelists, and Nodes other than text strings are both lists. A
; <Nodelist> however is either an empty list, or a list whose head is not
; a symbol. A symbol at the head of a node is either an XML name (in
; which case it's a tag of an XML element), or an administrative name
; such as '@'. This uniform list representation makes processing rather
; simple and elegant, while avoiding confusion. The multi-branch tree
; structure formed by the mutually-recursive datatypes <Node> and
; <Nodelist> lends itself well to processing by functional languages.
;
; A location path is in fact a composite query over an XPath tree or
; its branch. A singe step is a combination of a projection, selection
; or a transitive closure. Multiple steps are combined via join and
; union operations. This insight allows us to _elegantly_ implement
; XPath as a sequence of projection and filtering primitives --
; converters -- joined by _combinators_. Each converter takes a node
; and returns a nodelist which is the result of the corresponding query
; relative to that node. A converter can also be called on a set of
; nodes. In that case it returns a union of the corresponding queries over
; each node in the set. The union is easily implemented as a list
; append operation as all nodes in a SXML tree are considered
; distinct, by XPath conventions. We also preserve the order of the
; members in the union. Query combinators are high-order functions:
; they take converter(s) (which is a Node|Nodelist -> Nodelist function)
; and compose or otherwise combine them. We will be concerned with
; only relative location paths [XPath]: an absolute location path is a
; relative path applied to the root node.
;
; Similarly to XPath, SXPath defines full and abbreviated notations
; for location paths. In both cases, the abbreviated notation can be
; mechanically expanded into the full form by simple rewriting
; rules. In case of SXPath the corresponding rules are given as
; comments to a sxpath function, below. The regression test suite at
; the end of this file shows a representative sample of SXPaths in
; both notations, juxtaposed with the corresponding XPath
; expressions. Most of the samples are borrowed literally from the
; XPath specification, while the others are adjusted for our running
; example, tree1.
;
;=============================================================================
; Basic converters and applicators
; A converter is a function
; type Converter = Node|Nodelist -> Nodelist
; A converter can also play a role of a predicate: in that case, if a
; converter, applied to a node or a nodelist, yields a non-empty
; nodelist, the converter-predicate is deemed satisfied. Throughout
; this file a nil nodelist is equivalent to #f in denoting a failure.
; Returns #t if given object is a nodelist
(define (nodeset? x)
(or (and (pair? x) (not (symbol? (car x)))) (null? x)))
; If x is a nodelist - returns it as is, otherwise wrap it in a list.
(define (as-nodeset x)
(if (nodeset? x) x (list x)))
;-----------------------------------------------------------------------------
; Node test
; The following functions implement 'Node test's as defined in
; Sec. 2.3 of XPath document. A node test is one of the components of a
; location step. It is also a converter-predicate in SXPath.
; Predicate which returns #t if <obj> is SXML element, otherwise returns #f.
(define (sxml:element? obj)
(and (pair? obj)
(symbol? (car obj))
(not (memq (car obj)
; '(@ @@ *PI* *COMMENT* *ENTITY* *NAMESPACES*)
; the line above is a workaround for old SXML
'(@ @@ *PI* *COMMENT* *ENTITY*)))))
; The function ntype-names?? takes a list of acceptable node names as a
; criterion and returns a function, which, when applied to a node,
; will return #t if the node name is present in criterion list and #f
; othervise.
; ntype-names?? :: ListOfNames -> Node -> Boolean
(define (ntype-names?? crit)
(lambda(node)
(and (pair? node)
(memq (car node) crit))))
; The function ntype?? takes a type criterion and returns
; a function, which, when applied to a node, will tell if the node satisfies
; the test.
; ntype?? :: Crit -> Node -> Boolean
;
; The criterion 'crit' is
; one of the following symbols:
; id - tests if the Node has the right name (id)
; @ - tests if the Node is an <attributes-list>
; * - tests if the Node is an <Element>
; *text* - tests if the Node is a text node
; *data* - tests if the Node is a data node
; (text, number, boolean, etc., but not pair)
; *PI* - tests if the Node is a PI node
; *COMMENT* - tests if the Node is a COMMENT node
; *ENTITY* - tests if the Node is a ENTITY node
; *any* - #t for any type of Node
(define (ntype?? crit)
(case crit
((*) sxml:element?)
((*any*) (lambda (node) #t))
((*text*) (lambda (node) (string? node)))
((*data*) (lambda (node) (not (pair? node))))
(else (lambda (node) (and (pair? node) (eq? crit (car node)))))
))
; This function takes a namespace-id, and returns a predicate
; Node -> Boolean, which is #t for nodes with this very namespace-id.
; ns-id is a string
; (ntype-namespace-id?? #f) will be #t for nodes with non-qualified names.
(define (ntype-namespace-id?? ns-id)
(lambda (node)
(and (pair? node)
(not (memq (car node)
'(@ @@ *PI* *COMMENT* *ENTITY*)))
(let ((nm (symbol->string (car node))))
(cond
((string-rindex nm #\:)
=> (lambda (pos)
(and
(= pos (string-length ns-id))
(string-prefix? ns-id nm))))
(else (not ns-id)))))))
;^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
; This function takes a predicate and returns it complemented
; That is if the given predicate yelds #f or '() the complemented one
; yields the given node (#t) and vice versa.
(define (sxml:complement pred)
(lambda(node)
(case (pred node)
((#f '()) node)
(else #f))))
; Curried equivalence converter-predicates
(define (node-eq? other)
(lambda (node)
(eq? other node)))
(define (node-equal? other)
(lambda (node)
(equal? other node)))
; node-pos:: N -> Nodelist -> Nodelist, or
; node-pos:: N -> Converter
; Select the N'th element of a Nodelist and return as a singular Nodelist;
; Return an empty nodelist if the Nth element does not exist.
; ((node-pos 1) Nodelist) selects the node at the head of the Nodelist,
; if exists; ((node-pos 2) Nodelist) selects the Node after that, if
; exists.
; N can also be a negative number: in that case the node is picked from
; the tail of the list.
; ((node-pos -1) Nodelist) selects the last node of a non-empty nodelist;
; ((node-pos -2) Nodelist) selects the last but one node, if exists.
(define (node-pos n)
(lambda (nodelist)
(cond
((not (nodeset? nodelist)) '())
((null? nodelist) nodelist)
((eqv? n 1) (list (car nodelist)))
((negative? n) ((node-pos (+ n 1 (length nodelist))) nodelist))
(else
(assert (positive? n))
((node-pos (-- n)) (cdr nodelist))))))
; filter:: Converter -> Converter
; A filter applicator, which introduces a filtering context. The argument
; converter is considered a predicate, with either #f or nil result meaning
; failure.
(define (sxml:filter pred?)
(lambda (lst) ; a nodelist or a node (will be converted to a singleton nset)
(let loop ((lst (as-nodeset lst))
(res '()))
(if (null? lst)
(reverse res)
(let ((pred-result (pred? (car lst))))
(loop (cdr lst)
(if (and pred-result (not (null? pred-result)))
(cons (car lst) res)
res)))))))
; take-until:: Converter -> Converter, or
; take-until:: Pred -> Node|Nodelist -> Nodelist
; Given a converter-predicate and a nodelist, apply the predicate to
; each element of the nodelist, until the predicate yields anything but #f or
; nil. Return the elements of the input nodelist that have been processed
; till that moment (that is, which fail the predicate).
; take-until is a variation of the filter above: take-until passes
; elements of an ordered input set till (but not including) the first
; element that satisfies the predicate.
; The nodelist returned by ((take-until (not pred)) nset) is a subset --
; to be more precise, a prefix -- of the nodelist returned by
; ((filter pred) nset)
(define (take-until pred?)
(lambda (lst) ; a nodelist or a node (will be converted to a singleton nset)
(let loop ((lst (as-nodeset lst)))
(if (null? lst) lst
(let ((pred-result (pred? (car lst))))
(if (and pred-result (not (null? pred-result)))
'()
(cons (car lst) (loop (cdr lst)))))
))))
; take-after:: Converter -> Converter, or
; take-after:: Pred -> Node|Nodelist -> Nodelist
; Given a converter-predicate and a nodelist, apply the predicate to
; each element of the nodelist, until the predicate yields anything but #f or
; nil. Return the elements of the input nodelist that have not been processed:
; that is, return the elements of the input nodelist that follow the first
; element that satisfied the predicate.
; take-after along with take-until partition an input nodelist into three
; parts: the first element that satisfies a predicate, all preceding
; elements and all following elements.
(define (take-after pred?)
(lambda (lst) ; a nodelist or a node (will be converted to a singleton nset)
(let loop ((lst (as-nodeset lst)))
(if (null? lst) lst
(let ((pred-result (pred? (car lst))))
(if (and pred-result (not (null? pred-result)))
(cdr lst)
(loop (cdr lst))))
))))
; Apply proc to each element of lst and return the list of results.
; if proc returns a nodelist, splice it into the result
;
; From another point of view, map-union is a function Converter->Converter,
; which places an argument-converter in a joining context.
(define (map-union proc lst)
(if (null? lst) lst
(let ((proc-res (proc (car lst))))
((if (nodeset? proc-res) append cons)
proc-res (map-union proc (cdr lst))))))
; node-reverse :: Converter, or
; node-reverse:: Node|Nodelist -> Nodelist
; Reverses the order of nodes in the nodelist
; This basic converter is needed to implement a reverse document order
; (see the XPath Recommendation).
(define node-reverse
(lambda (node-or-nodelist)
(if (not (nodeset? node-or-nodelist)) (list node-or-nodelist)
(reverse node-or-nodelist))))
; node-trace:: String -> Converter
; (node-trace title) is an identity converter. In addition it prints out
; a node or nodelist it is applied to, prefixed with the 'title'.
; This converter is very useful for debugging.
(define (node-trace title)
(lambda (node-or-nodelist)
(cout nl "-->" title " :")
(pp node-or-nodelist)
node-or-nodelist))
;------------------------------------------------------------------------------
; Converter combinators
;
; Combinators are higher-order functions that transmogrify a converter
; or glue a sequence of converters into a single, non-trivial
; converter. The goal is to arrive at converters that correspond to
; XPath location paths.
;
; From a different point of view, a combinator is a fixed, named
; _pattern_ of applying converters. Given below is a complete set of
; such patterns that together implement XPath location path
; specification. As it turns out, all these combinators can be built
; from a small number of basic blocks: regular functional composition,
; map-union and filter applicators, and the nodelist union.
; select-kids:: Pred -> Node -> Nodelist
; Given a Node, return an (ordered) subset its children that satisfy
; the Pred (a converter, actually)
; select-kids:: Pred -> Nodelist -> Nodelist
; The same as above, but select among children of all the nodes in
; the Nodelist
;
; More succinctly, the signature of this function is
; select-kids:: Converter -> Converter
(define (select-kids test-pred?)
(lambda (node) ; node or node-set
(cond
((null? node) node)
((not (pair? node)) '()) ; No children
((symbol? (car node))
((sxml:filter test-pred?) (cdr node))) ; it's a single node
(else (map-union (select-kids test-pred?) node)))))
; node-self:: Pred -> Node -> Nodelist, or
; node-self:: Converter -> Converter
; Similar to select-kids but apply to the Node itself rather
; than to its children. The resulting Nodelist will contain either one
; component, or will be empty (if the Node failed the Pred).
(define node-self sxml:filter)
; node-join:: [LocPath] -> Node|Nodelist -> Nodelist, or
; node-join:: [Converter] -> Converter
; join the sequence of location steps or paths as described
; in the title comments above.
(define (node-join . selectors)
(lambda (nodelist) ; Nodelist or node
(let loop ((nodelist nodelist) (selectors selectors))
(if (null? selectors) nodelist
(loop
(if (nodeset? nodelist)
(map-union (car selectors) nodelist)
((car selectors) nodelist))
(cdr selectors))))))
; node-reduce:: [LocPath] -> Node|Nodelist -> Nodelist, or
; node-reduce:: [Converter] -> Converter
; A regular functional composition of converters.
; From a different point of view,
; ((apply node-reduce converters) nodelist)
; is equivalent to
; (foldl apply nodelist converters)
; i.e., folding, or reducing, a list of converters with the nodelist
; as a seed.
(define (node-reduce . converters)
(lambda (nodelist) ; Nodelist or node
(let loop ((nodelist nodelist) (converters converters))
(if (null? converters) nodelist
(loop ((car converters) nodelist) (cdr converters))))))
; node-or:: [Converter] -> Converter
; This combinator applies all converters to a given node and
; produces the union of their results.
; This combinator corresponds to a union, '|' operation for XPath
; location paths.
(define (node-or . converters)
(lambda (node-or-nodelist)
(let loop ((result '()) (converters converters))
(if (null? converters) result
(loop (append result (or ((car converters) node-or-nodelist) '()))
(cdr converters))))))
; node-closure:: Converter -> Converter
; Select all _descendants_ of a node that satisfy a converter-predicate.
; This combinator is similar to select-kids but applies to
; grand... children as well.
; This combinator implements the "descendant::" XPath axis
; Conceptually, this combinator can be expressed as
; (define (node-closure f)
; (node-or
; (select-kids f)
; (node-reduce (select-kids (ntype?? '*)) (node-closure f))))
; This definition, as written, looks somewhat like a fixpoint, and it
; will run forever. It is obvious however that sooner or later
; (select-kids (ntype?? '*)) will return an empty nodelist. At
; this point further iterations will no longer affect the result and
; can be stopped.
(define (node-closure test-pred?)
(let ((kid-selector (select-kids test-pred?)))
(lambda (node) ; Nodelist or node
(let loop ((parent node) (result '()))
(if (null? parent) result
(loop (sxml:child-elements parent)
(append result
(kid-selector parent)))
)))))
;=============================================================================
; Unified with sxpath-ext and sxml-tools
; According to XPath specification 2.3, this test is true for any
; XPath node.
; For SXML auxiliary lists and lists of attributes has to be excluded.
(define (sxml:node? node)
(not (and
(pair? node)
(memq (car node) '(@ @@)))))
; Returns the list of attributes for a given SXML node
; Empty list is returned if the given node os not an element,
; or if it has no list of attributes
(define (sxml:attr-list obj)
(if (and (sxml:element? obj)
(not (null? (cdr obj)))
(pair? (cadr obj))
(eq? '@ (caadr obj)))
(cdadr obj)
'()))
; Attribute axis
(define (sxml:attribute test-pred?)
(let ((fltr (sxml:filter test-pred?)))
(lambda (node)
(fltr
(apply append
(map
sxml:attr-list
(as-nodeset node)))))))
; Child axis
; This function is similar to 'select-kids', but it returns an empty
; child-list for PI, Comment and Entity nodes
(define (sxml:child test-pred?)
(lambda (node) ; node or node-set
(cond
((null? node) node)
((not (pair? node)) '()) ; No children
((memq (car node) '(*PI* *COMMENT* *ENTITY*)) ; PI, Comment or Entity
'()) ; No children
((symbol? (car node)) ; it's a single node
((sxml:filter test-pred?) (cdr node)))
(else (map-union (sxml:child test-pred?) node)))))
; Parent axis
; Given a predicate, it returns a function
; RootNode -> Converter
; which which yields a
; node -> parent
; converter then applied to a rootnode.
; Thus, such a converter may be constructed using
; ((sxml:parent test-pred) rootnode)
; and returns a parent of a node it is applied to.
; If applied to a nodelist, it returns the
; list of parents of nodes in the nodelist. The rootnode does not have
; to be the root node of the whole SXML tree -- it may be a root node
; of a branch of interest.
; The parent:: axis can be used with any SXML node.
(define (sxml:parent test-pred?)
(lambda (root-node) ; node or nodelist
(lambda (node) ; node or nodelist
(if (nodeset? node)
(map-union ((sxml:parent test-pred?) root-node) node)
(let rpt ((pairs
(apply append
(map
(lambda (root-n)
(map
(lambda (arg) (cons arg root-n))
(append
(sxml:attr-list root-n)
(sxml:child-nodes root-n))))
(as-nodeset root-node)))
))
(if (null? pairs)
'()
(let ((pair (car pairs)))
(if (eq? (car pair) node)
((sxml:filter test-pred?) (list (cdr pair)))
(rpt (append
(map
(lambda (arg) (cons arg (car pair)))
(append
(sxml:attr-list (car pair))
(sxml:child-nodes (car pair))))
(cdr pairs)
))))))))))
;=============================================================================
; Popular short cuts
; node-parent:: RootNode -> Converter
; (node-parent rootnode) yields a converter that returns a parent of a
; node it is applied to. If applied to a nodelist, it returns the list
; of parents of nodes in the nodelist.
; Given the notation of Philip Wadler's paper on semantics of XSLT,
; parent(x) = { y | y=subnode*(root), x=subnode(y) }
; Therefore, node-parent is not the fundamental converter: it can be
; expressed through the existing ones. Yet node-parent is a rather
; convenient converter. It corresponds to a parent:: axis of SXPath.
;
; Please note: this function is provided for backward compatibility
; with SXPath/SXPathlib ver. 3.5.x.x and earlier.
; Now it's a particular case of 'sxml:parent' application:
(define node-parent (sxml:parent (ntype?? '*any*)))
(define sxml:child-nodes (sxml:child sxml:node?))
(define sxml:child-elements (select-kids sxml:element?))
(provide (all-defined)))

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,469 @@
; Module header is generated automatically
#cs(module xpath-ast mzscheme
(require (lib "ssax.ss" "web-server/tmp/ssax"))
(require "xpath-parser.ss")
;; XPath/XPointer -> Abstract Syntax Tree parser
;
; This software is in Public Domain.
; IT IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND.
;
; Please send bug reports and comments to:
; lisovsky@acm.org Kirill Lisovsky
; lizorkin@hotbox.ru Dmitry Lizorkin
;==========================================================================
; W3C textual XPath/XPointer -> AST
; Writing operations as an S-expression in an infix notation
(define (txp:ast-operation-helper expr-lst op-lst add-on)
(let ((rev-expr-lst (reverse expr-lst)))
(let loop ((exprs (cdr rev-expr-lst))
(ops (reverse op-lst))
(res (car rev-expr-lst)))
(if (null? ops)
res
(loop (cdr exprs) (cdr ops)
(list (car ops) (car exprs) res))))))
;-------------------------------------------------
; Parameters for TXPath -> AST implementation
(define txp:ast-params
`(
; Axes
(axis
((ancestor
,(lambda (add-on) 'ancestor))
(ancestor-or-self
,(lambda (add-on) 'ancestor-or-self))
(attribute
,(lambda (add-on) 'attribute))
(child
,(lambda (add-on) 'child))
(descendant
,(lambda (add-on) 'descendant))
(descendant-or-self
,(lambda (add-on) 'descendant-or-self))
(following
,(lambda (add-on) 'following))
(following-sibling
,(lambda (add-on) 'following-sibling))
(namespace
,(lambda (add-on) 'namespace))
(parent
,(lambda (add-on) 'parent))
(preceding
,(lambda (add-on) 'preceding))
(preceding-sibling
,(lambda (add-on) 'preceding-sibling))
(self
,(lambda (add-on) 'self))
; Addition by XLink
(arc
,(lambda (add-on) 'arc))
(traverse
,(lambda (add-on) 'traverse))
(traverse-arc
,(lambda (add-on) 'traverse-arc))))
; Node test
(node-test
((star
,(lambda (add-on) '((*))))
(uri+star
,(lambda (uri add-on)
`((namespace-uri ,uri))))
(qname
,(lambda (uri local-name add-on)
(if (not uri)
`((local-name ,local-name))
`((namespace-uri ,uri) (local-name ,local-name)))))
(comment
,(lambda (add-on) '((comment))))
(text
,(lambda (add-on) '((text))))
(processing-instruction
,(lambda (literal-string add-on)
(if (not literal-string) ; no literal provided
'((pi))
`((pi ,literal-string)))))
(node
,(lambda (add-on) '((node))))
(point
,(lambda (add-on) '((point))))
(range
,(lambda (add-on) '((range))))))
; Location step
(step
((common
,(lambda (axis-res node-test-res predicate-res-lst add-on)
`(step
(axis-specifier (,axis-res))
(node-test ,@node-test-res)
,@predicate-res-lst)))
(range-to
,(lambda (expr-res predicate-res-lst add-on)
`(range-to
(expr ,expr-res)
,@predicate-res-lst)))))
; Relative location path
(relative-lpath
,(lambda (step-res-lst add-on)
(cons 'relative-location-path step-res-lst)))
; Location path
(location-path
((bare-slash
,(lambda (add-on) '(absolute-location-path)))
(slash
,(lambda (relative-lpath-res add-on)
(cons 'absolute-location-path (cdr relative-lpath-res))))
(double-slash
,(lambda (relative-lpath-res add-on)
`(absolute-location-path
(step
(axis-specifier (descendant-or-self))
(node-test (node)))
,@(cdr relative-lpath-res))))))
; Predicate
(predicate
,(lambda (expr-res add-on)
(list 'predicate expr-res)))
; Variable reference
(variable-ref
,(lambda (var-name-string add-on)
`(variable-reference ,var-name-string)))
; Function call
(function-call
,(lambda (fun-name-string arg-res-lst add-on)
`(function-call
(function-name ,fun-name-string)
,@(map
(lambda (arg-res) `(argument ,arg-res))
arg-res-lst))))
; Primary expression
(primary-expr
((literal
,(lambda (literal add-on)
`(literal ,literal)))
(number
,(lambda (number add-on)
`(number ,number)))))
; Filter expression
(filter-expr
,(lambda (primary-expr-res predicate-res-lst add-on)
`(filter-expr
(primary-expr ,primary-expr-res)
,@predicate-res-lst)))
; Path expression
(path-expr
((slash
,(lambda (filter-expr-res relative-lpath-res add-on)
`(path-expr
,(if (eq? (car filter-expr-res) 'filter-expr)
filter-expr-res
`(filter-expr (primary-expr ,filter-expr-res)))
,@(cdr relative-lpath-res))))
(double-slash
,(lambda (filter-expr-res relative-lpath-res add-on)
`(path-expr
,(if (eq? (car filter-expr-res) 'filter-expr)
filter-expr-res
`(filter-expr (primary-expr ,filter-expr-res)))
(step
(axis-specifier (descendant-or-self))
(node-test (node)))
,@(cdr relative-lpath-res))))))
; Union expression
(union-expr
,(lambda (path-expr-res-lst add-on)
(cons 'union-expr path-expr-res-lst)))
; Unary expression
(unary-expr
,(lambda (union-expr-res num-minuses add-on)
(let loop ((n num-minuses)
(res union-expr-res))
(if (= n 0) res
(loop (- n 1) (list '- res))))))
; Different operations
(operations
((* ,(lambda (add-on) '*))
(div ,(lambda (add-on) 'div))
(mod ,(lambda (add-on) 'mod))
(+ ,(lambda (add-on) '+))
(- ,(lambda (add-on) '-))
(< ,(lambda (add-on) '<))
(> ,(lambda (add-on) '>))
(<= ,(lambda (add-on) '<=))
(>= ,(lambda (add-on) '>=))
(= ,(lambda (add-on) '=))
(!= ,(lambda (add-on) '!=))))
; Additive and multiplicative expressions
(mul-expr ,txp:ast-operation-helper)
(add-expr ,txp:ast-operation-helper)
; Relational expression
(relational-expr ,txp:ast-operation-helper)
; Equality expression
(equality-expr ,txp:ast-operation-helper)
; And-expression
(and-expr
,(lambda (equality-expr-res-lst add-on)
(cons 'and equality-expr-res-lst)))
; Or-expression
(or-expr
,(lambda (and-expr-res-lst add-on)
(cons 'or and-expr-res-lst)))
; Full XPointer
(full-xptr
,(lambda (expr-res-lst add-on)
(cons 'full-xptr expr-res-lst)))
; XPointer child sequence
(child-seq
((with-name
,(lambda (name-string number-lst add-on)
`(child-seq
(name ,name-string)
,@(map
(lambda (num) (list 'number num))
number-lst))))
(without-name
,(lambda (number-lst add-on)
(cons 'child-seq
(map
(lambda (num) (list 'number num))
number-lst))))))
))
(define txp:ast-res (txp:parameterize-parser txp:ast-params))
;-------------------------------------------------
; Highest level API functions
;
; xpath-string - an XPath location path (a string)
; ns-binding - declared namespace prefixes (an optional argument)
; ns-binding = (list (prefix . uri)
; (prefix . uri)
; ...)
; prefix - a symbol
; uri - a string
;
; The returned result: abstract-syntax-tree or #f
; abstract-syntax-tree - an S-expression
; #f - signals of a parse error (error message is printed as a side effect
; during parsing)
(define (txp:ast-api-helper parse-proc)
(lambda (xpath-string . ns-binding)
(let ((res (parse-proc
xpath-string
(if (null? ns-binding) ns-binding (car ns-binding))
'())))
(if (txp:error? res) ; error detected
#f res))))
(define txp:xpath->ast
(txp:ast-api-helper (cadr (assq 'xpath txp:ast-res))))
(define txp:xpointer->ast
(txp:ast-api-helper (cadr (assq 'xpointer txp:ast-res))))
(define txp:expr->ast
(txp:ast-api-helper (cadr (assq 'expr txp:ast-res))))
;==========================================================================
; SXPath native syntax -> AST
; Additional features added to AST by native SXPath
; Operator += below denotes additional alternatives to AST grammar rules
; {7} <NodeTest> += (node-test (equal? <SXML-node> ))
; | (node-test (eq? <SXML-node> ))
; | (node-test (names <String>+ ))
; | (node-test (not-names <String>+ ))
; {4} <Step> += (lambda-step <Lambda> )
; | <FilterExpr>
(define (txp:sxpath->ast path . ns-binding)
(let ((ns-binding (if (null? ns-binding) ns-binding (car ns-binding))))
(if
(string? path) ; Just a textual XPath
(txp:expr->ast path ns-binding)
(let loop ((ast-steps '())
(path path))
(cond
((null? path) ; parsing is finished
(if (null? ast-steps) ; empty path
'(absolute-location-path)
(let ((forward-steps (reverse ast-steps)))
(cons
(if (eq? (caar forward-steps) 'filter-expr)
'path-expr 'relative-location-path)
forward-steps))))
((procedure? (car path))
(loop (cons (list 'lambda-step (car path))
ast-steps)
(cdr path)))
((assq (car path) '((// . descendant-or-self) (.. . parent)))
=> (lambda (pair)
(loop (cons
`(step (axis-specifier (,(cdr pair)))
(node-test (node)))
ast-steps)
(cdr path))))
((symbol? (car path))
(loop (cons
`(step (axis-specifier (child))
(node-test
,(cond
((assq (car path) '((* . (*)) (*text* . (text))))
=> cdr)
(else
`(local-name ,(symbol->string (car path)))))))
ast-steps)
(cdr path)))
((string? (car path))
(and-let* ; only for the location path for the moment
((txt-ast (txp:expr->ast (car path))))
(loop (if (eq? (car txt-ast) 'relative-location-path)
(append (reverse (cdr txt-ast)) ast-steps)
(cons
`(filter-expr (primary-expr ,txt-ast))
ast-steps))
(cdr path))))
((and (pair? (car path)) (not (null? (car path))))
(cond
((assq (caar path) '((*or* . names) (*not* . not-names)))
=> (lambda (pair)
(loop
(cons
`(step (axis-specifier (child))
(node-test
,(cons (cdr pair)
(map symbol->string (cdar path)))))
ast-steps)
(cdr path))))
((assq (caar path) '((equal? . equal?) (eq? . eq?)
(ns-id:* . namespace-uri)))
=> (lambda (pair)
(loop
(cons `(step (axis-specifier (child))
(node-test ,(list (cdr pair) (cadar path))))
ast-steps)
(cdr path))))
(else
(let reducer ((reducing-path (cdar path))
(filters '()))
(cond
((null? reducing-path)
(if
(symbol? (caar path)) ; just a child axis
(loop
(cons
`(step
(axis-specifier (child))
(node-test (local-name ,(symbol->string (caar path))))
,@(reverse filters))
ast-steps)
(cdr path))
(and-let*
((select (txp:sxpath->ast (caar path) ns-binding)))
(loop
(cons `(filter-expr
(primary-expr ,select)
,@(reverse filters))
ast-steps)
(cdr path)))))
((number? (car reducing-path))
(reducer
(cdr reducing-path)
(cons
`(predicate
,(if
(negative? (car reducing-path)) ; from end of nodeset
`(- (function-call (function-name "last"))
(number ,(- -1 (car reducing-path))))
`(number ,(car reducing-path))))
filters)))
(else
(and-let*
((pred-ast
(txp:sxpath->ast (car reducing-path) ns-binding)))
(reducer
(cdr reducing-path)
(cons `(predicate ,pred-ast) filters)))))))))
(else
(cerr "Invalid path step: " (car path))
#f))))))
;==========================================================================
; Several popular accessors and constructors for AST steps
; Whether a representation for location step
(define (txp:step? op)
(and (pair? op) (eq? (car op) 'step)))
; Returns the axis specifier of the location step
; Argument: the AST representation of a location step
; Result: either '(child) and the like, or #f if the AST contains syntactic
; error
(define (txp:step-axis op)
(and (txp:step? op)
(not (null? (cdr op)))
(pair? (cadr op)) (eq? (caadr op) 'axis-specifier)
(cadadr op)))
; Returns the node test of the location step
; Argument: the AST representation of a location step
; Result: either '(*) and the like, or #f if the AST contains syntactic
; error
(define (txp:step-node-test op)
(and (txp:step? op)
(not (null? (cdr op))) (not (null? (cddr op)))
(pair? (caddr op)) (eq? (caaddr op) 'node-test)
(cadr (caddr op))))
; Returns predicate expressions of the location step
; Argument: the AST representation of a location step
; Result: either (listof ast-expr)
; or #f if syntactic error detected in a location step AST
(define (txp:step-preds op)
(and (txp:step? op)
(not (null? (cdr op))) (not (null? (cddr op)))
(null? (filter
(lambda (sub) ; not a predicate representation
(not (and (pair? sub) (eq? (car sub) 'predicate))))
(cdddr op)))
(map cadr (cdddr op))))
; Constructs the AST representation for a given axis, node-test and
; a list of predicate expressions
; axis ::= '(child) and the like
; node-test ::= '(*) and the like
; pred-expr-list ::= (listof ast-expr)
(define (txp:construct-step axis node-test . pred-expr-list)
`(step (axis-specifier ,axis)
(node-test ,node-test)
,@(map
(lambda (pred-expr) `(predicate ,pred-expr))
pred-expr-list)))
(provide (all-defined)))

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff