218 lines
8.3 KiB
Scheme
218 lines
8.3 KiB
Scheme
; Module header is generated automatically
|
|
#cs(module lazy-ssax mzscheme
|
|
(require (lib "string.ss" "srfi/13"))
|
|
(require (lib "ssax.ss" "web-server/tests/tmp/ssax"))
|
|
(require "sxpathlib.ss")
|
|
(require "sxml-tools.ss")
|
|
(require "sxpath-ext.ss")
|
|
(require "xpath-parser.ss")
|
|
(require "txpath.ss")
|
|
(require "xpath-ast.ss")
|
|
(require "xpath-context_xlink.ss")
|
|
(require "lazy-xpath.ss")
|
|
|
|
;; A specialized lazy XML->SXML parser
|
|
; Is heavily based on continuations
|
|
|
|
;-------------------------------------------------
|
|
; Preliminary helper functions
|
|
|
|
; A helper that forces all descendants of a given node or a nodeset
|
|
(define (lazy:force-descendants node)
|
|
(cond
|
|
((lazy:promise? node) ; force it
|
|
(lazy:force-descendants (force node)))
|
|
((pair? node) ; not null
|
|
(for-each lazy:force-descendants node))
|
|
(else ; null or not pair
|
|
#t ; nothing to be done
|
|
)))
|
|
|
|
; Returns the list containing of all members of the argument list except
|
|
; for the last member
|
|
(define (lazy:except-last lst)
|
|
(if
|
|
(or (null? lst) ; this shouldn't happen
|
|
(null? (cdr lst)))
|
|
'()
|
|
(cons (car lst) (lazy:except-last (cdr lst)))))
|
|
|
|
;-------------------------------------------------
|
|
;
|
|
|
|
; Returns the common part of the seed
|
|
(define (lazy:seed-common seed)
|
|
((if (null? (cdr seed)) ; a short seed
|
|
car caddr)
|
|
seed))
|
|
|
|
; A monad-like handler
|
|
; Replaces the common part of the seed
|
|
(define (lazy:replace-common seed new-common)
|
|
(if (null? (cdr seed)) ; a short seed
|
|
(list new-common)
|
|
(list (car seed)
|
|
(cadr seed)
|
|
new-common
|
|
(cadddr seed))))
|
|
|
|
; Produces a lazy SXML document, which corresponds to reading a source
|
|
; document in a stream-wise fashion
|
|
(define (lazy:xml->sxml port namespace-prefix-assig)
|
|
(let ((namespaces
|
|
(map (lambda (el)
|
|
(cons* #f (car el) (ssax:uri-string->symbol (cdr el))))
|
|
namespace-prefix-assig))
|
|
(RES-NAME->SXML
|
|
(lambda (res-name)
|
|
(string->symbol
|
|
(string-append
|
|
(symbol->string (car res-name))
|
|
":"
|
|
(symbol->string (cdr res-name)))))))
|
|
((lambda (result)
|
|
; We assume that nobody follows the document element
|
|
(if (null? namespace-prefix-assig)
|
|
(cons '*TOP* (lazy:except-last result))
|
|
(cons
|
|
'*TOP*
|
|
(cons
|
|
`(@@ (*NAMESPACES*
|
|
,@(map
|
|
(lambda (ns) (list (car ns) (cdr ns)))
|
|
namespace-prefix-assig)))
|
|
(lazy:except-last result)))))
|
|
(call-with-current-continuation ; we grab the continuation to escape from parsing
|
|
(lambda (result-k)
|
|
; seed ::= (list result-k state-k common-seed level)
|
|
; result-k - continuation on what to do with the current result portion
|
|
; state-k - continuation to return to SSAX state on this level of XML
|
|
; tree hierarchy
|
|
; common-seed - general seed information
|
|
; level - level of a current node in a tree hierarchy
|
|
((ssax:make-parser
|
|
NEW-LEVEL-SEED
|
|
(lambda (elem-gi attributes namespaces expected-content seed)
|
|
;(pp (cons elem-gi (cadddr seed)))
|
|
(if
|
|
(or (null? (cdr seed)) ; short seed
|
|
(> (cadddr seed) 3)) ; deep level
|
|
(list '()) ; work like a conventional SSAX parser
|
|
(let ((attrs
|
|
(attlist-fold
|
|
(lambda (attr accum)
|
|
(cons (list
|
|
(if (symbol? (car attr)) (car attr)
|
|
(RES-NAME->SXML (car attr)))
|
|
(cdr attr)) accum))
|
|
'() attributes)))
|
|
(call-with-current-continuation
|
|
(lambda (new-level-k) ; how to parse next
|
|
((car seed) ; return the result
|
|
(let ((elem-content
|
|
; A promise to continue parsing
|
|
(call-with-current-continuation ; where to put the result
|
|
(lambda (elem-k)
|
|
(new-level-k
|
|
(list ; now form a seed
|
|
elem-k ; what to do with result
|
|
new-level-k ; SSAX state on this level
|
|
'() ; common-seed is empty
|
|
(+ (cadddr seed) 1) ; increase level
|
|
))))))
|
|
(append
|
|
; Previous string content
|
|
(ssax:reverse-collect-str-drop-ws (caddr seed))
|
|
(list
|
|
(cons
|
|
(if (symbol? elem-gi) elem-gi
|
|
(RES-NAME->SXML elem-gi))
|
|
(if (null? attrs) elem-content
|
|
(cons (cons '@ attrs) elem-content)))
|
|
; The following siblings of this element
|
|
(delay
|
|
(call-with-current-continuation ; where to put the result
|
|
(lambda (foll-k)
|
|
; First we force the parsing of the current element
|
|
(lazy:force-descendants elem-content)
|
|
; Than continue parsing
|
|
((cadr seed) ; recover the parent level of nesting
|
|
(list
|
|
foll-k ; what to do with result
|
|
(cadr seed)
|
|
'() ; common-seed is empty
|
|
(cadddr seed) ; the same level for siblings
|
|
))))))))))))))
|
|
|
|
FINISH-ELEMENT
|
|
(lambda (elem-gi attributes namespaces parent-seed seed)
|
|
(if
|
|
(null? (cdr seed)) ; a short seed
|
|
(let ((common (ssax:reverse-collect-str-drop-ws
|
|
(lazy:seed-common seed)))
|
|
(attrs
|
|
(attlist-fold
|
|
(lambda (attr accum)
|
|
(cons (list
|
|
(if (symbol? (car attr)) (car attr)
|
|
(RES-NAME->SXML (car attr)))
|
|
(cdr attr)) accum))
|
|
'() attributes)))
|
|
(lazy:replace-common
|
|
parent-seed
|
|
(cons
|
|
(cons
|
|
(if (symbol? elem-gi) elem-gi
|
|
(RES-NAME->SXML elem-gi))
|
|
(if (null? attrs) common
|
|
(cons (cons '@ attrs) common)))
|
|
(lazy:seed-common parent-seed))))
|
|
; Otherwise - just return the remaining character content
|
|
((car seed) ; continuation
|
|
(ssax:reverse-collect-str-drop-ws
|
|
(lazy:seed-common seed)))))
|
|
|
|
CHAR-DATA-HANDLER
|
|
(lambda (string1 string2 seed)
|
|
;(pp (list string1 string2 seed))
|
|
(lazy:replace-common
|
|
seed
|
|
(if (string-null? string2)
|
|
(cons string1 (lazy:seed-common seed))
|
|
(cons* string2 string1 (lazy:seed-common seed)))))
|
|
|
|
DOCTYPE
|
|
(lambda (port docname systemid internal-subset? seed)
|
|
(when internal-subset?
|
|
(ssax:warn port
|
|
"Internal DTD subset is not currently handled ")
|
|
(ssax:skip-internal-dtd port))
|
|
(ssax:warn port "DOCTYPE DECL " docname " "
|
|
systemid " found and skipped")
|
|
(values #f '() namespaces seed))
|
|
|
|
UNDECL-ROOT
|
|
(lambda (elem-gi seed)
|
|
(values #f '() namespaces seed))
|
|
|
|
PI
|
|
((*DEFAULT* .
|
|
(lambda (port pi-tag seed)
|
|
(lazy:replace-common
|
|
seed
|
|
(cons
|
|
(list '*PI* pi-tag (ssax:read-pi-body-as-string port))
|
|
(lazy:seed-common seed))))))
|
|
)
|
|
port
|
|
(list ; form initial seed
|
|
result-k ; put the result
|
|
(lambda (seed) ; dummy top-level parser state that produces '()
|
|
((car seed) ; where to put the result nodeset
|
|
'()))
|
|
'()
|
|
1 ; level for the document element
|
|
)))))))
|
|
|
|
(provide (all-defined)))
|