; Module header is generated automatically #cs(module multi-parser mzscheme (require "myenv.ss") (require (lib "string.ss" "srfi/13")) (require "input-parse.ss") (require "parse-error.ss") (require "ssax-code.ss") (require "ssax-prim.ss") (require "id.ss") (require "xlink-parser.ss") ;; SSAX multi parser ;; Provides ID-index creation, SXML parent pointers and XLink grammar parsing ; ; This software is in Public Domain. ; IT IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND. ; ; Please send bug reports and comments to: ; lisovsky@acm.org Kirill Lisovsky ; lizorkin@hotbox.ru Dmitry Lizorkin ; ; Primary features: ; '() ; '(parent) ; '(id) ; '(parent id) ; '(id xlink) ; '(parent id xlink) ;========================================================================= ; Parent seed ;------------------------------------------------ ; Parent-related part of the seed ; It is a list of one element: ; a function of no arguments which returns a pointer to element's parent ; or '*TOP-PTR* symbol for a root SXML element ; Duuring an element construction it may be just a pointer to parents head, ; because a parent itself may be under construction at the moment. ; This function is called by the NEW-LEVEL-SEED handler ; elem-name = (if(symbol? elem-gi) elem-gi (RES-NAME->SXML elem-gi) ; A new 'parent:seed' is returned (define (parent:new-level-seed-handler elem-name) (let ((head (list elem-name))) (list (lambda () head)))) ; A function which constructs an element from its attributes, children ; and delayed parent information ; parent:seed - contains a delayed pointer to element's parent ; attrs - element's attributes ; children - a list of child elements (define (parent:construct-element parent:parent-seed parent:seed attrs children) ; car gets the only element of parent seed - a pointer to a parent (let((parent-ptr (car parent:parent-seed)) (head ((car parent:seed)))) (cons (car head) (cons* (cons '@ attrs) `(@@ (*PARENT* ,parent-ptr)) children)))) ;========================================================================= ; A seed ; seed = (list original-seed parent:seed id:seed xlink:seed) ; original-seed - the seed of the original 'SSAX:XML->SXML' function. It ; contains an SXML tree being constructed. ; parent:seed - parent-related part ; id:seed - id-related part ; xlink:seed - xlink-related part ;------------------------------------------------------------------------------ ; Accessors ; (mul:seed-original seed) (define get-sxml-seed car) ; Renamed: ; mul:seed-parent get-pptr-seed ; mul:seed-id get-id-seed ; mul:seed-xlink get-xlink-seed ; Handler for attempts to access an absent seed. (define (bad-accessor type) (lambda x (cerr nl "MURDER!!! -> " type nl x nl) (exit -1))) ; Seed constructor. #f seeds will be omitted. (define (make-seed . seeds) (let rpt ((s (cdr seeds)) (rzt (list (car seeds)))) (cond ((null? s) (reverse rzt)) ((car s) (rpt (cdr s) (cons (car s) rzt))) (else (rpt (cdr s) rzt))))) ;========================================================================= ; This is a multi parser constructor function ; parent, id, xlink - boolean parameters. #t means that we construct the ; corresponding feature, #f - otherwise ; ns - for future development. Is not used anywhere in the function (define (ssax:multi-parser . req-features) (let ((ns-assig '()) (with-parent? (memq 'parent req-features)) (with-id? (memq 'id req-features)) (with-xlink? (memq 'xlink req-features))) (call-with-values (lambda () (values (if with-parent? cadr (bad-accessor 'par)) (if with-id? (if with-parent? caddr cadr) (bad-accessor 'id)) (if with-xlink? (cond ((and with-parent? with-id?) cadddr) ((or with-parent? with-id?) caddr) (else cadr)) (bad-accessor 'xlink)))) (lambda (get-pptr-seed get-id-seed get-xlink-seed) (let ((initial-seed ; Initial values for specialized seeds (make-seed '() (and with-parent? (list '*TOP-PTR*)) (and with-id? (id:make-seed '() '())) (and with-xlink? (xlink:make-small-seed 'general '() '(1) '()))))) (letrec ( ; Making a special function, which, if applyed to the final seed, ; will construct a document (ending-actions (cond ((not (or with-id? with-xlink?)) (lambda (seed) (let ((result (reverse (get-sxml-seed seed)))) (cons '*TOP* result)))) ((and with-id? (not with-xlink?)) ; with-id? (lambda (seed) (let((result (reverse (get-sxml-seed seed))) (aux (list (id:ending-action (get-id-seed seed))))) (cons* '*TOP* (cons '@@ aux) result)))) ((and with-id? with-xlink?) ; with-id, with-xlink (lambda (seed) (let((result (reverse (get-sxml-seed seed))) (aux (list (xlink:ending-action (get-xlink-seed seed)) (id:ending-action (get-id-seed seed))))) (cons* '*TOP* (cons '@@ aux) result)))) (else (cerr "ending-actions NIY: " with-parent? with-id? with-xlink? nl) (exit)))) ;------------------------------------ ; Some handlers ; A special function ; When given an input port, it becomes a handler for a NEW-LEVEL-SEED (new-level-seed-handler (cond ((not (or with-parent? with-id? with-xlink?)) (lambda(port) (lambda (elem-gi attributes namespaces expected-content seed) (list '())))) ((and with-parent? (not (or with-id? with-xlink?))) ; with-parent (lambda(port) (lambda (elem-gi attributes namespaces expected-content seed) (make-seed '() (and with-parent? (parent:new-level-seed-handler (if (symbol? elem-gi) elem-gi (RES-NAME->SXML elem-gi)))) )))) ((and with-id? (not (or with-parent? with-xlink?))) ; with-id (lambda(port) (lambda (elem-gi attributes namespaces expected-content seed) (list ; make-seed '() (id:new-level-seed-handler (get-id-seed seed)))))) ((and with-parent? with-id? (not with-xlink?)) ; parent, id (lambda(port) (lambda (elem-gi attributes namespaces expected-content seed) (list ; make-seed '() (parent:new-level-seed-handler (if(symbol? elem-gi) elem-gi (RES-NAME->SXML elem-gi))) (id:new-level-seed-handler (get-id-seed seed)))))) ((and with-id? with-xlink? (not with-parent?)) ; id, xlink (lambda(port) (lambda (elem-gi attributes namespaces expected-content seed) (list ; make-seed '() (id:new-level-seed-handler (get-id-seed seed)) (xlink:new-level-seed-handler port attributes namespaces (get-xlink-seed seed)))))) ((and with-parent? with-id? with-xlink?) ; parent, id, xlink (lambda(port) (lambda (elem-gi attributes namespaces expected-content seed) (list ; make-seed '() (parent:new-level-seed-handler (if(symbol? elem-gi) elem-gi (RES-NAME->SXML elem-gi))) (id:new-level-seed-handler (get-id-seed seed)) (xlink:new-level-seed-handler port attributes namespaces (get-xlink-seed seed)))))) (else (cerr "new-level NIY: " with-parent? with-id? with-xlink? nl) (exit)))) ; A special handler function for a FINISH-ELEMENT (finish-element-handler (cond ((not (or with-parent? with-id? with-xlink?)) (lambda (elem-gi attributes namespaces parent-seed seed) (let ((children (reverse-collect-str-drop-ws (get-sxml-seed seed))) (attrs (attlist-fold (lambda (attr accum) (cons (list (if (symbol? (car attr)) (car attr) (RES-NAME->SXML (car attr))) (cdr attr)) accum)) '() attributes))) (list ; make-seed (cons (cons (if (symbol? elem-gi) elem-gi (RES-NAME->SXML elem-gi)) (if (null? attrs) children (cons (cons '@ attrs) children))) (get-sxml-seed parent-seed)))))) ((and with-parent? (not (or with-id? with-xlink?))) ; parent (lambda (elem-gi attributes namespaces parent-seed seed) (let((children (reverse-collect-str-drop-ws (get-sxml-seed seed))) (attrs (attlist-fold (lambda (attr accum) (cons (list (if (symbol? (car attr)) (car attr) (RES-NAME->SXML (car attr))) (cdr attr)) accum)) '() attributes))) (list ; make-seed (cons (parent:construct-element (get-pptr-seed parent-seed) (get-pptr-seed seed) attrs children) (get-sxml-seed parent-seed)) ; pptr- seed from parent seed is not modified: (get-pptr-seed parent-seed) )))) ((and with-id? (not (or with-parent? with-xlink?))) ; id (lambda (elem-gi attributes namespaces parent-seed seed) (let((children (reverse-collect-str-drop-ws (get-sxml-seed seed))) (attrs (attlist-fold (lambda (attr accum) (cons (list (if (symbol? (car attr)) (car attr) (RES-NAME->SXML (car attr))) (cdr attr)) accum)) '() attributes))) (let((element (cons (if(symbol? elem-gi) elem-gi (RES-NAME->SXML elem-gi)) (if(null? attrs) children (cons (cons '@ attrs) children))))) (list ; make-seed (cons element (get-sxml-seed parent-seed)) (id:finish-element-handler elem-gi attributes (get-id-seed seed) element)))))) ((and with-parent? with-id? (not with-xlink?)) ; parent, id (lambda (elem-gi attributes namespaces parent-seed seed) (let((children (reverse-collect-str-drop-ws (get-sxml-seed seed))) (attrs (attlist-fold (lambda (attr accum) (cons (list (if (symbol? (car attr)) (car attr) (RES-NAME->SXML (car attr))) (cdr attr)) accum)) '() attributes))) (let((element (parent:construct-element (get-pptr-seed parent-seed) (get-pptr-seed seed) attrs children))) (list ; make-seed (cons element (get-sxml-seed parent-seed)) ; pptr- seed from parent seed is not modified: (get-pptr-seed parent-seed) (id:finish-element-handler elem-gi attributes (get-id-seed seed) element)))))) ((and with-id? with-xlink? (not with-parent?)) ; id, xlink (lambda (elem-gi attributes namespaces parent-seed seed) (let((children (reverse-collect-str-drop-ws (get-sxml-seed seed))) (attrs (attlist-fold (lambda (attr accum) (cons (list (if (symbol? (car attr)) (car attr) (RES-NAME->SXML (car attr))) (cdr attr)) accum)) '() attributes))) (let((element (cons (if(symbol? elem-gi) elem-gi (RES-NAME->SXML elem-gi)) (if(null? attrs) children (cons (cons '@ attrs) children))))) (list ; make-seed (cons element (get-sxml-seed parent-seed)) (id:finish-element-handler elem-gi attributes (get-id-seed seed) element) (xlink:finish-element-handler (get-xlink-seed parent-seed) (get-xlink-seed seed) element)))))) ((and with-parent? with-id? with-xlink?) ; parent, id, xlink (lambda (elem-gi attributes namespaces parent-seed seed) (let((children (reverse-collect-str-drop-ws (get-sxml-seed seed))) (attrs (attlist-fold (lambda (attr accum) (cons (list (if (symbol? (car attr)) (car attr) (RES-NAME->SXML (car attr))) (cdr attr)) accum)) '() attributes))) (let((element (parent:construct-element (get-pptr-seed parent-seed) (get-pptr-seed seed) attrs children))) (list ; make-seed (cons element (get-sxml-seed parent-seed)) ; pptr- seed from parent seed is not modified: (get-pptr-seed parent-seed) (id:finish-element-handler elem-gi attributes (get-id-seed seed) element) (xlink:finish-element-handler (get-xlink-seed parent-seed) (get-xlink-seed seed) element)))))) (else (cerr "finish-element: NIY" nl) (exit)))) ; A special function ; Given 'namespaces', it becomes a handler for a DOCTYPE (doctype-handler (if (not with-id?) (lambda (namespaces) (lambda (port docname systemid internal-subset? seed) (when internal-subset? (ssax:warn port "Internal DTD subset is not currently handled ") (ssax:skip-internal-dtd port)) (ssax:warn port "DOCTYPE DECL " docname " " systemid " found and skipped") (values #f '() namespaces seed))) (cond ((not (or with-parent? with-xlink?)) ; with-id (lambda (namespaces) (lambda (port docname systemid internal-subset? seed) (values #f '() namespaces (list ; make-seed (get-sxml-seed seed) (id:doctype-handler port systemid internal-subset?)))))) ((and with-parent? (not with-xlink?)) ; with-parent, with-id (lambda (namespaces) (lambda (port docname systemid internal-subset? seed) (values #f '() namespaces (list ; make-seed (get-sxml-seed seed) (get-pptr-seed seed) (id:doctype-handler port systemid internal-subset?)))))) ((and (not with-parent?) with-xlink?) ; with-id, with-xlink (lambda (namespaces) (lambda (port docname systemid internal-subset? seed) (values #f '() namespaces (list ; make-seed (get-sxml-seed seed) (id:doctype-handler port systemid internal-subset?) (get-xlink-seed seed)))))) (else ; with-parent, with-id, with-xlink (lambda (namespaces) (lambda (port docname systemid internal-subset? seed) (values #f '() namespaces (list ; make-seed (get-sxml-seed seed) (get-pptr-seed seed) (id:doctype-handler port systemid internal-subset?) (get-xlink-seed seed))))))))) ) ; end of letrec ; Constructing a special parser function (lambda (port) (let ((namespaces (map (lambda (el) (cons* #f (car el) (ssax:uri-string->symbol (cdr el)))) ns-assig))) (ending-actions ((ssax:make-parser NEW-LEVEL-SEED (new-level-seed-handler port) FINISH-ELEMENT finish-element-handler CHAR-DATA-HANDLER (lambda (string1 string2 seed) (cons (if(string-null? string2) (cons string1 (car seed)) (cons* string2 string1 (car seed))) (cdr seed))) DOCTYPE (doctype-handler namespaces) UNDECL-ROOT (lambda (elem-gi seed) (values #f '() namespaces seed)) PI ((*DEFAULT* . (lambda (port pi-tag seed) (cons (cons (list '*PI* pi-tag (ssax:read-pi-body-as-string port)) (car seed)) (cdr seed))))) ) port initial-seed)))))) )))) (provide (all-defined)))