eol-style
svn: r7803
This commit is contained in:
parent
a4023f2ebe
commit
2204f32678
|
@ -1,114 +1,114 @@
|
||||||
SSAX Package
|
SSAX Package
|
||||||
============
|
============
|
||||||
|
|
||||||
A SSAX functional XML parsing framework consists of a DOM/SXML parser, a SAX
|
A SSAX functional XML parsing framework consists of a DOM/SXML parser, a SAX
|
||||||
parser, and a supporting library of lexing and parsing procedures. The
|
parser, and a supporting library of lexing and parsing procedures. The
|
||||||
procedures in the package can be used separately to tokenize or parse various
|
procedures in the package can be used separately to tokenize or parse various
|
||||||
pieces of XML documents. The framework supports XML Namespaces, character,
|
pieces of XML documents. The framework supports XML Namespaces, character,
|
||||||
internal and external parsed entities, attribute value normalization,
|
internal and external parsed entities, attribute value normalization,
|
||||||
processing instructions and CDATA sections. The package includes a
|
processing instructions and CDATA sections. The package includes a
|
||||||
semi-validating SXML parser: a DOM-mode parser that is an instantiation of
|
semi-validating SXML parser: a DOM-mode parser that is an instantiation of
|
||||||
a SAX parser (called SSAX).
|
a SAX parser (called SSAX).
|
||||||
|
|
||||||
SSAX is a full-featured, algorithmically optimal, pure-functional parser,
|
SSAX is a full-featured, algorithmically optimal, pure-functional parser,
|
||||||
which can act as a stream processor. SSAX is an efficient SAX parser that is
|
which can act as a stream processor. SSAX is an efficient SAX parser that is
|
||||||
easy to use. SSAX minimizes the amount of application-specific state that has
|
easy to use. SSAX minimizes the amount of application-specific state that has
|
||||||
to be shared among user-supplied event handlers. SSAX makes the maintenance
|
to be shared among user-supplied event handlers. SSAX makes the maintenance
|
||||||
of an application-specific element stack unnecessary, which eliminates several
|
of an application-specific element stack unnecessary, which eliminates several
|
||||||
classes of common bugs. SSAX is written in a pure-functional subset of Scheme.
|
classes of common bugs. SSAX is written in a pure-functional subset of Scheme.
|
||||||
Therefore, the event handlers are referentially transparent, which makes them
|
Therefore, the event handlers are referentially transparent, which makes them
|
||||||
easier for a programmer to write and to reason about. The more expressive,
|
easier for a programmer to write and to reason about. The more expressive,
|
||||||
reliable and easier to use application interface for the event-driven XML
|
reliable and easier to use application interface for the event-driven XML
|
||||||
parsing is the outcome of implementing the parsing engine as an enhanced tree
|
parsing is the outcome of implementing the parsing engine as an enhanced tree
|
||||||
fold combinator, which fully captures the control pattern of the depth-first
|
fold combinator, which fully captures the control pattern of the depth-first
|
||||||
tree traversal.
|
tree traversal.
|
||||||
|
|
||||||
-------------------------------------------------
|
-------------------------------------------------
|
||||||
|
|
||||||
Quick start
|
Quick start
|
||||||
|
|
||||||
; procedure: ssax:xml->sxml PORT NAMESPACE-PREFIX-ASSIG
|
; procedure: ssax:xml->sxml PORT NAMESPACE-PREFIX-ASSIG
|
||||||
;
|
;
|
||||||
; This is an instance of a SSAX parser that returns an SXML
|
; This is an instance of a SSAX parser that returns an SXML
|
||||||
; representation of the XML document to be read from PORT.
|
; representation of the XML document to be read from PORT.
|
||||||
; NAMESPACE-PREFIX-ASSIG is a list of (USER-PREFIX . URI-STRING)
|
; NAMESPACE-PREFIX-ASSIG is a list of (USER-PREFIX . URI-STRING)
|
||||||
; that assigns USER-PREFIXes to certain namespaces identified by
|
; that assigns USER-PREFIXes to certain namespaces identified by
|
||||||
; particular URI-STRINGs. It may be an empty list.
|
; particular URI-STRINGs. It may be an empty list.
|
||||||
; The procedure returns an SXML tree. The port points to the
|
; The procedure returns an SXML tree. The port points to the
|
||||||
; first character after the root element.
|
; first character after the root element.
|
||||||
(define (ssax:xml->sxml port namespace-prefix-assig) ...)
|
(define (ssax:xml->sxml port namespace-prefix-assig) ...)
|
||||||
|
|
||||||
; procedure: pre-post-order TREE BINDINGS
|
; procedure: pre-post-order TREE BINDINGS
|
||||||
;
|
;
|
||||||
; Traversal of an SXML tree or a grove:
|
; Traversal of an SXML tree or a grove:
|
||||||
; a <Node> or a <Nodelist>
|
; a <Node> or a <Nodelist>
|
||||||
;
|
;
|
||||||
; A <Node> and a <Nodelist> are mutually-recursive datatypes that
|
; A <Node> and a <Nodelist> are mutually-recursive datatypes that
|
||||||
; underlie the SXML tree:
|
; underlie the SXML tree:
|
||||||
; <Node> ::= (name . <Nodelist>) | "text string"
|
; <Node> ::= (name . <Nodelist>) | "text string"
|
||||||
; An (ordered) set of nodes is just a list of the constituent nodes:
|
; An (ordered) set of nodes is just a list of the constituent nodes:
|
||||||
; <Nodelist> ::= (<Node> ...)
|
; <Nodelist> ::= (<Node> ...)
|
||||||
; Nodelists, and Nodes other than text strings are both lists. A
|
; Nodelists, and Nodes other than text strings are both lists. A
|
||||||
; <Nodelist> however is either an empty list, or a list whose head is
|
; <Nodelist> however is either an empty list, or a list whose head is
|
||||||
; not a symbol (an atom in general). A symbol at the head of a node is
|
; not a symbol (an atom in general). A symbol at the head of a node is
|
||||||
; either an XML name (in which case it's a tag of an XML element), or
|
; either an XML name (in which case it's a tag of an XML element), or
|
||||||
; an administrative name such as '@'.
|
; an administrative name such as '@'.
|
||||||
; See SXPath.scm and SSAX.scm for more information on SXML.
|
; See SXPath.scm and SSAX.scm for more information on SXML.
|
||||||
;
|
;
|
||||||
;
|
;
|
||||||
; Pre-Post-order traversal of a tree and creation of a new tree:
|
; Pre-Post-order traversal of a tree and creation of a new tree:
|
||||||
; pre-post-order:: <tree> x <bindings> -> <new-tree>
|
; pre-post-order:: <tree> x <bindings> -> <new-tree>
|
||||||
; where
|
; where
|
||||||
; <bindings> ::= (<binding> ...)
|
; <bindings> ::= (<binding> ...)
|
||||||
; <binding> ::= (<trigger-symbol> *preorder* . <handler>) |
|
; <binding> ::= (<trigger-symbol> *preorder* . <handler>) |
|
||||||
; (<trigger-symbol> *macro* . <handler>) |
|
; (<trigger-symbol> *macro* . <handler>) |
|
||||||
; (<trigger-symbol> <new-bindings> . <handler>) |
|
; (<trigger-symbol> <new-bindings> . <handler>) |
|
||||||
; (<trigger-symbol> . <handler>)
|
; (<trigger-symbol> . <handler>)
|
||||||
; <trigger-symbol> ::= XMLname | *text* | *default*
|
; <trigger-symbol> ::= XMLname | *text* | *default*
|
||||||
; <handler> :: <trigger-symbol> x [<tree>] -> <new-tree>
|
; <handler> :: <trigger-symbol> x [<tree>] -> <new-tree>
|
||||||
;
|
;
|
||||||
; The pre-post-order function visits the nodes and nodelists
|
; The pre-post-order function visits the nodes and nodelists
|
||||||
; pre-post-order (depth-first). For each <Node> of the form (name
|
; pre-post-order (depth-first). For each <Node> of the form (name
|
||||||
; <Node> ...) it looks up an association with the given 'name' among
|
; <Node> ...) it looks up an association with the given 'name' among
|
||||||
; its <bindings>. If failed, pre-post-order tries to locate a
|
; its <bindings>. If failed, pre-post-order tries to locate a
|
||||||
; *default* binding. It's an error if the latter attempt fails as
|
; *default* binding. It's an error if the latter attempt fails as
|
||||||
; well. Having found a binding, the pre-post-order function first
|
; well. Having found a binding, the pre-post-order function first
|
||||||
; checks to see if the binding is of the form
|
; checks to see if the binding is of the form
|
||||||
; (<trigger-symbol> *preorder* . <handler>)
|
; (<trigger-symbol> *preorder* . <handler>)
|
||||||
; If it is, the handler is 'applied' to the current node. Otherwise,
|
; If it is, the handler is 'applied' to the current node. Otherwise,
|
||||||
; the pre-post-order function first calls itself recursively for each
|
; the pre-post-order function first calls itself recursively for each
|
||||||
; child of the current node, with <new-bindings> prepended to the
|
; child of the current node, with <new-bindings> prepended to the
|
||||||
; <bindings> in effect. The result of these calls is passed to the
|
; <bindings> in effect. The result of these calls is passed to the
|
||||||
; <handler> (along with the head of the current <Node>). To be more
|
; <handler> (along with the head of the current <Node>). To be more
|
||||||
; precise, the handler is _applied_ to the head of the current node
|
; precise, the handler is _applied_ to the head of the current node
|
||||||
; and its processed children. The result of the handler, which should
|
; and its processed children. The result of the handler, which should
|
||||||
; also be a <tree>, replaces the current <Node>. If the current <Node>
|
; also be a <tree>, replaces the current <Node>. If the current <Node>
|
||||||
; is a text string or other atom, a special binding with a symbol
|
; is a text string or other atom, a special binding with a symbol
|
||||||
; *text* is looked up.
|
; *text* is looked up.
|
||||||
;
|
;
|
||||||
; A binding can also be of the form
|
; A binding can also be of the form
|
||||||
; (<trigger-symbol> *macro* . <handler>)
|
; (<trigger-symbol> *macro* . <handler>)
|
||||||
; This is equivalent to *preorder* described above. However, the result
|
; This is equivalent to *preorder* described above. However, the result
|
||||||
; is re-processed again, with the current stylesheet.
|
; is re-processed again, with the current stylesheet.
|
||||||
;
|
;
|
||||||
(define (pre-post-order tree bindings) ...)
|
(define (pre-post-order tree bindings) ...)
|
||||||
|
|
||||||
-------------------------------------------------
|
-------------------------------------------------
|
||||||
|
|
||||||
Additional tools included into the package
|
Additional tools included into the package
|
||||||
|
|
||||||
1. "access-remote.ss"
|
1. "access-remote.ss"
|
||||||
Uniform access to local and remote resources
|
Uniform access to local and remote resources
|
||||||
Resolution for relative URIs in accordance with RFC 2396
|
Resolution for relative URIs in accordance with RFC 2396
|
||||||
|
|
||||||
2. "id.ss"
|
2. "id.ss"
|
||||||
Creation and manipulation of the ID-index for a faster access to SXML elements
|
Creation and manipulation of the ID-index for a faster access to SXML elements
|
||||||
by their unique ID
|
by their unique ID
|
||||||
Provides the DTD parser for extracting ID attribute declarations
|
Provides the DTD parser for extracting ID attribute declarations
|
||||||
|
|
||||||
3. "xlink-parser.ss"
|
3. "xlink-parser.ss"
|
||||||
Parser for XML documents that contain XLink elements
|
Parser for XML documents that contain XLink elements
|
||||||
|
|
||||||
4. "multi-parser.ss"
|
4. "multi-parser.ss"
|
||||||
SSAX multi parser: combines several specialized parsers into one
|
SSAX multi parser: combines several specialized parsers into one
|
||||||
Provides creation of parent pointers to SXML document constructed
|
Provides creation of parent pointers to SXML document constructed
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
(module info (lib "infotab.ss" "setup")
|
(module info (lib "infotab.ss" "setup")
|
||||||
(define name "ssax")
|
(define name "ssax")
|
||||||
(define blurb
|
(define blurb
|
||||||
(list "SSAX functional XML parsing framework "
|
(list "SSAX functional XML parsing framework "
|
||||||
"to inter-convert between an angular-bracket and "
|
"to inter-convert between an angular-bracket and "
|
||||||
"an S-expression-based notations for markup documents"))
|
"an S-expression-based notations for markup documents"))
|
||||||
(define primary-file "ssax.ss")
|
(define primary-file "ssax.ss")
|
||||||
(define doc.txt "doc.txt")
|
(define doc.txt "doc.txt")
|
||||||
(define categories '(xml))
|
(define categories '(xml))
|
||||||
)
|
)
|
||||||
|
|
|
@ -1,376 +1,376 @@
|
||||||
SXML Package
|
SXML Package
|
||||||
============
|
============
|
||||||
|
|
||||||
SXML package contains a collection of tools for processing markup documents
|
SXML package contains a collection of tools for processing markup documents
|
||||||
(XML, XHTML, HTML) in the form of S-expressions (SXML, SHTML)
|
(XML, XHTML, HTML) in the form of S-expressions (SXML, SHTML)
|
||||||
|
|
||||||
You can find the API documentation in:
|
You can find the API documentation in:
|
||||||
http://modis.ispras.ru/Lizorkin/Apidoc/index.html
|
http://modis.ispras.ru/Lizorkin/Apidoc/index.html
|
||||||
|
|
||||||
SXML tools tutorial (under construction):
|
SXML tools tutorial (under construction):
|
||||||
http://modis.ispras.ru/Lizorkin/sxml-tutorial.html
|
http://modis.ispras.ru/Lizorkin/sxml-tutorial.html
|
||||||
|
|
||||||
==========================================================================
|
==========================================================================
|
||||||
|
|
||||||
Description of the main high-level package components
|
Description of the main high-level package components
|
||||||
-----------------------------------------------------
|
-----------------------------------------------------
|
||||||
|
|
||||||
1. SXML-tools
|
1. SXML-tools
|
||||||
2. SXPath - SXML Query Language
|
2. SXPath - SXML Query Language
|
||||||
3. SXPath with context
|
3. SXPath with context
|
||||||
4. DDO SXPath
|
4. DDO SXPath
|
||||||
5. Functional-style modification tool for SXML
|
5. Functional-style modification tool for SXML
|
||||||
6. STX - Scheme-enabled XSLT processor
|
6. STX - Scheme-enabled XSLT processor
|
||||||
7. XPathLink - query language for a set of linked documents
|
7. XPathLink - query language for a set of linked documents
|
||||||
|
|
||||||
-------------------------------------------------
|
-------------------------------------------------
|
||||||
|
|
||||||
1. SXML-tools
|
1. SXML-tools
|
||||||
|
|
||||||
SXML is an XML Infoset represented as native Scheme data - S-expressions.
|
SXML is an XML Infoset represented as native Scheme data - S-expressions.
|
||||||
Any Scheme program can manipulate SXML data directly, and a DOM-like API is not
|
Any Scheme program can manipulate SXML data directly, and a DOM-like API is not
|
||||||
necessary for SXML/Scheme applications.
|
necessary for SXML/Scheme applications.
|
||||||
SXML-tools (former DOMS) is just a set of handy functions which may be
|
SXML-tools (former DOMS) is just a set of handy functions which may be
|
||||||
convenient for some popular operations on SXML data.
|
convenient for some popular operations on SXML data.
|
||||||
|
|
||||||
library file: Bigloo, Chicken, Gambit: "sxml/sxml-tools.scm"
|
library file: Bigloo, Chicken, Gambit: "sxml/sxml-tools.scm"
|
||||||
PLT: "sxml-tools.ss"
|
PLT: "sxml-tools.ss"
|
||||||
|
|
||||||
http://www.pair.com/lisovsky/xml/sxmltools/
|
http://www.pair.com/lisovsky/xml/sxmltools/
|
||||||
|
|
||||||
-------------------------------------------------
|
-------------------------------------------------
|
||||||
|
|
||||||
2. SXPath - SXML Query Language
|
2. SXPath - SXML Query Language
|
||||||
|
|
||||||
SXPath is a query language for SXML. It treats a location path as a composite
|
SXPath is a query language for SXML. It treats a location path as a composite
|
||||||
query over an XPath tree or its branch. A single step is a combination of a
|
query over an XPath tree or its branch. A single step is a combination of a
|
||||||
projection, selection or a transitive closure. Multiple steps are combined via
|
projection, selection or a transitive closure. Multiple steps are combined via
|
||||||
join and union operations.
|
join and union operations.
|
||||||
|
|
||||||
Lower-level SXPath consists of a set of predicates, filters, selectors and
|
Lower-level SXPath consists of a set of predicates, filters, selectors and
|
||||||
combinators, and higher-level abbreviated SXPath functions which are
|
combinators, and higher-level abbreviated SXPath functions which are
|
||||||
implemented in terms of lower-level functions.
|
implemented in terms of lower-level functions.
|
||||||
|
|
||||||
Higher level SXPath functions are dealing with XPath expressions which may be
|
Higher level SXPath functions are dealing with XPath expressions which may be
|
||||||
represented as a list of steps in the location path ("native" SXPath):
|
represented as a list of steps in the location path ("native" SXPath):
|
||||||
(sxpath '(table (tr 3) td @ align))
|
(sxpath '(table (tr 3) td @ align))
|
||||||
or as a textual representation of XPath expressions which is compatible with
|
or as a textual representation of XPath expressions which is compatible with
|
||||||
W3C XPath recommendation ("textual" SXPath):
|
W3C XPath recommendation ("textual" SXPath):
|
||||||
(sxpath "table/tr[3]/td/@align")
|
(sxpath "table/tr[3]/td/@align")
|
||||||
|
|
||||||
An arbitrary converter implemented as a Scheme function may be used as a step
|
An arbitrary converter implemented as a Scheme function may be used as a step
|
||||||
in a location path of "native" SXPath, which makes it an extremely powerful and
|
in a location path of "native" SXPath, which makes it an extremely powerful and
|
||||||
flexible tool. On the other hand, a lot of W3C Recommendations such as XSLT,
|
flexible tool. On the other hand, a lot of W3C Recommendations such as XSLT,
|
||||||
XPointer, and XLink depend on textual XPath expressions.
|
XPointer, and XLink depend on textual XPath expressions.
|
||||||
|
|
||||||
It is possible to combine "native" and "textual" location paths and location
|
It is possible to combine "native" and "textual" location paths and location
|
||||||
step functions in one query, constructing an arbitrary XML query far beyond
|
step functions in one query, constructing an arbitrary XML query far beyond
|
||||||
capabilities of XPath. For example, the query
|
capabilities of XPath. For example, the query
|
||||||
(sxpath `("document/chapter[3]" ,relevant-links @ author))
|
(sxpath `("document/chapter[3]" ,relevant-links @ author))
|
||||||
makes use of the location step function relevant-links, which implements an
|
makes use of the location step function relevant-links, which implements an
|
||||||
arbitrary algorithm in Scheme.
|
arbitrary algorithm in Scheme.
|
||||||
|
|
||||||
SXPath may be considered as a compiler from abbreviated XPath (extended with
|
SXPath may be considered as a compiler from abbreviated XPath (extended with
|
||||||
native SXPath and location step functions) to SXPath primitives.
|
native SXPath and location step functions) to SXPath primitives.
|
||||||
|
|
||||||
library file: Bigloo, Chicken, Gambit: "sxml/sxpath.scm"
|
library file: Bigloo, Chicken, Gambit: "sxml/sxpath.scm"
|
||||||
PLT: "sxpath.ss"
|
PLT: "sxpath.ss"
|
||||||
|
|
||||||
http://www.pair.com/lisovsky/query/sxpath/
|
http://www.pair.com/lisovsky/query/sxpath/
|
||||||
|
|
||||||
-------------------------------------------------
|
-------------------------------------------------
|
||||||
|
|
||||||
3. SXPath with context
|
3. SXPath with context
|
||||||
|
|
||||||
SXPath with context provides an efficient implementation of XPath reverse
|
SXPath with context provides an efficient implementation of XPath reverse
|
||||||
axes ("parent::", "ancestor::" and such) on SXML documents.
|
axes ("parent::", "ancestor::" and such) on SXML documents.
|
||||||
|
|
||||||
The limitation of SXML is the absence of an upward link from a child to its
|
The limitation of SXML is the absence of an upward link from a child to its
|
||||||
parent, which makes the straightforward evaluation of XPath reverse axes
|
parent, which makes the straightforward evaluation of XPath reverse axes
|
||||||
inefficient. The previous approach for evaluating reverse axes in SXPath was
|
inefficient. The previous approach for evaluating reverse axes in SXPath was
|
||||||
searching for a parent from the root of the SXML tree.
|
searching for a parent from the root of the SXML tree.
|
||||||
|
|
||||||
SXPath with context provides the fast reverse axes, which is achieved by
|
SXPath with context provides the fast reverse axes, which is achieved by
|
||||||
storing previously visited ancestors of the context node in the context.
|
storing previously visited ancestors of the context node in the context.
|
||||||
With a special static analysis of an XPath expression, only the minimal
|
With a special static analysis of an XPath expression, only the minimal
|
||||||
required number of ancestors is stored in the context on each location step.
|
required number of ancestors is stored in the context on each location step.
|
||||||
|
|
||||||
library file: Bigloo, Chicken, Gambit: "sxml/xpath-context.scm"
|
library file: Bigloo, Chicken, Gambit: "sxml/xpath-context.scm"
|
||||||
PLT: "xpath-context_xlink.ss"
|
PLT: "xpath-context_xlink.ss"
|
||||||
|
|
||||||
-------------------------------------------------
|
-------------------------------------------------
|
||||||
|
|
||||||
4. DDO SXPath
|
4. DDO SXPath
|
||||||
|
|
||||||
The optimized SXPath that implements distinct document order (DDO) of the
|
The optimized SXPath that implements distinct document order (DDO) of the
|
||||||
nodeset produced.
|
nodeset produced.
|
||||||
|
|
||||||
Unlike conventional SXPath and SXPath with context, DDO SXPath guarantees that
|
Unlike conventional SXPath and SXPath with context, DDO SXPath guarantees that
|
||||||
the execution time is at worst polynomial of the XPath expression size and of
|
the execution time is at worst polynomial of the XPath expression size and of
|
||||||
the SXML document size.
|
the SXML document size.
|
||||||
|
|
||||||
The API of DDO SXPath is compatible with that of conventional SXPath. The main
|
The API of DDO SXPath is compatible with that of conventional SXPath. The main
|
||||||
following kinds of optimization methods are designed and implemented in DDO
|
following kinds of optimization methods are designed and implemented in DDO
|
||||||
SXPath:
|
SXPath:
|
||||||
|
|
||||||
- All XPath axes are implemented to keep a nodeset in distinct document
|
- All XPath axes are implemented to keep a nodeset in distinct document
|
||||||
order (DDO). An axis can now be considered as a converter:
|
order (DDO). An axis can now be considered as a converter:
|
||||||
nodeset_in_DDO --> nodeset_in_DDO
|
nodeset_in_DDO --> nodeset_in_DDO
|
||||||
|
|
||||||
- Type inference for XPath expressions allows determining whether a
|
- Type inference for XPath expressions allows determining whether a
|
||||||
predicate involves context-position implicitly;
|
predicate involves context-position implicitly;
|
||||||
|
|
||||||
- Faster evaluation for particular kinds of XPath predicates that involve
|
- Faster evaluation for particular kinds of XPath predicates that involve
|
||||||
context-position, like: [position() > number] or [number];
|
context-position, like: [position() > number] or [number];
|
||||||
|
|
||||||
- Sort-merge join algorithm implemented for XPath EqualityComparison of
|
- Sort-merge join algorithm implemented for XPath EqualityComparison of
|
||||||
two nodesets;
|
two nodesets;
|
||||||
|
|
||||||
- Deeply nested XPath predicates are evaluated at the very beginning of the
|
- Deeply nested XPath predicates are evaluated at the very beginning of the
|
||||||
evaluation phase, to guarantee that evaluation of deeply nested predicates
|
evaluation phase, to guarantee that evaluation of deeply nested predicates
|
||||||
is performed no more than once for each combination of
|
is performed no more than once for each combination of
|
||||||
(context-node, context-position, context-size)
|
(context-node, context-position, context-size)
|
||||||
|
|
||||||
library file: Bigloo, Chicken, Gambit: "sxml/ddo-txpath.scm"
|
library file: Bigloo, Chicken, Gambit: "sxml/ddo-txpath.scm"
|
||||||
PLT: "ddo-txpath.ss"
|
PLT: "ddo-txpath.ss"
|
||||||
|
|
||||||
http://modis.ispras.ru/Lizorkin/ddo.html
|
http://modis.ispras.ru/Lizorkin/ddo.html
|
||||||
|
|
||||||
-------------------------------------------------
|
-------------------------------------------------
|
||||||
|
|
||||||
5. Functional-style modification tool for SXML
|
5. Functional-style modification tool for SXML
|
||||||
|
|
||||||
A tool for making functional-style modifications to SXML documents
|
A tool for making functional-style modifications to SXML documents
|
||||||
The basics of the modification language design were inspired by Patrick Lehti and
|
The basics of the modification language design were inspired by Patrick Lehti and
|
||||||
his data manipulation processor for XML Query Language:
|
his data manipulation processor for XML Query Language:
|
||||||
http://www.ipsi.fraunhofer.de/~lehti/
|
http://www.ipsi.fraunhofer.de/~lehti/
|
||||||
However, with functional techniques we can do this better...
|
However, with functional techniques we can do this better...
|
||||||
|
|
||||||
library file: Bigloo, Chicken, Gambit: "sxml/modif.scm"
|
library file: Bigloo, Chicken, Gambit: "sxml/modif.scm"
|
||||||
PLT: "modif.ss"
|
PLT: "modif.ss"
|
||||||
|
|
||||||
-------------------------------------------------
|
-------------------------------------------------
|
||||||
|
|
||||||
6. STX - Scheme-enabled XSLT processor
|
6. STX - Scheme-enabled XSLT processor
|
||||||
|
|
||||||
STX is an XML transformation tool based on XSLT and Scheme which combines
|
STX is an XML transformation tool based on XSLT and Scheme which combines
|
||||||
a processor for most common XSLT stylesheets and a framework for their
|
a processor for most common XSLT stylesheets and a framework for their
|
||||||
extension in Scheme and provides an environment for a general-purpose
|
extension in Scheme and provides an environment for a general-purpose
|
||||||
transformation of XML data. It integrates two functional languages - Scheme
|
transformation of XML data. It integrates two functional languages - Scheme
|
||||||
and XSLT-like transformation language on the basis of the common data model -
|
and XSLT-like transformation language on the basis of the common data model -
|
||||||
SXML.
|
SXML.
|
||||||
|
|
||||||
library file: Bigloo, Chicken, Gambit: "stx/stx-engine.scm"
|
library file: Bigloo, Chicken, Gambit: "stx/stx-engine.scm"
|
||||||
PLT: "stx-engine.ss"
|
PLT: "stx-engine.ss"
|
||||||
|
|
||||||
http://www.pair.com/lisovsky/transform/stx/
|
http://www.pair.com/lisovsky/transform/stx/
|
||||||
|
|
||||||
-------------------------------------------------
|
-------------------------------------------------
|
||||||
|
|
||||||
7. XPathLink - query language for a set of linked documents
|
7. XPathLink - query language for a set of linked documents
|
||||||
|
|
||||||
XLink is a language for describing links between resources using XML attributes
|
XLink is a language for describing links between resources using XML attributes
|
||||||
and namespaces. XLink provides expressive means for linking information in
|
and namespaces. XLink provides expressive means for linking information in
|
||||||
different XML documents. With XLink, practical XML application data can be
|
different XML documents. With XLink, practical XML application data can be
|
||||||
expressed as several linked XML documents, rather than a single complicated XML
|
expressed as several linked XML documents, rather than a single complicated XML
|
||||||
document. Such a design makes it very attractive to have a query language that
|
document. Such a design makes it very attractive to have a query language that
|
||||||
would inherently recognize XLink links and provide a natural navigation
|
would inherently recognize XLink links and provide a natural navigation
|
||||||
mechanism over them.
|
mechanism over them.
|
||||||
|
|
||||||
Such a query language has been designed and implemented in Scheme. This
|
Such a query language has been designed and implemented in Scheme. This
|
||||||
language is an extension to XPath with 3 additional axes. The implementation
|
language is an extension to XPath with 3 additional axes. The implementation
|
||||||
is naturally an extended SXPath. We call this language XPath with XLink
|
is naturally an extended SXPath. We call this language XPath with XLink
|
||||||
support, or XPathLink.
|
support, or XPathLink.
|
||||||
|
|
||||||
Additionally, an HTML <A> hyperlink can be considered as a particular case of
|
Additionally, an HTML <A> hyperlink can be considered as a particular case of
|
||||||
an XLink link. This observation makes it possible to query HTML documents with
|
an XLink link. This observation makes it possible to query HTML documents with
|
||||||
XPathLink as well. Neil W. Van Dyke <neil@neilvandyke.org> and his permissive
|
XPathLink as well. Neil W. Van Dyke <neil@neilvandyke.org> and his permissive
|
||||||
HTML parser HtmlPrag have made this feature possible.
|
HTML parser HtmlPrag have made this feature possible.
|
||||||
|
|
||||||
library file: Bigloo, Chicken, Gambit: "sxml/xlink.scm"
|
library file: Bigloo, Chicken, Gambit: "sxml/xlink.scm"
|
||||||
PLT: "xpath-context_xlink.ss"
|
PLT: "xpath-context_xlink.ss"
|
||||||
|
|
||||||
http://modis.ispras.ru/Lizorkin/xpathlink.html
|
http://modis.ispras.ru/Lizorkin/xpathlink.html
|
||||||
|
|
||||||
|
|
||||||
==========================================================================
|
==========================================================================
|
||||||
|
|
||||||
Examples and expected results
|
Examples and expected results
|
||||||
-----------------------------
|
-----------------------------
|
||||||
|
|
||||||
Obtaining an SXML document from XML
|
Obtaining an SXML document from XML
|
||||||
(sxml:document "http://modis.ispras.ru/Lizorkin/XML/poem.xml")
|
(sxml:document "http://modis.ispras.ru/Lizorkin/XML/poem.xml")
|
||||||
==>
|
==>
|
||||||
(*TOP*
|
(*TOP*
|
||||||
(*PI* xml "version='1.0'")
|
(*PI* xml "version='1.0'")
|
||||||
(poem
|
(poem
|
||||||
(@ (title "The Lovesong of J. Alfred Prufrock") (poet "T. S. Eliot"))
|
(@ (title "The Lovesong of J. Alfred Prufrock") (poet "T. S. Eliot"))
|
||||||
(stanza
|
(stanza
|
||||||
(line "Let us go then, you and I,")
|
(line "Let us go then, you and I,")
|
||||||
(line "When the evening is spread out against the sky")
|
(line "When the evening is spread out against the sky")
|
||||||
(line "Like a patient etherized upon a table:"))
|
(line "Like a patient etherized upon a table:"))
|
||||||
(stanza
|
(stanza
|
||||||
(line "In the room the women come and go")
|
(line "In the room the women come and go")
|
||||||
(line "Talking of Michaelangelo."))))
|
(line "Talking of Michaelangelo."))))
|
||||||
|
|
||||||
Accessing parts of the document with SXPath
|
Accessing parts of the document with SXPath
|
||||||
((sxpath "poem/stanza[2]/line/text()")
|
((sxpath "poem/stanza[2]/line/text()")
|
||||||
(sxml:document "http://modis.ispras.ru/Lizorkin/XML/poem.xml"))
|
(sxml:document "http://modis.ispras.ru/Lizorkin/XML/poem.xml"))
|
||||||
==>
|
==>
|
||||||
("In the room the women come and go" "Talking of Michaelangelo.")
|
("In the room the women come and go" "Talking of Michaelangelo.")
|
||||||
|
|
||||||
Obtaining/querying HTML documents
|
Obtaining/querying HTML documents
|
||||||
((sxpath "html/head/title")
|
((sxpath "html/head/title")
|
||||||
(sxml:document "http://modis.ispras.ru/Lizorkin/index.html"))
|
(sxml:document "http://modis.ispras.ru/Lizorkin/index.html"))
|
||||||
==>
|
==>
|
||||||
((title "Dmitry Lizorkin homepage"))
|
((title "Dmitry Lizorkin homepage"))
|
||||||
|
|
||||||
-------------------------------------
|
-------------------------------------
|
||||||
SXML Transformations
|
SXML Transformations
|
||||||
|
|
||||||
Transforming the document according to an XSLT stylesheet
|
Transforming the document according to an XSLT stylesheet
|
||||||
(apply
|
(apply
|
||||||
string-append
|
string-append
|
||||||
(sxml:clean-feed
|
(sxml:clean-feed
|
||||||
(stx:transform-dynamic
|
(stx:transform-dynamic
|
||||||
(sxml:add-parents
|
(sxml:add-parents
|
||||||
(sxml:document "http://modis.ispras.ru/Lizorkin/XML/poem.xml"))
|
(sxml:document "http://modis.ispras.ru/Lizorkin/XML/poem.xml"))
|
||||||
(stx:make-stx-stylesheet
|
(stx:make-stx-stylesheet
|
||||||
(sxml:document
|
(sxml:document
|
||||||
"http://modis.ispras.ru/Lizorkin/XML/poem2html.xsl"
|
"http://modis.ispras.ru/Lizorkin/XML/poem2html.xsl"
|
||||||
'((xsl . "http://www.w3.org/1999/XSL/Transform")))))))
|
'((xsl . "http://www.w3.org/1999/XSL/Transform")))))))
|
||||||
==>
|
==>
|
||||||
"<html><head><title>The Lovesong of J. Alfred Prufrock</title></head>
|
"<html><head><title>The Lovesong of J. Alfred Prufrock</title></head>
|
||||||
<body><h1>The Lovesong of J. Alfred Prufrock</h1>
|
<body><h1>The Lovesong of J. Alfred Prufrock</h1>
|
||||||
<p>Let us go then, you and I,<br/>
|
<p>Let us go then, you and I,<br/>
|
||||||
When the evening is spread out against the sky<br/>
|
When the evening is spread out against the sky<br/>
|
||||||
Like a patient etherized upon a table:<br/></p>
|
Like a patient etherized upon a table:<br/></p>
|
||||||
<p>In the room the women come and go<br/>Talking of Michaelangelo.<br/></p>
|
<p>In the room the women come and go<br/>Talking of Michaelangelo.<br/></p>
|
||||||
<i>T. S. Eliot</i></body></html>"
|
<i>T. S. Eliot</i></body></html>"
|
||||||
|
|
||||||
Expressing the same transformation in pre-post-order (requires SSAX package)
|
Expressing the same transformation in pre-post-order (requires SSAX package)
|
||||||
(pre-post-order
|
(pre-post-order
|
||||||
(sxml:document "http://modis.ispras.ru/Lizorkin/XML/poem.xml")
|
(sxml:document "http://modis.ispras.ru/Lizorkin/XML/poem.xml")
|
||||||
`((*TOP* *macro* . ,(lambda top (car ((sxpath '(*)) top))))
|
`((*TOP* *macro* . ,(lambda top (car ((sxpath '(*)) top))))
|
||||||
(poem
|
(poem
|
||||||
. ,(lambda elem
|
. ,(lambda elem
|
||||||
`(html
|
`(html
|
||||||
(head
|
(head
|
||||||
(title ,((sxpath "string(@title)") elem)))
|
(title ,((sxpath "string(@title)") elem)))
|
||||||
(body
|
(body
|
||||||
(h1 ,((sxpath "string(@title)") elem))
|
(h1 ,((sxpath "string(@title)") elem))
|
||||||
,@((sxpath "node()") elem)
|
,@((sxpath "node()") elem)
|
||||||
(i ,((sxpath "string(@poet)") elem))))))
|
(i ,((sxpath "string(@poet)") elem))))))
|
||||||
(@ *preorder* . ,(lambda x x))
|
(@ *preorder* . ,(lambda x x))
|
||||||
(stanza . ,(lambda (tag . content)
|
(stanza . ,(lambda (tag . content)
|
||||||
`(p ,@(map-union (lambda (x) x) content))))
|
`(p ,@(map-union (lambda (x) x) content))))
|
||||||
(line . ,(lambda (tag . content) (append content '((br)))))
|
(line . ,(lambda (tag . content) (append content '((br)))))
|
||||||
(*text* . ,(lambda (tag text) text))))
|
(*text* . ,(lambda (tag text) text))))
|
||||||
==>
|
==>
|
||||||
(html
|
(html
|
||||||
(head (title "The Lovesong of J. Alfred Prufrock"))
|
(head (title "The Lovesong of J. Alfred Prufrock"))
|
||||||
(body
|
(body
|
||||||
(h1 "The Lovesong of J. Alfred Prufrock")
|
(h1 "The Lovesong of J. Alfred Prufrock")
|
||||||
(p
|
(p
|
||||||
"Let us go then, you and I,"
|
"Let us go then, you and I,"
|
||||||
(br)
|
(br)
|
||||||
"When the evening is spread out against the sky"
|
"When the evening is spread out against the sky"
|
||||||
(br)
|
(br)
|
||||||
"Like a patient etherized upon a table:"
|
"Like a patient etherized upon a table:"
|
||||||
(br))
|
(br))
|
||||||
(p "In the room the women come and go" (br)
|
(p "In the room the women come and go" (br)
|
||||||
"Talking of Michaelangelo." (br))
|
"Talking of Michaelangelo." (br))
|
||||||
(i "T. S. Eliot")))
|
(i "T. S. Eliot")))
|
||||||
|
|
||||||
-------------------------------------
|
-------------------------------------
|
||||||
XPathLink: a query language with XLink support
|
XPathLink: a query language with XLink support
|
||||||
|
|
||||||
Returning a chapter element that is linked with the first item
|
Returning a chapter element that is linked with the first item
|
||||||
in the table of contents
|
in the table of contents
|
||||||
((sxpath/c "doc/item[1]/traverse::chapter")
|
((sxpath/c "doc/item[1]/traverse::chapter")
|
||||||
(xlink:documents "http://modis.ispras.ru/Lizorkin/XML/doc.xml"))
|
(xlink:documents "http://modis.ispras.ru/Lizorkin/XML/doc.xml"))
|
||||||
==>
|
==>
|
||||||
((chapter (@ (id "chap1"))
|
((chapter (@ (id "chap1"))
|
||||||
(title "Abstract")
|
(title "Abstract")
|
||||||
(p "This document describes about XLink Engine...")))
|
(p "This document describes about XLink Engine...")))
|
||||||
|
|
||||||
Traversing between documents with XPathLink
|
Traversing between documents with XPathLink
|
||||||
((sxpath/c "descendant::a[.='XPathLink']/traverse::html/
|
((sxpath/c "descendant::a[.='XPathLink']/traverse::html/
|
||||||
descendant::blockquote[1]/node()")
|
descendant::blockquote[1]/node()")
|
||||||
(xlink:documents "http://modis.ispras.ru/Lizorkin/index.html"))
|
(xlink:documents "http://modis.ispras.ru/Lizorkin/index.html"))
|
||||||
==>
|
==>
|
||||||
((b "Abstract: ")
|
((b "Abstract: ")
|
||||||
"\r\n"
|
"\r\n"
|
||||||
"XPathLink is a query language for XML documents linked with XLink links.\r\n"
|
"XPathLink is a query language for XML documents linked with XLink links.\r\n"
|
||||||
"XPathLink is based on XPath and extends it with transparent XLink support.\r\n"
|
"XPathLink is based on XPath and extends it with transparent XLink support.\r\n"
|
||||||
"The implementation of XPathLink in Scheme is provided.\r\n")
|
"The implementation of XPathLink in Scheme is provided.\r\n")
|
||||||
|
|
||||||
-------------------------------------
|
-------------------------------------
|
||||||
SXML Modifications
|
SXML Modifications
|
||||||
|
|
||||||
Modifying the SXML representation of the document
|
Modifying the SXML representation of the document
|
||||||
((sxml:modify '("/poem/stanza[2]" move-preceding "preceding-sibling::stanza"))
|
((sxml:modify '("/poem/stanza[2]" move-preceding "preceding-sibling::stanza"))
|
||||||
(sxml:document "http://modis.ispras.ru/Lizorkin/XML/poem.xml"))
|
(sxml:document "http://modis.ispras.ru/Lizorkin/XML/poem.xml"))
|
||||||
==>
|
==>
|
||||||
(*TOP*
|
(*TOP*
|
||||||
(*PI* xml "version='1.0'")
|
(*PI* xml "version='1.0'")
|
||||||
(poem
|
(poem
|
||||||
(@ (title "The Lovesong of J. Alfred Prufrock") (poet "T. S. Eliot"))
|
(@ (title "The Lovesong of J. Alfred Prufrock") (poet "T. S. Eliot"))
|
||||||
(stanza
|
(stanza
|
||||||
(line "In the room the women come and go")
|
(line "In the room the women come and go")
|
||||||
(line "Talking of Michaelangelo."))
|
(line "Talking of Michaelangelo."))
|
||||||
(stanza
|
(stanza
|
||||||
(line "Let us go then, you and I,")
|
(line "Let us go then, you and I,")
|
||||||
(line "When the evening is spread out against the sky")
|
(line "When the evening is spread out against the sky")
|
||||||
(line "Like a patient etherized upon a table:"))))
|
(line "Like a patient etherized upon a table:"))))
|
||||||
|
|
||||||
-------------------------------------
|
-------------------------------------
|
||||||
DDO SXPath: the optimized XPath implementation
|
DDO SXPath: the optimized XPath implementation
|
||||||
|
|
||||||
Return all text nodes that follow the keyword ``XPointer'' and
|
Return all text nodes that follow the keyword ``XPointer'' and
|
||||||
that are not descendants of the element appendix
|
that are not descendants of the element appendix
|
||||||
((ddo:sxpath "//text()[contains(., 'XPointer')]/
|
((ddo:sxpath "//text()[contains(., 'XPointer')]/
|
||||||
following::text()[not(./ancestor::appendix)]")
|
following::text()[not(./ancestor::appendix)]")
|
||||||
(sxml:document "http://modis.ispras.ru/Lizorkin/XML/doc.xml"))
|
(sxml:document "http://modis.ispras.ru/Lizorkin/XML/doc.xml"))
|
||||||
==>
|
==>
|
||||||
("XPointer is the fragment identifier of documents having the mime-type..."
|
("XPointer is the fragment identifier of documents having the mime-type..."
|
||||||
"Models for using XLink/XPointer "
|
"Models for using XLink/XPointer "
|
||||||
"There are important keywords."
|
"There are important keywords."
|
||||||
"samples"
|
"samples"
|
||||||
"Conclusion"
|
"Conclusion"
|
||||||
"Thanks a lot.")
|
"Thanks a lot.")
|
||||||
|
|
||||||
-------------------------------------
|
-------------------------------------
|
||||||
Lazy XML processing
|
Lazy XML processing
|
||||||
|
|
||||||
Lazy XML-to-SXML conversion
|
Lazy XML-to-SXML conversion
|
||||||
(define doc
|
(define doc
|
||||||
(lazy:xml->sxml
|
(lazy:xml->sxml
|
||||||
(open-input-resource "http://modis.ispras.ru/Lizorkin/XML/poem.xml")
|
(open-input-resource "http://modis.ispras.ru/Lizorkin/XML/poem.xml")
|
||||||
'()))
|
'()))
|
||||||
doc
|
doc
|
||||||
==>
|
==>
|
||||||
(*TOP*
|
(*TOP*
|
||||||
(*PI* xml "version='1.0'")
|
(*PI* xml "version='1.0'")
|
||||||
(poem
|
(poem
|
||||||
(@ (title "The Lovesong of J. Alfred Prufrock") (poet "T. S. Eliot"))
|
(@ (title "The Lovesong of J. Alfred Prufrock") (poet "T. S. Eliot"))
|
||||||
(stanza (line "Let us go then, you and I,") #<struct:promise>)
|
(stanza (line "Let us go then, you and I,") #<struct:promise>)
|
||||||
#<struct:promise>))
|
#<struct:promise>))
|
||||||
|
|
||||||
Querying a lazy SXML document, lazily
|
Querying a lazy SXML document, lazily
|
||||||
(define res ((lazy:sxpath "poem/stanza/line[1]") doc))
|
(define res ((lazy:sxpath "poem/stanza/line[1]") doc))
|
||||||
res
|
res
|
||||||
==>
|
==>
|
||||||
((line "Let us go then, you and I,") #<struct:promise>)
|
((line "Let us go then, you and I,") #<struct:promise>)
|
||||||
|
|
||||||
Obtain the next portion of the result
|
Obtain the next portion of the result
|
||||||
(force (cadr res))
|
(force (cadr res))
|
||||||
==>
|
==>
|
||||||
((line "In the room the women come and go") #<struct:promise>)
|
((line "In the room the women come and go") #<struct:promise>)
|
||||||
|
|
||||||
Converting the lazy result to a conventional SXML nodeset
|
Converting the lazy result to a conventional SXML nodeset
|
||||||
(lazy:result->list res)
|
(lazy:result->list res)
|
||||||
==>
|
==>
|
||||||
((line "Let us go then, you and I,")
|
((line "Let us go then, you and I,")
|
||||||
(line "In the room the women come and go"))
|
(line "In the room the women come and go"))
|
||||||
|
|
|
@ -1,10 +1,10 @@
|
||||||
(module info (lib "infotab.ss" "setup")
|
(module info (lib "infotab.ss" "setup")
|
||||||
(define name "sxml")
|
(define name "sxml")
|
||||||
(define blurb
|
(define blurb
|
||||||
(list "Collection of tools for processing markup documents "
|
(list "Collection of tools for processing markup documents "
|
||||||
"in the form of S-expressions"))
|
"in the form of S-expressions"))
|
||||||
(define primary-file "sxml.ss")
|
(define primary-file "sxml.ss")
|
||||||
(define doc.txt "doc.txt")
|
(define doc.txt "doc.txt")
|
||||||
(define homepage "http://modis.ispras.ru/Lizorkin/sxml-tutorial.html")
|
(define homepage "http://modis.ispras.ru/Lizorkin/sxml-tutorial.html")
|
||||||
(define categories '(xml))
|
(define categories '(xml))
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user