start on regexp functions; further formatting improvements for defproc
svn: r6745
This commit is contained in:
parent
4e72b10ce4
commit
bf717526b0
|
@ -393,20 +393,21 @@
|
|||
[(eq? v '...)
|
||||
dots0]
|
||||
[else v]))]
|
||||
[prototype-size (lambda (s)
|
||||
(let loop ([s s])
|
||||
[prototype-size (lambda (s first-combine next-combine)
|
||||
(let loop ([s s][combine first-combine])
|
||||
(if (null? s)
|
||||
1
|
||||
(+ 1 (loop (cdr s))
|
||||
(cond
|
||||
[(symbol? (car s)) (string-length (symbol->string (car s)))]
|
||||
[(pair? (car s))
|
||||
(if (keyword? (caar s))
|
||||
(+ (string-length (keyword->string (caar s)))
|
||||
3
|
||||
(string-length (symbol->string (cadar s))))
|
||||
(string-length (symbol->string (caar s))))]
|
||||
[else 0])))))])
|
||||
0
|
||||
(combine
|
||||
(loop (cdr s) next-combine)
|
||||
(cond
|
||||
[(symbol? (car s)) (string-length (symbol->string (car s)))]
|
||||
[(pair? (car s))
|
||||
(if (keyword? (caar s))
|
||||
(+ (string-length (keyword->string (caar s)))
|
||||
3
|
||||
(string-length (symbol->string (cadar s))))
|
||||
(string-length (symbol->string (caar s))))]
|
||||
[else 0])))))])
|
||||
(parameterize ([current-variable-list
|
||||
(map (lambda (i)
|
||||
(and (pair? i)
|
||||
|
@ -422,34 +423,41 @@
|
|||
append
|
||||
(map
|
||||
(lambda (stx-id prototype arg-contracts result-contract first?)
|
||||
(append
|
||||
(list
|
||||
(list (make-flow
|
||||
(let-values ([(required optional more-required)
|
||||
(let loop ([a (cdr prototype)][r-accum null])
|
||||
(if (or (null? a)
|
||||
(and (has-optional? (car a))))
|
||||
(let ([req (reverse r-accum)])
|
||||
(let loop ([a a][o-accum null])
|
||||
(if (or (null? a)
|
||||
(not (has-optional? (car a))))
|
||||
(values req (reverse o-accum) a)
|
||||
(loop (cdr a) (cons (car a) o-accum)))))
|
||||
(loop (cdr a) (cons (car a) r-accum))))]
|
||||
[(tagged) (if first?
|
||||
(make-target-element
|
||||
#f
|
||||
(list (to-element (make-just-context (car prototype)
|
||||
stx-id)))
|
||||
(register-scheme-definition stx-id))
|
||||
(to-element (make-just-context (car prototype)
|
||||
stx-id)))]
|
||||
[(short?) (or ((prototype-size prototype) . < . 40)
|
||||
((length prototype) . < . 3))]
|
||||
[(end) (list (to-flow spacer)
|
||||
(to-flow 'rarr)
|
||||
(to-flow spacer)
|
||||
(make-flow (list (result-contract))))])
|
||||
(let*-values ([(required optional more-required)
|
||||
(let loop ([a (cdr prototype)][r-accum null])
|
||||
(if (or (null? a)
|
||||
(and (has-optional? (car a))))
|
||||
(let ([req (reverse r-accum)])
|
||||
(let loop ([a a][o-accum null])
|
||||
(if (or (null? a)
|
||||
(not (has-optional? (car a))))
|
||||
(values req (reverse o-accum) a)
|
||||
(loop (cdr a) (cons (car a) o-accum)))))
|
||||
(loop (cdr a) (cons (car a) r-accum))))]
|
||||
[(tagged) (if first?
|
||||
(make-target-element
|
||||
#f
|
||||
(list (to-element (make-just-context (car prototype)
|
||||
stx-id)))
|
||||
(register-scheme-definition stx-id))
|
||||
(to-element (make-just-context (car prototype)
|
||||
stx-id)))]
|
||||
[(flat-size) (prototype-size prototype + +)]
|
||||
[(short?) (or (flat-size . < . 40)
|
||||
((length prototype) . < . 3))]
|
||||
[(res) (result-contract)]
|
||||
[(result-next-line?) ((+ (if short?
|
||||
flat-size
|
||||
(prototype-size prototype + max))
|
||||
(flow-element-width res))
|
||||
. >= . 50)]
|
||||
[(end) (list (to-flow spacer)
|
||||
(to-flow 'rarr)
|
||||
(to-flow spacer)
|
||||
(make-flow (list res)))])
|
||||
(append
|
||||
(list
|
||||
(list (make-flow
|
||||
(if short?
|
||||
(make-table-if-necessary
|
||||
"prototype"
|
||||
|
@ -468,12 +476,16 @@
|
|||
'paren-shape
|
||||
#\?))))
|
||||
(map arg->elem more-required))))
|
||||
end)))
|
||||
(if result-next-line?
|
||||
null
|
||||
end))))
|
||||
(let ([not-end
|
||||
(list (to-flow spacer)
|
||||
(to-flow spacer)
|
||||
(to-flow spacer)
|
||||
(to-flow spacer))])
|
||||
(if result-next-line?
|
||||
(list (to-flow spacer))
|
||||
(list (to-flow spacer)
|
||||
(to-flow spacer)
|
||||
(to-flow spacer)
|
||||
(to-flow spacer)))])
|
||||
(list
|
||||
(make-table
|
||||
"prototype"
|
||||
|
@ -513,40 +525,46 @@
|
|||
#f
|
||||
(list a "]" (schemeparenfont ")"))))]
|
||||
[else a])))
|
||||
(if (null? (cdr args))
|
||||
(if (and (null? (cdr args))
|
||||
(not result-next-line?))
|
||||
end
|
||||
not-end))
|
||||
(loop (cdr args) (sub1 req))))))))))))))
|
||||
(apply append
|
||||
(map (lambda (v arg-contract)
|
||||
(cond
|
||||
[(pair? v)
|
||||
(list
|
||||
(list
|
||||
(make-flow
|
||||
(make-table-if-necessary
|
||||
"argcontract"
|
||||
(list
|
||||
(let ([v (if (keyword? (car v))
|
||||
(cdr v)
|
||||
v)])
|
||||
(append
|
||||
(list
|
||||
(to-flow (hspace 2))
|
||||
(to-flow (arg->elem v))
|
||||
(to-flow spacer)
|
||||
(to-flow ":")
|
||||
(to-flow spacer)
|
||||
(make-flow (list (arg-contract))))
|
||||
(if (has-optional? v)
|
||||
(list (to-flow spacer)
|
||||
(to-flow "=")
|
||||
(to-flow spacer)
|
||||
(to-flow (to-element (caddr v))))
|
||||
null))))))))]
|
||||
[else null]))
|
||||
(cdr prototype)
|
||||
arg-contracts))))
|
||||
(loop (cdr args) (sub1 req)))))))))))))
|
||||
(if result-next-line?
|
||||
(list (list (make-flow (make-table-if-necessary
|
||||
"prototype"
|
||||
(list end)))))
|
||||
null)
|
||||
(apply append
|
||||
(map (lambda (v arg-contract)
|
||||
(cond
|
||||
[(pair? v)
|
||||
(list
|
||||
(list
|
||||
(make-flow
|
||||
(make-table-if-necessary
|
||||
"argcontract"
|
||||
(list
|
||||
(let ([v (if (keyword? (car v))
|
||||
(cdr v)
|
||||
v)])
|
||||
(append
|
||||
(list
|
||||
(to-flow (hspace 2))
|
||||
(to-flow (arg->elem v))
|
||||
(to-flow spacer)
|
||||
(to-flow ":")
|
||||
(to-flow spacer)
|
||||
(make-flow (list (arg-contract))))
|
||||
(if (has-optional? v)
|
||||
(list (to-flow spacer)
|
||||
(to-flow "=")
|
||||
(to-flow spacer)
|
||||
(to-flow (to-element (caddr v))))
|
||||
null))))))))]
|
||||
[else null]))
|
||||
(cdr prototype)
|
||||
arg-contracts)))))
|
||||
stx-ids
|
||||
prototypes
|
||||
arg-contractss
|
||||
|
|
|
@ -372,7 +372,8 @@
|
|||
(make-link-element "schemesyntaxlink" (list s) stag)]
|
||||
[vd
|
||||
(make-link-element "schemevaluelink" (list s) vtag)]
|
||||
[else s])))))
|
||||
[else s]))))
|
||||
(lambda () s))
|
||||
(literalize-spaces s))
|
||||
(cond
|
||||
[(positive? quote-depth) value-color]
|
||||
|
|
|
@ -95,28 +95,33 @@
|
|||
delayed-element-ref
|
||||
delayed-element-set!)
|
||||
(make-struct-type 'delayed-element #f
|
||||
1 1 #f
|
||||
2 1 #f
|
||||
(list (cons prop:serializable
|
||||
(make-serialize-info
|
||||
(lambda (d)
|
||||
(unless (delayed-element-ref d 1)
|
||||
(unless (delayed-element-ref d 2)
|
||||
(error 'serialize-delayed-element
|
||||
"cannot serialize a delayed element that was not resolved: ~e"
|
||||
d))
|
||||
(vector (delayed-element-ref d 1)))
|
||||
(vector (delayed-element-ref d 2)))
|
||||
#'deserialize-delayed-element
|
||||
#f
|
||||
(or (current-load-relative-directory) (current-directory)))))))
|
||||
(define-syntax delayed-element (list-immutable #'struct:delayed-element
|
||||
#'make-delayed-element
|
||||
#'delayed-element?
|
||||
(list-immutable #'delayed-element-render)
|
||||
(list-immutable #'set-delayed-element-render!)
|
||||
(list-immutable #'delayed-element-sizer
|
||||
#'delayed-element-render)
|
||||
(list-immutable #'set-delayed-element-sizer!
|
||||
#'set-delayed-element-render!)
|
||||
#t))
|
||||
(define delayed-element-render (make-struct-field-accessor delayed-element-ref 0))
|
||||
(define delayed-element-sizer (make-struct-field-accessor delayed-element-ref 1))
|
||||
(define set-delayed-element-render! (make-struct-field-mutator delayed-element-set! 0))
|
||||
(define set-delayed-element-sizer! (make-struct-field-mutator delayed-element-set! 1))
|
||||
(provide/contract
|
||||
(struct delayed-element ([render (any/c part? any/c . -> . list?)])))
|
||||
(struct delayed-element ([render (any/c part? any/c . -> . list?)]
|
||||
[sizer (-> any)])))
|
||||
|
||||
(provide deserialize-delayed-element)
|
||||
(define deserialize-delayed-element
|
||||
|
@ -124,9 +129,9 @@
|
|||
|
||||
(provide force-delayed-element)
|
||||
(define (force-delayed-element d renderer sec ht)
|
||||
(or (delayed-element-ref d 1)
|
||||
(or (delayed-element-ref d 2)
|
||||
(let ([v ((delayed-element-ref d 0) renderer sec ht)])
|
||||
(delayed-element-set! d 1 v)
|
||||
(delayed-element-set! d 2 v)
|
||||
v)))
|
||||
|
||||
;; ----------------------------------------
|
||||
|
@ -163,5 +168,52 @@
|
|||
renderer sec ht)]
|
||||
[else (element->string c)])]))
|
||||
|
||||
;; ----------------------------------------
|
||||
|
||||
(provide flow-element-width
|
||||
element-width)
|
||||
|
||||
;; element-width : any -> number
;; Estimates the display width of a piece of element content `s`:
;;  - a string counts as its length;
;;  - an `element` is the sum of the widths of its content items;
;;  - a delayed element is measured by calling its sizer thunk (no
;;    arguments) and measuring whatever that returns;
;;  - anything else counts as width 1.
(define (element-width s)
|
||||
(cond
|
||||
[(string? s) (string-length s)]
|
||||
[(element? s) (apply + (map element-width (element-content s)))]
|
||||
[(delayed-element? s) (element-width ((delayed-element-sizer s)))]
|
||||
[else 1]))
|
||||
|
||||
;; paragraph-width : paragraph -> number
;; Width of a paragraph: the sum of `element-width` over everything in
;; its `paragraph-content` (0 when the content list is empty).
(define (paragraph-width s)
|
||||
(apply + (map element-width (paragraph-content s))))
|
||||
|
||||
;; flow-width : flow -> number
;; Width of a flow: the largest `flow-element-width` among its
;; `flow-paragraphs`. The leading 0 passed to `max` makes an empty flow
;; produce 0 instead of an arity error.
(define (flow-width f)
|
||||
(apply max 0 (map flow-element-width (flow-paragraphs f))))
|
||||
|
||||
;; flow-element-width : flow-element -> number
;; Dispatches on the kind of flow element and delegates to the matching
;; width helper. A delayed flow element is not forced; it is counted as
;; width 1.
;; NOTE(review): there is no `else` clause, so a value matching none of
;; these predicates produces no number -- confirm that these five cases
;; cover every flow element.
(define (flow-element-width p)
|
||||
(cond
|
||||
[(paragraph? p) (paragraph-width p)]
|
||||
[(table? p) (table-width p)]
|
||||
[(itemization? p) (itemization-width p)]
|
||||
[(blockquote? p) (blockquote-width p)]
|
||||
[(delayed-flow-element? p) 1]))
|
||||
|
||||
;; table-width : table -> number
;; Width of a table: walks `table-flowss` one column at a time, taking
;; the widest flow in the current column (the `car` of every row) and
;; adding it to the width of the remaining columns (the `cdr` of every
;; row). A table with no rows has width 0.
;; The loop stops when the FIRST row runs out of cells.
;; NOTE(review): this assumes every row has at least as many cells as
;; the first row, since `car`/`cdr` are applied to all rows -- confirm.
(define (table-width p)
|
||||
(let ([flowss (table-flowss p)])
|
||||
(if (null? flowss)
|
||||
0
|
||||
(let loop ([flowss flowss])
|
||||
(if (null? (car flowss))
|
||||
0
|
||||
(+ (apply max
|
||||
0
|
||||
(map flow-width
|
||||
(map car flowss)))
|
||||
(loop (map cdr flowss))))))))
|
||||
|
||||
;; itemization-width : itemization -> number
;; Width of an itemization: the widest flow among its
;; `itemization-flows`, or 0 when there are none.
(define (itemization-width p)
|
||||
(apply max 0 (map flow-width (itemization-flows p))))
|
||||
|
||||
;; blockquote-width : blockquote -> number
;; Width of a blockquote: 4 plus the widest entry of
;; `blockquote-paragraphs` (4 when there are none).
;; NOTE(review): the constant 4 presumably stands for the blockquote's
;; indentation -- confirm against the renderer.
;; NOTE(review): entries are measured with `paragraph-width`, so this
;; assumes each one is a paragraph rather than another kind of flow
;; element -- confirm.
(define (blockquote-width p)
|
||||
(+ 4 (apply max 0 (map paragraph-width (blockquote-paragraphs p)))))
|
||||
|
||||
;; ----------------------------------------
|
||||
|
||||
)
|
||||
|
||||
|
|
|
@ -28,25 +28,7 @@ language.
|
|||
@subsection[#:tag "mz:char-input"]{From Bytes to Characters}
|
||||
|
||||
@;------------------------------------------------------------------------
|
||||
@section["Regular Expressions"]
|
||||
|
||||
@require["rx.ss"]
|
||||
|
||||
Common grammar:
|
||||
|
||||
@common-table
|
||||
|
||||
Rx table:
|
||||
|
||||
@rx-table
|
||||
|
||||
Px table:
|
||||
|
||||
@px-table
|
||||
|
||||
Types:
|
||||
|
||||
@type-table
|
||||
@include-section["regexps.scrbl"]
|
||||
|
||||
@;------------------------------------------------------------------------
|
||||
|
||||
|
|
454
collects/scribblings/reference/regexps.scrbl
Normal file
454
collects/scribblings/reference/regexps.scrbl
Normal file
|
@ -0,0 +1,454 @@
|
|||
#reader(lib "docreader.ss" "scribble")
|
||||
@require[(lib "bnf.ss" "scribble")]
|
||||
@require["mz.ss"]
|
||||
|
||||
@require["rx.ss"]
|
||||
|
||||
@title[#:tag "mz:regexp"]{Regular Expressions}
|
||||
|
||||
@;{
|
||||
\index{regular expressions}
|
||||
\index{regexps|see{regular expressions}}
|
||||
\index{pattern matching}
|
||||
\index{strings!pattern matching}
|
||||
\index{input ports!pattern matching}
|
||||
}
|
||||
|
||||
Regular expressions are specified as strings or byte strings, using
|
||||
the same pattern language as the Unix utility @exec{egrep} or Perl. A
|
||||
string-specified pattern produces a character regexp matcher, and a
|
||||
byte-string pattern produces a byte regexp matcher. If a character
|
||||
regexp is used with a byte string or input port, it matches UTF-8
|
||||
encodings (see @secref["mz:encodings"]) of matching character streams;
|
||||
if a byte regexp is used with a character string, it matches bytes in
|
||||
the UTF-8 encoding of the string.
|
||||
|
||||
Regular expressions can be compiled into a @defterm{regexp value} for
|
||||
repeated matches. The @scheme[regexp] and @scheme[byte-regexp]
|
||||
procedures convert a string or byte string (respectively) into a
|
||||
regexp value using one syntax of regular expressions that is most
|
||||
compatible with @exec{egrep}. The @scheme[pregexp] and
|
||||
@scheme[byte-pregexp] procedures produce a regexp value using a
|
||||
slightly different syntax of regular expressions that is more
|
||||
compatible with Perl. In addition, Scheme constants written with
|
||||
@litchar{#rx} or @litchar{#px} (see @secref["mz:reader"]) produce
|
||||
compiled regexp values.
|
||||
|
||||
The internal size of a regexp value is limited to 32 kilobytes; this
|
||||
limit roughly corresponds to a source string with 32,000 literal
|
||||
characters or 5,000 operators.
|
||||
|
||||
@;------------------------------------------------------------------------
|
||||
@section[#:tag "mz:regexp-syntax"]{Regexp Syntax}
|
||||
|
||||
The following syntax specifications describe the content of a string
|
||||
that represents a regular expression. The syntax of the corresponding
|
||||
string may involve extra escape characters. For example, the regular
|
||||
expression @litchar["(.*)\\1"] can be represented with the string
|
||||
@scheme["(.*)\\1"] or the regexp constant @scheme[#rx"(.*)\\1"]; the
|
||||
@litchar["\\"] in the regular expression must be escaped to include it
|
||||
in a string or regexp constant.
|
||||
|
||||
The @scheme[regexp] and @scheme[pregexp] syntaxes share a common core:
|
||||
|
||||
@common-table
|
||||
|
||||
The following completes the grammar for @scheme[regexp], which treats
|
||||
@litchar["{"] and @litchar["}"] as literals, @litchar["\\"] as a
|
||||
literal within ranges, and @litchar["\\"] as a literal producer
|
||||
outside of ranges.
|
||||
|
||||
@rx-table
|
||||
|
||||
The following completes the grammar for @scheme[pregexp], which uses
|
||||
@litchar["{"] and @litchar["}"] bounded repetition and uses
|
||||
@litchar["\\"] for meta-characters both inside and outside of ranges.
|
||||
|
||||
@px-table
|
||||
|
||||
@;------------------------------------------------------------------------
|
||||
@section{Additional Syntactic Constraints}
|
||||
|
||||
In addition to matching a grammar, regular expressions must meet two
|
||||
syntactic restrictions:
|
||||
|
||||
@itemize{
|
||||
|
||||
@item{In a @nonterm{repeat} other than @nonterm{atom}@litchar{?},
|
||||
the @nonterm{atom} must not match an empty sequence.}
|
||||
|
||||
@item{In a @litchar{(?<=}@nonterm{regexp}@litchar{)} or
|
||||
@litchar{(?<!}@nonterm{regexp}@litchar{)},
|
||||
the @nonterm{regexp} must match only a bounded sequence.}
|
||||
|
||||
}
|
||||
|
||||
These constraints are checked syntactically by the following type
|
||||
system. A type [@math{n}, @math{m}] corresponds to an expression that
|
||||
matches between @math{n} and @math{m} characters. In the rule for
|
||||
@litchar{(}@nonterm{Regexp}@litchar{)}, @math{N} means the number such
|
||||
that the opening parenthesis is the @math{N}th opening parenthesis for
|
||||
collecting match reports. Non-emptiness is inferred for a
|
||||
backreference pattern, @litchar["\\"]@nonterm{N}, so that a
|
||||
backreference can be used for repetition patterns; in the case of
|
||||
mutual dependencies among backreferences, the inference chooses the
|
||||
fixpoint that maximizes non-emptiness. Finiteness is not inferred for
|
||||
backreferences (i.e., a backreference is assumed to match an
|
||||
arbitrarily large sequence).
|
||||
|
||||
@type-table
|
||||
|
||||
@;------------------------------------------------------------------------
|
||||
@section{Regexp Constructors}
|
||||
|
||||
@defproc[(regexp [str string?]) regexp?]{
|
||||
|
||||
Takes a string representation of a regular expression (using the
|
||||
syntax in @secref["mz:regexp-syntax"]) and compiles it into a regexp
|
||||
value. Other regular expression procedures accept either a string or a
|
||||
regexp value as the matching pattern. If a regular expression string
|
||||
is used multiple times, it is faster to compile the string once to a
|
||||
regexp value and use it for repeated matches instead of using the
|
||||
string each time.
|
||||
|
||||
The @scheme[object-name] procedure (see @secref["mz:infernames"]) returns
|
||||
the source string for a regexp value.
|
||||
|
||||
@examples[
|
||||
(regexp "ap*le")
|
||||
(object-name #rx"ap*le")
|
||||
]}
|
||||
|
||||
@defproc[(pregexp [string string?]) pregexp?]{
|
||||
|
||||
Like @scheme[regexp], except that it uses a slightly different syntax
|
||||
(see @secref["mz:regexp-syntax"]). The result can be used with
|
||||
@scheme[regexp-match], etc., just like the result from
|
||||
@scheme[regexp].
|
||||
|
||||
@examples[
|
||||
(pregexp "ap*le")
|
||||
(regexp? #px"ap*le")
|
||||
]}
|
||||
|
||||
@defproc[(regexp? [v any/c]) boolean?]{
|
||||
|
||||
Returns @scheme[#t] if @scheme[v] is a regexp value created by
|
||||
@scheme[regexp] or @scheme[pregexp], @scheme[#f] otherwise.}
|
||||
|
||||
@defproc[(pregexp? [v any/c]) boolean?]{
|
||||
|
||||
Returns @scheme[#t] if @scheme[v] is a regexp value created by
|
||||
@scheme[pregexp] (not @scheme[regexp]), @scheme[#f] otherwise.}
|
||||
|
||||
@defproc[(byte-regexp [bstr bytes?]) byte-regexp?]{
|
||||
|
||||
Takes a byte-string representation of a regular expression (using the
|
||||
syntax in @secref["mz:regexp-syntax"]) and compiles it into a
|
||||
byte-regexp value.
|
||||
|
||||
The @scheme[object-name] procedure (see @secref["mz:infernames"])
|
||||
returns the source byte string for a regexp value.
|
||||
|
||||
@examples[
|
||||
(byte-regexp #"ap*le")
|
||||
(byte-regexp "ap*le")
|
||||
]}
|
||||
|
||||
@defproc[(byte-pregexp [bstr bytes?]) byte-pregexp?]{
|
||||
|
||||
Like @scheme[byte-regexp], except that it uses a slightly different
|
||||
syntax (see @secref["mz:regexp-syntax"]). The result can be used with
|
||||
@scheme[regexp-match], etc., just like the result from
|
||||
@scheme[byte-regexp].
|
||||
|
||||
@examples[
|
||||
(byte-pregexp #"ap*le")
|
||||
]}
|
||||
|
||||
@defproc[(byte-regexp? [v any/c]) boolean?]{
|
||||
|
||||
Returns @scheme[#t] if @scheme[v] is a regexp value created by
|
||||
@scheme[byte-regexp] or @scheme[byte-pregexp], @scheme[#f] otherwise.}
|
||||
|
||||
@defproc[(byte-pregexp? [v any/c]) boolean?]{
|
||||
|
||||
Returns @scheme[#t] if @scheme[v] is a regexp value created by
|
||||
@scheme[byte-pregexp] (not @scheme[byte-regexp]), @scheme[#f]
|
||||
otherwise.}
|
||||
|
||||
|
||||
@;------------------------------------------------------------------------
|
||||
@section{Regexp Matching}
|
||||
|
||||
@defproc[(regexp-match [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||
[input (or/c string? bytes? input-port?)]
|
||||
[start-pos nonnegative-exact-integer? 0]
|
||||
[end-pos (or/c nonnegative-exact-integer? false/c) #f]
|
||||
[output-port (or/c output-port? false/c) #f])
|
||||
(or/c (listof (or/c (cons (or/c string? bytes?)
|
||||
(or/c string? bytes?))
|
||||
false/c))
|
||||
false/c)]{
|
||||
|
||||
Attempts to match @scheme[pattern] (a string, byte string, regexp
|
||||
value, or byte-regexp value) once to a portion of @scheme[input]. The
|
||||
matcher finds a portion of @scheme[input] that matches and is closest
|
||||
to the start of the input (after @scheme[start-pos]).
|
||||
|
||||
The optional @scheme[start-pos] and @scheme[end-pos] arguments select
|
||||
a portion of @scheme[input] for matching; the default is the entire
|
||||
string or the stream up to an end-of-file. When @scheme[input] is a
|
||||
string, @scheme[start-pos] is a character position; when
|
||||
@scheme[input] is a byte string, then @scheme[start-pos] is a byte
|
||||
position; and when @scheme[input] is an input port, @scheme[start-pos]
|
||||
is the number of bytes to skip before starting to match. The
|
||||
@scheme[end-pos] argument can be @scheme[#f], which corresponds to the
|
||||
end of the string or the end-of-file in the stream; otherwise, it is a
|
||||
character or byte position, like @scheme[start-pos]. If @scheme[input]
|
||||
is an input port, and if the end-of-file is reached before
|
||||
@scheme[start-pos] bytes are skipped, then the match fails.
|
||||
|
||||
In @scheme[pattern], a start-of-string @litchar{^} refers to the first
|
||||
position of @scheme[input] after @scheme[start-pos], and the
|
||||
end-of-input @litchar{$} refers to the @scheme[end-pos]th position or
|
||||
(in the case of an input port) the end of file, whichever comes first.
|
||||
|
||||
If the match fails, @scheme[#f] is returned. If the match succeeds, a
|
||||
list containing strings or byte strings, and possibly @scheme[#f], is
|
||||
returned. The list contains strings only if @scheme[input] is a string
|
||||
and @scheme[pattern] is not a byte regexp value. Otherwise, the list
|
||||
contains byte strings (substrings of the UTF-8 encoding of
|
||||
@scheme[input], if @scheme[input] is a string).
|
||||
|
||||
The first [byte] string in a result list is the portion of
|
||||
@scheme[input] that matched @scheme[pattern]. If two portions of
|
||||
@scheme[input] can match @scheme[pattern], then the match that starts
|
||||
earliest is found.
|
||||
|
||||
Additional [byte] strings are returned in the list if @scheme[pattern]
|
||||
contains parenthesized sub-expressions (but not when the open
|
||||
parenthesis is followed by @litchar{?:}). Matches for the
|
||||
sub-expressions are provided in the order of the opening parentheses
|
||||
in @scheme[pattern]. When sub-expressions occur in branches of an
|
||||
@litchar["|"] ``or'' pattern, in a @litchar{*} ``zero or more''
|
||||
pattern, or other places where the overall pattern can succeed without
|
||||
a match for the sub-expression, then a @scheme[#f] is returned for the
|
||||
sub-expression if it did not contribute to the final match. When a
|
||||
single sub-expression occurs within a @litchar{*} ``zero or more''
|
||||
pattern or other multiple-match positions, then the rightmost match
|
||||
associated with the sub-expression is returned in the list.
|
||||
|
||||
If the optional @scheme[output-port] is provided as an output port,
|
||||
the part of @scheme[input] from its beginning (not @scheme[start-pos])
|
||||
that precedes the match is written to the port. All of @scheme[input]
|
||||
up to @scheme[end-pos] is written to the port if no match is
|
||||
found. This functionality is most useful when @scheme[input] is an
|
||||
input port.
|
||||
|
||||
When matching an input port, a match failure reads up to
|
||||
@scheme[end-pos] bytes (or end-of-file), even if @scheme[pattern]
|
||||
begins with a start-of-string @litchar{^}; see also
|
||||
@scheme[regexp-match/fail-without-reading]. On success, all bytes up
|
||||
to and including the match are eventually read from the port, but
|
||||
matching proceeds by first peeking bytes from the port (using
|
||||
@scheme[peek-bytes-avail!]), and then (re-)reading matching bytes to
|
||||
discard them after the match result is determined. Non-matching bytes
|
||||
may be read and discarded before the match is determined. The matcher
|
||||
peeks in blocking mode only as far as necessary to determine a match,
|
||||
but it may peek extra bytes to fill an internal buffer if immediately
|
||||
available (i.e., without blocking). Greedy repeat operators in
|
||||
@scheme[pattern], such as @litchar{*} or @litchar{+}, tend to force
|
||||
reading the entire content of the port (up to @scheme[end-pos]) to
|
||||
determine a match.
|
||||
|
||||
If the input port is read simultaneously by another thread, or if the
|
||||
port is a custom port with inconsistent reading and peeking procedures
|
||||
(see @secref["mz:customport"]), then the bytes that are peeked and
|
||||
used for matching may be different than the bytes read and discarded
|
||||
after the match completes; the matcher inspects only the peeked
|
||||
bytes. To avoid such interleaving, use @scheme[regexp-match-peek]
|
||||
(with a @scheme[progress-evt] argument) followed by
|
||||
@scheme[port-commit-peeked].}
|
||||
|
||||
@defproc[(regexp-match-positions [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||
[input (or/c string? bytes? input-port?)]
|
||||
[start-pos nonnegative-exact-integer? 0]
|
||||
[end-pos (or/c nonnegative-exact-integer? false/c) #f]
|
||||
[output-port (or/c output-port? false/c) #f])
|
||||
(or/c (listof (or/c (cons nonnegative-exact-integer?
|
||||
nonnegative-exact-integer?)
|
||||
false/c))
|
||||
false/c)]{
|
||||
|
||||
Like @scheme[regexp-match], but returns a list of number pairs (and
|
||||
@scheme[#f]) instead of a list of strings. Each pair of numbers refers
|
||||
to a range of characters or bytes in @scheme[input]. If the result for
|
||||
the same arguments with @scheme[regexp-match] would be a list of byte
|
||||
strings, the resulting ranges correspond to byte ranges; in that case,
|
||||
if @scheme[input] is a character string, the byte ranges correspond to
|
||||
bytes in the UTF-8 encoding of the string.
|
||||
|
||||
Range results are returned in a @scheme[substring]- and
|
||||
@scheme[subbytes]-compatible manner, independent of
|
||||
@scheme[start-pos]. In the case of an input port, the returned
|
||||
positions indicate the number of bytes that were read, including
|
||||
@scheme[start-pos], before the first matching byte.}
|
||||
|
||||
@defproc[(regexp-match? [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||
[input (or/c string? bytes? input-port?)]
|
||||
[start-pos nonnegative-exact-integer? 0]
|
||||
[end-pos (or/c nonnegative-exact-integer? false/c) #f]
|
||||
[output-port (or/c output-port? false/c) #f])
|
||||
boolean?]{
|
||||
|
||||
Like @scheme[regexp-match], but returns merely @scheme[#t] when the
|
||||
match succeeds, @scheme[#f] otherwise.}
|
||||
|
||||
@defproc[(regexp-match-peek [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||
[input input-port?]
|
||||
[start-pos nonnegative-exact-integer? 0]
|
||||
[end-pos (or/c nonnegative-exact-integer? false/c) #f]
|
||||
[progress (or/c evt? false/c) #f])
|
||||
(or/c (listof (or/c (cons bytes? bytes?)
|
||||
false/c))
|
||||
false/c)]{
|
||||
|
||||
Like @scheme[regexp-match] on input ports, but only peeks bytes from
|
||||
@scheme[input] instead of reading them. Furthermore, instead of
|
||||
an output port, the last optional argument is a progress event for
|
||||
@scheme[input] (see @secref["mz:read"]). If @scheme[progress]
|
||||
becomes ready, then the match stops peeking from @scheme[input]
|
||||
and returns @scheme[#f]. The @scheme[progress] argument can be
|
||||
@scheme[#f], in which case the peek may continue with inconsistent
|
||||
information if another process meanwhile reads from
|
||||
@scheme[input].}
|
||||
|
||||
@defproc[(regexp-match-peek-positions [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||
[input input-port?]
|
||||
[start-pos nonnegative-exact-integer? 0]
|
||||
[end-pos (or/c nonnegative-exact-integer? false/c) #f]
|
||||
[progress (or/c evt? false/c) #f])
|
||||
(or/c (listof (or/c (cons nonnegative-exact-integer?
|
||||
nonnegative-exact-integer?)
|
||||
false/c))
|
||||
false/c)]{
|
||||
|
||||
Like @scheme[regexp-match-positions] on input ports, but only peeks
|
||||
bytes from @scheme[input] instead of reading them, and with a
|
||||
@scheme[progress] argument like @scheme[regexp-match-peek].}
|
||||
|
||||
@defproc[(regexp-match-peek-immediate [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||
[input input-port?]
|
||||
[start-pos nonnegative-exact-integer? 0]
|
||||
[end-pos (or/c nonnegative-exact-integer? false/c) #f]
|
||||
[progress (or/c evt? false/c) #f])
|
||||
(or/c (listof (or/c (cons bytes? bytes?)
|
||||
false/c))
|
||||
false/c)]{
|
||||
|
||||
Like @scheme[regexp-match-peek], but it attempts to match only bytes
|
||||
that are available from @scheme[input] without blocking. The
|
||||
match fails if not-yet-available characters might be used to match
|
||||
@scheme[pattern].}
|
||||
|
||||
@defproc[(regexp-match-peek-positions-immediate [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||
[input input-port?]
|
||||
[start-pos nonnegative-exact-integer? 0]
|
||||
[end-pos (or/c nonnegative-exact-integer? false/c) #f]
|
||||
[progress (or/c evt? false/c) #f])
|
||||
(or/c (listof (or/c (cons nonnegative-exact-integer?
|
||||
nonnegative-exact-integer?)
|
||||
false/c))
|
||||
false/c)]{
|
||||
|
||||
Like @scheme[regexp-match-peek-positions], but it attempts to match
|
||||
only bytes that are available from @scheme[input] without
|
||||
blocking. The match fails if not-yet-available characters might be
|
||||
used to match @scheme[pattern].}
|
||||
|
||||
@;{
|
||||
|
||||
@;------------------------------------------------------------------------
|
||||
@section{Regexp Substitution}
|
||||
|
||||
@defproc[(regexp-replace [char-pattern any/c][string any/c][insert any/c]) any]{
|
||||
|
||||
Performs a match using @scheme[pattern] on @scheme[input] and
|
||||
then returns a string in which the matching portion of @scheme[input]
|
||||
is replaced with @scheme[insert-string]. If @scheme[char-pattern]
|
||||
matches no part of @scheme[string], then @scheme[string] is returned
|
||||
unmodified.
|
||||
|
||||
The @scheme[char-pattern] must be a string or a character regexp value
|
||||
(not a byte string or a byte regexp value).
|
||||
|
||||
If @scheme[insert-string] contains ``\&'', then ``\&'' is replaced with
|
||||
the matching portion of @scheme[string] before it is substituted into
|
||||
@scheme[string]. If @scheme[insert-string] contains
|
||||
``{\Backslash}@scheme[n]'' (for some integer @scheme[n]), then it is
|
||||
replaced with the @scheme[n]th matching sub-expression from
|
||||
@scheme[string].\footnote{The backslash is a character in the string, so
|
||||
an extra backslash is required to specify the string as a Scheme
|
||||
constant. For example, the Scheme constant
|
||||
@scheme["\\1"] is ``{\Backslash}1''.} ``\&''
|
||||
and ``{\Backslash}0'' are synonymous. If the @scheme[n]th sub-expression
|
||||
was not used in the match or if @scheme[n] is greater than the number of
|
||||
sub-expressions in @scheme[pattern], then ``{\Backslash}@scheme[n]'' is
|
||||
replaced with the empty string.
|
||||
|
||||
A literal ``\&'' or ``{\Backslash}'' is specified as
|
||||
``{\Backslash}\&'' or ``{\Backslash}{\Backslash}'', respectively. If
|
||||
@scheme[insert-string] contains ``{\Backslash}\$'', then
|
||||
``{\Backslash}\$'' is replaced with the empty string. (This can be
|
||||
used to terminate a number @scheme[n] following a backslash.) If a
|
||||
``{\Backslash}'' is followed by anything other than a digit, ``\&'',
|
||||
``{\Backslash}'', or ``\$'', then it is treated as ``{\Backslash}0''.}
|
||||
|
||||
@item{@defproc[(regexp-replace [byte-pattern any/c][string-or-bytes any/c][insert-string-or-bytes any/c]) any]
|
||||
%
|
||||
is analogous to @scheme[regexp-replace] on strings, where
|
||||
@scheme[byte-pattern] is a byte string or a byte regexp value. The result
|
||||
is always a byte string.}
|
||||
|
||||
@item{@defproc[(regexp-replace [char-pattern any/c][string any/c][proc any/c]) any]
|
||||
%
|
||||
is like @scheme[regexp-replace], but instead of an
|
||||
@scheme[insert-string] third argument, the third argument is a
|
||||
procedure that accepts match strings and produces a string to
|
||||
replace the match. The @scheme[proc] must accept the same number of
|
||||
arguments as @scheme[regexp-match] produces list elements for a
|
||||
successful match with @scheme[char-pattern].}
|
||||
|
||||
@item{@defproc[(regexp-replace [byte-pattern any/c][string-or-bytes any/c][proc any/c]) any]
|
||||
%
|
||||
is analogous to @scheme[regexp-replace] on strings and a procedure
|
||||
argument, but the procedure accepts byte strings to produce a byte
|
||||
string, instead of character strings.}
|
||||
|
||||
@item{@defproc[(regexp-replace* [pattern any/c][string any/c][insert-string any/c]) any]
|
||||
%
|
||||
is the same as @scheme[regexp-replace], except that every instance of
|
||||
@scheme[pattern] in @scheme[string] is replaced with
|
||||
@scheme[insert-string]. Only non-overlapping instances of @scheme[pattern]
|
||||
in the original @scheme[string] are replaced, so instances of
|
||||
@scheme[pattern] within inserted strings are \Em{not} replaced
|
||||
recursively. If, in the process of repeating matches, @scheme[pattern]
|
||||
matches an empty string, the @exnraise[exn:fail].}
|
||||
|
||||
@item{@defproc[(regexp-replace* [byte-pattern any/c][bytes any/c][insert-bytes any/c]) any]
|
||||
%
|
||||
is analogous to @scheme[regexp-replace*] on strings.}
|
||||
|
||||
@item{@defproc[(regexp-replace* [char-pattern any/c][string any/c][proc any/c]) any]
|
||||
%
|
||||
is like @scheme[regexp-replace] with a procedure argument, but with
|
||||
multiple instances replaced. The given @scheme[proc] is called once
|
||||
for each match.}
|
||||
|
||||
@item{@defproc[(regexp-replace* [byte-pattern any/c][bytes any/c][proc any/c]) any]
|
||||
%
|
||||
is like @scheme[regexp-replace*] with a string and procedure
|
||||
argument, but the procedure accepts and produces byte strings.}
|
||||
|
||||
}
|
|
@ -76,12 +76,11 @@ depends on the current inspector.)
|
|||
[immutables (listof non-negative-exact-integer?)
|
||||
null]
|
||||
[guard (or/c procedure? false/c) #f])
|
||||
(values
|
||||
struct-type?
|
||||
struct-constructor-procedure?
|
||||
struct-predicate-procedure?
|
||||
struct-accessor-procedure?
|
||||
struct-mutator-procedure?)]{
|
||||
(values struct-type?
|
||||
struct-constructor-procedure?
|
||||
struct-predicate-procedure?
|
||||
struct-accessor-procedure?
|
||||
struct-mutator-procedure?)]{
|
||||
|
||||
Creates a new structure type. The @scheme[name] argument is used as
|
||||
the type name. If @scheme[super-type] is not @scheme[#f], the new type
|
||||
|
@ -234,10 +233,9 @@ A @index['("structure type properties")]{@defterm{structure type
|
|||
|
||||
@defproc[(make-struct-type-property [name symbol?]
|
||||
[guard (or/c procedure? false/c) #f])
|
||||
(values
|
||||
struct-type-property?
|
||||
procedure?
|
||||
procedure?)]{
|
||||
(values struct-type-property?
|
||||
procedure?
|
||||
procedure?)]{
|
||||
|
||||
Creates a new structure type property and returns three values:
|
||||
|
||||
|
@ -355,15 +353,14 @@ Returns two values:
|
|||
}}
|
||||
|
||||
@defproc[(struct-type-info [struct-type struct-type?])
|
||||
(values
|
||||
symbol?
|
||||
nonnegative-exact-integer?
|
||||
nonnegative-exact-integer?
|
||||
struct-accessor-procedure?
|
||||
struct-mutator-procedure?
|
||||
(listof nonnegative-exact-integer?)
|
||||
(or/c struct-type? false/c)
|
||||
boolean?)]{
|
||||
(values symbol?
|
||||
nonnegative-exact-integer?
|
||||
nonnegative-exact-integer?
|
||||
struct-accessor-procedure?
|
||||
struct-mutator-procedure?
|
||||
(listof nonnegative-exact-integer?)
|
||||
(or/c struct-type? false/c)
|
||||
boolean?)]{
|
||||
|
||||
Returns eight values that provide information about the structure type
|
||||
descriptor @scheme[struct-type], assuming that the type is controlled
|
||||
|
|
Loading…
Reference in New Issue
Block a user