start on regexp functions; further formatting improvements for defproc

svn: r6745
2007-06-27 00:17:04 +00:00 · 2007-06-27 00:17:04 +00:00 · bf717526b0
commit bf717526b0
parent 4e72b10ce4
6 changed files with 629 additions and 125 deletions
--- a/collects/scribble/manual.ss
+++ b/collects/scribble/manual.ss
@ -393,20 +393,21 @@
                        [(eq? v '...)
                         dots0]
                        [else v]))]
-          [prototype-size (lambda (s)
-                            (let loop ([s s])
+          [prototype-size (lambda (s first-combine next-combine)
+                            (let loop ([s s][combine first-combine])
                              (if (null? s)
-                                  1
-                                  (+ 1 (loop (cdr s))
-                                     (cond
-                                      [(symbol? (car s)) (string-length (symbol->string (car s)))]
-                                      [(pair? (car s)) 
-                                       (if (keyword? (caar s))
-                                           (+ (string-length (keyword->string (caar s)))
-                                              3
-                                              (string-length (symbol->string (cadar s))))
-                                           (string-length (symbol->string (caar s))))]
-                                      [else 0])))))])
+                                  0
+                                  (combine
+                                   (loop (cdr s) next-combine)
+                                   (cond
+                                    [(symbol? (car s)) (string-length (symbol->string (car s)))]
+                                    [(pair? (car s)) 
+                                     (if (keyword? (caar s))
+                                         (+ (string-length (keyword->string (caar s)))
+                                            3
+                                            (string-length (symbol->string (cadar s))))
+                                         (string-length (symbol->string (caar s))))]
+                                    [else 0])))))])
      (parameterize ([current-variable-list
                      (map (lambda (i)
                             (and (pair? i)
@ -422,34 +423,41 @@
            append
            (map 
             (lambda (stx-id prototype arg-contracts result-contract first?)
-               (append
-                (list
-                 (list (make-flow
-                        (let-values ([(required optional more-required)
-                                         (let loop ([a (cdr prototype)][r-accum null])
-                                           (if (or (null? a)
-                                                   (and (has-optional? (car a))))
-                                               (let ([req (reverse r-accum)])
-                                                 (let loop ([a a][o-accum null])
-                                                   (if (or (null? a)
-                                                           (not (has-optional? (car a))))
-                                                       (values req (reverse o-accum) a)
-                                                       (loop (cdr a) (cons (car a) o-accum)))))
-                                               (loop (cdr a) (cons (car a) r-accum))))]
-                                        [(tagged) (if first?
-                                                      (make-target-element
-                                                       #f
-                                                       (list (to-element (make-just-context (car prototype)
-                                                                                            stx-id)))
-                                                       (register-scheme-definition stx-id))
-                                                      (to-element (make-just-context (car prototype)
-                                                                                     stx-id)))]
-                                        [(short?) (or ((prototype-size prototype) . < . 40)
-                                                      ((length prototype) . < . 3))]
-                                        [(end) (list (to-flow spacer)
-                                                     (to-flow 'rarr)
-                                                     (to-flow spacer)
-                                                     (make-flow (list (result-contract))))])
+               (let*-values ([(required optional more-required)
+                              (let loop ([a (cdr prototype)][r-accum null])
+                                (if (or (null? a)
+                                        (and (has-optional? (car a))))
+                                    (let ([req (reverse r-accum)])
+                                      (let loop ([a a][o-accum null])
+                                        (if (or (null? a)
+                                                (not (has-optional? (car a))))
+                                            (values req (reverse o-accum) a)
+                                            (loop (cdr a) (cons (car a) o-accum)))))
+                                    (loop (cdr a) (cons (car a) r-accum))))]
+                             [(tagged) (if first?
+                                           (make-target-element
+                                            #f
+                                            (list (to-element (make-just-context (car prototype)
+                                                                                 stx-id)))
+                                            (register-scheme-definition stx-id))
+                                           (to-element (make-just-context (car prototype)
+                                                                          stx-id)))]
+                             [(flat-size) (prototype-size prototype + +)]
+                             [(short?) (or (flat-size . < . 40)
+                                           ((length prototype) . < . 3))]
+                             [(res) (result-contract)]
+                             [(result-next-line?) ((+ (if short? 
+                                                          flat-size
+                                                          (prototype-size prototype + max))
+                                                      (flow-element-width res))
+                                                   . >= . 50)]
+                             [(end) (list (to-flow spacer)
+                                          (to-flow 'rarr)
+                                          (to-flow spacer)
+                                          (make-flow (list res)))])
+                 (append
+                  (list
+                   (list (make-flow
                          (if short?
                              (make-table-if-necessary
                               "prototype"
@ -468,12 +476,16 @@
                                                      'paren-shape
                                                      #\?))))
                                               (map arg->elem more-required))))
-                                 end)))
+                                 (if result-next-line?
+                                     null
+                                     end))))
                              (let ([not-end
-                                     (list (to-flow spacer)
-                                           (to-flow spacer)
-                                           (to-flow spacer)
-                                           (to-flow spacer))])
+                                     (if result-next-line?
+                                         (list (to-flow spacer))
+                                         (list (to-flow spacer)
+                                               (to-flow spacer)
+                                               (to-flow spacer)
+                                               (to-flow spacer)))])
                                (list
                                 (make-table
                                  "prototype"
@ -513,40 +525,46 @@
                                                                #f 
                                                                (list a "]" (schemeparenfont ")"))))]
                                                          [else a])))
-                                                      (if (null? (cdr args))
+                                                      (if (and (null? (cdr args))
+                                                               (not result-next-line?))
                                                          end
                                                          not-end))
-                                               (loop (cdr args) (sub1 req))))))))))))))
-                (apply append
-                       (map (lambda (v arg-contract)
-                              (cond
-                               [(pair? v)
-                                (list
-                                 (list
-                                  (make-flow
-                                   (make-table-if-necessary
-                                    "argcontract"
-                                    (list
-                                     (let ([v (if (keyword? (car v)) 
-                                                  (cdr v)
-                                                  v)])
-                                       (append
-                                        (list
-                                         (to-flow (hspace 2))
-                                         (to-flow (arg->elem v))
-                                         (to-flow spacer)
-                                         (to-flow ":")
-                                         (to-flow spacer)
-                                         (make-flow (list (arg-contract))))
-                                        (if (has-optional? v)
-                                            (list (to-flow spacer)
-                                                  (to-flow "=")
-                                                  (to-flow spacer)
-                                                  (to-flow (to-element (caddr v))))
-                                            null))))))))]
-                               [else null]))
-                            (cdr prototype)
-                            arg-contracts))))
+                                               (loop (cdr args) (sub1 req)))))))))))))
+                  (if result-next-line?
+                      (list (list (make-flow (make-table-if-necessary
+                                              "prototype" 
+                                              (list end)))))
+                      null)
+                  (apply append
+                         (map (lambda (v arg-contract)
+                                (cond
+                                 [(pair? v)
+                                  (list
+                                   (list
+                                    (make-flow
+                                     (make-table-if-necessary
+                                      "argcontract"
+                                      (list
+                                       (let ([v (if (keyword? (car v)) 
+                                                    (cdr v)
+                                                    v)])
+                                         (append
+                                          (list
+                                           (to-flow (hspace 2))
+                                           (to-flow (arg->elem v))
+                                           (to-flow spacer)
+                                           (to-flow ":")
+                                           (to-flow spacer)
+                                           (make-flow (list (arg-contract))))
+                                          (if (has-optional? v)
+                                              (list (to-flow spacer)
+                                                    (to-flow "=")
+                                                    (to-flow spacer)
+                                                    (to-flow (to-element (caddr v))))
+                                              null))))))))]
+                                 [else null]))
+                              (cdr prototype)
+                              arg-contracts)))))
             stx-ids
             prototypes
             arg-contractss
--- a/collects/scribble/scheme.ss
+++ b/collects/scribble/scheme.ss
@ -372,7 +372,8 @@
                                   (make-link-element "schemesyntaxlink" (list s) stag)]
                                  [vd
                                   (make-link-element "schemevaluelink" (list s) vtag)]
-                                  [else s])))))
+                                  [else s]))))
+                            (lambda () s))
                           (literalize-spaces s))
                       (cond
                        [(positive? quote-depth) value-color]
--- a/collects/scribble/struct.ss
+++ b/collects/scribble/struct.ss
@ -95,28 +95,33 @@
                  delayed-element-ref
                  delayed-element-set!)
    (make-struct-type 'delayed-element #f
-                      1 1 #f
+                      2 1 #f
                      (list (cons prop:serializable 
                                  (make-serialize-info
                                   (lambda (d)
-                                     (unless (delayed-element-ref d 1)
+                                     (unless (delayed-element-ref d 2)
                                       (error 'serialize-delayed-element
                                              "cannot serialize a delayed element that was not resolved: ~e"
                                              d))
-                                     (vector (delayed-element-ref d 1)))
+                                     (vector (delayed-element-ref d 2)))
                                   #'deserialize-delayed-element
                                   #f
                                   (or (current-load-relative-directory) (current-directory)))))))
  (define-syntax delayed-element (list-immutable #'struct:delayed-element
                                                 #'make-delayed-element
                                                 #'delayed-element?
-                                                 (list-immutable #'delayed-element-render)
-                                                 (list-immutable #'set-delayed-element-render!)
+                                                 (list-immutable #'delayed-element-sizer 
+                                                                 #'delayed-element-render)
+                                                 (list-immutable #'set-delayed-element-sizer!
+                                                                 #'set-delayed-element-render!)
                                                 #t))
  (define delayed-element-render (make-struct-field-accessor delayed-element-ref 0))
+  (define delayed-element-sizer (make-struct-field-accessor delayed-element-ref 1))
  (define set-delayed-element-render! (make-struct-field-mutator delayed-element-set! 0))
+  (define set-delayed-element-sizer! (make-struct-field-mutator delayed-element-set! 1))
  (provide/contract
-   (struct delayed-element ([render (any/c part? any/c . -> . list?)])))
+   (struct delayed-element ([render (any/c part? any/c . -> . list?)]
+                            [sizer (-> any)])))

  (provide deserialize-delayed-element)
  (define deserialize-delayed-element
@ -124,9 +129,9 @@
  
  (provide force-delayed-element)
  (define (force-delayed-element d renderer sec ht)
-    (or (delayed-element-ref d 1)
+    (or (delayed-element-ref d 2)
        (let ([v ((delayed-element-ref d 0) renderer sec ht)])
-          (delayed-element-set! d 1 v)
+          (delayed-element-set! d 2 v)
          v)))

  ;; ----------------------------------------
@ -163,5 +168,52 @@
                         renderer sec ht)]
       [else (element->string c)])]))

+  ;; ----------------------------------------
+  
+  (provide flow-element-width
+           element-width)
+
+  (define (element-width s) ; estimated width of element content `s`
+    (cond
+     [(string? s) (string-length s)] ; a string counts one unit per character
+     [(element? s) (apply + (map element-width (element-content s)))] ; sum of the widths of the nested content
+     [(delayed-element? s) (element-width ((delayed-element-sizer s)))] ; call the sizer thunk and measure what it returns
+     [else 1])) ; any other value counts as width 1
+
+  (define (paragraph-width s) ; estimated width of paragraph `s`
+    (apply + (map element-width (paragraph-content s)))) ; sum of the element widths of the paragraph's content
+
+  (define (flow-width f) ; estimated width of flow `f`
+    (apply max 0 (map flow-element-width (flow-paragraphs f)))) ; widest flow element, or 0 when the flow has none
+
+  (define (flow-element-width p) ; estimated width of flow element `p`, dispatching on its kind
+    (cond
+     [(paragraph? p) (paragraph-width p)]
+     [(table? p) (table-width p)]
+     [(itemization? p) (itemization-width p)]
+     [(blockquote? p) (blockquote-width p)]
+     [(delayed-flow-element? p) 1])) ; counted as width 1 without being forced. NOTE(review): there is no else clause, so any other kind of value produces no usable number -- confirm callers never pass one
+
+  (define (table-width p) ; estimated width of table `p`: the per-column maxima added together
+    (let ([flowss (table-flowss p)]) ; presumably one list of cell flows per row -- TODO confirm
+      (if (null? flowss)
+          0 ; a table with no rows has width 0
+          (let loop ([flowss flowss]) ; each iteration consumes the first cell of every row
+            (if (null? (car flowss)) ; stop once the first row has no cells left
+                0
+                (+ (apply max 
+                          0
+                          (map flow-width
+                               (map car flowss))) ; widest first cell among all rows
+                   (loop (map cdr flowss)))))))) ; plus the width of the remaining cells of every row
+
+  (define (itemization-width p) ; estimated width of itemization `p`
+    (apply max 0 (map flow-width (itemization-flows p)))) ; widest item flow, or 0 when there are no items
+
+  (define (blockquote-width p) ; estimated width of blockquote `p`
+    (+ 4 (apply max 0 (map paragraph-width (blockquote-paragraphs p))))) ; widest paragraph plus 4. NOTE(review): the 4 presumably accounts for indentation -- confirm
+
+  ;; ----------------------------------------
+
  )

--- a/collects/scribblings/reference/reference.scrbl
+++ b/collects/scribblings/reference/reference.scrbl
@ -28,25 +28,7 @@ language.
@subsection[#:tag "mz:char-input"]{From Bytes to Characters}

@;------------------------------------------------------------------------
-@section["Regular Expressions"]
-
-@require["rx.ss"]
-
-Common grammar:
-
-@common-table
-
-Rx table:
-
-@rx-table
-
-Px table:
-
-@px-table
-
-Types:
-
-@type-table
+@include-section["regexps.scrbl"]

@;------------------------------------------------------------------------

--- a/collects/scribblings/reference/regexps.scrbl
+++ b/collects/scribblings/reference/regexps.scrbl
@ -0,0 +1,454 @@
+#reader(lib "docreader.ss" "scribble")
+@require[(lib "bnf.ss" "scribble")]
+@require["mz.ss"]
+
+@require["rx.ss"]
+
+@title[#:tag "mz:regexp"]{Regular Expressions}
+
+@;{
+\index{regular expressions}
+\index{regexps|see{regular expressions}}
+\index{pattern matching}
+\index{strings!pattern matching}
+\index{input ports!pattern matching}
+}
+
+Regular expressions are specified as strings or byte strings, using
+the same pattern language as the Unix utility @exec{egrep} or Perl. A
+string-specified pattern produces a character regexp matcher, and a
+byte-string pattern produces a byte regexp matcher. If a character
+regexp is used with a byte string or input port, it matches UTF-8
+encodings (see @secref["mz:encodings"]) of matching character streams;
+if a byte regexp is used with a character string, it matches bytes in
+the UTF-8 encoding of the string.
+
+Regular expressions can be compiled into a @defterm{regexp value} for
+repeated matches. The @scheme[regexp] and @scheme[byte-regexp]
+procedures convert a string or byte string (respectively) into a
+regexp value using one syntax of regular expressions that is most
+compatible with @exec{egrep}. The @scheme[pregexp] and
+@scheme[byte-pregexp] procedures produce a regexp value using a
+slightly different syntax of regular expressions that is more
+compatible with Perl.  In addition, Scheme constants written with
+@litchar{#rx} or @litchar{#px} (see @secref["mz:reader"]) produce
+compiled regexp values.
+
+The internal size of a regexp value is limited to 32 kilobytes; this
+limit roughly corresponds to a source string with 32,000 literal
+characters or 5,000 operators.
+
+@;------------------------------------------------------------------------
+@section[#:tag "mz:regexp-syntax"]{Regexp Syntax}
+
+The following syntax specifications describe the content of a string
+that represents a regular expression. The syntax of the corresponding
+string may involve extra escape characters. For example, the regular
+expression @litchar["(.*)\\1"] can be represented with the string
+@scheme["(.*)\\1"] or the regexp constant @scheme[#rx"(.*)\\1"]; the
+@litchar["\\"] in the regular expression must be escaped to include it
+in a string or regexp constant.
+
+The @scheme[regexp] and @scheme[pregexp] syntaxes share a common core:
+
+@common-table
+
+The following completes the grammar for @scheme[regexp], which treats
+@litchar["{"] and @litchar["}"] as literals, @litchar["\\"] as a
+literal within ranges, and @litchar["\\"] as a literal producer
+outside of ranges.
+
+@rx-table
+
+The following completes the grammar for @scheme[pregexp], which uses
+@litchar["{"] and @litchar["}"] for bounded repetition and uses
+@litchar["\\"] for meta-characters both inside and outside of ranges.
+
+@px-table
+
+@;------------------------------------------------------------------------
+@section{Additional Syntactic Constraints}
+
+In addition to matching a grammar, regular expressions must meet two
+syntactic restrictions:
+
+@itemize{
+
+ @item{In a @nonterm{repeat} other than @nonterm{atom}@litchar{?},
+       the @nonterm{atom} must not match an empty sequence.}
+
+ @item{In a @litchar{(?<=}@nonterm{regexp}@litchar{)} or
+       @litchar{(?<!}@nonterm{regexp}@litchar{)},
+       the @nonterm{regexp} must match a bounded sequence, only.}
+
+}
+
+These constraints are checked syntactically by the following type
+system. A type [@math{n}, @math{m}] corresponds to an expression that
+matches between @math{n} and @math{m} characters. In the rule for
+@litchar{(}@nonterm{Regexp}@litchar{)}, @math{N} means the number such
+that the opening parenthesis is the @math{N}th opening parenthesis for
+collecting match reports.  Non-emptiness is inferred for a
+backreference pattern, @litchar["\\"]@nonterm{N}, so that a
+backreference can be used for repetition patterns; in the case of
+mutual dependencies among backreferences, the inference chooses the
+fixpoint that maximizes non-emptiness.  Finiteness is not inferred for
+backreferences (i.e., a backreference is assumed to match an
+arbitrarily large sequence).
+
+@type-table
+
+@;------------------------------------------------------------------------
+@section{Regexp Constructors}
+
+@defproc[(regexp [str string?]) regexp?]{
+
+Takes a string representation of a regular expression (using the
+syntax in @secref["mz:regexp-syntax"]) and compiles it into a regexp
+value. Other regular expression procedures accept either a string or a
+regexp value as the matching pattern. If a regular expression string
+is used multiple times, it is faster to compile the string once to a
+regexp value and use it for repeated matches instead of using the
+string each time.
+
+The @scheme[object-name] procedure (see @secref["mz:infernames"]) returns
+the source string for a regexp value.
+
+@examples[
+(regexp "ap*le")
+(object-name #rx"ap*le")
+]}
+
+@defproc[(pregexp [string string?]) pregexp?]{
+
+Like @scheme[regexp], except that it uses a slightly different syntax
+(see @secref["mz:regexp-syntax"]). The result can be used with
+@scheme[regexp-match], etc., just like the result from
+@scheme[regexp].
+
+@examples[
+(pregexp "ap*le")
+(regexp? #px"ap*le")
+]}
+
+@defproc[(regexp? [v any/c]) boolean?]{
+
+Returns @scheme[#t] if @scheme[v] is a regexp value created by
+@scheme[regexp] or @scheme[pregexp], @scheme[#f] otherwise.}
+
+@defproc[(pregexp? [v any/c]) boolean?]{
+
+Returns @scheme[#t] if @scheme[v] is a regexp value created by
+@scheme[pregexp] (not @scheme[regexp]), @scheme[#f] otherwise.}
+
+@defproc[(byte-regexp [bstr bytes?]) byte-regexp?]{
+
+Takes a byte-string representation of a regular expression (using the
+syntax in @secref["mz:regexp-syntax"]) and compiles it into a
+byte-regexp value.
+
+The @scheme[object-name] procedure (see @secref["mz:infernames"])
+returns the source byte string for a regexp value.
+
+@examples[
+(byte-regexp #"ap*le")
+(byte-regexp "ap*le")
+]}
+
+@defproc[(byte-pregexp [bstr bytes?]) byte-pregexp?]{
+
+Like @scheme[byte-regexp], except that it uses a slightly different
+syntax (see @secref["mz:regexp-syntax"]). The result can be used with
+@scheme[regexp-match], etc., just like the result from
+@scheme[byte-regexp].
+
+@examples[
+(byte-pregexp #"ap*le")
+]}
+
+@defproc[(byte-regexp? [v any/c]) boolean?]{
+
+Returns @scheme[#t] if @scheme[v] is a regexp value created by
+@scheme[byte-regexp] or @scheme[byte-pregexp], @scheme[#f] otherwise.}
+
+@defproc[(byte-pregexp? [v any/c]) boolean?]{
+
+Returns @scheme[#t] if @scheme[v] is a regexp value created by
+@scheme[byte-pregexp] (not @scheme[byte-regexp]), @scheme[#f]
+otherwise.}
+
+
+@;------------------------------------------------------------------------
+@section{Regexp Matching}
+
+@defproc[(regexp-match [pattern (or/c string? bytes? regexp? byte-regexp?)]
+                       [input (or/c string? bytes? input-port?)]
+                       [start-pos nonnegative-exact-integer? 0]
+                       [end-pos (or/c nonnegative-exact-integer? false/c) #f]
+                       [output-port (or/c output-port? false/c) #f])
+         (or/c (listof (or/c string? bytes? false/c))
+               false/c)]{
+
+Attempts to match @scheme[pattern] (a string, byte string, regexp
+value, or byte-regexp value) once to a portion of @scheme[input].  The
+matcher finds a portion of @scheme[input] that matches and is closest
+to the start of the input (after @scheme[start-pos]).
+
+The optional @scheme[start-pos] and @scheme[end-pos] arguments select
+a portion of @scheme[input] for matching; the default is the entire
+string or the stream up to an end-of-file. When @scheme[input] is a
+string, @scheme[start-pos] is a character position; when
+@scheme[input] is a byte string, then @scheme[start-pos] is a byte
+position; and when @scheme[input] is an input port, @scheme[start-pos]
+is the number of bytes to skip before starting to match. The
+@scheme[end-pos] argument can be @scheme[#f], which corresponds to the
+end of the string or the end-of-file in the stream; otherwise, it is a
+character or byte position, like @scheme[start-pos]. If @scheme[input]
+is an input port, and if the end-of-file is reached before
+@scheme[start-pos] bytes are skipped, then the match fails.
+
+In @scheme[pattern], a start-of-string @litchar{^} refers to the first
+position of @scheme[input] after @scheme[start-pos], and the
+end-of-input @litchar{$} refers to the @scheme[end-pos]th position or
+(in the case of an input port) the end of file, whichever comes first.
+
+If the match fails, @scheme[#f] is returned. If the match succeeds, a
+list containing strings or byte strings, and possibly @scheme[#f], is
+returned. The list contains strings only if @scheme[input] is a string
+and @scheme[pattern] is not a byte regexp value. Otherwise, the list
+contains byte strings (substrings of the UTF-8 encoding of
+@scheme[input], if @scheme[input] is a string).
+
+The first [byte] string in a result list is the portion of
+@scheme[input] that matched @scheme[pattern]. If two portions of
+@scheme[input] can match @scheme[pattern], then the match that starts
+earliest is found.
+ 
+Additional [byte] strings are returned in the list if @scheme[pattern]
+contains parenthesized sub-expressions (but not when the open
+parenthesis is followed by @litchar{?:}). Matches for the
+sub-expressions are provided in the order of the opening parentheses
+in @scheme[pattern]. When sub-expressions occur in branches of an
+@litchar["|"] ``or'' pattern, in a @litchar{*} ``zero or more''
+pattern, or other places where the overall pattern can succeed without
+a match for the sub-expression, then a @scheme[#f] is returned for the
+sub-expression if it did not contribute to the final match. When a
+single sub-expression occurs within a @litchar{*} ``zero or more''
+pattern or other multiple-match positions, then the rightmost match
+associated with the sub-expression is returned in the list.
+
+If the optional @scheme[output-port] is provided as an output port,
+the part of @scheme[input] from its beginning (not @scheme[start-pos])
+that precedes the match is written to the port. All of @scheme[input]
+up to @scheme[end-pos] is written to the port if no match is
+found. This functionality is most useful when @scheme[input] is an
+input port.
+
+When matching an input port, a match failure reads up to
+@scheme[end-pos] bytes (or end-of-file), even if @scheme[pattern]
+begins with a start-of-string @litchar{^}; see also
+@scheme[regexp-match/fail-without-reading]. On success, all bytes up
+to and including the match are eventually read from the port, but
+matching proceeds by first peeking bytes from the port (using
+@scheme[peek-bytes-avail!]), and then (re-)reading matching bytes to
+discard them after the match result is determined. Non-matching bytes
+may be read and discarded before the match is determined. The matcher
+peeks in blocking mode only as far as necessary to determine a match,
+but it may peek extra bytes to fill an internal buffer if immediately
+available (i.e., without blocking). Greedy repeat operators in
+@scheme[pattern], such as @litchar{*} or @litchar{+}, tend to force
+reading the entire content of the port (up to @scheme[end-pos]) to
+determine a match.
+
+If the input port is read simultaneously by another thread, or if the
+port is a custom port with inconsistent reading and peeking procedures
+(see @secref["mz:customport"]), then the bytes that are peeked and
+used for matching may be different than the bytes read and discarded
+after the match completes; the matcher inspects only the peeked
+bytes. To avoid such interleaving, use @scheme[regexp-match-peek]
+(with a @scheme[progress-evt] argument) followed by
+@scheme[port-commit-peeked].}
+
+@defproc[(regexp-match-positions [pattern (or/c string? bytes? regexp? byte-regexp?)]
+                        [input (or/c string? bytes? input-port?)]
+                        [start-pos nonnegative-exact-integer? 0]
+                        [end-pos (or/c nonnegative-exact-integer? false/c) #f]
+                        [output-port (or/c output-port? false/c) #f])
+          (or/c (listof (or/c (cons nonnegative-exact-integer?
+                                    nonnegative-exact-integer?)
+                              false/c))
+                false/c)]{
+
+Like @scheme[regexp-match], but returns a list of number pairs (and
+@scheme[#f]) instead of a list of strings. Each pair of numbers refers
+to a range of characters or bytes in @scheme[input]. If the result for
+the same arguments with @scheme[regexp-match] would be a list of byte
+strings, the resulting ranges correspond to byte ranges; in that case,
+if @scheme[input] is a character string, the byte ranges correspond to
+bytes in the UTF-8 encoding of the string.
+
+Range results are returned in a @scheme[substring]- and
+@scheme[subbytes]-compatible manner, independent of
+@scheme[start-pos]. In the case of an input port, the returned
+positions indicate the number of bytes that were read, including
+@scheme[start-pos], before the first matching byte.}
+
+@defproc[(regexp-match? [pattern (or/c string? bytes? regexp? byte-regexp?)]
+                        [input (or/c string? bytes? input-port?)]
+                        [start-pos nonnegative-exact-integer? 0]
+                        [end-pos (or/c nonnegative-exact-integer? false/c) #f]
+                        [output-port (or/c output-port? false/c) #f])
+           boolean?]{
+
+Like @scheme[regexp-match], but returns merely @scheme[#t] when the
+match succeeds, @scheme[#f] otherwise.}
+
+@defproc[(regexp-match-peek [pattern (or/c string? bytes? regexp? bytes-regexp?)]
+                            [input input-port?]
+                            [start-pos nonnegative-exact-integer? 0]
+                            [end-pos (or/c nonnegative-exact-integer? false/c) #f]
+                            [progress (or/c evt? false/c) #f])
+          (or/c (listof (or/c bytes?
+                              false/c))
+                false/c)]{
+         
+Like @scheme[regexp-match] on input ports, but only peeks bytes from
+@scheme[input] instead of reading them. Furthermore, instead of
+an output port, the last optional argument is a progress event for
+@scheme[input] (see @secref["mz:read"]). If @scheme[progress]
+becomes ready, then the match stops peeking from @scheme[input]
+and returns @scheme[#f]. The @scheme[progress] argument can be
+@scheme[#f], in which case the peek may continue with inconsistent
+information if another thread meanwhile reads from
+@scheme[input].}
+
+@defproc[(regexp-match-peek-positions [pattern (or/c string? bytes? regexp? bytes-regexp?)]
+                            [input input-port?]
+                            [start-pos nonnegative-exact-integer? 0]
+                            [end-pos (or/c nonnegative-exact-integer? false/c) #f]
+                            [progress (or/c evt? false/c) #f])
+          (or/c (listof (or/c (cons nonnegative-exact-integer?
+                                    nonnegative-exact-integer?)
+                              false/c))
+                false/c)]{
+
+Like @scheme[regexp-match-positions] on input ports, but only peeks
+bytes from @scheme[input] instead of reading them, and with a
+@scheme[progress] argument like @scheme[regexp-match-peek].}
+
+@defproc[(regexp-match-peek-immediate [pattern (or/c string? bytes? regexp? bytes-regexp?)]
+                            [input input-port?]
+                            [start-pos nonnegative-exact-integer? 0]
+                            [end-pos (or/c nonnegative-exact-integer? false/c) #f]
+                            [progress (or/c evt? false/c) #f])
+          (or/c (listof (or/c bytes?
+                              false/c))
+                false/c)]{
+
+Like @scheme[regexp-match-peek], but it attempts to match only bytes
+that are available from @scheme[input] without blocking.  The
+match fails if not-yet-available characters might be used to match
+@scheme[pattern].}
+
+@defproc[(regexp-match-peek-positions-immediate [pattern (or/c string? bytes? regexp? bytes-regexp?)]
+                            [input input-port?]
+                            [start-pos nonnegative-exact-integer? 0]
+                            [end-pos (or/c nonnegative-exact-integer? false/c) #f]
+                            [progress (or/c evt? false/c) #f])
+          (or/c (listof (or/c (cons nonnegative-exact-integer?
+                                    nonnegative-exact-integer?)
+                              false/c))
+                false/c)]{
+
+Like @scheme[regexp-match-peek-positions], but it attempts to match
+only bytes that are available from @scheme[input] without
+blocking. The match fails if not-yet-available characters might be
+used to match @scheme[pattern].}
+
+@;{
+
+@;------------------------------------------------------------------------
+@section{Regexp Substitution}
+
+@defproc[(regexp-replace [char-pattern any/c][string any/c][insert any/c]) any]{
+
+Performs a match using @scheme[pattern] on @scheme[input] and
+then returns a string in which the matching portion of @scheme[input]
+is replaced with @scheme[insert-string].  If @scheme[char-pattern]
+matches no part of @scheme[string], then @scheme[string] is returned
+unmodified.
+ 
+ The @scheme[char-pattern] must be a string or a character regexp value
+ (not a byte string or a byte regexp value).
+
+ If @scheme[insert-string] contains ``\&'', then ``\&'' is replaced with
+ the matching portion of @scheme[string] before it is substituted into
+ @scheme[string].  If @scheme[insert-string] contains
+ ``{\Backslash}@scheme[n]'' (for some integer @scheme[n]), then it is
+ replaced with the @scheme[n]th matching sub-expression from
+ @scheme[string].\footnote{The backslash is a character in the string, so
+ an extra backslash is required to specify the string as a Scheme
+ constant. For example, the Scheme constant
+ @scheme["\\1"] is ``{\Backslash}1''.} ``\&''
+ and ``{\Backslash}0'' are synonymous. If the @scheme[n]th sub-expression
+ was not used in the match or if @scheme[n] is greater than the number of
+ sub-expressions in @scheme[pattern], then ``{\Backslash}@scheme[n]'' is
+ replaced with the empty string.
+
+ A literal ``\&'' or ``{\Backslash}'' is specified as
+ ``{\Backslash}\&'' or ``{\Backslash}{\Backslash}'', respectively.  If
+ @scheme[insert-string] contains ``{\Backslash}\$'', then
+ ``{\Backslash}\$'' is replaced with the empty string. (This can be
+ used to terminate a number @scheme[n] following a backslash.) If a
+ ``{\Backslash}'' is followed by anything other than a digit, ``\&'',
+ ``{\Backslash}'', or ``\$'', then it is treated as ``{\Backslash}0''.}
+
+ @item{@defproc[(regexp-replace [byte-pattern any/c][string-or-bytes any/c][insert-string-or-bytes any/c]) any]
+%
+  is analogous to @scheme[regexp-replace] on strings, where
+  @scheme[byte-pattern] is a byte string or a byte regexp value. The result
+  is always a byte string.}
+
+ @item{@defproc[(regexp-replace [char-pattern any/c][string any/c][proc any/c]) any]
+%
+   is like @scheme[regexp-replace], but instead of an
+   @scheme[insert-string] third argument, the third argument is a
+   procedure that accepts match strings and produces a string to
+   replace the match. The @scheme[proc] must accept the same number of
+   arguments as @scheme[regexp-match] produces list elements for a
+   successful match with @scheme[char-pattern].}
+
+ @item{@defproc[(regexp-replace [byte-pattern any/c][string-or-bytes any/c][proc any/c]) any]
+%
+   is analogous to @scheme[regexp-replace] on strings and a procedure
+   argument, but the procedure accepts byte strings to produce a byte
+   string, instead of character strings.}
+
+ @item{@defproc[(regexp-replace* [pattern any/c][string any/c][insert-string any/c]) any]
+%
+ is the same as @scheme[regexp-replace], except that every instance of
+ @scheme[pattern] in @scheme[string] is replaced with
+ @scheme[insert-string]. Only non-overlapping instances of @scheme[pattern]
+ in the original @scheme[string] are replaced, so instances of
+ @scheme[pattern] within inserted strings are \Em{not} replaced
+ recursively. If, in the process of repeating matches, @scheme[pattern]
+ matches an empty string, the @exnraise[exn:fail].}
+
+ @item{@defproc[(regexp-replace* [byte-pattern any/c][bytes any/c][insert-bytes any/c]) any]
+%
+ is analogous to @scheme[regexp-replace*] on strings.}
+
+ @item{@defproc[(regexp-replace* [char-pattern any/c][string any/c][proc any/c]) any]
+%
+   is like @scheme[regexp-replace] with a procedure argument, but with
+   multiple instances replaced. The given @scheme[proc] is called once
+   for each match.}
+
+ @item{@defproc[(regexp-replace* [byte-pattern any/c][bytes any/c][proc any/c]) any]
+%
+   is like @scheme[regexp-replace*] with a string and procedure
+   argument, but the procedure accepts and produces byte strings.}
+
+}
--- a/collects/scribblings/reference/struct.scrbl
+++ b/collects/scribblings/reference/struct.scrbl
@ -76,12 +76,11 @@ depends on the current inspector.)
                           [immutables (listof non-negative-exact-integer?)
                                       null]
                           [guard (or/c procedure? false/c) #f])
-          (values 
-           struct-type?
-           struct-constructor-procedure?
-           struct-predicate-procedure?
-           struct-accessor-procedure?
-           struct-mutator-procedure?)]{
+          (values struct-type?
+                  struct-constructor-procedure?
+                  struct-predicate-procedure?
+                  struct-accessor-procedure?
+                  struct-mutator-procedure?)]{

 Creates a new structure type.  The @scheme[name] argument is used as
 the type name. If @scheme[super-type] is not @scheme[#f], the new type
@ -234,10 +233,9 @@ A @index['("structure type properties")]{@defterm{structure type

@defproc[(make-struct-type-property [name symbol?]
                                    [guard (or/c procedure? false/c) #f]) 
-         (values 
-          struct-type-property?
-          procedure?
-          procedure?)]{
+         (values struct-type-property?
+                 procedure?
+                 procedure?)]{

 Creates a new structure type property and returns three values:

@ -355,15 +353,14 @@ Returns two values:
 }}

@defproc[(struct-type-info [struct-type struct-type?])
-         (values 
-          symbol?
-          nonnegative-exact-integer?
-          nonnegative-exact-integer?
-          struct-accessor-procedure?
-          struct-mutator-procedure?
-          (listof nonnegative-exact-integer?)
-          (or/c struct-type? false/c)
-          boolean?)]{
+         (values symbol?
+                 nonnegative-exact-integer?
+                 nonnegative-exact-integer?
+                 struct-accessor-procedure?
+                 struct-mutator-procedure?
+                 (listof nonnegative-exact-integer?)
+                 (or/c struct-type? false/c)
+                 boolean?)]{

 Returns eight values that provide information about the structure type
 descriptor @scheme[struct-type], assuming that the type is controlled