From dc5670cad5383adcbfeb88d081be06acea225dc1 Mon Sep 17 00:00:00 2001
From: Matthew Flatt <mflatt@racket-lang.org>
Date: Tue, 20 Nov 2007 13:54:07 +0000
Subject: [PATCH] finish up regexp (and former mzlib/string.ss) docs

svn: r7785
---
 collects/scribblings/reference/numbers.scrbl |  20 ++
 collects/scribblings/reference/regexps.scrbl | 330 +++++++++++++++----
 collects/sirmail/readr.ss                    |   7 +-
 3 files changed, 282 insertions(+), 75 deletions(-)

diff --git a/collects/scribblings/reference/numbers.scrbl b/collects/scribblings/reference/numbers.scrbl
index 2731ebbec0..829dbcd593 100644
--- a/collects/scribblings/reference/numbers.scrbl
+++ b/collects/scribblings/reference/numbers.scrbl
@@ -1,5 +1,6 @@
 #lang scribble/doc
 @require["mz.ss"]
+@require[(only-in scheme pi)]
 
 @title[#:tag "numbers"]{Numbers}
 
@@ -652,6 +653,25 @@ one of the last three integers must be non-zero.}
           (string->number "111" 7)  (string->number "#b111" 7)]
 }
 
+@defproc[(real->decimal-string [n real?] [decimal-digits nonnegative-exact-integer? 2])
+         string?]{
+
+Prints @scheme[n] into a string and returns the string. The printed
+form of @scheme[n] shows exactly @scheme[decimal-digits] digits after
+the decimal point. The printed for uses a minus sign if @scheme[n] is
+negative, and it does not use a plus sign if @scheme[n] is positive.
+
+Before printing, @scheme[n] is converted to an exact number,
+multiplied by @scheme[(expt 10 decimal-digits)], rounded, and then
+divided again by @scheme[(expt 10 decimal-digits)].  The result of ths
+process is an exact number whose decimal representation has no more
+than @scheme[decimal-digits] digits after the decimal (and it is
+padded with trailing zeros if necessary).
+
+@examples[
+(real->decimal-string pi)
+(real->decimal-string pi 5)
+]}
 
 @defproc[(integer-bytes->integer [bstr bytes?]
                                  [signed? any/c]
diff --git a/collects/scribblings/reference/regexps.scrbl b/collects/scribblings/reference/regexps.scrbl
index 694c988a28..dd8074928d 100644
--- a/collects/scribblings/reference/regexps.scrbl
+++ b/collects/scribblings/reference/regexps.scrbl
@@ -178,6 +178,21 @@ syntax (see @secref["regexp-syntax"]). The result can be used with
 (byte-pregexp #"ap*le")
 ]}
 
+@defproc*[([(regexp-quote [str string?][case-sensitive? any/c #f]) string?]
+           [(regexp-quote [bstr bytes?][case-sensitive? any/c #f]) bytes?])]{
+
+Produces a string or byte string suitable for use with @scheme[regexp]
+to match the literal sequence of characters in @scheme[str] or
+sequence of bytes in @scheme[bstr]. If @scheme[case-sensitive?] is
+true, the resulting regexp matches letters in @scheme[str] or
+@scheme[bytes] case-insensitively, otherwise it matches
+case-sensitively.
+
+@examples[
+(regexp-match "." "apple.scm")
+(regexp-match (regexp-quote ".") "apple.scm")
+]}
+
 
 @;------------------------------------------------------------------------
 @section{Regexp Matching}
@@ -270,7 +285,66 @@ used for matching may be different than the bytes read and discarded
 after the match completes; the matcher inspects only the peeked
 bytes. To avoid such interleaving, use @scheme[regexp-match-peek]
 (with a @scheme[progress-evt] argument) followed by
-@scheme[port-commit-peeked].}
+@scheme[port-commit-peeked].
+
+@examples[
+(regexp-match #rx"x." "12x4x6")
+(regexp-match #rx"y." "12x4x6")
+(regexp-match #rx"x." "12x4x6" 3)
+(regexp-match #rx"x." "12x4x6" 3 4)
+(regexp-match #rx#"x." "12x4x6")
+(regexp-match #rx"x." "12x4x6" 0 #f (current-output-port))
+(regexp-match #rx"(-[0-9]*)+" "a-12--345b")
+]}
+
+
+@defproc[(regexp-match* [pattern (or/c string? bytes? regexp? byte-regexp?)]
+                        [input (or/c string? bytes? input-port?)]
+                        [start-pos nonnegative-exact-integer? 0]
+                        [end-pos (or/c nonnegative-exact-integer? false/c) #f])
+         (listof (or/c string? bytes?))]{
+
+Like @scheme[regexp-match], but the result is a list of strings or
+byte strings corresponding to a sequence of matches of
+@scheme[pattern] in @scheme[input]. (Unlike @scheme[regexp-match],
+results for parenthesized sub-patterns in @scheme[pattern] are not
+returned.)  If @scheme[pattern] matches a zero-length string or byte
+sequence along the way, the @exnraise[exn:fail].
+
+If @scheme[input] contains no matches (in the range @scheme[start-pos]
+to @scheme[end-pos]), @scheme[null] is returned. Otherwise, each item
+in the resulting list is a distinct substring or byte sequence from
+@scheme[input] that matches @scheme[pattern]. The @scheme[end-pos]
+argument can be @scheme[#f] to match to the end of @scheme[input]
+(which corresponds to an end-of-file if @scheme[input] is an input
+port).
+
+@examples[
+(regexp-match* #rx"x." "12x4x6")
+]}
+
+
+@defproc[(regexp-match/fail-without-reading 
+                       [pattern (or/c string? bytes? regexp? byte-regexp?)]
+                       [input input-port?]
+                       [start-pos nonnegative-exact-integer? 0]
+                       [end-pos (or/c nonnegative-exact-integer? false/c) #f]
+                       [output-port (or/c output-port? false/c) #f])
+         (or/c (listof (or/c (cons (or/c string? bytes?)
+                                   (or/c string? bytes?))
+                             false/c))
+               false/c)]{
+
+Like @scheme[regexp-match] on input ports, except that if the match
+fails, no characters are read and discarded from @scheme[in].
+
+This procedure is especially useful with a @scheme[pattern] that
+begins with a start-of-string @litchar{^} or with a non-@scheme[#f]
+@scheme[end-pos], since each limits the amount of peeking into the
+port. Otherwise, beware that a large portion of the stream may be
+peeked (and therefore pulled into memory) before the match succeeds or
+fails.}
+
 
 @defproc[(regexp-match-positions [pattern (or/c string? bytes? regexp? byte-regexp?)]
                         [input (or/c string? bytes? input-port?)]
@@ -294,7 +368,29 @@ Range results are returned in a @scheme[substring]- and
 @scheme[subbytes]-compatible manner, independent of
 @scheme[start-pos]. In the case of an input port, the returned
 positions indicate the number of bytes that were read, including
-@scheme[start-pos], before the first matching byte.}
+@scheme[start-pos], before the first matching byte.
+
+@examples[
+(regexp-match-positions #rx"x." "12x4x6")
+(regexp-match-positions #rx"x." "12x4x6" 3)
+(regexp-match-positions #rx"(-[0-9]*)+" "a-12--345b")
+]}
+
+
+@defproc[(regexp-match-positions* [pattern (or/c string? bytes? regexp? byte-regexp?)]
+                        [input (or/c string? bytes? input-port?)]
+                        [start-pos nonnegative-exact-integer? 0]
+                        [end-pos (or/c nonnegative-exact-integer? false/c) #f])
+         (listof (cons nonnegative-exact-integer?
+                       nonnegative-exact-integer?))]{
+
+Like @scheme[regexp-match-positions], but returns multiple matches
+like @scheme[regexp-match*].
+
+@examples[
+(regexp-match-positions #rx"x." "12x4x6")
+]}
+
 
 @defproc[(regexp-match? [pattern (or/c string? bytes? regexp? byte-regexp?)]
                         [input (or/c string? bytes? input-port?)]
@@ -304,7 +400,26 @@ positions indicate the number of bytes that were read, including
            boolean?]{
 
 Like @scheme[regexp-match], but returns merely @scheme[#t] when the
-match succeeds, @scheme[#f] otherwise.}
+match succeeds, @scheme[#f] otherwise.
+
+@examples[
+(regexp-match? #rx"x." "12x4x6")
+(regexp-match? #rx"y." "12x4x6")
+]}
+
+
+@defproc[(regexp-match-exact? [pattern (or/c string? bytes? regexp? byte-regexp?)]
+                              [input (or/c string? bytes? input-port?)])
+          boolean?]{
+
+Like @scheme[regexp-match?], but @scheme[#t] is only returned when the
+entire content of @scheme[input] matches @scheme[pattern].
+
+@examples[
+(regexp-match-exact? #rx"x." "12x4x6")
+(regexp-match-exact? #rx"1.*x." "12x4x6")
+]}
+
 
 @defproc[(regexp-match-peek [pattern (or/c string? bytes? regexp? byte-regexp?)]
                             [input input-port?]
@@ -323,7 +438,18 @@ becomes ready, then the match stops peeking from @scheme[input-port]
 and returns @scheme[#f]. The @scheme[progress] argument can be
 @scheme[#f], in which case the peek may continue with inconsistent
 information if another process meanwhile reads from
-@scheme[input-port].}
+@scheme[input-port].
+
+@examples[
+(define p (open-input-string "a abcd"))
+(regexp-match-peek ".*bc" p)
+(regexp-match-peek ".*bc" p 2)
+(regexp-match ".*bc" p 2)
+(peek-char p)
+(regexp-match ".*bc" p)
+(peek-char p)
+]}
+
 
 @defproc[(regexp-match-peek-positions [pattern (or/c string? bytes? regexp? byte-regexp?)]
                             [input input-port?]
@@ -339,6 +465,7 @@ Like @scheme[regexp-match-positions] on input ports, but only peeks
 bytes from @scheme[input-port] instead of reading them, and with a
 @scheme[progress] argument like @scheme[regexp-match-peek].}
 
+
 @defproc[(regexp-match-peek-immediate [pattern (or/c string? bytes? regexp? byte-regexp?)]
                             [input input-port?]
                             [start-pos nonnegative-exact-integer? 0]
@@ -353,6 +480,7 @@ that are available from @scheme[input-port] without blocking.  The
 match fails if not-yet-available characters might be used to match
 @scheme[pattern].}
 
+
 @defproc[(regexp-match-peek-positions-immediate [pattern (or/c string? bytes? regexp? byte-regexp?)]
                             [input input-port?]
                             [start-pos nonnegative-exact-integer? 0]
@@ -368,88 +496,146 @@ only bytes that are available from @scheme[input-port] without
 blocking. The match fails if not-yet-available characters might be
 used to match @scheme[pattern].}
 
-@;{
+
+@defproc[(regexp-match-peek-positions* [pattern (or/c string? bytes? regexp? byte-regexp?)]
+                            [input input-port?]
+                            [start-pos nonnegative-exact-integer? 0]
+                            [end-pos (or/c nonnegative-exact-integer? false/c) #f])
+         (listof (cons nonnegative-exact-integer?
+                       nonnegative-exact-integer?))]{
+
+Like @scheme[regexp-match-peek-positions], but returns multiple matches like
+@scheme[regexp-match*].}
+
+@;------------------------------------------------------------------------
+@section{Regexp Splitting}
+
+@defproc[(regexp-split [pattern (or/c string? bytes? regexp? byte-regexp?)]
+                       [input (or/c string? bytes? input-port?)]
+                       [start-pos nonnegative-exact-integer? 0]
+                       [end-pos (or/c nonnegative-exact-integer? false/c) #f])
+         (listof (or/c string? bytes?))]{
+
+The complement of @scheme[regexp-match*]: the result is a list of
+strings (if @scheme[pattern] is a string or character regexp and
+@scheme[input] is a string) or byte strings (otherwise) from in
+@scheme[input] that are separated by matches to
+@scheme[pattern]. Adjacent matches are separated with @scheme[""] or
+@scheme[#""]. If @scheme[pattern] matches a zero-length string or byte
+sequence along the way, the @exnraise[exn:fail].
+
+If @scheme[input] contains no matches (in the range @scheme[start-pos]
+to @scheme[end-pos]), the result is a list containing @scheme[input]'s
+content (from @scheme[start-pos] to @scheme[end-pos]) as a single
+element. If a match occurs at the beginning of @scheme[input] (at
+@scheme[start-pos]), the resulting list will start with an empty
+string or byte string, and if a match occurs at the end (at
+@scheme[end-pos]), the list will end with an empty string or byte
+string. The @scheme[end-pos] argument can be @scheme[#f], in which
+case splitting goes to the end of @scheme[input] (which corresponds to
+an end-of-file if @scheme[input] is an input port).
+
+@examples[
+(regexp-split #rx"x" "12x4x6")
+(regexp-split #rx"." "12x4x6")
+]}
 
 @;------------------------------------------------------------------------
 @section{Regexp Substitution}
 
-@defproc[(regexp-replace [char-pattern any/c][string any/c][insert any/c]) any]{
+@defproc[(regexp-replace [pattern (or/c string? bytes? regexp? byte-regexp?)]
+                         [input (or/c string? bytes?)]
+                         [insert (or/c string? bytes? 
+                                       (string? . -> . string?)
+                                       (bytes? . -> . bytes?))])
+         (or/c string? bytes?)]{
 
-Performs a match using @scheme[pattern] on @scheme[input] and
-then returns a string in which the matching portion of @scheme[input]
-is replaced with @scheme[insert-string].  If @scheme[char-pattern]
-matches no part of @scheme[string], then @scheme[string] is returned
+Performs a match using @scheme[pattern] on @scheme[input], and then
+returns a string or byte string in which the matching portion of
+@scheme[input] is replaced with @scheme[insert].  If @scheme[pattern]
+matches no part of @scheme[input], then @scheme[iput] is returned
 unmodified.
 
- The @scheme[char-pattern] must be a string or a character regexp value
- (not a byte string or a byte regexp value).
+If @scheme[pattern] is a string or character regexp and @scheme[input]
+is a string, then @scheme[insert] must be a string or a procedure that
+accept strings, and the result is a string. If @scheme[pattern] is a
+byte string or byte regexp, or if @scheme[input] is a byte string,
+then @scheme[insert] as a string is converted to a byte string,
+@scheme[insert] as a procedure is called with a byte string, and the
+result is a byte string.
 
- If @scheme[insert-string] contains ``\&'', then ``\&'' is replaced with
- the matching portion of @scheme[string] before it is substituted into
- @scheme[string].  If @scheme[insert-string] contains
- ``{\Backslash}@scheme[n]'' (for some integer @scheme[n]), then it is
- replaced with the @scheme[n]th matching sub-expression from
- @scheme[string].\footnote{The backslash is a character in the string, so
- an extra backslash is required to specify the string as a Scheme
- constant. For example, the Scheme constant
- @scheme["\\1"] is ``{\Backslash}1''.} ``\&''
- and ``{\Backslash}0'' are synonymous. If the @scheme[n]th sub-expression
- was not used in the match or if @scheme[n] is greater than the number of
- sub-expressions in @scheme[pattern], then ``{\Backslash}@scheme[n]'' is
- replaced with the empty string.
+If @scheme[insert] contains @litchar["&"], then @litchar["&"]
+is replaced with the matching portion of @scheme[input] before it is
+substituted into the match's place.  If @scheme[insert] contains
+@litchar["\\"]@nonterm{n} for some integer @nonterm{n}, then it is
+replaced with the @nonterm{n}th matching sub-expression from
+@scheme[input]. A @litchar{&} and @litchar["\\0"] are synonymous. If
+the @nonterm{n}th sub-expression was not used in the match, or if
+@nonterm{n} is greater than the number of sub-expressions in
+@scheme[pattern], then @litchar["\\"]@nonterm{n} is replaced with the
+empty string.
 
- A literal ``\&'' or ``{\Backslash}'' is specified as
- ``{\Backslash}\&'' or ``{\Backslash}{\Backslash}'', respectively.  If
- @scheme[insert-string] contains ``{\Backslash}\$'', then
- ``{\Backslash}\$'' is replaced with the empty string. (This can be
- used to terminate a number @scheme[n] following a backslash.) If a
- ``{\Backslash}'' is followed by anything other than a digit, ``\&'',
- ``{\Backslash}'', or ``\$'', then it is treated as ``{\Backslash}0''.}
+To substitute a literal @litchar{&} or @litchar["\\"], use
+@litchar["\\&"] and @litchar["\\\\"], respectively, in
+@scheme[insert]. A @litchar["\\$"] in @scheme[insert] is
+equivalent to an empty sequence; this can be used to terminate a
+number @nonterm{n} following @litchar["\\"]. If a @litchar["\\"] in
+@scheme[insert] is followed by anything other than a digit,
+@litchar{&}, @litchar["\\"], or @litchar{$}, then the @litchar["\\"]
+by itself is treated as @litchar["\\0"].
 
- @item{@defproc[(regexp-replace [byte-pattern any/c][string-or-bytes any/c][insert-string-or-bytes any/c]) any]
-%
-  is analogous to @scheme[regexp-replace] on strings, where
-  @scheme[byte-pattern] is a byte string or a byte regexp value. The result
-  is always a byte string.}
+Note that the @litchar["\\"] described in the previous paragraphs is a
+character or byte of @scheme[input]. To write such an @scheme[input]
+as a Scheme string literal, an escaping @litchar["\\"] is needed
+before the @litchar["\\"]. For example, the Scheme constant
+@scheme["\\1"] is @litchar["\\1"].
 
- @item{@defproc[(regexp-replace [char-pattern any/c][string any/c][proc any/c]) any]
-%
-   is like @scheme[regexp-replace], but instead of an
-   @scheme[insert-string] third argument, the third argument is a
-   procedure that accepts match strings and produces a string to
-   replace the match. The @scheme[proc] must accept the same number of
-   arguments as @scheme[regexp-match] produces list elements for a
-   successful match with @scheme[char-pattern].}
+@examples[
+(regexp-replace "mi" "mi casa" "su")
+(regexp-replace "mi" "mi casa" string-upcase)
+(regexp-replace "([Mm])i ([a-zA-Z]*)" "Mi Casa" "\\1y \\2")
+(regexp-replace "([Mm])i ([a-zA-Z]*)" "mi cerveza Mi Mi Mi"
+                "\\1y \\2")
+(regexp-replace #rx"x" "12x4x6" "\\\\")
+(display (regexp-replace #rx"x" "12x4x6" "\\\\"))
+]}
 
- @item{@defproc[(regexp-replace [byte-pattern any/c][string-or-bytes any/c][proc any/c]) any]
-%
-   is analogous to @scheme[regexp-replace] on strings and a procedure
-   argument, but the procedure accepts byte strings to produce a byte
-   string, instead of character strings.}
+@defproc[(regexp-replace* [pattern (or/c string? bytes? regexp? byte-regexp?)]
+                          [input (or/c string? bytes?)]
+                          [insert (or/c string? bytes? 
+                                        (string? . -> . string?)
+                                        (bytes? . -> . bytes?))])
+         (or/c string? bytes?)]{
 
- @item{@defproc[(regexp-replace* [pattern any/c][string any/c][insert-string any/c]) any]
-%
- is the same as @scheme[regexp-replace], except that every instance of
- @scheme[pattern] in @scheme[string] is replaced with
- @scheme[insert-string]. Only non-overlapping instances of @scheme[pattern]
- in the original @scheme[string] are replaced, so instances of
- @scheme[pattern] within inserted strings are \Em{not} replaced
- recursively. If, in the process of repeating matches, @scheme[pattern]
- matches an empty string, the @exnraise[exn:fail].}
+Like @scheme[regexp-replace], except that every instance of
+@scheme[pattern] in @scheme[input] is replaced with @scheme[insert],
+instead of just the first match. Only non-overlapping instances of
+@scheme[pattern] in @scheme[input] are replaced, so instances of
+@scheme[pattern] within inserted strings are @italic{not} replaced
+recursively. If, in the process of repeating matches, @scheme[pattern]
+matches an empty string, the @exnraise[exn:fail].
 
- @item{@defproc[(regexp-replace* [byte-pattern any/c][bytes any/c][insert-bytes any/c]) any]
-%
- is analogous to @scheme[regexp-replace*] on strings.}
+@examples[
+(regexp-replace* "([Mm])i ([a-zA-Z]*)" "mi cerveza Mi Mi Mi" 
+                 "\\1y \\2")
+(regexp-replace* "([Mm])i ([a-zA-Z]*)" "mi cerveza Mi Mi Mi" 
+                 (lambda (all one two)
+                   (string-append (string-downcase one) "y"
+                                  (string-upcase two))))
+(display (regexp-replace* #rx"x" "12x4x6" "\\\\"))
+]}
 
- @item{@defproc[(regexp-replace* [char-pattern any/c][string any/c][proc any/c]) any]
-%
-   is like @scheme[regexp-replace] with a procedure argument, but with
-   multiple instances replaced. The given @scheme[proc] is called once
-   for each match.}
+@defproc*[([(regexp-replace-quote [str string?][case-sensitive? any/c #f]) string?]
+           [(regexp-replace-quote [bstr bytes?][case-sensitive? any/c #f]) bytes?])]{
 
- @item{@defproc[(regexp-replace* [byte-pattern any/c][bytes any/c][proc any/c]) any]
-%
-   is like @scheme[regexp-replace*] with a string and procedure
-   argument, but the procedure accepts and produces byte strings.}
+Produces a string suitable for use as the third argument to
+@scheme[regexp-replace] to insert the literal sequence of characters
+in @scheme[str] or bytes in @scheme[bstr] as a replacement.
+Concretely, every @litchar["\\"] and @litchar{&} in @scheme[str] or
+@scheme[bstr] is protected by a quoting @litchar["\\"].
+
+@examples[
+(regexp-replace "UT" "Go UT!" "A&M")
+(regexp-replace "UT" "Go UT!" (regexp-replace-quote "A&M"))
+]}
 
-}
diff --git a/collects/sirmail/readr.ss b/collects/sirmail/readr.ss
index 5ac3464de4..171e3da196 100644
--- a/collects/sirmail/readr.ss
+++ b/collects/sirmail/readr.ss
@@ -1868,12 +1868,13 @@
 
       ;; Optional GC icon (lots of work for this little thing!)
       (when (get-pref 'sirmail:show-gc-icon)
-	(let* ([gif (make-object bitmap% (build-path (collection-path "icons") "recycle.gif"))]
+	(let* ([gif (make-object bitmap% (build-path (collection-path "icons") "recycle.png"))]
 	       [w (send gif get-width)]
 	       [h (send gif get-height)]
-	       [recycle-bm (make-object bitmap% (quotient w 2) (quotient h 2))]
+               [scale 1]
+	       [recycle-bm (make-object bitmap% (quotient w scale) (quotient h scale))]
 	       [dc (make-object bitmap-dc% recycle-bm)])
-	  (send dc set-scale 0.5 0.5)
+	  (send dc set-scale (/ 1 scale) (/ 1 scale))
 	  (send dc draw-bitmap gif 0 0)
 	  (send dc set-bitmap #f)
 	  (let* ([w (send recycle-bm get-width)]