From bf717526b0d76546793db9a52863547fbb030706 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Wed, 27 Jun 2007 00:17:04 +0000 Subject: [PATCH] start on regexp functions; further formatting improvements for defproc svn: r6745 --- collects/scribble/manual.ss | 174 ++++--- collects/scribble/scheme.ss | 3 +- collects/scribble/struct.ss | 68 ++- .../scribblings/reference/reference.scrbl | 20 +- collects/scribblings/reference/regexps.scrbl | 454 ++++++++++++++++++ collects/scribblings/reference/struct.scrbl | 35 +- 6 files changed, 629 insertions(+), 125 deletions(-) create mode 100644 collects/scribblings/reference/regexps.scrbl diff --git a/collects/scribble/manual.ss b/collects/scribble/manual.ss index dcd345bb6a..d01ed7afa4 100644 --- a/collects/scribble/manual.ss +++ b/collects/scribble/manual.ss @@ -393,20 +393,21 @@ [(eq? v '...) dots0] [else v]))] - [prototype-size (lambda (s) - (let loop ([s s]) + [prototype-size (lambda (s first-combine next-combine) + (let loop ([s s][combine first-combine]) (if (null? s) - 1 - (+ 1 (loop (cdr s)) - (cond - [(symbol? (car s)) (string-length (symbol->string (car s)))] - [(pair? (car s)) - (if (keyword? (caar s)) - (+ (string-length (keyword->string (caar s))) - 3 - (string-length (symbol->string (cadar s)))) - (string-length (symbol->string (caar s))))] - [else 0])))))]) + 0 + (combine + (loop (cdr s) next-combine) + (cond + [(symbol? (car s)) (string-length (symbol->string (car s)))] + [(pair? (car s)) + (if (keyword? (caar s)) + (+ (string-length (keyword->string (caar s))) + 3 + (string-length (symbol->string (cadar s)))) + (string-length (symbol->string (caar s))))] + [else 0])))))]) (parameterize ([current-variable-list (map (lambda (i) (and (pair? i) @@ -422,34 +423,41 @@ append (map (lambda (stx-id prototype arg-contracts result-contract first?) - (append - (list - (list (make-flow - (let-values ([(required optional more-required) - (let loop ([a (cdr prototype)][r-accum null]) - (if (or (null? a) - (and (has-optional? (car a)))) - (let ([req (reverse r-accum)]) - (let loop ([a a][o-accum null]) - (if (or (null? a) - (not (has-optional? (car a)))) - (values req (reverse o-accum) a) - (loop (cdr a) (cons (car a) o-accum))))) - (loop (cdr a) (cons (car a) r-accum))))] - [(tagged) (if first? - (make-target-element - #f - (list (to-element (make-just-context (car prototype) - stx-id))) - (register-scheme-definition stx-id)) - (to-element (make-just-context (car prototype) - stx-id)))] - [(short?) (or ((prototype-size prototype) . < . 40) - ((length prototype) . < . 3))] - [(end) (list (to-flow spacer) - (to-flow 'rarr) - (to-flow spacer) - (make-flow (list (result-contract))))]) + (let*-values ([(required optional more-required) + (let loop ([a (cdr prototype)][r-accum null]) + (if (or (null? a) + (and (has-optional? (car a)))) + (let ([req (reverse r-accum)]) + (let loop ([a a][o-accum null]) + (if (or (null? a) + (not (has-optional? (car a)))) + (values req (reverse o-accum) a) + (loop (cdr a) (cons (car a) o-accum))))) + (loop (cdr a) (cons (car a) r-accum))))] + [(tagged) (if first? + (make-target-element + #f + (list (to-element (make-just-context (car prototype) + stx-id))) + (register-scheme-definition stx-id)) + (to-element (make-just-context (car prototype) + stx-id)))] + [(flat-size) (prototype-size prototype + +)] + [(short?) (or (flat-size . < . 40) + ((length prototype) . < . 3))] + [(res) (result-contract)] + [(result-next-line?) ((+ (if short? + flat-size + (prototype-size prototype + max)) + (flow-element-width res)) + . >= . 50)] + [(end) (list (to-flow spacer) + (to-flow 'rarr) + (to-flow spacer) + (make-flow (list res)))]) + (append + (list + (list (make-flow (if short? (make-table-if-necessary "prototype" @@ -468,12 +476,16 @@ 'paren-shape #\?)))) (map arg->elem more-required)))) - end))) + (if result-next-line? + null + end)))) (let ([not-end - (list (to-flow spacer) - (to-flow spacer) - (to-flow spacer) - (to-flow spacer))]) + (if result-next-line? + (list (to-flow spacer)) + (list (to-flow spacer) + (to-flow spacer) + (to-flow spacer) + (to-flow spacer)))]) (list (make-table "prototype" @@ -513,40 +525,46 @@ #f (list a "]" (schemeparenfont ")"))))] [else a]))) - (if (null? (cdr args)) + (if (and (null? (cdr args)) + (not result-next-line?)) end not-end)) - (loop (cdr args) (sub1 req)))))))))))))) - (apply append - (map (lambda (v arg-contract) - (cond - [(pair? v) - (list - (list - (make-flow - (make-table-if-necessary - "argcontract" - (list - (let ([v (if (keyword? (car v)) - (cdr v) - v)]) - (append - (list - (to-flow (hspace 2)) - (to-flow (arg->elem v)) - (to-flow spacer) - (to-flow ":") - (to-flow spacer) - (make-flow (list (arg-contract)))) - (if (has-optional? v) - (list (to-flow spacer) - (to-flow "=") - (to-flow spacer) - (to-flow (to-element (caddr v)))) - null))))))))] - [else null])) - (cdr prototype) - arg-contracts)))) + (loop (cdr args) (sub1 req))))))))))))) + (if result-next-line? + (list (list (make-flow (make-table-if-necessary + "prototype" + (list end))))) + null) + (apply append + (map (lambda (v arg-contract) + (cond + [(pair? v) + (list + (list + (make-flow + (make-table-if-necessary + "argcontract" + (list + (let ([v (if (keyword? (car v)) + (cdr v) + v)]) + (append + (list + (to-flow (hspace 2)) + (to-flow (arg->elem v)) + (to-flow spacer) + (to-flow ":") + (to-flow spacer) + (make-flow (list (arg-contract)))) + (if (has-optional? v) + (list (to-flow spacer) + (to-flow "=") + (to-flow spacer) + (to-flow (to-element (caddr v)))) + null))))))))] + [else null])) + (cdr prototype) + arg-contracts))))) stx-ids prototypes arg-contractss diff --git a/collects/scribble/scheme.ss b/collects/scribble/scheme.ss index 6b4066b4bc..c820bad7fb 100644 --- a/collects/scribble/scheme.ss +++ b/collects/scribble/scheme.ss @@ -372,7 +372,8 @@ (make-link-element "schemesyntaxlink" (list s) stag)] [vd (make-link-element "schemevaluelink" (list s) vtag)] - [else s]))))) + [else s])))) + (lambda () s)) (literalize-spaces s)) (cond [(positive? quote-depth) value-color] diff --git a/collects/scribble/struct.ss b/collects/scribble/struct.ss index 63839ebcd8..14ce466733 100644 --- a/collects/scribble/struct.ss +++ b/collects/scribble/struct.ss @@ -95,28 +95,33 @@ delayed-element-ref delayed-element-set!) (make-struct-type 'delayed-element #f - 1 1 #f + 2 1 #f (list (cons prop:serializable (make-serialize-info (lambda (d) - (unless (delayed-element-ref d 1) + (unless (delayed-element-ref d 2) (error 'serialize-delayed-element "cannot serialize a delayed element that was not resolved: ~e" d)) - (vector (delayed-element-ref d 1))) + (vector (delayed-element-ref d 2))) #'deserialize-delayed-element #f (or (current-load-relative-directory) (current-directory))))))) (define-syntax delayed-element (list-immutable #'struct:delayed-element #'make-delayed-element #'delayed-element? - (list-immutable #'delayed-element-render) - (list-immutable #'set-delayed-element-render!) + (list-immutable #'delayed-element-sizer + #'delayed-element-render) + (list-immutable #'set-delayed-element-sizer! + #'set-delayed-element-render!) #t)) (define delayed-element-render (make-struct-field-accessor delayed-element-ref 0)) + (define delayed-element-sizer (make-struct-field-accessor delayed-element-ref 1)) (define set-delayed-element-render! (make-struct-field-mutator delayed-element-set! 0)) + (define set-delayed-element-sizer! (make-struct-field-mutator delayed-element-set! 1)) (provide/contract - (struct delayed-element ([render (any/c part? any/c . -> . list?)]))) + (struct delayed-element ([render (any/c part? any/c . -> . list?)] + [sizer (-> any)]))) (provide deserialize-delayed-element) (define deserialize-delayed-element @@ -124,9 +129,9 @@ (provide force-delayed-element) (define (force-delayed-element d renderer sec ht) - (or (delayed-element-ref d 1) + (or (delayed-element-ref d 2) (let ([v ((delayed-element-ref d 0) renderer sec ht)]) - (delayed-element-set! d 1 v) + (delayed-element-set! d 2 v) v))) ;; ---------------------------------------- @@ -163,5 +168,52 @@ renderer sec ht)] [else (element->string c)])])) + ;; ---------------------------------------- + + (provide flow-element-width + element-width) + + (define (element-width s) + (cond + [(string? s) (string-length s)] + [(element? s) (apply + (map element-width (element-content s)))] + [(delayed-element? s) (element-width ((delayed-element-sizer s)))] + [else 1])) + + (define (paragraph-width s) + (apply + (map element-width (paragraph-content s)))) + + (define (flow-width f) + (apply max 0 (map flow-element-width (flow-paragraphs f)))) + + (define (flow-element-width p) + (cond + [(paragraph? p) (paragraph-width p)] + [(table? p) (table-width p)] + [(itemization? p) (itemization-width p)] + [(blockquote? p) (blockquote-width p)] + [(delayed-flow-element? p) 1])) + + (define (table-width p) + (let ([flowss (table-flowss p)]) + (if (null? flowss) + 0 + (let loop ([flowss flowss]) + (if (null? (car flowss)) + 0 + (+ (apply max + 0 + (map flow-width + (map car flowss))) + (loop (map cdr flowss)))))))) + + (define (itemization-width p) + (apply max 0 (map flow-width (itemization-flows p)))) + + (define (blockquote-width p) + (+ 4 (apply max 0 (map paragraph-width (blockquote-paragraphs p))))) + + ;; ---------------------------------------- + ) diff --git a/collects/scribblings/reference/reference.scrbl b/collects/scribblings/reference/reference.scrbl index 391c134e2c..286529b116 100644 --- a/collects/scribblings/reference/reference.scrbl +++ b/collects/scribblings/reference/reference.scrbl @@ -28,25 +28,7 @@ language. @subsection[#:tag "mz:char-input"]{From Bytes to Characters} @;------------------------------------------------------------------------ -@section["Regular Expressions"] - -@require["rx.ss"] - -Common grammar: - -@common-table - -Rx table: - -@rx-table - -Px table: - -@px-table - -Types: - -@type-table +@include-section["regexps.scrbl"] @;------------------------------------------------------------------------ diff --git a/collects/scribblings/reference/regexps.scrbl b/collects/scribblings/reference/regexps.scrbl new file mode 100644 index 0000000000..39bb11ab8d --- /dev/null +++ b/collects/scribblings/reference/regexps.scrbl @@ -0,0 +1,454 @@ +#reader(lib "docreader.ss" "scribble") +@require[(lib "bnf.ss" "scribble")] +@require["mz.ss"] + +@require["rx.ss"] + +@title[#:tag "mz:regexp"]{Regular Expressions} + +@;{ +\index{regular expressions} +\index{regexps|see{regular expressions}} +\index{pattern matching} +\index{strings!pattern matching} +\index{input ports!pattern matching} +} + +Regular expressions are specified as strings or byte strings, using +the same pattern language as the Unix utility @exec{egrep} or Perl. A +string-specified pattern produces a character regexp matcher, and a +byte-string pattern produces a byte regexp matcher. If a character +regexp is used with a byte string or input port, it matches UTF-8 +encodings (see @secref["mz:encodings"]) of matching character streams; +if a byte regexp is used with a character string, it matches bytes in +the UTF-8 encoding of the string. + +Regular expressions can be compiled into a @defterm{regexp value} for +repeated matches. The @scheme[regexp] and @scheme[byte-regexp] +procedures convert a string or byte string (respectively) into a +regexp value using one syntax of regular expressions that is most +compatible to @exec{egrep}. The @scheme[pregexp] and +@scheme[byte-pregexp] procedures produce a regexp value using a +slightly different syntax of regular expressions that is more +compatible with Perl. In addition, Scheme constants written with +@litchar{#rx} or @litchar{#px} (see @secref["mz:reader"]) produce +compiled regexp values. + +The internal size of a regexp value is limited to 32 kilobytes; this +limit roughly corresponds to a source string with 32,000 literal +characters or 5,000 operators. + +@;------------------------------------------------------------------------ +@section[#:tag "mz:regexp-syntax"]{Regexp Syntax} + +The following syntax specifications describe the content of a string +that represents a regular expression. The syntax of the corresponding +string may involve extra escape characters. For example, the regular +expression @litchar["(.*)\\1"] can be represented with the string +@scheme["(.*)\\1"] or the regexp constant @scheme[#rx"(.*)\\1"]; the +@litchar["\\"] in the regular expression must be escaped to include it +in a string or regexp constant. + +The @scheme[regexp] and @scheme[pregexp] syntaxes share a common core: + +@common-table + +The following completes the grammar for @scheme[regexp], which treats +@litchar["{"] and @litchar["}"] as literals, @litchar["\\"] as a +literal within ranges, and @litchar["\\"] as a literal producer +outside of ranges. + +@rx-table + +The following completes the grammar for @scheme[pregexp], which uses +@litchar["{"] and @litchar["}"] bounded repetition and uses +@litchar["\\"] for meta-characters both inside and outside of ranges. + +@px-table + +@;------------------------------------------------------------------------ +@section{Additional Syntactic Constraints} + +In addition to matching a grammars, regular expressions must meet two +syntactic restrictions: + +@itemize{ + + @item{In a @nonterm{repeat} other than @nonterm{atom}@litchar{?}, + then @nonterm{atom} must not match an empty sequence.} + + @item{In a @litchar{(?<=}@nonterm{regexp}@litchar{)} or + @litchar{(?