diff --git a/collects/scribblings/reference/regexps.scrbl b/collects/scribblings/reference/regexps.scrbl index f7b4da0214..c810e9858e 100644 --- a/collects/scribblings/reference/regexps.scrbl +++ b/collects/scribblings/reference/regexps.scrbl @@ -1,5 +1,8 @@ #lang scribble/doc -@(require scribble/bnf "mz.rkt" "rx.rkt") +@(require scribble/bnf + "mz.rkt" + "rx.rkt" + (for-syntax racket/base)) @title[#:tag "regexp"]{Regular Expressions} @@ -8,6 +11,27 @@ @section-index["strings" "pattern matching"] @section-index["input ports" "pattern matching"] +@(define-syntax (rx-examples stx) + (syntax-case stx () + [(_ [num rx input] ...) + (with-syntax ([(ex ...) + (map (lambda (num rx input) + `(eval:alts #,(racket + (code:line + (regexp-match ,rx ,input) + (code:comment @#,t["ex" + (let ([s (number->string ,num)]) + (elemtag `(rxex ,s) + (racketcommentfont s))) + ,(if (pregexp? (syntax-e rx)) + `(list ", uses " (racketmetafont "#px")) + "")]))) + (regexp-match ,rx ,input))) + (syntax->list #'(num ...)) + (syntax->list #'(rx ...)) + (syntax->list #'(input ...)))]) + #`(examples ex ...))])) + @guideintro["regexp"]{regular expressions} @deftech{Regular expressions} are specified as strings or byte @@ -66,6 +90,45 @@ The Unicode categories follow. @category-table +@rx-examples[ +[1 #rx"a|b" "cat"] +[2 #rx"[at]" "cat"] +[3 #rx"ca*[at]" "caaat"] +[4 #rx"ca+[at]" "caaat"] +[5 #rx"ca?t?" "ct"] +[6 #rx"ca*?[at]" "caaat"] +[7 #px"ca{2}" "caaat"] +[8 #px"ca{2,}t" "catcaat"] +[9 #px"ca{,2}t" "caaatcat"] +[10 #px"ca{1,2}t" "caaatcat"] +[11 #rx"(c*)(a*)" "caat"] +[12 #rx"[^ca]" "caat"] +[13 #rx".(.)." "cat"] +[14 #rx"^a|^c" "cat"] +[15 #rx"a$|t$" "cat"] +[16 #px"c(.)\\1t" "caat"] +[17 #px".\\b." "cat in hat"] +[18 #px".\\B." "cat in hat"] +[19 #px"\\p{Ll}" "Cat"] +[20 #px"\\P{Ll}" "cat!"] +[21 #rx"\\|" "c|t"] +[22 #rx"[a-f]*" "cat"] +[23 #px"[a-f\\d]*" "1cat"] +[24 #px" [\\w]" "cat hat"] +[25 #px"t[\\s]" "cat\nhat"] +[26 #px"[[:lower:]]+" "Cat"] +[27 #rx"[]]" "c]t"] +[28 #rx"[-]" "c-t"] +[29 #rx"[]a[]+" "c[a]t"] +[30 #rx"[a^]+" "ca^t"] +[31 #rx".a(?=p)" "cat nap"] +[32 #rx".a(?!t)" "cat nap"] +[33 #rx"(?<=n)a." "cat nap"] +[34 #rx"(?Regexp) Match Regexp, only first possible #co | Look Match empty if Look matches #co - | (?TstPces|Pces) Match 1st Pces if Tst, else 2nd Pces #co + | (?TstPces|Pces) Match 1st Pces if Tst, else 2nd Pces #co 36 | (?TstPces) Match Pces if Tst, empty if not Tst #co Atom ::= ... ... #px - | \N Match latest reported match for N##th _(_ #px + | \N Match latest reported match for N##th _(_ #px 16 | Class Match any character in Class #px - | \b Match _\w*_ boundary #px - | \B Match where _\b_ does not #px - | \p{Property} Match (UTF-8 encoded) in Property #px - | \P{Property} Match (UTF-8 encoded) not in Property #px + | \b Match _\w*_ boundary #px 17 + | \B Match where _\b_ does not #px 18 + | \p{Property} Match (UTF-8 encoded) in Property #px 19 + | \P{Property} Match (UTF-8 encoded) not in Property #px 20 Literal :== Any character except _(_, _)_, _*_, _+_, _?_, _[_, _._, _^_, _\_, or _|_ #rx Literal :== Any character except _(_, _)_, _*_, _+_, _?_, _[_, _]_, _{_, _}_, _._, _^_, _\_, or _|_ #px - | \Aliteral Match Aliteral #ot + | \Aliteral Match Aliteral #ot 21 Aliteral :== Any character #rx Aliteral :== Any character except _a_-_z_, _A_-_Z_, _0_-_9_ #px - Rng ::= ] Rng contains _]_ only #co - | - Rng contains _-_ only #co + Rng ::= ] Rng contains _]_ only #co 27 + | - Rng contains _-_ only #co 28 | Mrng Rng contains everything in Mrng #co | Mrng- Rng contains _-_ and everything in Mrng #co - Mrng ::= ]Lrng Mrng contains _]_ and everything in Lrng #co - | -Lrng Mrng contains _-_ and everything in Lrng #co + Mrng ::= ]Lrng Mrng contains _]_ and everything in Lrng #co 29 + | -Lrng Mrng contains _-_ and everything in Lrng #co 29 | Lirng Mrng contains everything in Lirng #co Lirng ::= Riliteral Lirng contains a literal character #co - | Riliteral-Rliteral Lirng contains Unicode range inclusive #co + | Riliteral-Rliteral Lirng contains Unicode range inclusive #co 22 | LirngLrng Lirng contains everything in both #co - Lrng ::= ^ Lrng contains _^_ #co + Lrng ::= ^ Lrng contains _^_ #co 30 | Rliteral-Rliteral Lrng contains Unicode range inclusive #co | ^Lrng Lrng contains _^_ and more #co | Lirng Lrng contains everything in Lirng #co - Look ::= (?=Regexp) Match if Regexp matches #mode - | (?!Regexp) Match if Regexp doesn't match #mode - | (?<=Regexp) Match if Regexp matches preceding #mode - | (?symbol kind) line)] + [(#px"^(.*?) +#(\\w+)(?:| ([0-9]+))$" line kind ex) (list (string->symbol kind) line ex)] [else (error 'grammar-lines "bad line: ~s" line)]))) (define (table-content modes) @@ -201,22 +205,29 @@ x (paragraph plain (list (if (element? x) x (element #f x)))))) (define (row . xs) (map cell xs)) - (define (render-line line) + (define (ex-ref ex) (if ex + (smaller (list 'nbsp (elemref `(rxex ,ex) + (format "ex~a" ex)))) + "")) + (define (render-line line ex) (regexp-case line [(#rx"^([^ ]*) +::= ((?:[^ ]+| [|] )*) +([^ ].*)$" prod val meaning) (row (fixup-ids prod) ::= (lit-ize (fixup-ids val)) - spacer (as-smaller (as-meaning (fixup-ids meaning))))] + spacer (as-smaller (as-meaning (fixup-ids meaning))) + (ex-ref ex))] [(#rx"^([^ ]*) +:== (.*)$" prod meaning) (row (fixup-ids prod) ::= (as-meaning (fixup-ids meaning)) - 'cont 'cont)] + 'cont 'cont + (ex-ref ex))] [(#rx"^ + [|] ((?:[^ ]| [|] )*) +([^ ].*)$" val meaning) (row 'nbsp -or- (lit-ize (fixup-ids val)) - spacer (as-smaller (as-meaning (fixup-ids meaning))))])) + spacer (as-smaller (as-meaning (fixup-ids meaning))) + (ex-ref ex))])) (table (style #f (list (table-columns (map (lambda (s) (style #f (list s))) - '(left left center left left left))))) + '(left left center left left left left))))) (for/list ([line (in-list grammar-lines)] #:when (memq (car line) modes)) - (cons (paragraph plain (list spacer)) (render-line (cdr line)))))) + (cons (paragraph plain (list spacer)) (render-line (cadr line) (caddr line)))))) (provide common-table rx-table px-table category-table) (define common-table (table-content '(co mode)))