#lang at-exp racket/base (require scribble/core scribble/manual scribble/bnf racket/list racket/string) ;; If you edit this table, please try to avoid making the table wider ;; or causing line-wrapping in HTML. (I know that someone who edits ;; the table is unlikely to see this request, but it's worth a try.) (define grammar @string-append{ Regexp ::= Pces Match Pces #co | Regexp|Regexp Match either Regexp, try left first #co 1 Pces ::= Pce Match Pce #co | PcePces Match Pce followed by Pces #co Pce ::= Repeat Match Repeat, longest possible #co 3 | Repeat? Match Repeat, shortest possible #co 6 | Atom Match Atom exactly once #co Repeat ::= Atom* Match Atom 0 or more times #co 3 | Atom+ Match Atom 1 or more times #co 4 | Atom? Match Atom 0 or 1 times #co 5 Repeat ::= ... ... #px | Atom{N} Match Atom exactly N times #px 7 | Atom{N,} Match Atom N or more times #px 8 | Atom{,M} Match Atom between 0 and M times #px 9 | Atom{N,M} Match Atom between N and M times #px 10 Atom ::= (Regexp) Match sub-expression Regexp and report #co 11 | [Rng] Match any character in Rng #co 2 | [^Rng] Match any character not in Rng #co 12 | . Match any (except newline in multi mode) #co 13 | ^ Match start (or after newline in multi mode) #co 14 | $ Match end (or before newline in multi mode) #co 15 | Literal Match a single literal character #co 1 | (?Mode:Regexp) Match Regexp using Mode #co 35 | (?>Regexp) Match Regexp, only first possible #co | Look Match empty if Look matches #co | (?TstPces|Pces) Match 1st Pces if Tst, else 2nd Pces #co 36 | (?TstPces) Match Pces if Tst, empty if not Tst #co Atom ::= ... ... #px | \N Match latest reported match for N##th _(_ #px 16 | Class Match any character in Class #px | \b Match _\w*_ boundary #px 17 | \B Match where _\b_ does not #px 18 | \p{Property} Match (UTF-8 encoded) in Property #px 19 | \P{Property} Match (UTF-8 encoded) not in Property #px 20 Literal :== Any character except _(_, _)_, _*_, _+_, _?_, _[_, _._, _^_, _\_, or _|_ #rx Literal :== Any character except _(_, _)_, _*_, _+_, _?_, _[_, _]_, _{_, _}_, _._, _^_, _\_, or _|_ #px | \Aliteral Match Aliteral #ot 21 Aliteral :== Any character #rx Aliteral :== Any character except _a_-_z_, _A_-_Z_, _0_-_9_ #px Rng ::= ] Rng contains _]_ only #co 27 | - Rng contains _-_ only #co 28 | Mrng Rng contains everything in Mrng #co | Mrng- Rng contains _-_ and everything in Mrng #co Mrng ::= ]Lrng Mrng contains _]_ and everything in Lrng #co 29 | -Lrng Mrng contains _-_ and everything in Lrng #co 29 | Lirng Mrng contains everything in Lirng #co Lirng ::= Riliteral Lirng contains a literal character #co | Riliteral-Rliteral Lirng contains Unicode range inclusive #co 22 | LirngLrng Lirng contains everything in both #co Lrng ::= ^ Lrng contains _^_ #co 30 | Rliteral-Rliteral Lrng contains Unicode range inclusive #co | ^Lrng Lrng contains _^_ and more #co | Lirng Lrng contains everything in Lirng #co Look ::= (?=Regexp) Match if Regexp matches #mode 31 | (?!Regexp) Match if Regexp doesn't match #mode 32 | (?<=Regexp) Match if Regexp matches preceding #mode 33 | (?symbol kind) line ex)] [else (error 'grammar-lines "bad line: ~s" line)]))) (define (table-content modes) (define (cell x) (if (eq? x 'cont) x (paragraph plain (list (if (element? x) x (element #f x)))))) (define (row . xs) (map cell xs)) (define (ex-ref ex) (if ex (smaller (list 'nbsp (elemref `(rxex ,ex) (format "ex~a" ex)))) "")) (define (render-line line ex) (regexp-case line [(#rx"^([^ ]*) +::= ((?:[^ ]+| [|] )*) +([^ ].*)$" prod val meaning) (row (fixup-ids prod) ::= (lit-ize (fixup-ids val)) spacer (as-smaller (as-meaning (fixup-ids meaning))) (ex-ref ex))] [(#rx"^([^ ]*) +:== (.*)$" prod meaning) (row (fixup-ids prod) ::= (as-meaning (fixup-ids meaning)) 'cont 'cont (ex-ref ex))] [(#rx"^ + [|] ((?:[^ ]| [|] )*) +([^ ].*)$" val meaning) (row 'nbsp -or- (lit-ize (fixup-ids val)) spacer (as-smaller (as-meaning (fixup-ids meaning))) (ex-ref ex))])) (table (style #f (list (table-columns (map (lambda (s) (style #f (list s))) '(left left center left left left left))))) (for/list ([line (in-list grammar-lines)] #:when (memq (car line) modes)) (cons (paragraph plain (list spacer)) (render-line (cadr line) (caddr line)))))) (provide common-table rx-table px-table category-table) (define common-table (table-content '(co mode))) (define rx-table (table-content '(rx ot))) (define px-table (table-content '(px ot cat))) (define category-table (table-content '(ucat))) ;; ---------------------------------------------------------------------- (define types @string-append{ Regexp_1|Regexp_2 : @; iff Regexp_1 : AND Regexp_2 : PcePces : iff Pce : AND Pces : Repeat? : <0,m> iff Repeat : Atom* : <0,+inf.0> iff Atom : AND n > 0 Atom+ : <1,+inf.0> iff Atom : AND n > 0 Atom? : <0,m> iff Atom : Atom{N} : iff Atom : AND n > 0 Atom{N,} : iff Atom : AND n > 0 Atom{,M} : <0,m*M> iff Atom : AND n > 0 Atom{N,M} : iff Atom : AND n > 0 (Regexp) : AND \alpha_N=n iff Regexp : (?Mode:Regexp) : iff Regexp : (?=Regexp) : <0,0> iff Regexp : (?!Regexp) : <0,0> iff Regexp : (?<=Regexp) : <0,0> iff Regexp : AND m < +inf.0 (? iff Regexp : AND m < +inf.0 (?>Regexp) : iff Regexp : (?TstPces_1|Pces_2) : @; iff Tst : AND Pces_1 : AND Pces_2 : (?TstPces) : <0,m1> iff Tst : AND Pces : (N) : <\alpha_N,+inf.0> [Rng] : <1,1> [^Rng] : <1,1> . : <1,1> ^ : <0,0> $ : <0,0> Literal : <1,1> \N : <\alpha_N,+inf.0> Class : <1,1> \b : <0,0> \B : <0,0> \p{Property} : <1,6> \P{Property} : <1,6>}) (define (subscripts i) (regexp-case i [(#rx"^(.*)_(.)(.*)$" X S Y) `(,@(subscripts X) ,(element 'subscript (list S)) ,@(subscripts Y))] [(#rx"^(.*)([nm])([012]?)(.*)$" X V N Y) `(,@(subscripts X) ,(element 'italic (list V)) ,(element 'subscript (list N)) ,@(subscripts Y))] [else (list i)])) (define (meta i) (regexp-case i [(#rx"^(.*)(min|max)(.*)$" X M Y) `(,@(meta X) ,(element #f (list M)) ,@(meta Y))] [(#rx"^(.*)[+]inf[.]0(.*)$" X Y) `(,@(meta X) infin ,@(meta Y))] [(#rx"^(.*)[\\]alpha(.*)$" X Y) `(,@(meta X) alpha ,@(meta Y))] [else (list i)])) (define (fixup-one-type t) (let ([t (regexp-replace* #rx"<([^(,]*|[^,]*[(].*[)][^,]*),([^>]*)>" t "[\\1, \\2]")]) (append-map subscripts (append-map meta (fixup-ids t))))) (define (fixup-type t) (regexp-case t [(#rx"^(.*?) AND (.*)$" X Y) `(,@(fixup-type X) ,(hspace 3) ,@(fixup-type Y))] [(#rx"^(.*?) : (.*)$" X Y) `(,@(lit-ize (append-map subscripts (fixup-ids X))) ,spacer ,(tt ":") ,spacer ,@(fixup-one-type Y))] [else (fixup-one-type t)])) (provide type-table) (define type-table (let () (define rule-style (list (table-cells (list (list (style "inferencetop" '(center))) (list (style "inferencebottom" '(center))))))) (define do-clauses (case-lambda [(bottom top) (table (style #f rule-style) (list (list (paragraph plain `(,spacer ,@top ,spacer))) (list (paragraph plain `(,spacer ,@bottom ,spacer)))))] [(single) (paragraph plain single)])) (define (do-line line) (apply do-clauses (map fixup-type (regexp-split #rx" iff " line)))) (define (do-row para) (add-between (map do-line (regexp-split #rx" *\r*\n" para)) (paragraph plain (list (hspace 3))))) (define (do-para para) (list (table plain (list (do-row para))))) (table (style #f (list (table-columns (list (style #f '(center)))))) (add-between (map do-para (regexp-split #px"\r*\n(?: *\r*\n)+" types)) (list (paragraph plain (list spacer)))))))