(module rx scheme/base (require scribble/struct scribble/manual scribble/bnf) (define grammar " Regexp ::= Pieces Match Pieces #co | Regexp|Regexp Match either Regexp, try left first #co Pieces ::= Piece Match Piece #co | PiecePieces Match Piece followed by Pieces #co Piece ::= Repeat Match Repeat, longest possible #co | Repeat? Match Repeat, shortest possible #co | Atom Match Atom exactly once #co Repeat ::= Atom* Match Atom 0 or more times #co | Atom+ Match Atom 1 or more times #co | Atom? Match Atom 0 or 1 times #co Repeat ::= ... ... #px | Atom{N} Match Atom exactly N times #px | Atom{N,} Match Atom N or more times #px | Atom{,M} Match Atom between 0 and M times #px | Atom{N,M} Match Atom between N and M times #px Atom ::= (Regexp) Match sub-expression Regexp and report #co | [Range] Match any character in Range #co | [^Range] Match any character not in Range #co | . Match any (except newline in multi mode) #co | ^ Match start (or after newline in multi mode) #co | $ Match end (or before newline in multi mode) #co | Literal Match a single literal character #co | (?Mode:Regexp) Match Regexp using Mode #co | (?>Regexp) Match Regexp, only first possible #co | Look Match empty if Look matches #co | (?PredPieces|Pieces) Match 1st Pieces if Pred, else 2nd Pieces #co | (?PredPieces) Match Pieces if Pred, empty if not Pred #co Atom ::= ... ... #px | %N Match latest reported match for N##th _(_ #px | Class Match any character in Class #px | %b Match %w* boundary #px | %B Match where %b does not #px | %p{Property} Match (UTF-8 encoded) in Property #px | %P{Property} Match (UTF-8 encoded) not in Property #px Literal :== Any character except _(_, _)_, _*_, _+_, _?_, _[_, _._, _^_, _\\_, or _|_ #rx Literal :== Any character except _(_, _)_, _*_, _+_, _?_, _[_, _]_, _{_, _}_, _._, _^_, _\\_, or _|_ #px | \\Aliteral Match Aliteral #ot Aliteral :== Any character #rx Aliteral :== Any character except _a_-_z_, _A_-_Z_, _0_-_9_ #px Range ::= ] Range contains _]_ only #co | - Range contains _-_ only #co | Mrange Range contains everything in Mrange #co | Mrange- Range contains _-_ and everything in Mrange #co Mrange ::= ]Lrange Mrange contains _]_ and everything in Lrange #co | -Lrange Mrange contains _-_ and everything in Lrange #co | Lrange Mrange contains everything in Lrange #co Lrange ::= Rliteral Lrange contains a literal character #co | Rliteral-Rliteral Lrange contains Unicode range inclusive #co | LrangeLrange Lrange contains everything in both #co Look ::= (?=Regexp) Match if Regexp matches #mode | (?!Regexp) Match if Regexp doesn't match #mode | (?<=Regexp) Match if Regexp matches preceeding #mode | (? (lambda (m) (append (fixup-ids (substring s 0 (caar m))) (list spacer (tt "|") spacer) (fixup-ids (substring s (cdar m)))))] [(regexp-match-positions #rx"MM|NN" s) => (lambda (m) (append (fixup-ids (substring s 0 (caar m))) (list (substring s (caar m) (add1 (caar m)))) (fixup-ids (substring s (cdar m)))))] [(regexp-match-positions #rx"##" s) => (lambda (m) (append (fixup-ids (substring s 0 (caar m))) (fixup-ids (substring s (cdar m)))))] [(string=? s "...") (list (make-element #f (list s)))] [(string=? s "") null] [else (list (regexp-replace* #rx"%" s "\\\\"))]))) (define (lit-ize l) (map (lambda (i) (if (string? i) (litchar i) i)) l)) (define (as-meaning l) (map (lambda (s) (let loop ([s s]) (cond [(and (string? s) (regexp-match #rx"^(.*?)_([^_]+|_)_(.*)$" s)) => (lambda (m) (make-element #f (list (loop (cadr m)) (litchar (caddr m)) (loop (cadddr m)))))] [else s]))) l)) (define (smaller l) (list (make-element "smaller" l))) (define spacer (hspace 1)) (define ::= (make-element #f (list (hspace 1) (tt "::=") spacer))) (define -or- (tt "|")) (define (table-content mode) (define re:output (regexp (format " *#~a$" mode))) (define re:start-prod "^([^ ]*)( +)::= ((?:[^ ]| [|] )*)( +)([^ ].*)$") (define re:or-prod "^( +) [|] ((?:[^ ]| [|] )*)( +)([^ ].*)$") (define re:eng-prod "^([^ ]*)( +):== (.*)$") (define lines (filter values (map (lambda (s) (cond [(regexp-match-positions re:output s) => (lambda (m) (substring s 0 (caar m)))] [(regexp-match-positions #rx" *#[a-z]+$" s) #f] [(equal? s "") #f] [else (error 'lines "no match!?: ~s" s)])) (regexp-split "\r*\n" grammar)))) (define table-lines (map (lambda (line) (cond [(regexp-match re:start-prod line) => (lambda (m) (let ([prod (list-ref m 1)] [val (list-ref m 3)] [meaning (list-ref m 5)]) (list (make-element #f (fixup-ids prod)) ::= (make-element #f (lit-ize (fixup-ids val))) spacer (make-element #f (smaller (as-meaning (fixup-ids meaning)))))))] [(regexp-match re:eng-prod line) => (lambda (m) (let ([prod (list-ref m 1)] [meaning (list-ref m 3)]) (list (make-element #f (fixup-ids prod)) ::= (make-element #f (as-meaning (fixup-ids meaning))) 'cont 'cont)))] [(regexp-match re:or-prod line) => (lambda (m) (let ([val (list-ref m 2)] [meaning (list-ref m 4)]) (list 'nbsp -or- (make-element #f (lit-ize (fixup-ids val))) spacer (make-element #f (smaller (as-meaning (fixup-ids meaning)))))))])) lines)) (make-table '((alignment left left center left left left)) (map (lambda (line) (cons (make-flow (list (make-paragraph (list (hspace 1))))) (map (lambda (i) (if (eq? i 'cont) i (make-flow (list (make-paragraph (list i)))))) line))) table-lines))) (define common-table (table-content "(co|mode)")) (define rx-table (table-content "(?:rx|ot)")) (define px-table (table-content "(?:px|ot|cat)")) (provide common-table rx-table px-table) ;; ---------------------------------------------------------------------- (define types " Regexp_1|Regexp_2 : iff Regexp_1 : AND Regexp_2 : PiecePieces : iff Piece : AND Pieces : Repeat? : <0,m> iff Repeat : Atom* : <0,+inf.0> iff Atom : AND n > 0 Atom+ : <1,+inf.0> iff Atom : AND n > 0 Atom? : <0,m> iff Atom : Atom{N} : iff Atom : AND n > 0 Atom{N,} : iff Atom : AND n > 0 Atom{,M} : <0,m*M> iff Atom : AND n > 0 Atom{N,M} : iff Atom : AND n > 0 (Regexp) : AND \\alpha_N=n iff Regexp : (?Mode:Regexp) : iff Regexp : (?=Regexp) : <0,0> iff Regexp : (?!Regexp) : <0,0> iff Regexp : (?<=Regexp) : <0,0> iff Regexp : AND m < +inf.0 (? iff Regexp : AND m < +inf.0 (?>Regexp) : iff Regexp : (?PredPieces_1|Pieces_2) : iff Pred : AND Pieces_1 : AND Pieces_2 : (?PredPieces) : <0,m1> iff Pred : AND Pieces : (N) : <\\alpha_N,+inf.0> [Range] : <1,1> [^Range] : <1,1> . : <1,1> ^ : <0,0> $ : <0,0> Literal : <1,1> %N : <\\alpha_N,+inf.0> Class : <1,1> %b : <0,0> %B : <0,0> %p{Property} : <1,6> %P{Property} : <1,6>") (define (subscripts i) (cond [(and (string? i) (regexp-match #rx"^(.*)_(.)(.*)$" i)) => (lambda (m) (append (subscripts (cadr m)) (list (make-element 'subscript (list (caddr m)))) (subscripts (cadddr m))))] [(and (string? i) (regexp-match #rx"^(.*)([nm])([012]?)(.*)$" i)) => (lambda (m) (append (subscripts (cadr m)) (list (make-element 'italic (list (caddr m))) (make-element 'subscript (list (cadddr m)))) (subscripts (cadddr (cdr m)))))] [else (list i)])) (define (meta i) (cond [(and (string? i) (regexp-match #rx"^(.*)(min|max)(.*)$" i)) => (lambda (m) (append (meta (cadr m)) (list (make-element #f (list (caddr m)))) (meta (cadddr m))))] [(and (string? i) (regexp-match #rx"^(.*)([+]inf[.]0)(.*)$" i)) => (lambda (m) (append (meta (cadr m)) (list 'infin) (meta (cadddr m))))] [(and (string? i) (regexp-match #rx"^(.*)([\\]alpha)(.*)$" i)) => (lambda (m) (append (meta (cadr m)) (list 'alpha) (meta (cadddr m))))] [else (list i)])) (define (fixup-one-type t) (apply append (map subscripts (apply append (map meta (fixup-ids (regexp-replace* #rx"<([^(,]*|[^,]*[(].*[)][^,]*),([^>]*)>" t "[\\1, \\2]"))))))) (define (fixup-type t) (cond [(regexp-match-positions #rx" AND " t) => (lambda (m) (append (fixup-type (substring t 0 (caar m))) (list (hspace 3)) (fixup-type (substring t (cdar m)))))] [(regexp-match-positions #rx" : " t) => (lambda (m) (append (lit-ize (apply append (map subscripts (fixup-ids (substring t 0 (caar m)))))) (list spacer (tt ":") spacer) (fixup-one-type (substring t (cdar m)))))] [else (fixup-one-type t)])) (define (insert i l) (cond [(null? l) null] [(null? (cdr l)) l] [else (list* (car l) i (insert i (cdr l)))])) (define type-table (let* ([lines (regexp-split "\r*\n" types)] [lines (let loop ([lines lines]) (if (null? lines) null (let ([line (car lines)]) (if (equal? line "") (cons null (loop (cdr lines))) (let ([r (loop (cdr lines))]) (if (null? r) (list (list line)) (cons (cons line (car r)) (cdr r))))))))]) (make-table '((alignment center)) (insert (list (make-flow (list (make-paragraph (list spacer))))) (map (lambda (line) (list (make-flow (list (make-table #f (list (insert (make-flow (list (make-paragraph (list (hspace 3))))) (map (lambda (line) (make-flow (list (call-with-values (lambda () (apply values (regexp-split " iff " line))) (case-lambda [(bottom top) (make-table '((alignment center) (row-styles "inferencetop" "inferencebottom")) (list (list (make-flow (list (make-paragraph (append (list spacer) (fixup-type top) (list spacer)))))) (list (make-flow (list (make-paragraph (append (list spacer) (fixup-type bottom) (list spacer))))))))] [(single) (make-paragraph (fixup-type line))]))))) line)))))))) lines))))) (provide type-table))