racket/examples/ez-grammar.ss
dyb 64b0db8e30 fixed gather-filedata's sort of profile entries. for any two
entries x and y in the list produced by the sort call, if x's
bfp = y's bfp, x should come before y if x's efp < y's efp.
The idea is that enclosing entries should always come later
in the list.  this affects only languages where two expressions
can start at the same character position.
  pdhtml.ss
expanded capability of ez-grammar with support for simpl
parsing of binary operators w/precedence and associativity
and automatically generated markdown grammar descriptions.
ez-grammar-test.ss now also doubles as a test of pdhtml for
algebraic languages.
  mats/examples.ms,
  examples/ez-grammar.ss, examples/ez-grammar-test.ss,
  examples/Makefile

original commit: 53b8d16a1e86f3956585dbec0c7b573e485f7844
2017-10-30 21:01:43 -04:00

760 lines
35 KiB
Scheme

;;; Copyright 2017 Cisco Systems, Inc.
;;;
;;; Licensed under the Apache License, Version 2.0 (the "License");
;;; you may not use this file except in compliance with the License.
;;; You may obtain a copy of the License at
;;;
;;; http://www.apache.org/licenses/LICENSE-2.0
;;;
;;; Unless required by applicable law or agreed to in writing, software
;;; distributed under the License is distributed on an "AS IS" BASIS,
;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;;; See the License for the specific language governing permissions and
;;; limitations under the License.
;;; See http://www.cs.nott.ac.uk/~pszgmh/monparsing.pdf for origins of
;;; some of the monadic combinators.
;;; Authors: Jon Rossie, Kent Dybvig
;;; The define-grammar form produces a parser:
;;;
;;; parser : token-stream -> ((Tree token-stream) ...)
;;;
;;; If the return value is the empty list, a parse error occurred.
;;; If the return value has multiple elements, the parse was ambiguous.
;;; The token-stream in each (Tree token-stream) is the tail of the
;;; input stream that begins with the last token consumed by the parse.
;;; This gives the consumer access to both the first and last token,
;;; allowing it to determine cheaply the extent of the parse, including
;;; source locations if source information is attached to the tokens.
;;; Internally, backtracking occurs whenever a parser return value
;;; has multiple elements.
;;; This code should be included into a lexical context that supplies:
;;;
;;; token-bfp : token -> token's beginning file position
;;; token-efp : token -> token's ending file position
;;; meta constant? : syntax-object -> boolean
;;; sep->parser : sep -> parser
;;; constant->parser : constant -> parser
;;; make-src : bfp x efp -> src. bfp > efp => no tokens consumed.
;;;
;;; See ez-grammar-test.ss for an example.
(module (define-grammar
is sat item peek seq ++ +++ many many+ ?
parse-consumed-all? parse-result-value parse-result-unused
grammar-trace
)
(import (streams))
(define grammar-trace (make-parameter #f))
(define-record-type parse-result
(nongenerative parse-result)
(sealed #t)
(fields value unused))
;; to enable $trace-is to determine the ending file position (efp) of a parse
;; form, the input stream actually points to the preceding token rather than
;; to the current token. the next few routines establish, maintain, and deal
;; with that invariant.
(define make-top-level-parser
(lambda (parser)
(lambda (inp)
(parser (stream-cons 'dummy-token inp)))))
(define preceding-token
(lambda (inp)
(stream-car inp)))
(define current-token
(lambda (inp)
(stream-car (stream-cdr inp))))
(define remaining-tokens
(lambda (inp)
(stream-cdr inp)))
(define no-more-tokens?
(lambda (inp)
(stream-null? (stream-cdr inp))))
(define parse-consumed-all?
(lambda (res)
(no-more-tokens? (parse-result-unused res))))
;; A parser generator
(define result
(lambda (v)
;; this is a parser that ignores its input and produces v
(lambda (inp)
(stream (make-parse-result v inp)))))
;; A parse that always generates a parse error
(define zero
(lambda (inp)
stream-nil))
;; For a non-empty stream, successfully consume the first element
(define item
(lambda (inp)
(cond
[(no-more-tokens? inp) '()]
[else
(stream (make-parse-result (current-token inp) (remaining-tokens inp)))])))
(define (peek p)
(lambda (inp)
(stream-map (lambda (pr)
(make-parse-result (parse-result-value pr) inp))
(p inp))))
;;------------------------------------------
(define bind
(lambda (parser receiver)
(lambda (inp)
(let ([res* (parser inp)])
(stream-append-all
(stream-map (lambda (res)
((receiver (parse-result-value res))
(parse-result-unused res)))
res*))))))
;; monad comprehensions
(define-syntax is-where ; used by is and trace-is
(lambda (x)
(syntax-case x (where <-)
[(_ expr (where)) #'expr]
[(_ expr (where [x <- p] clauses ...))
#'(bind p (lambda (x) (is-where expr (where clauses ...))))]
[(_ expr (where pred clauses ...))
#'(if pred (is-where expr (where clauses ...)) zero)]
[(_ expr where-clause) (syntax-error #'where-clause)])))
(indirect-export is-where bind)
(define-syntax is
(syntax-rules ()
[(_ expr where-clause) (is-where (result expr) where-clause)]))
(indirect-export is is-where)
(module (trace-is)
(define ($trace-is name proc head)
(lambda (unused)
(let ([res (proc (token-bfp (current-token head)) (token-efp (preceding-token unused)))])
(when (and name (grammar-trace)) (printf "<<~s = ~s~%" name res))
(stream (make-parse-result res unused)))))
(define-syntax trace-is
(syntax-rules ()
[(_ name proc-expr where-clause)
(lambda (inp) ((is-where ($trace-is 'name proc-expr inp) where-clause) inp))]))
(indirect-export trace-is $trace-is))
(define (seq2 p q) (is (cons x y) (where [x <- p] [y <- q])))
(define seq
(lambda p*
(let loop ([p* p*])
(cond
[(null? p*) (result '())]
[else (seq2 (car p*) (loop (cdr p*)))]))))
(define (sat pred) (is x (where [x <- item] (pred x))))
(define ++ ;; introduce ambiguity
(lambda (p q)
(lambda (inp)
(stream-append2 (p inp)
(lambda ()
(q inp))))))
(define (many+ p) (is (cons x xs) (where [x <- p] [xs <- (many p)])))
(define (many p) (++ (many+ p) (result '())))
(define (? p) (++ (sat p) (result #f)))
(define (sepby1 p sep)
(is (cons x xs)
(where
[x <- p]
[xs <- (many (is y (where [_ <- sep] [y <- p])))])))
(define (sepby p sep) (++ (sepby1 p sep) (result '())))
(define (bracket open p close) (is x (where [_ <- open] [x <- p] [_ <- close])))
(define (optional p default)
(lambda (inp)
(let ([res (p inp)])
(if (stream-null? res)
(stream (make-parse-result default inp))
res))))
(define (first p)
(lambda (inp)
(let ([res (p inp)])
(if (stream-null? res)
res
(stream (stream-car res))))))
(define (+++ p q) (first (++ p q))) ;; choose first match, cut backtracking
(define-syntax infix-expression-parser
(lambda (x)
(syntax-case x ()
[(_ ((L/R ?op-parser) ...) ?term-parser ?receiver)
(with-syntax ([(op-parser ...) (generate-temporaries #'(?op-parser ...))])
#`(let ([op-parser ?op-parser] ... [term-parser (lambda (inp) (?term-parser inp))] [receiver ?receiver])
#,(let f ([ls #'((L/R op-parser) ...)])
(if (null? ls)
#'term-parser
#`(let ([next #,(f (cdr ls))])
#,(syntax-case (car ls) (LEFT RIGHT)
[(LEFT op-parser)
#'(let ()
(define-record-type frob (nongenerative) (sealed #t) (fields op y efp))
(trace-is binop-left (lambda (bfp ignore-this-efp)
(fold-left
(lambda (x f) (receiver bfp (frob-efp f) (frob-op f) x (frob-y f)))
x f*))
(where
[x <- next]
[f* <- (rec this
(optional
(is (cons f f*)
(where
[f <- (trace-is binop-left-tail (lambda (bfp efp) (make-frob op y efp))
(where
[op <- op-parser]
[y <- next]))]
[f* <- this]))
'()))])))]
[(RIGHT op-parser)
#'(rec this
(+++
(trace-is binop-right (lambda (bfp efp) (receiver bfp efp op x y))
(where
[x <- next]
[op <- op-parser]
[y <- this]))
next))]))))))])))
(define (format-inp inp)
(if (no-more-tokens? inp)
"#<null-stream>"
(format "(~s ...)" (current-token inp))))
(define-syntax define-grammar
(lambda (x)
(define-record-type grammar
(nongenerative)
(sealed #t)
(fields title paragraph* section*))
(define-record-type section
(nongenerative)
(sealed #t)
(fields title paragraph* suppressed? clause*))
(define-record-type clause
(nongenerative)
(fields id alias* before-paragraph* after-paragraph*))
(define-record-type regular-clause
(nongenerative)
(sealed #t)
(parent clause)
(fields prod*))
(define-record-type binop-clause
(nongenerative)
(sealed #t)
(parent clause)
(fields level* term receiver)
(protocol
(lambda (pargs->new)
(lambda (nt alias* before-paragraph* after-paragraph* level* term src? receiver)
((pargs->new nt alias* before-paragraph* after-paragraph*) level* term
#`(lambda (bfp efp op x y)
#,(if src?
#`(#,receiver (make-src bfp efp) op x y)
#`(#,receiver op x y))))))))
(define-record-type terminal-clause
(nongenerative)
(sealed #t)
(fields term*))
(define-record-type terminal
(nongenerative)
(sealed #t)
(fields parser alias* paragraph*))
(define-record-type production
(nongenerative)
(sealed #t)
(fields name paragraph* elt* receiver)
(protocol
(let ()
(define (check-elts elt*)
(for-each (lambda (elt) (unless (elt? elt) (errorf 'make-production "~s is not an elt" elt))) elt*))
(lambda (new)
(case-lambda
[(name elt* receiver)
(check-elts elt*)
(new name #f elt* receiver)]
[(name paragraph* elt* receiver)
(check-elts elt*)
(new name paragraph* elt* receiver)])))))
(define-record-type elt
(nongenerative))
(define-record-type sep-elt
(nongenerative)
(sealed #t)
(parent elt)
(fields +? elt sep))
(define-record-type opt-elt
(nongenerative)
(sealed #t)
(parent elt)
(fields elt default))
(define-record-type kleene-elt
(nongenerative)
(sealed #t)
(parent elt)
(fields +? elt))
(define-record-type constant-elt
(nongenerative)
(sealed #t)
(parent elt)
(fields k))
(define-record-type id-elt
(nongenerative)
(sealed #t)
(parent elt)
(fields id))
(define paragraph?
(lambda (x)
(syntax-case x (include)
[(include filename) (string? (datum filename))]
[(str ...) (andmap string? (datum (str ...)))])))
(define (gentemp) (datum->syntax #'* (gensym)))
(define (elt-temps elt*)
(for-each (lambda (elt) (unless (elt? elt) (errorf 'elt-temps "~s is not an elt" elt))) elt*)
(fold-left
(lambda (t* elt)
(if (constant-elt? elt) t* (cons (gentemp) t*)))
'()
elt*))
(define (left-factor clause*)
(define syntax-equal?
(lambda (x y)
(equal? (syntax->datum x) (syntax->datum y))))
(define (elt-equal? x y)
(cond
[(sep-elt? x)
(and (sep-elt? y)
(eq? (sep-elt-+? x) (sep-elt-+? y))
(elt-equal? (sep-elt-elt x) (sep-elt-elt y))
(syntax-equal? (sep-elt-sep x) (sep-elt-sep y)))]
[(opt-elt? x)
(and (opt-elt? y)
(elt-equal? (opt-elt-elt x) (opt-elt-elt y))
(syntax-equal? (opt-elt-default x) (opt-elt-default y)))]
[(kleene-elt? x)
(and (kleene-elt? y)
(eq? (kleene-elt-+? x) (kleene-elt-+? y))
(elt-equal? (kleene-elt-elt x) (kleene-elt-elt y)))]
[(constant-elt? x)
(and (constant-elt? y)
(syntax-equal? (constant-elt-k x) (constant-elt-k y)))]
[(id-elt? x)
(and (id-elt? y)
(syntax-equal? (id-elt-id x) (id-elt-id y)))]
[else #f]))
(let lp1 ([clause* clause*] [new-clause* '()])
(if (null? clause*)
(reverse new-clause*)
(let ([clause (car clause*)])
(let lp2 ([prod* (regular-clause-prod* clause)] [new-prod* '()] [clause* (cdr clause*)])
(if (null? prod*)
(lp1 clause* (cons (make-regular-clause (clause-id clause) (clause-alias* clause) '() '() (reverse new-prod*)) new-clause*))
(let ([prod (car prod*)] [prod* (cdr prod*)])
(let ([elt* (production-elt* prod)])
(if (null? elt*)
(lp2 prod* (cons prod new-prod*) clause*)
(let ([elt (car elt*)])
(let-values ([(haves have-nots) (partition
(lambda (prod)
(let ([elt* (production-elt* prod)])
(and (not (null? elt*))
(elt-equal? (car elt*) elt))))
prod*)])
(if (null? haves)
(lp2 prod* (cons prod new-prod*) clause*)
(let ([haves (cons prod haves)])
; "haves" start with the same elt. to cut down on the number of new
; nonterminals and receiver overhead, find the largest common prefix
(let ([prefix (cons elt
(let f ([elt** (map production-elt* haves)])
(let ([elt** (map cdr elt**)])
(if (ormap null? elt**)
'()
(let ([elt (caar elt**)])
(if (andmap (lambda (elt*) (elt-equal? (car elt*) elt)) (cdr elt**))
(cons elt (f elt**))
'()))))))])
(let ([t (gentemp)] [n (length prefix)] [t* (elt-temps prefix)])
(lp2 have-nots
(cons (make-production #f (append prefix (list (make-id-elt t)))
#`(lambda (bfp efp #,@t* p) (p bfp #,@t*)))
new-prod*)
(cons (make-regular-clause t '() '() '()
(map (lambda (prod)
(let ([elt* (list-tail (production-elt* prod) n)])
(make-production (production-name prod) elt*
(let ([u* (elt-temps elt*)])
#`(lambda (bfp efp #,@u*)
(lambda (bfp #,@t*)
(#,(production-receiver prod) bfp efp #,@t* #,@u*)))))))
haves))
clause*)))))))))))))))))
(define (make-env tclause* clause*)
(let ([env (make-hashtable (lambda (x) (symbol-hash (syntax->datum x))) free-identifier=?)])
(define (insert parser)
(lambda (name)
(let ([a (hashtable-cell env name #f)])
(when (cdr a) (syntax-error name "duplicate terminal/non-terminal name"))
(set-cdr! a parser))))
(for-each
(lambda (tclause)
(for-each
(lambda (term)
(let ([parser (terminal-parser term)])
(for-each (insert parser) (cons parser (terminal-alias* term)))))
(terminal-clause-term* tclause)))
tclause*)
(for-each
(lambda (clause)
(let ([id (clause-id clause)])
(for-each (insert id) (cons id (clause-alias* clause)))))
clause*)
env))
(define (lookup id env)
(or (hashtable-ref env id #f)
(syntax-error id "unrecognized terminal or nonterminal")))
(define (render-markdown name grammar mdfn env)
(define (separators sep ls)
(if (null? ls)
""
(apply string-append
(cons (car ls)
(map (lambda (s) (format "~a~a" sep s)) (cdr ls))))))
(define (render-paragraph hard-leading-newline?)
(lambda (paragraph)
(define (md-text s)
(list->string
(fold-right
(lambda (c ls)
(case c
[(#\\) (cons* c c ls)]
[else (cons c ls)]))
'()
(string->list s))))
(syntax-case paragraph (include)
[(include filename)
(string? (datum filename))
(let ([text (call-with-port (open-input-file (datum filename)) get-string-all)])
(unless (equal? text "")
(if hard-leading-newline? (printf "\\\n") (newline))
(display-string text)))]
[(sentence ...)
(andmap string? (datum (sentence ...)))
(let ([sentence* (datum (sentence ...))])
(unless (null? sentence*)
(if hard-leading-newline? (printf "\\\n") (newline))
(printf "~a\n" (separators " " (map md-text sentence*)))))])))
(define (format-elt x)
(cond
[(sep-elt? x)
(let* ([one (format-elt (sep-elt-elt x))]
[sep (constant->markdown (syntax->datum (sep-elt-sep x)))]
[seq (format "~a&nbsp;&nbsp;~a&nbsp;&nbsp;`...`" one sep)])
(if (sep-elt-+? x)
seq
(format "OPT(~a)" seq)))]
[(opt-elt? x)
(format "~a~~opt~~" (format-elt (opt-elt-elt x)))]
[(kleene-elt? x)
(let ([one (format-elt (kleene-elt-elt x))])
(if (kleene-elt-+? x)
(format "~a&nbsp;&nbsp;`...`" one)
(format "OPT(~a)" one)))]
[(constant-elt? x) (constant->markdown (syntax->datum (constant-elt-k x)))]
[(id-elt? x) (format "[*~s*](#~s)"
(syntax->datum (id-elt-id x))
(syntax->datum (lookup (id-elt-id x) env)))]
[else (errorf 'format-elt "unexpected elt ~s" x)]))
(define (render-elt x)
(printf "&nbsp;&nbsp;~a" (format-elt x)))
(define (render-production prod)
(unless (null? (production-elt* prod))
(printf " : ")
(for-each render-elt (production-elt* prod))
(printf "\n"))
(when (and (null? (production-elt* prod))
(not (null? (production-paragraph* prod))))
(errorf 'render-production "empty production must not have description: ~a" (production-paragraph* prod)))
(for-each (render-paragraph #t) (production-paragraph* prod)))
(define (render-clause clause)
(define (render-aliases alias*)
(unless (null? alias*)
(printf " \naliases: ~{*~a*~^, ~}\n" (map syntax->datum alias*))))
(if (terminal-clause? clause)
(for-each
(lambda (term)
(printf "\n#### *~a* {#~:*~a}\n" (syntax->datum (terminal-parser term)))
(render-aliases (terminal-alias* term))
(for-each (render-paragraph #f) (terminal-paragraph* term)))
(terminal-clause-term* clause))
(let ([id (syntax->datum (clause-id clause))])
(printf "\n#### *~a* {#~:*~a}\n" id)
(render-aliases (clause-alias* clause))
(for-each (render-paragraph #f) (clause-before-paragraph* clause))
(printf "\nsyntax:\n")
(if (binop-clause? clause)
(let ([level* (binop-clause-level* clause)])
(let loop ([level* level*] [first? #t])
(unless (null? level*)
(let ([level (syntax->datum (car level*))] [level* (cdr level*)])
(let ([L/R (car level)] [op* (cdr level)])
(printf " : _~(~a~)-associative" L/R)
(if first?
(if (null? level*)
(printf ":_\n")
(printf ", highest precedence:_\n"))
(if (null? level*)
(printf ", lowest precedence:_\n")
(printf ":_\n")))
(for-each
(lambda (op) (printf " : ~s ~a ~s\n" id (constant->markdown op) id))
op*))
(loop level* #f))))
(printf " : _leaves:_\n")
(printf " : ")
(render-elt (binop-clause-term clause))
(printf "\n"))
(for-each render-production (or (regular-clause-prod* clause) '())))
(for-each (render-paragraph #f) (clause-after-paragraph* clause)))))
(define (render-section section)
(unless (section-suppressed? section)
(printf "\n## ~a\n" (or (section-title section) "The section"))
(for-each (render-paragraph #f) (section-paragraph* section))
(for-each render-clause (section-clause* section))))
(with-output-to-file mdfn
(lambda ()
(printf "# ~a\n" (or (grammar-title grammar) "The grammar"))
(for-each (render-paragraph #f) (grammar-paragraph* grammar))
(for-each render-section (grammar-section* grammar)))
'replace))
(module (parse-grammar)
(define parse-elt
(lambda (elt)
(syntax-case elt (SEP+ SEP* OPT K* K+)
[(SEP+ p sep) (make-sep-elt #t (parse-elt #'p) #'sep)]
[(SEP* p sep) (make-sep-elt #f (parse-elt #'p) #'sep)]
[(OPT p default) (make-opt-elt (parse-elt #'p) #'default)]
[(K+ p) (make-kleene-elt #t (parse-elt #'p))]
[(K* p) (make-kleene-elt #f (parse-elt #'p))]
[k (constant? #'k) (make-constant-elt #'k)]
[id (identifier? #'id) (make-id-elt #'id)]
[_ (syntax-error elt "invalid production element")])))
(define parse-production
(lambda (prod)
(define (finish name src? paragraph* elt* receiver)
(let ([elt* (map parse-elt elt*)])
(make-production name paragraph* elt*
(with-syntax ([(t ...) (elt-temps elt*)])
#`(lambda (bfp efp t ...)
#,(if src?
#`(#,receiver (make-src bfp efp) t ...)
#`(#,receiver t ...)))))))
(syntax-case prod (:: src =>)
[[name :: src elt ... => receiver]
(finish #'name #t '() #'(elt ...) #'receiver)]
[[name :: elt ... => receiver]
(finish #'name #f '() #'(elt ...) #'receiver)])))
(define (parse-terminal term)
(syntax-case term (DESCRIPTION)
[(parser (alias ...) (DESCRIPTION paragraph ...))
(and (identifier? #'parser) (andmap identifier? #'(alias ...)) (andmap paragraph? #'(paragraph ...)))
(make-terminal #'parser #'(alias ...) #'(paragraph ...))]
[(parser (alias ...))
(and (identifier? #'parser) (andmap identifier? #'(alias ...)))
(make-terminal #'parser #'(alias ...) '())]))
(define (parse-clause clause nt alias* before-paragraph* after-paragraph* stuff*)
(syntax-case stuff* (BINOP :: src =>)
[((BINOP src (level ...) term) => receiver)
(make-binop-clause nt alias* before-paragraph* after-paragraph* #'(level ...) (parse-elt #'term) #t #'receiver)]
[((BINOP (level ...) term) => receiver)
(make-binop-clause nt alias* before-paragraph* after-paragraph* #'(level ...) (parse-elt #'term) #f #'receiver)]
[(prod prods ...)
(make-regular-clause nt alias* before-paragraph* after-paragraph* (map parse-production #'(prod prods ...)))]
[else (syntax-error clause)]))
(define (parse-top top* knull kgrammar ksection kclause)
(if (null? top*)
(knull)
(let ([top (car top*)] [top* (cdr top*)])
(syntax-case top (GRAMMAR SECTION SUPPRESSED DESCRIPTION BINOP TERMINALS src =>)
[(GRAMMAR title paragraph ...)
(andmap paragraph? #'(paragraph ...))
(kgrammar top* (datum title) #'(paragraph ...))]
[(SECTION SUPPRESSED title paragraph ...)
(andmap paragraph? #'(paragraph ...))
(ksection top* (datum title) #'(paragraph ...) #t)]
[(SECTION title paragraph ...)
(andmap paragraph? #'(paragraph ...))
(ksection top* (datum title) #'(paragraph ...) #f)]
[(TERMINALS term ...)
(kclause top* (make-terminal-clause (map parse-terminal #'(term ...))))]
[(TERMINALS term ...)
(kclause top* (make-terminal-clause (map parse-terminal #'(term ...))))]
[(nt (alias ...) (DESCRIPTION paragraph1 ...) stuff ... (DESCRIPTION paragraph2 ...))
(and (identifier? #'nt) (andmap identifier? #'(alias ...)) (andmap paragraph? #'(paragraph1 ...)) (andmap paragraph? #'(paragraph2 ...)))
(kclause top* (parse-clause top #'nt #'(alias ...) #'(paragraph1 ...) #'(paragraph2 ...) #'(stuff ...)))]
[(nt (alias ...) (DESCRIPTION paragraph ...) stuff ...)
(and (identifier? #'nt) (andmap identifier? #'(alias ...)) (andmap paragraph? #'(paragraph ...)))
(kclause top* (parse-clause top #'nt #'(alias ...) #'(paragraph ...) '() #'(stuff ...)))]
[(nt (alias ...) stuff ... (DESCRIPTION paragraph ...))
(and (identifier? #'nt) (andmap identifier? #'(alias ...)) (andmap paragraph? #'(paragraph ...)))
(kclause top* (parse-clause top #'nt #'(alias ...) '() #'(paragraph ...) #'(stuff ...)))]
[(nt (alias ...) stuff ...)
(and (identifier? #'nt) (andmap identifier? #'(alias ...)))
(kclause top* (parse-clause top #'nt #'(alias ...) '() '() #'(stuff ...)))]))))
(define (parse-grammar top*)
(define (misplaced-grammar-error top)
(syntax-error top "unexpected GRAMMAR element after other elements"))
(define (s1 top*) ; looking for GRAMMAR form, first SECTION form, or clause
(parse-top top*
(lambda () (make-grammar #f '() '()))
(lambda (top* title paragraph*)
(make-grammar title paragraph* (s2 top*)))
(lambda (top* title paragraph* suppressed?)
(make-grammar #f '()
(s3 top* title paragraph* suppressed? '() '())))
(lambda (top* clause)
(make-grammar #f '()
(s3 top* #f '() #f (list clause) '())))))
(define (s2 top*) ; looking for first SECTION form or clause
(parse-top top*
(lambda () '())
(lambda (title paragraph*) (misplaced-grammar-error (car top*)))
(lambda (top* title paragraph* suppressed?)
(s3 top* title paragraph* suppressed? '() '()))
(lambda (top* clause)
(s3 top* #f '() #f (list clause) '()))))
(define (s3 top* title paragraph* suppressed? rclause* rsection*) ; steady state: looking for remaining SECTION forms and clauses
(define (finish-section)
(cons (make-section title paragraph* suppressed? (reverse rclause*)) rsection*))
(parse-top top*
(lambda () (reverse (finish-section)))
(lambda (title paragraph*) (misplaced-grammar-error (car top*)))
(lambda (top* title paragraph* suppressed?)
(s3 top* title paragraph* suppressed? '() (finish-section)))
(lambda (top* clause)
(s3 top* title paragraph* suppressed? (cons clause rclause*) rsection*))))
(s1 top*)))
(define (go init-nts top* mddir)
(let ([grammar (parse-grammar top*)])
(let* ([clause* (apply append (map section-clause* (grammar-section* grammar)))]
[terminal-clause* (filter terminal-clause? clause*)]
[binop-clause* (filter binop-clause? clause*)]
[regular-clause* (left-factor (filter regular-clause? clause*))]
[env (make-env terminal-clause* (append binop-clause* regular-clause*))])
(define (elt-helper x)
(cond
[(sep-elt? x) #`(#,(if (sep-elt-+? x) #'sepby1 #'sepby) #,(elt-helper (sep-elt-elt x)) (sep->parser #,(sep-elt-sep x)))]
[(opt-elt? x) #`(optional #,(elt-helper (opt-elt-elt x)) #,(opt-elt-default x))]
[(kleene-elt? x) #`(#,(if (kleene-elt-+? x) #'many+ #'many) #,(elt-helper (kleene-elt-elt x)))]
[(constant-elt? x) #`(constant->parser '#,(constant-elt-k x))]
[(id-elt? x) (lookup (id-elt-id x) env)]
[else (errorf 'elt-helper "unhandled elt ~s\n" x)]))
(define (binop-helper clause)
#`[#,(clause-id clause)
(infix-expression-parser
#,(map (lambda (level)
(syntax-case level ()
[(L/R op1 ... op2)
(or (free-identifier=? #'L/R #'LEFT) (free-identifier=? #'L/R #'RIGHT))
#`(L/R #,(fold-right (lambda (op next) #`(++ (binop->parser '#,op) #,next)) #'(binop->parser 'op2) #'(op1 ...)))]))
(binop-clause-level* clause))
#,(elt-helper (binop-clause-term clause))
#,(binop-clause-receiver clause))])
(define (nt-helper clause)
#`[#,(clause-id clause)
#,(let f ([prod* (regular-clause-prod* clause)])
(if (null? prod*)
#'zero
(let ([elt* (production-elt* (car prod*))])
(with-syntax ([name (production-name (car prod*))]
[(elt ...) elt*]
[receiver (production-receiver (car prod*))])
(with-syntax ([(x ...) (generate-temporaries elt*)])
(with-syntax ([([y _] ...) (filter (lambda (pr) (not (constant-elt? (cadr pr)))) #'([x elt] ...))])
(with-syntax ([(where-nt ...) (map elt-helper elt*)])
#`(+++ ;; use +++ if you don't ever need to backtrack to a previous production for the same non-terminal
(lambda (inp)
(when (and 'name (grammar-trace)) (printf ">>~s(~a)~%" 'name (format-inp inp)))
(let ([res ((trace-is name (lambda (bfp efp) (receiver bfp efp y ...)) (where [x <- where-nt] ...)) inp)])
(when (and 'name (grammar-trace))
(if (stream-null? res)
(printf "<<~s(~a) failed~%" 'name (format-inp inp))
(printf "<<~s(~a) succeeded~%" 'name (format-inp inp))))
res))
#,(f (cdr prod*))))))))))])
(with-syntax ([(init-nt ...)
(syntax-case init-nts ()
[(id1 id2 ...) (andmap identifier? #'(id1 id2 ...)) #'(id1 id2 ...)]
[id (identifier? #'id) (list #'id)])])
(when mddir
(for-each
(lambda (init-nt)
(let ([mdfn (format "~a/~a.md" mddir (syntax->datum init-nt))])
(render-markdown init-nt grammar mdfn env)))
#'(init-nt ...)))
(with-syntax ([((lhs rhs) ...)
(append
(map binop-helper binop-clause*)
(map nt-helper regular-clause*))])
#'(module (init-nt ...)
(module M (init-nt ...) (define lhs rhs) ...)
(define init-nt
(let ()
(import M)
(make-top-level-parser init-nt)))
...))))))
(syntax-case x (markdown-directory)
[(_ init-nts (markdown-directory mddir) top ...)
(string? (datum mddir))
(go #'init-nts #'(top ...) (datum mddir))]
[(_ init-nts top ...) (go #'init-nts #'(top ...) #f)])))
(indirect-export define-grammar
result
zero
is
trace-is
sepby1
sepby
optional
many
many+
+++
infix-expression-parser
grammar-trace
format-inp
trace-is
make-top-level-parser
)
)