diff --git a/LOG b/LOG index 1a470c5233..f4c4fdbe2b 100644 --- a/LOG +++ b/LOG @@ -727,3 +727,18 @@ workarea, s/Mf-base, bytevector.ss, cpnanopass.ss, date.ss, 5_6.ms, examples.ms +- fixed gather-filedata's sort of profile entries. for any two + entries x and y in the list produced by the sort call, if x's + bfp = y's bfp, x should come before y if x's efp < y's efp. + The idea is that enclosing entries should always come later + in the list. this affects only languages where two expressions + can start at the same character position. + pdhtml.ss +- expanded capability of ez-grammar with support for simpl + parsing of binary operators w/precedence and associativity + and automatically generated markdown grammar descriptions. + ez-grammar-test.ss now also doubles as a test of pdhtml for + algebraic languages. + mats/examples.ms, + examples/ez-grammar.ss, examples/ez-grammar-test.ss, + examples/Makefile diff --git a/examples/Makefile b/examples/Makefile index 5d0b987919..b1b4e1d1d5 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -25,4 +25,4 @@ needed: ${obj} all: ; echo "(time (for-each compile-file (map symbol->string '(${src}))))" | ${Scheme} -clean: ; /bin/rm -f $(obj) +clean: ; /bin/rm -f $(obj) expr.md diff --git a/examples/ez-grammar-test.ss b/examples/ez-grammar-test.ss index c9a8607d1e..3dd487187f 100644 --- a/examples/ez-grammar-test.ss +++ b/examples/ez-grammar-test.ss @@ -162,7 +162,7 @@ (unread-char c ip)) (define ($ws!) (set! $prev-pos $pos)) (define ($make-token type value) - (let ([tok (make-token type value $prev-pos (- $pos 1))]) + (let ([tok (make-token type value $prev-pos $pos)]) (set! $prev-pos $pos) tok)) (define ($lex-error c) @@ -198,7 +198,7 @@ [eof stream-nil] [char-whitespace? ($ws!) (lex)] [char-numeric? (lex-number c)] - [#\/ (seen-/)] + [#\/ (seen-slash)] [identifier-initial? (put-char sp c) (lex-identifier)] [#\( (return-token 'lparen #\()] [#\) (return-token 'rparen #\))] @@ -206,6 +206,9 @@ [#\+ (seen-plus)] [#\- (seen-minus)] [#\= (seen-equals)] + [#\* (return-token 'binop '*)] + [#\, (return-token 'sep #\,)] + [#\; (return-token 'sep #\;)] [else (lex-error c)]) (module (lex-identifier) (define (id) (return-token 'id (string->symbol (get-buf)))) @@ -215,22 +218,22 @@ [else ($unread-char c) (id)]) (define (lex-identifier) (next))) (define-state-case seen-plus c - [eof (lex-error c)] + [eof (return-token 'binop '+)] [char-numeric? (lex-signed-number #\+ c)] - [else (lex-error c)]) + [else (return-token 'binop '+)]) (define-state-case seen-minus c - [eof (lex-error c)] + [eof (return-token 'binop '-)] [char-numeric? (lex-signed-number #\- c)] - [else (lex-error c)]) + [else (return-token 'binop '-)]) (define-state-case seen-equals c - [eof (lex-error c)] + [eof (return-token 'binop '=)] [#\> (return-token 'big-arrow #f)] - [else (lex-error c)]) + [else (return-token 'binop '=)]) (module (lex-number lex-signed-number) (define (finish-number) (let ([str (get-buf)]) (let ([n (string->number str 10)]) - (unless n (errorf 'parse-ftc "unexpected number literal ~a" str)) + (unless n (errorf 'lexer "unexpected number literal ~a" str)) (return-token 'integer n)))) (define (num) (let ([c ($get-char)]) @@ -246,11 +249,11 @@ [eof (assert #f)] [char-numeric? (put-char sp c) (num)] [else (assert #f)]))) - (define-state-case seen-/ c - [eof (lex-error c)] + (define-state-case seen-slash c + [eof (return-token 'binop '/)] [#\* (lex-block-comment)] [#\/ (lex-comment)] - [else (lex-error c)]) + [else (return-token 'binop '/)]) (define-state-case lex-comment c [eof (lex)] [#\newline ($ws!) (lex)] @@ -281,34 +284,53 @@ (wr (token-efp x) p))) ) -(library (parser) - (export parse) +(module parser () + (export parse *sfd*) (import (chezscheme) (streams) (lexer)) + (define *sfd*) (module (define-grammar is sat parse-consumed-all? parse-result-value grammar-trace make-src) (define (sep->parser sep) (cond - [(char? sep) (sat (lambda (x) (eq? (token-value x) sep)))] + [(char? sep) (sat (lambda (x) (and (eq? (token-type x) 'sep) (eq? (token-value x) sep))))] [(symbol? sep) (sat (lambda (x) (eq? (token-type x) sep)))] [else (errorf "don't know how to parse separator: ~s" sep)])) (meta define (constant? x) (let ([x (syntax->datum x)]) (or (string? x) (char? x)))) (define constant->parser - (let () + (lambda (const) (define (token-sat type val) (sat (lambda (x) (let ([ans (and (token? x) (eqv? (token-type x) type) (eqv? (token-value x) val))]) (when (grammar-trace) (printf " ~s is [~s, ~a]? => ~s~%" x type val ans)) ans)))) - (lambda (const) - (if (string? const) - (case const - ["=>" (token-sat 'big-arrow #f)] - [else (token-sat 'id (string->symbol const))]) - (case const - [#\( (token-sat 'lparen const)] - [#\) (token-sat 'rparen const)] - [#\! (token-sat 'bang const)] - [else (errorf 'constant->parser "don't know how to construct a parser for ~a" const)]))))) - (define make-src (lambda (bfp efp) (and (<= bfp efp) (cons bfp efp)))) + (if (string? const) + (case const + [else (token-sat 'id (string->symbol const))]) + (case const + [#\( (token-sat 'lparen const)] + [#\) (token-sat 'rparen const)] + [#\! (token-sat 'bang const)] + [else (errorf 'constant->parser "don't know how to construct a parser for ~a" const)])))) + (meta define (constant->markdown k) + (format "~a" k)) + (define binop->parser + (lambda (binop) + (define (binop-sat type val) + (is val + (where [x <- item] (and (token? x) (eq? (token-type x) type) (eq? (token-value x) val))))) + (define (unexpected) (errorf 'binop->parser "don't know how to construct a parser for ~a" binop)) + (if (string? binop) + (binop-sat 'binop + (case binop + ["=" '=] + ["+" '+] + ["-" '-] + ["*" '*] + ["/" '/] + [else (unexpected)])) + (unexpected)))) + (define make-src + (lambda (bfp efp) + (make-source-object *sfd* bfp efp))) (include "ez-grammar.ss")) (define token @@ -330,102 +352,219 @@ (when (grammar-trace) (printf " ~s is [~s, ~s]? => ~s~%" x type val ans)) ans)))]))])) - (define-grammar expr - (expr - [integer :: src (token 'integer) => + (define identifier (token 'id)) + + (define integer (token 'integer)) + + (define-grammar expr (markdown-directory ".") + (TERMINALS + (identifier (x y) (DESCRIPTION ("An identifier is ..."))) + (integer (i) (DESCRIPTION ("An integer literal is ...")))) + (expr (e) + (BINOP src ((RIGHT "=") (LEFT "+" "-") (LEFT "*" "/")) t) => + (lambda (src op x y) + (make-annotation `(,op ,x ,y) src `(,op ,(annotation-stripped x) ,(annotation-stripped y))))) + (term (t) + [test-SEP+ :: src "sepplus" #\( (SEP+ e #\;) #\) => + (lambda (src e+) + (make-annotation `(SEP+ ,@e+) src `(SEP+ ,@(map annotation-stripped e+))))] + [test-SEP* :: src "sepstar" #\( (SEP* e #\,) #\) => + (lambda (src e*) + (make-annotation `(SEP* ,@e*) src `(SEP* ,@(map annotation-stripped e*))))] + [test-OPT :: src "opt" #\( (OPT e #f) #\) => + (lambda (src maybe-e) + (if maybe-e + (make-annotation `(OPT ,maybe-e) src `(OPT ,(annotation-stripped maybe-e))) + (make-annotation `(OPT) src `(OPT))))] + [test-K+ :: src "kplus" #\( (K+ e) #\) => + (lambda (src e+) + (make-annotation `(K+ ,@e+) src `(K+ ,@(map annotation-stripped e+))))] + [test-K* :: src "kstar" #\( (K* e) #\) => + (lambda (src e*) + (make-annotation `(K* ,@e*) src `(K* ,@(map annotation-stripped e*))))] + [varref :: src x => + (lambda (src id) + (make-annotation `(id ,id) src `(id ,id)))] + [intref :: src i => (lambda (src n) - `(int ,src ,n))] - [becomes :: src "=>" expr => - (lambda (src e) - `(=> ,src ,e))] - [becomes! :: src "=>" #\! expr => - (lambda (src e) - `(=>! ,src ,e))] - [group :: src #\( expr #\) => + (make-annotation `(int ,n) src `(int ,n)))] + [group :: src #\( e #\) => (lambda (src e) `(group ,src ,e))])) (define parse - (lambda (fn) - (let ([ip (open-input-file fn)]) - (dynamic-wind - void - (lambda () - (let ([token-stream (lexer fn ip)]) - (define (oops) - (let ([last-token (stream-last-forced token-stream)]) - (if last-token - (errorf 'parse "parse error at or before character ~s of ~a" (token-bfp last-token) fn) - (errorf 'parse "no expressions found in ~a" fn)))) -;;; return the first result, if any, for which the input stream was entirely consumed. - (let loop ([res* (expr token-stream)]) - (if (null? res*) - (oops) - (let ([res (car res*)]) - (if (parse-consumed-all? res) - (parse-result-value res) - (loop (cdr res*)))))))) - (lambda () (close-input-port ip)))))) - ) + (lambda (fn ip) + (let ([token-stream (lexer fn ip)]) + (define (oops) + (let ([last-token (stream-last-forced token-stream)]) + (if last-token + (errorf 'parse "parse error at or before character ~s of ~a" (token-bfp last-token) fn) + (errorf 'parse "no expressions found in ~a" fn)))) + ;;; return the first result, if any, for which the input stream was entirely consumed. + (let loop ([res* (expr token-stream)]) + (if (null? res*) + (oops) + (let ([res (car res*)]) + (if (parse-consumed-all? res) + (parse-result-value res) + (loop (cdr res*)))))))))) + +(define run + (lambda (fn) + (import parser) + (let* ([ip (open-file-input-port fn)] + [sfd (make-source-file-descriptor fn ip #t)] + [ip (transcoded-port ip (native-transcoder))]) + (fluid-let ([*sfd* sfd]) + (eval + `(let () + (define-syntax define-ops + (lambda (x) + (syntax-case x () + [(_ op ...) + #`(begin + (define-syntax op + (lambda (x) + (let ([src (annotation-source (syntax->annotation x))]) + (with-syntax ([bfp (source-object-bfp src)] [efp (source-object-efp src)]) + (syntax-case x () + [(_ e (... ...)) #'`(op (bfp . efp) ,e (... ...))]))))) + ...)]))) + (define-ops SEP+ SEP* OPT K+ K* id int group) + (define-ops = + - * /) + (define x 'x) + (define y 'y) + (define z 'z) + ,(dynamic-wind + void + (lambda () (parse fn ip)) + (lambda () (close-input-port ip))))))))) (define (ez-grammar-test) - (import (parser)) - (with-output-to-file "ez-grammar-test1" - (lambda () - (for-each display - '( - "1347\n" - ))) - 'replace) + (define n 0) + (define test + (lambda (line* okay?) + (set! n (+ n 1)) + (let ([fn (format "testfile~s" n)]) + (with-output-to-file fn + (lambda () (for-each (lambda (line) (printf "~a\n" line)) line*)) + 'replace) + (let ([result (parameterize ([compile-profile #t] [compile-interpret-simple #f]) + (guard (c [else c]) (run fn)))]) + (guard (c [else #f]) (profile-dump-html)) + (delete-file fn) + (delete-file "profile.html") + (delete-file (format "~a.html" fn)) + (unless (okay? result) + (printf "test ~s failed\n" n) + (printf " test code:") + (for-each (lambda (line) (printf " ~a\n" line)) line*) + (printf " result:\n ") + (if (condition? result) + (begin (display-condition result) (newline)) + (parameterize ([pretty-initial-indent 4]) + (pretty-print result))) + (newline)))))) - (with-output-to-file "ez-grammar-test2" - (lambda () - (for-each display - '( - "\n" - "/* hello */ => ( => 1253) /* goodbye\n" - " 111111111122222222223333333333\n" - "123456789012345678901234567890123456789\n" - "*/\n" - ))) - 'replace) + (define-syntax returns + (syntax-rules () + [(_ k) (lambda (x) (equal? x 'k))])) - (with-output-to-file "ez-grammar-test3err" - (lambda () - (for-each display - '( - "\n" - "/* hello */ => (=> 1253 =>) /* goodbye\n" - " 111111111122222222223333333333\n" - "123456789012345678901234567890123456789\n" - "*/\n" - ))) - 'replace) + (define-syntax oops + (syntax-rules () + [(_ (c) e1 e2 ...) + (lambda (c) (and (condition? c) e1 e2 ...))])) - (with-output-to-file "ez-grammar-test4err" - (lambda () - (for-each display - '( - "3 /*\n" - ))) - 'replace) + (test + '( + "1347" + ) + (returns + (int (0 . 4) 1347))) - (unless (guard (c [else #f]) (equal? (parse "ez-grammar-test1") (quote (int (0 . 3) 1347)))) - (printf "test 1 failed\n")) - (delete-file "ez-grammar-test1") - (unless (guard (c [else #f]) (equal? (parse "ez-grammar-test2") (quote (=> (13 . 25) (group (16 . 25) (=> (18 . 24) (int (21 . 24) 1253))))))) - (printf "test 2 failed\n")) - (delete-file "ez-grammar-test2") - (unless (guard (c [else (and (equal? (condition-message c) "parse error at or before character ~s of ~a") (equal? (condition-irritants c) (quote (25 "ez-grammar-test3err"))))]) (parse "ez-grammar-test3err") #f) - (printf "test 3 failed\n")) - (delete-file "ez-grammar-test3err") - (unless (guard (c [else (and (equal? (condition-message c) "unexpected ~a at character ~s of ~a") (equal? (condition-irritants c) (quote ("eof" 6 "ez-grammar-test4err"))))]) (parse "ez-grammar-test4err") #f) - (printf "test 4 failed\n")) - (delete-file "ez-grammar-test4err") - (printf "end of tests\n")) + (test + '( + "3 /*" + ) + (oops (c) + (equal? (condition-message c) "unexpected ~a at character ~s of ~a") + (equal? (condition-irritants c) '("eof" 6 "testfile2")))) + + (test + '( + "3 / 4 + 5 opt(6)" + ) + (oops (c) + (equal? (condition-message c) "parse error at or before character ~s of ~a") + (equal? (condition-irritants c) '(10 "testfile3")))) + + (test + '( + "x = y = 5" + ) + (returns + (= + (0 . 9) + (id (0 . 1) x) + (= (4 . 9) (id (4 . 5) y) (int (8 . 9) 5))))) + + (test + '( + "x = y = x + 5 - z * 7 + 8 / z" + ) + (returns + (= + (0 . 29) + (id (0 . 1) x) + (= + (4 . 29) + (id (4 . 5) y) + (+ + (8 . 29) + (- + (8 . 21) + (+ (8 . 13) (id (8 . 9) x) (int (12 . 13) 5)) + (* (16 . 21) (id (16 . 17) z) (int (20 . 21) 7))) + (/ (24 . 29) (int (24 . 25) 8) (id (28 . 29) z))))))) + + (test + '( + "opt(opt(opt()))" + ) + (returns + (OPT (0 . 15) (OPT (4 . 14) (OPT (8 . 13)))))) + + (test + '( + "kstar(3 4 kplus(1 2 3 kstar()))" + ) + (returns + (K* (0 . 31) + (int (6 . 7) 3) + (int (8 . 9) 4) + (K+ (10 . 30) + (int (16 . 17) 1) + (int (18 . 19) 2) + (int (20 . 21) 3) + (K* (22 . 29)))))) + + (test + '( + "sepplus( opt() ; opt(5) ; sepstar(17, 34) ; sepstar())" + ) + (returns + (SEP+ (0 . 54) + (OPT (9 . 14)) + (OPT (17 . 23) (int (21 . 22) 5)) + (SEP* (26 . 41) (int (34 . 36) 17) (int (38 . 40) 34)) + (SEP* (44 . 53))))) + + (delete-file "expr.md") + (printf "~s tests ran\n" n) + ) #!eof -The following should print only "end of tests". +The following should print only " tests ran". -echo '(ez-grammar-test)' | scheme -q ez-grammar-test.ss +echo '(ez-grammar-test)' | ../bin/scheme -q ez-grammar-test.ss diff --git a/examples/ez-grammar.ss b/examples/ez-grammar.ss index 744217793f..1d95dd806b 100644 --- a/examples/ez-grammar.ss +++ b/examples/ez-grammar.ss @@ -44,7 +44,7 @@ ;;; See ez-grammar-test.ss for an example. (module (define-grammar - is sat peek seq ++ +++ many many+ ? + is sat item peek seq ++ +++ many many+ ? parse-consumed-all? parse-result-value parse-result-unused grammar-trace ) @@ -54,6 +54,7 @@ (define-record-type parse-result (nongenerative parse-result) + (sealed #t) (fields value unused)) ;; to enable $trace-is to determine the ending file position (efp) of a parse @@ -144,7 +145,7 @@ (define ($trace-is name proc head) (lambda (unused) (let ([res (proc (token-bfp (current-token head)) (token-efp (preceding-token unused)))]) - (when (and 'name (grammar-trace)) (printf "<<~s = ~s~%" 'name res)) + (when (and name (grammar-trace)) (printf "<<~s = ~s~%" name res)) (stream (make-parse-result res unused))))) (define-syntax trace-is @@ -203,6 +204,46 @@ (define (+++ p q) (first (++ p q))) ;; choose first match, cut backtracking + (define-syntax infix-expression-parser + (lambda (x) + (syntax-case x () + [(_ ((L/R ?op-parser) ...) ?term-parser ?receiver) + (with-syntax ([(op-parser ...) (generate-temporaries #'(?op-parser ...))]) + #`(let ([op-parser ?op-parser] ... [term-parser (lambda (inp) (?term-parser inp))] [receiver ?receiver]) + #,(let f ([ls #'((L/R op-parser) ...)]) + (if (null? ls) + #'term-parser + #`(let ([next #,(f (cdr ls))]) + #,(syntax-case (car ls) (LEFT RIGHT) + [(LEFT op-parser) + #'(let () + (define-record-type frob (nongenerative) (sealed #t) (fields op y efp)) + (trace-is binop-left (lambda (bfp ignore-this-efp) + (fold-left + (lambda (x f) (receiver bfp (frob-efp f) (frob-op f) x (frob-y f))) + x f*)) + (where + [x <- next] + [f* <- (rec this + (optional + (is (cons f f*) + (where + [f <- (trace-is binop-left-tail (lambda (bfp efp) (make-frob op y efp)) + (where + [op <- op-parser] + [y <- next]))] + [f* <- this])) + '()))])))] + [(RIGHT op-parser) + #'(rec this + (+++ + (trace-is binop-right (lambda (bfp efp) (receiver bfp efp op x y)) + (where + [x <- next] + [op <- op-parser] + [y <- this])) + next))]))))))]))) + (define (format-inp inp) (if (no-more-tokens? inp) "#" @@ -210,43 +251,132 @@ (define-syntax define-grammar (lambda (x) - (define-record-type production + (define-record-type grammar (nongenerative) - (fields name elt* receiver)) + (sealed #t) + (fields title paragraph* section*)) + (define-record-type section + (nongenerative) + (sealed #t) + (fields title paragraph* suppressed? clause*)) (define-record-type clause (nongenerative) - (fields id prod*)) + (fields id alias* before-paragraph* after-paragraph*)) + (define-record-type regular-clause + (nongenerative) + (sealed #t) + (parent clause) + (fields prod*)) + (define-record-type binop-clause + (nongenerative) + (sealed #t) + (parent clause) + (fields level* term receiver) + (protocol + (lambda (pargs->new) + (lambda (nt alias* before-paragraph* after-paragraph* level* term src? receiver) + ((pargs->new nt alias* before-paragraph* after-paragraph*) level* term + #`(lambda (bfp efp op x y) + #,(if src? + #`(#,receiver (make-src bfp efp) op x y) + #`(#,receiver op x y)))))))) + (define-record-type terminal-clause + (nongenerative) + (sealed #t) + (fields term*)) + (define-record-type terminal + (nongenerative) + (sealed #t) + (fields parser alias* paragraph*)) + (define-record-type production + (nongenerative) + (sealed #t) + (fields name paragraph* elt* receiver) + (protocol + (let () + (define (check-elts elt*) + (for-each (lambda (elt) (unless (elt? elt) (errorf 'make-production "~s is not an elt" elt))) elt*)) + (lambda (new) + (case-lambda + [(name elt* receiver) + (check-elts elt*) + (new name #f elt* receiver)] + [(name paragraph* elt* receiver) + (check-elts elt*) + (new name paragraph* elt* receiver)]))))) + (define-record-type elt + (nongenerative)) + (define-record-type sep-elt + (nongenerative) + (sealed #t) + (parent elt) + (fields +? elt sep)) + (define-record-type opt-elt + (nongenerative) + (sealed #t) + (parent elt) + (fields elt default)) + (define-record-type kleene-elt + (nongenerative) + (sealed #t) + (parent elt) + (fields +? elt)) + (define-record-type constant-elt + (nongenerative) + (sealed #t) + (parent elt) + (fields k)) + (define-record-type id-elt + (nongenerative) + (sealed #t) + (parent elt) + (fields id)) + (define paragraph? + (lambda (x) + (syntax-case x (include) + [(include filename) (string? (datum filename))] + [(str ...) (andmap string? (datum (str ...)))]))) (define (gentemp) (datum->syntax #'* (gensym))) (define (elt-temps elt*) + (for-each (lambda (elt) (unless (elt? elt) (errorf 'elt-temps "~s is not an elt" elt))) elt*) (fold-left (lambda (t* elt) - (if (constant? elt) t* (cons (gentemp) t*))) + (if (constant-elt? elt) t* (cons (gentemp) t*))) '() elt*)) - (define parse-production - (lambda (cl) - (syntax-case cl (:: src =>) - [[name :: src elt ... => receiver] - (make-production #'name #'(elt ...) - (with-syntax ([(t ...) (elt-temps #'(elt ...))]) - #'(lambda (bfp efp t ...) - (receiver (make-src bfp efp) t ...))))] - [[name :: elt ... => receiver] - (make-production #'name #'(elt ...) - (with-syntax ([(t ...) (elt-temps #'(elt ...))]) - #'(lambda (bfp efp t ...) - (receiver t ...))))]))) (define (left-factor clause*) (define syntax-equal? (lambda (x y) (equal? (syntax->datum x) (syntax->datum y)))) + (define (elt-equal? x y) + (cond + [(sep-elt? x) + (and (sep-elt? y) + (eq? (sep-elt-+? x) (sep-elt-+? y)) + (elt-equal? (sep-elt-elt x) (sep-elt-elt y)) + (syntax-equal? (sep-elt-sep x) (sep-elt-sep y)))] + [(opt-elt? x) + (and (opt-elt? y) + (elt-equal? (opt-elt-elt x) (opt-elt-elt y)) + (syntax-equal? (opt-elt-default x) (opt-elt-default y)))] + [(kleene-elt? x) + (and (kleene-elt? y) + (eq? (kleene-elt-+? x) (kleene-elt-+? y)) + (elt-equal? (kleene-elt-elt x) (kleene-elt-elt y)))] + [(constant-elt? x) + (and (constant-elt? y) + (syntax-equal? (constant-elt-k x) (constant-elt-k y)))] + [(id-elt? x) + (and (id-elt? y) + (syntax-equal? (id-elt-id x) (id-elt-id y)))] + [else #f])) (let lp1 ([clause* clause*] [new-clause* '()]) (if (null? clause*) (reverse new-clause*) (let ([clause (car clause*)]) - (let lp2 ([prod* (clause-prod* clause)] [new-prod* '()] [clause* (cdr clause*)]) + (let lp2 ([prod* (regular-clause-prod* clause)] [new-prod* '()] [clause* (cdr clause*)]) (if (null? prod*) - (lp1 clause* (cons (make-clause (clause-id clause) (reverse new-prod*)) new-clause*)) + (lp1 clause* (cons (make-regular-clause (clause-id clause) (clause-alias* clause) '() '() (reverse new-prod*)) new-clause*)) (let ([prod (car prod*)] [prod* (cdr prod*)]) (let ([elt* (production-elt* prod)]) (if (null? elt*) @@ -256,7 +386,7 @@ (lambda (prod) (let ([elt* (production-elt* prod)]) (and (not (null? elt*)) - (syntax-equal? (car elt*) elt)))) + (elt-equal? (car elt*) elt)))) prod*)]) (if (null? haves) (lp2 prod* (cons prod new-prod*) clause*) @@ -269,15 +399,15 @@ (if (ormap null? elt**) '() (let ([elt (caar elt**)]) - (if (andmap (lambda (elt*) (syntax-equal? (car elt*) elt)) (cdr elt**)) - (cons (caar elt**) (f elt**)) + (if (andmap (lambda (elt*) (elt-equal? (car elt*) elt)) (cdr elt**)) + (cons elt (f elt**)) '()))))))]) (let ([t (gentemp)] [n (length prefix)] [t* (elt-temps prefix)]) (lp2 have-nots - (cons (make-production #f (append prefix (list t)) + (cons (make-production #f (append prefix (list (make-id-elt t))) #`(lambda (bfp efp #,@t* p) (p bfp #,@t*))) new-prod*) - (cons (make-clause t + (cons (make-regular-clause t '() '() '() (map (lambda (prod) (let ([elt* (list-tail (production-elt* prod) n)]) (make-production (production-name prod) elt* @@ -287,54 +417,325 @@ (#,(production-receiver prod) bfp efp #,@t* #,@u*))))))) haves)) clause*))))))))))))))))) - (define (nt-helper clause*) - (define (elt-helper x) - (syntax-case x (SEP+ SEP* OPT K* K+) - [(SEP+ p sep) #`(sepby1 #,(elt-helper #'p) (sep->parser sep))] - [(SEP* p sep) #`(sepby #,(elt-helper #'p) (sep->parser sep))] - [(OPT p dflt) #`(optional #,(elt-helper #'p) dflt)] - [(K* p) #`(many #,(elt-helper #'p))] - [(K+ p) #`(many+ #,(elt-helper #'p))] - [k (constant? #'k) #'(constant->parser 'k)] - [p #'p])) - (let loop ([clause* clause*] [binding* '()]) - (if (null? clause*) - binding* - (loop - (cdr clause*) - (cons - #`[#,(clause-id (car clause*)) - #,(let f ([prod* (clause-prod* (car clause*))]) - (if (null? prod*) - #'zero - (with-syntax ([name (production-name (car prod*))] - [(elt ...) (production-elt* (car prod*))] - [receiver (production-receiver (car prod*))]) - (with-syntax ([(x ...) (generate-temporaries #'(elt ...))]) - (with-syntax ([([y _] ...) (filter (lambda (pr) (not (constant? (cadr pr)))) #'([x elt] ...))]) - (with-syntax ([(where-nt ...) (map elt-helper #'(elt ...))]) - #`(+++ ;; use +++ if you don't ever need to backtrack to a previous production for the same non-terminal - (lambda (inp) - (when (and 'name (grammar-trace)) (printf ">>~s(~a)~%" 'name (format-inp inp))) - (let ([res ((trace-is name (lambda (bfp efp) (receiver bfp efp y ...)) (where [x <- where-nt] ...)) inp)]) - (when (and 'name (grammar-trace)) - (if (stream-null? res) - (printf "<<~s(~a) failed~%" 'name (format-inp inp)) - (printf "<<~s(~a) succeeded~%" 'name (format-inp inp)))) - res)) - #,(f (cdr prod*)))))))))] - binding*))))) - (syntax-case x () - [(_ init-nt [nt prod prods ...] ...) - (with-syntax ([(binding ...) - (nt-helper - (left-factor - (map (lambda (nt prod*) (make-clause nt (map parse-production prod*))) - #'(nt ...) - #'((prod prods ...) ...))))]) - #'(define init-nt - (letrec (binding ...) - (make-top-level-parser init-nt))))]))) + (define (make-env tclause* clause*) + (let ([env (make-hashtable (lambda (x) (symbol-hash (syntax->datum x))) free-identifier=?)]) + (define (insert parser) + (lambda (name) + (let ([a (hashtable-cell env name #f)]) + (when (cdr a) (syntax-error name "duplicate terminal/non-terminal name")) + (set-cdr! a parser)))) + (for-each + (lambda (tclause) + (for-each + (lambda (term) + (let ([parser (terminal-parser term)]) + (for-each (insert parser) (cons parser (terminal-alias* term))))) + (terminal-clause-term* tclause))) + tclause*) + (for-each + (lambda (clause) + (let ([id (clause-id clause)]) + (for-each (insert id) (cons id (clause-alias* clause))))) + clause*) + env)) + (define (lookup id env) + (or (hashtable-ref env id #f) + (syntax-error id "unrecognized terminal or nonterminal"))) + (define (render-markdown name grammar mdfn env) + (define (separators sep ls) + (if (null? ls) + "" + (apply string-append + (cons (car ls) + (map (lambda (s) (format "~a~a" sep s)) (cdr ls)))))) + (define (render-paragraph hard-leading-newline?) + (lambda (paragraph) + (define (md-text s) + (list->string + (fold-right + (lambda (c ls) + (case c + [(#\\) (cons* c c ls)] + [else (cons c ls)])) + '() + (string->list s)))) + (syntax-case paragraph (include) + [(include filename) + (string? (datum filename)) + (let ([text (call-with-port (open-input-file (datum filename)) get-string-all)]) + (unless (equal? text "") + (if hard-leading-newline? (printf "\\\n") (newline)) + (display-string text)))] + [(sentence ...) + (andmap string? (datum (sentence ...))) + (let ([sentence* (datum (sentence ...))]) + (unless (null? sentence*) + (if hard-leading-newline? (printf "\\\n") (newline)) + (printf "~a\n" (separators " " (map md-text sentence*)))))]))) + (define (format-elt x) + (cond + [(sep-elt? x) + (let* ([one (format-elt (sep-elt-elt x))] + [sep (constant->markdown (syntax->datum (sep-elt-sep x)))] + [seq (format "~a  ~a  `...`" one sep)]) + (if (sep-elt-+? x) + seq + (format "OPT(~a)" seq)))] + [(opt-elt? x) + (format "~a~~opt~~" (format-elt (opt-elt-elt x)))] + [(kleene-elt? x) + (let ([one (format-elt (kleene-elt-elt x))]) + (if (kleene-elt-+? x) + (format "~a  `...`" one) + (format "OPT(~a)" one)))] + [(constant-elt? x) (constant->markdown (syntax->datum (constant-elt-k x)))] + [(id-elt? x) (format "[*~s*](#~s)" + (syntax->datum (id-elt-id x)) + (syntax->datum (lookup (id-elt-id x) env)))] + [else (errorf 'format-elt "unexpected elt ~s" x)])) + (define (render-elt x) + (printf "  ~a" (format-elt x))) + (define (render-production prod) + (unless (null? (production-elt* prod)) + (printf " : ") + (for-each render-elt (production-elt* prod)) + (printf "\n")) + (when (and (null? (production-elt* prod)) + (not (null? (production-paragraph* prod)))) + (errorf 'render-production "empty production must not have description: ~a" (production-paragraph* prod))) + (for-each (render-paragraph #t) (production-paragraph* prod))) + (define (render-clause clause) + (define (render-aliases alias*) + (unless (null? alias*) + (printf " \naliases: ~{*~a*~^, ~}\n" (map syntax->datum alias*)))) + (if (terminal-clause? clause) + (for-each + (lambda (term) + (printf "\n#### *~a* {#~:*~a}\n" (syntax->datum (terminal-parser term))) + (render-aliases (terminal-alias* term)) + (for-each (render-paragraph #f) (terminal-paragraph* term))) + (terminal-clause-term* clause)) + (let ([id (syntax->datum (clause-id clause))]) + (printf "\n#### *~a* {#~:*~a}\n" id) + (render-aliases (clause-alias* clause)) + (for-each (render-paragraph #f) (clause-before-paragraph* clause)) + (printf "\nsyntax:\n") + (if (binop-clause? clause) + (let ([level* (binop-clause-level* clause)]) + (let loop ([level* level*] [first? #t]) + (unless (null? level*) + (let ([level (syntax->datum (car level*))] [level* (cdr level*)]) + (let ([L/R (car level)] [op* (cdr level)]) + (printf " : _~(~a~)-associative" L/R) + (if first? + (if (null? level*) + (printf ":_\n") + (printf ", highest precedence:_\n")) + (if (null? level*) + (printf ", lowest precedence:_\n") + (printf ":_\n"))) + (for-each + (lambda (op) (printf " : ~s ~a ~s\n" id (constant->markdown op) id)) + op*)) + (loop level* #f)))) + (printf " : _leaves:_\n") + (printf " : ") + (render-elt (binop-clause-term clause)) + (printf "\n")) + (for-each render-production (or (regular-clause-prod* clause) '()))) + (for-each (render-paragraph #f) (clause-after-paragraph* clause))))) + (define (render-section section) + (unless (section-suppressed? section) + (printf "\n## ~a\n" (or (section-title section) "The section")) + (for-each (render-paragraph #f) (section-paragraph* section)) + (for-each render-clause (section-clause* section)))) + (with-output-to-file mdfn + (lambda () + (printf "# ~a\n" (or (grammar-title grammar) "The grammar")) + (for-each (render-paragraph #f) (grammar-paragraph* grammar)) + (for-each render-section (grammar-section* grammar))) + 'replace)) + (module (parse-grammar) + (define parse-elt + (lambda (elt) + (syntax-case elt (SEP+ SEP* OPT K* K+) + [(SEP+ p sep) (make-sep-elt #t (parse-elt #'p) #'sep)] + [(SEP* p sep) (make-sep-elt #f (parse-elt #'p) #'sep)] + [(OPT p default) (make-opt-elt (parse-elt #'p) #'default)] + [(K+ p) (make-kleene-elt #t (parse-elt #'p))] + [(K* p) (make-kleene-elt #f (parse-elt #'p))] + [k (constant? #'k) (make-constant-elt #'k)] + [id (identifier? #'id) (make-id-elt #'id)] + [_ (syntax-error elt "invalid production element")]))) + (define parse-production + (lambda (prod) + (define (finish name src? paragraph* elt* receiver) + (let ([elt* (map parse-elt elt*)]) + (make-production name paragraph* elt* + (with-syntax ([(t ...) (elt-temps elt*)]) + #`(lambda (bfp efp t ...) + #,(if src? + #`(#,receiver (make-src bfp efp) t ...) + #`(#,receiver t ...))))))) + (syntax-case prod (:: src =>) + [[name :: src elt ... => receiver] + (finish #'name #t '() #'(elt ...) #'receiver)] + [[name :: elt ... => receiver] + (finish #'name #f '() #'(elt ...) #'receiver)]))) + (define (parse-terminal term) + (syntax-case term (DESCRIPTION) + [(parser (alias ...) (DESCRIPTION paragraph ...)) + (and (identifier? #'parser) (andmap identifier? #'(alias ...)) (andmap paragraph? #'(paragraph ...))) + (make-terminal #'parser #'(alias ...) #'(paragraph ...))] + [(parser (alias ...)) + (and (identifier? #'parser) (andmap identifier? #'(alias ...))) + (make-terminal #'parser #'(alias ...) '())])) + (define (parse-clause clause nt alias* before-paragraph* after-paragraph* stuff*) + (syntax-case stuff* (BINOP :: src =>) + [((BINOP src (level ...) term) => receiver) + (make-binop-clause nt alias* before-paragraph* after-paragraph* #'(level ...) (parse-elt #'term) #t #'receiver)] + [((BINOP (level ...) term) => receiver) + (make-binop-clause nt alias* before-paragraph* after-paragraph* #'(level ...) (parse-elt #'term) #f #'receiver)] + [(prod prods ...) + (make-regular-clause nt alias* before-paragraph* after-paragraph* (map parse-production #'(prod prods ...)))] + [else (syntax-error clause)])) + (define (parse-top top* knull kgrammar ksection kclause) + (if (null? top*) + (knull) + (let ([top (car top*)] [top* (cdr top*)]) + (syntax-case top (GRAMMAR SECTION SUPPRESSED DESCRIPTION BINOP TERMINALS src =>) + [(GRAMMAR title paragraph ...) + (andmap paragraph? #'(paragraph ...)) + (kgrammar top* (datum title) #'(paragraph ...))] + [(SECTION SUPPRESSED title paragraph ...) + (andmap paragraph? #'(paragraph ...)) + (ksection top* (datum title) #'(paragraph ...) #t)] + [(SECTION title paragraph ...) + (andmap paragraph? #'(paragraph ...)) + (ksection top* (datum title) #'(paragraph ...) #f)] + [(TERMINALS term ...) + (kclause top* (make-terminal-clause (map parse-terminal #'(term ...))))] + [(TERMINALS term ...) + (kclause top* (make-terminal-clause (map parse-terminal #'(term ...))))] + [(nt (alias ...) (DESCRIPTION paragraph1 ...) stuff ... (DESCRIPTION paragraph2 ...)) + (and (identifier? #'nt) (andmap identifier? #'(alias ...)) (andmap paragraph? #'(paragraph1 ...)) (andmap paragraph? #'(paragraph2 ...))) + (kclause top* (parse-clause top #'nt #'(alias ...) #'(paragraph1 ...) #'(paragraph2 ...) #'(stuff ...)))] + [(nt (alias ...) (DESCRIPTION paragraph ...) stuff ...) + (and (identifier? #'nt) (andmap identifier? #'(alias ...)) (andmap paragraph? #'(paragraph ...))) + (kclause top* (parse-clause top #'nt #'(alias ...) #'(paragraph ...) '() #'(stuff ...)))] + [(nt (alias ...) stuff ... (DESCRIPTION paragraph ...)) + (and (identifier? #'nt) (andmap identifier? #'(alias ...)) (andmap paragraph? #'(paragraph ...))) + (kclause top* (parse-clause top #'nt #'(alias ...) '() #'(paragraph ...) #'(stuff ...)))] + [(nt (alias ...) stuff ...) + (and (identifier? #'nt) (andmap identifier? #'(alias ...))) + (kclause top* (parse-clause top #'nt #'(alias ...) '() '() #'(stuff ...)))])))) + (define (parse-grammar top*) + (define (misplaced-grammar-error top) + (syntax-error top "unexpected GRAMMAR element after other elements")) + (define (s1 top*) ; looking for GRAMMAR form, first SECTION form, or clause + (parse-top top* + (lambda () (make-grammar #f '() '())) + (lambda (top* title paragraph*) + (make-grammar title paragraph* (s2 top*))) + (lambda (top* title paragraph* suppressed?) + (make-grammar #f '() + (s3 top* title paragraph* suppressed? '() '()))) + (lambda (top* clause) + (make-grammar #f '() + (s3 top* #f '() #f (list clause) '()))))) + (define (s2 top*) ; looking for first SECTION form or clause + (parse-top top* + (lambda () '()) + (lambda (title paragraph*) (misplaced-grammar-error (car top*))) + (lambda (top* title paragraph* suppressed?) + (s3 top* title paragraph* suppressed? '() '())) + (lambda (top* clause) + (s3 top* #f '() #f (list clause) '())))) + (define (s3 top* title paragraph* suppressed? rclause* rsection*) ; steady state: looking for remaining SECTION forms and clauses + (define (finish-section) + (cons (make-section title paragraph* suppressed? (reverse rclause*)) rsection*)) + (parse-top top* + (lambda () (reverse (finish-section))) + (lambda (title paragraph*) (misplaced-grammar-error (car top*))) + (lambda (top* title paragraph* suppressed?) + (s3 top* title paragraph* suppressed? '() (finish-section))) + (lambda (top* clause) + (s3 top* title paragraph* suppressed? (cons clause rclause*) rsection*)))) + (s1 top*))) + (define (go init-nts top* mddir) + (let ([grammar (parse-grammar top*)]) + (let* ([clause* (apply append (map section-clause* (grammar-section* grammar)))] + [terminal-clause* (filter terminal-clause? clause*)] + [binop-clause* (filter binop-clause? clause*)] + [regular-clause* (left-factor (filter regular-clause? clause*))] + [env (make-env terminal-clause* (append binop-clause* regular-clause*))]) + (define (elt-helper x) + (cond + [(sep-elt? x) #`(#,(if (sep-elt-+? x) #'sepby1 #'sepby) #,(elt-helper (sep-elt-elt x)) (sep->parser #,(sep-elt-sep x)))] + [(opt-elt? x) #`(optional #,(elt-helper (opt-elt-elt x)) #,(opt-elt-default x))] + [(kleene-elt? x) #`(#,(if (kleene-elt-+? x) #'many+ #'many) #,(elt-helper (kleene-elt-elt x)))] + [(constant-elt? x) #`(constant->parser '#,(constant-elt-k x))] + [(id-elt? x) (lookup (id-elt-id x) env)] + [else (errorf 'elt-helper "unhandled elt ~s\n" x)])) + (define (binop-helper clause) + #`[#,(clause-id clause) + (infix-expression-parser + #,(map (lambda (level) + (syntax-case level () + [(L/R op1 ... op2) + (or (free-identifier=? #'L/R #'LEFT) (free-identifier=? #'L/R #'RIGHT)) + #`(L/R #,(fold-right (lambda (op next) #`(++ (binop->parser '#,op) #,next)) #'(binop->parser 'op2) #'(op1 ...)))])) + (binop-clause-level* clause)) + #,(elt-helper (binop-clause-term clause)) + #,(binop-clause-receiver clause))]) + (define (nt-helper clause) + #`[#,(clause-id clause) + #,(let f ([prod* (regular-clause-prod* clause)]) + (if (null? prod*) + #'zero + (let ([elt* (production-elt* (car prod*))]) + (with-syntax ([name (production-name (car prod*))] + [(elt ...) elt*] + [receiver (production-receiver (car prod*))]) + (with-syntax ([(x ...) (generate-temporaries elt*)]) + (with-syntax ([([y _] ...) (filter (lambda (pr) (not (constant-elt? (cadr pr)))) #'([x elt] ...))]) + (with-syntax ([(where-nt ...) (map elt-helper elt*)]) + #`(+++ ;; use +++ if you don't ever need to backtrack to a previous production for the same non-terminal + (lambda (inp) + (when (and 'name (grammar-trace)) (printf ">>~s(~a)~%" 'name (format-inp inp))) + (let ([res ((trace-is name (lambda (bfp efp) (receiver bfp efp y ...)) (where [x <- where-nt] ...)) inp)]) + (when (and 'name (grammar-trace)) + (if (stream-null? res) + (printf "<<~s(~a) failed~%" 'name (format-inp inp)) + (printf "<<~s(~a) succeeded~%" 'name (format-inp inp)))) + res)) + #,(f (cdr prod*))))))))))]) + (with-syntax ([(init-nt ...) + (syntax-case init-nts () + [(id1 id2 ...) (andmap identifier? #'(id1 id2 ...)) #'(id1 id2 ...)] + [id (identifier? #'id) (list #'id)])]) + (when mddir + (for-each + (lambda (init-nt) + (let ([mdfn (format "~a/~a.md" mddir (syntax->datum init-nt))]) + (render-markdown init-nt grammar mdfn env))) + #'(init-nt ...))) + (with-syntax ([((lhs rhs) ...) + (append + (map binop-helper binop-clause*) + (map nt-helper regular-clause*))]) + #'(module (init-nt ...) + (module M (init-nt ...) (define lhs rhs) ...) + (define init-nt + (let () + (import M) + (make-top-level-parser init-nt))) + ...)))))) + (syntax-case x (markdown-directory) + [(_ init-nts (markdown-directory mddir) top ...) + (string? (datum mddir)) + (go #'init-nts #'(top ...) (datum mddir))] + [(_ init-nts top ...) (go #'init-nts #'(top ...) #f)]))) (indirect-export define-grammar result @@ -347,6 +748,7 @@ many many+ +++ + infix-expression-parser grammar-trace format-inp diff --git a/mats/examples.ms b/mats/examples.ms index 7235e4bf7e..f92c5d16d5 100644 --- a/mats/examples.ms +++ b/mats/examples.ms @@ -20,7 +20,7 @@ (begin (mat name (begin - (parameterize ((current-directory *examples-directory*)) + (parameterize ((source-directories (cons *examples-directory* (source-directories)))) (load (format "~a/~a.ss" *examples-directory* file)) ...) #t) @@ -588,5 +588,5 @@ edit> (examples-mat ez-grammar-test ("ez-grammar-test") (equal? (with-output-to-string ez-grammar-test) - "end of tests\n") + "8 tests ran\n") ) diff --git a/s/pdhtml.ss b/s/pdhtml.ss index 8980aea277..fd1b1e2210 100644 --- a/s/pdhtml.ss +++ b/s/pdhtml.ss @@ -204,7 +204,7 @@ (let ([entry* (sort (lambda (x y) (or (> (entrydata-bfp x) (entrydata-bfp y)) (and (= (entrydata-bfp x) (entrydata-bfp y)) - (> (entrydata-efp x) (entrydata-efp y))))) + (< (entrydata-efp x) (entrydata-efp y))))) (filedata-entry* fdata))]) #;(assert (not (null? entry*))) (let loop ([entry (car entry*)] [entry* (cdr entry*)] [new-entry* '()])