From ff64811604ec9859bca50271b94506adf236628b Mon Sep 17 00:00:00 2001 From: dybvig Date: Tue, 8 Aug 2017 22:04:52 -0400 Subject: [PATCH] - added "ez-grammar" example program examples/ez-grammar.ss, examples/ez-grammar-test.ss, examples/Makefile, examples.ms original commit: dd0dec63e7c333d2edc51e7340a8b14e7e3d130b --- LOG | 3 + examples/Makefile | 2 +- examples/ez-grammar-test.ss | 423 ++++++++++++++++++++++++++++++++++++ examples/ez-grammar.ss | 357 ++++++++++++++++++++++++++++++ mats/examples.ms | 6 + 5 files changed, 790 insertions(+), 1 deletion(-) create mode 100644 examples/ez-grammar-test.ss create mode 100644 examples/ez-grammar.ss diff --git a/LOG b/LOG index b7596a66cc..e6306febbb 100644 --- a/LOG +++ b/LOG @@ -556,3 +556,6 @@ syntax.stex, debug.stex, system.stex, release_notes.stex - fixed broken mats on Windows caused by Bash/WSL changes 7.ms, ftype.ms +- added "ez-grammar" example program + examples/ez-grammar.ss, examples/ez-grammar-test.ss, + examples/Makefile, examples.ms diff --git a/examples/Makefile b/examples/Makefile index 80231c59e4..5d0b987919 100644 --- a/examples/Makefile +++ b/examples/Makefile @@ -12,7 +12,7 @@ src = def.ss edit.ss fact.ss fatfib.ss fft.ss fib.ss freq.ss interpret.ss\ m4.ss macro.ss matrix.ss object.ss power.ss queue.ss rabbit.ss rsa.ss\ - scons.ss setof.ss socket.ss unify.ss compat.ss + scons.ss setof.ss socket.ss unify.ss compat.ss ez-grammar-test.ss obj = ${src:%.ss=%.so} Scheme = ../bin/scheme -q diff --git a/examples/ez-grammar-test.ss b/examples/ez-grammar-test.ss new file mode 100644 index 0000000000..112b357c7f --- /dev/null +++ b/examples/ez-grammar-test.ss @@ -0,0 +1,423 @@ +;;; Copyright 2017 Cisco Systems, Inc. +;;; +;;; Licensed under the Apache License, Version 2.0 (the "License"); +;;; you may not use this file except in compliance with the License. +;;; You may obtain a copy of the License at +;;; +;;; http://www.apache.org/licenses/LICENSE-2.0 +;;; +;;; Unless required by applicable law or agreed to in writing, software +;;; distributed under the License is distributed on an "AS IS" BASIS, +;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +;;; See the License for the specific language governing permissions and +;;; limitations under the License. + +;;; This file contains a sample parser defined via the ez-grammar system +;;; and a simple test of the parser. + +;;; This file is organized as follows: +;;; +;;; - (streams) library providing the required exports for ez-grammar and +;;; the parser. +;;; +;;; - (state-case) library exporting the state-case macro, copped from +;;; cmacros.ss, for use by the lexer. +;;; +;;; - (lexer) library providing a simple lexer that reads characters +;;; from a port and produces a corresponding stream of tokens. +;;; +;;; - (parser) library providing the sample parser. +;;; +;;; - ez-grammar-test procedure that tests the sample parser. +;;; +;;; Instructions for running the test are at the end of this file. + +(library (streams) + (export stream-cons stream-car stream-cdr stream-nil stream-null? + stream-map stream stream-append2 stream-append-all stream-last-forced) + (import (chezscheme)) + + (define stream-cons + (lambda (x thunk) + (cons x thunk))) + + (define stream-car + (lambda (x) + (car x))) + + (define stream-cdr + (lambda (x) + (when (procedure? (cdr x)) (set-cdr! x ((cdr x)))) + (cdr x))) + + (define stream-nil '()) + + (define stream-null? + (lambda (x) + (null? x))) + + (define stream-map + (lambda (f x) + (if (stream-null? x) + '() + (stream-cons (f (stream-car x)) + (lambda () + (stream-map f (stream-cdr x))))))) + + (define stream + (lambda xs + xs)) + + (define stream-append2 + (lambda (xs thunk) + (if (null? xs) + (thunk) + (stream-cons (stream-car xs) + (lambda () + (stream-append2 (stream-cdr xs) thunk)))))) + + (define stream-append-all + (lambda (stream$) ;; stream of streams + (if (stream-null? stream$) + stream$ + (stream-append2 (stream-car stream$) + (lambda () (stream-append-all (stream-cdr stream$))))))) + + (define stream-last-forced + (lambda (x) + (and (not (null? x)) + (let loop ([x x]) + (let ([next (cdr x)]) + (if (pair? next) + (loop next) + (car x))))))) +) + +(library (state-case) + (export state-case eof) + (import (chezscheme)) + + ;;; from Chez Scheme Version 9.4 cmacros.ss + (define-syntax state-case + (lambda (x) + (define state-case-test + (lambda (cvar k) + (with-syntax ((cvar cvar)) + (syntax-case k (-) + (char + (char? (datum char)) + #'(char=? cvar char)) + ((char1 - char2) + (and (char? (datum char1)) (char? (datum char2))) + #'(char<=? char1 cvar char2)) + (predicate + (identifier? #'predicate) + #'(predicate cvar)))))) + (define state-case-help + (lambda (cvar clauses) + (syntax-case clauses (else) + (((else exp1 exp2 ...)) + #'(begin exp1 exp2 ...)) + ((((k ...) exp1 exp2 ...) . more) + (with-syntax (((test ...) + (map (lambda (k) (state-case-test cvar k)) + #'(k ...))) + (rest (state-case-help cvar #'more))) + #'(if (or test ...) (begin exp1 exp2 ...) rest))) + (((k exp1 exp2 ...) . more) + (with-syntax ((test (state-case-test cvar #'k)) + (rest (state-case-help cvar #'more))) + #'(if test (begin exp1 exp2 ...) rest)))))) + (syntax-case x (eof) + ((_ cvar (eof exp1 exp2 ...) more ...) + (identifier? #'cvar) + (with-syntax ((rest (state-case-help #'cvar #'(more ...)))) + #'(if (eof-object? cvar) + (begin exp1 exp2 ...) + rest)))))) + + (define-syntax eof + (lambda (x) + (syntax-error x "misplaced aux keyword"))) +) + +(library (lexer) + (export token? token-type token-value token-bfp token-efp lexer) + (import (chezscheme) (state-case) (streams)) + + (define-record-type token + (nongenerative) + (fields type value bfp efp)) + + ;; test lexer + (define lexer + (lambda (fn ip) + (define $prev-pos 0) + (define $pos 0) + (define ($get-char) + (set! $pos (+ $pos 1)) + (get-char ip)) + (define ($unread-char c) + (set! $pos (- $pos 1)) + (unread-char c ip)) + (define ($ws!) (set! $prev-pos $pos)) + (define ($make-token type value) + (let ([tok (make-token type value $prev-pos (- $pos 1))]) + (set! $prev-pos $pos) + tok)) + (define ($lex-error c) + (errorf #f "unexpected ~a at character ~s of ~a" + (if (eof-object? c) + "eof" + (format "character '~c'" c)) + $pos fn)) + (define-syntax lex-error + (syntax-rules () + [(_ ?c) + (let ([c ?c]) + ($lex-error c) + (void))])) + (let-values ([(sp get-buf) (open-string-output-port)]) + (define (return-token type value) + (stream-cons ($make-token type value) lex)) + (module (identifier-initial? identifier-subsequent?) + (define identifier-initial? + (lambda (c) + (char-alphabetic? c))) + (define identifier-subsequent? + (lambda (c) + (or (char-alphabetic? c) + (char-numeric? c))))) + (define-syntax define-state-case + (syntax-rules () + [(_ ?def-id ?char-id clause ...) + (define (?def-id) + (let ([?char-id ($get-char)]) + (state-case ?char-id clause ...)))])) + (define-state-case lex c + [eof stream-nil] + [char-whitespace? ($ws!) (lex)] + [char-numeric? (lex-number c)] + [#\/ (seen-/)] + [identifier-initial? (put-char sp c) (lex-identifier)] + [#\( (return-token 'lparen #\()] + [#\) (return-token 'rparen #\))] + [#\! (return-token 'bang #\!)] + [#\+ (seen-plus)] + [#\- (seen-minus)] + [#\= (seen-equals)] + [else (lex-error c)]) + (module (lex-identifier) + (define (id) (return-token 'id (string->symbol (get-buf)))) + (define-state-case next c + [eof (id)] + [identifier-subsequent? (put-char sp c) (next)] + [else ($unread-char c) (id)]) + (define (lex-identifier) (next))) + (define-state-case seen-plus c + [eof (lex-error c)] + [char-numeric? (lex-signed-number #\+ c)] + [else (lex-error c)]) + (define-state-case seen-minus c + [eof (lex-error c)] + [char-numeric? (lex-signed-number #\- c)] + [else (lex-error c)]) + (define-state-case seen-equals c + [eof (lex-error c)] + [#\> (return-token 'big-arrow #f)] + [else (lex-error c)]) + (module (lex-number lex-signed-number) + (define (finish-number) + (let ([str (get-buf)]) + (let ([n (string->number str 10)]) + (unless n (errorf 'parse-ftc "unexpected number literal ~a" str)) + (return-token 'integer n)))) + (define (num) + (let ([c ($get-char)]) + (state-case c + [eof (finish-number)] + [char-numeric? (put-char sp c) (num)] + [else ($unread-char c) (finish-number)]))) + (define (lex-signed-number s c) + (put-char sp s) + (lex-number c)) + (define (lex-number c) + (state-case c + [eof (assert #f)] + [char-numeric? (put-char sp c) (num)] + [else (assert #f)]))) + (define-state-case seen-/ c + [eof (lex-error c)] + [#\* (lex-block-comment)] + [#\/ (lex-comment)] + [else (lex-error c)]) + (define-state-case lex-comment c + [eof (lex)] + [#\newline ($ws!) (lex)] + [else (lex-comment)]) + (define (lex-block-comment) + (define-state-case maybe-end-comment c + [eof (lex-error c)] + [#\/ ($ws!) (lex)] + [else (lex-block-comment)]) + (let ([c ($get-char)]) + (state-case c + [eof (lex-error c)] + [#\* (maybe-end-comment)] + [else (lex-block-comment)]))) + (lex)))) + + (record-writer (record-type-descriptor token) + (lambda (x p wr) + (put-char p #\[) + (wr (token-type x) p) + (put-char p #\,) + (put-char p #\space) + (wr (token-value x) p) + (put-char p #\]) + (put-char p #\:) + (wr (token-bfp x) p) + (put-char p #\-) + (wr (token-efp x) p))) +) + +(library (parser) + (export parse) + (import (chezscheme) (streams) (lexer)) + (module (define-grammar is sat parse-consumed-all? parse-result-value grammar-trace make-src) + (define (sep->parser sep) + (cond + [(char? sep) (sat (lambda (x) (eq? (token-value x) sep)))] + [(symbol? sep) (sat (lambda (x) (eq? (token-type x) sep)))] + [else (errorf "don't know how to parse separator: ~s" sep)])) + (meta define (constant? x) (let ([x (syntax->datum x)]) (or (string? x) (char? x)))) + (define constant->parser + (let () + (define (token-sat type val) + (sat (lambda (x) + (let ([ans (and (token? x) (eqv? (token-type x) type) (eqv? (token-value x) val))]) + (when (grammar-trace) (printf " ~s is [~s, ~a]? => ~s~%" x type val ans)) + ans)))) + (lambda (const) + (if (string? const) + (case const + ["=>" (token-sat 'big-arrow #f)] + [else (token-sat 'id (string->symbol const))]) + (case const + [#\( (token-sat 'lparen const)] + [#\) (token-sat 'rparen const)] + [#\! (token-sat 'bang const)] + [else (errorf 'constant->parser "don't know how to construct a parser for ~a" const)]))))) + (define make-src (lambda (bfp efp) (and (<= bfp efp) (cons bfp efp)))) + (include "ez-grammar.ss")) + + (define token + (case-lambda + [(type) + (is (token-value x) + (where + [x <- (sat (lambda (x) + (let ([ans (eq? (token-type x) type)]) + (when (grammar-trace) (printf " ~s is ~s? => ~s~%" x type ans)) + ans)))]))] + [(type val) + (is (token-value x) + (where + [x <- (sat (lambda (x) + (let ([ans (and + (eq? (token-type x) type) + (eqv? (token-value x) val))]) + (when (grammar-trace) (printf " ~s is [~s, ~s]? => ~s~%" x type val ans)) + ans)))]))])) + + (define-grammar expr + (expr + [integer :: src (token 'integer) => + (lambda (src n) + `(int ,src ,n))] + [becomes :: src "=>" expr => + (lambda (src e) + `(=> ,src ,e))] + [becomes! :: src "=>" #\! expr => + (lambda (src e) + `(=>! ,src ,e))] + [group :: src #\( expr #\) => + (lambda (src e) + `(group ,src ,e))])) + + (define parse + (lambda (fn) + (let ([ip (open-input-file fn)]) + (let ([token-stream (lexer fn ip)]) + (define (oops) + (let ([last-token (stream-last-forced token-stream)]) + (if last-token + (errorf 'parse "parse error at or before character ~s of ~a" (token-bfp last-token) fn) + (errorf 'parse "no expressions found in ~a" fn)))) + ;;; return the first result, if any, for which the input stream was entirely consumed. + (let loop ([res* (expr token-stream)]) + (if (null? res*) + (oops) + (let ([res (car res*)]) + (if (parse-consumed-all? res) + (parse-result-value res) + (loop (cdr res*)))))))))) + ) + +(define (ez-grammar-test) + (import (parser)) + (with-output-to-file "/tmp/t1" + (lambda () + (for-each display + '( + "1347\n" + ))) + 'replace) + + (with-output-to-file "/tmp/t2" + (lambda () + (for-each display + '( + "\n" + "/* hello */ => ( => 1253) /* goodbye\n" + " 111111111122222222223333333333\n" + "123456789012345678901234567890123456789\n" + "*/\n" + ))) + 'replace) + + (with-output-to-file "/tmp/t3err" + (lambda () + (for-each display + '( + "\n" + "/* hello */ => (=> 1253 =>) /* goodbye\n" + " 111111111122222222223333333333\n" + "123456789012345678901234567890123456789\n" + "*/\n" + ))) + 'replace) + + (with-output-to-file "/tmp/t4err" + (lambda () + (for-each display + '( + "3 /*\n" + ))) + 'replace) + + (unless (guard (c [else #f]) (equal? (parse "/tmp/t1") (quote (int (0 . 3) 1347)))) + (printf "test 1 failed\n")) + (unless (guard (c [else #f]) (equal? (parse "/tmp/t2") (quote (=> (13 . 25) (group (16 . 25) (=> (18 . 24) (int (21 . 24) 1253))))))) + (printf "test 2 failed\n")) + (unless (guard (c [else (and (equal? (condition-message c) "parse error at or before character ~s of ~a") (equal? (condition-irritants c) (quote (25 "/tmp/t3err"))))]) (parse "/tmp/t3err") #f) + (printf "test 3 failed\n")) + (unless (guard (c [else (and (equal? (condition-message c) "unexpected ~a at character ~s of ~a") (equal? (condition-irritants c) (quote ("eof" 6 "/tmp/t4err"))))]) (parse "/tmp/t4err") #f) + (printf "test 4 failed\n")) + (printf "end of tests\n")) + +#!eof + +The following should print only "end of tests". + +echo '(ez-grammar-test)' | scheme -q ez-grammar-test.ss diff --git a/examples/ez-grammar.ss b/examples/ez-grammar.ss new file mode 100644 index 0000000000..744217793f --- /dev/null +++ b/examples/ez-grammar.ss @@ -0,0 +1,357 @@ +;;; Copyright 2017 Cisco Systems, Inc. +;;; +;;; Licensed under the Apache License, Version 2.0 (the "License"); +;;; you may not use this file except in compliance with the License. +;;; You may obtain a copy of the License at +;;; +;;; http://www.apache.org/licenses/LICENSE-2.0 +;;; +;;; Unless required by applicable law or agreed to in writing, software +;;; distributed under the License is distributed on an "AS IS" BASIS, +;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +;;; See the License for the specific language governing permissions and +;;; limitations under the License. + +;;; See http://www.cs.nott.ac.uk/~pszgmh/monparsing.pdf for origins of +;;; some of the monadic combinators. + +;;; Authors: Jon Rossie, Kent Dybvig + +;;; The define-grammar form produces a parser: +;;; +;;; parser : token-stream -> ((Tree token-stream) ...) +;;; +;;; If the return value is the empty list, a parse error occurred. +;;; If the return value has multiple elements, the parse was ambiguous. +;;; The token-stream in each (Tree token-stream) is the tail of the +;;; input stream that begins with the last token consumed by the parse. +;;; This gives the consumer access to both the first and last token, +;;; allowing it to determine cheaply the extent of the parse, including +;;; source locations if source information is attached to the tokens. + +;;; Internally, backtracking occurs whenever a parser return value +;;; has multiple elements. + +;;; This code should be included into a lexical context that supplies: +;;; +;;; token-bfp : token -> token's beginning file position +;;; token-efp : token -> token's ending file position +;;; meta constant? : syntax-object -> boolean +;;; sep->parser : sep -> parser +;;; constant->parser : constant -> parser +;;; make-src : bfp x efp -> src. bfp > efp => no tokens consumed. +;;; +;;; See ez-grammar-test.ss for an example. + +(module (define-grammar + is sat peek seq ++ +++ many many+ ? + parse-consumed-all? parse-result-value parse-result-unused + grammar-trace + ) + (import (streams)) + + (define grammar-trace (make-parameter #f)) + + (define-record-type parse-result + (nongenerative parse-result) + (fields value unused)) + + ;; to enable $trace-is to determine the ending file position (efp) of a parse + ;; form, the input stream actually points to the preceding token rather than + ;; to the current token. the next few routines establish, maintain, and deal + ;; with that invariant. + (define make-top-level-parser + (lambda (parser) + (lambda (inp) + (parser (stream-cons 'dummy-token inp))))) + + (define preceding-token + (lambda (inp) + (stream-car inp))) + + (define current-token + (lambda (inp) + (stream-car (stream-cdr inp)))) + + (define remaining-tokens + (lambda (inp) + (stream-cdr inp))) + + (define no-more-tokens? + (lambda (inp) + (stream-null? (stream-cdr inp)))) + + (define parse-consumed-all? + (lambda (res) + (no-more-tokens? (parse-result-unused res)))) + + ;; A parser generator + (define result + (lambda (v) + ;; this is a parser that ignores its input and produces v + (lambda (inp) + (stream (make-parse-result v inp))))) + + ;; A parse that always generates a parse error + (define zero + (lambda (inp) + stream-nil)) + + ;; For a non-empty stream, successfully consume the first element + (define item + (lambda (inp) + (cond + [(no-more-tokens? inp) '()] + [else + (stream (make-parse-result (current-token inp) (remaining-tokens inp)))]))) + + (define (peek p) + (lambda (inp) + (stream-map (lambda (pr) + (make-parse-result (parse-result-value pr) inp)) + (p inp)))) + + ;;------------------------------------------ + + (define bind + (lambda (parser receiver) + (lambda (inp) + (let ([res* (parser inp)]) + (stream-append-all + (stream-map (lambda (res) + ((receiver (parse-result-value res)) + (parse-result-unused res))) + res*)))))) + + ;; monad comprehensions + (define-syntax is-where ; used by is and trace-is + (lambda (x) + (syntax-case x (where <-) + [(_ expr (where)) #'expr] + [(_ expr (where [x <- p] clauses ...)) + #'(bind p (lambda (x) (is-where expr (where clauses ...))))] + [(_ expr (where pred clauses ...)) + #'(if pred (is-where expr (where clauses ...)) zero)] + [(_ expr where-clause) (syntax-error #'where-clause)]))) + (indirect-export is-where bind) + + (define-syntax is + (syntax-rules () + [(_ expr where-clause) (is-where (result expr) where-clause)])) + (indirect-export is is-where) + + (module (trace-is) + (define ($trace-is name proc head) + (lambda (unused) + (let ([res (proc (token-bfp (current-token head)) (token-efp (preceding-token unused)))]) + (when (and 'name (grammar-trace)) (printf "<<~s = ~s~%" 'name res)) + (stream (make-parse-result res unused))))) + + (define-syntax trace-is + (syntax-rules () + [(_ name proc-expr where-clause) + (lambda (inp) ((is-where ($trace-is 'name proc-expr inp) where-clause) inp))])) + (indirect-export trace-is $trace-is)) + + (define (seq2 p q) (is (cons x y) (where [x <- p] [y <- q]))) + + (define seq + (lambda p* + (let loop ([p* p*]) + (cond + [(null? p*) (result '())] + [else (seq2 (car p*) (loop (cdr p*)))])))) + + (define (sat pred) (is x (where [x <- item] (pred x)))) + + (define ++ ;; introduce ambiguity + (lambda (p q) + (lambda (inp) + (stream-append2 (p inp) + (lambda () + (q inp)))))) + + (define (many+ p) (is (cons x xs) (where [x <- p] [xs <- (many p)]))) + + (define (many p) (++ (many+ p) (result '()))) + + (define (? p) (++ (sat p) (result #f))) + + (define (sepby1 p sep) + (is (cons x xs) + (where + [x <- p] + [xs <- (many (is y (where [_ <- sep] [y <- p])))]))) + + (define (sepby p sep) (++ (sepby1 p sep) (result '()))) + + (define (bracket open p close) (is x (where [_ <- open] [x <- p] [_ <- close]))) + + (define (optional p default) + (lambda (inp) + (let ([res (p inp)]) + (if (stream-null? res) + (stream (make-parse-result default inp)) + res)))) + + (define (first p) + (lambda (inp) + (let ([res (p inp)]) + (if (stream-null? res) + res + (stream (stream-car res)))))) + + (define (+++ p q) (first (++ p q))) ;; choose first match, cut backtracking + + (define (format-inp inp) + (if (no-more-tokens? inp) + "#" + (format "(~s ...)" (current-token inp)))) + + (define-syntax define-grammar + (lambda (x) + (define-record-type production + (nongenerative) + (fields name elt* receiver)) + (define-record-type clause + (nongenerative) + (fields id prod*)) + (define (gentemp) (datum->syntax #'* (gensym))) + (define (elt-temps elt*) + (fold-left + (lambda (t* elt) + (if (constant? elt) t* (cons (gentemp) t*))) + '() + elt*)) + (define parse-production + (lambda (cl) + (syntax-case cl (:: src =>) + [[name :: src elt ... => receiver] + (make-production #'name #'(elt ...) + (with-syntax ([(t ...) (elt-temps #'(elt ...))]) + #'(lambda (bfp efp t ...) + (receiver (make-src bfp efp) t ...))))] + [[name :: elt ... => receiver] + (make-production #'name #'(elt ...) + (with-syntax ([(t ...) (elt-temps #'(elt ...))]) + #'(lambda (bfp efp t ...) + (receiver t ...))))]))) + (define (left-factor clause*) + (define syntax-equal? + (lambda (x y) + (equal? (syntax->datum x) (syntax->datum y)))) + (let lp1 ([clause* clause*] [new-clause* '()]) + (if (null? clause*) + (reverse new-clause*) + (let ([clause (car clause*)]) + (let lp2 ([prod* (clause-prod* clause)] [new-prod* '()] [clause* (cdr clause*)]) + (if (null? prod*) + (lp1 clause* (cons (make-clause (clause-id clause) (reverse new-prod*)) new-clause*)) + (let ([prod (car prod*)] [prod* (cdr prod*)]) + (let ([elt* (production-elt* prod)]) + (if (null? elt*) + (lp2 prod* (cons prod new-prod*) clause*) + (let ([elt (car elt*)]) + (let-values ([(haves have-nots) (partition + (lambda (prod) + (let ([elt* (production-elt* prod)]) + (and (not (null? elt*)) + (syntax-equal? (car elt*) elt)))) + prod*)]) + (if (null? haves) + (lp2 prod* (cons prod new-prod*) clause*) + (let ([haves (cons prod haves)]) + ; "haves" start with the same elt. to cut down on the number of new + ; nonterminals and receiver overhead, find the largest common prefix + (let ([prefix (cons elt + (let f ([elt** (map production-elt* haves)]) + (let ([elt** (map cdr elt**)]) + (if (ormap null? elt**) + '() + (let ([elt (caar elt**)]) + (if (andmap (lambda (elt*) (syntax-equal? (car elt*) elt)) (cdr elt**)) + (cons (caar elt**) (f elt**)) + '()))))))]) + (let ([t (gentemp)] [n (length prefix)] [t* (elt-temps prefix)]) + (lp2 have-nots + (cons (make-production #f (append prefix (list t)) + #`(lambda (bfp efp #,@t* p) (p bfp #,@t*))) + new-prod*) + (cons (make-clause t + (map (lambda (prod) + (let ([elt* (list-tail (production-elt* prod) n)]) + (make-production (production-name prod) elt* + (let ([u* (elt-temps elt*)]) + #`(lambda (bfp efp #,@u*) + (lambda (bfp #,@t*) + (#,(production-receiver prod) bfp efp #,@t* #,@u*))))))) + haves)) + clause*))))))))))))))))) + (define (nt-helper clause*) + (define (elt-helper x) + (syntax-case x (SEP+ SEP* OPT K* K+) + [(SEP+ p sep) #`(sepby1 #,(elt-helper #'p) (sep->parser sep))] + [(SEP* p sep) #`(sepby #,(elt-helper #'p) (sep->parser sep))] + [(OPT p dflt) #`(optional #,(elt-helper #'p) dflt)] + [(K* p) #`(many #,(elt-helper #'p))] + [(K+ p) #`(many+ #,(elt-helper #'p))] + [k (constant? #'k) #'(constant->parser 'k)] + [p #'p])) + (let loop ([clause* clause*] [binding* '()]) + (if (null? clause*) + binding* + (loop + (cdr clause*) + (cons + #`[#,(clause-id (car clause*)) + #,(let f ([prod* (clause-prod* (car clause*))]) + (if (null? prod*) + #'zero + (with-syntax ([name (production-name (car prod*))] + [(elt ...) (production-elt* (car prod*))] + [receiver (production-receiver (car prod*))]) + (with-syntax ([(x ...) (generate-temporaries #'(elt ...))]) + (with-syntax ([([y _] ...) (filter (lambda (pr) (not (constant? (cadr pr)))) #'([x elt] ...))]) + (with-syntax ([(where-nt ...) (map elt-helper #'(elt ...))]) + #`(+++ ;; use +++ if you don't ever need to backtrack to a previous production for the same non-terminal + (lambda (inp) + (when (and 'name (grammar-trace)) (printf ">>~s(~a)~%" 'name (format-inp inp))) + (let ([res ((trace-is name (lambda (bfp efp) (receiver bfp efp y ...)) (where [x <- where-nt] ...)) inp)]) + (when (and 'name (grammar-trace)) + (if (stream-null? res) + (printf "<<~s(~a) failed~%" 'name (format-inp inp)) + (printf "<<~s(~a) succeeded~%" 'name (format-inp inp)))) + res)) + #,(f (cdr prod*)))))))))] + binding*))))) + (syntax-case x () + [(_ init-nt [nt prod prods ...] ...) + (with-syntax ([(binding ...) + (nt-helper + (left-factor + (map (lambda (nt prod*) (make-clause nt (map parse-production prod*))) + #'(nt ...) + #'((prod prods ...) ...))))]) + #'(define init-nt + (letrec (binding ...) + (make-top-level-parser init-nt))))]))) + + (indirect-export define-grammar + result + zero + is + trace-is + sepby1 + sepby + optional + many + many+ + +++ + + grammar-trace + format-inp + trace-is + + make-top-level-parser + ) +) diff --git a/mats/examples.ms b/mats/examples.ms index c4bd3e7cd5..c13b6d3bb2 100644 --- a/mats/examples.ms +++ b/mats/examples.ms @@ -585,3 +585,9 @@ edit> (begin (define-struct! caramel x y z) (eqv? (caramel-x (caramel 1 2 3)) 1)) ) + +(examples-mat ez-grammar-test ("ez-grammar-test") + (equal? + (with-output-to-string ez-grammar-test) + "end of tests\n") +)