- added "ez-grammar" example program
examples/ez-grammar.ss, examples/ez-grammar-test.ss, examples/Makefile, examples.ms original commit: dd0dec63e7c333d2edc51e7340a8b14e7e3d130b
This commit is contained in:
parent
7ba018bd41
commit
ff64811604
3
LOG
3
LOG
|
@ -556,3 +556,6 @@
|
|||
syntax.stex, debug.stex, system.stex, release_notes.stex
|
||||
- fixed broken mats on Windows caused by Bash/WSL changes
|
||||
7.ms, ftype.ms
|
||||
- added "ez-grammar" example program
|
||||
examples/ez-grammar.ss, examples/ez-grammar-test.ss,
|
||||
examples/Makefile, examples.ms
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
|
||||
src = def.ss edit.ss fact.ss fatfib.ss fft.ss fib.ss freq.ss interpret.ss\
|
||||
m4.ss macro.ss matrix.ss object.ss power.ss queue.ss rabbit.ss rsa.ss\
|
||||
scons.ss setof.ss socket.ss unify.ss compat.ss
|
||||
scons.ss setof.ss socket.ss unify.ss compat.ss ez-grammar-test.ss
|
||||
obj = ${src:%.ss=%.so}
|
||||
|
||||
Scheme = ../bin/scheme -q
|
||||
|
|
423
examples/ez-grammar-test.ss
Normal file
423
examples/ez-grammar-test.ss
Normal file
|
@ -0,0 +1,423 @@
|
|||
;;; Copyright 2017 Cisco Systems, Inc.
|
||||
;;;
|
||||
;;; Licensed under the Apache License, Version 2.0 (the "License");
|
||||
;;; you may not use this file except in compliance with the License.
|
||||
;;; You may obtain a copy of the License at
|
||||
;;;
|
||||
;;; http://www.apache.org/licenses/LICENSE-2.0
|
||||
;;;
|
||||
;;; Unless required by applicable law or agreed to in writing, software
|
||||
;;; distributed under the License is distributed on an "AS IS" BASIS,
|
||||
;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
;;; See the License for the specific language governing permissions and
|
||||
;;; limitations under the License.
|
||||
|
||||
;;; This file contains a sample parser defined via the ez-grammar system
|
||||
;;; and a simple test of the parser.
|
||||
|
||||
;;; This file is organized as follows:
|
||||
;;;
|
||||
;;; - (streams) library providing the required exports for ez-grammar and
|
||||
;;; the parser.
|
||||
;;;
|
||||
;;; - (state-case) library exporting the state-case macro, copped from
|
||||
;;; cmacros.ss, for use by the lexer.
|
||||
;;;
|
||||
;;; - (lexer) library providing a simple lexer that reads characters
|
||||
;;; from a port and produces a corresponding stream of tokens.
|
||||
;;;
|
||||
;;; - (parser) library providing the sample parser.
|
||||
;;;
|
||||
;;; - ez-grammar-test procedure that tests the sample parser.
|
||||
;;;
|
||||
;;; Instructions for running the test are at the end of this file.
|
||||
|
||||
(library (streams)
|
||||
(export stream-cons stream-car stream-cdr stream-nil stream-null?
|
||||
stream-map stream stream-append2 stream-append-all stream-last-forced)
|
||||
(import (chezscheme))
|
||||
|
||||
(define stream-cons
|
||||
(lambda (x thunk)
|
||||
(cons x thunk)))
|
||||
|
||||
(define stream-car
|
||||
(lambda (x)
|
||||
(car x)))
|
||||
|
||||
(define stream-cdr
|
||||
(lambda (x)
|
||||
(when (procedure? (cdr x)) (set-cdr! x ((cdr x))))
|
||||
(cdr x)))
|
||||
|
||||
(define stream-nil '())
|
||||
|
||||
(define stream-null?
|
||||
(lambda (x)
|
||||
(null? x)))
|
||||
|
||||
(define stream-map
|
||||
(lambda (f x)
|
||||
(if (stream-null? x)
|
||||
'()
|
||||
(stream-cons (f (stream-car x))
|
||||
(lambda ()
|
||||
(stream-map f (stream-cdr x)))))))
|
||||
|
||||
(define stream
|
||||
(lambda xs
|
||||
xs))
|
||||
|
||||
(define stream-append2
|
||||
(lambda (xs thunk)
|
||||
(if (null? xs)
|
||||
(thunk)
|
||||
(stream-cons (stream-car xs)
|
||||
(lambda ()
|
||||
(stream-append2 (stream-cdr xs) thunk))))))
|
||||
|
||||
(define stream-append-all
|
||||
(lambda (stream$) ;; stream of streams
|
||||
(if (stream-null? stream$)
|
||||
stream$
|
||||
(stream-append2 (stream-car stream$)
|
||||
(lambda () (stream-append-all (stream-cdr stream$)))))))
|
||||
|
||||
(define stream-last-forced
|
||||
(lambda (x)
|
||||
(and (not (null? x))
|
||||
(let loop ([x x])
|
||||
(let ([next (cdr x)])
|
||||
(if (pair? next)
|
||||
(loop next)
|
||||
(car x)))))))
|
||||
)
|
||||
|
||||
(library (state-case)
|
||||
(export state-case eof)
|
||||
(import (chezscheme))
|
||||
|
||||
;;; from Chez Scheme Version 9.4 cmacros.ss
|
||||
(define-syntax state-case
|
||||
(lambda (x)
|
||||
(define state-case-test
|
||||
(lambda (cvar k)
|
||||
(with-syntax ((cvar cvar))
|
||||
(syntax-case k (-)
|
||||
(char
|
||||
(char? (datum char))
|
||||
#'(char=? cvar char))
|
||||
((char1 - char2)
|
||||
(and (char? (datum char1)) (char? (datum char2)))
|
||||
#'(char<=? char1 cvar char2))
|
||||
(predicate
|
||||
(identifier? #'predicate)
|
||||
#'(predicate cvar))))))
|
||||
(define state-case-help
|
||||
(lambda (cvar clauses)
|
||||
(syntax-case clauses (else)
|
||||
(((else exp1 exp2 ...))
|
||||
#'(begin exp1 exp2 ...))
|
||||
((((k ...) exp1 exp2 ...) . more)
|
||||
(with-syntax (((test ...)
|
||||
(map (lambda (k) (state-case-test cvar k))
|
||||
#'(k ...)))
|
||||
(rest (state-case-help cvar #'more)))
|
||||
#'(if (or test ...) (begin exp1 exp2 ...) rest)))
|
||||
(((k exp1 exp2 ...) . more)
|
||||
(with-syntax ((test (state-case-test cvar #'k))
|
||||
(rest (state-case-help cvar #'more)))
|
||||
#'(if test (begin exp1 exp2 ...) rest))))))
|
||||
(syntax-case x (eof)
|
||||
((_ cvar (eof exp1 exp2 ...) more ...)
|
||||
(identifier? #'cvar)
|
||||
(with-syntax ((rest (state-case-help #'cvar #'(more ...))))
|
||||
#'(if (eof-object? cvar)
|
||||
(begin exp1 exp2 ...)
|
||||
rest))))))
|
||||
|
||||
(define-syntax eof
|
||||
(lambda (x)
|
||||
(syntax-error x "misplaced aux keyword")))
|
||||
)
|
||||
|
||||
(library (lexer)
|
||||
(export token? token-type token-value token-bfp token-efp lexer)
|
||||
(import (chezscheme) (state-case) (streams))
|
||||
|
||||
(define-record-type token
|
||||
(nongenerative)
|
||||
(fields type value bfp efp))
|
||||
|
||||
;; test lexer
|
||||
(define lexer
|
||||
(lambda (fn ip)
|
||||
(define $prev-pos 0)
|
||||
(define $pos 0)
|
||||
(define ($get-char)
|
||||
(set! $pos (+ $pos 1))
|
||||
(get-char ip))
|
||||
(define ($unread-char c)
|
||||
(set! $pos (- $pos 1))
|
||||
(unread-char c ip))
|
||||
(define ($ws!) (set! $prev-pos $pos))
|
||||
(define ($make-token type value)
|
||||
(let ([tok (make-token type value $prev-pos (- $pos 1))])
|
||||
(set! $prev-pos $pos)
|
||||
tok))
|
||||
(define ($lex-error c)
|
||||
(errorf #f "unexpected ~a at character ~s of ~a"
|
||||
(if (eof-object? c)
|
||||
"eof"
|
||||
(format "character '~c'" c))
|
||||
$pos fn))
|
||||
(define-syntax lex-error
|
||||
(syntax-rules ()
|
||||
[(_ ?c)
|
||||
(let ([c ?c])
|
||||
($lex-error c)
|
||||
(void))]))
|
||||
(let-values ([(sp get-buf) (open-string-output-port)])
|
||||
(define (return-token type value)
|
||||
(stream-cons ($make-token type value) lex))
|
||||
(module (identifier-initial? identifier-subsequent?)
|
||||
(define identifier-initial?
|
||||
(lambda (c)
|
||||
(char-alphabetic? c)))
|
||||
(define identifier-subsequent?
|
||||
(lambda (c)
|
||||
(or (char-alphabetic? c)
|
||||
(char-numeric? c)))))
|
||||
(define-syntax define-state-case
|
||||
(syntax-rules ()
|
||||
[(_ ?def-id ?char-id clause ...)
|
||||
(define (?def-id)
|
||||
(let ([?char-id ($get-char)])
|
||||
(state-case ?char-id clause ...)))]))
|
||||
(define-state-case lex c
|
||||
[eof stream-nil]
|
||||
[char-whitespace? ($ws!) (lex)]
|
||||
[char-numeric? (lex-number c)]
|
||||
[#\/ (seen-/)]
|
||||
[identifier-initial? (put-char sp c) (lex-identifier)]
|
||||
[#\( (return-token 'lparen #\()]
|
||||
[#\) (return-token 'rparen #\))]
|
||||
[#\! (return-token 'bang #\!)]
|
||||
[#\+ (seen-plus)]
|
||||
[#\- (seen-minus)]
|
||||
[#\= (seen-equals)]
|
||||
[else (lex-error c)])
|
||||
(module (lex-identifier)
|
||||
(define (id) (return-token 'id (string->symbol (get-buf))))
|
||||
(define-state-case next c
|
||||
[eof (id)]
|
||||
[identifier-subsequent? (put-char sp c) (next)]
|
||||
[else ($unread-char c) (id)])
|
||||
(define (lex-identifier) (next)))
|
||||
(define-state-case seen-plus c
|
||||
[eof (lex-error c)]
|
||||
[char-numeric? (lex-signed-number #\+ c)]
|
||||
[else (lex-error c)])
|
||||
(define-state-case seen-minus c
|
||||
[eof (lex-error c)]
|
||||
[char-numeric? (lex-signed-number #\- c)]
|
||||
[else (lex-error c)])
|
||||
(define-state-case seen-equals c
|
||||
[eof (lex-error c)]
|
||||
[#\> (return-token 'big-arrow #f)]
|
||||
[else (lex-error c)])
|
||||
(module (lex-number lex-signed-number)
|
||||
(define (finish-number)
|
||||
(let ([str (get-buf)])
|
||||
(let ([n (string->number str 10)])
|
||||
(unless n (errorf 'parse-ftc "unexpected number literal ~a" str))
|
||||
(return-token 'integer n))))
|
||||
(define (num)
|
||||
(let ([c ($get-char)])
|
||||
(state-case c
|
||||
[eof (finish-number)]
|
||||
[char-numeric? (put-char sp c) (num)]
|
||||
[else ($unread-char c) (finish-number)])))
|
||||
(define (lex-signed-number s c)
|
||||
(put-char sp s)
|
||||
(lex-number c))
|
||||
(define (lex-number c)
|
||||
(state-case c
|
||||
[eof (assert #f)]
|
||||
[char-numeric? (put-char sp c) (num)]
|
||||
[else (assert #f)])))
|
||||
(define-state-case seen-/ c
|
||||
[eof (lex-error c)]
|
||||
[#\* (lex-block-comment)]
|
||||
[#\/ (lex-comment)]
|
||||
[else (lex-error c)])
|
||||
(define-state-case lex-comment c
|
||||
[eof (lex)]
|
||||
[#\newline ($ws!) (lex)]
|
||||
[else (lex-comment)])
|
||||
(define (lex-block-comment)
|
||||
(define-state-case maybe-end-comment c
|
||||
[eof (lex-error c)]
|
||||
[#\/ ($ws!) (lex)]
|
||||
[else (lex-block-comment)])
|
||||
(let ([c ($get-char)])
|
||||
(state-case c
|
||||
[eof (lex-error c)]
|
||||
[#\* (maybe-end-comment)]
|
||||
[else (lex-block-comment)])))
|
||||
(lex))))
|
||||
|
||||
(record-writer (record-type-descriptor token)
|
||||
(lambda (x p wr)
|
||||
(put-char p #\[)
|
||||
(wr (token-type x) p)
|
||||
(put-char p #\,)
|
||||
(put-char p #\space)
|
||||
(wr (token-value x) p)
|
||||
(put-char p #\])
|
||||
(put-char p #\:)
|
||||
(wr (token-bfp x) p)
|
||||
(put-char p #\-)
|
||||
(wr (token-efp x) p)))
|
||||
)
|
||||
|
||||
(library (parser)
|
||||
(export parse)
|
||||
(import (chezscheme) (streams) (lexer))
|
||||
(module (define-grammar is sat parse-consumed-all? parse-result-value grammar-trace make-src)
|
||||
(define (sep->parser sep)
|
||||
(cond
|
||||
[(char? sep) (sat (lambda (x) (eq? (token-value x) sep)))]
|
||||
[(symbol? sep) (sat (lambda (x) (eq? (token-type x) sep)))]
|
||||
[else (errorf "don't know how to parse separator: ~s" sep)]))
|
||||
(meta define (constant? x) (let ([x (syntax->datum x)]) (or (string? x) (char? x))))
|
||||
(define constant->parser
|
||||
(let ()
|
||||
(define (token-sat type val)
|
||||
(sat (lambda (x)
|
||||
(let ([ans (and (token? x) (eqv? (token-type x) type) (eqv? (token-value x) val))])
|
||||
(when (grammar-trace) (printf " ~s is [~s, ~a]? => ~s~%" x type val ans))
|
||||
ans))))
|
||||
(lambda (const)
|
||||
(if (string? const)
|
||||
(case const
|
||||
["=>" (token-sat 'big-arrow #f)]
|
||||
[else (token-sat 'id (string->symbol const))])
|
||||
(case const
|
||||
[#\( (token-sat 'lparen const)]
|
||||
[#\) (token-sat 'rparen const)]
|
||||
[#\! (token-sat 'bang const)]
|
||||
[else (errorf 'constant->parser "don't know how to construct a parser for ~a" const)])))))
|
||||
(define make-src (lambda (bfp efp) (and (<= bfp efp) (cons bfp efp))))
|
||||
(include "ez-grammar.ss"))
|
||||
|
||||
(define token
|
||||
(case-lambda
|
||||
[(type)
|
||||
(is (token-value x)
|
||||
(where
|
||||
[x <- (sat (lambda (x)
|
||||
(let ([ans (eq? (token-type x) type)])
|
||||
(when (grammar-trace) (printf " ~s is ~s? => ~s~%" x type ans))
|
||||
ans)))]))]
|
||||
[(type val)
|
||||
(is (token-value x)
|
||||
(where
|
||||
[x <- (sat (lambda (x)
|
||||
(let ([ans (and
|
||||
(eq? (token-type x) type)
|
||||
(eqv? (token-value x) val))])
|
||||
(when (grammar-trace) (printf " ~s is [~s, ~s]? => ~s~%" x type val ans))
|
||||
ans)))]))]))
|
||||
|
||||
(define-grammar expr
|
||||
(expr
|
||||
[integer :: src (token 'integer) =>
|
||||
(lambda (src n)
|
||||
`(int ,src ,n))]
|
||||
[becomes :: src "=>" expr =>
|
||||
(lambda (src e)
|
||||
`(=> ,src ,e))]
|
||||
[becomes! :: src "=>" #\! expr =>
|
||||
(lambda (src e)
|
||||
`(=>! ,src ,e))]
|
||||
[group :: src #\( expr #\) =>
|
||||
(lambda (src e)
|
||||
`(group ,src ,e))]))
|
||||
|
||||
(define parse
|
||||
(lambda (fn)
|
||||
(let ([ip (open-input-file fn)])
|
||||
(let ([token-stream (lexer fn ip)])
|
||||
(define (oops)
|
||||
(let ([last-token (stream-last-forced token-stream)])
|
||||
(if last-token
|
||||
(errorf 'parse "parse error at or before character ~s of ~a" (token-bfp last-token) fn)
|
||||
(errorf 'parse "no expressions found in ~a" fn))))
|
||||
;;; return the first result, if any, for which the input stream was entirely consumed.
|
||||
(let loop ([res* (expr token-stream)])
|
||||
(if (null? res*)
|
||||
(oops)
|
||||
(let ([res (car res*)])
|
||||
(if (parse-consumed-all? res)
|
||||
(parse-result-value res)
|
||||
(loop (cdr res*))))))))))
|
||||
)
|
||||
|
||||
(define (ez-grammar-test)
|
||||
(import (parser))
|
||||
(with-output-to-file "/tmp/t1"
|
||||
(lambda ()
|
||||
(for-each display
|
||||
'(
|
||||
"1347\n"
|
||||
)))
|
||||
'replace)
|
||||
|
||||
(with-output-to-file "/tmp/t2"
|
||||
(lambda ()
|
||||
(for-each display
|
||||
'(
|
||||
"\n"
|
||||
"/* hello */ => ( => 1253) /* goodbye\n"
|
||||
" 111111111122222222223333333333\n"
|
||||
"123456789012345678901234567890123456789\n"
|
||||
"*/\n"
|
||||
)))
|
||||
'replace)
|
||||
|
||||
(with-output-to-file "/tmp/t3err"
|
||||
(lambda ()
|
||||
(for-each display
|
||||
'(
|
||||
"\n"
|
||||
"/* hello */ => (=> 1253 =>) /* goodbye\n"
|
||||
" 111111111122222222223333333333\n"
|
||||
"123456789012345678901234567890123456789\n"
|
||||
"*/\n"
|
||||
)))
|
||||
'replace)
|
||||
|
||||
(with-output-to-file "/tmp/t4err"
|
||||
(lambda ()
|
||||
(for-each display
|
||||
'(
|
||||
"3 /*\n"
|
||||
)))
|
||||
'replace)
|
||||
|
||||
(unless (guard (c [else #f]) (equal? (parse "/tmp/t1") (quote (int (0 . 3) 1347))))
|
||||
(printf "test 1 failed\n"))
|
||||
(unless (guard (c [else #f]) (equal? (parse "/tmp/t2") (quote (=> (13 . 25) (group (16 . 25) (=> (18 . 24) (int (21 . 24) 1253)))))))
|
||||
(printf "test 2 failed\n"))
|
||||
(unless (guard (c [else (and (equal? (condition-message c) "parse error at or before character ~s of ~a") (equal? (condition-irritants c) (quote (25 "/tmp/t3err"))))]) (parse "/tmp/t3err") #f)
|
||||
(printf "test 3 failed\n"))
|
||||
(unless (guard (c [else (and (equal? (condition-message c) "unexpected ~a at character ~s of ~a") (equal? (condition-irritants c) (quote ("eof" 6 "/tmp/t4err"))))]) (parse "/tmp/t4err") #f)
|
||||
(printf "test 4 failed\n"))
|
||||
(printf "end of tests\n"))
|
||||
|
||||
#!eof
|
||||
|
||||
The following should print only "end of tests".
|
||||
|
||||
echo '(ez-grammar-test)' | scheme -q ez-grammar-test.ss
|
357
examples/ez-grammar.ss
Normal file
357
examples/ez-grammar.ss
Normal file
|
@ -0,0 +1,357 @@
|
|||
;;; Copyright 2017 Cisco Systems, Inc.
|
||||
;;;
|
||||
;;; Licensed under the Apache License, Version 2.0 (the "License");
|
||||
;;; you may not use this file except in compliance with the License.
|
||||
;;; You may obtain a copy of the License at
|
||||
;;;
|
||||
;;; http://www.apache.org/licenses/LICENSE-2.0
|
||||
;;;
|
||||
;;; Unless required by applicable law or agreed to in writing, software
|
||||
;;; distributed under the License is distributed on an "AS IS" BASIS,
|
||||
;;; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
;;; See the License for the specific language governing permissions and
|
||||
;;; limitations under the License.
|
||||
|
||||
;;; See http://www.cs.nott.ac.uk/~pszgmh/monparsing.pdf for origins of
|
||||
;;; some of the monadic combinators.
|
||||
|
||||
;;; Authors: Jon Rossie, Kent Dybvig
|
||||
|
||||
;;; The define-grammar form produces a parser:
|
||||
;;;
|
||||
;;; parser : token-stream -> ((Tree token-stream) ...)
|
||||
;;;
|
||||
;;; If the return value is the empty list, a parse error occurred.
|
||||
;;; If the return value has multiple elements, the parse was ambiguous.
|
||||
;;; The token-stream in each (Tree token-stream) is the tail of the
|
||||
;;; input stream that begins with the last token consumed by the parse.
|
||||
;;; This gives the consumer access to both the first and last token,
|
||||
;;; allowing it to determine cheaply the extent of the parse, including
|
||||
;;; source locations if source information is attached to the tokens.
|
||||
|
||||
;;; Internally, backtracking occurs whenever a parser return value
|
||||
;;; has multiple elements.
|
||||
|
||||
;;; This code should be included into a lexical context that supplies:
|
||||
;;;
|
||||
;;; token-bfp : token -> token's beginning file position
|
||||
;;; token-efp : token -> token's ending file position
|
||||
;;; meta constant? : syntax-object -> boolean
|
||||
;;; sep->parser : sep -> parser
|
||||
;;; constant->parser : constant -> parser
|
||||
;;; make-src : bfp x efp -> src. bfp > efp => no tokens consumed.
|
||||
;;;
|
||||
;;; See ez-grammar-test.ss for an example.
|
||||
|
||||
(module (define-grammar
|
||||
is sat peek seq ++ +++ many many+ ?
|
||||
parse-consumed-all? parse-result-value parse-result-unused
|
||||
grammar-trace
|
||||
)
|
||||
(import (streams))
|
||||
|
||||
(define grammar-trace (make-parameter #f))
|
||||
|
||||
(define-record-type parse-result
|
||||
(nongenerative parse-result)
|
||||
(fields value unused))
|
||||
|
||||
;; to enable $trace-is to determine the ending file position (efp) of a parse
|
||||
;; form, the input stream actually points to the preceding token rather than
|
||||
;; to the current token. the next few routines establish, maintain, and deal
|
||||
;; with that invariant.
|
||||
(define make-top-level-parser
|
||||
(lambda (parser)
|
||||
(lambda (inp)
|
||||
(parser (stream-cons 'dummy-token inp)))))
|
||||
|
||||
(define preceding-token
|
||||
(lambda (inp)
|
||||
(stream-car inp)))
|
||||
|
||||
(define current-token
|
||||
(lambda (inp)
|
||||
(stream-car (stream-cdr inp))))
|
||||
|
||||
(define remaining-tokens
|
||||
(lambda (inp)
|
||||
(stream-cdr inp)))
|
||||
|
||||
(define no-more-tokens?
|
||||
(lambda (inp)
|
||||
(stream-null? (stream-cdr inp))))
|
||||
|
||||
(define parse-consumed-all?
|
||||
(lambda (res)
|
||||
(no-more-tokens? (parse-result-unused res))))
|
||||
|
||||
;; A parser generator
|
||||
(define result
|
||||
(lambda (v)
|
||||
;; this is a parser that ignores its input and produces v
|
||||
(lambda (inp)
|
||||
(stream (make-parse-result v inp)))))
|
||||
|
||||
;; A parse that always generates a parse error
|
||||
(define zero
|
||||
(lambda (inp)
|
||||
stream-nil))
|
||||
|
||||
;; For a non-empty stream, successfully consume the first element
|
||||
(define item
|
||||
(lambda (inp)
|
||||
(cond
|
||||
[(no-more-tokens? inp) '()]
|
||||
[else
|
||||
(stream (make-parse-result (current-token inp) (remaining-tokens inp)))])))
|
||||
|
||||
(define (peek p)
|
||||
(lambda (inp)
|
||||
(stream-map (lambda (pr)
|
||||
(make-parse-result (parse-result-value pr) inp))
|
||||
(p inp))))
|
||||
|
||||
;;------------------------------------------
|
||||
|
||||
(define bind
|
||||
(lambda (parser receiver)
|
||||
(lambda (inp)
|
||||
(let ([res* (parser inp)])
|
||||
(stream-append-all
|
||||
(stream-map (lambda (res)
|
||||
((receiver (parse-result-value res))
|
||||
(parse-result-unused res)))
|
||||
res*))))))
|
||||
|
||||
;; monad comprehensions
|
||||
(define-syntax is-where ; used by is and trace-is
|
||||
(lambda (x)
|
||||
(syntax-case x (where <-)
|
||||
[(_ expr (where)) #'expr]
|
||||
[(_ expr (where [x <- p] clauses ...))
|
||||
#'(bind p (lambda (x) (is-where expr (where clauses ...))))]
|
||||
[(_ expr (where pred clauses ...))
|
||||
#'(if pred (is-where expr (where clauses ...)) zero)]
|
||||
[(_ expr where-clause) (syntax-error #'where-clause)])))
|
||||
(indirect-export is-where bind)
|
||||
|
||||
(define-syntax is
|
||||
(syntax-rules ()
|
||||
[(_ expr where-clause) (is-where (result expr) where-clause)]))
|
||||
(indirect-export is is-where)
|
||||
|
||||
(module (trace-is)
|
||||
(define ($trace-is name proc head)
|
||||
(lambda (unused)
|
||||
(let ([res (proc (token-bfp (current-token head)) (token-efp (preceding-token unused)))])
|
||||
(when (and 'name (grammar-trace)) (printf "<<~s = ~s~%" 'name res))
|
||||
(stream (make-parse-result res unused)))))
|
||||
|
||||
(define-syntax trace-is
|
||||
(syntax-rules ()
|
||||
[(_ name proc-expr where-clause)
|
||||
(lambda (inp) ((is-where ($trace-is 'name proc-expr inp) where-clause) inp))]))
|
||||
(indirect-export trace-is $trace-is))
|
||||
|
||||
(define (seq2 p q) (is (cons x y) (where [x <- p] [y <- q])))
|
||||
|
||||
(define seq
|
||||
(lambda p*
|
||||
(let loop ([p* p*])
|
||||
(cond
|
||||
[(null? p*) (result '())]
|
||||
[else (seq2 (car p*) (loop (cdr p*)))]))))
|
||||
|
||||
(define (sat pred) (is x (where [x <- item] (pred x))))
|
||||
|
||||
(define ++ ;; introduce ambiguity
|
||||
(lambda (p q)
|
||||
(lambda (inp)
|
||||
(stream-append2 (p inp)
|
||||
(lambda ()
|
||||
(q inp))))))
|
||||
|
||||
(define (many+ p) (is (cons x xs) (where [x <- p] [xs <- (many p)])))
|
||||
|
||||
(define (many p) (++ (many+ p) (result '())))
|
||||
|
||||
(define (? p) (++ (sat p) (result #f)))
|
||||
|
||||
(define (sepby1 p sep)
|
||||
(is (cons x xs)
|
||||
(where
|
||||
[x <- p]
|
||||
[xs <- (many (is y (where [_ <- sep] [y <- p])))])))
|
||||
|
||||
(define (sepby p sep) (++ (sepby1 p sep) (result '())))
|
||||
|
||||
(define (bracket open p close) (is x (where [_ <- open] [x <- p] [_ <- close])))
|
||||
|
||||
(define (optional p default)
|
||||
(lambda (inp)
|
||||
(let ([res (p inp)])
|
||||
(if (stream-null? res)
|
||||
(stream (make-parse-result default inp))
|
||||
res))))
|
||||
|
||||
(define (first p)
|
||||
(lambda (inp)
|
||||
(let ([res (p inp)])
|
||||
(if (stream-null? res)
|
||||
res
|
||||
(stream (stream-car res))))))
|
||||
|
||||
(define (+++ p q) (first (++ p q))) ;; choose first match, cut backtracking
|
||||
|
||||
(define (format-inp inp)
|
||||
(if (no-more-tokens? inp)
|
||||
"#<null-stream>"
|
||||
(format "(~s ...)" (current-token inp))))
|
||||
|
||||
(define-syntax define-grammar
|
||||
(lambda (x)
|
||||
(define-record-type production
|
||||
(nongenerative)
|
||||
(fields name elt* receiver))
|
||||
(define-record-type clause
|
||||
(nongenerative)
|
||||
(fields id prod*))
|
||||
(define (gentemp) (datum->syntax #'* (gensym)))
|
||||
(define (elt-temps elt*)
|
||||
(fold-left
|
||||
(lambda (t* elt)
|
||||
(if (constant? elt) t* (cons (gentemp) t*)))
|
||||
'()
|
||||
elt*))
|
||||
(define parse-production
|
||||
(lambda (cl)
|
||||
(syntax-case cl (:: src =>)
|
||||
[[name :: src elt ... => receiver]
|
||||
(make-production #'name #'(elt ...)
|
||||
(with-syntax ([(t ...) (elt-temps #'(elt ...))])
|
||||
#'(lambda (bfp efp t ...)
|
||||
(receiver (make-src bfp efp) t ...))))]
|
||||
[[name :: elt ... => receiver]
|
||||
(make-production #'name #'(elt ...)
|
||||
(with-syntax ([(t ...) (elt-temps #'(elt ...))])
|
||||
#'(lambda (bfp efp t ...)
|
||||
(receiver t ...))))])))
|
||||
(define (left-factor clause*)
|
||||
(define syntax-equal?
|
||||
(lambda (x y)
|
||||
(equal? (syntax->datum x) (syntax->datum y))))
|
||||
(let lp1 ([clause* clause*] [new-clause* '()])
|
||||
(if (null? clause*)
|
||||
(reverse new-clause*)
|
||||
(let ([clause (car clause*)])
|
||||
(let lp2 ([prod* (clause-prod* clause)] [new-prod* '()] [clause* (cdr clause*)])
|
||||
(if (null? prod*)
|
||||
(lp1 clause* (cons (make-clause (clause-id clause) (reverse new-prod*)) new-clause*))
|
||||
(let ([prod (car prod*)] [prod* (cdr prod*)])
|
||||
(let ([elt* (production-elt* prod)])
|
||||
(if (null? elt*)
|
||||
(lp2 prod* (cons prod new-prod*) clause*)
|
||||
(let ([elt (car elt*)])
|
||||
(let-values ([(haves have-nots) (partition
|
||||
(lambda (prod)
|
||||
(let ([elt* (production-elt* prod)])
|
||||
(and (not (null? elt*))
|
||||
(syntax-equal? (car elt*) elt))))
|
||||
prod*)])
|
||||
(if (null? haves)
|
||||
(lp2 prod* (cons prod new-prod*) clause*)
|
||||
(let ([haves (cons prod haves)])
|
||||
; "haves" start with the same elt. to cut down on the number of new
|
||||
; nonterminals and receiver overhead, find the largest common prefix
|
||||
(let ([prefix (cons elt
|
||||
(let f ([elt** (map production-elt* haves)])
|
||||
(let ([elt** (map cdr elt**)])
|
||||
(if (ormap null? elt**)
|
||||
'()
|
||||
(let ([elt (caar elt**)])
|
||||
(if (andmap (lambda (elt*) (syntax-equal? (car elt*) elt)) (cdr elt**))
|
||||
(cons (caar elt**) (f elt**))
|
||||
'()))))))])
|
||||
(let ([t (gentemp)] [n (length prefix)] [t* (elt-temps prefix)])
|
||||
(lp2 have-nots
|
||||
(cons (make-production #f (append prefix (list t))
|
||||
#`(lambda (bfp efp #,@t* p) (p bfp #,@t*)))
|
||||
new-prod*)
|
||||
(cons (make-clause t
|
||||
(map (lambda (prod)
|
||||
(let ([elt* (list-tail (production-elt* prod) n)])
|
||||
(make-production (production-name prod) elt*
|
||||
(let ([u* (elt-temps elt*)])
|
||||
#`(lambda (bfp efp #,@u*)
|
||||
(lambda (bfp #,@t*)
|
||||
(#,(production-receiver prod) bfp efp #,@t* #,@u*)))))))
|
||||
haves))
|
||||
clause*)))))))))))))))))
|
||||
(define (nt-helper clause*)
|
||||
(define (elt-helper x)
|
||||
(syntax-case x (SEP+ SEP* OPT K* K+)
|
||||
[(SEP+ p sep) #`(sepby1 #,(elt-helper #'p) (sep->parser sep))]
|
||||
[(SEP* p sep) #`(sepby #,(elt-helper #'p) (sep->parser sep))]
|
||||
[(OPT p dflt) #`(optional #,(elt-helper #'p) dflt)]
|
||||
[(K* p) #`(many #,(elt-helper #'p))]
|
||||
[(K+ p) #`(many+ #,(elt-helper #'p))]
|
||||
[k (constant? #'k) #'(constant->parser 'k)]
|
||||
[p #'p]))
|
||||
(let loop ([clause* clause*] [binding* '()])
|
||||
(if (null? clause*)
|
||||
binding*
|
||||
(loop
|
||||
(cdr clause*)
|
||||
(cons
|
||||
#`[#,(clause-id (car clause*))
|
||||
#,(let f ([prod* (clause-prod* (car clause*))])
|
||||
(if (null? prod*)
|
||||
#'zero
|
||||
(with-syntax ([name (production-name (car prod*))]
|
||||
[(elt ...) (production-elt* (car prod*))]
|
||||
[receiver (production-receiver (car prod*))])
|
||||
(with-syntax ([(x ...) (generate-temporaries #'(elt ...))])
|
||||
(with-syntax ([([y _] ...) (filter (lambda (pr) (not (constant? (cadr pr)))) #'([x elt] ...))])
|
||||
(with-syntax ([(where-nt ...) (map elt-helper #'(elt ...))])
|
||||
#`(+++ ;; use +++ if you don't ever need to backtrack to a previous production for the same non-terminal
|
||||
(lambda (inp)
|
||||
(when (and 'name (grammar-trace)) (printf ">>~s(~a)~%" 'name (format-inp inp)))
|
||||
(let ([res ((trace-is name (lambda (bfp efp) (receiver bfp efp y ...)) (where [x <- where-nt] ...)) inp)])
|
||||
(when (and 'name (grammar-trace))
|
||||
(if (stream-null? res)
|
||||
(printf "<<~s(~a) failed~%" 'name (format-inp inp))
|
||||
(printf "<<~s(~a) succeeded~%" 'name (format-inp inp))))
|
||||
res))
|
||||
#,(f (cdr prod*)))))))))]
|
||||
binding*)))))
|
||||
(syntax-case x ()
|
||||
[(_ init-nt [nt prod prods ...] ...)
|
||||
(with-syntax ([(binding ...)
|
||||
(nt-helper
|
||||
(left-factor
|
||||
(map (lambda (nt prod*) (make-clause nt (map parse-production prod*)))
|
||||
#'(nt ...)
|
||||
#'((prod prods ...) ...))))])
|
||||
#'(define init-nt
|
||||
(letrec (binding ...)
|
||||
(make-top-level-parser init-nt))))])))
|
||||
|
||||
(indirect-export define-grammar
|
||||
result
|
||||
zero
|
||||
is
|
||||
trace-is
|
||||
sepby1
|
||||
sepby
|
||||
optional
|
||||
many
|
||||
many+
|
||||
+++
|
||||
|
||||
grammar-trace
|
||||
format-inp
|
||||
trace-is
|
||||
|
||||
make-top-level-parser
|
||||
)
|
||||
)
|
|
@ -585,3 +585,9 @@ edit>
|
|||
|
||||
(begin (define-struct! caramel x y z) (eqv? (caramel-x (caramel 1 2 3)) 1))
|
||||
)
|
||||
|
||||
(examples-mat ez-grammar-test ("ez-grammar-test")
|
||||
(equal?
|
||||
(with-output-to-string ez-grammar-test)
|
||||
"end of tests\n")
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue
Block a user