initial cut at language-creation chapter in guide: reader extension
This commit is contained in:
parent
1d0bc61098
commit
88fd429eaf
48
collects/scribblings/guide/arith.rkt
Normal file
48
collects/scribblings/guide/arith.rkt
Normal file
|
@ -0,0 +1,48 @@
|
|||
#lang racket
|
||||
(require syntax/readerr)
|
||||
|
||||
(provide read read-syntax)
|
||||
|
||||
;; Datum-level entry point: parse one expression with this module's
;; `read-syntax' (passing #f as the source name) and strip the resulting
;; syntax object down to a plain S-expression.
(define (read in)
  (let ([stx (read-syntax #f in)])
    (syntax->datum stx)))
|
||||
|
||||
;; Syntax-level entry point: discard any leading whitespace on `in', then
;; parse a single infix arithmetic expression, passing `src' along as the
;; source name for the resulting syntax objects.
(define (read-syntax src in)
  (begin
    (skip-whitespace in)
    (read-arith src in)))
|
||||
|
||||
;; Consume the (possibly empty) run of whitespace at the front of `in'.
;; The regexp match result is returned as-is.
(define skip-whitespace
  (let ([leading-space #px"^\\s*"])
    (lambda (in)
      (regexp-match leading-space in))))
|
||||
|
||||
;; read-arith : any/c input-port? -> syntax?
;;
;; Reads one infix arithmetic expression from `in' and returns it as a
;; prefix-form syntax object, e.g. "1*2+3" becomes (+ (* 1 2) 3).
;; Operands are single lowercase letters or unsigned decimal integers;
;; operators are + - * /.  `*' and `/' bind tighter than `+' and `-', and
;; operators of equal precedence associate to the left, so "1-2-3" becomes
;; (- (- 1 2) 3).
;;
;; `src' is recorded as the source name in every syntax object produced.
;; Source locations are attached only when the starting line is known
;; (i.e. `port-next-location' reports a line for `in').
;;
;; Raises a read error (via `raise-read-error') when the input does not
;; start with a well-formed expression.
(define (read-arith src in)
  ;; Location of the first character of the expression.
  (define-values (line col pos) (port-next-location in))
  (define expr-match
    (regexp-match
     ;; An operand followed by any number of operator-operand pairs; the
     ;; negative lookahead rejects a dangling operator right after it.
     #px"^([a-z]|[0-9]+)(?:[-+*/]([a-z]|[0-9]+))*(?![-+*/])"
     in))

  ;; Wrap `v' as a syntax object located `delta' characters past the start
  ;; of the expression and spanning the length of `span-str'.
  (define (to-syntax v delta span-str)
    (datum->syntax #f v (make-srcloc delta span-str)))

  ;; Build a source-location vector, or #f when no line is available.
  (define (make-srcloc delta span-str)
    (and line
         (vector src line (+ col delta) (+ pos delta)
                 (string-length span-str))))

  ;; Parse the expression text `s', which starts `delta' characters into
  ;; the overall match.  The greedy `(.*)' splits at the LAST operator of
  ;; the lowest precedence present, which is what makes the operators
  ;; left-associative.
  (define (parse-expr s delta)
    (match (or (regexp-match #rx"^(.*)([+-])(.*)$" s)
               (regexp-match #rx"^(.*)([*/])(.*)$" s))
      [(list _ a-str op-str b-str)
       (define a-len (string-length a-str))
       (define a (parse-expr a-str delta))
       ;; the right operand starts after the left operand and the
       ;; one-character operator
       (define b (parse-expr b-str (+ delta 1 a-len)))
       (define op (to-syntax (string->symbol op-str)
                             (+ delta a-len) op-str))
       (to-syntax (list op a b) delta s)]
      ;; no operator left: a number if it parses as one, else a symbol
      [_ (to-syntax (or (string->number s)
                        (string->symbol s))
                    delta s)]))

  (unless expr-match
    ;; Measure the span with `port-next-location' at both ends so that it
    ;; is in the same units as `pos' and can never be negative.
    (let-values ([(end-line end-col end-pos) (port-next-location in)])
      (raise-read-error "bad arithmetic syntax"
                        src line col pos
                        (and pos end-pos (- end-pos pos)))))
  (parse-expr (bytes->string/utf-8 (car expr-match)) 0))
|
41
collects/scribblings/guide/dollar.rkt
Normal file
41
collects/scribblings/guide/dollar.rkt
Normal file
|
@ -0,0 +1,41 @@
|
|||
#lang racket
|
||||
(require syntax/readerr
|
||||
(prefix-in arith: "arith.rkt"))
|
||||
|
||||
(provide (rename-out [$-read read]
|
||||
[$-read-syntax read-syntax]))
|
||||
|
||||
;; `read' for the dollar language: call Racket's `read' on `in' with
;; `current-readtable' set to the table built by `make-$-readtable'.
(define ($-read in)
  (let ([table (make-$-readtable)])
    (parameterize ([current-readtable table])
      (read in))))
|
||||
|
||||
;; `read-syntax' for the dollar language: call Racket's `read-syntax' on
;; `in' (with source name `src') while `current-readtable' is set to the
;; table built by `make-$-readtable'.
(define ($-read-syntax src in)
  (let ([table (make-$-readtable)])
    (parameterize ([current-readtable table])
      (read-syntax src in))))
|
||||
|
||||
;; Extend whatever readtable is current with a `$' entry: `$' becomes a
;; terminating macro character handled by `read-dollar'.
(define (make-$-readtable)
  (let ([base (current-readtable)])
    (make-readtable base
                    #\$ 'terminating-macro read-dollar)))
|
||||
|
||||
;; Readtable handler for `$'.  The two-argument case parses the text after
;; `$' with arith.rkt's `read' and uses the port's name as the source; the
;; six-argument case parses it with arith.rkt's `read-syntax' and the given
;; `src'.  Either way the result is handed to `check-$-after'.
(define read-dollar
  (case-lambda
    [(ch in src line col pos)
     (let ([stx (arith:read-syntax src in)])
       (check-$-after stx in src))]
    [(ch in)
     (let ([datum (arith:read in)])
       (check-$-after datum in (object-name in)))]))
|
||||
|
||||
;; Skip whitespace on `in', then require the next character to be `$' and
;; consume it.  Anything else is reported through `bad-ending'.  Returns
;; `val' unchanged.
(define (check-$-after val in src)
  (regexp-match #px"^\\s*" in)
  (define next (peek-char in))
  (cond
    [(equal? next #\$) (read-char in)]
    [else
     (bad-ending next src in)
     (read-char in)])
  val)
|
||||
|
||||
;; bad-ending : (or/c char? eof-object?) any/c input-port? -> none
;;
;; Reports a missing closing `$' at the current location of `in'.
;; `ch' is the character that was found instead of `$'.  Running out of
;; input is reported with `raise-read-eof-error' and a zero-width span;
;; any other character is reported with `raise-read-error' and a span of
;; one character.
(define (bad-ending ch src in)
  (let-values ([(line col pos) (port-next-location in)])
    ((if (eof-object? ch)
         raise-read-eof-error
         raise-read-error)
     "expected a closing `$'"
     src line col pos
     (if (eof-object? ch) 0 1))))
|
6
collects/scribblings/guide/five.rkt
Normal file
6
collects/scribblings/guide/five.rkt
Normal file
|
@ -0,0 +1,6 @@
|
|||
#lang racket/base
|
||||
|
||||
(provide read read-syntax)
|
||||
|
||||
;; Datum reader: take the next five characters of `in' as a string.
(define read
  (lambda (in)
    (read-string 5 in)))
|
||||
;; Syntax reader: `src' is ignored; take the next five characters of `in'
;; as a string.
(define read-syntax
  (lambda (src in)
    (read-string 5 in)))
|
|
@ -50,6 +50,8 @@ precise details to @|MzScheme| and other reference manuals.
|
|||
|
||||
@include-section["macros.scrbl"]
|
||||
|
||||
@include-section["languages.scrbl"]
|
||||
|
||||
@include-section["performance.scrbl"]
|
||||
|
||||
@include-section["running.scrbl"]
|
||||
|
|
37
collects/scribblings/guide/languages.scrbl
Normal file
37
collects/scribblings/guide/languages.scrbl
Normal file
|
@ -0,0 +1,37 @@
|
|||
#lang scribble/doc
|
||||
@(require scribble/manual
|
||||
"guide-utils.ss")
|
||||
|
||||
@title[#:tag "languages" #:style 'toc]{Creating Languages}
|
||||
|
||||
The @tech{macro} facilities defined in the preceding chapter let a
|
||||
programmer define syntactic extensions to a language, but the
|
||||
expressiveness of a macro is limited in two ways:
|
||||
|
||||
@itemlist[
|
||||
|
||||
@item{a macro cannot restrict the syntax available in its context or
|
||||
change the meaning of surrounding forms; and}
|
||||
|
||||
@item{a macro can extend the syntax of a language only within the
|
||||
parameters of the language's lexical conventions, such as using
|
||||
parentheses to group the macro name with its subforms and using
|
||||
the core syntax of identifiers, keywords, and literals.}
|
||||
|
||||
]
|
||||
|
||||
@guideother{The distinction between the @tech{reader} and
|
||||
@tech{expander} layer is introduced in @secref["lists-and-syntax"].}
|
||||
|
||||
That is, a macro can only extend a language, and it can do so only at
|
||||
the @tech{expander} layer. Racket offers additional facilities for
|
||||
defining a starting point of the @tech{expander} layer, for extending
|
||||
the @tech{reader} layer, for defining the starting point of the
|
||||
@tech{reader} layer, and for packaging a @tech{reader} and
|
||||
@tech{expander} starting point into a conveniently named language.
|
||||
|
||||
@local-table-of-contents[]
|
||||
|
||||
@;------------------------------------------------------------------------
|
||||
@; @include-section["module-languages.scrbl"]
|
||||
@include-section["reader-extension.scrbl"]
|
225
collects/scribblings/guide/reader-extension.scrbl
Normal file
225
collects/scribblings/guide/reader-extension.scrbl
Normal file
|
@ -0,0 +1,225 @@
|
|||
#lang scribble/doc
|
||||
@(require scribble/manual
|
||||
scribble/bnf
|
||||
scribble/eval
|
||||
(for-label racket/match
|
||||
syntax/readerr)
|
||||
"guide-utils.ss"
|
||||
(for-syntax racket/base
|
||||
syntax/strip-context))
|
||||
|
||||
@(define-syntax (racketmodfile stx)
   ;; (racketmodfile file) expands to (racketmod form ...), where the forms
   ;; are read from `file' at expansion time.  The first six bytes of the
   ;; file are skipped before reading, and every form read is given the
   ;; lexical context of the `file' syntax object.
   (syntax-case stx ()
     [(_ file)
      (let* ([path (syntax-e #'file)]
             [forms
              (call-with-input-file* path
                (lambda (in)
                  (read-bytes 6 in)
                  (port-count-lines! in)
                  ;; collect forms in reverse, then restore file order
                  (let loop ([acc null])
                    (let ([v (read-syntax (object-name in) in)])
                      (if (eof-object? v)
                          (reverse acc)
                          (loop (cons (replace-context #'file v) acc)))))))])
        (with-syntax ([(content ...) forms])
          #'(racketmod content ...)))]))
|
||||
|
||||
@title[#:tag "hash-reader"]{Reader Extensions}
|
||||
|
||||
The @tech{reader} layer of the Racket language supports a
|
||||
@litchar{#reader} syntax for allowing an external processor to parse
|
||||
raw bytes into forms to be consumed by the @tech{expander} layer.
|
||||
The syntax of @litchar{#reader} is
|
||||
|
||||
@racketblock[@#,(BNF-seq @litchar{#reader} @nonterm{module-path} @nonterm{reader-specific})]
|
||||
|
||||
where @nonterm{module-path} names a module that provides
|
||||
@racketidfont{read} and @racketidfont{read-syntax} functions. The
|
||||
@nonterm{module-path} itself is written with the reader syntax put in
|
||||
place by its context. The @nonterm{reader-specific} part is a sequence
|
||||
of characters that is parsed as determined by the @racketidfont{read}
|
||||
and @racketidfont{read-syntax} functions that are exported by the
|
||||
module named through @nonterm{module-path}.
|
||||
|
||||
For example, suppose that file @filepath{five.rkt} contains
|
||||
|
||||
@racketmod[
|
||||
racket/base
|
||||
|
||||
(provide read read-syntax)
|
||||
|
||||
(define (read in) (list (read-string 5 in)))
|
||||
(define (read-syntax src in) (list (read-string 5 in)))
|
||||
]
|
||||
|
||||
Then, the program
|
||||
|
||||
@racketmod[
|
||||
racket/base
|
||||
|
||||
'(1 @#,(elem @racketmetafont{#reader} @racket["five.rkt"] @tt{23456} @racket[7]) 8)
|
||||
]
|
||||
|
||||
is equivalent to
|
||||
|
||||
@racketmod[
|
||||
racket/base
|
||||
|
||||
'(1 ("23456") 7 8)
|
||||
]
|
||||
|
||||
because the @racketidfont{read} and @racketidfont{read-syntax}
|
||||
functions of @filepath{five.rkt} both read five characters from the
|
||||
input stream and put them into a string and then a list. The reader
|
||||
functions from @filepath{five.rkt} are not obliged to follow Racket
|
||||
lexical conventions and treat the continuous sequence @litchar{234567}
|
||||
as a single number. Since only the @litchar{23456} part is consumed by
|
||||
@racketidfont{read} or @racketidfont{read-syntax}, the @litchar{7}
|
||||
remains to be parsed in the usual Racket way. Similarly, the reader
|
||||
functions from @filepath{five.rkt} are not obliged to treat spaces as
|
||||
whitespace, and
|
||||
|
||||
@racketmod[
|
||||
racket/base
|
||||
|
||||
'(1 @#,(elem @racketmetafont{#reader} @racket["five.rkt"] @hspace[1] @tt{2345} @racket[67]) 8)
|
||||
]
|
||||
|
||||
is equivalent to
|
||||
|
||||
@racketmod[
|
||||
racket/base
|
||||
|
||||
'(1 (" 2345") 67 8)
|
||||
]
|
||||
|
||||
since the first character immediately after @racket["five.rkt"] is a
|
||||
space.
|
||||
|
||||
A @litchar{#reader} form can be used in the @tech{REPL}, too:
|
||||
|
||||
@interaction[
|
||||
(eval:alts @#,(elem @racketmetafont{#reader}@racket["five.rkt"]@tt{abcde}) #reader"five.rkt"abcde)
|
||||
]
|
||||
|
||||
@; ----------------------------------------------------------------------
|
||||
|
||||
@section{Source Locations}
|
||||
|
||||
The difference between @racketidfont{read} and
|
||||
@racketidfont{read-syntax} is that @racketidfont{read} is meant to be
|
||||
used for data like the Racket @racket[read] function, while
|
||||
@racketidfont{read-syntax} is meant to be used to parse programs. More
|
||||
precisely, the @racketidfont{read} function will be used when the
|
||||
enclosing stream is being parsed by the Racket @racket[read], and
|
||||
@racketidfont{read-syntax} is used when the enclosing stream is being
|
||||
parsed by the Racket @racket[read-syntax] function. Nothing requires
|
||||
@racketidfont{read} and @racketidfont{read-syntax} to parse input in
|
||||
the same way, though they normally should.
|
||||
|
||||
Although the @racketidfont{read-syntax} function can return the same
|
||||
kind of value as @racketidfont{read}, it should normally return a
|
||||
@tech{syntax object} that connects the parsed expression with source
|
||||
locations. Unlike the @filepath{five.rkt} example, the
|
||||
@racketidfont{read-syntax} function is typically implemented directly,
|
||||
and then @racketidfont{read} can use @racketidfont{read-syntax} and
|
||||
strip away source information.
|
||||
|
||||
The following @filepath{arith.rkt} module implements a reader to
|
||||
parse simple infix arithmetic expressions into Racket forms. For
|
||||
example, @litchar{1*2+3} parses into the Racket form @racket[(+ (* 1
|
||||
2) 3)]. Single-letter variables can appear in the expression. The
|
||||
implementation uses @racket[port-next-location] to obtain the current
|
||||
source location, and it uses @racket[datum->syntax] to turn raw values
|
||||
into @tech{syntax objects}.
|
||||
|
||||
@racketmodfile["arith.rkt"]
|
||||
|
||||
If the @filepath{arith.rkt} reader is used in an expression position,
|
||||
then its parse result will be treated as a Racket expression. If it is
|
||||
used in a quoted form, however, then it just produces a number or a
|
||||
list:
|
||||
|
||||
@interaction[
|
||||
(eval:alts @#,(elem @racketmetafont{#reader}@racket["arith.rkt"]@hspace[1]@tt{1*2+3}) #reader"arith.rkt" 1*2+3 )
|
||||
(eval:alts '@#,(elem @racketmetafont{#reader}@racket["arith.rkt"]@hspace[1]@tt{1*2+3}) '#reader"arith.rkt" 1*2+3 )
|
||||
]
|
||||
|
||||
The @filepath{arith.rkt} reader could also be used in positions that
|
||||
make no sense. Since the @racketidfont{read-syntax} implementation
|
||||
tracks source locations, syntax errors can at least refer to parts of
|
||||
the input in terms of their original locations (at the beginning of
|
||||
the error message):
|
||||
|
||||
@interaction[
|
||||
(eval:alts (let @#,(elem @racketmetafont{#reader}@racket["arith.rkt"]@hspace[1]@tt{1*2+3}) 8)
|
||||
(eval (parameterize ([read-accept-reader #t])
|
||||
(read-syntax 'repl (let ([p @open-input-string{(let #reader"arith.rkt" 1*2+3 8)}])
|
||||
(port-count-lines! p)
|
||||
p)))))
|
||||
]
|
||||
|
||||
@; ----------------------------------------------------------------------
|
||||
|
||||
@section{Readtables}
|
||||
|
||||
A reader extension's ability to parse input characters in an arbitrary
|
||||
way can be powerful, but many cases of lexical extension call for a
|
||||
less general but more composable approach. In much the same way that
|
||||
the @tech{expander} level of Racket syntax can be extended through
|
||||
@tech{macros}, the @tech{reader} level of Racket syntax can be more
|
||||
composably extended through a @deftech{readtable}.
|
||||
|
||||
The Racket reader is a recursive-descent parser, and the
|
||||
@tech{readtable} maps characters to parsing handlers. For example, the
|
||||
default readtable maps @litchar{(} to a handler that recursively
|
||||
parses subforms until it finds a @litchar{)}. The
|
||||
@racket[current-readtable] @tech{parameter} determines the
|
||||
@tech{readtable} that is used by @racket[read] or
|
||||
@racket[read-syntax]. Rather than parsing raw characters directly, a
|
||||
reader extension can install an extended @tech{readtable} and then
|
||||
chain to @racket[read] or @racket[read-syntax].
|
||||
|
||||
@guideother{See @secref["parameterize"] for an introduction to
|
||||
@tech{parameters}.}
|
||||
|
||||
The @racket[make-readtable] function constructs a new @tech{readtable}
|
||||
as an extension of an existing one. It accepts a sequence of
|
||||
specifications in terms of a character, a type of mapping for the
|
||||
character, and (for certain types of mappings) a parsing
|
||||
procedure. For example, to extend the readtable so that @litchar{$}
|
||||
can be used to start and end infix expressions, implement a
|
||||
@racket[read-dollar] function and use:
|
||||
|
||||
@racketblock[
|
||||
(make-readtable (current-readtable)
|
||||
#\$ 'terminating-macro read-dollar)
|
||||
]
|
||||
|
||||
The protocol for @racket[read-dollar] requires the function to accept
|
||||
different numbers of arguments depending on whether it is being used
|
||||
in @racket[read] or @racket[read-syntax] mode. In @racket[read] mode,
|
||||
the parser function is given two arguments: the character that
|
||||
triggered the parser function and the input port that is being
|
||||
read. In @racket[read-syntax] mode, the function must accept four
|
||||
additional arguments that provide the source location of the
|
||||
character.
|
||||
|
||||
The following @filepath{dollar.rkt} module defines a
|
||||
@racket[read-dollar] function in terms of the @racketidfont{read} and
|
||||
@racketidfont{read-syntax} functions provided by @filepath{arith.rkt},
|
||||
and it puts it together with new @racketidfont{read} and
|
||||
@racketidfont{read-syntax} functions that install the readtable and
|
||||
chain to Racket's @racket[read] or @racket[read-syntax]:
|
||||
|
||||
@racketmodfile["dollar.rkt"]
|
||||
|
||||
With this reader extension, a single @racketmetafont{#reader} can be
|
||||
used at the beginning of an expression to enable multiple uses of
|
||||
@litchar{$} to switch to infix arithmetic:
|
||||
|
||||
@interaction[
|
||||
(eval:alts @#,(elem @racketmetafont{#reader}@racket["dollar.rkt"]@hspace[1]
|
||||
@racket[(let ([a @#,tt{$1*2+3$}][b @#,tt{$5/6$}]) $a+b$)])
|
||||
#reader"dollar.rkt" (let ([a $1*2+3$][b $5/6$]) $a+b$))
|
||||
]
|
|
@ -224,10 +224,10 @@ streams. Instead, the syntax is determined by two layers:
|
|||
|
||||
@itemize[
|
||||
|
||||
@item{a @defterm{read} layer, which turns a sequence of characters
|
||||
@item{a @deftech{reader} layer, which turns a sequence of characters
|
||||
into lists, symbols, and other constants; and}
|
||||
|
||||
@item{an @defterm{expand} layer, which processes the lists, symbols,
|
||||
@item{an @deftech{expander} layer, which processes the lists, symbols,
|
||||
and other constants to parse them as an expression.}
|
||||
|
||||
]
|
||||
|
|
|
@ -316,7 +316,8 @@ character and the @racket[#f] readtable.}
|
|||
|
||||
(define tuple-readtable+
|
||||
(make-readtable tuple-readtable
|
||||
#\* 'terminating-macro (lambda a (make-special-comment #f))
|
||||
#\* 'terminating-macro (lambda a
|
||||
(make-special-comment #f))
|
||||
#\_ #\space #f))
|
||||
(parameterize ([current-readtable tuple-readtable+])
|
||||
(read (open-input-string "< * 1 __,__ 2 __,__ * \"a\" * >")))
|
||||
|
|
Loading…
Reference in New Issue
Block a user