diff --git a/collects/scribblings/guide/arith.rkt b/collects/scribblings/guide/arith.rkt
new file mode 100644
index 0000000000..64999d533d
--- /dev/null
+++ b/collects/scribblings/guide/arith.rkt
@@ -0,0 +1,63 @@
+#lang racket
+(require syntax/readerr)
+
+(provide read read-syntax)
+
+;; Reads one infix arithmetic expression from `in' as a plain datum
+;; by stripping the source locations from the `read-syntax' result.
+(define (read in)
+  (syntax->datum (read-syntax #f in)))
+
+;; Reads one infix arithmetic expression from `in' as a syntax object,
+;; skipping leading whitespace; `src' names the source in locations.
+(define (read-syntax src in)
+  (skip-whitespace in)
+  (read-arith src in))
+
+;; Consumes any whitespace at the front of `in'.
+(define (skip-whitespace in)
+  (regexp-match #px"^\\s*" in))
+
+;; Parses operands (a single letter or a run of digits) joined by
+;; `+', `-', `*', or `/' into a prefix-form syntax object; raises a
+;; read error if the input does not match such an expression.
+(define (read-arith src in)
+  (define-values (line col pos) (port-next-location in))
+  (define expr-match
+    (regexp-match
+     #px"^([a-z]|[0-9]+)(?:[-+*/]([a-z]|[0-9]+))*(?![-+*/])"
+     in))
+
+  ;; Wraps `v' as syntax located `delta' characters into the match.
+  (define (to-syntax v delta span-str)
+    (datum->syntax #f v (make-srcloc delta span-str)))
+  (define (make-srcloc delta span-str)
+    (and line
+         (vector src line (+ col delta) (+ pos delta)
+                 (string-length span-str))))
+
+  ;; Splits at the last `+'/`-' (or, failing that, the last `*'/`/')
+  ;; so that operators of equal precedence associate to the left:
+  ;; 1-2-3 parses as (- (- 1 2) 3).
+  (define (parse-expr s delta)
+    (match (or (regexp-match #rx"^(.*)([+-])(.*)$" s)
+               (regexp-match #rx"^(.*)([*/])(.*)$" s))
+      [(list _ a-str op-str b-str)
+       (define a-len (string-length a-str))
+       (define a (parse-expr a-str delta))
+       (define b (parse-expr b-str (+ delta 1 a-len)))
+       (define op (to-syntax (string->symbol op-str)
+                             (+ delta a-len) op-str))
+       (to-syntax (list op a b) delta s)]
+      [_ (to-syntax (or (string->number s)
+                        (string->symbol s))
+                    delta s)]))
+
+  (unless expr-match
+    ;; Measure the span with `port-next-location' so that it is in
+    ;; the same units as `pos'; `file-position' counts from 0.
+    (let-values ([(end-line end-col end-pos) (port-next-location in)])
+      (raise-read-error "bad arithmetic syntax"
+                        src line col pos
+                        (and pos end-pos (- end-pos pos)))))
+  (parse-expr (bytes->string/utf-8 (car expr-match)) 0))
diff --git a/collects/scribblings/guide/dollar.rkt b/collects/scribblings/guide/dollar.rkt
new file mode 100644
index 0000000000..aac944fe70
--- /dev/null
+++ b/collects/scribblings/guide/dollar.rkt
@@ -0,0 +1,51 @@
+#lang racket
+(require syntax/readerr
+         (prefix-in arith: "arith.rkt"))
+
+(provide (rename-out [$-read read]
+                     [$-read-syntax read-syntax]))
+
+;; Racket's `read', run with `$' mapped to the infix reader.
+(define ($-read in)
+  (parameterize ([current-readtable (make-$-readtable)])
+    (read in)))
+
+;; Racket's `read-syntax', run with `$' mapped to the infix reader.
+(define ($-read-syntax src in)
+  (parameterize ([current-readtable (make-$-readtable)])
+    (read-syntax src in)))
+
+;; Extends the current readtable so that `$' invokes `read-dollar'.
+(define (make-$-readtable)
+  (make-readtable (current-readtable)
+                  #\$ 'terminating-macro read-dollar))
+
+;; Readtable handler for `$': called with two arguments in `read'
+;; mode and with six in `read-syntax' mode.
+(define read-dollar
+  (case-lambda
+   [(ch in)
+    (check-$-after (arith:read in) in (object-name in))]
+   [(ch in src line col pos)
+    (check-$-after (arith:read-syntax src in) in src)]))
+
+;; Skips whitespace, consumes the closing `$', and returns `val';
+;; reports a read error if the next character is not `$'.
+(define (check-$-after val in src)
+  (regexp-match #px"^\\s*" in)
+  (let ([ch (peek-char in)])
+    (unless (equal? ch #\$) (bad-ending ch src in))
+    (read-char in))
+  val)
+
+;; Raises a read error at the current location of `in'.  Hitting
+;; end-of-file raises the EOF variant (`exn:fail:read:eof'), which
+;; lets a caller distinguish incomplete input from malformed input.
+(define (bad-ending ch src in)
+  (let-values ([(line col pos) (port-next-location in)])
+    ((if (eof-object? ch)
+         raise-read-eof-error
+         raise-read-error)
+     "expected a closing `$'"
+     src line col pos
+     (if (eof-object? ch) 0 1))))
diff --git a/collects/scribblings/guide/five.rkt b/collects/scribblings/guide/five.rkt
new file mode 100644
index 0000000000..4bfac4fa13
--- /dev/null
+++ b/collects/scribblings/guide/five.rkt
@@ -0,0 +1,9 @@
+#lang racket/base
+
+(provide read read-syntax)
+
+;; Both readers consume up to five characters and return them as a string.
+;; NOTE(review): the guide text shows these results wrapped in `list' --
+;; confirm which form is intended.
+(define (read in) (read-string 5 in))
+(define (read-syntax src in) (read-string 5 in))
diff --git a/collects/scribblings/guide/guide.scrbl b/collects/scribblings/guide/guide.scrbl
index c0f41600dc..998e8bdfdf 100644
--- a/collects/scribblings/guide/guide.scrbl
+++ b/collects/scribblings/guide/guide.scrbl
@@ -50,6 +50,8 @@ precise details to @|MzScheme| and other reference manuals.
@include-section["macros.scrbl"] +@include-section["languages.scrbl"] + @include-section["performance.scrbl"] @include-section["running.scrbl"] diff --git a/collects/scribblings/guide/languages.scrbl b/collects/scribblings/guide/languages.scrbl new file mode 100644 index 0000000000..0d4cb2a9ed --- /dev/null +++ b/collects/scribblings/guide/languages.scrbl @@ -0,0 +1,37 @@ +#lang scribble/doc +@(require scribble/manual + "guide-utils.ss") + +@title[#:tag "languages" #:style 'toc]{Creating Languages} + +The @tech{macro} facilities defined in the preceding chapter let a +programmer define syntactic extensions to a language, but the +expressiveness of a macro is limited in two ways: + +@itemlist[ + + @item{a macro cannot restrict the syntax available in its context or + change the meaning of surrounding forms; and} + + @item{a macro can extend the syntax of a language only within the + parameters of the language's lexical conventions, such as using + parentheses to group the macro name with its subforms and using + the core syntax of identifiers, keywords, and literals.} + +] + +@guideother{The distinction between the @tech{reader} and +@tech{expander} layer is introduced in @secref["lists-and-syntax"].} + +That is, a macro can only extend a language, and it can do so only at +the @tech{expander} layer. Racket offers additional facilities for +defining a starting point of the @tech{expander} layer, for extending +the @tech{reader} layer, for defining the starting point of the +@tech{reader} layer, and for packaging a @tech{reader} and +@tech{expander} starting point into a conveniently named language. 
+
+@local-table-of-contents[]
+
+@;------------------------------------------------------------------------
+@; @include-section["module-languages.scrbl"]
+@include-section["reader-extension.scrbl"]
diff --git a/collects/scribblings/guide/reader-extension.scrbl b/collects/scribblings/guide/reader-extension.scrbl
new file mode 100644
index 0000000000..9f90e46540
--- /dev/null
+++ b/collects/scribblings/guide/reader-extension.scrbl
@@ -0,0 +1,234 @@
+#lang scribble/doc
+@(require scribble/manual
+          scribble/bnf
+          scribble/eval
+          (for-label racket/match
+                     syntax/readerr)
+          "guide-utils.ss"
+          (for-syntax racket/base
+                      syntax/strip-context))
+
+@; Typesets the content of the module source `file' via `racketmod'.
+@; The first 6 bytes (expected to be `#lang ') are skipped so that the
+@; language name is read as the first form.
+@(define-syntax (racketmodfile stx)
+   (syntax-case stx ()
+     [(_ file)
+      ;; Resolve `file' against the directory of the document being
+      ;; loaded rather than whatever the current directory happens to be.
+      (let ([path (path->complete-path
+                   (syntax-e #'file)
+                   (or (current-load-relative-directory)
+                       (current-directory)))])
+        (with-syntax ([(content ...)
+                       (call-with-input-file* path
+                         (lambda (in)
+                           (read-bytes 6 in)
+                           (port-count-lines! in)
+                           (let loop ()
+                             (let ([v (read-syntax (object-name in) in)])
+                               (if (eof-object? v)
+                                   null
+                                   (cons (replace-context #'file v)
+                                         (loop)))))))])
+          #'(racketmod content ...)))]))
+
+@title[#:tag "hash-reader"]{Reader Extensions}
+
+The @tech{reader} layer of the Racket language supports a
+@litchar{#reader} syntax for allowing an external processor to parse
+raw bytes into forms to be consumed by the @tech{expander} layer.
+The syntax of @litchar{#reader} is
+
+@racketblock[@#,(BNF-seq @litchar{#reader} @nonterm{module-path} @nonterm{reader-specific})]
+
+where @nonterm{module-path} names a module that provides
+@racketidfont{read} and @racketidfont{read-syntax} functions. The
+@nonterm{module-path} itself is written with the reader syntax put in
+place by its context. The @nonterm{reader-specific} part is a sequence
+of characters that is parsed as determined by the @racketidfont{read}
+and @racketidfont{read-syntax} functions that are exported by the
+module named through @nonterm{module-path}.
+ +For example, suppose that file @filepath{five.rkt} contains + +@racketmod[ +racket/base + +(provide read read-syntax) + +(define (read in) (list (read-string 5 in))) +(define (read-syntax src in) (list (read-string 5 in))) +] + +Then, the program + +@racketmod[ +racket/base + +'(1 @#,(elem @racketmetafont{#reader} @racket["five.rkt"] @tt{23456} @racket[7]) 8) +] + +is equivalent to + +@racketmod[ +racket/base + +'(1 ("23456") 7 8) +] + +because the @racketidfont{read} and @racketidfont{read-syntax} +functions of @filepath{five.rkt} both read five characters from the +input stream and put them into a string and then a list. The reader +functions from @filepath{five.rkt} are not obliged to follow Racket +lexical conventions and treat the continuous sequence @litchar{234567} +as a single number. Since only the @litchar{23456} part is consumed by +@racketidfont{read} or @racketidfont{read-syntax}, the @litchar{7} +remains to be parsed in the usual Racket way. Similarly, the reader +functions from @filepath{five.rkt} are not obliged to treat spaces as +whitespace, and + +@racketmod[ +racket/base + +'(1 @#,(elem @racketmetafont{#reader} @racket["five.rkt"] @hspace[1] @tt{2345} @racket[67]) 8) +] + +is equivalent to + +@racketmod[ +racket/base + +'(1 (" 2345") 67 8) +] + +since the first character immediately after @racket["five.rkt"] is a +space. + +A @litchar{#reader} form can be used in the @tech{REPL}, too: + +@interaction[ +(eval:alts @#,(elem @racketmetafont{#reader}@racket["five.rkt"]@tt{abcde}) #reader"five.rkt"abcde) +] + +@; ---------------------------------------------------------------------- + +@section{Source Locations} + +The difference between @racketidfont{read} and +@racketidfont{read-syntax} is that @racketidfont{read} is meant to be +used for data like the Racket @racket[read] function, while +@racketidfont{read-syntax} is meant to be used to parse programs. 
More +precisely, the @racketidfont{read} function will be used when the +enclosing stream is being parsed by the Racket @racket[read], and +@racketidfont{read-syntax} is used when the enclosing stream is being +parsed by the Racket @racket[read-syntax] function. Nothing requires +@racketidfont{read} and @racketidfont{read-syntax} to parse input in +the same way, though they normally should. + +Although the @racketidfont{read-syntax} function can return the same +kind of value as @racketidfont{read}, it should normally return a +@tech{syntax object} that connects the parsed expression with source +locations. Unlike the @filepath{five.rkt} example, the +@racketidfont{read-syntax} function is typically implemented directly, +and then @racketidfont{read} can use @racketidfont{read-syntax} and +strip away source information. + +The following @filepath{arith.rkt} module implements a reader to +parse simple infix arithmetic expressions into Racket forms. For +example, @litchar{1*2+3} parses into the Racket form @racket[(+ (* 1 +2) 3)]. Single-letter variables can appear in the expression. The +implementation uses @racket[port-next-location] to obtain the current +source location, and it uses @racket[datum->syntax] to turn raw values +into @tech{syntax objects}. + +@racketmodfile["arith.rkt"] + +If the @filepath{arith.rkt} reader is used in an expression position, +then its parse result will be treated as a Racket expression. If it is +used in a quoted form, however, then it just produces a number or a +list: + +@interaction[ +(eval:alts @#,(elem @racketmetafont{#reader}@racket["arith.rkt"]@hspace[1]@tt{1*2+3}) #reader"arith.rkt" 1*2+3 ) +(eval:alts '@#,(elem @racketmetafont{#reader}@racket["arith.rkt"]@hspace[1]@tt{1*2+3}) '#reader"arith.rkt" 1*2+3 ) +] + +The @filepath{arith.rkt} reader could also be used in positions that +make no sense. 
Since the @racketidfont{read-syntax} implementation +tracks source locations, syntax errors can at least refer to parts of +the input in terms of their original locations (at the beginning of +the error message): + +@interaction[ +(eval:alts (let @#,(elem @racketmetafont{#reader}@racket["arith.rkt"]@hspace[1]@tt{1*2+3}) 8) + (eval (parameterize ([read-accept-reader #t]) + (read-syntax 'repl (let ([p @open-input-string{(let #reader"arith.rkt" 1*2+3 8)}]) + (port-count-lines! p) + p))))) +] + +@; ---------------------------------------------------------------------- + +@section{Readtables} + +A reader extension's ability to parse input characters in an arbitrary +way can be powerful, but many cases of lexical extension call for a +less general but more composable approach. In much the same way that +the @tech{expander} level of Racket syntax can be extended through +@tech{macros}, the @tech{reader} level of Racket syntax can be more +composably extended through a @deftech{readtable}. + +The Racket reader is a recursive-descent parser, and the +@tech{readtable} maps characters to parsing handlers. For example, the +default readtable maps @litchar{(} to a handler that recursively +parses subforms until it finds a @litchar{)}. The +@racket[current-readtable] @tech{parameter} determines the +@tech{readtable} that is used by @racket[read] or +@racket[read-syntax]. Rather than parsing raw characters directly, a +reader extension can install an extended @tech{readtable} and then +chain to @racket[read] or @racket[read-syntax]. + +@guideother{See @secref["parameterize"] for an introduction to +@tech{parameters}.} + +The @racket[make-readtable] function constructs a new @tech{readtable} +as an extension of an existing one. It accepts a sequence of +specifications in terms of a character, a type of mapping for the +character, and (for certain types of mappings) a parsing +procedure. 
For example, to extend the readtable so that @litchar{$} +can be used to start and end infix expressions, implement a +@racket[read-dollar] function and use: + +@racketblock[ +(make-readtable (current-readtable) + #\$ 'terminating-macro read-dollar) +] + +The protocol for @racket[read-dollar] requires the function to accept +different numbers of arguments depending on whether it is being used +in @racket[read] or @racket[read-syntax] mode. In @racket[read] mode, +the parser function is given two arguments: the character that +triggered the parser function and the input port that is being +read. In @racket[read-syntax] mode, the function must accept four +additional arguments that provide the source location of the +character. + +The following @filepath{dollar.rkt} module defines a +@racket[read-dollar] function in terms of the @racketidfont{read} and +@racketidfont{read-syntax} functions provided by @filepath{arith.rkt}, +and it puts it together with new @racketidfont{read} and +@racketidfont{read-syntax} functions that install the readtable and +chain to Racket's @racket[read] or @racket[read-syntax]: + +@racketmodfile["dollar.rkt"] + +With this reader extension, a single @racketmetafont{#reader} can be +used at the beginning of an expression to enable multiple uses of +@litchar{$} to switch to infix arithmetic: + +@interaction[ +(eval:alts @#,(elem @racketmetafont{#reader}@racket["dollar.rkt"]@hspace[1] + @racket[(let ([a @#,tt{$1*2+3$}][b @#,tt{$5/6$}]) $a+b$)]) + #reader"dollar.rkt" (let ([a $1*2+3$][b $5/6$]) $a+b$)) +] diff --git a/collects/scribblings/guide/truth.scrbl b/collects/scribblings/guide/truth.scrbl index db5c80a0ab..5da9755628 100644 --- a/collects/scribblings/guide/truth.scrbl +++ b/collects/scribblings/guide/truth.scrbl @@ -224,10 +224,10 @@ streams. 
Instead, the syntax is determined by two layers: @itemize[ - @item{a @defterm{read} layer, which turns a sequence of characters + @item{a @deftech{reader} layer, which turns a sequence of characters into lists, symbols, and other constants; and} - @item{an @defterm{expand} layer, which processes the lists, symbols, + @item{an @deftech{expander} layer, which processes the lists, symbols, and other constants to parse them as an expression.} ] diff --git a/collects/scribblings/reference/readtables.scrbl b/collects/scribblings/reference/readtables.scrbl index 9d0cc3bc7d..562b812043 100644 --- a/collects/scribblings/reference/readtables.scrbl +++ b/collects/scribblings/reference/readtables.scrbl @@ -316,7 +316,8 @@ character and the @racket[#f] readtable.} (define tuple-readtable+ (make-readtable tuple-readtable - #\* 'terminating-macro (lambda a (make-special-comment #f)) + #\* 'terminating-macro (lambda a + (make-special-comment #f)) #\_ #\space #f)) (parameterize ([current-readtable tuple-readtable+]) (read (open-input-string "< * 1 __,__ 2 __,__ * \"a\" * >")))