better reader implementation

svn: r6767
This commit is contained in:
Eli Barzilay 2007-06-29 07:40:01 +00:00
parent 9aaf939a88
commit c641584342
2 changed files with 352 additions and 266 deletions

View File

@ -4,21 +4,13 @@
(lib "kw.ss")) (lib "kw.ss"))
(provide (rename *read read) (provide (rename *read read)
(rename *read-syntax read-syntax)) (rename *read-syntax read-syntax))
(define (call-with-scribble-params t)
(parameterize ([scribble:read-insert-indents #f])
(t)))
(define/kw (*read #:optional [inp (current-input-port)]) (define/kw (*read #:optional [inp (current-input-port)])
(call-with-scribble-params (wrap inp (scribble:read-inside inp)))
(lambda ()
(wrap inp (scribble:read-inside inp)))))
(define/kw (*read-syntax #:optional src [port (current-input-port)]) (define/kw (*read-syntax #:optional src [port (current-input-port)])
(call-with-scribble-params (wrap port (scribble:read-inside-syntax src port))))
(lambda ()
(wrap port (scribble:read-inside-syntax src port)))))
(define (wrap port body) (define (wrap port body)
(let* ([p-name (object-name port)] (let* ([p-name (object-name port)]

View File

@ -2,66 +2,114 @@
;; Implements the @-reader macro for embedding text in Scheme code. ;; Implements the @-reader macro for embedding text in Scheme code.
(module reader mzscheme (module reader mzscheme
(require (lib "string.ss") (lib "kw.ss") (lib "readerr.ss" "syntax"))
(require (lib "kw.ss") (lib "string.ss") (lib "readerr.ss" "syntax"))
;; -------------------------------------------------------------------------- ;; --------------------------------------------------------------------------
;; customization ;; utilities for syntax specifications below
(provide read-insert-indents) ;; regexps
(define read-insert-indents (make-parameter #t)) (define (px . args)
(let* ([args (let loop ([xs args])
(if (list? xs) (apply append (map loop xs)) (list xs)))]
[args (map (lambda (x)
(cond
[(bytes? x) x]
[(string? x) (string->bytes/utf-8 x)]
[(char? x) (regexp-quote (bytes (char->integer x)))]
[else (error 'reader "internal error [px]")]))
args)])
(byte-pregexp (apply bytes-append args))))
(define (^px . args) (px #"^" args))
;; reverses a byte string visually
(define reverse-bytes
(let ([pairs (let ([xs (bytes->list #"([{<")]
[ys (bytes->list #")]}>")])
(append (map cons xs ys) (map cons ys xs)))])
(define (rev-byte b)
(cond [(assq b pairs) => cdr]
[else b]))
(lambda (bs) (list->bytes (map rev-byte (reverse! (bytes->list bs)))))))
;; -------------------------------------------------------------------------- ;; --------------------------------------------------------------------------
;; syntax ;; syntax
(define cmd-char #\@) ;; basic customization
(define ch:command #\@)
(define ch:comment #\;)
(define ch:bar-quote #\|)
(define ch:command-quote #\\)
(define ch:attrs-begin #\[)
(define ch:attrs-end #\])
(define ch:lines-begin #\{)
(define ch:lines-end #\})
(define bars-quoted #rx#"^[ \t\r\n]*\\|([^|]*)\\|") (define str:lines-begin* #"\\|[^a-zA-Z0-9 \t\r\n\f@\\\177-\377{]*\\{")
;; attrs open with a `[', and read in one shot as a list
(define open-attrs #rx#"^[ \t\r\n]*[[][ \t\r\n]*")
(define open-lines #rx#"^[ \t\r\n]*[{](?:[ \t]*\r?\n[ \t]*)?") ; 1 newline
(define open-lines* '(#"^[ \t\r\n]*" #"(?:[ \t]*\r?\n[ \t]*)?"))
(define open-lines-special ; a special ending expected: @foo{<{ ... }>} etc
#rx#"^[ \t\r\n]*([|][^a-zA-Z0-9 \t\r\n@\\]*?[{])(?:[ \t]*\r?\n[ \t]*)?")
(define open-attr/lines #rx#"^[ \t\r\n]*[[{][ \t\r\n]*")
(define close-attrs #rx#"^[]]")
(define close-lines #rx#"^(?:[ \t]*\r?\n[ \t]*)?[}]") ; swallow 1 newline
(define close-lines* '(#"^(?:[ \t]*\r?\n[ \t]*)?" #""))
(define comment-start #rx#"^[ \t]*;")
(define comment-line #rx#"^[^\r\n]*\r?\n[ \t]*") ; like tex's `%' nl & space
(define sub-start #rx#"^[@]")
(define line-item #rx#"^(?:[^{}@\r\n]*[^\\{}@\r\n]|[\\]+[{}@])+")
(define line-item* '(#"^(?:[^{}@\r\n]*[^\\{}@\r\n]|[\\]+(?:[@]|" #"))+"))
(define end-of-line #rx#"^([\\]+)?\r?\n[ \t]*") ; make \-eoln possible
(define bar-pfx-remove #rx#"^[|]")
(define bslash-unquote #rx#"[\\]([\\]*[{}@])")
(define bslash-unquote* '(#"[\\]([\\]+(?:[@]|" #"))"))
(define byte-pairs (define re:command (^px ch:command
(map (lambda (b) (cons (bytes-ref b 0) (bytes-ref b 1))) ;; the following identifies string escapes, see
'(#"()" #"[]" #"{}" #"<>"))) ;; hoe it is used below
"("ch:bar-quote"?\")?"))
(define re:whitespaces (^px "\\s+"))
(define re:comment-start (^px ch:comment))
(define re:comment-line (^px "[^\n]*\n[ \t]*")) ; like tex's `%'
(define re:expr-escape (^px ch:bar-quote))
(define re:attrs-begin (^px ch:attrs-begin))
(define re:attrs-end (^px ch:attrs-end))
(define re:lines-begin (^px ch:lines-begin))
(define re:lines-begin* (^px str:lines-begin*))
(define re:lines-end (^px ch:lines-end))
(define str:end-of-line "[ \t]*\r?\n[ \t]*") ; eat spaces on the next line
(define re:end-of-line (^px str:end-of-line))
(define (re:line-item* bgn end)
(^px "(.+?)(?:"bgn"|"end
"|"ch:command-quote"*"ch:command
"|"str:end-of-line")"))
(define re:line-item (re:line-item* ch:lines-begin ch:lines-end))
(define re:line-item-no-nests (^px "(.+?)(?:"ch:command-quote"*"ch:command
"|"str:end-of-line")"))
(define re:command-unquote (^px ch:command-quote
"("ch:command-quote"*"ch:command")"))
;; -------------------------------------------------------------------------- ;; --------------------------------------------------------------------------
;; utilities ;; utilities
(define make-spaces ;; like `regexp-match/fail-without-reading', without extras; the regexp that
(let ([t (make-hash-table)]) ;; is used must be anchored -- nothing is dropped
(lambda (n) (define (*regexp-match-peek-positions pattern input-port)
(hash-table-get t n (unless (and (byte-regexp? pattern)
(lambda () (regexp-match? #rx#"^\\^" (object-name pattern)))
(let ([s (make-string n #\space)]) (error 'reader "internal error [invalid bregexp] ~e" pattern))
(hash-table-put! t n s) s)))))) (regexp-match-peek-positions pattern input-port))
;; the following doesn't work -- must peek first
;; (define (*regexp-match-positions pattern input-port)
;; (unless (and (byte-regexp? pattern)
;; (regexp-match? #rx#"^\\^" (object-name pattern)))
;; (error 'reader "internal error [invalid bregexp] ~e" pattern))
;; (regexp-match-peek-positions pattern input-port))
(define (*regexp-match pattern input-port)
(let ([m (*regexp-match-peek-positions pattern input-port)])
(and m (let ([s (read-bytes (cdar m) input-port)])
(cons s (map (lambda (p) (and p (subbytes s (car p) (cdr p))))
(cdr m)))))))
;; like regexp-match, but returns the whole match
(define (*regexp-match1 pattern input-port)
(let ([m (*regexp-match-peek-positions pattern input-port)])
(and m (read-bytes (cdar m) input-port))))
;; Skips whitespace characters, sensitive to the current readtable's ;; Skips whitespace characters, sensitive to the current readtable's
;; definition of whitespace; optimizes common spaces when possible ;; definition of whitespace; optimizes common spaces when possible
(define skip-whitespace (define skip-whitespace
(let* ([plain-readtables (make-hash-table 'weak)] (let* ([plain-readtables (make-hash-table 'weak)]
[plain-spaces '(#\space #\tab #\newline #\return #\page)] [plain-spaces " \t\n\r\f"]
[plain-spaces-re [plain-spaces-list (string->list " \t\n\r\f")]
(regexp (string-append "^["(apply string plain-spaces)"]*"))]) [plain-spaces-re (^px "[" plain-spaces "]*")])
(define (skip-plain-spaces port) (define (skip-plain-spaces port)
;; hack: according to the specs, this might consume more characters ;; hack: according to the specs, this might consume more characters
;; than needed, but it seems to work fine with a simple <ch>* regexp ;; than needed, but it works fine with a simple <ch>* regexp (because
(regexp-match-positions plain-spaces-re port)) ;; it can always match an empty string)
(*regexp-match-peek-positions plain-spaces-re port))
(define (whitespace? ch rt) (define (whitespace? ch rt)
(if rt (if rt
(let-values ([(like-ch/sym _1 _2) (readtable-mapping rt ch)]) (let-values ([(like-ch/sym _1 _2) (readtable-mapping rt ch)])
@ -73,7 +121,7 @@
(hash-table-get plain-readtables rt (hash-table-get plain-readtables rt
(lambda () (lambda ()
(let ([plain? (andmap (lambda (ch) (whitespace? ch rt)) (let ([plain? (andmap (lambda (ch) (whitespace? ch rt))
plain-spaces)]) plain-spaces-list)])
(hash-table-put! plain-readtables rt #t) (hash-table-put! plain-readtables rt #t)
rt)))) rt))))
(lambda (port) (lambda (port)
@ -102,9 +150,19 @@
(datum->syntax-object #f d (placeholder-loc sp)) (datum->syntax-object #f d (placeholder-loc sp))
(datum->syntax-object sp d sp))) (datum->syntax-object sp d sp)))
;; make n spaces, cached for n
(define make-spaces
(let ([t (make-hash-table)])
(lambda (n)
(hash-table-get t n
(lambda ()
(let ([s (make-string n #\space)])
(hash-table-put! t n s) s))))))
;; a unique eol string
(define eol-token "\n") (define eol-token "\n")
(define (eol-syntax? stx) (eq? eol-token (syntax-e stx))) (define (eol-syntax? x) (and (syntax? x) (eq? eol-token (syntax-e x))))
;; sanity check, in case this gets violated in the future ;; sanity check, in case this property gets violated in the future
(unless (eol-syntax? (datum->syntax-object #f eol-token)) (unless (eol-syntax? (datum->syntax-object #f eol-token))
(error 'reader "internal error [invalid assumption]")) (error 'reader "internal error [invalid assumption]"))
@ -114,9 +172,20 @@
(define ((dispatcher start-inside?) (define ((dispatcher start-inside?)
char inp source-name line-num col-num position) char inp source-name line-num col-num position)
(define (read-error* line col pos span msg . xs)
(let* ([eof? (and (eq? 'eof msg) (pair? xs))]
[msg (apply format (if eof? xs (cons msg xs)))])
((if eof? raise-read-error raise-read-eof-error)
msg source-name line col pos span)))
(define (read-error msg . xs) (define (read-error msg . xs)
(let-values ([(line col pos) (port-next-location inp)]) (let-values ([(line col pos) (port-next-location inp)])
(raise-read-error (apply format msg xs) source-name line col pos #f))) (apply read-error* line col pos #f msg xs)))
(define (*match rx) (*regexp-match rx inp))
(define (*match1 rx) (*regexp-match1 rx inp))
;; (define (*skip rx) (*regexp-match-positions rx inp)) <- see above
(define (*skip rx) (*regexp-match1 rx inp))
(define (*peek rx) (*regexp-match-peek-positions rx inp))
(define (cur-pos) (define (cur-pos)
(let-values ([(line col pos) (port-next-location inp)]) (let-values ([(line col pos) (port-next-location inp)])
@ -125,229 +194,252 @@
(define (span-from start) (define (span-from start)
(and start (- (cur-pos) start))) (and start (- (cur-pos) start)))
(define (read-delimited-list end-re) (define (read-delimited-list end-re end-ch)
(let loop ([r '()]) (let loop ([r '()])
(skip-whitespace inp) (skip-whitespace inp)
(if (regexp-match/fail-without-reading end-re inp) (if (*skip end-re)
(reverse! r) (reverse! r)
(let ([x (read-syntax/recursive source-name inp)]) (let ([x (read-syntax/recursive source-name inp)])
(if (eof-object? x) (if (eof-object? x)
(read-error "expected a ']'") (read-error 'eof "expected a '~a'" end-ch)
(loop (if (special-comment? x) r (cons x r)))))))) (loop (if (special-comment? x) r (cons x r))))))))
(define (read-from-bytes-exact-or-identifier bs) ;; adds indentation (as new syntaxes, not merged); if the first line was
(let ([inp (open-input-bytes bs)] ;; not empty, then it is treated specially. called with at least two items
[default (lambda _ (string->symbol (bytes->string/utf-8 bs)))]) ;; (see below).
(with-handlers ([void default]) (define (add-indents stxs 1st-eol?)
(let ([x (read inp)]) (unless (andmap (lambda (x)
;; must match all -- otherwise: default (and (or (syntax? x) (placeholder? x))
(if (regexp-match #rx#"^[ \t\r\n]*$" inp) x (default)))))) (syntax/placeholder-column x)
(syntax/placeholder-line x)))
stxs)
;; the reader always turns on line counting
(read-error "internal error [add-indents] ~s" stxs))
(let* ([mincol
(let loop ([min #f] [stxs (if 1st-eol? stxs (cdr stxs))])
(if (null? stxs)
(or min (error "internal error [add-indents]"))
(loop (if (eol-syntax? (car stxs))
min
(let ([c (syntax/placeholder-column (car stxs))])
(if (or (not min) (< c min)) c min)))
(cdr stxs))))]
[mincol (if 1st-eol?
mincol
(min mincol (syntax/placeholder-column (car stxs))))])
(let loop (;; no indentation for text on the first '{' line
[newline? 1st-eol?] [curline -1] [stxs stxs] [r '()])
(if (null? stxs)
(reverse! r)
(let* ([stx (car stxs)]
[line (syntax/placeholder-line stx)])
(loop (eol-syntax? stx) line (cdr stxs)
(let ([stxcol (syntax/placeholder-column stx)]
[stx* (syntax/placeholder-strip stx)])
(if (and newline? (< curline line) (< mincol stxcol))
(list* stx*
(datum->syntax-object/placeholder stx
(make-spaces (- stxcol mincol)))
r)
(cons stx* r)))))))))
(define (reverse-bytes bytes) ;; gets an accumulated (reversed) list of syntaxes, sorts things out
(define (rev-byte b) ;; (remove prefix and suffix newlines, adds indentation if needed)
(cond [(assq b byte-pairs) => cdr] (define (done-lines rlines)
[else b])) (cond
(let* ([len (bytes-length bytes)] [r (make-bytes len)]) [(andmap eol-syntax? rlines)
(let loop ([i (sub1 len)]) ;; nothing to do (includes null, so the code below can assume a pair)
(when (<= 0 i) (reverse! rlines)]
(bytes-set! r i (rev-byte (bytes-ref bytes (- len i 1)))) [start-inside?
(loop (sub1 i)))) ;; no newlines removed
r)) (add-indents (reverse! rlines) #t)] ; don't ignore the 1st line
[else
;; strip off leading and trailing newlines
(let* ([rlines (if (eol-syntax? (car rlines)) (cdr rlines) rlines)]
[lines (reverse! rlines)]
[1st-eol? (eol-syntax? (car lines))]
[lines (if 1st-eol? (cdr lines) lines)])
(if (null? (cdr lines)) ; common case: one string
(list (syntax/placeholder-strip (car lines)))
(add-indents lines 1st-eol?)))]))
(define (get-attrs) ;; cons stx (new syntax) to the list of stxs, merging it if both are
(and (regexp-match/fail-without-reading open-attrs inp)
(read-delimited-list close-attrs)))
(define ((get-line open open-re close close-re item-re unquote-re level))
(let-values ([(line col pos) (port-next-location inp)])
(define (make-stx sexpr)
(datum->syntax-object #f
(if (bytes? sexpr) (bytes->string/utf-8 sexpr) sexpr)
(list source-name line col pos (span-from pos))))
(cond [(regexp-match/fail-without-reading close-re inp)
=> (lambda (m)
(let ([l (sub1 (unbox level))])
(set-box! level l)
(and (<= 0 l) (make-stx (car m)))))]
[(regexp-match/fail-without-reading open-re inp)
=> (lambda (m)
(set-box! level (add1 (unbox level)))
(make-stx (car m)))]
[(regexp-match-peek-positions sub-start inp)
;; read the next value, include comment objs, keep source
;; location manually (see above)
(let ([x (read-syntax/recursive source-name inp)])
(if (or (syntax? x) (special-comment? x))
x
(make-placeholder x
(list source-name line col pos (span-from pos)))))]
[(regexp-match/fail-without-reading end-of-line inp)
=> (lambda (m)
(if (cadr m) ; backslashes?
(list (make-stx (cadr m)) (make-stx eol-token))
(make-stx eol-token)))]
[(regexp-match/fail-without-reading item-re inp)
=> (lambda (m)
(let* ([m (car m)]
[m (regexp-replace bar-pfx-remove m #"")]
[m (regexp-replace* unquote-re m #"\\1")])
(make-stx m)))]
[(and (not (eq? item-re line-item))
(regexp-match/fail-without-reading #rx#"[{}]" inp))
=> (lambda (m)
(make-stx (car m)))]
[(regexp-match/fail-without-reading #rx#"^$" inp)
(if start-inside? #f (read-error "missing `~a'" close))]
[else (read-error "internal error [get-line]")])))
;; adds stx (new syntax) to the list of stxs, merging it if both are
;; strings, except for newline markers ;; strings, except for newline markers
(define (maybe-merge stx stxs) (define (maybe-merge stx stxs)
(if (and (pair? stxs) (syntax? stx) (syntax? (car stxs)) (let* ([2nd (and (syntax? stx) (syntax-e stx))]
(string? (syntax-e stx)) [stx0 (and (pair? stxs) (car stxs))]
(string? (syntax-e (car stxs))) [1st (and (syntax? stx0) (syntax-e stx0))])
(not (eol-syntax? stx)) (if (and (string? 1st) (not (eq? eol-token 1st))
(not (eol-syntax? (car stxs)))) (string? 2nd) (not (eq? eol-token 2nd)))
(let ([fst (car stxs)]) (cons (datum->syntax-object stx0
(cons (datum->syntax-object stx (string-append 1st 2nd)
(string-append (syntax-e fst) (syntax-e stx)) (list (syntax-source stx0)
(list (syntax-source fst) (syntax-line stx0)
(syntax-line fst) (syntax-column stx0)
(syntax-column fst) (syntax-position stx0)
(syntax-position fst) ;; this is called right after reading stx
(span-from (syntax-position fst)))) (span-from (syntax-position stx0))))
(cdr stxs))) (cdr stxs))
(cons stx stxs))) (cons stx stxs))))
(define (add-indents stxs) (define (get-lines* re:begin re:end re:item end-token)
(unless (andmap (lambda (x) ;; re:begin, re:end, end-token can be false if start-inside? is #t
(or (and (syntax? x) (syntax-line x) (syntax-column x)) (let loop ([lvl 0] [r '()])
(placeholder? x))) (let-values ([(line col pos) (port-next-location inp)])
stxs) (define (make-stx sexpr)
(read-error "internal error [add-indents] ~s" stxs)) (datum->syntax-object #f
(cond (if (bytes? sexpr) (bytes->string/utf-8 sexpr) sexpr)
[(not (read-insert-indents)) (map syntax/placeholder-strip stxs)] (list source-name line col pos (span-from pos))))
[(null? stxs) '()] (cond [(and re:begin (*match1 re:begin))
[else (let ([mincol (apply min (map syntax/placeholder-column stxs))]) => (lambda (m) (loop (add1 lvl) (maybe-merge (make-stx m) r)))]
(let loop ([curline line-num] [stxs stxs] [r '()]) [(and re:end (*match1 re:end))
(if (null? stxs) => (lambda (m)
(reverse! r) (if (and (zero? lvl) (not start-inside?))
(let* ([stx (car stxs)] (done-lines r)
[line (syntax/placeholder-line stx)]) (loop (sub1 lvl) (maybe-merge (make-stx m) r))))]
(loop line (cdr stxs) [(*skip re:end-of-line)
(let ([stxcol (syntax/placeholder-column stx)] (loop lvl (cons (make-stx eol-token) r))] ; no merge needed
[stx* (syntax/placeholder-strip stx)]) [(*match re:command-unquote)
(if (and (< curline line) (< mincol stxcol)) => (lambda (m)
(list* stx* (loop lvl (maybe-merge (make-stx (cadr m)) r)))]
(datum->syntax-object/placeholder stx [(*peek re:command)
(make-spaces (- stxcol mincol))) ;; read the next value, include comment objs, keep source
r) ;; location manually (see above)
(cons stx* r))))))))])) => (lambda (m)
;; if the command is a string escape, use `read-syntax',
;; so that we don't get a placeholder, and we can merge
;; the string to others
(let* ([reader (if (cadr m)
read-syntax read-syntax/recursive)]
[x (reader source-name inp)])
(loop lvl
(cond [(special-comment? x) r]
[(syntax? x) (maybe-merge x r)]
;; otherwise it's a placeholder to wrap
[else (cons (make-placeholder x ; no merge
(list source-name line col pos
(span-from pos)))
r)]))))]
;; must be last, since it will always succeed with 1 char
[(*peek re:item) ; don't read: regexp grabs the following text
=> (lambda (m)
(loop lvl
(maybe-merge (make-stx (read-bytes (cdadr m) inp))
r)))]
[(*peek #rx#"^$")
(if end-token
(read-error 'eof "missing closing `~a'" end-token)
(done-lines r))]
[else (read-error "internal error [get-lines*]")]))))
(define (get-lines) (define (get-lines)
(define get (cond [(*skip re:lines-begin)
(cond [start-inside? (get-lines* re:lines-begin re:lines-end re:line-item ch:lines-end)]
(get-line "{" open-lines "}" close-lines [(*match1 re:lines-begin*)
line-item bslash-unquote (box 0))] => (lambda (bgn)
[(regexp-match/fail-without-reading open-lines-special inp) (let* ([end (reverse-bytes bgn)]
=> (lambda (m) [bgn* (regexp-quote bgn)]
(let* ([open (cadr m)] [end* (regexp-quote end)])
[close (reverse-bytes open)] (get-lines* (^px bgn*) (^px end*)
[open-re (regexp-quote open)] (re:line-item* bgn* end*)
[close-re (regexp-quote close)] end)))]
[either-re (bytes-append open-re #"|" close-re)] [else #f]))
[bre (lambda (pfx/sfx re)
(byte-regexp (define (get-attrs)
(bytes-append (car pfx/sfx) (and (*skip re:attrs-begin)
re (read-delimited-list re:attrs-end ch:attrs-end)))
(cadr pfx/sfx))))])
(get-line open (bre open-lines* open-re) (define (get-escape-expr)
close (bre close-lines* close-re) (define-values (line col pos) (port-next-location inp))
(bre line-item* either-re) (and (*skip re:expr-escape)
(bre bslash-unquote* either-re) (begin
(box 0))))] (skip-whitespace inp)
[(regexp-match/fail-without-reading open-lines inp) (begin0 (parameterize ([current-readtable command-readtable])
(get-line "{" open-lines "}" close-lines (let loop ()
line-item bslash-unquote (box 0))] (let ([expr
[else #f])) ;; should be `read-syntax/recursive', but see
(and get (let loop ([lines '()] [more '()]) ;; the next comment (this also means that we
(let-values ([(line more) (if (pair? more) ;; never get a special-comment)
(values (car more) (cdr more)) (read-syntax)])
(values (get) more))]) (if (special-comment? expr)
(cond [(not line) (add-indents (reverse! lines))] (loop)
;; can happen from a sub @;{...} comment ;; we need to use the proper source location,
[(special-comment? line) (loop lines more)] ;; including the initial "@|" so if an escape is
[(list? line) (loop lines (append line more))] ;; at the beginning of a line no bogus indentation
[else (loop (maybe-merge line lines) more)]))))) ;; is added later
(datum->syntax-object expr (syntax-e expr)
(list source-name line-num col-num position
(span-from position)))))))
(skip-whitespace inp)
(unless (*skip re:expr-escape)
(read-error* line col pos #f
"expecting a terminating '~a'" ch:bar-quote))))))
;; called only when we must see a command in the input
(define (get-command)
(define-values (line col pos) (port-next-location inp))
(let ([cmd (parameterize ([current-readtable command-readtable])
(read-syntax/recursive source-name inp))])
(if (special-comment? cmd)
(read-error* line col pos (span-from pos)
"expecting a command expression, got a comment")
cmd)))
(define (get-rprefixes) ; return punctuation prefixes in reverse (define (get-rprefixes) ; return punctuation prefixes in reverse
(cond (let loop ([r '()])
[(regexp-match/fail-without-reading (let-values ([(line col pos) (port-next-location inp)])
#rx#"^(?:[ \t\r\n]*(?:'|`|,@?))+" inp) (cond
=> (lambda (m) [(*match1 #rx#"^(?:'|`|,@?)")
;; accumulate prefixes in reverse => (lambda (m)
(let loop ([s (car m)] [r '()]) (let ([sym (cond
(cond [(assoc m '([#"'" quote]
[(equal? #"" s) r] [#"`" quasiquote]
[(regexp-match #rx#"^[ \t\r\n]*('|`|,@?)(.*)$" s) [#"," unquote]
=> (lambda (m) [#",@" unquote-splicing]))
(loop (caddr m) => cadr]
(cons (let ([m (cadr m)]) [else (read-error "internal error [rpfxs]")])])
(cond (loop (cons (datum->syntax-object #f sym
[(assoc m '([#"'" quote] (list source-name line col pos
[#"`" quasiquote] (span-from pos)))
[#"," unquote] r))))]
[#",@" unquote-splicing])) [(*peek re:whitespaces)
=> cadr] (read-error "unexpected whitespace after ~a" ch:command)]
[else (read-error [else r]))))
"internal error [rpfxs]")]))
r)))]
[else (read-error "internal error [rpfxs]")])))]
[else '()]))
(define (get-command) ; #f means no command
(let-values ([(line col pos) (port-next-location inp)])
(cond [(regexp-match-peek-positions open-attr/lines inp)
(values #f #f)]
[(regexp-match/fail-without-reading bars-quoted inp)
=> (lambda (m)
(values (datum->syntax-object #f
(read-from-bytes-exact-or-identifier (cadr m))
(list source-name line col pos (span-from pos)))
#t))]
[else (values
(parameterize ([current-readtable cmd-readtable])
(let loop ()
(let ([x (read-syntax/recursive source-name inp)])
(if (special-comment? x) (loop) x))))
#f)])))
(cond (cond
[start-inside? [start-inside?
(datum->syntax-object #f (get-lines) (datum->syntax-object #f (get-lines* #f #f re:line-item-no-nests #f)
(list source-name line-num col-num position (span-from position)))] (list source-name line-num col-num position (span-from position)))]
[(regexp-match/fail-without-reading comment-start inp) [(*peek re:whitespaces)
(if (regexp-match-peek-positions open-lines inp) (read-error "unexpected whitespace after ~a" ch:command)]
(get-lines) (regexp-match comment-line inp)) [(*skip re:comment-start)
(unless (get-lines) (*skip re:comment-line))
(make-special-comment #f)] (make-special-comment #f)]
[else [else
(let* ([pfx (get-rprefixes)] (let*-values
[bars? #f] ([(rpfxs) (get-rprefixes)]
[cmd (let-values ([(cmd bs?) (get-command)]) [(cmd attrs lines)
(set! bars? bs?) cmd)] ; #f means no command (cond
[attrs (and (not bars?) (get-attrs))] ;; try get-lines first -- so @|{...}| is not used as a
[lines (and (not bars?) (get-lines))] ;; simple expression escape, same for get-attrs
[stx (and (or attrs lines) [(get-lines) => (lambda (lines) (values #f #f lines))]
(append (or attrs '()) (or lines '())))] [(get-attrs) => (lambda (attrs) (values #f attrs (get-lines)))]
[stx (or (and cmd stx (cons cmd stx)) ; all parts [(get-escape-expr) => (lambda (expr) (values expr #f #f))]
stx ; no cmd part => just a parenthesized expression [else (values (get-command) (get-attrs) (get-lines))])]
cmd ; no attrs/lines => simple expression (no parens) [(stx) (and (or attrs lines)
;; impossible: either we saw []s or {}s, or we read a (append (or attrs '()) (or lines '())))]
;; scheme expression [(stx) (or (and cmd stx (cons cmd stx)) ; all parts
(read-error "internal error [dispatcher]"))] stx ; no cmd part => just a parenthesized expression
[stx (let loop ([pfx pfx] [stx stx]) cmd ; no attrs/lines => simple expression (no parens)
(if (null? pfx) stx ;; impossible: either we saw []s or {}s, or we read a
(loop (cdr pfx) (list (car pfx) stx))))]) ;; scheme expression
(read-error "internal error [dispatcher]"))]
[(stx)
;; wrap the prefixes around the result
(let loop ([rpfxs rpfxs] [stx stx])
(if (null? rpfxs)
stx
(loop (cdr rpfxs) (list (car rpfxs) stx))))])
(datum->syntax-object #f stx (datum->syntax-object #f stx
(list source-name line-num col-num position (list source-name line-num col-num position
(span-from position))))])) (span-from position))))]))
@ -356,13 +448,20 @@
;; readtables ;; readtables
(define at-readtable (define at-readtable
(make-readtable #f cmd-char 'terminating-macro (dispatcher #f))) (make-readtable #f ch:command 'terminating-macro (dispatcher #f)))
;; similar to plain Scheme, but with `|' as a terminating macro (provide use-at-readtable)
(define cmd-readtable (define (use-at-readtable)
(port-count-lines! (current-input-port))
(current-readtable at-readtable))
;; similar to plain Scheme (scribble, actually), but with `|' as a
;; terminating macro (otherwise it behaves the same; the only difference is
;; that `a|b|c' is three symbols)
(define command-readtable
(make-readtable at-readtable #\| 'terminating-macro (make-readtable at-readtable #\| 'terminating-macro
(lambda (char inp source-name line-num col-num position) (lambda (char inp source-name line-num col-num position)
(let ([m (regexp-match/fail-without-reading #rx#"^([^|]*)\\|" inp)]) (let ([m (*regexp-match #rx#"^([^|]*)\\|" inp)])
(unless m (unless m
(raise-read-error (raise-read-error
"unbalanced `|'" source-name line-num col-num position #f)) "unbalanced `|'" source-name line-num col-num position #f))
@ -371,11 +470,6 @@
(list source-name line-num col-num position (list source-name line-num col-num position
(add1 (bytes-length (car m))))))))) (add1 (bytes-length (car m)))))))))
(provide use-at-readtable)
(define (use-at-readtable)
(port-count-lines! (current-input-port))
(current-readtable at-readtable))
(define default-src (gensym)) (define default-src (gensym))
(define (src-name src port) (define (src-name src port)
(if (eq? src default-src) (object-name port) src)) (if (eq? src default-src) (object-name port) src))