better reader implementation
svn: r6767
This commit is contained in:
parent
9aaf939a88
commit
c641584342
|
@ -4,21 +4,13 @@
|
||||||
(lib "kw.ss"))
|
(lib "kw.ss"))
|
||||||
|
|
||||||
(provide (rename *read read)
|
(provide (rename *read read)
|
||||||
(rename *read-syntax read-syntax))
|
(rename *read-syntax read-syntax))
|
||||||
|
|
||||||
(define (call-with-scribble-params t)
|
|
||||||
(parameterize ([scribble:read-insert-indents #f])
|
|
||||||
(t)))
|
|
||||||
|
|
||||||
(define/kw (*read #:optional [inp (current-input-port)])
|
(define/kw (*read #:optional [inp (current-input-port)])
|
||||||
(call-with-scribble-params
|
(wrap inp (scribble:read-inside inp)))
|
||||||
(lambda ()
|
|
||||||
(wrap inp (scribble:read-inside inp)))))
|
|
||||||
|
|
||||||
(define/kw (*read-syntax #:optional src [port (current-input-port)])
|
(define/kw (*read-syntax #:optional src [port (current-input-port)])
|
||||||
(call-with-scribble-params
|
(wrap port (scribble:read-inside-syntax src port))))
|
||||||
(lambda ()
|
|
||||||
(wrap port (scribble:read-inside-syntax src port)))))
|
|
||||||
|
|
||||||
(define (wrap port body)
|
(define (wrap port body)
|
||||||
(let* ([p-name (object-name port)]
|
(let* ([p-name (object-name port)]
|
||||||
|
|
|
@ -2,66 +2,114 @@
|
||||||
;; Implements the @-reader macro for embedding text in Scheme code.
|
;; Implements the @-reader macro for embedding text in Scheme code.
|
||||||
|
|
||||||
(module reader mzscheme
|
(module reader mzscheme
|
||||||
(require (lib "string.ss") (lib "kw.ss") (lib "readerr.ss" "syntax"))
|
|
||||||
|
(require (lib "kw.ss") (lib "string.ss") (lib "readerr.ss" "syntax"))
|
||||||
|
|
||||||
;; --------------------------------------------------------------------------
|
;; --------------------------------------------------------------------------
|
||||||
;; customization
|
;; utilities for syntax specifications below
|
||||||
|
|
||||||
(provide read-insert-indents)
|
;; regexps
|
||||||
(define read-insert-indents (make-parameter #t))
|
(define (px . args)
|
||||||
|
(let* ([args (let loop ([xs args])
|
||||||
|
(if (list? xs) (apply append (map loop xs)) (list xs)))]
|
||||||
|
[args (map (lambda (x)
|
||||||
|
(cond
|
||||||
|
[(bytes? x) x]
|
||||||
|
[(string? x) (string->bytes/utf-8 x)]
|
||||||
|
[(char? x) (regexp-quote (bytes (char->integer x)))]
|
||||||
|
[else (error 'reader "internal error [px]")]))
|
||||||
|
args)])
|
||||||
|
(byte-pregexp (apply bytes-append args))))
|
||||||
|
(define (^px . args) (px #"^" args))
|
||||||
|
|
||||||
|
;; reverses a byte string visually
|
||||||
|
(define reverse-bytes
|
||||||
|
(let ([pairs (let ([xs (bytes->list #"([{<")]
|
||||||
|
[ys (bytes->list #")]}>")])
|
||||||
|
(append (map cons xs ys) (map cons ys xs)))])
|
||||||
|
(define (rev-byte b)
|
||||||
|
(cond [(assq b pairs) => cdr]
|
||||||
|
[else b]))
|
||||||
|
(lambda (bs) (list->bytes (map rev-byte (reverse! (bytes->list bs)))))))
|
||||||
|
|
||||||
;; --------------------------------------------------------------------------
|
;; --------------------------------------------------------------------------
|
||||||
;; syntax
|
;; syntax
|
||||||
|
|
||||||
(define cmd-char #\@)
|
;; basic customization
|
||||||
|
(define ch:command #\@)
|
||||||
|
(define ch:comment #\;)
|
||||||
|
(define ch:bar-quote #\|)
|
||||||
|
(define ch:command-quote #\\)
|
||||||
|
(define ch:attrs-begin #\[)
|
||||||
|
(define ch:attrs-end #\])
|
||||||
|
(define ch:lines-begin #\{)
|
||||||
|
(define ch:lines-end #\})
|
||||||
|
|
||||||
(define bars-quoted #rx#"^[ \t\r\n]*\\|([^|]*)\\|")
|
(define str:lines-begin* #"\\|[^a-zA-Z0-9 \t\r\n\f@\\\177-\377{]*\\{")
|
||||||
;; attrs open with a `[', and read in one shot as a list
|
|
||||||
(define open-attrs #rx#"^[ \t\r\n]*[[][ \t\r\n]*")
|
|
||||||
(define open-lines #rx#"^[ \t\r\n]*[{](?:[ \t]*\r?\n[ \t]*)?") ; 1 newline
|
|
||||||
(define open-lines* '(#"^[ \t\r\n]*" #"(?:[ \t]*\r?\n[ \t]*)?"))
|
|
||||||
(define open-lines-special ; a special ending expected: @foo{<{ ... }>} etc
|
|
||||||
#rx#"^[ \t\r\n]*([|][^a-zA-Z0-9 \t\r\n@\\]*?[{])(?:[ \t]*\r?\n[ \t]*)?")
|
|
||||||
(define open-attr/lines #rx#"^[ \t\r\n]*[[{][ \t\r\n]*")
|
|
||||||
(define close-attrs #rx#"^[]]")
|
|
||||||
(define close-lines #rx#"^(?:[ \t]*\r?\n[ \t]*)?[}]") ; swallow 1 newline
|
|
||||||
(define close-lines* '(#"^(?:[ \t]*\r?\n[ \t]*)?" #""))
|
|
||||||
(define comment-start #rx#"^[ \t]*;")
|
|
||||||
(define comment-line #rx#"^[^\r\n]*\r?\n[ \t]*") ; like tex's `%' nl & space
|
|
||||||
(define sub-start #rx#"^[@]")
|
|
||||||
(define line-item #rx#"^(?:[^{}@\r\n]*[^\\{}@\r\n]|[\\]+[{}@])+")
|
|
||||||
(define line-item* '(#"^(?:[^{}@\r\n]*[^\\{}@\r\n]|[\\]+(?:[@]|" #"))+"))
|
|
||||||
(define end-of-line #rx#"^([\\]+)?\r?\n[ \t]*") ; make \-eoln possible
|
|
||||||
(define bar-pfx-remove #rx#"^[|]")
|
|
||||||
(define bslash-unquote #rx#"[\\]([\\]*[{}@])")
|
|
||||||
(define bslash-unquote* '(#"[\\]([\\]+(?:[@]|" #"))"))
|
|
||||||
|
|
||||||
(define byte-pairs
|
(define re:command (^px ch:command
|
||||||
(map (lambda (b) (cons (bytes-ref b 0) (bytes-ref b 1)))
|
;; the following identifies string escapes, see
|
||||||
'(#"()" #"[]" #"{}" #"<>")))
|
;; hoe it is used below
|
||||||
|
"("ch:bar-quote"?\")?"))
|
||||||
|
(define re:whitespaces (^px "\\s+"))
|
||||||
|
(define re:comment-start (^px ch:comment))
|
||||||
|
(define re:comment-line (^px "[^\n]*\n[ \t]*")) ; like tex's `%'
|
||||||
|
(define re:expr-escape (^px ch:bar-quote))
|
||||||
|
(define re:attrs-begin (^px ch:attrs-begin))
|
||||||
|
(define re:attrs-end (^px ch:attrs-end))
|
||||||
|
(define re:lines-begin (^px ch:lines-begin))
|
||||||
|
(define re:lines-begin* (^px str:lines-begin*))
|
||||||
|
(define re:lines-end (^px ch:lines-end))
|
||||||
|
(define str:end-of-line "[ \t]*\r?\n[ \t]*") ; eat spaces on the next line
|
||||||
|
(define re:end-of-line (^px str:end-of-line))
|
||||||
|
(define (re:line-item* bgn end)
|
||||||
|
(^px "(.+?)(?:"bgn"|"end
|
||||||
|
"|"ch:command-quote"*"ch:command
|
||||||
|
"|"str:end-of-line")"))
|
||||||
|
(define re:line-item (re:line-item* ch:lines-begin ch:lines-end))
|
||||||
|
(define re:line-item-no-nests (^px "(.+?)(?:"ch:command-quote"*"ch:command
|
||||||
|
"|"str:end-of-line")"))
|
||||||
|
(define re:command-unquote (^px ch:command-quote
|
||||||
|
"("ch:command-quote"*"ch:command")"))
|
||||||
|
|
||||||
;; --------------------------------------------------------------------------
|
;; --------------------------------------------------------------------------
|
||||||
;; utilities
|
;; utilities
|
||||||
|
|
||||||
(define make-spaces
|
;; like `regexp-match/fail-without-reading', without extras; the regexp that
|
||||||
(let ([t (make-hash-table)])
|
;; is used must be anchored -- nothing is dropped
|
||||||
(lambda (n)
|
(define (*regexp-match-peek-positions pattern input-port)
|
||||||
(hash-table-get t n
|
(unless (and (byte-regexp? pattern)
|
||||||
(lambda ()
|
(regexp-match? #rx#"^\\^" (object-name pattern)))
|
||||||
(let ([s (make-string n #\space)])
|
(error 'reader "internal error [invalid bregexp] ~e" pattern))
|
||||||
(hash-table-put! t n s) s))))))
|
(regexp-match-peek-positions pattern input-port))
|
||||||
|
;; the following doesn't work -- must peek first
|
||||||
|
;; (define (*regexp-match-positions pattern input-port)
|
||||||
|
;; (unless (and (byte-regexp? pattern)
|
||||||
|
;; (regexp-match? #rx#"^\\^" (object-name pattern)))
|
||||||
|
;; (error 'reader "internal error [invalid bregexp] ~e" pattern))
|
||||||
|
;; (regexp-match-peek-positions pattern input-port))
|
||||||
|
(define (*regexp-match pattern input-port)
|
||||||
|
(let ([m (*regexp-match-peek-positions pattern input-port)])
|
||||||
|
(and m (let ([s (read-bytes (cdar m) input-port)])
|
||||||
|
(cons s (map (lambda (p) (and p (subbytes s (car p) (cdr p))))
|
||||||
|
(cdr m)))))))
|
||||||
|
;; like regexp-match, but returns the whole match
|
||||||
|
(define (*regexp-match1 pattern input-port)
|
||||||
|
(let ([m (*regexp-match-peek-positions pattern input-port)])
|
||||||
|
(and m (read-bytes (cdar m) input-port))))
|
||||||
|
|
||||||
;; Skips whitespace characters, sensitive to the current readtable's
|
;; Skips whitespace characters, sensitive to the current readtable's
|
||||||
;; definition of whitespace; optimizes common spaces when possible
|
;; definition of whitespace; optimizes common spaces when possible
|
||||||
(define skip-whitespace
|
(define skip-whitespace
|
||||||
(let* ([plain-readtables (make-hash-table 'weak)]
|
(let* ([plain-readtables (make-hash-table 'weak)]
|
||||||
[plain-spaces '(#\space #\tab #\newline #\return #\page)]
|
[plain-spaces " \t\n\r\f"]
|
||||||
[plain-spaces-re
|
[plain-spaces-list (string->list " \t\n\r\f")]
|
||||||
(regexp (string-append "^["(apply string plain-spaces)"]*"))])
|
[plain-spaces-re (^px "[" plain-spaces "]*")])
|
||||||
(define (skip-plain-spaces port)
|
(define (skip-plain-spaces port)
|
||||||
;; hack: according to the specs, this might consume more characters
|
;; hack: according to the specs, this might consume more characters
|
||||||
;; than needed, but it seems to work fine with a simple <ch>* regexp
|
;; than needed, but it works fine with a simple <ch>* regexp (because
|
||||||
(regexp-match-positions plain-spaces-re port))
|
;; it can always match an empty string)
|
||||||
|
(*regexp-match-peek-positions plain-spaces-re port))
|
||||||
(define (whitespace? ch rt)
|
(define (whitespace? ch rt)
|
||||||
(if rt
|
(if rt
|
||||||
(let-values ([(like-ch/sym _1 _2) (readtable-mapping rt ch)])
|
(let-values ([(like-ch/sym _1 _2) (readtable-mapping rt ch)])
|
||||||
|
@ -73,7 +121,7 @@
|
||||||
(hash-table-get plain-readtables rt
|
(hash-table-get plain-readtables rt
|
||||||
(lambda ()
|
(lambda ()
|
||||||
(let ([plain? (andmap (lambda (ch) (whitespace? ch rt))
|
(let ([plain? (andmap (lambda (ch) (whitespace? ch rt))
|
||||||
plain-spaces)])
|
plain-spaces-list)])
|
||||||
(hash-table-put! plain-readtables rt #t)
|
(hash-table-put! plain-readtables rt #t)
|
||||||
rt))))
|
rt))))
|
||||||
(lambda (port)
|
(lambda (port)
|
||||||
|
@ -102,9 +150,19 @@
|
||||||
(datum->syntax-object #f d (placeholder-loc sp))
|
(datum->syntax-object #f d (placeholder-loc sp))
|
||||||
(datum->syntax-object sp d sp)))
|
(datum->syntax-object sp d sp)))
|
||||||
|
|
||||||
|
;; make n spaces, cached for n
|
||||||
|
(define make-spaces
|
||||||
|
(let ([t (make-hash-table)])
|
||||||
|
(lambda (n)
|
||||||
|
(hash-table-get t n
|
||||||
|
(lambda ()
|
||||||
|
(let ([s (make-string n #\space)])
|
||||||
|
(hash-table-put! t n s) s))))))
|
||||||
|
|
||||||
|
;; a unique eol string
|
||||||
(define eol-token "\n")
|
(define eol-token "\n")
|
||||||
(define (eol-syntax? stx) (eq? eol-token (syntax-e stx)))
|
(define (eol-syntax? x) (and (syntax? x) (eq? eol-token (syntax-e x))))
|
||||||
;; sanity check, in case this gets violated in the future
|
;; sanity check, in case this property gets violated in the future
|
||||||
(unless (eol-syntax? (datum->syntax-object #f eol-token))
|
(unless (eol-syntax? (datum->syntax-object #f eol-token))
|
||||||
(error 'reader "internal error [invalid assumption]"))
|
(error 'reader "internal error [invalid assumption]"))
|
||||||
|
|
||||||
|
@ -114,9 +172,20 @@
|
||||||
(define ((dispatcher start-inside?)
|
(define ((dispatcher start-inside?)
|
||||||
char inp source-name line-num col-num position)
|
char inp source-name line-num col-num position)
|
||||||
|
|
||||||
|
(define (read-error* line col pos span msg . xs)
|
||||||
|
(let* ([eof? (and (eq? 'eof msg) (pair? xs))]
|
||||||
|
[msg (apply format (if eof? xs (cons msg xs)))])
|
||||||
|
((if eof? raise-read-error raise-read-eof-error)
|
||||||
|
msg source-name line col pos span)))
|
||||||
(define (read-error msg . xs)
|
(define (read-error msg . xs)
|
||||||
(let-values ([(line col pos) (port-next-location inp)])
|
(let-values ([(line col pos) (port-next-location inp)])
|
||||||
(raise-read-error (apply format msg xs) source-name line col pos #f)))
|
(apply read-error* line col pos #f msg xs)))
|
||||||
|
|
||||||
|
(define (*match rx) (*regexp-match rx inp))
|
||||||
|
(define (*match1 rx) (*regexp-match1 rx inp))
|
||||||
|
;; (define (*skip rx) (*regexp-match-positions rx inp)) <- see above
|
||||||
|
(define (*skip rx) (*regexp-match1 rx inp))
|
||||||
|
(define (*peek rx) (*regexp-match-peek-positions rx inp))
|
||||||
|
|
||||||
(define (cur-pos)
|
(define (cur-pos)
|
||||||
(let-values ([(line col pos) (port-next-location inp)])
|
(let-values ([(line col pos) (port-next-location inp)])
|
||||||
|
@ -125,229 +194,252 @@
|
||||||
(define (span-from start)
|
(define (span-from start)
|
||||||
(and start (- (cur-pos) start)))
|
(and start (- (cur-pos) start)))
|
||||||
|
|
||||||
(define (read-delimited-list end-re)
|
(define (read-delimited-list end-re end-ch)
|
||||||
(let loop ([r '()])
|
(let loop ([r '()])
|
||||||
(skip-whitespace inp)
|
(skip-whitespace inp)
|
||||||
(if (regexp-match/fail-without-reading end-re inp)
|
(if (*skip end-re)
|
||||||
(reverse! r)
|
(reverse! r)
|
||||||
(let ([x (read-syntax/recursive source-name inp)])
|
(let ([x (read-syntax/recursive source-name inp)])
|
||||||
(if (eof-object? x)
|
(if (eof-object? x)
|
||||||
(read-error "expected a ']'")
|
(read-error 'eof "expected a '~a'" end-ch)
|
||||||
(loop (if (special-comment? x) r (cons x r))))))))
|
(loop (if (special-comment? x) r (cons x r))))))))
|
||||||
|
|
||||||
(define (read-from-bytes-exact-or-identifier bs)
|
;; adds indentation (as new syntaxes, not merged); if the first line was
|
||||||
(let ([inp (open-input-bytes bs)]
|
;; not empty, then it is treated specially. called with at least two items
|
||||||
[default (lambda _ (string->symbol (bytes->string/utf-8 bs)))])
|
;; (see below).
|
||||||
(with-handlers ([void default])
|
(define (add-indents stxs 1st-eol?)
|
||||||
(let ([x (read inp)])
|
(unless (andmap (lambda (x)
|
||||||
;; must match all -- otherwise: default
|
(and (or (syntax? x) (placeholder? x))
|
||||||
(if (regexp-match #rx#"^[ \t\r\n]*$" inp) x (default))))))
|
(syntax/placeholder-column x)
|
||||||
|
(syntax/placeholder-line x)))
|
||||||
|
stxs)
|
||||||
|
;; the reader always turns on line counting
|
||||||
|
(read-error "internal error [add-indents] ~s" stxs))
|
||||||
|
(let* ([mincol
|
||||||
|
(let loop ([min #f] [stxs (if 1st-eol? stxs (cdr stxs))])
|
||||||
|
(if (null? stxs)
|
||||||
|
(or min (error "internal error [add-indents]"))
|
||||||
|
(loop (if (eol-syntax? (car stxs))
|
||||||
|
min
|
||||||
|
(let ([c (syntax/placeholder-column (car stxs))])
|
||||||
|
(if (or (not min) (< c min)) c min)))
|
||||||
|
(cdr stxs))))]
|
||||||
|
[mincol (if 1st-eol?
|
||||||
|
mincol
|
||||||
|
(min mincol (syntax/placeholder-column (car stxs))))])
|
||||||
|
(let loop (;; no indentation for text on the first '{' line
|
||||||
|
[newline? 1st-eol?] [curline -1] [stxs stxs] [r '()])
|
||||||
|
(if (null? stxs)
|
||||||
|
(reverse! r)
|
||||||
|
(let* ([stx (car stxs)]
|
||||||
|
[line (syntax/placeholder-line stx)])
|
||||||
|
(loop (eol-syntax? stx) line (cdr stxs)
|
||||||
|
(let ([stxcol (syntax/placeholder-column stx)]
|
||||||
|
[stx* (syntax/placeholder-strip stx)])
|
||||||
|
(if (and newline? (< curline line) (< mincol stxcol))
|
||||||
|
(list* stx*
|
||||||
|
(datum->syntax-object/placeholder stx
|
||||||
|
(make-spaces (- stxcol mincol)))
|
||||||
|
r)
|
||||||
|
(cons stx* r)))))))))
|
||||||
|
|
||||||
(define (reverse-bytes bytes)
|
;; gets an accumulated (reversed) list of syntaxes, sorts things out
|
||||||
(define (rev-byte b)
|
;; (remove prefix and suffix newlines, adds indentation if needed)
|
||||||
(cond [(assq b byte-pairs) => cdr]
|
(define (done-lines rlines)
|
||||||
[else b]))
|
(cond
|
||||||
(let* ([len (bytes-length bytes)] [r (make-bytes len)])
|
[(andmap eol-syntax? rlines)
|
||||||
(let loop ([i (sub1 len)])
|
;; nothing to do (includes null, so the code below can assume a pair)
|
||||||
(when (<= 0 i)
|
(reverse! rlines)]
|
||||||
(bytes-set! r i (rev-byte (bytes-ref bytes (- len i 1))))
|
[start-inside?
|
||||||
(loop (sub1 i))))
|
;; no newlines removed
|
||||||
r))
|
(add-indents (reverse! rlines) #t)] ; don't ignore the 1st line
|
||||||
|
[else
|
||||||
|
;; strip off leading and trailing newlines
|
||||||
|
(let* ([rlines (if (eol-syntax? (car rlines)) (cdr rlines) rlines)]
|
||||||
|
[lines (reverse! rlines)]
|
||||||
|
[1st-eol? (eol-syntax? (car lines))]
|
||||||
|
[lines (if 1st-eol? (cdr lines) lines)])
|
||||||
|
(if (null? (cdr lines)) ; common case: one string
|
||||||
|
(list (syntax/placeholder-strip (car lines)))
|
||||||
|
(add-indents lines 1st-eol?)))]))
|
||||||
|
|
||||||
(define (get-attrs)
|
;; cons stx (new syntax) to the list of stxs, merging it if both are
|
||||||
(and (regexp-match/fail-without-reading open-attrs inp)
|
|
||||||
(read-delimited-list close-attrs)))
|
|
||||||
|
|
||||||
(define ((get-line open open-re close close-re item-re unquote-re level))
|
|
||||||
(let-values ([(line col pos) (port-next-location inp)])
|
|
||||||
(define (make-stx sexpr)
|
|
||||||
(datum->syntax-object #f
|
|
||||||
(if (bytes? sexpr) (bytes->string/utf-8 sexpr) sexpr)
|
|
||||||
(list source-name line col pos (span-from pos))))
|
|
||||||
(cond [(regexp-match/fail-without-reading close-re inp)
|
|
||||||
=> (lambda (m)
|
|
||||||
(let ([l (sub1 (unbox level))])
|
|
||||||
(set-box! level l)
|
|
||||||
(and (<= 0 l) (make-stx (car m)))))]
|
|
||||||
[(regexp-match/fail-without-reading open-re inp)
|
|
||||||
=> (lambda (m)
|
|
||||||
(set-box! level (add1 (unbox level)))
|
|
||||||
(make-stx (car m)))]
|
|
||||||
[(regexp-match-peek-positions sub-start inp)
|
|
||||||
;; read the next value, include comment objs, keep source
|
|
||||||
;; location manually (see above)
|
|
||||||
(let ([x (read-syntax/recursive source-name inp)])
|
|
||||||
(if (or (syntax? x) (special-comment? x))
|
|
||||||
x
|
|
||||||
(make-placeholder x
|
|
||||||
(list source-name line col pos (span-from pos)))))]
|
|
||||||
[(regexp-match/fail-without-reading end-of-line inp)
|
|
||||||
=> (lambda (m)
|
|
||||||
(if (cadr m) ; backslashes?
|
|
||||||
(list (make-stx (cadr m)) (make-stx eol-token))
|
|
||||||
(make-stx eol-token)))]
|
|
||||||
[(regexp-match/fail-without-reading item-re inp)
|
|
||||||
=> (lambda (m)
|
|
||||||
(let* ([m (car m)]
|
|
||||||
[m (regexp-replace bar-pfx-remove m #"")]
|
|
||||||
[m (regexp-replace* unquote-re m #"\\1")])
|
|
||||||
(make-stx m)))]
|
|
||||||
[(and (not (eq? item-re line-item))
|
|
||||||
(regexp-match/fail-without-reading #rx#"[{}]" inp))
|
|
||||||
=> (lambda (m)
|
|
||||||
(make-stx (car m)))]
|
|
||||||
[(regexp-match/fail-without-reading #rx#"^$" inp)
|
|
||||||
(if start-inside? #f (read-error "missing `~a'" close))]
|
|
||||||
[else (read-error "internal error [get-line]")])))
|
|
||||||
|
|
||||||
;; adds stx (new syntax) to the list of stxs, merging it if both are
|
|
||||||
;; strings, except for newline markers
|
;; strings, except for newline markers
|
||||||
(define (maybe-merge stx stxs)
|
(define (maybe-merge stx stxs)
|
||||||
(if (and (pair? stxs) (syntax? stx) (syntax? (car stxs))
|
(let* ([2nd (and (syntax? stx) (syntax-e stx))]
|
||||||
(string? (syntax-e stx))
|
[stx0 (and (pair? stxs) (car stxs))]
|
||||||
(string? (syntax-e (car stxs)))
|
[1st (and (syntax? stx0) (syntax-e stx0))])
|
||||||
(not (eol-syntax? stx))
|
(if (and (string? 1st) (not (eq? eol-token 1st))
|
||||||
(not (eol-syntax? (car stxs))))
|
(string? 2nd) (not (eq? eol-token 2nd)))
|
||||||
(let ([fst (car stxs)])
|
(cons (datum->syntax-object stx0
|
||||||
(cons (datum->syntax-object stx
|
(string-append 1st 2nd)
|
||||||
(string-append (syntax-e fst) (syntax-e stx))
|
(list (syntax-source stx0)
|
||||||
(list (syntax-source fst)
|
(syntax-line stx0)
|
||||||
(syntax-line fst)
|
(syntax-column stx0)
|
||||||
(syntax-column fst)
|
(syntax-position stx0)
|
||||||
(syntax-position fst)
|
;; this is called right after reading stx
|
||||||
(span-from (syntax-position fst))))
|
(span-from (syntax-position stx0))))
|
||||||
(cdr stxs)))
|
(cdr stxs))
|
||||||
(cons stx stxs)))
|
(cons stx stxs))))
|
||||||
|
|
||||||
(define (add-indents stxs)
|
(define (get-lines* re:begin re:end re:item end-token)
|
||||||
(unless (andmap (lambda (x)
|
;; re:begin, re:end, end-token can be false if start-inside? is #t
|
||||||
(or (and (syntax? x) (syntax-line x) (syntax-column x))
|
(let loop ([lvl 0] [r '()])
|
||||||
(placeholder? x)))
|
(let-values ([(line col pos) (port-next-location inp)])
|
||||||
stxs)
|
(define (make-stx sexpr)
|
||||||
(read-error "internal error [add-indents] ~s" stxs))
|
(datum->syntax-object #f
|
||||||
(cond
|
(if (bytes? sexpr) (bytes->string/utf-8 sexpr) sexpr)
|
||||||
[(not (read-insert-indents)) (map syntax/placeholder-strip stxs)]
|
(list source-name line col pos (span-from pos))))
|
||||||
[(null? stxs) '()]
|
(cond [(and re:begin (*match1 re:begin))
|
||||||
[else (let ([mincol (apply min (map syntax/placeholder-column stxs))])
|
=> (lambda (m) (loop (add1 lvl) (maybe-merge (make-stx m) r)))]
|
||||||
(let loop ([curline line-num] [stxs stxs] [r '()])
|
[(and re:end (*match1 re:end))
|
||||||
(if (null? stxs)
|
=> (lambda (m)
|
||||||
(reverse! r)
|
(if (and (zero? lvl) (not start-inside?))
|
||||||
(let* ([stx (car stxs)]
|
(done-lines r)
|
||||||
[line (syntax/placeholder-line stx)])
|
(loop (sub1 lvl) (maybe-merge (make-stx m) r))))]
|
||||||
(loop line (cdr stxs)
|
[(*skip re:end-of-line)
|
||||||
(let ([stxcol (syntax/placeholder-column stx)]
|
(loop lvl (cons (make-stx eol-token) r))] ; no merge needed
|
||||||
[stx* (syntax/placeholder-strip stx)])
|
[(*match re:command-unquote)
|
||||||
(if (and (< curline line) (< mincol stxcol))
|
=> (lambda (m)
|
||||||
(list* stx*
|
(loop lvl (maybe-merge (make-stx (cadr m)) r)))]
|
||||||
(datum->syntax-object/placeholder stx
|
[(*peek re:command)
|
||||||
(make-spaces (- stxcol mincol)))
|
;; read the next value, include comment objs, keep source
|
||||||
r)
|
;; location manually (see above)
|
||||||
(cons stx* r))))))))]))
|
=> (lambda (m)
|
||||||
|
;; if the command is a string escape, use `read-syntax',
|
||||||
|
;; so that we don't get a placeholder, and we can merge
|
||||||
|
;; the string to others
|
||||||
|
(let* ([reader (if (cadr m)
|
||||||
|
read-syntax read-syntax/recursive)]
|
||||||
|
[x (reader source-name inp)])
|
||||||
|
(loop lvl
|
||||||
|
(cond [(special-comment? x) r]
|
||||||
|
[(syntax? x) (maybe-merge x r)]
|
||||||
|
;; otherwise it's a placeholder to wrap
|
||||||
|
[else (cons (make-placeholder x ; no merge
|
||||||
|
(list source-name line col pos
|
||||||
|
(span-from pos)))
|
||||||
|
r)]))))]
|
||||||
|
;; must be last, since it will always succeed with 1 char
|
||||||
|
[(*peek re:item) ; don't read: regexp grabs the following text
|
||||||
|
=> (lambda (m)
|
||||||
|
(loop lvl
|
||||||
|
(maybe-merge (make-stx (read-bytes (cdadr m) inp))
|
||||||
|
r)))]
|
||||||
|
[(*peek #rx#"^$")
|
||||||
|
(if end-token
|
||||||
|
(read-error 'eof "missing closing `~a'" end-token)
|
||||||
|
(done-lines r))]
|
||||||
|
[else (read-error "internal error [get-lines*]")]))))
|
||||||
|
|
||||||
(define (get-lines)
|
(define (get-lines)
|
||||||
(define get
|
(cond [(*skip re:lines-begin)
|
||||||
(cond [start-inside?
|
(get-lines* re:lines-begin re:lines-end re:line-item ch:lines-end)]
|
||||||
(get-line "{" open-lines "}" close-lines
|
[(*match1 re:lines-begin*)
|
||||||
line-item bslash-unquote (box 0))]
|
=> (lambda (bgn)
|
||||||
[(regexp-match/fail-without-reading open-lines-special inp)
|
(let* ([end (reverse-bytes bgn)]
|
||||||
=> (lambda (m)
|
[bgn* (regexp-quote bgn)]
|
||||||
(let* ([open (cadr m)]
|
[end* (regexp-quote end)])
|
||||||
[close (reverse-bytes open)]
|
(get-lines* (^px bgn*) (^px end*)
|
||||||
[open-re (regexp-quote open)]
|
(re:line-item* bgn* end*)
|
||||||
[close-re (regexp-quote close)]
|
end)))]
|
||||||
[either-re (bytes-append open-re #"|" close-re)]
|
[else #f]))
|
||||||
[bre (lambda (pfx/sfx re)
|
|
||||||
(byte-regexp
|
(define (get-attrs)
|
||||||
(bytes-append (car pfx/sfx)
|
(and (*skip re:attrs-begin)
|
||||||
re
|
(read-delimited-list re:attrs-end ch:attrs-end)))
|
||||||
(cadr pfx/sfx))))])
|
|
||||||
(get-line open (bre open-lines* open-re)
|
(define (get-escape-expr)
|
||||||
close (bre close-lines* close-re)
|
(define-values (line col pos) (port-next-location inp))
|
||||||
(bre line-item* either-re)
|
(and (*skip re:expr-escape)
|
||||||
(bre bslash-unquote* either-re)
|
(begin
|
||||||
(box 0))))]
|
(skip-whitespace inp)
|
||||||
[(regexp-match/fail-without-reading open-lines inp)
|
(begin0 (parameterize ([current-readtable command-readtable])
|
||||||
(get-line "{" open-lines "}" close-lines
|
(let loop ()
|
||||||
line-item bslash-unquote (box 0))]
|
(let ([expr
|
||||||
[else #f]))
|
;; should be `read-syntax/recursive', but see
|
||||||
(and get (let loop ([lines '()] [more '()])
|
;; the next comment (this also means that we
|
||||||
(let-values ([(line more) (if (pair? more)
|
;; never get a special-comment)
|
||||||
(values (car more) (cdr more))
|
(read-syntax)])
|
||||||
(values (get) more))])
|
(if (special-comment? expr)
|
||||||
(cond [(not line) (add-indents (reverse! lines))]
|
(loop)
|
||||||
;; can happen from a sub @;{...} comment
|
;; we need to use the proper source location,
|
||||||
[(special-comment? line) (loop lines more)]
|
;; including the initial "@|" so if an escape is
|
||||||
[(list? line) (loop lines (append line more))]
|
;; at the beginning of a line no bogus indentation
|
||||||
[else (loop (maybe-merge line lines) more)])))))
|
;; is added later
|
||||||
|
(datum->syntax-object expr (syntax-e expr)
|
||||||
|
(list source-name line-num col-num position
|
||||||
|
(span-from position)))))))
|
||||||
|
(skip-whitespace inp)
|
||||||
|
(unless (*skip re:expr-escape)
|
||||||
|
(read-error* line col pos #f
|
||||||
|
"expecting a terminating '~a'" ch:bar-quote))))))
|
||||||
|
|
||||||
|
;; called only when we must see a command in the input
|
||||||
|
(define (get-command)
|
||||||
|
(define-values (line col pos) (port-next-location inp))
|
||||||
|
(let ([cmd (parameterize ([current-readtable command-readtable])
|
||||||
|
(read-syntax/recursive source-name inp))])
|
||||||
|
(if (special-comment? cmd)
|
||||||
|
(read-error* line col pos (span-from pos)
|
||||||
|
"expecting a command expression, got a comment")
|
||||||
|
cmd)))
|
||||||
|
|
||||||
(define (get-rprefixes) ; return punctuation prefixes in reverse
|
(define (get-rprefixes) ; return punctuation prefixes in reverse
|
||||||
(cond
|
(let loop ([r '()])
|
||||||
[(regexp-match/fail-without-reading
|
(let-values ([(line col pos) (port-next-location inp)])
|
||||||
#rx#"^(?:[ \t\r\n]*(?:'|`|,@?))+" inp)
|
(cond
|
||||||
=> (lambda (m)
|
[(*match1 #rx#"^(?:'|`|,@?)")
|
||||||
;; accumulate prefixes in reverse
|
=> (lambda (m)
|
||||||
(let loop ([s (car m)] [r '()])
|
(let ([sym (cond
|
||||||
(cond
|
[(assoc m '([#"'" quote]
|
||||||
[(equal? #"" s) r]
|
[#"`" quasiquote]
|
||||||
[(regexp-match #rx#"^[ \t\r\n]*('|`|,@?)(.*)$" s)
|
[#"," unquote]
|
||||||
=> (lambda (m)
|
[#",@" unquote-splicing]))
|
||||||
(loop (caddr m)
|
=> cadr]
|
||||||
(cons (let ([m (cadr m)])
|
[else (read-error "internal error [rpfxs]")])])
|
||||||
(cond
|
(loop (cons (datum->syntax-object #f sym
|
||||||
[(assoc m '([#"'" quote]
|
(list source-name line col pos
|
||||||
[#"`" quasiquote]
|
(span-from pos)))
|
||||||
[#"," unquote]
|
r))))]
|
||||||
[#",@" unquote-splicing]))
|
[(*peek re:whitespaces)
|
||||||
=> cadr]
|
(read-error "unexpected whitespace after ~a" ch:command)]
|
||||||
[else (read-error
|
[else r]))))
|
||||||
"internal error [rpfxs]")]))
|
|
||||||
r)))]
|
|
||||||
[else (read-error "internal error [rpfxs]")])))]
|
|
||||||
[else '()]))
|
|
||||||
|
|
||||||
(define (get-command) ; #f means no command
|
|
||||||
(let-values ([(line col pos) (port-next-location inp)])
|
|
||||||
(cond [(regexp-match-peek-positions open-attr/lines inp)
|
|
||||||
(values #f #f)]
|
|
||||||
[(regexp-match/fail-without-reading bars-quoted inp)
|
|
||||||
=> (lambda (m)
|
|
||||||
(values (datum->syntax-object #f
|
|
||||||
(read-from-bytes-exact-or-identifier (cadr m))
|
|
||||||
(list source-name line col pos (span-from pos)))
|
|
||||||
#t))]
|
|
||||||
[else (values
|
|
||||||
(parameterize ([current-readtable cmd-readtable])
|
|
||||||
(let loop ()
|
|
||||||
(let ([x (read-syntax/recursive source-name inp)])
|
|
||||||
(if (special-comment? x) (loop) x))))
|
|
||||||
#f)])))
|
|
||||||
|
|
||||||
(cond
|
(cond
|
||||||
[start-inside?
|
[start-inside?
|
||||||
(datum->syntax-object #f (get-lines)
|
(datum->syntax-object #f (get-lines* #f #f re:line-item-no-nests #f)
|
||||||
(list source-name line-num col-num position (span-from position)))]
|
(list source-name line-num col-num position (span-from position)))]
|
||||||
[(regexp-match/fail-without-reading comment-start inp)
|
[(*peek re:whitespaces)
|
||||||
(if (regexp-match-peek-positions open-lines inp)
|
(read-error "unexpected whitespace after ~a" ch:command)]
|
||||||
(get-lines) (regexp-match comment-line inp))
|
[(*skip re:comment-start)
|
||||||
|
(unless (get-lines) (*skip re:comment-line))
|
||||||
(make-special-comment #f)]
|
(make-special-comment #f)]
|
||||||
[else
|
[else
|
||||||
(let* ([pfx (get-rprefixes)]
|
(let*-values
|
||||||
[bars? #f]
|
([(rpfxs) (get-rprefixes)]
|
||||||
[cmd (let-values ([(cmd bs?) (get-command)])
|
[(cmd attrs lines)
|
||||||
(set! bars? bs?) cmd)] ; #f means no command
|
(cond
|
||||||
[attrs (and (not bars?) (get-attrs))]
|
;; try get-lines first -- so @|{...}| is not used as a
|
||||||
[lines (and (not bars?) (get-lines))]
|
;; simple expression escape, same for get-attrs
|
||||||
[stx (and (or attrs lines)
|
[(get-lines) => (lambda (lines) (values #f #f lines))]
|
||||||
(append (or attrs '()) (or lines '())))]
|
[(get-attrs) => (lambda (attrs) (values #f attrs (get-lines)))]
|
||||||
[stx (or (and cmd stx (cons cmd stx)) ; all parts
|
[(get-escape-expr) => (lambda (expr) (values expr #f #f))]
|
||||||
stx ; no cmd part => just a parenthesized expression
|
[else (values (get-command) (get-attrs) (get-lines))])]
|
||||||
cmd ; no attrs/lines => simple expression (no parens)
|
[(stx) (and (or attrs lines)
|
||||||
;; impossible: either we saw []s or {}s, or we read a
|
(append (or attrs '()) (or lines '())))]
|
||||||
;; scheme expression
|
[(stx) (or (and cmd stx (cons cmd stx)) ; all parts
|
||||||
(read-error "internal error [dispatcher]"))]
|
stx ; no cmd part => just a parenthesized expression
|
||||||
[stx (let loop ([pfx pfx] [stx stx])
|
cmd ; no attrs/lines => simple expression (no parens)
|
||||||
(if (null? pfx) stx
|
;; impossible: either we saw []s or {}s, or we read a
|
||||||
(loop (cdr pfx) (list (car pfx) stx))))])
|
;; scheme expression
|
||||||
|
(read-error "internal error [dispatcher]"))]
|
||||||
|
[(stx)
|
||||||
|
;; wrap the prefixes around the result
|
||||||
|
(let loop ([rpfxs rpfxs] [stx stx])
|
||||||
|
(if (null? rpfxs)
|
||||||
|
stx
|
||||||
|
(loop (cdr rpfxs) (list (car rpfxs) stx))))])
|
||||||
(datum->syntax-object #f stx
|
(datum->syntax-object #f stx
|
||||||
(list source-name line-num col-num position
|
(list source-name line-num col-num position
|
||||||
(span-from position))))]))
|
(span-from position))))]))
|
||||||
|
@ -356,13 +448,20 @@
|
||||||
;; readtables
|
;; readtables
|
||||||
|
|
||||||
(define at-readtable
|
(define at-readtable
|
||||||
(make-readtable #f cmd-char 'terminating-macro (dispatcher #f)))
|
(make-readtable #f ch:command 'terminating-macro (dispatcher #f)))
|
||||||
|
|
||||||
;; similar to plain Scheme, but with `|' as a terminating macro
|
(provide use-at-readtable)
|
||||||
(define cmd-readtable
|
(define (use-at-readtable)
|
||||||
|
(port-count-lines! (current-input-port))
|
||||||
|
(current-readtable at-readtable))
|
||||||
|
|
||||||
|
;; similar to plain Scheme (scribble, actually), but with `|' as a
|
||||||
|
;; terminating macro (otherwise it behaves the same; the only difference is
|
||||||
|
;; that `a|b|c' is three symbols)
|
||||||
|
(define command-readtable
|
||||||
(make-readtable at-readtable #\| 'terminating-macro
|
(make-readtable at-readtable #\| 'terminating-macro
|
||||||
(lambda (char inp source-name line-num col-num position)
|
(lambda (char inp source-name line-num col-num position)
|
||||||
(let ([m (regexp-match/fail-without-reading #rx#"^([^|]*)\\|" inp)])
|
(let ([m (*regexp-match #rx#"^([^|]*)\\|" inp)])
|
||||||
(unless m
|
(unless m
|
||||||
(raise-read-error
|
(raise-read-error
|
||||||
"unbalanced `|'" source-name line-num col-num position #f))
|
"unbalanced `|'" source-name line-num col-num position #f))
|
||||||
|
@ -371,11 +470,6 @@
|
||||||
(list source-name line-num col-num position
|
(list source-name line-num col-num position
|
||||||
(add1 (bytes-length (car m)))))))))
|
(add1 (bytes-length (car m)))))))))
|
||||||
|
|
||||||
(provide use-at-readtable)
|
|
||||||
(define (use-at-readtable)
|
|
||||||
(port-count-lines! (current-input-port))
|
|
||||||
(current-readtable at-readtable))
|
|
||||||
|
|
||||||
(define default-src (gensym))
|
(define default-src (gensym))
|
||||||
(define (src-name src port)
|
(define (src-name src port)
|
||||||
(if (eq? src default-src) (object-name port) src))
|
(if (eq? src default-src) (object-name port) src))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user