296 lines
14 KiB
Scheme
296 lines
14 KiB
Scheme
;; Implements the @-reader macro for embedding text in Scheme code.
|
|
(module reader mzscheme
|
|
(require (lib "string.ss") (lib "kw.ss") (lib "readerr.ss" "syntax"))
|
|
|
|
(define cmd-char #\@)
|
|
|
|
(define bars-quoted #rx#"^[ \t\r\n]*\\|([^|]*)\\|")
|
|
(define open-attrs #rx#"^[ \t\r\n]*[[][ \t\r\n]*")
|
|
(define open-lines #rx#"^[ \t\r\n]*[{](?:[ \t]*\r?\n[ \t]*)?") ; 1 newline
|
|
(define open-lines* '(#"^[ \t\r\n]*" #"(?:[ \t]*\r?\n[ \t]*)?"))
|
|
(define open-lines-special ; a special ending expected: @foo{<{ ... }>} etc
|
|
#rx#"^[ \t\r\n]*([|][^a-zA-Z0-9 \t\r\n@\\]*?[{])(?:[ \t]*\r?\n[ \t]*)?")
|
|
(define open-attr/lines #rx#"^[ \t\r\n]*[[{][ \t\r\n]*")
|
|
(define close-attrs #rx#"^[ \t\r\n]*[]]")
|
|
(define close-lines #rx#"^(?:[ \t]*\r?\n[ \t]*)?[}]") ; swallow 1 newline
|
|
(define close-lines* '(#"^(?:[ \t]*\r?\n[ \t]*)?" #""))
|
|
(define comment-start #rx#"^[ \t]*;")
|
|
(define comment-line #rx#"^[^\r\n]*\r?\n[ \t]*") ; like tex's `%' nl & space
|
|
(define attr-sep #rx#"^[ \t\r\n]*=[ \t\r\n]*")
|
|
(define sub-start #rx#"^[@]")
|
|
(define line-item #rx#"^(?:[^{}@\r\n]*[^\\{}@\r\n]|[\\]+[{}@])+")
|
|
(define line-item* '(#"^(?:[^{}@\r\n]*[^\\{}@\r\n]|[\\]+(?:[@]|" #"))+"))
|
|
(define end-of-line #rx#"^([\\]+)?\r?\n[ \t]*") ; make \-eoln possible
|
|
(define bar-pfx-remove #rx#"^[|]")
|
|
(define bslash-unquote #rx#"[\\]([\\]*[{}@])")
|
|
(define bslash-unquote* '(#"[\\]([\\]+(?:[@]|" #"))"))
|
|
|
|
(define byte-pairs
|
|
(map (lambda (b) (cons (bytes-ref b 0) (bytes-ref b 1)))
|
|
'(#"()" #"[]" #"{}" #"<>")))
|
|
|
|
(define make-spaces
|
|
(let ([t (make-hash-table)])
|
|
(lambda (n)
|
|
(hash-table-get t n
|
|
(lambda ()
|
|
(let ([s (make-string n #\space)])
|
|
(hash-table-put! t n s) s))))))
|
|
|
|
(define (dispatcher char inp source-name line-num col-num position)
|
|
(define/kw (next-syntax readtable #:optional plain?)
|
|
(let ([read (if plain? read-syntax read-syntax/recursive)])
|
|
(parameterize ([current-readtable readtable])
|
|
(let loop ()
|
|
(let ([x (read source-name inp)])
|
|
(if (special-comment? x) (loop) x))))))
|
|
(define (cur-pos)
|
|
(let-values ([(line col pos) (port-next-location inp)])
|
|
pos))
|
|
(define (span-from start)
|
|
(and start (- (cur-pos) start)))
|
|
(define (read-error msg . xs)
|
|
(let-values ([(line col pos) (port-next-location inp)])
|
|
(raise-read-error (apply format msg xs) source-name line col pos #f)))
|
|
(define (read-from-bytes-exact-or-identifier bs)
|
|
(let ([inp (open-input-bytes bs)]
|
|
[default (lambda _ (string->symbol (bytes->string/utf-8 bs)))])
|
|
(with-handlers ([void default])
|
|
(let ([x (read inp)])
|
|
;; must match all -- otherwise: default
|
|
(if (regexp-match #rx#"^[ \t\r\n]*$" inp) x (default))))))
|
|
(define (reverse-bytes bytes)
|
|
(define (rev-byte b)
|
|
(cond [(assq b byte-pairs) => cdr]
|
|
[else b]))
|
|
(let* ([len (bytes-length bytes)] [r (make-bytes len)])
|
|
(let loop ([i (sub1 len)])
|
|
(when (<= 0 i)
|
|
(bytes-set! r i (rev-byte (bytes-ref bytes (- len i 1))))
|
|
(loop (sub1 i))))
|
|
r))
|
|
(define eol-token "\n")
|
|
(define (get-attr)
|
|
(if (regexp-match/fail-without-reading close-attrs inp) #f
|
|
(let* ([fst (next-syntax
|
|
;; hack: if we see an open paren or other nested
|
|
;; constructs, use the usual readtable so a nested `='
|
|
;; behaves correctly
|
|
(if (regexp-match-peek-positions
|
|
#rx#"^[ \t\r\n]*['`,]*[[({@]" inp)
|
|
at-readtable attr-readtable)
|
|
#t)]
|
|
[snd (and (symbol? (syntax-e fst))
|
|
(regexp-match/fail-without-reading attr-sep inp)
|
|
(next-syntax at-readtable))])
|
|
(if snd
|
|
(list (string->keyword (symbol->string (syntax-e fst))) snd)
|
|
(list fst)))))
|
|
(define (get-attrs)
|
|
(and (regexp-match/fail-without-reading open-attrs inp)
|
|
(let loop ([attrs '()])
|
|
(let ([a (get-attr)])
|
|
(if a (loop (append! (reverse! a) attrs)) (reverse! attrs))))))
|
|
(define ((get-line open open-re close close-re item-re unquote-re level))
|
|
(let-values ([(line col pos) (port-next-location inp)])
|
|
(define (make-stx sexpr)
|
|
(datum->syntax-object #f
|
|
(if (bytes? sexpr) (bytes->string/utf-8 sexpr) sexpr)
|
|
(list source-name line col pos (span-from pos))))
|
|
(cond [(regexp-match/fail-without-reading close-re inp)
|
|
=> (lambda (m)
|
|
(let ([l (sub1 (unbox level))])
|
|
(set-box! level l)
|
|
(and (<= 0 l) (make-stx (car m)))))]
|
|
[(regexp-match/fail-without-reading open-re inp)
|
|
=> (lambda (m)
|
|
(set-box! level (add1 (unbox level)))
|
|
(make-stx (car m)))]
|
|
[(regexp-match-peek-positions sub-start inp)
|
|
(read-syntax source-name inp)] ; include comment objs
|
|
[(regexp-match/fail-without-reading end-of-line inp)
|
|
=> (lambda (m)
|
|
(if (cadr m) ; backslashes?
|
|
(list (make-stx (cadr m)) (make-stx eol-token))
|
|
(make-stx eol-token)))]
|
|
[(regexp-match/fail-without-reading item-re inp)
|
|
=> (lambda (m)
|
|
(let* ([m (car m)]
|
|
[m (regexp-replace bar-pfx-remove m #"")]
|
|
[m (regexp-replace* unquote-re m #"\\1")])
|
|
(make-stx m)))]
|
|
[(and (not (eq? item-re line-item))
|
|
(regexp-match/fail-without-reading #rx#"[{}]" inp))
|
|
=> (lambda (m)
|
|
(make-stx (car m)))]
|
|
[(regexp-match/fail-without-reading #rx#"^$" inp)
|
|
(read-error "missing `~a'" close)]
|
|
[else (read-error "internal error")])))
|
|
;; adds stx (new syntax) to the list of stxs, merging it if both are
|
|
;; strings, except for newline markers
|
|
(define (maybe-merge stx stxs)
|
|
(if (and (pair? stxs) (syntax? stx) (syntax? (car stxs))
|
|
(string? (syntax-e stx))
|
|
(string? (syntax-e (car stxs)))
|
|
(not (eq? eol-token (syntax-e stx)))
|
|
(not (eq? eol-token (syntax-e (car stxs)))))
|
|
(let ([fst (car stxs)])
|
|
(cons (datum->syntax-object stx
|
|
(string-append (syntax-e fst) (syntax-e stx))
|
|
(list (syntax-source fst)
|
|
(syntax-line fst)
|
|
(syntax-column fst)
|
|
(syntax-position fst)
|
|
(span-from (syntax-position fst))))
|
|
(cdr stxs)))
|
|
(cons stx stxs)))
|
|
(define (add-indents stxs)
|
|
(if (or (null? stxs)
|
|
(not (andmap (lambda (s) (and (syntax-line s) (syntax-column s)))
|
|
stxs)))
|
|
stxs
|
|
(let ([mincol (apply min (map syntax-column stxs))])
|
|
(let loop ([curline line-num] [stxs stxs] [r '()])
|
|
(if (null? stxs)
|
|
(reverse! r)
|
|
(let* ([stx (car stxs)] [line (syntax-line stx)])
|
|
(loop line (cdr stxs)
|
|
(if (and (< curline line) (< mincol (syntax-column stx)))
|
|
(list* stx
|
|
(datum->syntax-object stx
|
|
(make-spaces (- (syntax-column stx) mincol))
|
|
stx)
|
|
r)
|
|
(cons stx r)))))))))
|
|
(define (get-lines)
|
|
(define get
|
|
(cond [(regexp-match/fail-without-reading open-lines-special inp)
|
|
=> (lambda (m)
|
|
(let* ([open (cadr m)]
|
|
[close (reverse-bytes open)]
|
|
[open-re (regexp-quote open)]
|
|
[close-re (regexp-quote close)]
|
|
[either-re (bytes-append open-re #"|" close-re)]
|
|
[bre (lambda (pfx/sfx re)
|
|
(byte-regexp
|
|
(bytes-append (car pfx/sfx)
|
|
re
|
|
(cadr pfx/sfx))))])
|
|
(get-line open (bre open-lines* open-re)
|
|
close (bre close-lines* close-re)
|
|
(bre line-item* either-re)
|
|
(bre bslash-unquote* either-re)
|
|
(box 0))))]
|
|
[(regexp-match/fail-without-reading open-lines inp)
|
|
(get-line "{" open-lines "}" close-lines
|
|
line-item bslash-unquote (box 0))]
|
|
[else #f]))
|
|
(and get (let loop ([lines '()] [more '()])
|
|
(let-values ([(line more) (if (pair? more)
|
|
(values (car more) (cdr more))
|
|
(values (get) more))])
|
|
(cond [(not line) (add-indents (reverse! lines))]
|
|
;; can happen from a sub @;{...} comment
|
|
[(special-comment? line) (loop lines more)]
|
|
[(list? line) (loop lines (append line more))]
|
|
[else (loop (maybe-merge line lines) more)])))))
|
|
(define (get-rprefixes) ; return punctuation prefixes in reverse
|
|
(cond
|
|
[(regexp-match/fail-without-reading
|
|
#rx#"^(?:[ \t\r\n]*(?:'|`|,@?))+" inp)
|
|
=> (lambda (m)
|
|
;; accumulate prefixes in reverse
|
|
(let loop ([s (car m)] [r '()])
|
|
(cond
|
|
[(equal? #"" s) r]
|
|
[(regexp-match #rx#"^[ \t\r\n]*('|`|,@?)(.*)$" s)
|
|
=> (lambda (m)
|
|
(loop (caddr m)
|
|
(cons (let ([m (cadr m)])
|
|
(cadr (or (assoc
|
|
m '([#"'" quote]
|
|
[#"`" quasiquote]
|
|
[#"," unquote]
|
|
[#",@" unquote-splicing]))
|
|
(error "something bad"))))
|
|
r)))]
|
|
[else (error "something bad happened")])))]
|
|
[else '()]))
|
|
(define (get-command) ; #f means no command
|
|
(let-values ([(line col pos) (port-next-location inp)])
|
|
(cond [(regexp-match-peek-positions open-attr/lines inp)
|
|
(values #f #f)]
|
|
[(regexp-match/fail-without-reading bars-quoted inp)
|
|
=> (lambda (m)
|
|
(values (datum->syntax-object #f
|
|
(read-from-bytes-exact-or-identifier (cadr m))
|
|
(list source-name line col pos (span-from pos)))
|
|
#t))]
|
|
[else (values (next-syntax cmd-readtable) #f)])))
|
|
(cond
|
|
[(regexp-match/fail-without-reading comment-start inp)
|
|
(if (regexp-match-peek-positions open-lines inp)
|
|
(get-lines) (regexp-match comment-line inp))
|
|
(make-special-comment #f)]
|
|
[else
|
|
(let* ([pfx (get-rprefixes)]
|
|
[bars? #f]
|
|
[cmd (let-values ([(cmd bs?) (get-command)])
|
|
(set! bars? bs?) cmd)] ; #f means no command
|
|
[attrs (and (not bars?) (get-attrs))]
|
|
[lines (and (not bars?) (get-lines))]
|
|
[stx (and (or attrs lines)
|
|
(append (or attrs '()) (or lines '())))]
|
|
[stx (or (and cmd stx (cons cmd stx)) ; all parts
|
|
stx ; no cmd part => just a parenthesized expression
|
|
cmd ; no attrs/lines => simple expression (no parens)
|
|
;; impossible: either we saw []s or {}s, or we read a
|
|
;; scheme expression
|
|
(error "something bad happened"))]
|
|
[stx (let loop ([pfx pfx] [stx stx])
|
|
(if (null? pfx) stx
|
|
(loop (cdr pfx) (list (car pfx) stx))))])
|
|
(datum->syntax-object #f stx
|
|
(list source-name line-num col-num position (span-from position))))]))
|
|
|
|
(define at-readtable
|
|
(make-readtable #f cmd-char 'terminating-macro dispatcher))
|
|
|
|
;; similar to plain Scheme, but with `|' as a terminating macro
|
|
(define cmd-readtable
|
|
(make-readtable at-readtable #\| 'terminating-macro
|
|
(lambda (char inp source-name line-num col-num position)
|
|
(let ([m (regexp-match #rx#"^([^|]*)\\|" inp)])
|
|
(unless m
|
|
(raise-read-error
|
|
"unbalanced `|'" source-name line-num col-num position #f))
|
|
(datum->syntax-object
|
|
#f (string->symbol (bytes->string/utf-8 (cadr m)))
|
|
(list source-name line-num col-num position
|
|
(add1 (bytes-length (car m)))))))))
|
|
|
|
;; similar to plain Scheme, but with `=' always parsed as a separate symbol
|
|
(define attr-readtable
|
|
(make-readtable at-readtable #\= 'terminating-macro
|
|
(lambda (char inp source-name line-num col-num position)
|
|
(datum->syntax-object
|
|
#f (string->symbol (string char))
|
|
(list source-name line-num col-num position 1)))))
|
|
|
|
(provide use-at-readtable)
|
|
(define (use-at-readtable)
|
|
(port-count-lines! (current-input-port))
|
|
(current-readtable at-readtable))
|
|
|
|
(define/kw (*read #:optional [inp (current-input-port)])
|
|
(parameterize ([current-readtable at-readtable])
|
|
(read inp)))
|
|
|
|
(define/kw (*read-syntax #:optional src [port (current-input-port)])
|
|
(parameterize ([current-readtable at-readtable])
|
|
(read-syntax (or src (object-name port)) port)))
|
|
|
|
(provide (rename *read read) (rename *read-syntax read-syntax))
|
|
|
|
)
|