added a scribble/text language for preprocessing

svn: r8818
This commit is contained in:
Eli Barzilay 2008-02-27 21:34:33 +00:00
parent c9a326111d
commit 2aa9e5fade
3 changed files with 613 additions and 554 deletions

View File

@ -1,30 +1,29 @@
;; ============================================================================ ;; ============================================================================
;; Implements the @-reader macro for embedding text in Scheme code. ;; Implements the @-reader macro for embedding text in Scheme code.
(module reader scheme/base #lang scheme/base
(require mzlib/string syntax/readerr) (require mzlib/string syntax/readerr)
;; -------------------------------------------------------------------------- ;; ----------------------------------------------------------------------------
;; utilities for syntax specifications below ;; utilities for syntax specifications below
;; regexps ;; regexps
(define (px . args) (define (px . args)
(let* ([args (let loop ([xs args]) (let* ([args (let loop ([xs args])
(if (list? xs) (apply append (map loop xs)) (list xs)))] (if (list? xs) (apply append (map loop xs)) (list xs)))]
[args (map (lambda (x) [args (map (lambda (x)
(cond (cond [(bytes? x) x]
[(bytes? x) x]
[(string? x) (string->bytes/utf-8 x)] [(string? x) (string->bytes/utf-8 x)]
[(char? x) (regexp-quote (bytes (char->integer x)))] [(char? x) (regexp-quote (bytes (char->integer x)))]
[(not x) #""] [(not x) #""]
[else (internal-error 'px)])) [else (internal-error 'px)]))
args)]) args)])
(byte-pregexp (apply bytes-append args)))) (byte-pregexp (apply bytes-append args))))
(define (^px . args) (px #"^" args)) (define (^px . args) (px #"^" args))
;; reverses a byte string visually ;; reverses a byte string visually
(define reverse-bytes (define reverse-bytes
(let ([pairs (let ([xs (bytes->list #"([{<")] (let ([pairs (let ([xs (bytes->list #"([{<")]
[ys (bytes->list #")]}>")]) [ys (bytes->list #")]}>")])
(append (map cons xs ys) (map cons ys xs)))]) (append (map cons xs ys) (map cons ys xs)))])
@ -33,65 +32,66 @@
[else b])) [else b]))
(lambda (bs) (list->bytes (map rev-byte (reverse (bytes->list bs))))))) (lambda (bs) (list->bytes (map rev-byte (reverse (bytes->list bs)))))))
;; -------------------------------------------------------------------------- ;; ----------------------------------------------------------------------------
;; syntax ;; syntax
;; basic syntax customization ;; basic syntax customization
(define ch:command #\@) (define ch:command #\@)
(define ch:comment #\;) (define ch:comment #\;)
(define ch:expr-escape #\|) (define ch:expr-escape #\|)
(define ch:datums-begin #\[) (define ch:datums-begin #\[)
(define ch:datums-end #\]) (define ch:datums-end #\])
(define ch:lines-begin #\{) (define ch:lines-begin #\{)
(define ch:lines-end #\}) (define ch:lines-end #\})
(define str:lines-begin* #"(\\|[^a-zA-Z0-9 \t\r\n\f@\\\177-\377{]*)\\{") (define str:lines-begin* #"(\\|[^a-zA-Z0-9 \t\r\n\f@\\\177-\377{]*)\\{")
(define str:end-of-line "[ \t]*\r?\n[ \t]*") ; eat spaces on the next line (define str:end-of-line "[ \t]*\r?\n[ \t]*") ; eat spaces on the next line
;; regexps based on the above (more in make-dispatcher) ;; regexps based on the above (more in make-dispatcher)
(define re:whitespaces (^px "\\s+")) (define re:whitespaces (^px "\\s+"))
(define re:comment-start (^px ch:comment)) (define re:comment-start (^px ch:comment))
(define re:comment-line (^px "[^\n]*\n[ \t]*")) ; like tex's `%' (define re:comment-line (^px "[^\n]*\n[ \t]*")) ; like tex's `%'
(define re:expr-escape (^px ch:expr-escape)) (define re:expr-escape (^px ch:expr-escape))
(define re:datums-begin (^px ch:datums-begin)) (define re:datums-begin (^px ch:datums-begin))
(define re:datums-end (^px ch:datums-end)) (define re:datums-end (^px ch:datums-end))
(define re:lines-begin (^px ch:lines-begin)) (define re:lines-begin (^px ch:lines-begin))
(define re:lines-begin* (^px str:lines-begin*)) (define re:lines-begin* (^px str:lines-begin*))
(define re:lines-end (^px ch:lines-end)) (define re:lines-end (^px ch:lines-end))
(define re:end-of-line (^px str:end-of-line)) (define re:end-of-line (^px str:end-of-line))
;; -------------------------------------------------------------------------- ;; ----------------------------------------------------------------------------
;; utilities ;; utilities
(define (internal-error label) (define (internal-error label)
(error 'scribble-reader "internal error [~a]" label)) (error 'scribble-reader "internal error [~a]" label))
;; like `regexp-match/fail-without-reading', without extras; the regexp that ;; like `regexp-match/fail-without-reading', without extras; the regexp that
;; is used must be anchored -- nothing is dropped ;; is used must be anchored -- nothing is dropped
(define (*regexp-match-peek-positions pattern input-port) (define (*regexp-match-peek-positions pattern input-port)
#; ; sanity checks, not needed unless this file is edited #; ; sanity checks, not needed unless this file is edited
(unless (and (byte-regexp? pattern) (unless (and (byte-regexp? pattern)
(regexp-match? #rx#"^\\^" (object-name pattern))) (regexp-match? #rx#"^\\^" (object-name pattern)))
(internal-error 'invalid-bregexp)) (internal-error 'invalid-bregexp))
(regexp-match-peek-positions pattern input-port)) (regexp-match-peek-positions pattern input-port))
;; the following doesn't work -- must peek first ;; the following doesn't work -- must peek first
;; (define (*regexp-match-positions pattern input-port) ;; (define (*regexp-match-positions pattern input-port)
;; (unless (and (byte-regexp? pattern) ;; #; ; sanity checks, not needed unless this file is edited
;; (regexp-match? #rx#"^\\^" (object-name pattern))) ;; (unless (and (byte-regexp? pattern)
;; (internal-error 'invalid-bregexp)) ;; (regexp-match? #rx#"^\\^" (object-name pattern)))
;; (regexp-match-peek-positions pattern input-port)) ;; (internal-error 'invalid-bregexp))
(define (*regexp-match pattern input-port) ;; (regexp-match-peek-positions pattern input-port))
(define (*regexp-match pattern input-port)
(let ([m (*regexp-match-peek-positions pattern input-port)]) (let ([m (*regexp-match-peek-positions pattern input-port)])
(and m (let ([s (read-bytes (cdar m) input-port)]) (and m (let ([s (read-bytes (cdar m) input-port)])
(cons s (map (lambda (p) (and p (subbytes s (car p) (cdr p)))) (cons s (map (lambda (p) (and p (subbytes s (car p) (cdr p))))
(cdr m))))))) (cdr m)))))))
;; like regexp-match, but returns the whole match ;; like regexp-match, but returns the whole match
(define (*regexp-match1 pattern input-port) (define (*regexp-match1 pattern input-port)
(let ([m (*regexp-match-peek-positions pattern input-port)]) (let ([m (*regexp-match-peek-positions pattern input-port)])
(and m (read-bytes (cdar m) input-port)))) (and m (read-bytes (cdar m) input-port))))
;; Utility for readtable-based caches ;; Utility for readtable-based caches
(define (readtable-cached fun) (define (readtable-cached fun)
(let ([cache (make-hash-table 'weak)]) (let ([cache (make-hash-table 'weak)])
(letrec ([readtable-cached (letrec ([readtable-cached
(case-lambda (case-lambda
@ -103,9 +103,9 @@
[() (readtable-cached (current-readtable))])]) [() (readtable-cached (current-readtable))])])
readtable-cached))) readtable-cached)))
;; Skips whitespace characters, sensitive to the current readtable's ;; Skips whitespace characters, sensitive to the current readtable's
;; definition of whitespace; optimizes common spaces when possible ;; definition of whitespace; optimizes common spaces when possible
(define skip-whitespace (define skip-whitespace
(let* ([plain-readtables (make-hash-table 'weak)] (let* ([plain-readtables (make-hash-table 'weak)]
[plain-spaces " \t\n\r\f"] [plain-spaces " \t\n\r\f"]
[plain-spaces-list (string->list " \t\n\r\f")] [plain-spaces-list (string->list " \t\n\r\f")]
@ -134,8 +134,8 @@
(unless (eof-object? ch) (unless (eof-object? ch)
(when (whitespace? ch rt) (read-char port) (loop))))))))) (when (whitespace? ch rt) (read-char port) (loop)))))))))
;; make n spaces, cached for n ;; make n spaces, cached for n
(define make-spaces (define make-spaces
(let ([t (make-hash-table)]) (let ([t (make-hash-table)])
(lambda (n) (lambda (n)
(hash-table-get t n (hash-table-get t n
@ -143,28 +143,27 @@
(let ([s (make-string n #\space)]) (let ([s (make-string n #\space)])
(hash-table-put! t n s) s)))))) (hash-table-put! t n s) s))))))
(define (bytes-width bs start) (define (bytes-width bs start)
(let ([len (bytes-length bs)]) (let ([len (bytes-length bs)])
(if (regexp-match? #rx"^ *$" bs start) (if (regexp-match? #rx"^ *$" bs start)
(- (bytes-length bs) start) (- (bytes-length bs) start)
(let loop ([i start] [w 0]) (let loop ([i start] [w 0])
(if (= i len) (if (= i len)
w w
(loop (add1 i) (+ w (if (eq? 9 (bytes-ref bs i)) (loop (add1 i)
(- 8 (modulo w 8)) (+ w (if (eq? 9 (bytes-ref bs i)) (- 8 (modulo w 8)) 1))))))))
1))))))))
;; a unique eol string ;; a unique eol string
(define eol-token "\n") (define eol-token "\n")
(define (eol-syntax? x) (and (syntax? x) (eq? eol-token (syntax-e x)))) (define (eol-syntax? x) (and (syntax? x) (eq? eol-token (syntax-e x))))
;; sanity check, in case this property gets violated in the future ;; sanity check, in case this property gets violated in the future
(unless (eol-syntax? (datum->syntax #f eol-token)) (unless (eol-syntax? (datum->syntax #f eol-token))
(internal-error 'invalid-assumption)) (internal-error 'invalid-assumption))
;; -------------------------------------------------------------------------- ;; ----------------------------------------------------------------------------
;; main reader function for @ constructs ;; main reader function for @ constructs
(define (dispatcher char inp source-name line-num col-num position (define (dispatcher char inp source-name line-num col-num position
start-inside? command-readtable ch:command start-inside? command-readtable ch:command
re:command re:line-item* re:line-item re:command re:line-item* re:line-item
re:line-item-no-nests datum-readtable re:line-item-no-nests datum-readtable
@ -187,7 +186,7 @@
(define (*match rx) (*regexp-match rx inp)) (define (*match rx) (*regexp-match rx inp))
(define (*match1 rx) (*regexp-match1 rx inp)) (define (*match1 rx) (*regexp-match1 rx inp))
;; (define (*skip rx) (*regexp-match-positions rx inp)) <- see above ;; (define (*skip rx) (*regexp-match-positions rx inp)) ; <- see above
(define (*skip rx) (*regexp-match1 rx inp)) (define (*skip rx) (*regexp-match1 rx inp))
(define (*peek rx) (*regexp-match-peek-positions rx inp)) (define (*peek rx) (*regexp-match-peek-positions rx inp))
@ -212,13 +211,12 @@
;; needed) ;; needed)
(define (done-items xs) (define (done-items xs)
;; a column marker is either a non-negative integer N (saying the the ;; a column marker is either a non-negative integer N (saying the the
;; following code came from at column N), or a negative integer -N ;; following code came from at column N), or a negative integer -N (saying
;; (saying that the following code came from column N but no need to add ;; that the following code came from column N but no need to add
;; indentation at this point because it is at the openning of a {...}); ;; indentation at this point because it is at the openning of a {...});
;; `get-lines*' is careful not to include column markers before a newline ;; `get-lines*' is careful not to include column markers before a newline
;; or the end of the text, and a -N marker can only come from the ;; or the end of the text, and a -N marker can only come from the beginning
;; beginning of the text (and it's never there if the text began with a ;; of the text (and it's never there if the text began with a newline)
;; newline)
(if (andmap eol-syntax? xs) (if (andmap eol-syntax? xs)
;; nothing to do ;; nothing to do
(reverse xs) (reverse xs)
@ -277,8 +275,8 @@
(cdr stxs)) (cdr stxs))
(cons stx stxs)))) (cons stx stxs))))
;; helper for `get-lines*' drop a column marker if the previous item was ;; helper for `get-lines*' drop a column marker if the previous item was also
;; also a newline (or the beginning) ;; a newline (or the beginning)
(define (maybe-drop-marker r) (define (maybe-drop-marker r)
(if (and (pair? r) (integer? (car r)) (if (and (pair? r) (integer? (car r))
(or (null? (cdr r)) (eol-syntax? (cadr r)))) (or (null? (cdr r)) (eol-syntax? (cadr r))))
@ -325,9 +323,9 @@
=> (lambda (m) => (lambda (m)
(let ([x (cond (let ([x (cond
[(cadr m) [(cadr m)
;; the command is a string escape, use `read-stx*' ;; the command is a string escape, use `read-stx*' to
;; to not get a placeholder, so we can merge the ;; not get a placeholder, so we can merge the string
;; string to others ;; to others
(read-stx*)] (read-stx*)]
[(caddr m) [(caddr m)
;; it's an expression escape, get multiple ;; it's an expression escape, get multiple
@ -377,8 +375,8 @@
(read-delimited-list re:datums-begin re:datums-end ch:datums-end))) (read-delimited-list re:datums-begin re:datums-end ch:datums-end)))
(define (get-escape-expr single?) (define (get-escape-expr single?)
;; single? means expect just one expression (or none, which is returned ;; single? means expect just one expression (or none, which is returned as
;; as a special-comment) ;; a special-comment)
(let ([get (lambda () (let ([get (lambda ()
(parameterize ([current-readtable command-readtable]) (parameterize ([current-readtable command-readtable])
(read-delimited-list re:expr-escape re:expr-escape (read-delimited-list re:expr-escape re:expr-escape
@ -405,11 +403,9 @@
(define (get-rprefixes) ; return punctuation prefixes in reverse (define (get-rprefixes) ; return punctuation prefixes in reverse
(let loop ([r '()]) (let loop ([r '()])
(let-values ([(line col pos) (port-next-location inp)]) (let-values ([(line col pos) (port-next-location inp)])
(cond (cond [(*match1 #rx#"^(?:'|`|,@?)")
[(*match1 #rx#"^(?:'|`|,@?)")
=> (lambda (m) => (lambda (m)
(let ([sym (cond (let ([sym (cond [(assoc m '([#"'" quote]
[(assoc m '([#"'" quote]
[#"`" quasiquote] [#"`" quasiquote]
[#"," unquote] [#"," unquote]
[#",@" unquote-splicing])) [#",@" unquote-splicing]))
@ -437,8 +433,8 @@
([(rpfxs) (get-rprefixes)] ([(rpfxs) (get-rprefixes)]
[(cmd datums lines) [(cmd datums lines)
(cond [(get-lines) (cond [(get-lines)
;; try get-lines first -- so @|{...}| is not used as a ;; try get-lines first -- so @|{...}| is not used as a simple
;; simple expression escape, same for get-datums ;; expression escape, same for get-datums
=> (lambda (lines) (values #f #f lines))] => (lambda (lines) (values #f #f lines))]
[(get-datums) [(get-datums)
=> (lambda (datums) (values #f datums (get-lines)))] => (lambda (datums) (values #f datums (get-lines)))]
@ -470,11 +466,10 @@
(if (null? rpfxs) (if (null? rpfxs)
stx stx
(loop (cdr rpfxs) (list (car rpfxs) stx))))]) (loop (cdr rpfxs) (list (car rpfxs) stx))))])
(datum->syntax #f stx (datum->syntax #f stx (list source-name line-num col-num position
(list source-name line-num col-num position
(span-from position))))])) (span-from position))))]))
(define (make-dispatcher start-inside? ch:command (define (make-dispatcher start-inside? ch:command
get-command-readtable get-datum-readtable get-command-readtable get-datum-readtable
syntax-post-processor) syntax-post-processor)
(define re:command (^px ch:command (define re:command (^px ch:command
@ -492,11 +487,11 @@
re:command re:line-item* re:line-item re:line-item-no-nests re:command re:line-item* re:line-item re:line-item-no-nests
(get-datum-readtable) syntax-post-processor))) (get-datum-readtable) syntax-post-processor)))
;; -------------------------------------------------------------------------- ;; ----------------------------------------------------------------------------
;; readtable ;; readtable
(provide make-at-readtable) (provide make-at-readtable)
(define (make-at-readtable (define (make-at-readtable
#:readtable [readtable (current-readtable)] #:readtable [readtable (current-readtable)]
#:command-char [command-char ch:command] #:command-char [command-char ch:command]
#:start-inside? [start-inside? #f] #:start-inside? [start-inside? #f]
@ -534,19 +529,19 @@
"bad datum-readtable: ~e" datum-readtable)])) "bad datum-readtable: ~e" datum-readtable)]))
at-rt) at-rt)
(provide use-at-readtable) (provide use-at-readtable)
(define use-at-readtable (define use-at-readtable
(make-keyword-procedure (make-keyword-procedure
(lambda (kws kw-args . rest) (lambda (kws kw-args . rest)
(port-count-lines! (current-input-port)) (port-count-lines! (current-input-port))
(current-readtable (current-readtable
(keyword-apply make-at-readtable kws kw-args rest))))) (keyword-apply make-at-readtable kws kw-args rest)))))
;; utilities for below ;; utilities for below
(define make-default-at-readtable (define make-default-at-readtable
(readtable-cached (readtable-cached
(lambda (rt) (make-at-readtable #:readtable rt)))) (lambda (rt) (make-at-readtable #:readtable rt))))
(define make-default-at-dispatcher/inside (define make-default-at-dispatcher/inside
(readtable-cached (readtable-cached
(lambda (rt) (lambda (rt)
(let-values ([(_1 disp _2) (let-values ([(_1 disp _2)
@ -555,42 +550,40 @@
ch:command)]) ch:command)])
disp)))) disp))))
;; -------------------------------------------------------------------------- ;; ----------------------------------------------------------------------------
;; readers ;; readers
(define default-src (gensym 'scribble-reader)) (define default-src (gensym 'scribble-reader))
(define (src-name src port) (define (src-name src port)
(if (eq? src default-src) (object-name port) src)) (if (eq? src default-src) (object-name port) src))
(define-syntax with-at-reader (define-syntax with-at-reader
(syntax-rules () (syntax-rules ()
[(_ body ...) [(_ body ...)
(parameterize ([current-readtable (make-default-at-readtable)]) (parameterize ([current-readtable (make-default-at-readtable)])
body ...)])) body ...)]))
(define (*read [inp (current-input-port)]) (define (*read [inp (current-input-port)])
(with-at-reader (read inp))) (with-at-reader (read inp)))
(define (*read-syntax [src default-src] (define (*read-syntax [src default-src]
[inp (current-input-port)]) [inp (current-input-port)])
(with-at-reader (read-syntax (src-name src inp) inp))) (with-at-reader (read-syntax (src-name src inp) inp)))
(define (read-inside [inp (current-input-port)]) (define (read-inside [inp (current-input-port)])
(let*-values ([(line col pos) (port-next-location inp)] (let*-values ([(line col pos) (port-next-location inp)]
[(inside-dispatcher) (make-default-at-dispatcher/inside)]) [(inside-dispatcher) (make-default-at-dispatcher/inside)])
(with-at-reader (with-at-reader
(syntax->datum (syntax->datum
(inside-dispatcher #f inp (object-name inp) line col pos))))) (inside-dispatcher #f inp (object-name inp) line col pos)))))
(define (read-inside-syntax [src default-src] (define (read-inside-syntax [src default-src]
[inp (current-input-port)]) [inp (current-input-port)])
(let*-values ([(line col pos) (port-next-location inp)] (let*-values ([(line col pos) (port-next-location inp)]
[(inside-dispatcher) (make-default-at-dispatcher/inside)]) [(inside-dispatcher) (make-default-at-dispatcher/inside)])
(with-at-reader (with-at-reader
(inside-dispatcher #f inp (src-name src inp) line col pos)))) (inside-dispatcher #f inp (src-name src inp) line col pos))))
(provide (rename-out [*read read] (provide (rename-out [*read read]
[*read-syntax read-syntax]) [*read-syntax read-syntax])
read-inside read-inside-syntax) read-inside read-inside-syntax)
)

34
collects/scribble/text.ss Normal file
View File

@ -0,0 +1,34 @@
#lang scheme/base
(require scheme/promise)
(provide (all-from-out scheme/base scheme/promise))
(define (show x p)
(let show ([x x])
(cond [(or (void? x) (not x) (null? x)) (void)]
[(pair? x) (show (car x)) (show (cdr x))]
[(promise? x) (show (force x))]
[(keyword? x) (show (keyword->string x))]
[(and (procedure? x) (procedure-arity-includes? x 0)) (show (x))]
;; display won't work, since it calls us back
;; [else (display x p)]
;; things that are printed directly
[(bytes? x) (write-bytes x p)]
[(string? x) (write-string x p)]
[(char? x) (write-char x p)]
[(number? x) (write x p)]
;; generic fallback
[else (show (format "~a" x))])))
;; this is too much -- it also changes error messages
;; (global-port-print-handler show)
(port-display-handler (current-output-port) show)
;; the default prints a newline too, avoid that
(current-print display)
;; make it possible to use this language through a repl
;; --> won't work: need an `inside' reader that reads a single expression
;; (require (prefix-in * "text/lang/reader.ss"))
;; (current-prompt-read
;; (lambda () (parameterize ([read-accept-reader #t]) (*read-syntax))))

View File

@ -0,0 +1,32 @@
#lang scheme/base
(require (prefix-in s: "../../reader.ss"))
(provide (rename-out [*read read])
(rename-out [*read-syntax read-syntax]))
(define (*read [inp (current-input-port)])
(wrap inp (s:read-inside inp)))
(define (*read-syntax [src #f] [port (current-input-port)])
(wrap port (s:read-inside-syntax src port)))
(define (wrap port body)
(define (strip-leading-newlines stxs)
(if (null? stxs)
stxs
(let ([p (syntax-property (car stxs) 'scribble)])
(if (and (pair? p) (eq? (car p) 'newline))
(strip-leading-newlines (cdr stxs))
stxs))))
(let* ([p-name (object-name port)]
[name (if (path? p-name)
(let-values ([(base name dir?) (split-path p-name)])
(string->symbol (path->string (path-replace-suffix
name #""))))
'page)]
[id 'doc]
[body (if (syntax? body)
(strip-leading-newlines (syntax->list body))
body)])
`(module ,name scribble/text (#%module-begin . ,body))))