added a scribble/text language for preprocessing

svn: r8818
This commit is contained in:
Eli Barzilay 2008-02-27 21:34:33 +00:00
parent c9a326111d
commit 2aa9e5fade
3 changed files with 613 additions and 554 deletions

View File

@ -1,30 +1,29 @@
;; ============================================================================
;; Implements the @-reader macro for embedding text in Scheme code.
(module reader scheme/base
#lang scheme/base
(require mzlib/string syntax/readerr)
(require mzlib/string syntax/readerr)
;; --------------------------------------------------------------------------
;; utilities for syntax specifications below
;; ----------------------------------------------------------------------------
;; utilities for syntax specifications below
;; regexps
(define (px . args)
;; regexps
(define (px . args)
(let* ([args (let loop ([xs args])
(if (list? xs) (apply append (map loop xs)) (list xs)))]
[args (map (lambda (x)
(cond
[(bytes? x) x]
(cond [(bytes? x) x]
[(string? x) (string->bytes/utf-8 x)]
[(char? x) (regexp-quote (bytes (char->integer x)))]
[(not x) #""]
[else (internal-error 'px)]))
args)])
(byte-pregexp (apply bytes-append args))))
(define (^px . args) (px #"^" args))
(define (^px . args) (px #"^" args))
;; reverses a byte string visually
(define reverse-bytes
;; reverses a byte string visually
(define reverse-bytes
(let ([pairs (let ([xs (bytes->list #"([{<")]
[ys (bytes->list #")]}>")])
(append (map cons xs ys) (map cons ys xs)))])
@ -33,65 +32,66 @@
[else b]))
(lambda (bs) (list->bytes (map rev-byte (reverse (bytes->list bs)))))))
;; --------------------------------------------------------------------------
;; syntax
;; ----------------------------------------------------------------------------
;; syntax
;; basic syntax customization
(define ch:command #\@)
(define ch:comment #\;)
(define ch:expr-escape #\|)
(define ch:datums-begin #\[)
(define ch:datums-end #\])
(define ch:lines-begin #\{)
(define ch:lines-end #\})
;; basic syntax customization
(define ch:command #\@)
(define ch:comment #\;)
(define ch:expr-escape #\|)
(define ch:datums-begin #\[)
(define ch:datums-end #\])
(define ch:lines-begin #\{)
(define ch:lines-end #\})
(define str:lines-begin* #"(\\|[^a-zA-Z0-9 \t\r\n\f@\\\177-\377{]*)\\{")
(define str:end-of-line "[ \t]*\r?\n[ \t]*") ; eat spaces on the next line
(define str:lines-begin* #"(\\|[^a-zA-Z0-9 \t\r\n\f@\\\177-\377{]*)\\{")
(define str:end-of-line "[ \t]*\r?\n[ \t]*") ; eat spaces on the next line
;; regexps based on the above (more in make-dispatcher)
(define re:whitespaces (^px "\\s+"))
(define re:comment-start (^px ch:comment))
(define re:comment-line (^px "[^\n]*\n[ \t]*")) ; like tex's `%'
(define re:expr-escape (^px ch:expr-escape))
(define re:datums-begin (^px ch:datums-begin))
(define re:datums-end (^px ch:datums-end))
(define re:lines-begin (^px ch:lines-begin))
(define re:lines-begin* (^px str:lines-begin*))
(define re:lines-end (^px ch:lines-end))
(define re:end-of-line (^px str:end-of-line))
;; regexps based on the above (more in make-dispatcher)
(define re:whitespaces (^px "\\s+"))
(define re:comment-start (^px ch:comment))
(define re:comment-line (^px "[^\n]*\n[ \t]*")) ; like tex's `%'
(define re:expr-escape (^px ch:expr-escape))
(define re:datums-begin (^px ch:datums-begin))
(define re:datums-end (^px ch:datums-end))
(define re:lines-begin (^px ch:lines-begin))
(define re:lines-begin* (^px str:lines-begin*))
(define re:lines-end (^px ch:lines-end))
(define re:end-of-line (^px str:end-of-line))
;; --------------------------------------------------------------------------
;; utilities
;; ----------------------------------------------------------------------------
;; utilities
(define (internal-error label)
(define (internal-error label)
(error 'scribble-reader "internal error [~a]" label))
;; like `regexp-match/fail-without-reading', without extras; the regexp that
;; is used must be anchored -- nothing is dropped
(define (*regexp-match-peek-positions pattern input-port)
;; like `regexp-match/fail-without-reading', without extras; the regexp that
;; is used must be anchored -- nothing is dropped
(define (*regexp-match-peek-positions pattern input-port)
#; ; sanity checks, not needed unless this file is edited
(unless (and (byte-regexp? pattern)
(regexp-match? #rx#"^\\^" (object-name pattern)))
(internal-error 'invalid-bregexp))
(regexp-match-peek-positions pattern input-port))
;; the following doesn't work -- must peek first
;; (define (*regexp-match-positions pattern input-port)
;; (unless (and (byte-regexp? pattern)
;; (regexp-match? #rx#"^\\^" (object-name pattern)))
;; (internal-error 'invalid-bregexp))
;; (regexp-match-peek-positions pattern input-port))
(define (*regexp-match pattern input-port)
;; the following doesn't work -- must peek first
;; (define (*regexp-match-positions pattern input-port)
;; #; ; sanity checks, not needed unless this file is edited
;; (unless (and (byte-regexp? pattern)
;; (regexp-match? #rx#"^\\^" (object-name pattern)))
;; (internal-error 'invalid-bregexp))
;; (regexp-match-peek-positions pattern input-port))
(define (*regexp-match pattern input-port)
(let ([m (*regexp-match-peek-positions pattern input-port)])
(and m (let ([s (read-bytes (cdar m) input-port)])
(cons s (map (lambda (p) (and p (subbytes s (car p) (cdr p))))
(cdr m)))))))
;; like regexp-match, but returns the whole match
(define (*regexp-match1 pattern input-port)
;; like regexp-match, but returns the whole match
(define (*regexp-match1 pattern input-port)
(let ([m (*regexp-match-peek-positions pattern input-port)])
(and m (read-bytes (cdar m) input-port))))
;; Utility for readtable-based caches
(define (readtable-cached fun)
;; Utility for readtable-based caches
(define (readtable-cached fun)
(let ([cache (make-hash-table 'weak)])
(letrec ([readtable-cached
(case-lambda
@ -103,9 +103,9 @@
[() (readtable-cached (current-readtable))])])
readtable-cached)))
;; Skips whitespace characters, sensitive to the current readtable's
;; definition of whitespace; optimizes common spaces when possible
(define skip-whitespace
;; Skips whitespace characters, sensitive to the current readtable's
;; definition of whitespace; optimizes common spaces when possible
(define skip-whitespace
(let* ([plain-readtables (make-hash-table 'weak)]
[plain-spaces " \t\n\r\f"]
[plain-spaces-list (string->list " \t\n\r\f")]
@ -134,8 +134,8 @@
(unless (eof-object? ch)
(when (whitespace? ch rt) (read-char port) (loop)))))))))
;; make n spaces, cached for n
(define make-spaces
;; make n spaces, cached for n
(define make-spaces
(let ([t (make-hash-table)])
(lambda (n)
(hash-table-get t n
@ -143,28 +143,27 @@
(let ([s (make-string n #\space)])
(hash-table-put! t n s) s))))))
(define (bytes-width bs start)
(define (bytes-width bs start)
(let ([len (bytes-length bs)])
(if (regexp-match? #rx"^ *$" bs start)
(- (bytes-length bs) start)
(let loop ([i start] [w 0])
(if (= i len)
w
(loop (add1 i) (+ w (if (eq? 9 (bytes-ref bs i))
(- 8 (modulo w 8))
1))))))))
(loop (add1 i)
(+ w (if (eq? 9 (bytes-ref bs i)) (- 8 (modulo w 8)) 1))))))))
;; a unique eol string
(define eol-token "\n")
(define (eol-syntax? x) (and (syntax? x) (eq? eol-token (syntax-e x))))
;; sanity check, in case this property gets violated in the future
(unless (eol-syntax? (datum->syntax #f eol-token))
;; a unique eol string
(define eol-token "\n")
(define (eol-syntax? x) (and (syntax? x) (eq? eol-token (syntax-e x))))
;; sanity check, in case this property gets violated in the future
(unless (eol-syntax? (datum->syntax #f eol-token))
(internal-error 'invalid-assumption))
;; --------------------------------------------------------------------------
;; main reader function for @ constructs
;; ----------------------------------------------------------------------------
;; main reader function for @ constructs
(define (dispatcher char inp source-name line-num col-num position
(define (dispatcher char inp source-name line-num col-num position
start-inside? command-readtable ch:command
re:command re:line-item* re:line-item
re:line-item-no-nests datum-readtable
@ -187,7 +186,7 @@
(define (*match rx) (*regexp-match rx inp))
(define (*match1 rx) (*regexp-match1 rx inp))
;; (define (*skip rx) (*regexp-match-positions rx inp)) <- see above
;; (define (*skip rx) (*regexp-match-positions rx inp)) ; <- see above
(define (*skip rx) (*regexp-match1 rx inp))
(define (*peek rx) (*regexp-match-peek-positions rx inp))
@ -212,13 +211,12 @@
;; needed)
(define (done-items xs)
;; a column marker is either a non-negative integer N (saying the the
;; following code came from at column N), or a negative integer -N
;; (saying that the following code came from column N but no need to add
;; following code came from at column N), or a negative integer -N (saying
;; that the following code came from column N but no need to add
;; indentation at this point because it is at the openning of a {...});
;; `get-lines*' is careful not to include column markers before a newline
;; or the end of the text, and a -N marker can only come from the
;; beginning of the text (and it's never there if the text began with a
;; newline)
;; or the end of the text, and a -N marker can only come from the beginning
;; of the text (and it's never there if the text began with a newline)
(if (andmap eol-syntax? xs)
;; nothing to do
(reverse xs)
@ -277,8 +275,8 @@
(cdr stxs))
(cons stx stxs))))
;; helper for `get-lines*' drop a column marker if the previous item was
;; also a newline (or the beginning)
;; helper for `get-lines*' drop a column marker if the previous item was also
;; a newline (or the beginning)
(define (maybe-drop-marker r)
(if (and (pair? r) (integer? (car r))
(or (null? (cdr r)) (eol-syntax? (cadr r))))
@ -325,9 +323,9 @@
=> (lambda (m)
(let ([x (cond
[(cadr m)
;; the command is a string escape, use `read-stx*'
;; to not get a placeholder, so we can merge the
;; string to others
;; the command is a string escape, use `read-stx*' to
;; not get a placeholder, so we can merge the string
;; to others
(read-stx*)]
[(caddr m)
;; it's an expression escape, get multiple
@ -377,8 +375,8 @@
(read-delimited-list re:datums-begin re:datums-end ch:datums-end)))
(define (get-escape-expr single?)
;; single? means expect just one expression (or none, which is returned
;; as a special-comment)
;; single? means expect just one expression (or none, which is returned as
;; a special-comment)
(let ([get (lambda ()
(parameterize ([current-readtable command-readtable])
(read-delimited-list re:expr-escape re:expr-escape
@ -405,11 +403,9 @@
(define (get-rprefixes) ; return punctuation prefixes in reverse
(let loop ([r '()])
(let-values ([(line col pos) (port-next-location inp)])
(cond
[(*match1 #rx#"^(?:'|`|,@?)")
(cond [(*match1 #rx#"^(?:'|`|,@?)")
=> (lambda (m)
(let ([sym (cond
[(assoc m '([#"'" quote]
(let ([sym (cond [(assoc m '([#"'" quote]
[#"`" quasiquote]
[#"," unquote]
[#",@" unquote-splicing]))
@ -437,8 +433,8 @@
([(rpfxs) (get-rprefixes)]
[(cmd datums lines)
(cond [(get-lines)
;; try get-lines first -- so @|{...}| is not used as a
;; simple expression escape, same for get-datums
;; try get-lines first -- so @|{...}| is not used as a simple
;; expression escape, same for get-datums
=> (lambda (lines) (values #f #f lines))]
[(get-datums)
=> (lambda (datums) (values #f datums (get-lines)))]
@ -470,11 +466,10 @@
(if (null? rpfxs)
stx
(loop (cdr rpfxs) (list (car rpfxs) stx))))])
(datum->syntax #f stx
(list source-name line-num col-num position
(datum->syntax #f stx (list source-name line-num col-num position
(span-from position))))]))
(define (make-dispatcher start-inside? ch:command
(define (make-dispatcher start-inside? ch:command
get-command-readtable get-datum-readtable
syntax-post-processor)
(define re:command (^px ch:command
@ -492,11 +487,11 @@
re:command re:line-item* re:line-item re:line-item-no-nests
(get-datum-readtable) syntax-post-processor)))
;; --------------------------------------------------------------------------
;; readtable
;; ----------------------------------------------------------------------------
;; readtable
(provide make-at-readtable)
(define (make-at-readtable
(provide make-at-readtable)
(define (make-at-readtable
#:readtable [readtable (current-readtable)]
#:command-char [command-char ch:command]
#:start-inside? [start-inside? #f]
@ -534,19 +529,19 @@
"bad datum-readtable: ~e" datum-readtable)]))
at-rt)
(provide use-at-readtable)
(define use-at-readtable
(provide use-at-readtable)
(define use-at-readtable
(make-keyword-procedure
(lambda (kws kw-args . rest)
(port-count-lines! (current-input-port))
(current-readtable
(keyword-apply make-at-readtable kws kw-args rest)))))
;; utilities for below
(define make-default-at-readtable
;; utilities for below
(define make-default-at-readtable
(readtable-cached
(lambda (rt) (make-at-readtable #:readtable rt))))
(define make-default-at-dispatcher/inside
(define make-default-at-dispatcher/inside
(readtable-cached
(lambda (rt)
(let-values ([(_1 disp _2)
@ -555,42 +550,40 @@
ch:command)])
disp))))
;; --------------------------------------------------------------------------
;; readers
;; ----------------------------------------------------------------------------
;; readers
(define default-src (gensym 'scribble-reader))
(define (src-name src port)
(define default-src (gensym 'scribble-reader))
(define (src-name src port)
(if (eq? src default-src) (object-name port) src))
(define-syntax with-at-reader
(define-syntax with-at-reader
(syntax-rules ()
[(_ body ...)
(parameterize ([current-readtable (make-default-at-readtable)])
body ...)]))
(define (*read [inp (current-input-port)])
(define (*read [inp (current-input-port)])
(with-at-reader (read inp)))
(define (*read-syntax [src default-src]
(define (*read-syntax [src default-src]
[inp (current-input-port)])
(with-at-reader (read-syntax (src-name src inp) inp)))
(define (read-inside [inp (current-input-port)])
(define (read-inside [inp (current-input-port)])
(let*-values ([(line col pos) (port-next-location inp)]
[(inside-dispatcher) (make-default-at-dispatcher/inside)])
(with-at-reader
(syntax->datum
(inside-dispatcher #f inp (object-name inp) line col pos)))))
(define (read-inside-syntax [src default-src]
(define (read-inside-syntax [src default-src]
[inp (current-input-port)])
(let*-values ([(line col pos) (port-next-location inp)]
[(inside-dispatcher) (make-default-at-dispatcher/inside)])
(with-at-reader
(inside-dispatcher #f inp (src-name src inp) line col pos))))
(provide (rename-out [*read read]
(provide (rename-out [*read read]
[*read-syntax read-syntax])
read-inside read-inside-syntax)
)

34
collects/scribble/text.ss Normal file
View File

@ -0,0 +1,34 @@
#lang scheme/base
(require scheme/promise)
(provide (all-from-out scheme/base scheme/promise))
(define (show x p)
(let show ([x x])
(cond [(or (void? x) (not x) (null? x)) (void)]
[(pair? x) (show (car x)) (show (cdr x))]
[(promise? x) (show (force x))]
[(keyword? x) (show (keyword->string x))]
[(and (procedure? x) (procedure-arity-includes? x 0)) (show (x))]
;; display won't work, since it calls us back
;; [else (display x p)]
;; things that are printed directly
[(bytes? x) (write-bytes x p)]
[(string? x) (write-string x p)]
[(char? x) (write-char x p)]
[(number? x) (write x p)]
;; generic fallback
[else (show (format "~a" x))])))
;; this is too much -- it also changes error messages
;; (global-port-print-handler show)
(port-display-handler (current-output-port) show)
;; the default prints a newline too, avoid that
(current-print display)
;; make it possible to use this language through a repl
;; --> won't work: need an `inside' reader that reads a single expression
;; (require (prefix-in * "text/lang/reader.ss"))
;; (current-prompt-read
;; (lambda () (parameterize ([read-accept-reader #t]) (*read-syntax))))

View File

@ -0,0 +1,32 @@
#lang scheme/base
(require (prefix-in s: "../../reader.ss"))
(provide (rename-out [*read read])
(rename-out [*read-syntax read-syntax]))
(define (*read [inp (current-input-port)])
(wrap inp (s:read-inside inp)))
(define (*read-syntax [src #f] [port (current-input-port)])
(wrap port (s:read-inside-syntax src port)))
(define (wrap port body)
(define (strip-leading-newlines stxs)
(if (null? stxs)
stxs
(let ([p (syntax-property (car stxs) 'scribble)])
(if (and (pair? p) (eq? (car p) 'newline))
(strip-leading-newlines (cdr stxs))
stxs))))
(let* ([p-name (object-name port)]
[name (if (path? p-name)
(let-values ([(base name dir?) (split-path p-name)])
(string->symbol (path->string (path-replace-suffix
name #""))))
'page)]
[id 'doc]
[body (if (syntax? body)
(strip-leading-newlines (syntax->list body))
body)])
`(module ,name scribble/text (#%module-begin . ,body))))