From 6714169149d0918a0f6927cf427bf0781163e8f1 Mon Sep 17 00:00:00 2001 From: Eli Barzilay Date: Thu, 18 May 2006 01:43:31 +0000 Subject: [PATCH] initial version svn: r2960 --- collects/scribble/doc.txt | 228 +++++++++++++++++++++++++++++ collects/scribble/info.ss | 5 + collects/scribble/reader.ss | 232 ++++++++++++++++++++++++++++++ collects/scribble/run-scribble.ss | 70 +++++++++ collects/scribble/scribble.ss | 73 ++++++++++ 5 files changed, 608 insertions(+) create mode 100644 collects/scribble/doc.txt create mode 100644 collects/scribble/info.ss create mode 100644 collects/scribble/reader.ss create mode 100644 collects/scribble/run-scribble.ss create mode 100644 collects/scribble/scribble.ss diff --git a/collects/scribble/doc.txt b/collects/scribble/doc.txt new file mode 100644 index 0000000000..e27560adae --- /dev/null +++ b/collects/scribble/doc.txt @@ -0,0 +1,228 @@ +Implements the @-reader macro for embedding text in Scheme code. + +*** Introduction + +The @-reader is designed to be a convenient facility for embedding +Scheme code and text. "@" is chosen as one of the least-used characters +in Scheme code (the options are: "&" (969 uses in the collects +hierarchy), "|" (1676), "@" (2105) "^" (2257) "$" (2259)). + +To use this file, you can use MzScheme's #reader form: + + #reader(file "...path to this file...") + +But note that this will only do the concrete-level translation, and not +give you any useful bindings. Alternatively, you can start MzScheme, +require this file and use the `use-at-readtable' function to switch the +current readtable to the at-readtable. You can do this in a single +command line: + + mzscheme -te ...this-file... '(use-at-readtable)' + +*** Concrete Syntax + +The *concrete* syntax of @-commands is (informally, more details below): + + "@" "[" "]" "{" "}" + +where all parts are optional, but at least one should be present. +(Note: since the reader will try to see if there is a "{...body...}" in +the input, it is awkward to use body-less constructs on an interactive +REPL since reading an expression succeeds only when there is a new +expression available.) "@" is set as a terminating reader macro, so if +you want to use it in Scheme code, you need to quote it with `\@' or the +whole identifier with `|ba@rs|'. This has no effect occurrences of "@" +in Scheme strings. + +Roughly speaking, such a construct is translated to: + + ( ... ...) + +so the part determines what Scheme code the whole construct is +translated into. The common case is when is a Scheme identifier, +which generates a plain Scheme form with keyword-values and the body +text. The body is given as a sequence of strings, with a separate "\n" +string for each end of line. For example: + + @foo{bar baz --is-read-as--> (foo "bar baz" "\n" "blah") + blah} + +It is your responsibility to make sure that `foo' is bound (in any way: +it can be a macro). To see the forms, you can use quote as usual, for +example: + + '@foo{bar} + +** Concrete Syntax: the command part + +The command can have Scheme punctuation prefixes, which will end up +wrapping the *whole* expression. For example: + + @`',@foo{blah} --is-read-as--> `',@(foo "blah") + +When writing Scheme code, this means that @`',@foo{blah} is exactly the +same as `@',@foo{blah} and `',@@foo{blah}, but these constructs can +appear in body texts where they will be read differently (see below). + +The command itself is not limited to a Scheme identifier -- it can be +any Scheme expression: + + @(lambda (x) x){blah} --is-read-as--> ((lambda (x) x) "blah") + +In addition, the command can be omitted altogether, which will omit it +from the translation, resulting in an s-expression that usually contains +just strings: + + @{foo bar --is-read-as--> ("foo bar" "\n" "baz") + baz} + + @'{foo bar --is-read-as--> (quote ("foo bar" "\n" "baz")) + baz} + +If the command part begins with a ";" (with no newline between the "@" +and the ";"), then the construct is a comment. There are two comment +forms, one for an arbitrary-text, possibly nested comments, and another +one for a -to-the-end-of-the-line comment: + + @; * { ...any-text-including-newlines... } + @; + +Note that in the first form the commented body must still parse +correctly (see the description of the body syntax below). + +Tip: if you're editing in some Scheme-mode, it is useful to comment out +blocks like this: + + @; + { + ... + } + +or + + @;{ + ... + ;} + +otherwise you will probably confuse the editor into treating the file as +having imbalanced parenthesis. + +Finally, note that there are no special rules for using "@" in the +command itself, which can lead to things like: + + @@foo{bar}{baz} --is-read-as--> ((foo "bar") "baz") + +but you should *not* rely on such behavior, since "@@" might be used +differently in the future (eg, making "@@" be "@" in a body text). + +** Concrete Syntax: the body part + +The syntax of the body part is intended to be as convenient as possible +for writing free text. It can contain free text, and the only +characters with special meaning are braces, "@", "$", "|". As described +above, the text turns to string arguments for the resulting forms. +Spaces at the beginning of lines are discarded, and newlines turn to +"\n" strings. As part of trying to do the `right thing', an empty line +at the beginning and at the end are discarded, so + + @foo{ + bar --is-read-as--> (foo "bar") <--is-read-as-- @foo{bar} + } + +If an "@" appears in the input, then it is interpreted as Scheme code, +which means that the at-reader will be applied recursively, and the +resulting syntax will appear as an argument, among other string +contents. For example: + + @foo{a @bar{b} c} --is-read-as--> (foo "a " (bar "b") " c") + +A "$" also switches to Scheme mode, but it is a simple escape back to +Scheme: it will read the next Scheme expression and plant it in the +form. The expression can be wrapped in braces in case it touches text +that you don't want to include. Examples + + @foo{foo $bar foo} --is-read-as--> (foo "foo " bar " foo") + @foo{foo$bar.} --is-read-as--> (foo "foo" bar.) + @foo{foo${bar}.} --is-read-as--> (foo "foo" bar ".") + +Braces are only problematic because a "}" is used to mark the end of the +text. They are therefore allowed, as long as they are balanced. For +example: + + @foo{f{o}o} --is-read-as--> (foo "f{o}o") + +There is also an alternative syntax for the body, one that specifies a +new marker for the end. To do this, use two openning braces with +punctuation characters between them (no spaces, and no alphanumerics). +If this form is used, then the reversed form (reverse the charcters and +swap round, square, curly, and angle parentheses) is used to close the +text. For example: + + @foo{<{foo{{{bar}>} --is-read-as--> (foo "foo{{{bar") + +For situations where spaces at the beinning of lines matter (various +verbatim environments), you should begin a line with a "|". It has no +other special meaning -- so to use a "|" as the first character in the +text, simply use another before it. + + @code{ + |(define (foo x) --is-read-as--> (code "(define (foo x)" "\n" + | |error|) " |error|)") + } + +In other situations, newlines matter -- you might want to avoid a +newline token in some place. To avoid a newline and still break the +source line, use a line comment. As in TeX, these will consume text +upto and including the end of the line and all following whitespace. +Example: + + @foo{bar @; + baz@; --is-read-as--> (foo "bar baz.") + .} + +A "|" that follows this is still used for marking the beginning of the +text: + + @foo{bar @; + baz@; --is-read-as--> (foo "bar baz .") + | .} + +Finally, to quote braces, "@" or "$", precede them with a backslash. +Note that this is an irregular use of backslash quoting! To use "\@" in +your text, simply precede it with a backslash. The general rule is that +to use N backslashes-and-a-special-character, you should precede it with +one extra backslash. Any other use of a backslash (one that is not +followed by more bslashes and a special character) is preserved in the +text as usual. Examples: + + @foo{b\$ar} --is-read-as--> (foo "b$ar") + @foo{b\\$ar} --is-read-as--> (foo "b\\$ar") + @foo{b\\\$ar} --is-read-as--> (foo "b\\\\$ar") + @foo{b\{\$\@ar} --is-read-as--> (foo "b{$@ar") + @foo{b\ar} --is-read-as--> (foo "b\\ar") + @foo{b\\ar} --is-read-as--> (foo "b\\\\ar") + +** Concrete Syntax: the keyword-value part + +The keyword-value part can contain arbitrary Scheme expressions, which +are simply stacked before the body text: + + @foo[1 (* 2 3)]{bar} --is-read-as--> (foo 1 (* 2 3) "bar") + +But there is one change that makes it easy to use for keyword/values: +first of all, "=" is a terminating character in the textual scope. +Secondly, if there is a "=" sequence (spaces +optional), then it is converted to "#:identifier ": + + @foo[(* 2 3) a=b]{bar} --is-read-as--> (foo (* 2 3) #:a b "bar") + +*** How should this be used? + +This facility can be used in any way you want. All you need is to use +function names that you bind. You can even use quasi-quotes, skipping +the need for functions, for example: + + > (define (important . text) @`b{@u{@big{$,@text}}}) + > (important @`p{This is an important announcement! + Read it!}) + (b (u (big (p "This is an important announcement!" eol "Read it!")))) diff --git a/collects/scribble/info.ss b/collects/scribble/info.ss new file mode 100644 index 0000000000..bdf114c9a3 --- /dev/null +++ b/collects/scribble/info.ss @@ -0,0 +1,5 @@ +(module info (lib "infotab.ss" "setup") + (define name "Scribble") + (define blurb '("MzScheme extensions for writing text.")) + (define mzscheme-launcher-names '("scribble")) + (define mzscheme-launcher-libraries '("run-scribble.ss"))) diff --git a/collects/scribble/reader.ss b/collects/scribble/reader.ss new file mode 100644 index 0000000000..7a45d7a313 --- /dev/null +++ b/collects/scribble/reader.ss @@ -0,0 +1,232 @@ +(module reader mzscheme + (require (lib "string.ss") (lib "readerr.ss" "syntax")) + + (define cmd-char #\@) + + (define open-attrs #rx#"^[ \t\r\n]*[[][ \t\r\n]*") + (define open-lines #rx#"^[ \t\r\n]*[{](?:[ \t]*\r?\n[ \t]*)?") ; 1 newline + (define open-lines* '(#"^[ \t\r\n]*" #"(?:[ \t]*\r?\n[ \t]*)?")) + (define open-lines-special ; a special ending expected: @foo{<{ ... }>} etc + #rx#"^[ \t\r\n]*([{][^a-zA-Z0-9 \t\r\n@$\\]*[{])(?:[ \t]*\r?\n[ \t]*)?") + (define open-attr/lines #rx#"^[ \t\r\n]*[[{][ \t\r\n]*") + (define close-attrs #rx#"^[ \t\r\n]*[]]") + (define close-lines #rx#"^(?:[ \t]*\r?\n[ \t]*)?[}]") ; swallow 1 newline + (define close-lines* '(#"^(?:[ \t]*\r?\n[ \t]*)?" #"")) + (define comment-start #rx#"^[ \t]*;") + (define comment-line #rx#"^[^\r\n]*\r?\n[ \t]*") ; like tex's `%' nl & space + (define attr-sep #rx#"^[ \t\r\n]*=[ \t\r\n]*") + (define scheme-start #rx#"^[$]") + (define scheme-start* #rx#"^[$][ \t\r\n]*{") + (define scheme-end* #rx#"^[ \t\r\n]*}") + (define sub-start #rx#"^[@]") + (define line-item #rx#"^(?:[^{}@$\r\n]*[^\\{}@$\r\n]|[\\]+[{}@$])+") + (define line-item* '(#"^(?:[^{}@$\r\n]*[^\\{}@$\r\n]|[\\]+(?:[@$]|" #"))+")) + (define end-of-line #rx#"^([\\]+)?\r?\n[ \t]*") ; make \-eoln possible + (define bar-pfx-remove #rx#"^[|]") + (define bslash-unquote #rx#"[\\]([\\]*[{}@$])") + (define bslash-unquote* '(#"[\\]([\\]+(?:[@$]|" #"))")) + + (define byte-pairs + (map (lambda (b) (cons (bytes-ref b 0) (bytes-ref b 1))) + '(#"()" #"[]" #"{}" #"<>"))) + + (define attr-readtable + (make-readtable #f #\= 'terminating-macro + (lambda (char inp source-name line-num col-num position) + (datum->syntax-object + #f (string->symbol (string char)) + (list source-name line-num col-num position 1))))) + + (define (dispatcher char inp source-name line-num col-num position) + (define (next-syntax . plain?) + (let ([x ((if (and (pair? plain?) (car plain?)) + read-syntax read-syntax/recursive) + source-name inp)]) + (if (special-comment? x) (apply next-syntax plain?) x))) + (define (cur-pos) + (let-values ([(line col pos) (port-next-location inp)]) + pos)) + (define (span-from start) + (and start (- (cur-pos) start))) + (define (read-error msg . xs) + (let-values ([(line col pos) (port-next-location inp)]) + (raise-read-error (apply format msg xs) source-name line col pos #f))) + (define (reverse-bytes bytes) + (define (rev-byte b) + (cond [(assq b byte-pairs) => cdr] + [else b])) + (let* ([len (bytes-length bytes)] [r (make-bytes len)]) + (let loop ([i (sub1 len)]) + (when (<= 0 i) + (bytes-set! r i (rev-byte (bytes-ref bytes (- len i 1)))) + (loop (sub1 i)))) + r)) + (define eol-token "\n") + (define (get-attr) + (if (regexp-match/fail-without-reading close-attrs inp) #f + (let* ([fst (next-syntax #t)] + [snd (and (symbol? (syntax-e fst)) + (regexp-match/fail-without-reading attr-sep inp) + (next-syntax))]) + (if snd + (list (string->keyword (symbol->string (syntax-e fst))) snd) + (list fst))))) + (define (get-attrs) + (and (regexp-match/fail-without-reading open-attrs inp) + (parameterize ([current-readtable attr-readtable]) + (let loop ([attrs '()]) + (let ([a (get-attr)]) + (if a + (loop (append! (reverse! a) attrs)) + (reverse! attrs))))))) + (define ((get-line open open-re close close-re item-re unquote-re level)) + (let-values ([(line col pos) (port-next-location inp)]) + (define (make-stx sexpr) + (datum->syntax-object #f + (if (bytes? sexpr) (bytes->string/utf-8 sexpr) sexpr) + (list source-name line col pos (span-from pos)))) + (cond [(regexp-match/fail-without-reading close-re inp) + ;; #f + => (lambda (m) + (let ([l (sub1 (unbox level))]) + (set-box! level l) + (and (<= 0 l) (make-stx (car m)))))] + ;; [(regexp-match-peek-positions open-re inp) + ;; (read-error "unexpected `~a'" open)] + [(regexp-match/fail-without-reading open-re inp) + => (lambda (m) + (set-box! level (add1 (unbox level))) + (make-stx (car m)))] + [(regexp-match/fail-without-reading scheme-start* inp) + (let ([s (next-syntax)]) + (if (regexp-match/fail-without-reading scheme-end* inp) + s (read-error "expected `}'")))] + [(regexp-match/fail-without-reading scheme-start inp) + (next-syntax)] ; read a real expression here + [(regexp-match-peek-positions sub-start inp) + (read-syntax/recursive source-name inp)] ; include comment objs + [(regexp-match/fail-without-reading end-of-line inp) + => (lambda (m) + (if (cadr m) ; backslashes? + (list (make-stx (cadr m)) (make-stx eol-token)) + (make-stx eol-token)))] + [(regexp-match/fail-without-reading item-re inp) + => (lambda (m) + (let* ([m (car m)] + [m (regexp-replace bar-pfx-remove m #"")] + [m (regexp-replace* unquote-re m #"\\1")]) + (make-stx m)))] + [(and (not (eq? item-re line-item)) + (regexp-match/fail-without-reading #rx#"[{}]" inp)) + => (lambda (m) + (make-stx (car m)))] + [(regexp-match/fail-without-reading #rx#"^$" inp) + (read-error "missing `~a'" close)] + [else (read-error "internal error")]))) + ;; adds stx (new syntax) to the list of stxs, merging it if both are + ;; strings, except for newline markers + (define (maybe-merge stx stxs) + (if (and (pair? stxs) (syntax? stx) (syntax? (car stxs)) + (string? (syntax-e stx)) + (string? (syntax-e (car stxs))) + (not (eq? eol-token (syntax-e stx))) + (not (eq? eol-token (syntax-e (car stxs))))) + (let ([fst (car stxs)]) + (cons (datum->syntax-object stx + (string-append (syntax-e fst) (syntax-e stx)) + (list (syntax-source fst) + (syntax-line fst) + (syntax-column fst) + (syntax-position fst) + (span-from (syntax-position fst)))) + (cdr stxs))) + (cons stx stxs))) + (define (get-lines) + (define get + (cond [(regexp-match/fail-without-reading open-lines-special inp) + => (lambda (m) + (let* ([open (cadr m)] + [close (reverse-bytes open)] + [open-re (regexp-quote open)] + [close-re (regexp-quote close)] + [either-re (bytes-append open-re #"|" close-re)] + [bre (lambda (pfx/sfx re) + (byte-regexp + (bytes-append (car pfx/sfx) + re + (cadr pfx/sfx))))]) + (get-line open (bre open-lines* open-re) + close (bre close-lines* close-re) + (bre line-item* either-re) + (bre bslash-unquote* either-re) + (box 0))))] + [(regexp-match/fail-without-reading open-lines inp) + (get-line "{" open-lines "}" close-lines + line-item bslash-unquote (box 0))] + [else #f])) + (and get (let loop ([lines '()] [more '()]) + (let-values ([(line more) (if (pair? more) + (values (car more) (cdr more)) + (values (get) more))]) + (cond [(not line) (reverse! lines)] + ;; can happen from a sub @;{...} comment + [(special-comment? line) (loop lines more)] + [(list? line) (loop lines (append line more))] + [else (loop (maybe-merge line lines) more)]))))) + (cond + [(regexp-match/fail-without-reading comment-start inp) + (if (regexp-match-peek-positions open-lines inp) + (get-lines) (regexp-match comment-line inp)) + (make-special-comment #f)] + [else + (let* ([pfx (regexp-match/fail-without-reading + #rx#"^(?:[ \t\r\n]*(?:'|`|,@?))+" inp)] + [pfx + (if pfx + ;; accumulate prefixes in reverse + (let loop ([s (car pfx)] [r '()]) + (cond + [(equal? #"" s) r] + [(regexp-match #rx#"^[ \t\r\n]*('|`|,@?)(.*)$" s) + => (lambda (m) + (loop + (caddr m) + (cons (let ([m (cadr m)]) + (cond [(equal? m #"'") 'quote] + [(equal? m #"`") 'quasiquote] + [(equal? m #",") 'unquote] + [(equal? m #",@") 'unquote-splicing] + [else (error "something bad")])) + r)))] + [else (error "something bad happened")])) + '())] + [cmd (if (regexp-match-peek-positions open-attr/lines inp) + #f + (next-syntax))] ; never #f + [attrs (get-attrs)] + [lines (get-lines)] + [stx (append (or attrs '()) (or lines '()))] + [stx (if cmd (cons cmd stx) stx)] + [stx (let loop ([pfx pfx] [stx stx]) + (if (null? pfx) stx + (loop (cdr pfx) (list (car pfx) stx))))]) + (datum->syntax-object #f stx + (list source-name line-num col-num position (span-from position))))])) + + (define readtable + (make-readtable #f cmd-char 'terminating-macro dispatcher)) + + (provide use-at-readtable) + (define (use-at-readtable) (current-readtable readtable)) + + (define (*read inp) + (parameterize ([current-readtable readtable]) + (read inp))) + + (define (*read-syntax src port) + (parameterize ([current-readtable readtable]) + (read-syntax src port))) + + (provide (rename *read read) (rename *read-syntax read-syntax)) + + ) diff --git a/collects/scribble/run-scribble.ss b/collects/scribble/run-scribble.ss new file mode 100644 index 0000000000..012f12b96f --- /dev/null +++ b/collects/scribble/run-scribble.ss @@ -0,0 +1,70 @@ +(module run-scribble mzscheme + + (require (lib "cmdline.ss")) + + (define exe-name 'scribble) ; for errors + + (define (error* msg . args) + (apply raise-user-error exe-name msg args)) + + (define formats + `([sexpr ,(lambda (v) + ((dynamic-require '(lib "pretty.ss") 'pretty-print) v))])) + (define default-format 'sexpr) + + (define (format->renderer format) + (cond [(assq format formats) + => (lambda (f) + (let ([f (cadr f)]) + (cond [(procedure? f) f] + [else (error 'format->renderer + "internal error: ~s" f)])))] + [else (error* "unknown format ~e (use -L for a list of formats)" + format)])) + + (define (render-file input output format) + (unless (file-exists? input) + (error* "cannot find input file: ~e" input)) + (let* ([contents (dynamic-require `(file ,input) 'contents)] + [renderer (format->renderer format)] + [render (lambda () (renderer contents))]) + (if (equal? output "-") + (render) + (with-output-to-file output render 'truncate)))) + + (define (no-suffix file) + (cond [(regexp-match #rx"^(.*)[.](?:[^./]*)$" + (if (path? file) (path->string file) file)) + => cadr] + [else file])) + + (provide main) + (define (main args) + (define *output-name #f) + (define *format #f) + (command-line (car args) (cdr args) + [once-each + [("-o" "--output") output-name "output name (sometimes a directory)" + (set! *output-name output-name)] + [("-f" "--format") format "output format (implies suffix)" + "(use -L to list available formats)" + (set! *format (string->symbol format))] + [("-L" "--list-formats") "show available output-formats" + (printf "Available formats:\n") + (for-each (lambda (f) (printf " ~a\n" (car f))) formats) + (printf "The default is ~a\n" default-format) + (exit)]] + [args (input-file) + (let* ([fmt (cond [*format *format] + [(and *output-name + (regexp-match #rx"[.]([^.]+)$" *output-name)) + => (lambda (m) (string->symbol (cadr m)))] + [else default-format])] + [output (or *output-name + (format "~a.~a" (no-suffix input-file) fmt))]) + (render-file input-file output fmt))])) + + (main (cons (symbol->string exe-name) + (vector->list (current-command-line-arguments)))) + +) diff --git a/collects/scribble/scribble.ss b/collects/scribble/scribble.ss new file mode 100644 index 0000000000..f1f8c634bc --- /dev/null +++ b/collects/scribble/scribble.ss @@ -0,0 +1,73 @@ +(module scribble mzscheme + (require (prefix a: "reader.ss") (lib "kw.ss") (lib "list.ss")) + (provide (all-from-except mzscheme read read-syntax define lambda) + (rename a:read read) (rename a:read-syntax read-syntax) + (rename define/kw define) (rename lambda/kw lambda)) + + ;; -------------------------------------------------------------------------- + ;; Utilities + + (define-syntax define* + (syntax-rules () + [(_ (name . args) . body) + (begin (provide name) (define/kw (name . args) . body))] + [(_ name val) + (begin (provide name) (define name val))])) + + (define-syntax define-format-element + (syntax-rules () + ([_ name tag] + (begin (define (name . args) (cons tag args)) + (provide name))))) + + ;; allows specifying attributes through sub-elements + (define (subs->keys x keys) + (let ([syms+keys + (append (map (lambda (k) (string->symbol (keyword->string k))) keys) + keys)] + [tag (car x)]) + (define (amb-error key) + (error tag "ambiguous `~a' specification" key)) + (let loop ([xs (cdr x)] [kvs '()] [seen '()]) + (if (not (or (null? xs) (null? (cdr xs)) (not (keyword? (car xs))))) + (let ([key (car xs)]) + (when (memq key seen) (amb-error key)) + (loop (cddr xs) (list* (cadr xs) key kvs) (cons key seen))) + (let loop ([xs xs] [body '()] [seen seen]) + (cond [(null? xs) + (cons tag (append! (reverse! kvs) (reverse! body)))] + [(or (not (pair? (car xs))) (not (memq (caar xs) syms+keys))) + (loop (cdr xs) (cons (car xs) body) seen)] + [else + (let ([key (if (keyword? (caar xs)) + (caar xs) + (string->keyword (symbol->string (caar xs))))]) + (when (memq key seen) (amb-error (caar xs))) + (when (and (pair? (cdar xs)) (keyword? (cadar xs))) + (error tag "sub-element for `~s' key as its own keys" + (caar xs))) + (set! kvs (list* (cdar xs) key kvs)) + (loop (cdr xs) body (cons key seen)))])))))) + + ;; -------------------------------------------------------------------------- + ;; Formatting values and functions + + (define* (document . body) + (subs->keys (cons 'document body) '(#:title #:author #:date))) + + (define-format-element b 'bold) + (define-format-element bf 'bold) + (define-format-element bold 'bold) + (define-format-element i 'italic) + (define-format-element it 'italic) + (define-format-element italic 'italic) + (define-format-element u 'underline) + (define-format-element ul 'underline) + (define-format-element underline 'underline) + (define-format-element tt 'tt) + + (define-format-element title 'title) + (define-format-element author 'author) + ;; (define-format-element date 'date) + + )