diff --git a/collects/scribble/reader.ss b/collects/scribble/reader.ss index 04f55fd82e..d49dcf5c96 100644 --- a/collects/scribble/reader.ss +++ b/collects/scribble/reader.ss @@ -1,596 +1,589 @@ ;; ============================================================================ ;; Implements the @-reader macro for embedding text in Scheme code. -(module reader scheme/base +#lang scheme/base - (require mzlib/string syntax/readerr) +(require mzlib/string syntax/readerr) - ;; -------------------------------------------------------------------------- - ;; utilities for syntax specifications below +;; ---------------------------------------------------------------------------- +;; utilities for syntax specifications below - ;; regexps - (define (px . args) - (let* ([args (let loop ([xs args]) - (if (list? xs) (apply append (map loop xs)) (list xs)))] - [args (map (lambda (x) - (cond - [(bytes? x) x] - [(string? x) (string->bytes/utf-8 x)] - [(char? x) (regexp-quote (bytes (char->integer x)))] - [(not x) #""] - [else (internal-error 'px)])) - args)]) - (byte-pregexp (apply bytes-append args)))) - (define (^px . args) (px #"^" args)) +;; regexps +(define (px . args) + (let* ([args (let loop ([xs args]) + (if (list? xs) (apply append (map loop xs)) (list xs)))] + [args (map (lambda (x) + (cond [(bytes? x) x] + [(string? x) (string->bytes/utf-8 x)] + [(char? x) (regexp-quote (bytes (char->integer x)))] + [(not x) #""] + [else (internal-error 'px)])) + args)]) + (byte-pregexp (apply bytes-append args)))) +(define (^px . args) (px #"^" args)) - ;; reverses a byte string visually - (define reverse-bytes - (let ([pairs (let ([xs (bytes->list #"([{<")] - [ys (bytes->list #")]}>")]) - (append (map cons xs ys) (map cons ys xs)))]) - (define (rev-byte b) - (cond [(assq b pairs) => cdr] - [else b])) - (lambda (bs) (list->bytes (map rev-byte (reverse (bytes->list bs))))))) +;; reverses a byte string visually +(define reverse-bytes + (let ([pairs (let ([xs (bytes->list #"([{<")] + [ys (bytes->list #")]}>")]) + (append (map cons xs ys) (map cons ys xs)))]) + (define (rev-byte b) + (cond [(assq b pairs) => cdr] + [else b])) + (lambda (bs) (list->bytes (map rev-byte (reverse (bytes->list bs))))))) - ;; -------------------------------------------------------------------------- - ;; syntax +;; ---------------------------------------------------------------------------- +;; syntax - ;; basic syntax customization - (define ch:command #\@) - (define ch:comment #\;) - (define ch:expr-escape #\|) - (define ch:datums-begin #\[) - (define ch:datums-end #\]) - (define ch:lines-begin #\{) - (define ch:lines-end #\}) +;; basic syntax customization +(define ch:command #\@) +(define ch:comment #\;) +(define ch:expr-escape #\|) +(define ch:datums-begin #\[) +(define ch:datums-end #\]) +(define ch:lines-begin #\{) +(define ch:lines-end #\}) - (define str:lines-begin* #"(\\|[^a-zA-Z0-9 \t\r\n\f@\\\177-\377{]*)\\{") - (define str:end-of-line "[ \t]*\r?\n[ \t]*") ; eat spaces on the next line +(define str:lines-begin* #"(\\|[^a-zA-Z0-9 \t\r\n\f@\\\177-\377{]*)\\{") +(define str:end-of-line "[ \t]*\r?\n[ \t]*") ; eat spaces on the next line - ;; regexps based on the above (more in make-dispatcher) - (define re:whitespaces (^px "\\s+")) - (define re:comment-start (^px ch:comment)) - (define re:comment-line (^px "[^\n]*\n[ \t]*")) ; like tex's `%' - (define re:expr-escape (^px ch:expr-escape)) - (define re:datums-begin (^px ch:datums-begin)) - (define re:datums-end (^px ch:datums-end)) - (define re:lines-begin (^px ch:lines-begin)) - (define re:lines-begin* (^px str:lines-begin*)) - (define re:lines-end (^px ch:lines-end)) - (define re:end-of-line (^px str:end-of-line)) +;; regexps based on the above (more in make-dispatcher) +(define re:whitespaces (^px "\\s+")) +(define re:comment-start (^px ch:comment)) +(define re:comment-line (^px "[^\n]*\n[ \t]*")) ; like tex's `%' +(define re:expr-escape (^px ch:expr-escape)) +(define re:datums-begin (^px ch:datums-begin)) +(define re:datums-end (^px ch:datums-end)) +(define re:lines-begin (^px ch:lines-begin)) +(define re:lines-begin* (^px str:lines-begin*)) +(define re:lines-end (^px ch:lines-end)) +(define re:end-of-line (^px str:end-of-line)) - ;; -------------------------------------------------------------------------- - ;; utilities +;; ---------------------------------------------------------------------------- +;; utilities - (define (internal-error label) - (error 'scribble-reader "internal error [~a]" label)) +(define (internal-error label) + (error 'scribble-reader "internal error [~a]" label)) - ;; like `regexp-match/fail-without-reading', without extras; the regexp that - ;; is used must be anchored -- nothing is dropped - (define (*regexp-match-peek-positions pattern input-port) - #; ; sanity checks, not needed unless this file is edited - (unless (and (byte-regexp? pattern) - (regexp-match? #rx#"^\\^" (object-name pattern))) - (internal-error 'invalid-bregexp)) - (regexp-match-peek-positions pattern input-port)) - ;; the following doesn't work -- must peek first - ;; (define (*regexp-match-positions pattern input-port) - ;; (unless (and (byte-regexp? pattern) - ;; (regexp-match? #rx#"^\\^" (object-name pattern))) - ;; (internal-error 'invalid-bregexp)) - ;; (regexp-match-peek-positions pattern input-port)) - (define (*regexp-match pattern input-port) - (let ([m (*regexp-match-peek-positions pattern input-port)]) - (and m (let ([s (read-bytes (cdar m) input-port)]) - (cons s (map (lambda (p) (and p (subbytes s (car p) (cdr p)))) - (cdr m))))))) - ;; like regexp-match, but returns the whole match - (define (*regexp-match1 pattern input-port) - (let ([m (*regexp-match-peek-positions pattern input-port)]) - (and m (read-bytes (cdar m) input-port)))) +;; like `regexp-match/fail-without-reading', without extras; the regexp that +;; is used must be anchored -- nothing is dropped +(define (*regexp-match-peek-positions pattern input-port) + #; ; sanity checks, not needed unless this file is edited + (unless (and (byte-regexp? pattern) + (regexp-match? #rx#"^\\^" (object-name pattern))) + (internal-error 'invalid-bregexp)) + (regexp-match-peek-positions pattern input-port)) +;; the following doesn't work -- must peek first +;; (define (*regexp-match-positions pattern input-port) +;; #; ; sanity checks, not needed unless this file is edited +;; (unless (and (byte-regexp? pattern) +;; (regexp-match? #rx#"^\\^" (object-name pattern))) +;; (internal-error 'invalid-bregexp)) +;; (regexp-match-peek-positions pattern input-port)) +(define (*regexp-match pattern input-port) + (let ([m (*regexp-match-peek-positions pattern input-port)]) + (and m (let ([s (read-bytes (cdar m) input-port)]) + (cons s (map (lambda (p) (and p (subbytes s (car p) (cdr p)))) + (cdr m))))))) +;; like regexp-match, but returns the whole match +(define (*regexp-match1 pattern input-port) + (let ([m (*regexp-match-peek-positions pattern input-port)]) + (and m (read-bytes (cdar m) input-port)))) - ;; Utility for readtable-based caches - (define (readtable-cached fun) - (let ([cache (make-hash-table 'weak)]) - (letrec ([readtable-cached - (case-lambda - [(rt) (hash-table-get cache rt - (lambda () - (let ([r (fun rt)]) - (hash-table-put! cache rt r) - r)))] - [() (readtable-cached (current-readtable))])]) - readtable-cached))) +;; Utility for readtable-based caches +(define (readtable-cached fun) + (let ([cache (make-hash-table 'weak)]) + (letrec ([readtable-cached + (case-lambda + [(rt) (hash-table-get cache rt + (lambda () + (let ([r (fun rt)]) + (hash-table-put! cache rt r) + r)))] + [() (readtable-cached (current-readtable))])]) + readtable-cached))) - ;; Skips whitespace characters, sensitive to the current readtable's - ;; definition of whitespace; optimizes common spaces when possible - (define skip-whitespace - (let* ([plain-readtables (make-hash-table 'weak)] - [plain-spaces " \t\n\r\f"] - [plain-spaces-list (string->list " \t\n\r\f")] - [plain-spaces-re (^px "[" plain-spaces "]*")]) - (define (skip-plain-spaces port) - ;; hack: according to the specs, this might consume more characters - ;; than needed, but it works fine with a simple * regexp (because - ;; it can always match an empty string) - (*regexp-match-peek-positions plain-spaces-re port)) - (define (whitespace? ch rt) - (if rt - (let-values ([(like-ch/sym _1 _2) (readtable-mapping rt ch)]) - ;; if like-ch/sym is whitespace, then ch is whitespace - (and (char? like-ch/sym) (char-whitespace? like-ch/sym))) - ;; `char-whitespace?' is fine for the default readtable - (char-whitespace? ch))) - (define plain-readtable? - (readtable-cached - (lambda (rt) - (andmap (lambda (ch) (whitespace? ch rt)) plain-spaces-list)))) - (lambda (port) - (let* ([rt (current-readtable)] [plain? (plain-readtable? rt)]) - (let loop () - (when plain? (skip-plain-spaces port)) - (let ([ch (peek-char port)]) - (unless (eof-object? ch) - (when (whitespace? ch rt) (read-char port) (loop))))))))) +;; Skips whitespace characters, sensitive to the current readtable's +;; definition of whitespace; optimizes common spaces when possible +(define skip-whitespace + (let* ([plain-readtables (make-hash-table 'weak)] + [plain-spaces " \t\n\r\f"] + [plain-spaces-list (string->list " \t\n\r\f")] + [plain-spaces-re (^px "[" plain-spaces "]*")]) + (define (skip-plain-spaces port) + ;; hack: according to the specs, this might consume more characters + ;; than needed, but it works fine with a simple * regexp (because + ;; it can always match an empty string) + (*regexp-match-peek-positions plain-spaces-re port)) + (define (whitespace? ch rt) + (if rt + (let-values ([(like-ch/sym _1 _2) (readtable-mapping rt ch)]) + ;; if like-ch/sym is whitespace, then ch is whitespace + (and (char? like-ch/sym) (char-whitespace? like-ch/sym))) + ;; `char-whitespace?' is fine for the default readtable + (char-whitespace? ch))) + (define plain-readtable? + (readtable-cached + (lambda (rt) + (andmap (lambda (ch) (whitespace? ch rt)) plain-spaces-list)))) + (lambda (port) + (let* ([rt (current-readtable)] [plain? (plain-readtable? rt)]) + (let loop () + (when plain? (skip-plain-spaces port)) + (let ([ch (peek-char port)]) + (unless (eof-object? ch) + (when (whitespace? ch rt) (read-char port) (loop))))))))) - ;; make n spaces, cached for n - (define make-spaces - (let ([t (make-hash-table)]) - (lambda (n) - (hash-table-get t n - (lambda () - (let ([s (make-string n #\space)]) - (hash-table-put! t n s) s)))))) +;; make n spaces, cached for n +(define make-spaces + (let ([t (make-hash-table)]) + (lambda (n) + (hash-table-get t n + (lambda () + (let ([s (make-string n #\space)]) + (hash-table-put! t n s) s)))))) - (define (bytes-width bs start) - (let ([len (bytes-length bs)]) - (if (regexp-match? #rx"^ *$" bs start) - (- (bytes-length bs) start) - (let loop ([i start] [w 0]) - (if (= i len) - w - (loop (add1 i) (+ w (if (eq? 9 (bytes-ref bs i)) - (- 8 (modulo w 8)) - 1)))))))) +(define (bytes-width bs start) + (let ([len (bytes-length bs)]) + (if (regexp-match? #rx"^ *$" bs start) + (- (bytes-length bs) start) + (let loop ([i start] [w 0]) + (if (= i len) + w + (loop (add1 i) + (+ w (if (eq? 9 (bytes-ref bs i)) (- 8 (modulo w 8)) 1)))))))) - ;; a unique eol string - (define eol-token "\n") - (define (eol-syntax? x) (and (syntax? x) (eq? eol-token (syntax-e x)))) - ;; sanity check, in case this property gets violated in the future - (unless (eol-syntax? (datum->syntax #f eol-token)) - (internal-error 'invalid-assumption)) +;; a unique eol string +(define eol-token "\n") +(define (eol-syntax? x) (and (syntax? x) (eq? eol-token (syntax-e x)))) +;; sanity check, in case this property gets violated in the future +(unless (eol-syntax? (datum->syntax #f eol-token)) + (internal-error 'invalid-assumption)) - ;; -------------------------------------------------------------------------- - ;; main reader function for @ constructs +;; ---------------------------------------------------------------------------- +;; main reader function for @ constructs - (define (dispatcher char inp source-name line-num col-num position - start-inside? command-readtable ch:command - re:command re:line-item* re:line-item - re:line-item-no-nests datum-readtable - syntax-post-processor) +(define (dispatcher char inp source-name line-num col-num position + start-inside? command-readtable ch:command + re:command re:line-item* re:line-item + re:line-item-no-nests datum-readtable + syntax-post-processor) - (define (read-error line col pos msg . xs) - (let* ([eof? (and (eq? 'eof msg) (pair? xs))] - [msg (apply format (if eof? xs (cons msg xs)))]) - ((if eof? raise-read-error raise-read-eof-error) - msg source-name line col pos (span-from pos)))) - (define (read-error* . xs) - (apply read-error line-num col-num position xs)) + (define (read-error line col pos msg . xs) + (let* ([eof? (and (eq? 'eof msg) (pair? xs))] + [msg (apply format (if eof? xs (cons msg xs)))]) + ((if eof? raise-read-error raise-read-eof-error) + msg source-name line col pos (span-from pos)))) + (define (read-error* . xs) + (apply read-error line-num col-num position xs)) - (define (read-stx) (read-syntax/recursive source-name inp)) - (define (read-stx/rt rt) (read-syntax/recursive source-name inp #f rt)) - ;; use this to avoid placeholders - (define (read-stx*) - ;; (read-syntax/recursive source-name inp #f (current-readtable) #f) - (read-syntax source-name inp)) + (define (read-stx) (read-syntax/recursive source-name inp)) + (define (read-stx/rt rt) (read-syntax/recursive source-name inp #f rt)) + ;; use this to avoid placeholders + (define (read-stx*) + ;; (read-syntax/recursive source-name inp #f (current-readtable) #f) + (read-syntax source-name inp)) - (define (*match rx) (*regexp-match rx inp)) - (define (*match1 rx) (*regexp-match1 rx inp)) - ;; (define (*skip rx) (*regexp-match-positions rx inp)) <- see above - (define (*skip rx) (*regexp-match1 rx inp)) - (define (*peek rx) (*regexp-match-peek-positions rx inp)) + (define (*match rx) (*regexp-match rx inp)) + (define (*match1 rx) (*regexp-match1 rx inp)) + ;; (define (*skip rx) (*regexp-match-positions rx inp)) ; <- see above + (define (*skip rx) (*regexp-match1 rx inp)) + (define (*peek rx) (*regexp-match-peek-positions rx inp)) - (define (span-from start) - (and start (let-values ([(line col pos) (port-next-location inp)]) - (- pos start)))) + (define (span-from start) + (and start (let-values ([(line col pos) (port-next-location inp)]) + (- pos start)))) - (define (read-delimited-list begin-re end-re end-ch) + (define (read-delimited-list begin-re end-re end-ch) + (let-values ([(line col pos) (port-next-location inp)]) + (and (*skip begin-re) + (let loop ([r '()]) + (skip-whitespace inp) + (if (*skip end-re) + (reverse r) + (let ([x (read-stx)]) + (if (eof-object? x) + (read-error line col pos 'eof "expected a '~a'" end-ch) + (loop (if (special-comment? x) r (cons x r)))))))))) + + ;; gets an accumulated (reversed) list of syntaxes and column markers, and + ;; sorts things out (remove prefix and suffix newlines, adds indentation if + ;; needed) + (define (done-items xs) + ;; a column marker is either a non-negative integer N (saying the the + ;; following code came from at column N), or a negative integer -N (saying + ;; that the following code came from column N but no need to add + ;; indentation at this point because it is at the openning of a {...}); + ;; `get-lines*' is careful not to include column markers before a newline + ;; or the end of the text, and a -N marker can only come from the beginning + ;; of the text (and it's never there if the text began with a newline) + (if (andmap eol-syntax? xs) + ;; nothing to do + (reverse xs) + (let ([mincol (let loop ([xs xs] [m #f]) + (if (null? xs) + m + (let ([x (car xs)]) + (loop (cdr xs) + (if (integer? x) + (let ([x (abs x)]) (if (and m (< m x)) m x)) + m)))))]) + (let loop ([xs (if (and (not start-inside?) (eol-syntax? (car xs))) + (cdr xs) ; trim last eol + xs)] + [r '()]) + (if (or (null? xs) + (and (not start-inside?) + ;; trim first eol + (null? (cdr xs)) (eol-syntax? (car xs)))) + r + (loop + (cdr xs) + (let ([x (car xs)]) + (cond [(integer? x) + (if (or (< x 0) (= x mincol)) + r ; no indentation marker, or zero indentation + (let ([eol (cadr xs)] + [spaces (make-spaces (- x mincol))]) + ;; markers always follow end-of-lines + (unless (eol-syntax? eol) + (internal-error 'done-items)) + (cons (syntax-property + (datum->syntax eol spaces eol) + 'scribble 'indentation) + r)))] + ;; can have special comment values from "@||" + [(special-comment? x) r] + [else (cons x r)])))))))) + + ;; cons stx (new syntax) to the list of stxs, merging it if both are + ;; strings, except for newline markers + (define (maybe-merge stx stxs) + (let* ([2nd (and (syntax? stx) (syntax-e stx))] + [stx0 (and (pair? stxs) (car stxs))] + [1st (and (syntax? stx0) (syntax-e stx0))]) + (if (and (string? 1st) (not (eq? eol-token 1st)) + (string? 2nd) (not (eq? eol-token 2nd))) + (cons (datum->syntax stx0 + (string-append 1st 2nd) + (list (syntax-source stx0) + (syntax-line stx0) + (syntax-column stx0) + (syntax-position stx0) + ;; this is called right after reading stx + (span-from (syntax-position stx0)))) + (cdr stxs)) + (cons stx stxs)))) + + ;; helper for `get-lines*' drop a column marker if the previous item was also + ;; a newline (or the beginning) + (define (maybe-drop-marker r) + (if (and (pair? r) (integer? (car r)) + (or (null? (cdr r)) (eol-syntax? (cadr r)))) + (cdr r) + r)) + + (define (get-lines* re:begin re:end re:cmd-pfx re:item end-token) + ;; re:begin, re:end, end-token can be false if start-inside? is #t; + ;; re:cmd-pfx is a regexp when we do sub-@-reads only after a prefix + (let loop ([lvl 0] + [r (let-values ([(l c p) (port-next-location inp)]) + ;; marker for the beginning of the text + (if c (list (- c)) '()))]) + ;; this loop collects lines etc for the body, and also puts in column + ;; markers (integers) after newlines -- the result is handed off to + ;; `done-items' to finish the job + (define-values (line col pos) (port-next-location inp)) + (define (make-stx sexpr) + (datum->syntax #f + (if (bytes? sexpr) (bytes->string/utf-8 sexpr) sexpr) + (list source-name line col pos (span-from pos)))) + (cond + [(and re:begin (*match1 re:begin)) + => (lambda (m) (loop (add1 lvl) (maybe-merge (make-stx m) r)))] + [(and re:end (*match1 re:end)) + => (lambda (m) + (if (and (zero? lvl) (not start-inside?)) + ;; drop a marker if it's after a last eol item + (done-items (maybe-drop-marker r)) + (loop (sub1 lvl) (maybe-merge (make-stx m) r))))] + [(*match1 re:end-of-line) + => (lambda (m) + (let ([n (car (regexp-match-positions #rx#"\n" m))]) + (loop lvl (list* ; no merge needed + (bytes-width m (cdr n)) + (syntax-property + (make-stx eol-token) + 'scribble `(newline ,(bytes->string/utf-8 m))) + (maybe-drop-marker r)))))] + [(if re:cmd-pfx + (and (*skip re:cmd-pfx) (*peek re:command)) + (*peek re:command)) + ;; read the next value + => (lambda (m) + (let ([x (cond + [(cadr m) + ;; the command is a string escape, use `read-stx*' to + ;; not get a placeholder, so we can merge the string + ;; to others + (read-stx*)] + [(caddr m) + ;; it's an expression escape, get multiple + ;; expressions and put them all here + (read-bytes (caaddr m) inp) + (get-escape-expr #f)] + [else (read-stx)])]) ; otherwise: a plain sub-read + (loop lvl (cond [(eof-object? x) + ;; shouldn't happen -- the sub-read would + ;; raise an error + (internal-error 'get-lines*-sub-read)] + ;; throw away comments + [(special-comment? x) r] + ;; escaped expressions: no merge + [(pair? x) (append (reverse x) r)] + [(null? x) (cons (make-special-comment #f) r)] + [else (maybe-merge x r)]))))] + ;; must be last, since it will always succeed with 1 char + [(*peek re:item) ; don't read: regexp grabs the following text + => (lambda (m) + (loop lvl + (maybe-merge (make-stx (read-bytes (cdadr m) inp)) r)))] + [(*peek #rx#"^$") + (if end-token + (read-error* 'eof "missing closing `~a'" end-token) + (done-items r))] + [else (internal-error 'get-lines*)]))) + + (define (get-lines) + (cond [(*skip re:lines-begin) (get-lines* re:lines-begin re:lines-end #f + re:line-item ch:lines-end)] + [(*match re:lines-begin*) + => (lambda (m) + (let* ([bgn (car m)] + [end (reverse-bytes bgn)] + [bgn* (regexp-quote bgn)] + [end* (regexp-quote end)] + [cmd-pfx* (regexp-quote (cadr m))]) + (get-lines* (^px bgn*) (^px end*) + (^px cmd-pfx* "(?=" ch:command ")") + (re:line-item* bgn* end* cmd-pfx*) + end)))] + [else #f])) + + (define (get-datums) + (parameterize ([current-readtable datum-readtable]) + (read-delimited-list re:datums-begin re:datums-end ch:datums-end))) + + (define (get-escape-expr single?) + ;; single? means expect just one expression (or none, which is returned as + ;; a special-comment) + (let ([get (lambda () + (parameterize ([current-readtable command-readtable]) + (read-delimited-list re:expr-escape re:expr-escape + ch:expr-escape)))]) + (if single? + (let*-values ([(line col pos) (port-next-location inp)] + [(xs) (get)]) + (cond [(not xs) xs] + [(null? xs) (make-special-comment #f)] + [(null? (cdr xs)) (car xs)] + [else (read-error line col pos + "too many escape expressions")])) + (get)))) + + ;; called only when we must see a command in the input + (define (get-command) + (let ([cmd (read-stx/rt command-readtable)]) + (cond [(special-comment? cmd) + (read-error* "expecting a command expression, got a comment")] + [(eof-object? cmd) + (read-error* 'eof "missing command")] + [else cmd]))) + + (define (get-rprefixes) ; return punctuation prefixes in reverse + (let loop ([r '()]) (let-values ([(line col pos) (port-next-location inp)]) - (and (*skip begin-re) - (let loop ([r '()]) - (skip-whitespace inp) - (if (*skip end-re) - (reverse r) - (let ([x (read-stx)]) - (if (eof-object? x) - (read-error line col pos 'eof "expected a '~a'" end-ch) - (loop (if (special-comment? x) r (cons x r)))))))))) + (cond [(*match1 #rx#"^(?:'|`|,@?)") + => (lambda (m) + (let ([sym (cond [(assoc m '([#"'" quote] + [#"`" quasiquote] + [#"," unquote] + [#",@" unquote-splicing])) + => cadr] + [else (internal-error 'get-rprefixes)])]) + (loop (cons (datum->syntax #f sym + (list source-name line col pos + (span-from pos))) + r))))] + [(*skip re:whitespaces) + (read-error* "unexpected whitespace after ~a" ch:command)] + [else r])))) - ;; gets an accumulated (reversed) list of syntaxes and column markers, and - ;; sorts things out (remove prefix and suffix newlines, adds indentation if - ;; needed) - (define (done-items xs) - ;; a column marker is either a non-negative integer N (saying the the - ;; following code came from at column N), or a negative integer -N - ;; (saying that the following code came from column N but no need to add - ;; indentation at this point because it is at the openning of a {...}); - ;; `get-lines*' is careful not to include column markers before a newline - ;; or the end of the text, and a -N marker can only come from the - ;; beginning of the text (and it's never there if the text began with a - ;; newline) - (if (andmap eol-syntax? xs) - ;; nothing to do - (reverse xs) - (let ([mincol (let loop ([xs xs] [m #f]) - (if (null? xs) - m - (let ([x (car xs)]) - (loop (cdr xs) - (if (integer? x) - (let ([x (abs x)]) (if (and m (< m x)) m x)) - m)))))]) - (let loop ([xs (if (and (not start-inside?) (eol-syntax? (car xs))) - (cdr xs) ; trim last eol - xs)] - [r '()]) - (if (or (null? xs) - (and (not start-inside?) - ;; trim first eol - (null? (cdr xs)) (eol-syntax? (car xs)))) - r - (loop - (cdr xs) - (let ([x (car xs)]) - (cond [(integer? x) - (if (or (< x 0) (= x mincol)) - r ; no indentation marker, or zero indentation - (let ([eol (cadr xs)] - [spaces (make-spaces (- x mincol))]) - ;; markers always follow end-of-lines - (unless (eol-syntax? eol) - (internal-error 'done-items)) - (cons (syntax-property - (datum->syntax eol spaces eol) - 'scribble 'indentation) - r)))] - ;; can have special comment values from "@||" - [(special-comment? x) r] - [else (cons x r)])))))))) + (cond + [start-inside? + (datum->syntax #f (get-lines* #f #f #f re:line-item-no-nests #f) + (list source-name line-num col-num position (span-from position)))] + [(*skip re:whitespaces) + (read-error* "unexpected whitespace after ~a" ch:command)] + [(*skip re:comment-start) + (unless (get-lines) (*skip re:comment-line)) + (make-special-comment #f)] + [else + (let*-values + ([(rpfxs) (get-rprefixes)] + [(cmd datums lines) + (cond [(get-lines) + ;; try get-lines first -- so @|{...}| is not used as a simple + ;; expression escape, same for get-datums + => (lambda (lines) (values #f #f lines))] + [(get-datums) + => (lambda (datums) (values #f datums (get-lines)))] + [(get-escape-expr #t) => (lambda (expr) (values expr #f #f))] + [else (values (get-command) (get-datums) (get-lines))])] + [(stx) (and (or datums lines) + (append (or datums '()) (or lines '())))] + [(stx) (or (and cmd stx (cons cmd stx)) ; all parts + stx ; no cmd part => just a parenthesized expression + cmd ; no datums/lines => simple expression (no parens) + ;; impossible: either we saw []s or {}s, or we read a + ;; scheme expression + (internal-error 'dispatcher))] + [(stx) (let ([ds (and datums (length datums))] + [ls (and lines (length lines))]) + (if (or ds ls) + (syntax-property + (if (syntax? stx) + stx + (datum->syntax #f stx + (list source-name line-num col-num position + (span-from position)))) + 'scribble (list 'form ds ls)) + stx))] + [(stx) (syntax-post-processor stx)] + [(stx) + ;; wrap the prefixes around the result + (let loop ([rpfxs rpfxs] [stx stx]) + (if (null? rpfxs) + stx + (loop (cdr rpfxs) (list (car rpfxs) stx))))]) + (datum->syntax #f stx (list source-name line-num col-num position + (span-from position))))])) - ;; cons stx (new syntax) to the list of stxs, merging it if both are - ;; strings, except for newline markers - (define (maybe-merge stx stxs) - (let* ([2nd (and (syntax? stx) (syntax-e stx))] - [stx0 (and (pair? stxs) (car stxs))] - [1st (and (syntax? stx0) (syntax-e stx0))]) - (if (and (string? 1st) (not (eq? eol-token 1st)) - (string? 2nd) (not (eq? eol-token 2nd))) - (cons (datum->syntax stx0 - (string-append 1st 2nd) - (list (syntax-source stx0) - (syntax-line stx0) - (syntax-column stx0) - (syntax-position stx0) - ;; this is called right after reading stx - (span-from (syntax-position stx0)))) - (cdr stxs)) - (cons stx stxs)))) +(define (make-dispatcher start-inside? ch:command + get-command-readtable get-datum-readtable + syntax-post-processor) + (define re:command (^px ch:command + ;; the following identifies string and expression + ;; escapes, see how it is used above + "(?:(\")|("ch:expr-escape"))?")) + (define (re:line-item* bgn end cmd-prefix) + (^px "(.+?)(?:" (and bgn `(,bgn"|")) (and end `(,end"|")) + cmd-prefix ch:command"|"str:end-of-line"|$)")) + (define re:line-item (re:line-item* ch:lines-begin ch:lines-end #f)) + (define re:line-item-no-nests (and start-inside? (re:line-item* #f #f #f))) + (lambda (char inp source-name line-num col-num position) + (dispatcher char inp source-name line-num col-num position + start-inside? (get-command-readtable) ch:command + re:command re:line-item* re:line-item re:line-item-no-nests + (get-datum-readtable) syntax-post-processor))) - ;; helper for `get-lines*' drop a column marker if the previous item was - ;; also a newline (or the beginning) - (define (maybe-drop-marker r) - (if (and (pair? r) (integer? (car r)) - (or (null? (cdr r)) (eol-syntax? (cadr r)))) - (cdr r) - r)) +;; ---------------------------------------------------------------------------- +;; readtable - (define (get-lines* re:begin re:end re:cmd-pfx re:item end-token) - ;; re:begin, re:end, end-token can be false if start-inside? is #t; - ;; re:cmd-pfx is a regexp when we do sub-@-reads only after a prefix - (let loop ([lvl 0] - [r (let-values ([(l c p) (port-next-location inp)]) - ;; marker for the beginning of the text - (if c (list (- c)) '()))]) - ;; this loop collects lines etc for the body, and also puts in column - ;; markers (integers) after newlines -- the result is handed off to - ;; `done-items' to finish the job - (define-values (line col pos) (port-next-location inp)) - (define (make-stx sexpr) - (datum->syntax #f - (if (bytes? sexpr) (bytes->string/utf-8 sexpr) sexpr) - (list source-name line col pos (span-from pos)))) - (cond - [(and re:begin (*match1 re:begin)) - => (lambda (m) (loop (add1 lvl) (maybe-merge (make-stx m) r)))] - [(and re:end (*match1 re:end)) - => (lambda (m) - (if (and (zero? lvl) (not start-inside?)) - ;; drop a marker if it's after a last eol item - (done-items (maybe-drop-marker r)) - (loop (sub1 lvl) (maybe-merge (make-stx m) r))))] - [(*match1 re:end-of-line) - => (lambda (m) - (let ([n (car (regexp-match-positions #rx#"\n" m))]) - (loop lvl (list* ; no merge needed - (bytes-width m (cdr n)) - (syntax-property - (make-stx eol-token) - 'scribble `(newline ,(bytes->string/utf-8 m))) - (maybe-drop-marker r)))))] - [(if re:cmd-pfx - (and (*skip re:cmd-pfx) (*peek re:command)) - (*peek re:command)) - ;; read the next value - => (lambda (m) - (let ([x (cond - [(cadr m) - ;; the command is a string escape, use `read-stx*' - ;; to not get a placeholder, so we can merge the - ;; string to others - (read-stx*)] - [(caddr m) - ;; it's an expression escape, get multiple - ;; expressions and put them all here - (read-bytes (caaddr m) inp) - (get-escape-expr #f)] - [else (read-stx)])]) ; otherwise: a plain sub-read - (loop lvl (cond [(eof-object? x) - ;; shouldn't happen -- the sub-read would - ;; raise an error - (internal-error 'get-lines*-sub-read)] - ;; throw away comments - [(special-comment? x) r] - ;; escaped expressions: no merge - [(pair? x) (append (reverse x) r)] - [(null? x) (cons (make-special-comment #f) r)] - [else (maybe-merge x r)]))))] - ;; must be last, since it will always succeed with 1 char - [(*peek re:item) ; don't read: regexp grabs the following text - => (lambda (m) - (loop lvl - (maybe-merge (make-stx (read-bytes (cdadr m) inp)) r)))] - [(*peek #rx#"^$") - (if end-token - (read-error* 'eof "missing closing `~a'" end-token) - (done-items r))] - [else (internal-error 'get-lines*)]))) - - (define (get-lines) - (cond [(*skip re:lines-begin) (get-lines* re:lines-begin re:lines-end #f - re:line-item ch:lines-end)] - [(*match re:lines-begin*) - => (lambda (m) - (let* ([bgn (car m)] - [end (reverse-bytes bgn)] - [bgn* (regexp-quote bgn)] - [end* (regexp-quote end)] - [cmd-pfx* (regexp-quote (cadr m))]) - (get-lines* (^px bgn*) (^px end*) - (^px cmd-pfx* "(?=" ch:command ")") - (re:line-item* bgn* end* cmd-pfx*) - end)))] - [else #f])) - - (define (get-datums) - (parameterize ([current-readtable datum-readtable]) - (read-delimited-list re:datums-begin re:datums-end ch:datums-end))) - - (define (get-escape-expr single?) - ;; single? means expect just one expression (or none, which is returned - ;; as a special-comment) - (let ([get (lambda () - (parameterize ([current-readtable command-readtable]) - (read-delimited-list re:expr-escape re:expr-escape - ch:expr-escape)))]) - (if single? - (let*-values ([(line col pos) (port-next-location inp)] - [(xs) (get)]) - (cond [(not xs) xs] - [(null? xs) (make-special-comment #f)] - [(null? (cdr xs)) (car xs)] - [else (read-error line col pos - "too many escape expressions")])) - (get)))) - - ;; called only when we must see a command in the input - (define (get-command) - (let ([cmd (read-stx/rt command-readtable)]) - (cond [(special-comment? cmd) - (read-error* "expecting a command expression, got a comment")] - [(eof-object? cmd) - (read-error* 'eof "missing command")] - [else cmd]))) - - (define (get-rprefixes) ; return punctuation prefixes in reverse - (let loop ([r '()]) - (let-values ([(line col pos) (port-next-location inp)]) - (cond - [(*match1 #rx#"^(?:'|`|,@?)") - => (lambda (m) - (let ([sym (cond - [(assoc m '([#"'" quote] - [#"`" quasiquote] - [#"," unquote] - [#",@" unquote-splicing])) - => cadr] - [else (internal-error 'get-rprefixes)])]) - (loop (cons (datum->syntax #f sym - (list source-name line col pos - (span-from pos))) - r))))] - [(*skip re:whitespaces) - (read-error* "unexpected whitespace after ~a" ch:command)] - [else r])))) - - (cond - [start-inside? - (datum->syntax #f (get-lines* #f #f #f re:line-item-no-nests #f) - (list source-name line-num col-num position (span-from position)))] - [(*skip re:whitespaces) - (read-error* "unexpected whitespace after ~a" ch:command)] - [(*skip re:comment-start) - (unless (get-lines) (*skip re:comment-line)) - (make-special-comment #f)] - [else - (let*-values - ([(rpfxs) (get-rprefixes)] - [(cmd datums lines) - (cond [(get-lines) - ;; try get-lines first -- so @|{...}| is not used as a - ;; simple expression escape, same for get-datums - => (lambda (lines) (values #f #f lines))] - [(get-datums) - => (lambda (datums) (values #f datums (get-lines)))] - [(get-escape-expr #t) => (lambda (expr) (values expr #f #f))] - [else (values (get-command) (get-datums) (get-lines))])] - [(stx) (and (or datums lines) - (append (or datums '()) (or lines '())))] - [(stx) (or (and cmd stx (cons cmd stx)) ; all parts - stx ; no cmd part => just a parenthesized expression - cmd ; no datums/lines => simple expression (no parens) - ;; impossible: either we saw []s or {}s, or we read a - ;; scheme expression - (internal-error 'dispatcher))] - [(stx) (let ([ds (and datums (length datums))] - [ls (and lines (length lines))]) - (if (or ds ls) - (syntax-property - (if (syntax? stx) - stx - (datum->syntax #f stx - (list source-name line-num col-num position - (span-from position)))) - 'scribble (list 'form ds ls)) - stx))] - [(stx) (syntax-post-processor stx)] - [(stx) - ;; wrap the prefixes around the result - (let loop ([rpfxs rpfxs] [stx stx]) - (if (null? rpfxs) - stx - (loop (cdr rpfxs) (list (car rpfxs) stx))))]) - (datum->syntax #f stx +(provide make-at-readtable) +(define (make-at-readtable + #:readtable [readtable (current-readtable)] + #:command-char [command-char ch:command] + #:start-inside? [start-inside? #f] + #:datum-readtable [datum-readtable #t] + #:syntax-post-processor [syntax-post-processor values]) + (define dispatcher + (make-dispatcher start-inside? command-char + (lambda () cmd-rt) (lambda () datum-rt) + syntax-post-processor)) + (define at-rt + (make-readtable readtable command-char 'non-terminating-macro dispatcher)) + (define cmd-rt + ;; similar to plain Scheme (scribble, actually), but with `@' and `|' as + ;; terminating macro characters (otherwise it behaves the same; the only + ;; difference is that `a|b|c' is three symbols and `@foo@bar' are two + ;; @-forms) + (make-readtable readtable + command-char 'terminating-macro dispatcher + #\| 'terminating-macro + (lambda (char inp source-name line-num col-num position) + (let ([m (*regexp-match #rx#"^([^|]*)\\|" inp)]) + (unless m + (raise-read-error "unbalanced `|'" source-name + line-num col-num position #f)) + (datum->syntax + #f (string->symbol (bytes->string/utf-8 (cadr m))) (list source-name line-num col-num position - (span-from position))))])) + (add1 (bytes-length (car m))))))))) + (define datum-rt + (cond [(or (not datum-readtable) (readtable? datum-readtable)) + datum-readtable] + [(eq? #t datum-readtable) at-rt] + [(procedure? datum-readtable) (datum-readtable at-rt)] + [else (error 'make-at-readtable + "bad datum-readtable: ~e" datum-readtable)])) + at-rt) - (define (make-dispatcher start-inside? ch:command - get-command-readtable get-datum-readtable - syntax-post-processor) - (define re:command (^px ch:command - ;; the following identifies string and expression - ;; escapes, see how it is used above - "(?:(\")|("ch:expr-escape"))?")) - (define (re:line-item* bgn end cmd-prefix) - (^px "(.+?)(?:" (and bgn `(,bgn"|")) (and end `(,end"|")) - cmd-prefix ch:command"|"str:end-of-line"|$)")) - (define re:line-item (re:line-item* ch:lines-begin ch:lines-end #f)) - (define re:line-item-no-nests (and start-inside? (re:line-item* #f #f #f))) - (lambda (char inp source-name line-num col-num position) - (dispatcher char inp source-name line-num col-num position - start-inside? (get-command-readtable) ch:command - re:command re:line-item* re:line-item re:line-item-no-nests - (get-datum-readtable) syntax-post-processor))) +(provide use-at-readtable) +(define use-at-readtable + (make-keyword-procedure + (lambda (kws kw-args . rest) + (port-count-lines! (current-input-port)) + (current-readtable + (keyword-apply make-at-readtable kws kw-args rest))))) - ;; -------------------------------------------------------------------------- - ;; readtable +;; utilities for below +(define make-default-at-readtable + (readtable-cached + (lambda (rt) (make-at-readtable #:readtable rt)))) +(define make-default-at-dispatcher/inside + (readtable-cached + (lambda (rt) + (let-values ([(_1 disp _2) + (readtable-mapping + (make-at-readtable #:readtable rt #:start-inside? #t) + ch:command)]) + disp)))) - (provide make-at-readtable) - (define (make-at-readtable - #:readtable [readtable (current-readtable)] - #:command-char [command-char ch:command] - #:start-inside? [start-inside? #f] - #:datum-readtable [datum-readtable #t] - #:syntax-post-processor [syntax-post-processor values]) - (define dispatcher - (make-dispatcher start-inside? command-char - (lambda () cmd-rt) (lambda () datum-rt) - syntax-post-processor)) - (define at-rt - (make-readtable readtable command-char 'non-terminating-macro dispatcher)) - (define cmd-rt - ;; similar to plain Scheme (scribble, actually), but with `@' and `|' as - ;; terminating macro characters (otherwise it behaves the same; the only - ;; difference is that `a|b|c' is three symbols and `@foo@bar' are two - ;; @-forms) - (make-readtable readtable - command-char 'terminating-macro dispatcher - #\| 'terminating-macro - (lambda (char inp source-name line-num col-num position) - (let ([m (*regexp-match #rx#"^([^|]*)\\|" inp)]) - (unless m - (raise-read-error "unbalanced `|'" source-name - line-num col-num position #f)) - (datum->syntax - #f (string->symbol (bytes->string/utf-8 (cadr m))) - (list source-name line-num col-num position - (add1 (bytes-length (car m))))))))) - (define datum-rt - (cond [(or (not datum-readtable) (readtable? datum-readtable)) - datum-readtable] - [(eq? #t datum-readtable) at-rt] - [(procedure? datum-readtable) (datum-readtable at-rt)] - [else (error 'make-at-readtable - "bad datum-readtable: ~e" datum-readtable)])) - at-rt) +;; ---------------------------------------------------------------------------- +;; readers - (provide use-at-readtable) - (define use-at-readtable - (make-keyword-procedure - (lambda (kws kw-args . rest) - (port-count-lines! (current-input-port)) - (current-readtable - (keyword-apply make-at-readtable kws kw-args rest))))) +(define default-src (gensym 'scribble-reader)) +(define (src-name src port) + (if (eq? src default-src) (object-name port) src)) - ;; utilities for below - (define make-default-at-readtable - (readtable-cached - (lambda (rt) (make-at-readtable #:readtable rt)))) - (define make-default-at-dispatcher/inside - (readtable-cached - (lambda (rt) - (let-values ([(_1 disp _2) - (readtable-mapping - (make-at-readtable #:readtable rt #:start-inside? #t) - ch:command)]) - disp)))) +(define-syntax with-at-reader + (syntax-rules () + [(_ body ...) + (parameterize ([current-readtable (make-default-at-readtable)]) + body ...)])) - ;; -------------------------------------------------------------------------- - ;; readers +(define (*read [inp (current-input-port)]) + (with-at-reader (read inp))) - (define default-src (gensym 'scribble-reader)) - (define (src-name src port) - (if (eq? src default-src) (object-name port) src)) +(define (*read-syntax [src default-src] + [inp (current-input-port)]) + (with-at-reader (read-syntax (src-name src inp) inp))) - (define-syntax with-at-reader - (syntax-rules () - [(_ body ...) - (parameterize ([current-readtable (make-default-at-readtable)]) - body ...)])) +(define (read-inside [inp (current-input-port)]) + (let*-values ([(line col pos) (port-next-location inp)] + [(inside-dispatcher) (make-default-at-dispatcher/inside)]) + (with-at-reader + (syntax->datum + (inside-dispatcher #f inp (object-name inp) line col pos))))) - (define (*read [inp (current-input-port)]) - (with-at-reader (read inp))) +(define (read-inside-syntax [src default-src] + [inp (current-input-port)]) + (let*-values ([(line col pos) (port-next-location inp)] + [(inside-dispatcher) (make-default-at-dispatcher/inside)]) + (with-at-reader + (inside-dispatcher #f inp (src-name src inp) line col pos)))) - (define (*read-syntax [src default-src] - [inp (current-input-port)]) - (with-at-reader (read-syntax (src-name src inp) inp))) - - (define (read-inside [inp (current-input-port)]) - (let*-values ([(line col pos) (port-next-location inp)] - [(inside-dispatcher) (make-default-at-dispatcher/inside)]) - (with-at-reader - (syntax->datum - (inside-dispatcher #f inp (object-name inp) line col pos))))) - - (define (read-inside-syntax [src default-src] - [inp (current-input-port)]) - (let*-values ([(line col pos) (port-next-location inp)] - [(inside-dispatcher) (make-default-at-dispatcher/inside)]) - (with-at-reader - (inside-dispatcher #f inp (src-name src inp) line col pos)))) - - (provide (rename-out [*read read] - [*read-syntax read-syntax]) - read-inside read-inside-syntax) - - ) +(provide (rename-out [*read read] + [*read-syntax read-syntax]) + read-inside read-inside-syntax) diff --git a/collects/scribble/text.ss b/collects/scribble/text.ss new file mode 100644 index 0000000000..4ea9c6fbcf --- /dev/null +++ b/collects/scribble/text.ss @@ -0,0 +1,34 @@ +#lang scheme/base + +(require scheme/promise) +(provide (all-from-out scheme/base scheme/promise)) + +(define (show x p) + (let show ([x x]) + (cond [(or (void? x) (not x) (null? x)) (void)] + [(pair? x) (show (car x)) (show (cdr x))] + [(promise? x) (show (force x))] + [(keyword? x) (show (keyword->string x))] + [(and (procedure? x) (procedure-arity-includes? x 0)) (show (x))] + ;; display won't work, since it calls us back + ;; [else (display x p)] + ;; things that are printed directly + [(bytes? x) (write-bytes x p)] + [(string? x) (write-string x p)] + [(char? x) (write-char x p)] + [(number? x) (write x p)] + ;; generic fallback + [else (show (format "~a" x))]))) + +;; this is too much -- it also changes error messages +;; (global-port-print-handler show) +(port-display-handler (current-output-port) show) + +;; the default prints a newline too, avoid that +(current-print display) + +;; make it possible to use this language through a repl +;; --> won't work: need an `inside' reader that reads a single expression +;; (require (prefix-in * "text/lang/reader.ss")) +;; (current-prompt-read +;; (lambda () (parameterize ([read-accept-reader #t]) (*read-syntax)))) diff --git a/collects/scribble/text/lang/reader.ss b/collects/scribble/text/lang/reader.ss new file mode 100644 index 0000000000..94ba4968e6 --- /dev/null +++ b/collects/scribble/text/lang/reader.ss @@ -0,0 +1,32 @@ +#lang scheme/base + +(require (prefix-in s: "../../reader.ss")) + +(provide (rename-out [*read read]) + (rename-out [*read-syntax read-syntax])) + +(define (*read [inp (current-input-port)]) + (wrap inp (s:read-inside inp))) + +(define (*read-syntax [src #f] [port (current-input-port)]) + (wrap port (s:read-inside-syntax src port))) + +(define (wrap port body) + (define (strip-leading-newlines stxs) + (if (null? stxs) + stxs + (let ([p (syntax-property (car stxs) 'scribble)]) + (if (and (pair? p) (eq? (car p) 'newline)) + (strip-leading-newlines (cdr stxs)) + stxs)))) + (let* ([p-name (object-name port)] + [name (if (path? p-name) + (let-values ([(base name dir?) (split-path p-name)]) + (string->symbol (path->string (path-replace-suffix + name #"")))) + 'page)] + [id 'doc] + [body (if (syntax? body) + (strip-leading-newlines (syntax->list body)) + body)]) + `(module ,name scribble/text (#%module-begin . ,body))))