diff --git a/collects/at-exp/lang/reader.ss b/collects/at-exp/lang/reader.ss index 98d6c77cf5..67accf5192 100644 --- a/collects/at-exp/lang/reader.ss +++ b/collects/at-exp/lang/reader.ss @@ -31,8 +31,15 @@ (bad (cadr spec) #f))))) (define (get-info inp mod line col pos) - (at-get inp 'get-info (object-name inp) line col pos - (lambda (spec) (lambda () (lambda (tag) #f))))) + (let ([r (at-get inp 'get-info (object-name inp) line col pos + (lambda (spec) (lambda () (lambda (inp mod line col pos) + (lambda (tag) #f)))))]) + (let ([proc (r inp mod line col pos)]) + (lambda (key) + (case key + [(color-lexer) + (dynamic-require 'syntax-color/scribble-lexer 'scribble-lexer)] + [else (and proc (proc key))]))))) (define at-readtable (make-at-readtable)) diff --git a/collects/framework/private/color.ss b/collects/framework/private/color.ss index a5b0526bbb..8532fda14f 100644 --- a/collects/framework/private/color.ss +++ b/collects/framework/private/color.ss @@ -32,6 +32,10 @@ added get-regions (define (should-color-type? type) (not (memq type '(white-space no-color)))) +(define (make-data type mode) (cons type mode)) +(define (data-type data) (car data)) +(define (data-lexer-mode data) (cdr data)) + (define -text<%> (interface (text:basic<%>) start-colorer @@ -102,6 +106,11 @@ added get-regions invalid-tokens-start ; = +inf.0 ;; The position right before the next token to be read current-pos + ;; Thread a mode through lexing, and remember the mode + ;; at each token boundary, so that lexing can depend on + ;; previous tokens. This is the mode for lexing at + ;; current-pos: + current-lexer-mode ;; Paren-matching parens ) @@ -118,6 +127,7 @@ added get-regions (new token-tree%) +inf.0 start + #f (new paren-tree% (matches pairs)))) (define lexer-states (list (make-new-lexer-state 0 'end))) @@ -228,6 +238,7 @@ added get-regions (set-lexer-state-invalid-tokens-start! ls +inf.0) (set-lexer-state-up-to-date?! ls #t) (set-lexer-state-current-pos! ls (lexer-state-start-pos ls)) + (set-lexer-state-current-lexer-mode! ls #f) (set-lexer-state-parens! ls (new paren-tree% (matches pairs)))) lexer-states) (set! restart-callback #f) @@ -258,12 +269,12 @@ added get-regions (set-lexer-state-invalid-tokens-start! ls (+ invalid-tokens-start length))) (sync-invalid ls)))) - (define/private (re-tokenize ls in in-start-pos enable-suspend) - (let-values ([(lexeme type data new-token-start new-token-end) + (define/private (re-tokenize ls in in-start-pos in-lexer-mode enable-suspend) + (let-values ([(lexeme type data new-token-start new-token-end new-lexer-mode) (begin (enable-suspend #f) (begin0 - (get-token in) + (get-token in in-lexer-mode) (enable-suspend #t)))]) (unless (eq? 'eof type) (enable-suspend #f) @@ -271,6 +282,7 @@ added get-regions (+ in-start-pos (sub1 new-token-end))) (let ((len (- new-token-end new-token-start))) (set-lexer-state-current-pos! ls (+ len (lexer-state-current-pos ls))) + (set-lexer-state-current-lexer-mode! ls new-lexer-mode) (sync-invalid ls) (when (and should-color? (should-color-type? type) (not frozen?)) (set! colors @@ -286,7 +298,7 @@ added get-regions ;; version. In other words, the new greatly outweighs the tree ;; operations. ;;(insert-last! tokens (new token-tree% (length len) (data type))) - (insert-last-spec! (lexer-state-tokens ls) len type) + (insert-last-spec! (lexer-state-tokens ls) len (make-data type new-lexer-mode)) (send (lexer-state-parens ls) add-token data len) (cond ((and (not (send (lexer-state-invalid-tokens ls) is-empty?)) @@ -301,7 +313,7 @@ added get-regions (enable-suspend #t)) (else (enable-suspend #t) - (re-tokenize ls in in-start-pos enable-suspend))))))) + (re-tokenize ls in in-start-pos new-lexer-mode enable-suspend))))))) (define/private (do-insert/delete/ls ls edit-start-pos change-length) (unless (lexer-state-up-to-date? ls) @@ -318,7 +330,14 @@ added get-regions (if (send (lexer-state-invalid-tokens ls) is-empty?) +inf.0 (+ (lexer-state-start-pos ls) orig-token-end change-length))) - (set-lexer-state-current-pos! ls (+ (lexer-state-start-pos ls) orig-token-start)) + (let ([start (+ (lexer-state-start-pos ls) orig-token-start)]) + (set-lexer-state-current-pos! ls start) + (set-lexer-state-current-lexer-mode! ls + (if (= start (lexer-state-start-pos ls)) + #f + (begin + (send valid-tree search-max!) + (data-lexer-mode (send valid-tree get-root-data)))))) (set-lexer-state-up-to-date?! ls #f) (queue-callback (λ () (colorer-callback)) #f))) ((>= edit-start-pos (lexer-state-invalid-tokens-start ls)) @@ -340,7 +359,13 @@ added get-regions (send (lexer-state-parens ls) truncate tok-start) (set-lexer-state-tokens! ls valid-tree) (set-lexer-state-invalid-tokens-start! ls (+ change-length (lexer-state-invalid-tokens-start ls))) - (set-lexer-state-current-pos! ls (+ (lexer-state-start-pos ls) tok-start)))))) + (let ([start (+ (lexer-state-start-pos ls) tok-start)]) + (set-lexer-state-current-pos! ls start) + (if (= start (lexer-state-start-pos ls)) + #f + (begin + (send valid-tree search-max!) + (data-lexer-mode (send valid-tree get-root-data))))))))) (define/private (do-insert/delete edit-start-pos change-length) (unless (or stopped? force-stop?) @@ -378,6 +403,7 @@ added get-regions (λ (x) #f)) (enable-suspend #t))) (lexer-state-current-pos ls) + (lexer-state-current-lexer-mode ls) enable-suspend)) lexer-states))))) (set! rev (get-revision-number))) @@ -427,7 +453,14 @@ added get-regions (reset-tokens) (set! should-color? (preferences:get 'framework:coloring-active)) (set! token-sym->style token-sym->style-) - (set! get-token get-token-) + (set! get-token (if (procedure-arity-includes? get-token- 2) + ;; New interface: thread through a mode: + get-token- + ;; Old interface: no mode + (lambda (in mode) + (let-values ([(lexeme type data new-token-start new-token-end) + (get-token- in)]) + (values lexeme type data new-token-start new-token-end #f))))) (set! pairs pairs-) (for-each (lambda (ls) @@ -739,7 +772,7 @@ added get-regions (let ([tokens (lexer-state-tokens ls)]) (tokenize-to-pos ls position) (send tokens search! (- position (lexer-state-start-pos ls))) - (send tokens get-root-data))))) + (data-type (send tokens get-root-data)))))) (define/private (tokenize-to-pos ls position) (when (and (not (lexer-state-up-to-date? ls)) @@ -768,8 +801,8 @@ added get-regions (send tokens search! (- (if (eq? direction 'backward) (sub1 position) position) start-pos)) (cond - ((or (eq? 'white-space (send tokens get-root-data)) - (and comments? (eq? 'comment (send tokens get-root-data)))) + ((or (eq? 'white-space (data-type (send tokens get-root-data))) + (and comments? (eq? 'comment (data-type (send tokens get-root-data))))) (skip-whitespace (+ start-pos (if (eq? direction 'forward) (send tokens get-root-end-position) diff --git a/collects/framework/private/scheme.ss b/collects/framework/private/scheme.ss index 3535effdb5..6af2a469c4 100644 --- a/collects/framework/private/scheme.ss +++ b/collects/framework/private/scheme.ss @@ -6,7 +6,7 @@ (require string-constants scheme/class mred/mred-sig - syntax-color/scheme-lexer + syntax-color/module-lexer "collapsed-snipclass-helpers.ss" "sig.ss" "../gui-utils.ss" @@ -1172,14 +1172,14 @@ (preferences:add-callback 'framework:tabify (lambda (k v) (set! tabify-pref v))) - (define/private (scheme-lexer-wrapper in) - (let-values (((lexeme type paren start end) (scheme-lexer in))) + (define/private (scheme-lexer-wrapper in mode) + (let-values (((lexeme type paren start end mode) (module-lexer in mode))) (cond ((and (eq? type 'symbol) (get-keyword-type lexeme tabify-pref)) - (values lexeme 'keyword paren start end)) + (values lexeme 'keyword paren start end mode)) (else - (values lexeme type paren start end))))) + (values lexeme type paren start end mode))))) (define/override (put-file text sup directory default-name) (parameterize ([finder:default-extension "ss"] @@ -1188,7 +1188,7 @@ ;; don't call the surrogate's super, since it sets the default extension (sup directory default-name))) - (super-new (get-token (lambda (in) (scheme-lexer-wrapper in))) + (super-new (get-token (lambda (in mode) (scheme-lexer-wrapper in mode))) (token-sym->style short-sym->style-name) (matches '((|(| |)|) (|[| |]|) diff --git a/collects/scribble/base.ss b/collects/scribble/base.ss index c6a4f2292d..f0ba8d007e 100644 --- a/collects/scribble/base.ss +++ b/collects/scribble/base.ss @@ -259,6 +259,7 @@ (->* () () #:rest (listof pre-content?) element?)) (provide/contract + [linebreak (-> element?)] [hspace (-> exact-nonnegative-integer? element?)] [elem (->* () (#:style element-style?) @@ -291,6 +292,9 @@ h)) (make-element 'hspace (list (make-string n #\space))))) +(define (linebreak) + (make-element 'newline '("\n"))) + (define (elem #:style [style plain] . str) (make-element style (decode-content str))) diff --git a/collects/scribble/base/lang/reader.ss b/collects/scribble/base/lang/reader.ss index 5839ac06e8..1cbdf6c0f0 100644 --- a/collects/scribble/base/lang/reader.ss +++ b/collects/scribble/base/lang/reader.ss @@ -6,5 +6,10 @@ scribble/base/lang #:read-syntax scribble:read-syntax-inside #:whole-body-readers? #t #:wrapper1 (lambda (t) (list* 'doc 'values '() (t))) +#:info (lambda (key default) + (case key + [(color-lexer) + (dynamic-require 'syntax-color/scribble-lexer 'scribble-inside-lexer)] + [else (default key)])) (require (prefix-in scribble: "../../reader.ss")) diff --git a/collects/scribble/doc/lang/reader.ss b/collects/scribble/doc/lang/reader.ss index 7fb458175b..9b2f93c0e5 100644 --- a/collects/scribble/doc/lang/reader.ss +++ b/collects/scribble/doc/lang/reader.ss @@ -1,3 +1,11 @@ #lang scheme/base (require (prefix-in doc: scribble/doc/reader)) -(provide (rename-out [doc:read read] [doc:read-syntax read-syntax])) +(provide (rename-out [doc:read read] [doc:read-syntax read-syntax]) + get-info) + +(define (get-info . args) + (lambda (key) + (case key + [(color-lexer) + (dynamic-require 'syntax-color/scribble-lexer 'scribble-inside-lexer)] + [else #f]))) diff --git a/collects/scribble/manual/lang/reader.ss b/collects/scribble/manual/lang/reader.ss index b573019e95..2407939f7e 100644 --- a/collects/scribble/manual/lang/reader.ss +++ b/collects/scribble/manual/lang/reader.ss @@ -6,5 +6,10 @@ scribble/manual/lang #:read-syntax scribble:read-syntax-inside #:whole-body-readers? #t #:wrapper1 (lambda (t) (cons 'doc (t))) +#:info (lambda (key default) + (case key + [(color-lexer) + (dynamic-require 'syntax-color/scribble-lexer 'scribble-inside-lexer)] + [else (default key)])) (require (prefix-in scribble: "../../reader.ss")) diff --git a/collects/scribble/scheme.ss b/collects/scribble/scheme.ss index 1aec663b0d..0a25cdf8f6 100644 --- a/collects/scribble/scheme.ss +++ b/collects/scribble/scheme.ss @@ -47,6 +47,7 @@ (struct-out var-id) (struct-out shaped-parens) (struct-out just-context) + (struct-out alternate-display) (struct-out literal-syntax) (for-syntax make-variable-id variable-id? @@ -178,11 +179,12 @@ (memq (syntax-e c) (current-variable-list)))] [(s it? sub?) (let ([sc (syntax-e c)]) - (let ([s (format "~s" (if (literal-syntax? sc) - (literal-syntax-stx sc) - (if (var-id? sc) - (var-id-sym sc) - sc)))]) + (let ([s (or (syntax-property c 'display-string) + (format "~s" (if (literal-syntax? sc) + (literal-syntax-stx sc) + (if (var-id? sc) + (var-id-sym sc) + sc))))]) (if (and (symbol? sc) ((string-length s) . > . 1) (char=? (string-ref s 0) #\_) @@ -763,6 +765,7 @@ (define-struct var-id (sym)) (define-struct shaped-parens (val shape)) (define-struct just-context (val ctx)) + (define-struct alternate-display (id string)) (define-struct literal-syntax (stx)) (define-struct graph-reference (bx)) @@ -792,6 +795,11 @@ s s (just-context-ctx v)))] + [(alternate-display? v) + (let ([s (do-syntax-ize (alternate-display-id v) col line ht #f)]) + (syntax-property s + 'display-string + (alternate-display-string v)))] [(hash-ref (unbox ht) v #f) => (lambda (m) (unless (unbox m) diff --git a/collects/scribble/sigplan/lang/reader.ss b/collects/scribble/sigplan/lang/reader.ss index f0fd21629f..f67d928afd 100644 --- a/collects/scribble/sigplan/lang/reader.ss +++ b/collects/scribble/sigplan/lang/reader.ss @@ -6,5 +6,10 @@ scribble/sigplan/lang #:read-syntax scribble:read-syntax-inside #:whole-body-readers? #t #:wrapper1 (lambda (t) (cons 'doc (t))) +#:info (lambda (key default) + (case key + [(color-lexer) + (dynamic-require 'syntax-color/scribble-lexer 'scribble-inside-lexer)] + [else (default key)])) (require (prefix-in scribble: "../../reader.ss")) diff --git a/collects/scribblings/framework/color.scrbl b/collects/scribblings/framework/color.scrbl index 50fb40889c..9d2e66f72f 100644 --- a/collects/scribblings/framework/color.scrbl +++ b/collects/scribblings/framework/color.scrbl @@ -9,19 +9,29 @@ that knows how to color itself. It also describes how to query the lexical and s-expression structure of the text. @defmethod*[(((start-colorer (token-sym->style (-> symbol? string?)) - (get-token (-> input-port? (values any/c - symbol? - (or/c false? symbol?) - exact-nonnegative-integer? - exact-nonnegative-integer?))) + (get-token (or/c (-> input-port? + (values any/c + symbol? + (or/c false? symbol?) + exact-nonnegative-integer? + exact-nonnegative-integer?)) + (-> input-port? + any/c + (values any/c + symbol? + (or/c false? symbol?) + exact-nonnegative-integer? + exact-nonnegative-integer? + any/c)))) (pairs (listof (list/p symbol? symbol?)))) void))]{ Starts tokenizing the buffer for coloring and parenthesis matching. + The @scheme[token-sym->style] argument will be passed the first return symbol from @scheme[get-token], + and it should return the style-name that the token should be colored. - The @scheme[token-sym->style] argument will be passed the first return symbol from @scheme[get-token] - and should return the style-name that the token should be colored. + The @scheme[get-token] argument takes an input port and optionally a mode value. + When it accepts just an input port, @scheme[get-token] returns the next token as 5 values: - The @scheme[get-token] argument takes an input port and returns the next token as 5 values: @itemize[ @item{ An unused value. This value is intended to represent the textual @@ -42,32 +52,44 @@ @item{ The ending position of the token.}] - The @scheme[get-token] function will usually be implemented with a lexer using the - @scheme[parser-tools/lex] library. - get-token must obey the following invariants: + When @scheme[get-token] accepts a mode value in addition to an + input port, it must also return an extra result, which is a new + mode. When @scheme[get-token] is called for the beginning on a + stream, the mode argument is @scheme[#f]. Thereafter, the mode + returned for the previous token is provided to @scheme[get-token] + for the next token. The mode should not be a mutable value; if + part of the stream is re-tokenized, the mode saved from the + immediately preceding token is given again to the + @scheme[get-token] function. + + The @scheme[get-token] function is usually be implemented with a lexer using the + @scheme[parser-tools/lex] library. The + @scheme[get-token] function must obey the following invariants: @itemize[ @item{ Every position in the buffer must be accounted for in exactly one token.} @item{ The token returned by @scheme[get-token] must rely only on the contents of the - input port argument. This means that the tokenization of some part of - the input cannot depend on earlier parts of the input.} + input port argument plus the mode argument. This constraint means that the + tokenization of some part of the input cannot depend on earlier parts of the + input except through the mode (and implicitly through the starting positions + for tokens).} @item{ - No edit to the buffer can change the tokenization of the buffer prior - to the token immediately preceding the edit. In the following - example this invariant does not hold. If the buffer contains: - @verbatim{" 1 2 3} + A change to the stream must not change the tokenization of the stream prior + to the token immediately preceding the change. In the following + example this invariant does not hold. If the buffer contains + @verbatim[#:indent 2]{" 1 2 3} and the tokenizer treats the unmatched " as its own token (a string error token), and separately tokenizes the 1 2 and 3, an edit to make - the buffer look like: - @verbatim{" 1 2 3"} + the buffer look like + @verbatim[#:indent 2]{" 1 2 3"} would result in a single string token modifying previous tokens. To handle these situations, @scheme[get-token] must treat the first line as a single token.}] The @scheme[pairs] argument is a list of different kinds of matching parens. The second - value returned by get-token is compared to this list to see how the + value returned by @scheme[get-token] is compared to this list to see how the paren matcher should treat the token. An example: Suppose pairs is @scheme['((|(| |)|) (|[| |]|) (begin end))]. This means that there are three kinds of parens. Any token which has @scheme['begin] as its second diff --git a/collects/scribblings/scribble/base.scrbl b/collects/scribblings/scribble/base.scrbl index 30d54df0c5..689bc466e7 100644 --- a/collects/scribblings/scribble/base.scrbl +++ b/collects/scribblings/scribble/base.scrbl @@ -272,6 +272,9 @@ gets progressively larger.} @defproc[(emph [pre-content pre-content?] ...) element?]{ The same as @scheme[italic].} +@defproc[(linebreak) element?]{ +Produces an element that forces a line break.} + @defproc[(hspace [n exact-nonnegative-integer?]) element?]{ Produces an element containing @scheme[n] spaces and style diff --git a/collects/scribblings/scribble/config.scrbl b/collects/scribblings/scribble/config.scrbl index 51bb08f059..e7c7c2291f 100644 --- a/collects/scribblings/scribble/config.scrbl +++ b/collects/scribblings/scribble/config.scrbl @@ -60,7 +60,7 @@ in the case of Latex). To add a mapping from your own style name to a CSS configuration, add a @scheme[css-addition] structure instance to a style's @tech{style property} list. To map a style name to a Latex macro or environment, add a -scheme[tex-addition] structure instance. A @scheme[css-addition] or +@scheme[tex-addition] structure instance. A @scheme[css-addition] or @scheme[tex-addition] is normally associated with the style whose name is implemented by the adition, but it can also be added to the style for an enclosing part. diff --git a/collects/scribblings/scribble/reader.scrbl b/collects/scribblings/scribble/reader.scrbl index 4bc757afab..e04ca0364d 100644 --- a/collects/scribblings/scribble/reader.scrbl +++ b/collects/scribblings/scribble/reader.scrbl @@ -167,7 +167,7 @@ needed. @{blah @|foo|: blah} }===| -Actually, the command part can be any Scheme expression (that does +Actually, the command part can be any Scheme expression (that does not start with @litchar["["], @litchar["{"], or @litchar["|"]), which is particularly useful with such escapes since they can be used with any expression. diff --git a/collects/syntax-color/module-lexer.ss b/collects/syntax-color/module-lexer.ss new file mode 100644 index 0000000000..ae9e997ae3 --- /dev/null +++ b/collects/syntax-color/module-lexer.ss @@ -0,0 +1,54 @@ +#lang scheme/base +(require scheme/port + "scheme-lexer.ss") + +(provide module-lexer) + +(define (module-lexer in mode) + (cond + [(not mode) + ;; Starting out: look for #lang: + (let*-values ([(p) (peeking-input-port in)] + [(init) (file-position p)] + [(start-line start-col start-pos) (port-next-location p)]) + (let ([get-info (with-handlers ([exn:fail? (lambda (exn) 'fail)]) + ;; FIXME: set the reader guard to disable access to + ;; untrusted planet packages. + (read-language in (lambda () #f)))]) + (cond + [(procedure? get-info) + ;; Produce language as first token: + (let*-values ([(bytes-len) (- (file-position p) init)] + [(bstr) (read-bytes bytes-len in)] + [(end-line end-col end-pos) (port-next-location in)]) + (values + bstr + 'other + #f + start-pos + end-pos + (or (let ([v (get-info 'color-lexer)]) + (and v + (if (procedure-arity-includes? v 2) + (cons v #f) + v))) + scheme-lexer)))] + [(eq? 'fail get-info) + (copy-port in (open-output-nowhere)) + (let*-values ([(end-line end-col end-pos) (port-next-location in)]) + (values #f 'error #f start-pos end-pos + (lambda (in) + (values #f 'eof #f end-pos end-pos))))] + [else + ;; Start over using the Scheme lexer + (module-lexer in scheme-lexer)])))] + [(pair? mode) + ;; #lang-selected language consumes and produces a mode: + (let-values ([(lexeme type data new-token-start new-token-end new-mode) + ((car mode) in (cdr mode))]) + (values lexeme type data new-token-start new-token-end (cons (car mode) new-mode)))] + [else + ;; #lang-selected language (or default) doesn't deal with modes: + (let-values ([(lexeme type data new-token-start new-token-end) + (mode in)]) + (values lexeme type data new-token-start new-token-end mode))])) diff --git a/collects/syntax-color/scheme-lexer.ss b/collects/syntax-color/scheme-lexer.ss index 681cc14fd4..f8e7798abc 100644 --- a/collects/syntax-color/scheme-lexer.ss +++ b/collects/syntax-color/scheme-lexer.ss @@ -3,7 +3,9 @@ (require parser-tools/lex (prefix : parser-tools/lex-sre)) - (provide scheme-lexer) + (provide scheme-lexer + scheme-lexer/status + scheme-nobar-lexer/status) (define-lex-abbrevs @@ -120,6 +122,13 @@ [identifier (:: identifier-start (:* identifier-escapes identifier-chars))] + [nobar-identifier-escapes (:: "\\" any-char)] + [nobar-identifier-start (:or nobar-identifier-escapes + (:~ identifier-delims "\\" "|" "#") + "#%")] + [nobar-identifier (:: nobar-identifier-start + (:* nobar-identifier-escapes identifier-chars))] + [bad-id-start (:or identifier-escapes (:~ identifier-delims "\\" "|"))] [bad-id-escapes (:or identifier-escapes @@ -129,8 +138,17 @@ (:? "\\" bad-id-escapes)) "\\" bad-id-escapes)] + + + [nobar-bad-id-escapes nobar-identifier-escapes] + [nobar-bad-id (:or (:: bad-id-start + (:* nobar-identifier-escapes identifier-chars) + (:? "\\" nobar-bad-id-escapes)) + "\\" + nobar-bad-id-escapes)] [keyword (:: "#:" (:* identifier-escapes identifier-chars))] + [nobar-keyword (:: "#:" (:* nobar-identifier-escapes identifier-chars))] [reader-command (:or (:: "#" c s) (:: "#" c i))] [sharing (:or (:: "#" (make-uinteger digit10) "=") @@ -189,8 +207,8 @@ ((_ digit) (:or "" (:: exponent-marker (:? sign) (:+ digit)))))) - (define (ret lexeme type paren start-pos end-pos) - (values lexeme type paren (position-offset start-pos) (position-offset end-pos))) + (define (ret lexeme type paren start-pos end-pos status) + (values lexeme type paren (position-offset start-pos) (position-offset end-pos) status)) (define get-next-comment @@ -208,11 +226,11 @@ (define (read-nested-comment num-opens start-pos input) (let-values (((diff end) (get-next-comment input))) (cond - ((eq? 'eof diff) (ret "" 'error #f start-pos end)) + ((eq? 'eof diff) (ret "" 'error #f start-pos end 'continue)) (else (let ((next-num-opens (+ diff num-opens))) (cond - ((= 0 next-num-opens) (ret "" 'comment #f start-pos end)) + ((= 0 next-num-opens) (ret "" 'comment #f start-pos end 'continue)) (else (read-nested-comment next-num-opens start-pos input)))))))) (define (get-offset i) @@ -253,7 +271,7 @@ (next-char (peek-char-or-special i))) (cond ((or (equal? ender "") (not (eq? #\newline next-char))) - (values (string-append "#<<" ender) 'error #f start-pos (get-offset i))) + (values (string-append "#<<" ender) 'error #f start-pos (get-offset i) 'datum)) (else (read-char i) (let loop ((acc (list (string-append "#<<" ender "\n")))) @@ -262,85 +280,96 @@ (cond ((not (or (char? next-char) (eof-object? next-char))) ;; a special (values (apply string-append (reverse (cons next-line acc))) - 'error #f start-pos (get-offset i))) + 'error #f start-pos (get-offset i) + 'datum)) ((equal? next-line ender) ;; end of string (values (apply string-append (reverse (cons next-line acc))) - 'string #f start-pos (get-offset i))) + 'string #f start-pos (get-offset i) + 'datum)) ((eof-object? next-char) (values (apply string-append (reverse (cons next-line acc))) - 'error #f start-pos (get-offset i))) + 'error #f start-pos (get-offset i) + 'datum)) (else (read-char i) (loop (cons (string-append next-line "\n") acc)))))))))) + + (define (scheme-lexer in) + (let-values ([(lexeme type paren start end adj) (scheme-lexer/status in)]) + (values lexeme type paren start end))) - (define scheme-lexer + (define-syntax-rule (lexer/status identifier keyword bad-id) (lexer [(:+ scheme-whitespace) - (ret lexeme 'white-space #f start-pos end-pos)] + (ret lexeme 'white-space #f start-pos end-pos 'continue)] [(:or "#t" "#f" "#T" "#F" character (make-num digit2 radix2) (make-num digit8 radix8) (make-num digit10 (:? radix10)) (make-num digit16 radix16)) - (ret lexeme 'constant #f start-pos end-pos)] - [keyword (ret lexeme 'parenthesis #f start-pos end-pos)] - [str (ret lexeme 'string #f start-pos end-pos)] + (ret lexeme 'constant #f start-pos end-pos 'datum)] + [keyword (ret lexeme 'parenthesis #f start-pos end-pos 'datum)] + [str (ret lexeme 'string #f start-pos end-pos 'datum)] [";" (values (apply string (read-line/skip-over-specials input-port)) 'comment #f (position-offset start-pos) - (get-offset input-port))] + (get-offset input-port) + 'continue)] #; [line-comment (ret lexeme 'comment #f start-pos end-pos)] ["#;" - (ret lexeme 'sexp-comment #f start-pos end-pos)] + (ret lexeme 'sexp-comment #f start-pos end-pos 'continue)] ["#|" (read-nested-comment 1 start-pos input-port)] [script - (ret lexeme 'comment #f start-pos end-pos)] + (ret lexeme 'comment #f start-pos end-pos 'continue)] [(:: list-prefix "(") - (ret lexeme 'parenthesis '|(| start-pos end-pos)] + (ret lexeme 'parenthesis '|(| start-pos end-pos 'open)] [(:: list-prefix "[") - (ret lexeme 'parenthesis '|[| start-pos end-pos)] + (ret lexeme 'parenthesis '|[| start-pos end-pos 'open)] [(:: list-prefix "{") - (ret lexeme 'parenthesis '|{| start-pos end-pos)] + (ret lexeme 'parenthesis '|{| start-pos end-pos 'open)] [(:or ")" "]" "}") - (ret lexeme 'parenthesis (string->symbol lexeme) start-pos end-pos)] + (ret lexeme 'parenthesis (string->symbol lexeme) start-pos end-pos 'close)] [(:or "'" "`" "#'" "#`" "#&") - (ret lexeme 'constant #f start-pos end-pos)] + (ret lexeme 'constant #f start-pos end-pos 'continue)] [(:or sharing reader-command "." "," ",@" "#," "#,@") - (ret lexeme 'other #f start-pos end-pos)] + (ret lexeme 'other #f start-pos end-pos 'continue)] [(:: (:or "#lang " "#!") (:or langchar (:: langchar (:* (:or langchar "/")) langchar))) - (ret lexeme 'other #f start-pos end-pos)] + (ret lexeme 'other #f start-pos end-pos 'continue)] [(:: (:or "#lang " "#!") (:* (:& any-char (complement whitespace)))) - (ret lexeme 'error #f start-pos end-pos)] + (ret lexeme 'error #f start-pos end-pos 'continue)] [identifier - (ret lexeme 'symbol #f start-pos end-pos)] + (ret lexeme 'symbol #f start-pos end-pos 'datum)] ["#<<" (get-here-string (position-offset start-pos) input-port)] [(special) - (ret "" 'no-color #f start-pos end-pos)] + (ret "" 'no-color #f start-pos end-pos 'datum)] [(special-comment) - (ret "" 'comment #f start-pos end-pos)] - [(eof) (values lexeme 'eof #f #f #f)] + (ret "" 'comment #f start-pos end-pos 'continue)] + [(eof) (values lexeme 'eof #f #f #f #f)] [(:or bad-char bad-str (:& bad-id (complement (:: (:or (:: "#" (:or f t)) reader-command sharing "#<<" "#\\" "#|" "#;" "#&" script) any-string)))) - (ret lexeme 'error #f start-pos end-pos)] + (ret lexeme 'error #f start-pos end-pos 'bad)] [any-char (extend-error lexeme start-pos end-pos input-port)])) + + (define scheme-lexer/status (lexer/status identifier keyword bad-id)) + (define scheme-nobar-lexer/status (lexer/status nobar-identifier nobar-keyword nobar-bad-id)) (define (extend-error lexeme start end in) (if (memq (peek-char-or-special in) `(special #\newline #\return #\tab #\space #\vtab #\" #\, #\' #\` #\( #\) #\[ #\] #\{ #\} #\; ,eof)) - (ret lexeme 'error #f start end) + (ret lexeme 'error #f start end 'bad) (let-values (((rest end-pos) (get-chunk in))) - (ret (string-append lexeme rest) 'error #f start end-pos)))) + (ret (string-append lexeme rest) 'error #f start end-pos 'bad)))) (define get-chunk (lexer diff --git a/collects/syntax-color/scribble-lexer.ss b/collects/syntax-color/scribble-lexer.ss new file mode 100644 index 0000000000..7838b46495 --- /dev/null +++ b/collects/syntax-color/scribble-lexer.ss @@ -0,0 +1,253 @@ +#lang scheme/base +(require "scheme-lexer.ss") + +(provide scribble-inside-lexer + scribble-lexer) + +(define-struct text (scheme-rx end-rx sub-rx string-rx open-paren close-paren)) +(define-struct scheme (status)) +(define-struct args ()) +(define-struct text-args ()) + +(define (scribble-inside-lexer in mode) + (let ([mode (or mode + (list + (make-text #rx"^@" + #f + #f + #rx".*?(?:(?=@)|$)" + #f + #f)))]) + (let-values ([(line col pos) (port-next-location in)] + [(l) (car mode)]) + (define (enter-@ comment-k) + (if (equal? #\; (peek-char in)) + ;; Comment + (begin + (read-char in) + (if (or (equal? #\{ (peek-char in)) + (equal? #\| (peek-char in))) + ;; Bracketed comment: + (let-values ([(end-line end-col end-pos) (port-next-location in)]) + (comment-k "@;" + 'comment + #f + pos + end-pos + (cons (make-text-args) + mode))) + ;; Line comment: + (begin + (regexp-match #rx"\r\n|\r|\n" in) + (let-values ([(end-line end-col end-pos) (port-next-location in)]) + (comment-k "@;" + 'comment + #f + pos + end-pos + mode))))) + (let ([new-mode + (cond + [(equal? #\| (peek-char in)) + (read-char in) + (list* (make-scheme 'bar) + mode)] + [else + (list* (make-scheme 'one) + (make-args) + mode)])]) + (let-values ([(end-line end-col end-pos) (port-next-location in)]) + (values "@" + 'other + #f + pos + end-pos + new-mode))))) + + (if (eof-object? (peek-char in)) + (values eof + 'eof + #f + pos + pos + #f) + (cond + [(text? l) + (cond + [(and (text-scheme-rx l) + (regexp-try-match (text-scheme-rx l) in)) + ;; Found @ + (enter-@ values)] + [(and (text-end-rx l) + (regexp-try-match (text-end-rx l) in)) + (let-values ([(end-line end-col end-pos) (port-next-location in)]) + (values "}" + 'other + (text-close-paren l) + pos + end-pos + (cdr mode)))] + [(and (text-sub-rx l) + (regexp-try-match (text-sub-rx l) in)) + (let-values ([(end-line end-col end-pos) (port-next-location in)]) + (values "{" + 'other + (text-open-paren l) + pos + end-pos + (cons (car mode) mode)))] + [else + ;; Read string up to @ or } + (regexp-match? (text-string-rx l) in) + (let-values ([(end-line end-col end-pos) (port-next-location in)]) + (values 'string + 'string + #f + pos + end-pos + mode))])] + [(scheme? l) + (let ([status (scheme-status l)]) + (cond + [(and (eq? status 'bracket) + (regexp-try-match #px"^\\s*?[]]" in)) + (let-values ([(end-line end-col end-pos) (port-next-location in)]) + (values "]" + 'other + '|]| + pos + end-pos + (cdr mode)))] + [(and (memq status '(bar bar-no-more)) + (regexp-try-match #px"^\\s*?[|]" in)) + (let-values ([(end-line end-col end-pos) (port-next-location in)]) + (values "|" + 'other + #f + pos + end-pos + (cdr mode)))] + [(regexp-try-match #rx"^@" in) + (enter-@ (lambda (lexeme type paren start end mode) + (values lexeme + (if (eq? status 'one) + 'error + type) + paren + start + end + mode)))] + [else + (let-values ([(lexeme type paren start end adj) + (case status + [(bar bar-no-more one) (scheme-nobar-lexer/status in)] + [else (scheme-lexer/status in)])] + [(consume) (lambda (status) + (case status + [(many) mode] + [(one) (cdr mode)] + [(bracket bar-no-more) + (cons (make-scheme status) (cdr mode))] + [(bar) (cons (make-scheme 'bar-no-more) (cdr mode))] + [else (error "bad status")]))]) + (values lexeme + (cond + [(or (eq? type 'comment) + (eq? type 'white-space)) + (if (eq? status 'one) + 'error + type)] + [(eq? status 'bar-no-more) + ;; Too many S-expressions in @| ... | + 'error] + [else type]) + paren + start + end + (case adj + [(continue) mode] + [(datum) + (cond + [(pair? status) mode] + [else (consume status)])] + [(open) + (cons (make-scheme (cons #t status)) + (cdr mode))] + [(close) + (if (pair? status) + (let ([v (cdr status)]) + (if (symbol? v) + (consume v) + (cons (make-scheme v) (cdr mode)))) + (consume status))] + [(bad) (consume status)] + [else (error "bad adj")])))]))] + [(args? l) + (cond + [(regexp-try-match #rx"^\\[" in) + (let-values ([(end-line end-col end-pos) (port-next-location in)]) + (values "[" + 'other + '|[| + pos + end-pos + (list* (make-scheme 'bracket) + mode)))] + [else + (scribble-lexer in (cons (make-text-args) (cdr mode)))])] + [(text-args? l) + (cond + [(regexp-try-match #rx"^[|]([^a-zA-Z0-9 \t\r\n\f@\\\177-\377{]*){" in) + => (lambda (m) + (let-values ([(end-line end-col end-pos) (port-next-location in)]) + (values (cadr m) + 'other + #f + pos + end-pos + (let ([closer (regexp-quote + (bytes-append #"}" + (regexp-replace** (list #rx"[(]" #rx"[[]" #rx"{" #rx"<") + (cadr m) + (list #")" #"]" #"}" #">")) + #"|"))] + [re-opener (regexp-quote (cadr m))]) + (cons (make-text (byte-regexp (bytes-append #"^[|]" re-opener #"@")) + (byte-regexp (bytes-append #"^" closer)) + (byte-regexp (bytes-append #"^[|]" re-opener #"{")) + (byte-regexp (bytes-append + #".*?(?:(?=[|]" + re-opener + #"[@{])|(?=" + closer + #")|$)")) + #f + #f) + (cdr mode))))))] + [(regexp-try-match #rx"^{" in) + (let-values ([(end-line end-col end-pos) (port-next-location in)]) + (values "{" + 'other + '|{| + pos + end-pos + (cons (make-text #rx"^@" + #rx"^}" + #rx"^{" + #rx".*?(?:(?=[@{}])|$)" + '|{| + '|}|) + (cdr mode))))] + [else + (scribble-lexer in (cdr mode))])] + [else (error "bad mode")]))))) + +(define (scribble-lexer in mode) + (scribble-inside-lexer in (or mode (list (make-scheme 'many))))) + +(define (regexp-replace** rxs str strs) + (if (null? rxs) + str + (regexp-replace** (cdr rxs) + (regexp-replace* (car rxs) str (car strs)) + (cdr strs)))) diff --git a/collects/syntax-color/syntax-color.scrbl b/collects/syntax-color/syntax-color.scrbl index 3c1a2d766d..3f528ab8db 100644 --- a/collects/syntax-color/syntax-color.scrbl +++ b/collects/syntax-color/syntax-color.scrbl @@ -3,6 +3,8 @@ (for-label syntax-color/token-tree syntax-color/paren-tree syntax-color/scheme-lexer + syntax-color/module-lexer + syntax-color/scribble-lexer syntax-color/default-lexer framework/framework framework/private/color @@ -38,7 +40,7 @@ Parenthesis matching code built on top of @scheme[token-tree%]. symbol? (or/c symbol? false/c) (or/c number? false/c) - (or/c number? false/c))] + (or/c number? false/c))]{ A lexer for Scheme, including reader extensions (@secref[#:doc'(lib "scribblings/reference/reference.scrbl")]{Reader_Extension}), built @@ -60,6 +62,35 @@ The @scheme[scheme-lexer] function returns 5 values: @item{A number representing the ending position of the match (or @scheme[#f] if eof).}] +} + +@defproc[(scheme-lexer/status [in input-port?]) + (values (or/c string? eof-object?) + symbol? + (or/c symbol? false/c) + (or/c number? false/c) + (or/c number? false/c) + (or/c 'datum 'open 'close 'continue))]{ + +Like @scheme[scheme-lexer], but returns an extra value. The last +return value indicates whether the consumed token should count as a +datum, an opening parenthesis (or similar starting token to group +other tokens), a closing parenthesis (or similar), or a prefix (such +as whitespace) on a datum.} + +@defproc[(scheme-nobar-lexer/status [in input-port?]) + (values (or/c string? eof-object?) + symbol? + (or/c symbol? false/c) + (or/c number? false/c) + (or/c number? false/c) + (or/c 'datum 'open 'close 'continue))]{ + +Like @scheme[scheme-lexer/status], but for a dialect of Scheme where +@litchar{|} is a delimiter instead of quoting syntax for a symbol. +This function is used by @scheme[scribble-lexer].} + + @section{Default lexer} @defmodule[syntax-color/default-lexer] @@ -90,6 +121,90 @@ A lexer that only identifies @litchar{(}, @litchar{)}, @litchar{[}, @item{A number representing the ending position of the match (or @scheme[#f] if eof).}] +@section{Module Lexer} + +@defmodule[syntax-color/module-lexer] + +@defproc[(module-lexer [in input-port?] + [mode (or/c #f + (-> input-port? any) + (cons/c (-> input-port? any/c any) any/c))]) + (values (or/c string? eof-object?) + symbol? + (or/c symbol? false/c) + (or/c number? false/c) + (or/c number? false/c) + (or/c #f + (-> input-port? any) + (cons/c (-> input-port? any/c any) any/c)))]{ + +Like @scheme[scheme-lexer], but + +@itemize[ + + @item{A @scheme[module-lexer] accepts (and returns) a lexer mode, + instead of just an input port.} + + @item{When @scheme[mode] is @scheme[#f] (indicating the start of the + stream), the lexer checks @scheme[in] for a @hash-lang[] + specification. + + If a @hash-lang[] line is present but the specified + language does not exist, the entire @scheme[in] input is + consumed and colored as @scheme['error]. + + If the language exists and the language provides a + @scheme[get-info] function, then it is called with + @scheme['color-lexer]. If the result is not @scheme[#f], then + it should be a lexer function for use with + @scheme[color:text%]. The result mode is the lexer---paired + with @scheme[#f] if the lexer is a procedure arity 2---so that + future calls will dispatch to the language-supplied lexer. + + If the language is specified but it provides no + @scheme[get-info] or @scheme['color-lexer] result, then + @scheme[scheme-lexer] is returned as the mode.} + + @item{When @scheme[mode] is a lexer procedure, the lexer is applied + to @scheme[in]. The lexer's results are returned, plus the + lexer again as the mode.} + + @item{When @scheme[mode] is a pair, then the lexer procedure in the + @scheme[car] is applied to @scheme[in] and the mode in the + @scheme[cdr]. The lexer's results are returned, except that its + mode result is paired back with the lexer procedure.} + +]} + +@section{Scribble Lexer} + +@defmodule[syntax-color/scribble-lexer] + +@defproc[(scribble-lexer [in input-port?] + [mode any/c]) + (values (or/c string? eof-object?) + symbol? + (or/c symbol? false/c) + (or/c number? false/c) + (or/c number? false/c) + any/c)]{ + +Like @scheme[scheme-lexer], but for Scheme extended with Scribbles +@"@" notation (see @secref[#:doc '(lib +"scribblings/scribble/scribble.scrbl") "reader"]).} + +@defproc[(scribble-inside-lexer [in input-port?] + [mode any/c]) + (values (or/c string? eof-object?) + symbol? + (or/c symbol? false/c) + (or/c number? false/c) + (or/c number? false/c) + any/c)]{ + +Like @scheme[scribble-lexer], but starting in ``text'' mode instead of +Scheme mode.} + @; ---------------------------------------------------------------------- @section{Splay Tree for Tokenization} diff --git a/collects/tests/syntax-color/scribble-lexer.ss b/collects/tests/syntax-color/scribble-lexer.ss new file mode 100644 index 0000000000..9a8e952017 --- /dev/null +++ b/collects/tests/syntax-color/scribble-lexer.ss @@ -0,0 +1,201 @@ +#lang scheme +(require syntax-color/scribble-lexer) + +(define in (open-input-string "@|x #|10|#| @me[1 2 #| comment |# ]{10}")) + +(define (color str) + (let ([in (open-input-string str)]) + (let loop ([mode #f]) + (let-values ([(lexeme type paren start end mode) (scribble-inside-lexer in mode)]) + (if (eq? type 'eof) + null + (cons (list start end type) + (loop mode))))))) + +(define (test* str len-val line) + (let ([v (color str)] + [val (let loop ([pos 1][l len-val]) + (if (null? l) + null + (cons (list pos (+ pos (caar l)) (cadar l)) + (loop (+ (+ pos (caar l))) (cdr l)))))]) + (unless (equal? v val) + (printf "FAILED, line ~s\n" line) + (printf " result\n") + (pretty-print v) + (printf " is not expected\n") + (pretty-print val) + (printf "\n")))) + +(define-syntax-rule (test str len-val) + (test* str len-val (syntax-line #'str))) + +(test "x" '((1 string))) +(test "x{}" '((3 string))) +(test "@x" '((1 other) + (1 symbol))) + +(test "@x str" '((1 other) + (1 symbol) + (4 string))) + +(test "@x[] str" '((1 other) + (1 symbol) + (1 other) + (1 other) + (4 string))) + +(test "@x[z] str" '((1 other) + (1 symbol) + (1 other) + (1 symbol) + (1 other) + (4 string))) + +(test "@x[z +1.5] str" '((1 other) + (1 symbol) + (1 other) + (1 symbol) + (1 white-space) + (4 constant) + (1 other) + (4 string))) + +(test "@x[z @w{10}] str" '((1 other) + (1 symbol) ; x + (1 other) + (1 symbol) ; z + (1 white-space) + (1 other) + (1 symbol) ; w + (1 other) + (2 string) + (1 other) + (1 other) + (4 string))) + +(test "@x[a@b]{a}{b}" '((1 other) + (1 symbol) + (1 other) + (3 symbol) + (1 other) + (1 other) + (1 string) + (1 other) + (3 string))) +(test "@x{{}}" '((1 other) + (1 symbol) + (1 other) + (1 other) ; { + (1 other) ; } + (1 other))) + +(test "@|x|str" '((2 other) + (1 symbol) + (1 other) + (3 string))) +(test "@|x #|ok|#|str" '((2 other) + (1 symbol) + (1 white-space) + (6 comment) + (1 other) + (3 string))) +(test "@| x ; c\n| str" '((2 other) + (1 white-space) + (1 symbol) + (1 white-space) + (3 comment) + (2 other) + (4 string))) +(test "@|(a|b|)|str" '((2 other) + (1 parenthesis) + (4 symbol) + (1 parenthesis) + (1 other) + (3 string))) + +(test "@#|bad|#x str" '((1 other) + (7 error) + (1 symbol) + (4 string))) +(test "@@x" '((1 other) + (1 other) + (1 symbol))) +(test "@|@x|z" '((2 other) + (1 other) + (1 symbol) + (1 other) + (1 string))) +(test "@@x[1 2][3]" '((1 other) + (1 other) + (1 symbol) + (1 other) + (1 constant) + (1 white-space) + (1 constant) + (1 other) + (1 other) + (1 constant) + (1 other))) + +(test "@x|{10}|" '((1 other) + (1 symbol) + (2 other) + (2 string) + (2 other))) +(test "@x|{@q}|" '((1 other) + (1 symbol) + (2 other) + (2 string) + (2 other))) +(test "@x|!!{@q}!!|" '((1 other) + (1 symbol) + (4 other) + (2 string) + (4 other))) +(test "@x|(({@q}))|" '((1 other) + (1 symbol) + (4 other) + (2 string) + (4 other))) +(test "@x|<<{a|<<@a[10]}>>|" '((1 other) + (1 symbol) + (4 other) + (1 string) + (4 other) + (1 symbol) + (1 other) + (2 constant) + (1 other) + (4 other))) +(test "@x|{ |{ } }|}|" '((1 other) + (1 symbol) + (2 other) + (1 string) + (2 other) ; |{ + (3 string) + (2 other) ; }| + (2 other))) + +(test "@`',@foo{blah}" '((1 other) + (1 constant) ; ` + (1 constant) ; ' + (2 other) ; ,@ + (3 symbol) + (1 other) + (4 string) + (1 other))) + +(test "@; 1" '((4 comment))) +(test "@; 1\nv" '((5 comment) + (1 string))) +(test "@;{1}v" '((2 comment) + (1 other) + (1 string) + (1 other) + (1 string))) +(test "@;|{1 }} }|v" '((2 comment) + (2 other) + (5 string) + (2 other) + (1 string))) diff --git a/src/mzscheme/src/jit.c b/src/mzscheme/src/jit.c index ea2637528f..6810b1b05c 100644 --- a/src/mzscheme/src/jit.c +++ b/src/mzscheme/src/jit.c @@ -1953,7 +1953,7 @@ static jit_insn *generate_proc_struct_retry(mz_jit_state *jitter, int num_rands, return ref2; } -/* Support for interectpting direct calls to primitives: */ +/* Support for intercepting direct calls to primitives: */ #if 1 # define mz_prepare_direct_prim(n) mz_prepare(n) # define mz_finishr_direct_prim(reg, proc) mz_finishr(reg) diff --git a/src/mzscheme/src/portfun.c b/src/mzscheme/src/portfun.c index 5309de578e..d618ccaeae 100644 --- a/src/mzscheme/src/portfun.c +++ b/src/mzscheme/src/portfun.c @@ -263,7 +263,7 @@ scheme_init_port_fun(Scheme_Env *env) GLOBAL_NONCM_PRIM("read-honu/recursive", read_honu_recur_f, 0, 1, env); GLOBAL_NONCM_PRIM("read-honu-syntax", read_honu_syntax_f, 0, 2, env); GLOBAL_NONCM_PRIM("read-honu-syntax/recursive", read_honu_syntax_recur_f, 0, 2, env); - GLOBAL_NONCM_PRIM("read-language", read_language, 0, 2, env); + GLOBAL_PRIM_W_ARITY2("read-language", read_language, 0, 2, 0, -1, env); GLOBAL_NONCM_PRIM("read-char", read_char, 0, 1, env); GLOBAL_NONCM_PRIM("read-char-or-special", read_char_spec, 0, 1, env); GLOBAL_NONCM_PRIM("read-byte", read_byte, 0, 1, env);