diff --git a/collects/framework/private/color.ss b/collects/framework/private/color.ss index a5b0526b..8532fda1 100644 --- a/collects/framework/private/color.ss +++ b/collects/framework/private/color.ss @@ -32,6 +32,10 @@ added get-regions (define (should-color-type? type) (not (memq type '(white-space no-color)))) +(define (make-data type mode) (cons type mode)) +(define (data-type data) (car data)) +(define (data-lexer-mode data) (cdr data)) + (define -text<%> (interface (text:basic<%>) start-colorer @@ -102,6 +106,11 @@ added get-regions invalid-tokens-start ; = +inf.0 ;; The position right before the next token to be read current-pos + ;; Thread a mode through lexing, and remember the mode + ;; at each token boundary, so that lexing can depend on + ;; previous tokens. This is the mode for lexing at + ;; current-pos: + current-lexer-mode ;; Paren-matching parens ) @@ -118,6 +127,7 @@ added get-regions (new token-tree%) +inf.0 start + #f (new paren-tree% (matches pairs)))) (define lexer-states (list (make-new-lexer-state 0 'end))) @@ -228,6 +238,7 @@ added get-regions (set-lexer-state-invalid-tokens-start! ls +inf.0) (set-lexer-state-up-to-date?! ls #t) (set-lexer-state-current-pos! ls (lexer-state-start-pos ls)) + (set-lexer-state-current-lexer-mode! ls #f) (set-lexer-state-parens! ls (new paren-tree% (matches pairs)))) lexer-states) (set! restart-callback #f) @@ -258,12 +269,12 @@ added get-regions (set-lexer-state-invalid-tokens-start! ls (+ invalid-tokens-start length))) (sync-invalid ls)))) - (define/private (re-tokenize ls in in-start-pos enable-suspend) - (let-values ([(lexeme type data new-token-start new-token-end) + (define/private (re-tokenize ls in in-start-pos in-lexer-mode enable-suspend) + (let-values ([(lexeme type data new-token-start new-token-end new-lexer-mode) (begin (enable-suspend #f) (begin0 - (get-token in) + (get-token in in-lexer-mode) (enable-suspend #t)))]) (unless (eq? 'eof type) (enable-suspend #f) @@ -271,6 +282,7 @@ added get-regions (+ in-start-pos (sub1 new-token-end))) (let ((len (- new-token-end new-token-start))) (set-lexer-state-current-pos! ls (+ len (lexer-state-current-pos ls))) + (set-lexer-state-current-lexer-mode! ls new-lexer-mode) (sync-invalid ls) (when (and should-color? (should-color-type? type) (not frozen?)) (set! colors @@ -286,7 +298,7 @@ added get-regions ;; version. In other words, the new greatly outweighs the tree ;; operations. ;;(insert-last! tokens (new token-tree% (length len) (data type))) - (insert-last-spec! (lexer-state-tokens ls) len type) + (insert-last-spec! (lexer-state-tokens ls) len (make-data type new-lexer-mode)) (send (lexer-state-parens ls) add-token data len) (cond ((and (not (send (lexer-state-invalid-tokens ls) is-empty?)) @@ -301,7 +313,7 @@ added get-regions (enable-suspend #t)) (else (enable-suspend #t) - (re-tokenize ls in in-start-pos enable-suspend))))))) + (re-tokenize ls in in-start-pos new-lexer-mode enable-suspend))))))) (define/private (do-insert/delete/ls ls edit-start-pos change-length) (unless (lexer-state-up-to-date? ls) @@ -318,7 +330,14 @@ added get-regions (if (send (lexer-state-invalid-tokens ls) is-empty?) +inf.0 (+ (lexer-state-start-pos ls) orig-token-end change-length))) - (set-lexer-state-current-pos! ls (+ (lexer-state-start-pos ls) orig-token-start)) + (let ([start (+ (lexer-state-start-pos ls) orig-token-start)]) + (set-lexer-state-current-pos! ls start) + (set-lexer-state-current-lexer-mode! ls + (if (= start (lexer-state-start-pos ls)) + #f + (begin + (send valid-tree search-max!) 
+ (data-lexer-mode (send valid-tree get-root-data)))))) (set-lexer-state-up-to-date?! ls #f) (queue-callback (λ () (colorer-callback)) #f))) ((>= edit-start-pos (lexer-state-invalid-tokens-start ls)) @@ -340,7 +359,14 @@ added get-regions (send (lexer-state-parens ls) truncate tok-start) (set-lexer-state-tokens! ls valid-tree) (set-lexer-state-invalid-tokens-start! ls (+ change-length (lexer-state-invalid-tokens-start ls))) - (set-lexer-state-current-pos! ls (+ (lexer-state-start-pos ls) tok-start)))))) + (let ([start (+ (lexer-state-start-pos ls) tok-start)]) + (set-lexer-state-current-pos! ls start) + (set-lexer-state-current-lexer-mode! ls + (if (= start (lexer-state-start-pos ls)) + #f + (begin + (send valid-tree search-max!) + (data-lexer-mode (send valid-tree get-root-data)))))))))) (define/private (do-insert/delete edit-start-pos change-length) (unless (or stopped? force-stop?) @@ -378,6 +403,7 @@ added get-regions (λ (x) #f)) (enable-suspend #t))) (lexer-state-current-pos ls) + (lexer-state-current-lexer-mode ls) enable-suspend)) lexer-states))))) (set! rev (get-revision-number))) @@ -427,7 +453,14 @@ added get-regions (reset-tokens) (set! should-color? (preferences:get 'framework:coloring-active)) (set! token-sym->style token-sym->style-) - (set! get-token get-token-) + (set! get-token (if (procedure-arity-includes? get-token- 2) + ;; New interface: thread through a mode: + get-token- + ;; Old interface: no mode + (lambda (in mode) + (let-values ([(lexeme type data new-token-start new-token-end) + (get-token- in)]) + (values lexeme type data new-token-start new-token-end #f))))) (set! pairs pairs-) (for-each (lambda (ls) @@ -739,7 +772,7 @@ added get-regions (let ([tokens (lexer-state-tokens ls)]) (tokenize-to-pos ls position) (send tokens search! (- position (lexer-state-start-pos ls))) - (send tokens get-root-data))))) + (data-type (send tokens get-root-data)))))) (define/private (tokenize-to-pos ls position) (when (and (not (lexer-state-up-to-date? ls)) @@ -768,8 +801,8 @@ added get-regions (send tokens search! (- (if (eq? direction 'backward) (sub1 position) position) start-pos)) (cond - ((or (eq? 'white-space (send tokens get-root-data)) - (and comments? (eq? 'comment (send tokens get-root-data)))) + ((or (eq? 'white-space (data-type (send tokens get-root-data))) + (and comments? (eq? 'comment (data-type (send tokens get-root-data))))) (skip-whitespace (+ start-pos (if (eq? direction 'forward) (send tokens get-root-end-position) diff --git a/collects/framework/private/scheme.ss b/collects/framework/private/scheme.ss index 3535effd..6af2a469 100644 --- a/collects/framework/private/scheme.ss +++ b/collects/framework/private/scheme.ss @@ -6,7 +6,7 @@ (require string-constants scheme/class mred/mred-sig - syntax-color/scheme-lexer + syntax-color/module-lexer "collapsed-snipclass-helpers.ss" "sig.ss" "../gui-utils.ss" @@ -1172,14 +1172,14 @@ (preferences:add-callback 'framework:tabify (lambda (k v) (set! tabify-pref v))) - (define/private (scheme-lexer-wrapper in) - (let-values (((lexeme type paren start end) (scheme-lexer in))) + (define/private (scheme-lexer-wrapper in mode) + (let-values (((lexeme type paren start end mode) (module-lexer in mode))) (cond ((and (eq?
type 'symbol) (get-keyword-type lexeme tabify-pref)) - (values lexeme 'keyword paren start end)) + (values lexeme 'keyword paren start end mode)) (else - (values lexeme type paren start end))))) + (values lexeme type paren start end mode))))) (define/override (put-file text sup directory default-name) (parameterize ([finder:default-extension "ss"] @@ -1188,7 +1188,7 @@ ;; don't call the surrogate's super, since it sets the default extension (sup directory default-name))) - (super-new (get-token (lambda (in) (scheme-lexer-wrapper in))) + (super-new (get-token (lambda (in mode) (scheme-lexer-wrapper in mode))) (token-sym->style short-sym->style-name) (matches '((|(| |)|) (|[| |]|) diff --git a/collects/scribblings/framework/color.scrbl b/collects/scribblings/framework/color.scrbl index 50fb4088..9d2e66f7 100644 --- a/collects/scribblings/framework/color.scrbl +++ b/collects/scribblings/framework/color.scrbl @@ -9,19 +9,29 @@ that knows how to color itself. It also describes how to query the lexical and s-expression structure of the text. @defmethod*[(((start-colorer (token-sym->style (-> symbol? string?)) - (get-token (-> input-port? (values any/c - symbol? - (or/c false? symbol?) - exact-nonnegative-integer? - exact-nonnegative-integer?))) + (get-token (or/c (-> input-port? + (values any/c + symbol? + (or/c false? symbol?) + exact-nonnegative-integer? + exact-nonnegative-integer?)) + (-> input-port? + any/c + (values any/c + symbol? + (or/c false? symbol?) + exact-nonnegative-integer? + exact-nonnegative-integer? + any/c)))) (pairs (listof (list/p symbol? symbol?)))) void))]{ Starts tokenizing the buffer for coloring and parenthesis matching. + The @scheme[token-sym->style] argument will be passed the first return symbol from @scheme[get-token], + and it should return the style-name with which the token should be colored. - The @scheme[token-sym->style] argument will be passed the first return symbol from @scheme[get-token] - and should return the style-name that the token should be colored. + The @scheme[get-token] argument takes an input port and optionally a mode value. + When it accepts just an input port, @scheme[get-token] returns the next token as 5 values: - The @scheme[get-token] argument takes an input port and returns the next token as 5 values: @itemize[ @item{ An unused value. This value is intended to represent the textual @@ -42,32 +52,44 @@ @item{ The ending position of the token.}] - The @scheme[get-token] function will usually be implemented with a lexer using the - @scheme[parser-tools/lex] library. - get-token must obey the following invariants: + When @scheme[get-token] accepts a mode value in addition to an + input port, it must also return an extra result, which is a new + mode. When @scheme[get-token] is called for the beginning of a + stream, the mode argument is @scheme[#f]. Thereafter, the mode + returned for the previous token is provided to @scheme[get-token] + for the next token. The mode should not be a mutable value; if + part of the stream is re-tokenized, the mode saved from the + immediately preceding token is given again to the + @scheme[get-token] function. + + The @scheme[get-token] function is usually implemented with a lexer using the + @scheme[parser-tools/lex] library. The + @scheme[get-token] function must obey the following invariants: @itemize[ @item{ Every position in the buffer must be accounted for in exactly one token.} @item{ The token returned by @scheme[get-token] must rely only on the contents of the - input port argument.
This means that the tokenization of some part of - the input cannot depend on earlier parts of the input.} + input port argument plus the mode argument. This constraint means that the + tokenization of some part of the input cannot depend on earlier parts of the + input except through the mode (and implicitly through the starting positions + for tokens).} @item{ - No edit to the buffer can change the tokenization of the buffer prior - to the token immediately preceding the edit. In the following - example this invariant does not hold. If the buffer contains: - @verbatim{" 1 2 3} + A change to the stream must not change the tokenization of the stream prior + to the token immediately preceding the change. In the following + example this invariant does not hold. If the buffer contains + @verbatim[#:indent 2]{" 1 2 3} and the tokenizer treats the unmatched " as its own token (a string error token), and separately tokenizes the 1 2 and 3, an edit to make - the buffer look like: - @verbatim{" 1 2 3"} + the buffer look like + @verbatim[#:indent 2]{" 1 2 3"} would result in a single string token modifying previous tokens. To handle these situations, @scheme[get-token] must treat the first line as a single token.}] The @scheme[pairs] argument is a list of different kinds of matching parens. The second - value returned by get-token is compared to this list to see how the + value returned by @scheme[get-token] is compared to this list to see how the paren matcher should treat the token. An example: Suppose pairs is @scheme['((|(| |)|) (|[| |]|) (begin end))]. This means that there are three kinds of parens. Any token which has @scheme['begin] as its second
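The following sketch (not part of the patch) illustrates the two get-token conventions documented above, in PLT Scheme. The names adapt-old-lexer, counting-lexer, and old-get-token are invented for this example; only the 5-value and 6-value signatures and the #f initial mode come from the change itself.

  #lang scheme/base

  ;; Wrap an old-style, mode-less lexer so that it satisfies the new
  ;; two-argument interface.  This mirrors what start-colorer now does
  ;; for arity-1 lexers: the incoming mode is ignored and #f is always
  ;; returned as the new mode.
  (define (adapt-old-lexer old-get-token)
    (lambda (in mode)
      (let-values ([(lexeme type data new-token-start new-token-end)
                    (old-get-token in)])
        (values lexeme type data new-token-start new-token-end #f))))

  ;; A wrapper whose mode is actually used: it threads an immutable count
  ;; of the tokens lexed so far.  On the first call the mode is #f, so the
  ;; count starts at 0; when part of the buffer is re-tokenized, the count
  ;; supplied is the one saved at the preceding token boundary, just as the
  ;; colorer above restores lexer-state-current-lexer-mode.
  (define (counting-lexer old-get-token)
    (lambda (in mode)
      (let-values ([(lexeme type data new-token-start new-token-end)
                    (old-get-token in)])
        (values lexeme type data new-token-start new-token-end
                (add1 (or mode 0))))))

Passing (adapt-old-lexer scheme-lexer) (with scheme-lexer from syntax-color/scheme-lexer) to start-colorer should behave just like the old single-argument interface, while a mode-aware lexer such as module-lexer from syntax-color/module-lexer can be supplied directly, which is what scheme.ss now does.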