From d6b408543309e6017e3da223af787b2f99556a92 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Wed, 29 Jul 2009 03:31:29 +0000 Subject: [PATCH] extended syntax colorer to support lexer-specific backup; fix problems with new color lexers svn: r15617 original commit: d807421a07e3d86b6ebf0802b4000d34af472372 --- collects/framework/private/color.ss | 56 ++++++++++++++++------ collects/framework/private/scheme.ss | 10 ++-- collects/scribblings/framework/color.scrbl | 30 +++++++----- 3 files changed, 65 insertions(+), 31 deletions(-) diff --git a/collects/framework/private/color.ss b/collects/framework/private/color.ss index 3aee99a9..4fb414cd 100644 --- a/collects/framework/private/color.ss +++ b/collects/framework/private/color.ss @@ -32,9 +32,13 @@ added get-regions (define (should-color-type? type) (not (memq type '(white-space no-color)))) -(define (make-data type mode) (cons type mode)) -(define (data-type data) (car data)) -(define (data-lexer-mode data) (cdr data)) +(define (make-data type mode backup-delta) + (if (zero? backup-delta) + (cons type mode) + (vector type mode backup-delta))) +(define (data-type data) (if (pair? data) (car data) (vector-ref data 0))) +(define (data-lexer-mode data) (if (pair? data) (cdr data) (vector-ref data 1))) +(define (data-backup-delta data) (if (vector? data) (vector-ref data 2) 0)) (define -text<%> (interface (text:basic<%>) @@ -274,11 +278,11 @@ added get-regions (sync-invalid ls)))) (define/private (re-tokenize ls in in-start-pos in-lexer-mode enable-suspend) - (let-values ([(lexeme type data new-token-start new-token-end new-lexer-mode) + (let-values ([(lexeme type data new-token-start new-token-end backup-delta new-lexer-mode) (begin (enable-suspend #f) (begin0 - (get-token in in-lexer-mode) + (get-token in in-start-pos in-lexer-mode) (enable-suspend #t)))]) (unless (eq? 'eof type) (enable-suspend #f) @@ -302,7 +306,8 @@ added get-regions ;; version. In other words, the new greatly outweighs the tree ;; operations. 
;;(insert-last! tokens (new token-tree% (length len) (data type))) - (insert-last-spec! (lexer-state-tokens ls) len (make-data type new-lexer-mode)) + (insert-last-spec! (lexer-state-tokens ls) len (make-data type new-lexer-mode backup-delta)) + #; (show-tree (lexer-state-tokens ls)) (send (lexer-state-parens ls) add-token data len) (cond ((and (not (send (lexer-state-invalid-tokens ls) is-empty?)) @@ -320,6 +325,29 @@ (else (enable-suspend #t) (re-tokenize ls in in-start-pos new-lexer-mode enable-suspend))))))) + + (define/private (show-tree t) + (printf "Tree:\n") + (send t search-min!) + (let loop ([old-s -inf.0]) + (let ([s (send t get-root-start-position)] + [e (send t get-root-end-position)]) + (unless (= s old-s) + (printf " ~s\n" (list s e)) + (send t search! e) + (loop s))))) + + (define/private (split-backward ls valid-tree pos) + (let loop ([pos pos][valid-tree valid-tree][old-invalid-tree #f]) + (let-values (((orig-token-start orig-token-end valid-tree invalid-tree orig-data) + (send valid-tree split/data (- pos (lexer-state-start-pos ls))))) + (let ([backup-pos (- pos (data-backup-delta orig-data))] + [invalid-tree (or old-invalid-tree invalid-tree)]) + (if (backup-pos . < . pos) + ;; back up more: + (loop backup-pos valid-tree invalid-tree) + ;; that was far enough: + (values orig-token-start orig-token-end valid-tree invalid-tree orig-data)))))) (define/private (do-insert/delete/ls ls edit-start-pos change-length) (unless (lexer-state-up-to-date? ls) (cond ((lexer-state-up-to-date? ls) (let-values (((orig-token-start orig-token-end valid-tree invalid-tree orig-data) - (send (lexer-state-tokens ls) split/data (- edit-start-pos (lexer-state-start-pos ls))))) + (split-backward ls (lexer-state-tokens ls) edit-start-pos))) (send (lexer-state-parens ls) split-tree orig-token-start) (set-lexer-state-invalid-tokens! ls invalid-tree) (set-lexer-state-tokens! 
ls valid-tree) @@ -349,8 +377,7 @@ added get-regions (queue-callback (λ () (colorer-callback)) #f))) ((>= edit-start-pos (lexer-state-invalid-tokens-start ls)) (let-values (((tok-start tok-end valid-tree invalid-tree orig-data) - (send (lexer-state-invalid-tokens ls) split/data - (- edit-start-pos (lexer-state-start-pos ls))))) + (split-backward ls (lexer-state-invalid-tokens ls) edit-start-pos))) (set-lexer-state-invalid-tokens! ls invalid-tree) (set-lexer-state-invalid-tokens-start! ls @@ -362,8 +389,7 @@ added get-regions (+ change-length (lexer-state-invalid-tokens-start ls)))) (else (let-values (((tok-start tok-end valid-tree invalid-tree data) - (send (lexer-state-tokens ls) split/data - (- edit-start-pos (lexer-state-start-pos ls))))) + (split-backward ls (lexer-state-tokens ls) edit-start-pos))) (send (lexer-state-parens ls) truncate tok-start) (set-lexer-state-tokens! ls valid-tree) (set-lexer-state-invalid-tokens-start! ls (+ change-length (lexer-state-invalid-tokens-start ls))) @@ -463,14 +489,14 @@ added get-regions (reset-tokens) (set! should-color? (preferences:get 'framework:coloring-active)) (set! token-sym->style token-sym->style-) - (set! get-token (if (procedure-arity-includes? get-token- 2) + (set! get-token (if (procedure-arity-includes? get-token- 3) ;; New interface: thread through a mode: get-token- - ;; Old interface: no mode - (lambda (in mode) + ;; Old interface: no offset, backup delta, or mode + (lambda (in offset mode) (let-values ([(lexeme type data new-token-start new-token-end) (get-token- in)]) - (values lexeme type data new-token-start new-token-end #f))))) + (values lexeme type data new-token-start new-token-end 0 #f))))) (set! 
pairs pairs-) (for-each (lambda (ls) diff --git a/collects/framework/private/scheme.ss b/collects/framework/private/scheme.ss index 6af2a469..6ee835ba 100644 --- a/collects/framework/private/scheme.ss +++ b/collects/framework/private/scheme.ss @@ -1172,14 +1172,14 @@ (preferences:add-callback 'framework:tabify (lambda (k v) (set! tabify-pref v))) - (define/private (scheme-lexer-wrapper in mode) - (let-values (((lexeme type paren start end mode) (module-lexer in mode))) + (define/private (scheme-lexer-wrapper in offset mode) + (let-values (((lexeme type paren start end backup-delta mode) (module-lexer in offset mode))) (cond ((and (eq? type 'symbol) (get-keyword-type lexeme tabify-pref)) - (values lexeme 'keyword paren start end mode)) + (values lexeme 'keyword paren start end backup-delta mode)) (else - (values lexeme type paren start end mode))))) + (values lexeme type paren start end backup-delta mode))))) (define/override (put-file text sup directory default-name) (parameterize ([finder:default-extension "ss"] @@ -1188,7 +1188,7 @@ ;; don't call the surrogate's super, since it sets the default extension (sup directory default-name))) - (super-new (get-token (lambda (in mode) (scheme-lexer-wrapper in mode))) + (super-new (get-token (lambda (in offset mode) (scheme-lexer-wrapper in offset mode))) (token-sym->style short-sym->style-name) (matches '((|(| |)|) (|[| |]|) diff --git a/collects/scribblings/framework/color.scrbl b/collects/scribblings/framework/color.scrbl index 9d2e66f7..9f33186a 100644 --- a/collects/scribblings/framework/color.scrbl +++ b/collects/scribblings/framework/color.scrbl @@ -16,12 +16,14 @@ exact-nonnegative-integer? exact-nonnegative-integer?)) (-> input-port? + exact-nonnegative-integer? any/c (values any/c symbol? (or/c false? symbol?) exact-nonnegative-integer? exact-nonnegative-integer? + exact-nonnegative-integer? any/c)))) (pairs (listof (list/p symbol? 
symbol?)))) void))]{ Starts tokenizing the buffer for coloring and parenthesis matching. @@ -29,7 +31,7 @@ The @scheme[token-sym->style] argument will be passed the first return symbol from @scheme[get-token], and it should return the style-name that the token should be colored. - The @scheme[get-token] argument takes an input port and optionally a mode value. + The @scheme[get-token] argument takes an input port and optionally an offset and mode value. When it accepts just an input port, @scheme[get-token] returns the next token as 5 values: @itemize[ @@ -52,15 +54,21 @@ @item{ The ending position of the token.}] - When @scheme[get-token] accepts a mode value in addition to an - input port, it must also return an extra result, which is a new - mode. When @scheme[get-token] is called for the beginning on a - stream, the mode argument is @scheme[#f]. Thereafter, the mode + When @scheme[get-token] accepts an offset and mode value in addition to an + input port, it must also return two extra results, which are a backup + distance and new mode. The offset given to @scheme[get-token] can be added + to the position of the input port to obtain absolute coordinates within a + text stream. The mode argument allows @scheme[get-token] to communicate information + from earlier parsing to later. + When @scheme[get-token] is called for the beginning of a + stream, the mode argument is @scheme[#f]; thereafter, the mode returned for the previous token is provided to @scheme[get-token] for the next token. The mode should not be a mutable value; if part of the stream is re-tokenized, the mode saved from the immediately preceding token is given again to the - @scheme[get-token] function. + @scheme[get-token] function. The backup distance returned by @scheme[get-token] + indicates the maximum number of characters to back up (counting from the start of the token) + for re-parsing after a change to the editor within the token's region. 
The @scheme[get-token] function is usually be implemented with a lexer using the @scheme[parser-tools/lex] library. The @@ -68,7 +76,7 @@ @itemize[ @item{ Every position in the buffer must be accounted for in exactly one - token.} + token, and every token must have a non-zero width.} @item{ The token returned by @scheme[get-token] must rely only on the contents of the input port argument plus the mode argument. This constraint means that the @@ -77,16 +85,16 @@ for tokens).} @item{ A change to the stream must not change the tokenization of the stream prior - to the token immediately preceding the change. In the following - example this invariant does not hold. If the buffer contains + to the token immediately preceding the change plus the backup distance. In the following + example, this invariant does not hold for a zero backup distance: If the buffer contains @verbatim[#:indent 2]{" 1 2 3} and the tokenizer treats the unmatched " as its own token (a string error token), and separately tokenizes the 1 2 and 3, an edit to make the buffer look like @verbatim[#:indent 2]{" 1 2 3"} would result in a single string token modifying previous tokens. To - handle these situations, @scheme[get-token] must treat the first line as a - single token.}] + handle these situations, @scheme[get-token] can treat the first line as a + single token, or it can precisely track backup distances.}] The @scheme[pairs] argument is a list of different kinds of matching parens. The second value returned by @scheme[get-token] is compared to this list to see how the