extended syntax colorer to support lexer-specific backup; fix problems with new color lexers

svn: r15617

original commit: d807421a07e3d86b6ebf0802b4000d34af472372
Matthew Flatt 2009-07-29 03:31:29 +00:00
parent 3e64e5f4fb
commit d6b4085433
3 changed files with 65 additions and 31 deletions

View File

@@ -32,9 +32,13 @@ added get-regions
(define (should-color-type? type)
(not (memq type '(white-space no-color))))
- (define (make-data type mode) (cons type mode))
- (define (data-type data) (car data))
- (define (data-lexer-mode data) (cdr data))
+ (define (make-data type mode backup-delta)
+   (if (zero? backup-delta)
+       (cons type mode)
+       (vector type mode backup-delta)))
+ (define (data-type data) (if (pair? data) (car data) (vector-ref data 0)))
+ (define (data-lexer-mode data) (if (pair? data) (cdr data) (vector-ref data 1)))
+ (define (data-backup-delta data) (if (vector? data) (vector-ref data 2) 0))
(define -text<%>
(interface (text:basic<%>)
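
Most tokens never need to back up, so make-data above keeps the old two-slot cons for the zero-delta case and pays for a three-slot vector only when a backup distance is actually recorded. A standalone sketch of that encoding with a usage check (the #lang scheme wrapper and the sample values are illustrative, not part of the commit):

  #lang scheme
  ;; Same accessors as in the diff above, runnable on their own: a
  ;; token's data is a cons when backup-delta is 0, a vector otherwise.
  (define (make-data type mode backup-delta)
    (if (zero? backup-delta)
        (cons type mode)
        (vector type mode backup-delta)))
  (define (data-type data) (if (pair? data) (car data) (vector-ref data 0)))
  (define (data-lexer-mode data) (if (pair? data) (cdr data) (vector-ref data 1)))
  (define (data-backup-delta data) (if (vector? data) (vector-ref data 2) 0))

  (data-backup-delta (make-data 'string #f 0)) ; => 0 (compact cons)
  (data-backup-delta (make-data 'string #f 6)) ; => 6 (vector)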
@@ -274,11 +278,11 @@ added get-regions
(sync-invalid ls))))
(define/private (re-tokenize ls in in-start-pos in-lexer-mode enable-suspend)
- (let-values ([(lexeme type data new-token-start new-token-end new-lexer-mode)
+ (let-values ([(lexeme type data new-token-start new-token-end backup-delta new-lexer-mode)
(begin
(enable-suspend #f)
(begin0
- (get-token in in-lexer-mode)
+ (get-token in in-start-pos in-lexer-mode)
(enable-suspend #t)))])
(unless (eq? 'eof type)
(enable-suspend #f)
@@ -302,7 +306,8 @@ added get-regions
;; version. In other words, the new greatly outweighs the tree
;; operations.
;;(insert-last! tokens (new token-tree% (length len) (data type)))
- (insert-last-spec! (lexer-state-tokens ls) len (make-data type new-lexer-mode))
+ (insert-last-spec! (lexer-state-tokens ls) len (make-data type new-lexer-mode backup-delta))
+ #; (show-tree (lexer-state-tokens ls))
(send (lexer-state-parens ls) add-token data len)
(cond
((and (not (send (lexer-state-invalid-tokens ls) is-empty?))
@@ -320,6 +325,29 @@ added get-regions
(else
(enable-suspend #t)
(re-tokenize ls in in-start-pos new-lexer-mode enable-suspend)))))))
+ (define/private (show-tree t)
+   (printf "Tree:\n")
+   (send t search-min!)
+   (let loop ([old-s -inf.0])
+     (let ([s (send t get-root-start-position)]
+           [e (send t get-root-end-position)])
+       (unless (= s old-s)
+         (printf " ~s\n" (list s e))
+         (send t search! e)
+         (loop s)))))
+
+ (define/private (split-backward ls valid-tree pos)
+   (let loop ([pos pos][valid-tree valid-tree][old-invalid-tree #f])
+     (let-values (((orig-token-start orig-token-end valid-tree invalid-tree orig-data)
+                   (send valid-tree split/data (- pos (lexer-state-start-pos ls)))))
+       (let ([backup-pos (- pos (data-backup-delta orig-data))]
+             [invalid-tree (or old-invalid-tree invalid-tree)])
+         (if (backup-pos . < . pos)
+             ;; back up more:
+             (loop backup-pos valid-tree invalid-tree)
+             ;; that was far enough:
+             (values orig-token-start orig-token-end valid-tree invalid-tree orig-data))))))
(define/private (do-insert/delete/ls ls edit-start-pos change-length)
(unless (lexer-state-up-to-date? ls)
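
split-backward is the heart of the change: instead of splitting the token tree exactly at the edit position, it repeatedly follows the stored backup delta until it reaches a token that needs no backup. A sketch of the same loop over a hypothetical flat token list rather than a token-tree% (all names and positions below are made up for illustration):

  #lang scheme
  ;; Follow backup deltas from the edit position back to a safe
  ;; re-lex point, mimicking split-backward's loop.
  (define-struct tok (start end backup-delta))

  (define (tok-at toks pos) ; token whose region contains pos
    (ormap (lambda (t)
             (and (<= (tok-start t) pos) (< pos (tok-end t)) t))
           toks))

  (define (backward-split-pos toks pos)
    (let ([delta (tok-backup-delta (tok-at toks pos))])
      (if (zero? delta)
          pos                                        ; that was far enough
          (backward-split-pos toks (- pos delta))))) ; back up more

  (define toks (list (make-tok 0 5 0) (make-tok 5 10 5) (make-tok 10 20 10)))
  (backward-split-pos toks 3)  ; => 3  (no backup needed)
  (backward-split-pos toks 12) ; => 2  (12 - 10, then delta 0 at position 2)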
@@ -327,7 +355,7 @@ added get-regions
(cond
((lexer-state-up-to-date? ls)
(let-values (((orig-token-start orig-token-end valid-tree invalid-tree orig-data)
- (send (lexer-state-tokens ls) split/data (- edit-start-pos (lexer-state-start-pos ls)))))
+ (split-backward ls (lexer-state-tokens ls) edit-start-pos)))
(send (lexer-state-parens ls) split-tree orig-token-start)
(set-lexer-state-invalid-tokens! ls invalid-tree)
(set-lexer-state-tokens! ls valid-tree)
@@ -349,8 +377,7 @@ added get-regions
(queue-callback (λ () (colorer-callback)) #f)))
((>= edit-start-pos (lexer-state-invalid-tokens-start ls))
(let-values (((tok-start tok-end valid-tree invalid-tree orig-data)
- (send (lexer-state-invalid-tokens ls) split/data
-      (- edit-start-pos (lexer-state-start-pos ls)))))
+ (split-backward ls (lexer-state-invalid-tokens ls) edit-start-pos)))
(set-lexer-state-invalid-tokens! ls invalid-tree)
(set-lexer-state-invalid-tokens-start!
ls
@@ -362,8 +389,7 @@ added get-regions
(+ change-length (lexer-state-invalid-tokens-start ls))))
(else
(let-values (((tok-start tok-end valid-tree invalid-tree data)
- (send (lexer-state-tokens ls) split/data
-      (- edit-start-pos (lexer-state-start-pos ls)))))
+ (split-backward ls (lexer-state-tokens ls) edit-start-pos)))
(send (lexer-state-parens ls) truncate tok-start)
(set-lexer-state-tokens! ls valid-tree)
(set-lexer-state-invalid-tokens-start! ls (+ change-length (lexer-state-invalid-tokens-start ls)))
@@ -463,14 +489,14 @@ added get-regions
(reset-tokens)
(set! should-color? (preferences:get 'framework:coloring-active))
(set! token-sym->style token-sym->style-)
- (set! get-token (if (procedure-arity-includes? get-token- 2)
+ (set! get-token (if (procedure-arity-includes? get-token- 3)
;; New interface: thread through a mode:
get-token-
- ;; Old interface: no mode
- (lambda (in mode)
+ ;; Old interface: no offset, backup delta, or mode
+ (lambda (in offset mode)
(let-values ([(lexeme type data new-token-start new-token-end)
(get-token- in)])
- (values lexeme type data new-token-start new-token-end #f)))))
+ (values lexeme type data new-token-start new-token-end 0 #f)))))
(set! pairs pairs-)
(for-each
(lambda (ls)
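
The arity test above is the compatibility shim: a lexer written against the new interface takes three arguments, while an old port-only lexer is wrapped so that its five results are padded with a zero backup delta and a #f mode. A sketch of a trivial old-style lexer run through the same wrapper shape (the lexer itself is hypothetical, not framework code):

  #lang scheme
  ;; One-character-per-token lexer in the old 5-value shape.
  (define (old-lexer in)
    (let-values ([(line col pos) (port-next-location in)])
      (let ([c (read-char in)])
        (cond
          [(eof-object? c) (values c 'eof #f pos pos)]
          [(char-whitespace? c) (values (string c) 'white-space #f pos (add1 pos))]
          [else (values (string c) 'symbol #f pos (add1 pos))]))))

  ;; Same padding as the wrapper above: 0 backup delta, #f mode.
  (define (adapt get-token-)
    (lambda (in offset mode)
      (let-values ([(lexeme type data new-token-start new-token-end)
                    (get-token- in)])
        (values lexeme type data new-token-start new-token-end 0 #f))))

  (define in (open-input-string "a b"))
  (port-count-lines! in)
  ((adapt old-lexer) in 0 #f) ; => "a" 'symbol #f 1 2 0 #f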

View File

@@ -1172,14 +1172,14 @@
(preferences:add-callback
'framework:tabify
(lambda (k v) (set! tabify-pref v)))
- (define/private (scheme-lexer-wrapper in mode)
-   (let-values (((lexeme type paren start end mode) (module-lexer in mode)))
+ (define/private (scheme-lexer-wrapper in offset mode)
+   (let-values (((lexeme type paren start end backup-delta mode) (module-lexer in offset mode)))
(cond
((and (eq? type 'symbol)
(get-keyword-type lexeme tabify-pref))
- (values lexeme 'keyword paren start end mode))
+ (values lexeme 'keyword paren start end backup-delta mode))
(else
- (values lexeme type paren start end mode)))))
+ (values lexeme type paren start end backup-delta mode)))))
(define/override (put-file text sup directory default-name)
(parameterize ([finder:default-extension "ss"]
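
scheme-lexer-wrapper above mostly passes the new offset straight through to module-lexer and threads the extra backup-delta result along unchanged, rewriting only the token type for known keywords. A sketch of driving module-lexer by hand with the same calling convention (the sample program text is arbitrary):

  #lang scheme
  (require syntax-color/module-lexer)
  ;; Tokenize a string port, feeding each returned mode back in,
  ;; exactly as the colorer does between calls.
  (define in (open-input-string "#lang scheme (+ 1 2)"))
  (port-count-lines! in)
  (let loop ([mode #f])
    (let-values ([(lexeme type paren start end backup-delta new-mode)
                  (module-lexer in 0 mode)])
      (unless (eq? type 'eof)
        (printf "~a-~a ~s (backup ~a)\n" start end type backup-delta)
        (loop new-mode))))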
@@ -1188,7 +1188,7 @@
;; don't call the surrogate's super, since it sets the default extension
(sup directory default-name)))
- (super-new (get-token (lambda (in mode) (scheme-lexer-wrapper in mode)))
+ (super-new (get-token (lambda (in offset mode) (scheme-lexer-wrapper in offset mode)))
(token-sym->style short-sym->style-name)
(matches '((|(| |)|)
(|[| |]|)

View File

@@ -16,12 +16,14 @@
exact-nonnegative-integer?
exact-nonnegative-integer?))
(-> input-port?
+ exact-nonnegative-integer?
any/c
(values any/c
symbol?
(or/c false? symbol?)
exact-nonnegative-integer?
exact-nonnegative-integer?
+ exact-nonnegative-integer?
any/c))))
(pairs (listof (list/p symbol? symbol?)))) void))]{
Starts tokenizing the buffer for coloring and parenthesis matching.
@@ -29,7 +31,7 @@
The @scheme[token-sym->style] argument will be passed the first return symbol from @scheme[get-token],
and it should return the style-name that the token should be colored.
- The @scheme[get-token] argument takes an input port and optionally a mode value.
+ The @scheme[get-token] argument takes an input port and optionally an offset and mode value.
When it accepts just an input port, @scheme[get-token] returns the next token as 5 values:
@itemize[
@@ -52,15 +54,21 @@
@item{
The ending position of the token.}]
- When @scheme[get-token] accepts a mode value in addition to an
- input port, it must also return an extra result, which is a new
- mode. When @scheme[get-token] is called for the beginning on a
- stream, the mode argument is @scheme[#f]. Thereafter, the mode
+ When @scheme[get-token] accepts an offset and mode value in addition to an
+ input port, it must also return two extra results, which are a backup
+ distance and a new mode. The offset given to @scheme[get-token] can be added
+ to the position of the input port to obtain absolute coordinates within a
+ text stream. The mode argument allows @scheme[get-token] to communicate information
+ from earlier parsing to later parsing.
+ When @scheme[get-token] is called for the beginning of a
+ stream, the mode argument is @scheme[#f]; thereafter, the mode
returned for the previous token is provided to @scheme[get-token]
for the next token. The mode should not be a mutable value; if
part of the stream is re-tokenized, the mode saved from the
immediately preceding token is given again to the
- @scheme[get-token] function.
+ @scheme[get-token] function. The backup distance returned by @scheme[get-token]
+ indicates the maximum number of characters to back up (counting from the start of the token)
+ for re-parsing after a change to the editor within the token's region.
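
Concretely, a new-style @scheme[get-token] is a procedure of three arguments (port, offset, mode) returning seven values. A minimal sketch, with a hypothetical one-character lexer whose mode just counts tokens and whose backup distance is always zero because no token depends on earlier text:

  #lang scheme
  (define (get-token in offset mode)
    (let-values ([(line col pos) (port-next-location in)])
      (let ([c (read-char in)])
        (if (eof-object? c)
            (values c 'eof #f pos pos 0 mode)
            (values (string c)
                    (if (char-whitespace? c) 'white-space 'symbol)
                    #f          ; no paren info for these tokens
                    pos (add1 pos)
                    0           ; backup distance: nothing to re-lex
                    (add1 (or mode 0))))))) ; mode is #f on the first call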
The @scheme[get-token] function is usually implemented with a lexer using the
@scheme[parser-tools/lex] library. The
@@ -68,7 +76,7 @@
@itemize[
@item{
Every position in the buffer must be accounted for in exactly one
- token.}
+ token, and every token must have a non-zero width.}
@item{
The token returned by @scheme[get-token] must rely only on the contents of the
input port argument plus the mode argument. This constraint means that the
@@ -77,16 +85,16 @@
for tokens).}
@item{
A change to the stream must not change the tokenization of the stream prior
- to the token immediately preceding the change. In the following
- example this invariant does not hold. If the buffer contains
+ to the token immediately preceding the change plus the backup distance. In the following
+ example, this invariant does not hold for a zero backup distance: If the buffer contains
@verbatim[#:indent 2]{" 1 2 3}
and the tokenizer treats the unmatched " as its own token (a string
error token), and separately tokenizes the 1 2 and 3, an edit to make
the buffer look like
@verbatim[#:indent 2]{" 1 2 3"}
would result in a single string token modifying previous tokens. To
- handle these situations, @scheme[get-token] must treat the first line as a
- single token.}]
+ handle these situations, @scheme[get-token] can treat the first line as a
+ single token, or it can precisely track backup distances.}]
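
With precise backup tracking, the buffer can instead stay split into small tokens as long as each one records how far back re-lexing must start. A worked sketch with made-up 0-based positions for the example above:

  #lang scheme
  ;; buffer |" 1 2 3|:  " (0-1)  space (1-2)  1 (2-3)  space (3-4)
  ;;                    2 (4-5)  space (5-6)  3 (6-7)
  ;; Every token after the unmatched " backs up to position 0.
  (define tokens ; (start end backup-delta), delta counted from start
    '((0 1 0) (1 2 1) (2 3 2) (3 4 3) (4 5 4) (5 6 5) (6 7 6)))
  ;; Typing the closing " after the 3 re-lexes from the quote onward:
  (let ([tok (assv 6 tokens)])       ; token containing the edit
    (- (car tok) (caddr tok)))       ; => 0, the re-lex start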
The @scheme[pairs] argument is a list of different kinds of matching parens. The second
value returned by @scheme[get-token] is compared to this list to see how the