extended syntax colorer to support lexer-specific backup; fix problems with new color lexers
svn: r15617
original commit: d807421a07e3d86b6ebf0802b4000d34af472372
@@ -32,9 +32,13 @@ added get-regions
 (define (should-color-type? type)
   (not (memq type '(white-space no-color))))
 
-(define (make-data type mode) (cons type mode))
-(define (data-type data) (car data))
-(define (data-lexer-mode data) (cdr data))
+(define (make-data type mode backup-delta)
+  (if (zero? backup-delta)
+      (cons type mode)
+      (vector type mode backup-delta)))
+(define (data-type data) (if (pair? data) (car data) (vector-ref data 0)))
+(define (data-lexer-mode data) (if (pair? data) (cdr data) (vector-ref data 1)))
+(define (data-backup-delta data) (if (vector? data) (vector-ref data 2) 0))
 
 (define -text<%>
   (interface (text:basic<%>)
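The new make-data representation keeps the common case cheap: a token with no backup requirement still allocates only a pair, and the three-slot vector is used only when a non-zero backup delta must be carried. A standalone sketch of the encoding, runnable outside the framework (the example calls at the end are added here for illustration, not from the commit):

    #lang racket
    ;; Compact token payload: a pair when backup-delta is 0 (the common
    ;; case), a vector only when a backup distance must be stored.
    (define (make-data type mode backup-delta)
      (if (zero? backup-delta)
          (cons type mode)
          (vector type mode backup-delta)))
    (define (data-type data) (if (pair? data) (car data) (vector-ref data 0)))
    (define (data-lexer-mode data) (if (pair? data) (cdr data) (vector-ref data 1)))
    (define (data-backup-delta data) (if (vector? data) (vector-ref data 2) 0))

    ;; The accessors behave uniformly across both representations:
    (data-backup-delta (make-data 'string 'init-mode 0)) ; => 0
    (data-backup-delta (make-data 'string 'init-mode 5)) ; => 5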
@@ -274,11 +278,11 @@ added get-regions
         (sync-invalid ls))))
 
     (define/private (re-tokenize ls in in-start-pos in-lexer-mode enable-suspend)
-      (let-values ([(lexeme type data new-token-start new-token-end new-lexer-mode)
+      (let-values ([(lexeme type data new-token-start new-token-end backup-delta new-lexer-mode)
                     (begin
                       (enable-suspend #f)
                       (begin0
-                       (get-token in in-lexer-mode)
+                       (get-token in in-start-pos in-lexer-mode)
                        (enable-suspend #t)))])
         (unless (eq? 'eof type)
           (enable-suspend #f)
@@ -302,7 +306,8 @@ added get-regions
           ;; version. In other words, the new greatly outweighs the tree
           ;; operations.
           ;;(insert-last! tokens (new token-tree% (length len) (data type)))
-          (insert-last-spec! (lexer-state-tokens ls) len (make-data type new-lexer-mode))
+          (insert-last-spec! (lexer-state-tokens ls) len (make-data type new-lexer-mode backup-delta))
+          #;(show-tree (lexer-state-tokens ls))
           (send (lexer-state-parens ls) add-token data len)
           (cond
             ((and (not (send (lexer-state-invalid-tokens ls) is-empty?))
@@ -320,6 +325,29 @@ added get-regions
             (else
              (enable-suspend #t)
              (re-tokenize ls in in-start-pos new-lexer-mode enable-suspend)))))))
 
+    (define/private (show-tree t)
+      (printf "Tree:\n")
+      (send t search-min!)
+      (let loop ([old-s -inf.0])
+        (let ([s (send t get-root-start-position)]
+              [e (send t get-root-end-position)])
+          (unless (= s old-s)
+            (printf " ~s\n" (list s e))
+            (send t search! e)
+            (loop s)))))
+
+    (define/private (split-backward ls valid-tree pos)
+      (let loop ([pos pos][valid-tree valid-tree][old-invalid-tree #f])
+        (let-values (((orig-token-start orig-token-end valid-tree invalid-tree orig-data)
+                      (send valid-tree split/data (- pos (lexer-state-start-pos ls)))))
+          (let ([backup-pos (- pos (data-backup-delta orig-data))]
+                [invalid-tree (or old-invalid-tree invalid-tree)])
+            (if (backup-pos . < . pos)
+                ;; back up more:
+                (loop backup-pos valid-tree invalid-tree)
+                ;; that was far enough:
+                (values orig-token-start orig-token-end valid-tree invalid-tree orig-data))))))
+
     (define/private (do-insert/delete/ls ls edit-start-pos change-length)
       (unless (lexer-state-up-to-date? ls)
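The split-backward loop re-splits the token tree whenever the token at the split point carries a backup delta, moving the split position back by that delta until some token reports no further backup; only the invalid side of the first split is kept. A list-based model of the same loop, hedged: it uses plain sorted (start end backup-delta) triples instead of the framework's token-tree%, split-at-pos stands in for split/data, and it assumes pos falls inside some token:

    #lang racket
    ;; Model of split-backward over a sorted list of (start end backup-delta)
    ;; triples; split-at-pos is a stand-in for token-tree%'s split/data.
    (define (split-at-pos tokens pos)
      (let loop ([valid '()] [ts tokens])
        (cond
          [(null? ts) (values #f (reverse valid) '())]
          [(< pos (cadr (car ts)))  ; pos falls inside this token
           (values (car ts) (reverse valid) (cdr ts))]
          [else (loop (cons (car ts) valid) (cdr ts))])))

    (define (split-backward tokens pos)
      (let loop ([pos pos] [tokens tokens] [old-invalid #f])
        (define-values (tok valid invalid) (split-at-pos tokens pos))
        (define backup-pos (- pos (caddr tok)))
        (define invalid* (or old-invalid invalid))  ; keep the first split's suffix
        (if (< backup-pos pos)
            (loop backup-pos valid invalid*)  ; back up more
            (values tok valid invalid*))))    ; that was far enough

    ;; An edit at position 7 lands in a token that asks for 3 characters
    ;; of backup, so the split retreats into the first token:
    (split-backward '((0 5 0) (5 10 3) (10 15 0)) 7)
    ;; => '(0 5 0) '() '((10 15 0))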
@@ -327,7 +355,7 @@ added get-regions
         (cond
           ((lexer-state-up-to-date? ls)
            (let-values (((orig-token-start orig-token-end valid-tree invalid-tree orig-data)
-                         (send (lexer-state-tokens ls) split/data (- edit-start-pos (lexer-state-start-pos ls)))))
+                         (split-backward ls (lexer-state-tokens ls) edit-start-pos)))
              (send (lexer-state-parens ls) split-tree orig-token-start)
              (set-lexer-state-invalid-tokens! ls invalid-tree)
              (set-lexer-state-tokens! ls valid-tree)
@@ -349,8 +377,7 @@ added get-regions
              (queue-callback (λ () (colorer-callback)) #f)))
           ((>= edit-start-pos (lexer-state-invalid-tokens-start ls))
            (let-values (((tok-start tok-end valid-tree invalid-tree orig-data)
-                         (send (lexer-state-invalid-tokens ls) split/data
-                               (- edit-start-pos (lexer-state-start-pos ls)))))
+                         (split-backward ls (lexer-state-invalid-tokens ls) edit-start-pos)))
              (set-lexer-state-invalid-tokens! ls invalid-tree)
              (set-lexer-state-invalid-tokens-start!
               ls
@@ -362,8 +389,7 @@ added get-regions
               (+ change-length (lexer-state-invalid-tokens-start ls))))
           (else
            (let-values (((tok-start tok-end valid-tree invalid-tree data)
-                         (send (lexer-state-tokens ls) split/data
-                               (- edit-start-pos (lexer-state-start-pos ls)))))
+                         (split-backward ls (lexer-state-tokens ls) edit-start-pos)))
              (send (lexer-state-parens ls) truncate tok-start)
              (set-lexer-state-tokens! ls valid-tree)
              (set-lexer-state-invalid-tokens-start! ls (+ change-length (lexer-state-invalid-tokens-start ls)))
@@ -463,14 +489,14 @@ added get-regions
       (reset-tokens)
       (set! should-color? (preferences:get 'framework:coloring-active))
       (set! token-sym->style token-sym->style-)
-      (set! get-token (if (procedure-arity-includes? get-token- 2)
+      (set! get-token (if (procedure-arity-includes? get-token- 3)
                           ;; New interface: thread through a mode:
                           get-token-
-                          ;; Old interface: no mode
-                          (lambda (in mode)
+                          ;; Old interface: no offset, backup delta, or mode
+                          (lambda (in offset mode)
                             (let-values ([(lexeme type data new-token-start new-token-end)
                                           (get-token- in)])
-                              (values lexeme type data new-token-start new-token-end #f)))))
+                              (values lexeme type data new-token-start new-token-end 0 #f)))))
       (set! pairs pairs-)
       (for-each
        (lambda (ls)
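The procedure-arity-includes? test is what keeps old-style lexers working: a lexer that accepts only an input port gets wrapped, and the wrapper pads its five results with a zero backup distance and a #f mode. The same adapter pattern in isolation, as a hedged sketch (old-lexer is a made-up one-argument lexer, not part of the commit):

    #lang racket
    ;; Adapter: dispatch on the lexer's arity and pad old-style results
    ;; with backup distance 0 and mode #f.
    (define (adapt-get-token get-token-)
      (if (procedure-arity-includes? get-token- 3)
          get-token-                 ; new interface: use as-is
          (lambda (in offset mode)   ; old interface: ignore offset and mode
            (let-values ([(lexeme type data start end) (get-token- in)])
              (values lexeme type data start end 0 #f)))))

    ;; A made-up one-argument lexer: each character is its own token.
    (define (old-lexer in)
      (define-values (line col pos) (port-next-location in))
      (define c (read-char in))
      (if (eof-object? c)
          (values c 'eof #f #f #f)
          (values (string c) 'symbol #f pos (add1 pos))))

    (define get-token (adapt-get-token old-lexer))
    (get-token (open-input-string "a") 0 #f)
    ;; => "a" 'symbol #f 1 2 0 #f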
@@ -1172,14 +1172,14 @@
     (preferences:add-callback
      'framework:tabify
      (lambda (k v) (set! tabify-pref v)))
-    (define/private (scheme-lexer-wrapper in mode)
-      (let-values (((lexeme type paren start end mode) (module-lexer in mode)))
+    (define/private (scheme-lexer-wrapper in offset mode)
+      (let-values (((lexeme type paren start end backup-delta mode) (module-lexer in offset mode)))
        (cond
         ((and (eq? type 'symbol)
               (get-keyword-type lexeme tabify-pref))
-         (values lexeme 'keyword paren start end mode))
+         (values lexeme 'keyword paren start end backup-delta mode))
         (else
-         (values lexeme type paren start end mode)))))
+         (values lexeme type paren start end backup-delta mode)))))
 
     (define/override (put-file text sup directory default-name)
       (parameterize ([finder:default-extension "ss"]
@@ -1188,7 +1188,7 @@
       ;; don't call the surrogate's super, since it sets the default extension
       (sup directory default-name)))
 
-    (super-new (get-token (lambda (in mode) (scheme-lexer-wrapper in mode)))
+    (super-new (get-token (lambda (in offset mode) (scheme-lexer-wrapper in offset mode)))
               (token-sym->style short-sym->style-name)
               (matches '((|(| |)|)
                          (|[| |]|)
@@ -16,12 +16,14 @@
                                   exact-nonnegative-integer?
                                   exact-nonnegative-integer?))
                           (-> input-port?
+                              exact-nonnegative-integer?
                               any/c
                               (values any/c
                                       symbol?
                                       (or/c false? symbol?)
                                       exact-nonnegative-integer?
                                       exact-nonnegative-integer?
+                                      exact-nonnegative-integer?
                                       any/c))))
              (pairs (listof (list/p symbol? symbol?)))) void))]{
  Starts tokenizing the buffer for coloring and parenthesis matching.
@@ -29,7 +31,7 @@
  The @scheme[token-sym->style] argument will be passed the first return symbol from @scheme[get-token],
  and it should return the style-name that the token should be colored.
 
-  The @scheme[get-token] argument takes an input port and optionally a mode value.
+  The @scheme[get-token] argument takes an input port and optionally an offset and mode value.
  When it accepts just an input port, @scheme[get-token] returns the next token as 5 values:
 
  @itemize[
@@ -52,15 +54,21 @@
   @item{
     The ending position of the token.}]
 
-  When @scheme[get-token] accepts a mode value in addition to an
-  input port, it must also return an extra result, which is a new
-  mode. When @scheme[get-token] is called for the beginning of a
-  stream, the mode argument is @scheme[#f]. Thereafter, the mode
+  When @scheme[get-token] accepts an offset and a mode value in addition to an
+  input port, it must also return two extra results, which are a backup
+  distance and a new mode. The offset given to @scheme[get-token] can be added
+  to the position of the input port to obtain absolute coordinates within a
+  text stream. The mode argument allows @scheme[get-token] to communicate information
+  from earlier parsing to later parsing.
+  When @scheme[get-token] is called for the beginning of a
+  stream, the mode argument is @scheme[#f]; thereafter, the mode
  returned for the previous token is provided to @scheme[get-token]
  for the next token. The mode should not be a mutable value; if
  part of the stream is re-tokenized, the mode saved from the
  immediately preceding token is given again to the
-  @scheme[get-token] function.
+  @scheme[get-token] function. The backup distance returned by @scheme[get-token]
+  indicates the maximum number of characters to back up (counting from the start of the token)
+  for re-parsing after a change to the editor within the token's region.
 
  The @scheme[get-token] function is usually implemented with a lexer using the
  @scheme[parser-tools/lex] library. The
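Putting the documented interface together: a new-style get-token takes the port, the offset, and the previous mode, and returns seven values. A hedged sketch of a conforming (if simplistic) lexer, illustrative only and not from the commit; it treats each character as a token and always reports a zero backup distance:

    #lang racket
    ;; Sketch of a new-style get-token: (in offset mode) in; lexeme, type,
    ;; paren, start, end, backup distance, and new mode out.
    (define (get-token in offset mode)
      (define-values (line col pos) (port-next-location in))
      (define c (read-char in))
      (cond
        [(eof-object? c)
         (values c 'eof #f #f #f 0 mode)]
        [(char-whitespace? c)
         (values (string c) 'white-space #f pos (add1 pos) 0 mode)]
        [else
         ;; (+ offset pos) would give the token's absolute position in the
         ;; surrounding text stream, per the documentation above.
         (values (string c) 'symbol #f pos (add1 pos) 0 mode)]))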
@@ -68,7 +76,7 @@
  @itemize[
  @item{
    Every position in the buffer must be accounted for in exactly one
-   token.}
+   token, and every token must have a non-zero width.}
  @item{
    The token returned by @scheme[get-token] must rely only on the contents of the
    input port argument plus the mode argument. This constraint means that the
@@ -77,16 +85,16 @@
    for tokens).}
  @item{
    A change to the stream must not change the tokenization of the stream prior
-   to the token immediately preceding the change. In the following
-   example this invariant does not hold. If the buffer contains
+   to the token immediately preceding the change plus the backup distance. In the following
+   example, this invariant does not hold for a zero backup distance: if the buffer contains
    @verbatim[#:indent 2]{" 1 2 3}
    and the tokenizer treats the unmatched " as its own token (a string
    error token), and separately tokenizes the 1, 2, and 3, an edit to make
    the buffer look like
    @verbatim[#:indent 2]{" 1 2 3"}
    would result in a single string token modifying previous tokens. To
-   handle these situations, @scheme[get-token] must treat the first line as a
-   single token.}]
+   handle these situations, @scheme[get-token] can treat the first line as a
+   single token, or it can precisely track backup distances.}]
 
  The @scheme[pairs] argument is a list of different kinds of matching parens. The second
  value returned by @scheme[get-token] is compared to this list to see how the
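For the unmatched-quote example, precise backup tracking would mean each token after the " reports a distance reaching back to the quote, so an edit inside any of them forces re-lexing from the quote onward. A hypothetical sketch of that bookkeeping (the positions are made up to match the example buffer):

    #lang racket
    ;; In the buffer {" 1 2 3}, the unmatched quote sits at position 0 and
    ;; the number tokens start at 2, 4, and 6; each reports a backup
    ;; distance back to the quote.
    (define quote-pos 0)
    (for/list ([tok-start (in-list '(2 4 6))])
      (- tok-start quote-pos))
    ;; => '(2 4 6)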