#lang syntax coloring; Scribble syntax coloring

svn: r15607

original commit: 1ba7cf0926a46122d7f759a2dccf086e2ba939a9
Matthew Flatt 2009-07-28 18:06:14 +00:00
parent c59af01586
commit 1aa23be957
3 changed files with 92 additions and 37 deletions

View File

@@ -32,6 +32,10 @@ added get-regions
(define (should-color-type? type)
(not (memq type '(white-space no-color))))
(define (make-data type mode) (cons type mode))
(define (data-type data) (car data))
(define (data-lexer-mode data) (cdr data))
(define -text<%>
(interface (text:basic<%>)
start-colorer
@@ -102,6 +106,11 @@ added get-regions
invalid-tokens-start ; = +inf.0
;; The position right before the next token to be read
current-pos
;; Thread a mode through lexing, and remember the mode
;; at each token boundary, so that lexing can depend on
;; previous tokens. This is the mode for lexing at
;; current-pos:
current-lexer-mode
;; Paren-matching
parens
)
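For orientation, a minimal sketch (not part of this diff) of how the new token data combines a token's type with the lexer mode in effect after that token; 'string and 'in-text are made-up example values:

;; Hypothetical use of the accessors added above:
(define d (make-data 'string 'in-text))
(data-type d)        ; => 'string   -- used for coloring, as before
(data-lexer-mode d)  ; => 'in-text  -- restored when re-tokenizing resumes here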
@@ -118,6 +127,7 @@ added get-regions
(new token-tree%)
+inf.0
start
#f
(new paren-tree% (matches pairs))))
(define lexer-states (list (make-new-lexer-state 0 'end)))
@@ -228,6 +238,7 @@ added get-regions
(set-lexer-state-invalid-tokens-start! ls +inf.0)
(set-lexer-state-up-to-date?! ls #t)
(set-lexer-state-current-pos! ls (lexer-state-start-pos ls))
(set-lexer-state-current-lexer-mode! ls #f)
(set-lexer-state-parens! ls (new paren-tree% (matches pairs))))
lexer-states)
(set! restart-callback #f)
@@ -258,12 +269,12 @@ added get-regions
(set-lexer-state-invalid-tokens-start! ls (+ invalid-tokens-start length)))
(sync-invalid ls))))
(define/private (re-tokenize ls in in-start-pos enable-suspend)
(let-values ([(lexeme type data new-token-start new-token-end)
(define/private (re-tokenize ls in in-start-pos in-lexer-mode enable-suspend)
(let-values ([(lexeme type data new-token-start new-token-end new-lexer-mode)
(begin
(enable-suspend #f)
(begin0
(get-token in)
(get-token in in-lexer-mode)
(enable-suspend #t)))])
(unless (eq? 'eof type)
(enable-suspend #f)
@@ -271,6 +282,7 @@ added get-regions
(+ in-start-pos (sub1 new-token-end)))
(let ((len (- new-token-end new-token-start)))
(set-lexer-state-current-pos! ls (+ len (lexer-state-current-pos ls)))
(set-lexer-state-current-lexer-mode! ls new-lexer-mode)
(sync-invalid ls)
(when (and should-color? (should-color-type? type) (not frozen?))
(set! colors
@@ -286,7 +298,7 @@ added get-regions
;; version. In other words, the new greatly outweighs the tree
;; operations.
;;(insert-last! tokens (new token-tree% (length len) (data type)))
(insert-last-spec! (lexer-state-tokens ls) len type)
(insert-last-spec! (lexer-state-tokens ls) len (make-data type new-lexer-mode))
(send (lexer-state-parens ls) add-token data len)
(cond
((and (not (send (lexer-state-invalid-tokens ls) is-empty?))
@@ -301,7 +313,7 @@ added get-regions
(enable-suspend #t))
(else
(enable-suspend #t)
(re-tokenize ls in in-start-pos enable-suspend)))))))
(re-tokenize ls in in-start-pos new-lexer-mode enable-suspend)))))))
(define/private (do-insert/delete/ls ls edit-start-pos change-length)
(unless (lexer-state-up-to-date? ls)
@@ -318,7 +330,14 @@ added get-regions
(if (send (lexer-state-invalid-tokens ls) is-empty?)
+inf.0
(+ (lexer-state-start-pos ls) orig-token-end change-length)))
(set-lexer-state-current-pos! ls (+ (lexer-state-start-pos ls) orig-token-start))
(let ([start (+ (lexer-state-start-pos ls) orig-token-start)])
(set-lexer-state-current-pos! ls start)
(set-lexer-state-current-lexer-mode! ls
(if (= start (lexer-state-start-pos ls))
#f
(begin
(send valid-tree search-max!)
(data-lexer-mode (send valid-tree get-root-data))))))
(set-lexer-state-up-to-date?! ls #f)
(queue-callback (λ () (colorer-callback)) #f)))
((>= edit-start-pos (lexer-state-invalid-tokens-start ls))
@@ -340,7 +359,13 @@ added get-regions
(send (lexer-state-parens ls) truncate tok-start)
(set-lexer-state-tokens! ls valid-tree)
(set-lexer-state-invalid-tokens-start! ls (+ change-length (lexer-state-invalid-tokens-start ls)))
(set-lexer-state-current-pos! ls (+ (lexer-state-start-pos ls) tok-start))))))
(let ([start (+ (lexer-state-start-pos ls) tok-start)])
(set-lexer-state-current-pos! ls start)
(if (= start (lexer-state-start-pos ls))
#f
(begin
(send valid-tree search-max!)
(data-lexer-mode (send valid-tree get-root-data)))))))))
(define/private (do-insert/delete edit-start-pos change-length)
(unless (or stopped? force-stop?)
@@ -378,6 +403,7 @@ added get-regions
(λ (x) #f))
(enable-suspend #t)))
(lexer-state-current-pos ls)
(lexer-state-current-lexer-mode ls)
enable-suspend))
lexer-states)))))
(set! rev (get-revision-number)))
@@ -427,7 +453,14 @@ added get-regions
(reset-tokens)
(set! should-color? (preferences:get 'framework:coloring-active))
(set! token-sym->style token-sym->style-)
(set! get-token get-token-)
(set! get-token (if (procedure-arity-includes? get-token- 2)
;; New interface: thread through a mode:
get-token-
;; Old interface: no mode
(lambda (in mode)
(let-values ([(lexeme type data new-token-start new-token-end)
(get-token- in)])
(values lexeme type data new-token-start new-token-end #f)))))
(set! pairs pairs-)
(for-each
(lambda (ls)
@@ -739,7 +772,7 @@ added get-regions
(let ([tokens (lexer-state-tokens ls)])
(tokenize-to-pos ls position)
(send tokens search! (- position (lexer-state-start-pos ls)))
(send tokens get-root-data)))))
(data-type (send tokens get-root-data))))))
(define/private (tokenize-to-pos ls position)
(when (and (not (lexer-state-up-to-date? ls))
@@ -768,8 +801,8 @@ added get-regions
(send tokens search! (- (if (eq? direction 'backward) (sub1 position) position)
start-pos))
(cond
((or (eq? 'white-space (send tokens get-root-data))
(and comments? (eq? 'comment (send tokens get-root-data))))
((or (eq? 'white-space (data-type (send tokens get-root-data)))
(and comments? (eq? 'comment (data-type (send tokens get-root-data)))))
(skip-whitespace (+ start-pos
(if (eq? direction 'forward)
(send tokens get-root-end-position)

View File

@@ -6,7 +6,7 @@
(require string-constants
scheme/class
mred/mred-sig
syntax-color/scheme-lexer
syntax-color/module-lexer
"collapsed-snipclass-helpers.ss"
"sig.ss"
"../gui-utils.ss"
@@ -1172,14 +1172,14 @@
(preferences:add-callback
'framework:tabify
(lambda (k v) (set! tabify-pref v)))
(define/private (scheme-lexer-wrapper in)
(let-values (((lexeme type paren start end) (scheme-lexer in)))
(define/private (scheme-lexer-wrapper in mode)
(let-values (((lexeme type paren start end mode) (module-lexer in mode)))
(cond
((and (eq? type 'symbol)
(get-keyword-type lexeme tabify-pref))
(values lexeme 'keyword paren start end))
(values lexeme 'keyword paren start end mode))
(else
(values lexeme type paren start end)))))
(values lexeme type paren start end mode)))))
(define/override (put-file text sup directory default-name)
(parameterize ([finder:default-extension "ss"]
@@ -1188,7 +1188,7 @@
;; don't call the surrogate's super, since it sets the default extension
(sup directory default-name)))
(super-new (get-token (lambda (in) (scheme-lexer-wrapper in)))
(super-new (get-token (lambda (in mode) (scheme-lexer-wrapper in mode)))
(token-sym->style short-sym->style-name)
(matches '((|(| |)|)
(|[| |]|)

View File

@@ -9,19 +9,29 @@
that knows how to color itself. It also describes how to query the
lexical and s-expression structure of the text.
@defmethod*[(((start-colorer (token-sym->style (-> symbol? string?))
(get-token (-> input-port? (values any/c
symbol?
(or/c false? symbol?)
exact-nonnegative-integer?
exact-nonnegative-integer?)))
(get-token (or/c (-> input-port?
(values any/c
symbol?
(or/c false? symbol?)
exact-nonnegative-integer?
exact-nonnegative-integer?))
(-> input-port?
any/c
(values any/c
symbol?
(or/c false? symbol?)
exact-nonnegative-integer?
exact-nonnegative-integer?
any/c))))
(pairs (listof (list/p symbol? symbol?)))) void))]{
Starts tokenizing the buffer for coloring and parenthesis matching.
The @scheme[token-sym->style] argument will be passed the first return symbol from @scheme[get-token],
and it should return the style-name that the token should be colored.
The @scheme[token-sym->style] argument will be passed the first return symbol from @scheme[get-token]
and should return the style-name with which the token should be colored.
The @scheme[get-token] argument takes an input port and optionally a mode value.
When it accepts just an input port, @scheme[get-token] returns the next token as 5 values:
The @scheme[get-token] argument takes an input port and returns the next token as 5 values:
@itemize[
@item{
An unused value. This value is intended to represent the textual
@@ -42,32 +52,44 @@
@item{
The ending position of the token.}]
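As a concrete illustration of the five-value, no-mode variant, here is a minimal sketch (not part of the framework) of a @scheme[get-token] built with @scheme[parser-tools/lex]; the token symbols 'constant, 'white-space, 'parenthesis, and 'symbol are illustrative choices:

(require parser-tools/lex
         (prefix-in : parser-tools/lex-sre))

;; Sketch: digits are 'constant, parens are 'parenthesis, runs of other
;; non-space characters are 'symbol, and whitespace is 'white-space.
(define simple-get-token
  (lexer
   [(:+ numeric)
    (values lexeme 'constant #f
            (position-offset start-pos) (position-offset end-pos))]
   [(:+ whitespace)
    (values lexeme 'white-space #f
            (position-offset start-pos) (position-offset end-pos))]
   [(:or "(" ")" "[" "]")
    (values lexeme 'parenthesis (string->symbol lexeme)
            (position-offset start-pos) (position-offset end-pos))]
   [(:+ (:~ whitespace "(" ")" "[" "]"))
    (values lexeme 'symbol #f
            (position-offset start-pos) (position-offset end-pos))]
   [(eof)
    (values lexeme 'eof #f
            (position-offset start-pos) (position-offset end-pos))]))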
The @scheme[get-token] function will usually be implemented with a lexer using the
@scheme[parser-tools/lex] library.
get-token must obey the following invariants:
When @scheme[get-token] accepts a mode value in addition to an
input port, it must also return an extra result, which is a new
mode. When @scheme[get-token] is called for the beginning of a
stream, the mode argument is @scheme[#f]. Thereafter, the mode
returned for the previous token is provided to @scheme[get-token]
for the next token. The mode should not be a mutable value; if
part of the stream is re-tokenized, the mode saved from the
immediately preceding token is given again to the
@scheme[get-token] function.
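For the mode-passing variant, a minimal hypothetical sketch that wraps the simple-get-token sketched earlier; the mode here is just a token count, which demonstrates the threading protocol without doing anything useful with it:

;; Sketch of the two-argument protocol: mode is #f on the first call,
;; and thereafter whatever the previous call returned as its sixth value.
(define (counting-get-token in mode)
  (let-values ([(lexeme type paren start end) (simple-get-token in)])
    ;; A real lexer would consult mode to decide how to read, e.g. to
    ;; continue a string or comment begun in an earlier token.
    (values lexeme type paren start end
            (if mode (add1 mode) 0))))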
The @scheme[get-token] function is usually implemented with a lexer using the
@scheme[parser-tools/lex] library. The
@scheme[get-token] function must obey the following invariants:
@itemize[
@item{
Every position in the buffer must be accounted for in exactly one
token.}
@item{
The token returned by @scheme[get-token] must rely only on the contents of the
input port argument. This means that the tokenization of some part of
the input cannot depend on earlier parts of the input.}
input port argument plus the mode argument. This constraint means that the
tokenization of some part of the input cannot depend on earlier parts of the
input except through the mode (and implicitly through the starting positions
for tokens).}
@item{
No edit to the buffer can change the tokenization of the buffer prior
to the token immediately preceding the edit. In the following
example this invariant does not hold. If the buffer contains:
@verbatim{" 1 2 3}
A change to the stream must not change the tokenization of the stream prior
to the token immediately preceding the change. In the following
example this invariant does not hold. If the buffer contains
@verbatim[#:indent 2]{" 1 2 3}
and the tokenizer treats the unmatched " as its own token (a string
error token), and separately tokenizes the 1 2 and 3, an edit to make
the buffer look like:
@verbatim{" 1 2 3"}
the buffer look like
@verbatim[#:indent 2]{" 1 2 3"}
would result in a single string token modifying previous tokens. To
handle these situations, @scheme[get-token] must treat the first line as a
single token.}]
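One hypothetical way to satisfy that last invariant for the string example is to make the unterminated-string rule consume everything that follows as a single 'error token, as in this sketch (reusing the requires from the earlier sketch; token names are again made up):

(define string-aware-get-token
  (lexer
   ;; A complete string literal is one 'string token.
   [(:: #\" (:* (:~ #\")) #\")
    (values lexeme 'string #f
            (position-offset start-pos) (position-offset end-pos))]
   ;; An unterminated string swallows the rest of the input as one
   ;; 'error token, so closing it later cannot change earlier tokens.
   [(:: #\" (:* (:~ #\")))
    (values lexeme 'error #f
            (position-offset start-pos) (position-offset end-pos))]
   [(:+ whitespace)
    (values lexeme 'white-space #f
            (position-offset start-pos) (position-offset end-pos))]
   [(:+ (:~ whitespace #\"))
    (values lexeme 'symbol #f
            (position-offset start-pos) (position-offset end-pos))]
   [(eof)
    (values lexeme 'eof #f
            (position-offset start-pos) (position-offset end-pos))]))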
The @scheme[pairs] argument is a list of different kinds of matching parens. The second
value returned by get-token is compared to this list to see how the
value returned by @scheme[get-token] is compared to this list to see how the
paren matcher should treat the token. An example: Suppose pairs is
@scheme['((|(| |)|) (|[| |]|) (begin end))]. This means that there
are three kinds of parens. Any token which has @scheme['begin] as its second