#lang syntax coloring; Scribble syntax coloring

svn: r15607

original commit: 1ba7cf0926a46122d7f759a2dccf086e2ba939a9
This commit is contained in:
Matthew Flatt 2009-07-28 18:06:14 +00:00
parent c59af01586
commit 1aa23be957
3 changed files with 92 additions and 37 deletions

View File

@ -32,6 +32,10 @@ added get-regions
(define (should-color-type? type) (define (should-color-type? type)
(not (memq type '(white-space no-color)))) (not (memq type '(white-space no-color))))
(define (make-data type mode) (cons type mode))
(define (data-type data) (car data))
(define (data-lexer-mode data) (cdr data))
(define -text<%> (define -text<%>
(interface (text:basic<%>) (interface (text:basic<%>)
start-colorer start-colorer
@ -102,6 +106,11 @@ added get-regions
invalid-tokens-start ; = +inf.0 invalid-tokens-start ; = +inf.0
;; The position right before the next token to be read ;; The position right before the next token to be read
current-pos current-pos
;; Thread a mode through lexing, and remember the mode
;; at each token boundary, so that lexing can depend on
;; previous tokens. This is the mode for lexing at
;; current-pos:
current-lexer-mode
;; Paren-matching ;; Paren-matching
parens parens
) )
@ -118,6 +127,7 @@ added get-regions
(new token-tree%) (new token-tree%)
+inf.0 +inf.0
start start
#f
(new paren-tree% (matches pairs)))) (new paren-tree% (matches pairs))))
(define lexer-states (list (make-new-lexer-state 0 'end))) (define lexer-states (list (make-new-lexer-state 0 'end)))
@ -228,6 +238,7 @@ added get-regions
(set-lexer-state-invalid-tokens-start! ls +inf.0) (set-lexer-state-invalid-tokens-start! ls +inf.0)
(set-lexer-state-up-to-date?! ls #t) (set-lexer-state-up-to-date?! ls #t)
(set-lexer-state-current-pos! ls (lexer-state-start-pos ls)) (set-lexer-state-current-pos! ls (lexer-state-start-pos ls))
(set-lexer-state-current-lexer-mode! ls #f)
(set-lexer-state-parens! ls (new paren-tree% (matches pairs)))) (set-lexer-state-parens! ls (new paren-tree% (matches pairs))))
lexer-states) lexer-states)
(set! restart-callback #f) (set! restart-callback #f)
@ -258,12 +269,12 @@ added get-regions
(set-lexer-state-invalid-tokens-start! ls (+ invalid-tokens-start length))) (set-lexer-state-invalid-tokens-start! ls (+ invalid-tokens-start length)))
(sync-invalid ls)))) (sync-invalid ls))))
(define/private (re-tokenize ls in in-start-pos enable-suspend) (define/private (re-tokenize ls in in-start-pos in-lexer-mode enable-suspend)
(let-values ([(lexeme type data new-token-start new-token-end) (let-values ([(lexeme type data new-token-start new-token-end new-lexer-mode)
(begin (begin
(enable-suspend #f) (enable-suspend #f)
(begin0 (begin0
(get-token in) (get-token in in-lexer-mode)
(enable-suspend #t)))]) (enable-suspend #t)))])
(unless (eq? 'eof type) (unless (eq? 'eof type)
(enable-suspend #f) (enable-suspend #f)
@ -271,6 +282,7 @@ added get-regions
(+ in-start-pos (sub1 new-token-end))) (+ in-start-pos (sub1 new-token-end)))
(let ((len (- new-token-end new-token-start))) (let ((len (- new-token-end new-token-start)))
(set-lexer-state-current-pos! ls (+ len (lexer-state-current-pos ls))) (set-lexer-state-current-pos! ls (+ len (lexer-state-current-pos ls)))
(set-lexer-state-current-lexer-mode! ls new-lexer-mode)
(sync-invalid ls) (sync-invalid ls)
(when (and should-color? (should-color-type? type) (not frozen?)) (when (and should-color? (should-color-type? type) (not frozen?))
(set! colors (set! colors
@ -286,7 +298,7 @@ added get-regions
;; version. In other words, the new greatly outweighs the tree ;; version. In other words, the new greatly outweighs the tree
;; operations. ;; operations.
;;(insert-last! tokens (new token-tree% (length len) (data type))) ;;(insert-last! tokens (new token-tree% (length len) (data type)))
(insert-last-spec! (lexer-state-tokens ls) len type) (insert-last-spec! (lexer-state-tokens ls) len (make-data type new-lexer-mode))
(send (lexer-state-parens ls) add-token data len) (send (lexer-state-parens ls) add-token data len)
(cond (cond
((and (not (send (lexer-state-invalid-tokens ls) is-empty?)) ((and (not (send (lexer-state-invalid-tokens ls) is-empty?))
@ -301,7 +313,7 @@ added get-regions
(enable-suspend #t)) (enable-suspend #t))
(else (else
(enable-suspend #t) (enable-suspend #t)
(re-tokenize ls in in-start-pos enable-suspend))))))) (re-tokenize ls in in-start-pos new-lexer-mode enable-suspend)))))))
(define/private (do-insert/delete/ls ls edit-start-pos change-length) (define/private (do-insert/delete/ls ls edit-start-pos change-length)
(unless (lexer-state-up-to-date? ls) (unless (lexer-state-up-to-date? ls)
@ -318,7 +330,14 @@ added get-regions
(if (send (lexer-state-invalid-tokens ls) is-empty?) (if (send (lexer-state-invalid-tokens ls) is-empty?)
+inf.0 +inf.0
(+ (lexer-state-start-pos ls) orig-token-end change-length))) (+ (lexer-state-start-pos ls) orig-token-end change-length)))
(set-lexer-state-current-pos! ls (+ (lexer-state-start-pos ls) orig-token-start)) (let ([start (+ (lexer-state-start-pos ls) orig-token-start)])
(set-lexer-state-current-pos! ls start)
(set-lexer-state-current-lexer-mode! ls
(if (= start (lexer-state-start-pos ls))
#f
(begin
(send valid-tree search-max!)
(data-lexer-mode (send valid-tree get-root-data))))))
(set-lexer-state-up-to-date?! ls #f) (set-lexer-state-up-to-date?! ls #f)
(queue-callback (λ () (colorer-callback)) #f))) (queue-callback (λ () (colorer-callback)) #f)))
((>= edit-start-pos (lexer-state-invalid-tokens-start ls)) ((>= edit-start-pos (lexer-state-invalid-tokens-start ls))
@ -340,7 +359,13 @@ added get-regions
(send (lexer-state-parens ls) truncate tok-start) (send (lexer-state-parens ls) truncate tok-start)
(set-lexer-state-tokens! ls valid-tree) (set-lexer-state-tokens! ls valid-tree)
(set-lexer-state-invalid-tokens-start! ls (+ change-length (lexer-state-invalid-tokens-start ls))) (set-lexer-state-invalid-tokens-start! ls (+ change-length (lexer-state-invalid-tokens-start ls)))
(set-lexer-state-current-pos! ls (+ (lexer-state-start-pos ls) tok-start)))))) (let ([start (+ (lexer-state-start-pos ls) tok-start)])
(set-lexer-state-current-pos! ls start)
(if (= start (lexer-state-start-pos ls))
#f
(begin
(send valid-tree search-max!)
(data-lexer-mode (send valid-tree get-root-data)))))))))
(define/private (do-insert/delete edit-start-pos change-length) (define/private (do-insert/delete edit-start-pos change-length)
(unless (or stopped? force-stop?) (unless (or stopped? force-stop?)
@ -378,6 +403,7 @@ added get-regions
(λ (x) #f)) (λ (x) #f))
(enable-suspend #t))) (enable-suspend #t)))
(lexer-state-current-pos ls) (lexer-state-current-pos ls)
(lexer-state-current-lexer-mode ls)
enable-suspend)) enable-suspend))
lexer-states))))) lexer-states)))))
(set! rev (get-revision-number))) (set! rev (get-revision-number)))
@ -427,7 +453,14 @@ added get-regions
(reset-tokens) (reset-tokens)
(set! should-color? (preferences:get 'framework:coloring-active)) (set! should-color? (preferences:get 'framework:coloring-active))
(set! token-sym->style token-sym->style-) (set! token-sym->style token-sym->style-)
(set! get-token get-token-) (set! get-token (if (procedure-arity-includes? get-token- 2)
;; New interface: thread through a mode:
get-token-
;; Old interface: no mode
(lambda (in mode)
(let-values ([(lexeme type data new-token-start new-token-end)
(get-token- in)])
(values lexeme type data new-token-start new-token-end #f)))))
(set! pairs pairs-) (set! pairs pairs-)
(for-each (for-each
(lambda (ls) (lambda (ls)
@ -739,7 +772,7 @@ added get-regions
(let ([tokens (lexer-state-tokens ls)]) (let ([tokens (lexer-state-tokens ls)])
(tokenize-to-pos ls position) (tokenize-to-pos ls position)
(send tokens search! (- position (lexer-state-start-pos ls))) (send tokens search! (- position (lexer-state-start-pos ls)))
(send tokens get-root-data))))) (data-type (send tokens get-root-data))))))
(define/private (tokenize-to-pos ls position) (define/private (tokenize-to-pos ls position)
(when (and (not (lexer-state-up-to-date? ls)) (when (and (not (lexer-state-up-to-date? ls))
@ -768,8 +801,8 @@ added get-regions
(send tokens search! (- (if (eq? direction 'backward) (sub1 position) position) (send tokens search! (- (if (eq? direction 'backward) (sub1 position) position)
start-pos)) start-pos))
(cond (cond
((or (eq? 'white-space (send tokens get-root-data)) ((or (eq? 'white-space (data-type (send tokens get-root-data)))
(and comments? (eq? 'comment (send tokens get-root-data)))) (and comments? (eq? 'comment (data-type (send tokens get-root-data)))))
(skip-whitespace (+ start-pos (skip-whitespace (+ start-pos
(if (eq? direction 'forward) (if (eq? direction 'forward)
(send tokens get-root-end-position) (send tokens get-root-end-position)

View File

@ -6,7 +6,7 @@
(require string-constants (require string-constants
scheme/class scheme/class
mred/mred-sig mred/mred-sig
syntax-color/scheme-lexer syntax-color/module-lexer
"collapsed-snipclass-helpers.ss" "collapsed-snipclass-helpers.ss"
"sig.ss" "sig.ss"
"../gui-utils.ss" "../gui-utils.ss"
@ -1172,14 +1172,14 @@
(preferences:add-callback (preferences:add-callback
'framework:tabify 'framework:tabify
(lambda (k v) (set! tabify-pref v))) (lambda (k v) (set! tabify-pref v)))
(define/private (scheme-lexer-wrapper in) (define/private (scheme-lexer-wrapper in mode)
(let-values (((lexeme type paren start end) (scheme-lexer in))) (let-values (((lexeme type paren start end mode) (module-lexer in mode)))
(cond (cond
((and (eq? type 'symbol) ((and (eq? type 'symbol)
(get-keyword-type lexeme tabify-pref)) (get-keyword-type lexeme tabify-pref))
(values lexeme 'keyword paren start end)) (values lexeme 'keyword paren start end mode))
(else (else
(values lexeme type paren start end))))) (values lexeme type paren start end mode)))))
(define/override (put-file text sup directory default-name) (define/override (put-file text sup directory default-name)
(parameterize ([finder:default-extension "ss"] (parameterize ([finder:default-extension "ss"]
@ -1188,7 +1188,7 @@
;; don't call the surrogate's super, since it sets the default extension ;; don't call the surrogate's super, since it sets the default extension
(sup directory default-name))) (sup directory default-name)))
(super-new (get-token (lambda (in) (scheme-lexer-wrapper in))) (super-new (get-token (lambda (in mode) (scheme-lexer-wrapper in mode)))
(token-sym->style short-sym->style-name) (token-sym->style short-sym->style-name)
(matches '((|(| |)|) (matches '((|(| |)|)
(|[| |]|) (|[| |]|)

View File

@ -9,19 +9,29 @@
that knows how to color itself. It also describes how to query the that knows how to color itself. It also describes how to query the
lexical and s-expression structure of the text. lexical and s-expression structure of the text.
@defmethod*[(((start-colorer (token-sym->style (-> symbol? string?)) @defmethod*[(((start-colorer (token-sym->style (-> symbol? string?))
(get-token (-> input-port? (values any/c (get-token (or/c (-> input-port?
(values any/c
symbol? symbol?
(or/c false? symbol?) (or/c false? symbol?)
exact-nonnegative-integer? exact-nonnegative-integer?
exact-nonnegative-integer?))) exact-nonnegative-integer?))
(-> input-port?
any/c
(values any/c
symbol?
(or/c false? symbol?)
exact-nonnegative-integer?
exact-nonnegative-integer?
any/c))))
(pairs (listof (list/p symbol? symbol?)))) void))]{ (pairs (listof (list/p symbol? symbol?)))) void))]{
Starts tokenizing the buffer for coloring and parenthesis matching. Starts tokenizing the buffer for coloring and parenthesis matching.
The @scheme[token-sym->style] argument will be passed the first return symbol from @scheme[get-token],
and it should return the style-name that the token should be colored.
The @scheme[token-sym->style] argument will be passed the first return symbol from @scheme[get-token] The @scheme[get-token] argument takes an input port and optionally a mode value.
and should return the style-name that the token should be colored. When it accepts just an input port, @scheme[get-token] returns the next token as 5 values:
The @scheme[get-token] argument takes an input port and returns the next token as 5 values:
@itemize[ @itemize[
@item{ @item{
An unused value. This value is intended to represent the textual An unused value. This value is intended to represent the textual
@ -42,32 +52,44 @@
@item{ @item{
The ending position of the token.}] The ending position of the token.}]
The @scheme[get-token] function will usually be implemented with a lexer using the When @scheme[get-token] accepts a mode value in addition to an
@scheme[parser-tools/lex] library. input port, it must also return an extra result, which is a new
get-token must obey the following invariants: mode. When @scheme[get-token] is called for the beginning of a
stream, the mode argument is @scheme[#f]. Thereafter, the mode
returned for the previous token is provided to @scheme[get-token]
for the next token. The mode should not be a mutable value; if
part of the stream is re-tokenized, the mode saved from the
immediately preceding token is given again to the
@scheme[get-token] function.
The @scheme[get-token] function will usually be implemented with a lexer using the The @scheme[get-token] function is usually implemented with a lexer using the
@scheme[parser-tools/lex] library. The
@scheme[get-token] function must obey the following invariants:
@itemize[ @itemize[
@item{ @item{
Every position in the buffer must be accounted for in exactly one Every position in the buffer must be accounted for in exactly one
token.} token.}
@item{ @item{
The token returned by @scheme[get-token] must rely only on the contents of the The token returned by @scheme[get-token] must rely only on the contents of the
input port argument. This means that the tokenization of some part of input port argument plus the mode argument. This constraint means that the
the input cannot depend on earlier parts of the input.} tokenization of some part of the input cannot depend on earlier parts of the
input except through the mode (and implicitly through the starting positions
for tokens).}
@item{ @item{
No edit to the buffer can change the tokenization of the buffer prior A change to the stream must not change the tokenization of the stream prior
to the token immediately preceding the edit. In the following to the token immediately preceding the change. In the following
example this invariant does not hold. If the buffer contains: example this invariant does not hold. If the buffer contains
@verbatim{" 1 2 3} @verbatim[#:indent 2]{" 1 2 3}
and the tokenizer treats the unmatched " as its own token (a string and the tokenizer treats the unmatched " as its own token (a string
error token), and separately tokenizes the 1 2 and 3, an edit to make error token), and separately tokenizes the 1 2 and 3, an edit to make
the buffer look like: the buffer look like
@verbatim{" 1 2 3"} @verbatim[#:indent 2]{" 1 2 3"}
would result in a single string token modifying previous tokens. To would result in a single string token modifying previous tokens. To
handle these situations, @scheme[get-token] must treat the first line as a handle these situations, @scheme[get-token] must treat the first line as a
single token.}] single token.}]
The @scheme[pairs] argument is a list of different kinds of matching parens. The second The @scheme[pairs] argument is a list of different kinds of matching parens. The second
value returned by get-token is compared to this list to see how the value returned by @scheme[get-token] is compared to this list to see how the
paren matcher should treat the token. An example: Suppose pairs is paren matcher should treat the token. An example: Suppose pairs is
@scheme['((|(| |)|) (|[| |]|) (begin end))]. This means that there @scheme['((|(| |)|) (|[| |]|) (begin end))]. This means that there
are three kinds of parens. Any token which has @scheme['begin] as its second are three kinds of parens. Any token which has @scheme['begin] as its second