#lang syntax coloring; Scribble syntax coloring

svn: r15607 original commit: 1ba7cf0926a46122d7f759a2dccf086e2ba939a9
2009-07-28 18:06:14 +00:00 · 2009-07-28 18:06:14 +00:00 · 1aa23be957
commit 1aa23be957
parent c59af01586
3 changed files with 92 additions and 37 deletions
--- a/collects/framework/private/color.ss
+++ b/collects/framework/private/color.ss
@ -32,6 +32,10 @@ added get-regions
 (define (should-color-type? type)
  (not (memq type '(white-space no-color))))

+(define (make-data type mode) (cons type mode))
+(define (data-type data) (car data))
+(define (data-lexer-mode data) (cdr data))
+
 (define -text<%>
  (interface (text:basic<%>)
    start-colorer
@ -102,6 +106,11 @@ added get-regions
       invalid-tokens-start ; = +inf.0
       ;; The position right before the next token to be read
       current-pos
+       ;; Thread a mode through lexing, and remember the mode
+       ;;  at each token boundary, so that lexing can depend on
+       ;;  previous tokens. This is the mode for lexing at
+       ;;  current-pos:
+       current-lexer-mode
       ;; Paren-matching
       parens 
       )
@ -118,6 +127,7 @@ added get-regions
                        (new token-tree%)
                        +inf.0
                        start
+                        #f
                        (new paren-tree% (matches pairs))))

    (define lexer-states (list (make-new-lexer-state 0 'end)))
@ -228,6 +238,7 @@ added get-regions
         (set-lexer-state-invalid-tokens-start! ls +inf.0)
         (set-lexer-state-up-to-date?! ls #t)
         (set-lexer-state-current-pos! ls (lexer-state-start-pos ls))
+         (set-lexer-state-current-lexer-mode! ls #f)
         (set-lexer-state-parens! ls (new paren-tree% (matches pairs))))
       lexer-states)
      (set! restart-callback #f)
@ -258,12 +269,12 @@ added get-regions
            (set-lexer-state-invalid-tokens-start! ls (+ invalid-tokens-start length)))
          (sync-invalid ls))))
    
-    (define/private (re-tokenize ls in in-start-pos enable-suspend)
-      (let-values ([(lexeme type data new-token-start new-token-end) 
+    (define/private (re-tokenize ls in in-start-pos in-lexer-mode enable-suspend)
+      (let-values ([(lexeme type data new-token-start new-token-end new-lexer-mode) 
                    (begin
                      (enable-suspend #f)
                      (begin0
-                       (get-token in)
+                       (get-token in in-lexer-mode)
                       (enable-suspend #t)))])
        (unless (eq? 'eof type)
          (enable-suspend #f)
@ -271,6 +282,7 @@ added get-regions
                     (+ in-start-pos (sub1 new-token-end)))
          (let ((len (- new-token-end new-token-start)))
            (set-lexer-state-current-pos! ls (+ len (lexer-state-current-pos ls)))
+            (set-lexer-state-current-lexer-mode! ls new-lexer-mode)
            (sync-invalid ls)
            (when (and should-color? (should-color-type? type) (not frozen?))
              (set! colors
@ -286,7 +298,7 @@ added get-regions
            ;; version.  In other words, the new greatly outweighs the tree
            ;; operations.
            ;;(insert-last! tokens (new token-tree% (length len) (data type)))
-            (insert-last-spec! (lexer-state-tokens ls) len type)
+            (insert-last-spec! (lexer-state-tokens ls) len (make-data type new-lexer-mode))
            (send (lexer-state-parens ls) add-token data len)
            (cond
             ((and (not (send (lexer-state-invalid-tokens ls) is-empty?))
@ -301,7 +313,7 @@ added get-regions
              (enable-suspend #t))
             (else
              (enable-suspend #t)
-              (re-tokenize ls in in-start-pos enable-suspend)))))))
+              (re-tokenize ls in in-start-pos new-lexer-mode enable-suspend)))))))
    
    (define/private (do-insert/delete/ls ls edit-start-pos change-length)
      (unless (lexer-state-up-to-date? ls)
@ -318,7 +330,14 @@ added get-regions
           (if (send (lexer-state-invalid-tokens ls) is-empty?)
               +inf.0
               (+ (lexer-state-start-pos ls) orig-token-end change-length)))
-          (set-lexer-state-current-pos! ls (+ (lexer-state-start-pos ls) orig-token-start))
+          (let ([start (+ (lexer-state-start-pos ls) orig-token-start)])
+            (set-lexer-state-current-pos! ls start)
+            (set-lexer-state-current-lexer-mode! ls
+                                                 (if (= start (lexer-state-start-pos ls))
+                                                     #f
+                                                     (begin
+                                                       (send valid-tree search-max!)
+                                                       (data-lexer-mode (send valid-tree get-root-data))))))
          (set-lexer-state-up-to-date?! ls #f)
          (queue-callback (λ () (colorer-callback)) #f)))
       ((>= edit-start-pos (lexer-state-invalid-tokens-start ls))
@ -340,7 +359,13 @@ added get-regions
          (send (lexer-state-parens ls) truncate tok-start)
          (set-lexer-state-tokens! ls valid-tree)
          (set-lexer-state-invalid-tokens-start! ls (+ change-length (lexer-state-invalid-tokens-start ls)))
-          (set-lexer-state-current-pos! ls (+ (lexer-state-start-pos ls) tok-start))))))
+          (let ([start (+ (lexer-state-start-pos ls) tok-start)])
+            (set-lexer-state-current-pos! ls start)
+            (if (= start (lexer-state-start-pos ls))
+                #f
+                (begin
+                  (send valid-tree search-max!)
+                  (data-lexer-mode (send valid-tree get-root-data)))))))))

    (define/private (do-insert/delete edit-start-pos change-length)
      (unless (or stopped? force-stop?)
@ -378,6 +403,7 @@ added get-regions
                                                                 (λ (x) #f))
                                         (enable-suspend #t)))
                                      (lexer-state-current-pos ls)
+                                      (lexer-state-current-lexer-mode ls)
                                      enable-suspend))
                       lexer-states)))))
          (set! rev (get-revision-number)))
@ -427,7 +453,14 @@ added get-regions
        (reset-tokens)
        (set! should-color? (preferences:get 'framework:coloring-active))
        (set! token-sym->style token-sym->style-)
-        (set! get-token get-token-)
+        (set! get-token (if (procedure-arity-includes? get-token- 2)
+                            ;; New interface: thread through a mode:
+                            get-token-
+                            ;; Old interface: no mode
+                            (lambda (in mode)
+                              (let-values ([(lexeme type data new-token-start new-token-end) 
+                                            (get-token- in)])
+                                (values lexeme type data new-token-start new-token-end #f)))))
        (set! pairs pairs-)
        (for-each
         (lambda (ls)
@ -739,7 +772,7 @@ added get-regions
             (let ([tokens (lexer-state-tokens ls)])
               (tokenize-to-pos ls position)
               (send tokens search! (- position (lexer-state-start-pos ls)))
-               (send tokens get-root-data)))))
+               (data-type (send tokens get-root-data))))))
    
    (define/private (tokenize-to-pos ls position)
      (when (and (not (lexer-state-up-to-date? ls)) 
@ -768,8 +801,8 @@ added get-regions
                (send tokens search! (- (if (eq? direction 'backward) (sub1 position) position)
                                        start-pos))
                (cond
-                 ((or (eq? 'white-space (send tokens get-root-data))
-                      (and comments? (eq? 'comment (send tokens get-root-data))))
+                 ((or (eq? 'white-space (data-type (send tokens get-root-data)))
+                      (and comments? (eq? 'comment (data-type (send tokens get-root-data)))))
                  (skip-whitespace (+ start-pos
                                      (if (eq? direction 'forward)
                                          (send tokens get-root-end-position)
--- a/collects/framework/private/scheme.ss
+++ b/collects/framework/private/scheme.ss
@ -6,7 +6,7 @@
 (require string-constants
         scheme/class
         mred/mred-sig
-         syntax-color/scheme-lexer
+         syntax-color/module-lexer
         "collapsed-snipclass-helpers.ss"
         "sig.ss"
         "../gui-utils.ss"
@ -1172,14 +1172,14 @@
    (preferences:add-callback
     'framework:tabify
     (lambda (k v) (set! tabify-pref v)))
-    (define/private (scheme-lexer-wrapper in)
-      (let-values (((lexeme type paren start end) (scheme-lexer in)))
+    (define/private (scheme-lexer-wrapper in mode)
+      (let-values (((lexeme type paren start end mode) (module-lexer in mode)))
        (cond
          ((and (eq? type 'symbol)
                (get-keyword-type lexeme tabify-pref))
-           (values lexeme 'keyword paren start end))
+           (values lexeme 'keyword paren start end mode))
          (else
-           (values lexeme type paren start end)))))
+           (values lexeme type paren start end mode)))))
    
    (define/override (put-file text sup directory default-name)
      (parameterize ([finder:default-extension "ss"]
@ -1188,7 +1188,7 @@
        ;; don't call the surrogate's super, since it sets the default extension
        (sup directory default-name)))
    
-    (super-new (get-token (lambda (in) (scheme-lexer-wrapper in)))
+    (super-new (get-token (lambda (in mode) (scheme-lexer-wrapper in mode)))
               (token-sym->style short-sym->style-name)
               (matches '((|(| |)|)
                          (|[| |]|)
--- a/collects/scribblings/framework/color.scrbl
+++ b/collects/scribblings/framework/color.scrbl
@ -9,19 +9,29 @@
  that knows how to color itself.  It also describes how to query the
  lexical and s-expression structure of the text.
  @defmethod*[(((start-colorer (token-sym->style (-> symbol? string?)) 
-                               (get-token (-> input-port? (values any/c 
-                                                                  symbol? 
-                                                                  (or/c false? symbol?) 
-                                                                  exact-nonnegative-integer?
-                                                                  exact-nonnegative-integer?)))
+                               (get-token (or/c (-> input-port? 
+                                                    (values any/c 
+                                                            symbol? 
+                                                            (or/c false? symbol?) 
+                                                            exact-nonnegative-integer?
+                                                            exact-nonnegative-integer?))
+                                                (-> input-port? 
+                                                    any/c
+                                                    (values any/c 
+                                                            symbol? 
+                                                            (or/c false? symbol?) 
+                                                            exact-nonnegative-integer?
+                                                            exact-nonnegative-integer?
+                                                            any/c))))
                               (pairs (listof (list/p symbol? symbol?)))) void))]{
    Starts tokenizing the buffer for coloring and parenthesis matching.

+    The @scheme[token-sym->style] argument will be passed the first return symbol from @scheme[get-token],
+    and it should return the style-name that the token should be colored.

-    The @scheme[token-sym->style] argument will be passed the first return symbol from @scheme[get-token]
-    and should return the style-name that the token should be colored.
+    The @scheme[get-token] argument takes an input port and optionally a mode value.
+    When it accepts just an input port, @scheme[get-token] returns the next token as 5 values:

-    The @scheme[get-token] argument takes an input port and returns the next token as 5 values:
    @itemize[
    @item{
    An unused value.  This value is intended to represent the textual
@ -42,32 +52,44 @@
    @item{
    The ending position of the token.}]

-    The @scheme[get-token] function will usually be implemented with a lexer using the 
-    @scheme[parser-tools/lex] library.
-    get-token must obey the following invariants:
+    When @scheme[get-token] accepts a mode value in addition to an
+    input port, it must also return an extra result, which is a new
+    mode. When @scheme[get-token] is called for the beginning on a
+    stream, the mode argument is @scheme[#f].  Thereafter, the mode
+    returned for the previous token is provided to @scheme[get-token]
+    for the next token. The mode should not be a mutable value; if
+    part of the stream is re-tokenized, the mode saved from the
+    immediately preceding token is given again to the
+    @scheme[get-token] function.
+
+    The @scheme[get-token] function is usually be implemented with a lexer using the 
+    @scheme[parser-tools/lex] library. The
+    @scheme[get-token] function must obey the following invariants:
    @itemize[
    @item{
    Every position in the buffer must be accounted for in exactly one
    token.}
    @item{
    The token returned by @scheme[get-token] must rely only on the contents of the
-    input port argument.  This means that the tokenization of some part of
-    the input cannot depend on earlier parts of the input.}
+    input port argument plus the mode argument. This constraint  means that the
+    tokenization of some part of the input cannot depend on earlier parts of the 
+    input except through the mode (and implicitly through the starting positions
+    for tokens).}
    @item{
-    No edit to the buffer can change the tokenization of the buffer prior
-    to the token immediately preceding the edit.  In the following
-    example this invariant does not hold.  If the buffer contains:
-    @verbatim{" 1 2 3}
+    A change to the stream must not change the tokenization of the stream prior
+    to the token immediately preceding the change.  In the following
+    example this invariant does not hold. If the buffer contains
+    @verbatim[#:indent 2]{" 1 2 3}
    and the tokenizer treats the unmatched " as its own token (a string
    error token), and separately tokenizes the 1 2 and 3, an edit to make
-    the buffer look like:
-    @verbatim{" 1 2 3"}
+    the buffer look like
+    @verbatim[#:indent 2]{" 1 2 3"}
    would result in a single string token modifying previous tokens.  To
    handle these situations, @scheme[get-token] must treat the first line as a
    single token.}]

    The @scheme[pairs] argument is a list of different kinds of matching parens.  The second
-    value returned by get-token is compared to this list to see how the
+    value returned by @scheme[get-token] is compared to this list to see how the
    paren matcher should treat the token.  An example: Suppose pairs is
    @scheme['((|(| |)|) (|[| |]|) (begin end))].  This means that there
    are three kinds of parens.  Any token which has @scheme['begin] as its second