extend the lexer <-> framework's color:text api

to let lexers say "call me again before you change the buffer"

also, use this in the 2d lexer
This commit is contained in:
Robby Findler 2013-03-10 19:32:52 -05:00
parent 282cdcbb5b
commit 761054890d
6 changed files with 113 additions and 63 deletions

View File

@ -11,6 +11,7 @@ added get-regions
syntax-color/token-tree
syntax-color/paren-tree
syntax-color/default-lexer
syntax-color/lexer-contract
string-constants
"../preferences.rkt"
"sig.rkt"
@ -303,7 +304,7 @@ added get-regions
(set-lexer-state-invalid-tokens-mode! ls mode))
(sync-invalid ls))))
(define/private (re-tokenize-move-to-next-ls start-time did-something?)
(define/private (re-tokenize-move-to-next-ls start-time ok-to-stop?)
(cond
[(null? re-tokenize-lses)
;; done: return #t
@ -317,25 +318,29 @@ added get-regions
(lexer-state-end-pos ls)
(λ (x) #f)))
(port-count-lines! in)
(continue-re-tokenize start-time did-something? ls in
(continue-re-tokenize start-time ok-to-stop? ls in
(lexer-state-current-pos ls)
(lexer-state-current-lexer-mode ls))]))
(define re-tokenize-lses #f)
(define/private (continue-re-tokenize start-time did-something? ls in in-start-pos lexer-mode)
(define/private (continue-re-tokenize start-time ok-to-stop? ls in in-start-pos lexer-mode)
(cond
[(and did-something? ((+ start-time 20.0) . <= . (current-inexact-milliseconds)))
[(and ok-to-stop? ((+ start-time 20.0) . <= . (current-inexact-milliseconds)))
#f]
[else
(define-values (_line1 _col1 pos-before) (port-next-location in))
(define-values (lexeme type data new-token-start new-token-end backup-delta new-lexer-mode)
(define-values (lexeme type data new-token-start new-token-end backup-delta new-lexer-mode/cont)
(get-token in in-start-pos lexer-mode))
(define-values (_line2 _col2 pos-after) (port-next-location in))
(define new-lexer-mode (if (dont-stop? new-lexer-mode/cont)
(dont-stop-val new-lexer-mode/cont)
new-lexer-mode/cont))
(define next-ok-to-stop? (not (dont-stop? new-lexer-mode/cont)))
(cond
[(eq? 'eof type)
(set-lexer-state-up-to-date?! ls #t)
(re-tokenize-move-to-next-ls start-time #t)]
(re-tokenize-move-to-next-ls start-time next-ok-to-stop?)]
[else
(unless (<= pos-before new-token-start pos-after)
(error 'color:text<%>
@ -369,9 +374,9 @@ added get-regions
(lexer-state-invalid-tokens ls))
(set-lexer-state-invalid-tokens-start! ls +inf.0)
(set-lexer-state-up-to-date?! ls #t)
(re-tokenize-move-to-next-ls start-time #t)]
(re-tokenize-move-to-next-ls start-time next-ok-to-stop?)]
[else
(continue-re-tokenize start-time #t ls in in-start-pos new-lexer-mode)]))])]))
(continue-re-tokenize start-time next-ok-to-stop? ls in in-start-pos new-lexer-mode)]))])]))
(define/private (add-colorings type in-start-pos new-token-start new-token-end)
(define sp (+ in-start-pos (sub1 new-token-start)))
@ -511,7 +516,11 @@ added get-regions
[else (if (lexer-state-up-to-date? (car lexer-states))
(loop (cdr lexer-states))
lexer-states)])))
(define finished? (re-tokenize-move-to-next-ls (current-inexact-milliseconds) #f))
(define finished? (re-tokenize-move-to-next-ls
(current-inexact-milliseconds)
;; #f initially here ensures we do at least
;; one step of tokenization before giving up
#f))
(c-log (format "coloring stopped ~a" (if finished? "because it finished" "with more to do")))
(when finished?
(update-lexer-state-observers)

View File

@ -1,7 +1,6 @@
#lang scribble/doc
@(require scribble/manual scribble/extract)
@(require (for-label framework))
@(require (for-label scheme/gui))
@(require (for-label framework scheme/gui syntax-color/lexer-contract))
@title{Color}
@definterface[color:text<%> (text:basic<%>)]{
@ -17,7 +16,7 @@
(or/c exact-positive-integer? #f)))
(-> input-port?
exact-nonnegative-integer?
any/c
(not/c dont-stop?)
(values any/c
symbol?
(or/c symbol? #f)
@ -57,20 +56,36 @@
is also relative to the port's location, just like the previous value.}]
When @racket[get-token] accepts an offset and mode value in addition to an
input port, it must also return two extra results, which are a backup
distance and new mode. The offset given to @racket[get-token] can be added
input port, it must also return two extra results.
The offset given to @racket[get-token] can be added
to the position of the input port to obtain absolute coordinates within a
text stream. The mode argument allows @racket[get-token] to communicate
text stream. The extra two results are
@itemize[@item{a new mode;
The mode argument allows @racket[get-token] to communicate
information from earlier parsing to later. When @racket[get-token] is
called for the beginning on a stream, the mode argument is @racket[#f];
thereafter, the mode returned for the previous token is provided to
@racket[get-token] for the next token. The mode should not be a mutable
@racket[get-token] for the next token.
If the mode result is a @racket[dont-stop] struct, then the value inside
the struct is considered the new mode, and the colorer is guaranteed
not to be interrupted until at least the next call to this tokenizing
function that does not return a @racket[dont-stop] struct (unless, of course,
it returns an eof token, in which case the new mode result is ignored).
This is useful, for example, when a lexer has to read ahead in the buffer
to decide on the tokens at this point; then that read-ahead will be
inconsistent if an edit happens; returning a @racket[dont-stop]
struct ensures that no changes to the buffer happen.
The mode should not be a mutable
value; if part of the stream is re-tokenized, the mode saved from the
immediately preceding token is given again to the @racket[get-token]
function. The backup distance returned by @racket[get-token] indicates the
function.}
@item{a backup distance;
The backup distance returned by @racket[get-token] indicates the
maximum number of characters to back up (counting from the start of the
token) and for re-parsing after a change to the editor within the token's
region.
region.}]
The @racket[get-token] function must obey the following invariants:
@itemize[

View File

@ -1,17 +1,9 @@
#lang racket/base
(require racket/contract/base
unstable/options)
(provide lexer/c)
(provide lexer/c (struct-out dont-stop))
(define (end/c start type)
(cond
[(equal? 'eof type)
(or/c exact-positive-integer? #f)]
[start
(and/c exact-positive-integer?
(>/c start))]
[else
#f]))
(struct dont-stop (val) #:transparent)
(define lexer/c
(option/c
@ -23,7 +15,7 @@
[end (start type) (end/c start type)]))
(->i ([in (and/c input-port? port-counts-lines?)]
[offset exact-nonnegative-integer?]
[mode any/c])
[mode (not/c dont-stop?)])
(values [txt any/c]
[type symbol?]
[paren (or/c symbol? #f)]
@ -34,3 +26,13 @@
#:tester (λ (x) (and (procedure? x)
(or (procedure-arity-includes? x 1)
(procedure-arity-includes? x 3))))))
(define (end/c start type)
(cond
[(equal? 'eof type)
(or/c exact-positive-integer? #f)]
[start
(and/c exact-positive-integer?
(>/c start))]
[else
#f]))

View File

@ -108,7 +108,10 @@ to delegate to the scheme-lexer (in the 'no-lang-line mode).
;; #lang-selected language consumes and produces a mode:
(let-values ([(lexeme type data new-token-start new-token-end backup-delta new-mode)
((car mode) in offset (cdr mode))])
(values lexeme type data new-token-start new-token-end backup-delta (cons (car mode) new-mode)))]
(values lexeme type data new-token-start new-token-end backup-delta
(if (dont-stop? new-mode)
(dont-stop (cons (car mode) (dont-stop-val new-mode)))
(cons (car mode) new-mode))))]
[else
;; #lang-selected language (or default) doesn't deal with modes:
(let-values ([(lexeme type data new-token-start new-token-end)

View File

@ -6,7 +6,7 @@
syntax-color/module-lexer
syntax-color/scribble-lexer
syntax-color/default-lexer
framework/framework
framework
framework/private/color
racket))
@ -31,6 +31,21 @@ Parenthesis matching code built on top of @racket[token-tree%].
@; ----------------------------------------------------------------------
@section{Lexer Contract & the Don't Stop struct}
@defmodule[syntax-color/lexer-contract]
@defthing[lexer/c contract?]{
Checks to be sure a lexing function is well-behaved. For more
details, see @xmethod[color:text<%> start-colorer].
}
@defstruct[dont-stop ([val any/c])]{
A struct used to indicate to the lexer that it should not
allow itself to be interrupted. For more details,
see @xmethod[color:text<%> start-colorer].
}
@section{Racket Lexer}
@defmodule[syntax-color/racket-lexer]
@ -38,9 +53,9 @@ Parenthesis matching code built on top of @racket[token-tree%].
@defproc[(racket-lexer [in input-port?])
(values (or/c string? eof-object?)
symbol?
(or/c symbol? false/c)
(or/c number? false/c)
(or/c number? false/c))]{
(or/c symbol? #f)
(or/c number? #f)
(or/c number? #f))]{
A lexer for Racket, including reader extensions (@secref[#:doc'(lib
"scribblings/reference/reference.scrbl")]{Reader_Extension}), built
@ -67,9 +82,9 @@ The @racket[racket-lexer] function returns 5 values:
@defproc[(racket-lexer/status [in input-port?])
(values (or/c string? eof-object?)
symbol?
(or/c symbol? false/c)
(or/c number? false/c)
(or/c number? false/c)
(or/c symbol? #f)
(or/c number? #f)
(or/c number? #f)
(or/c 'datum 'open 'close 'continue))]{
Like @racket[racket-lexer], but returns an extra value. The last
@ -81,9 +96,9 @@ as whitespace) on a datum.}
@defproc[(racket-nobar-lexer/status [in input-port?])
(values (or/c string? eof-object?)
symbol?
(or/c symbol? false/c)
(or/c number? false/c)
(or/c number? false/c)
(or/c symbol? #f)
(or/c number? #f)
(or/c number? #f)
(or/c 'datum 'open 'close 'continue))]{
Like @racket[racket-lexer/status], except it treats
@ -97,9 +112,9 @@ This function is used by @racket[scribble-lexer].}
@defproc[(default-lexer [in input-port?])
(values (or/c string? eof-object?)
symbol?
(or/c symbol? false/c)
(or/c number? false/c)
(or/c number? false/c))]
(or/c symbol? #f)
(or/c number? #f)
(or/c number? #f))]
A lexer that only identifies @litchar{(}, @litchar{)}, @litchar{[},
@litchar{]}, @litchar["{"], and @litchar["}"] built specifically for
@ -132,9 +147,9 @@ A lexer that only identifies @litchar{(}, @litchar{)}, @litchar{[},
(cons/c (-> input-port? any/c any) any/c))])
(values (or/c string? eof-object?)
symbol?
(or/c symbol? false/c)
(or/c number? false/c)
(or/c number? false/c)
(or/c symbol? #f)
(or/c number? #f)
(or/c number? #f)
exact-nonnegative-integer?
(or/c #f
(-> input-port? any)
@ -191,9 +206,9 @@ Like @racket[racket-lexer], but with several differences:
[mode any/c])
(values (or/c string? eof-object?)
symbol?
(or/c symbol? false/c)
(or/c number? false/c)
(or/c number? false/c)
(or/c symbol? #f)
(or/c number? #f)
(or/c number? #f)
exact-nonnegative-integer?
any/c)]{
@ -206,9 +221,9 @@ Like @racket[racket-lexer], but for Racket extended with Scribble's
[mode any/c])
(values (or/c string? eof-object?)
symbol?
(or/c symbol? false/c)
(or/c number? false/c)
(or/c number? false/c)
(or/c symbol? #f)
(or/c number? #f)
(or/c number? #f)
exact-nonnegative-integer?
any/c)]{
@ -234,7 +249,7 @@ FIXME: many methods are not yet documented.
Creates a token tree with a single element.
}
@defmethod[(get-root) (or/c node? false/c)]{
@defmethod[(get-root) (or/c node? #f)]{
Returns the root node in the tree.
}
@ -251,8 +266,8 @@ FIXME: many methods are not yet documented.
@defproc[(node-token-length [n node?]) natural-number/c]
@defproc[(node-token-data [n node?]) any/c]
@defproc[(node-left-subtree-length [n node?]) natural-number/c]
@defproc[(node-left [n node?]) (or/c node? false/c)]
@defproc[(node-right [n node?]) (or/c node? false/c)]
@defproc[(node-left [n node?]) (or/c node? #f)]
@defproc[(node-right [n node?]) (or/c node? #f)]
)]{
Functions for working with nodes in a @racket[token-tree%].}

View File

@ -78,15 +78,20 @@ todo:
(loop (+ i 1)
str-offset)])))
(define next-tokens
(cdr (2d-lexer-state-pending-tokens
a-2d-lexer-state)))
(define new-state
(struct-copy 2d-lexer-state
a-2d-lexer-state
[pending-tokens next-tokens]))
(values val tok paren
pos
(+ (- end start) pos)
start
(struct-copy 2d-lexer-state
a-2d-lexer-state
[pending-tokens
(cdr (2d-lexer-state-pending-tokens
a-2d-lexer-state))]))]
(if (null? next-tokens)
new-state
(dont-stop new-state)))]
[(equal? #\# (peek-char port))
(define pp (peeking-input-port port))
(define chars (list (read-char pp) (read-char pp) (read-char pp)))
@ -347,9 +352,10 @@ todo:
(values first-tok-string 'hash-colon-keyword #f
pos (+ pos (string-length first-tok-string))
0
(2d-lexer-state final-tokens
#t
(2d-lexer-state-chained-state a-2d-lexer-state)))])]))
(dont-stop
(2d-lexer-state final-tokens
#t
(2d-lexer-state-chained-state a-2d-lexer-state))))])]))
(define (cropped-regions start end regions)
(define result-regions '())