From 761054890d8207d7689cce825d0fc777d15b1091 Mon Sep 17 00:00:00 2001 From: Robby Findler Date: Sun, 10 Mar 2013 19:32:52 -0500 Subject: [PATCH] extend the lexer <-> framework's color:text api to let lexers say "call me again before you change the buffer" also, use this in the 2d lexer --- collects/framework/private/color.rkt | 27 ++++++--- collects/scribblings/framework/color.scrbl | 33 ++++++++--- collects/syntax-color/lexer-contract.rkt | 24 ++++---- collects/syntax-color/module-lexer.rkt | 5 +- collects/syntax-color/syntax-color.scrbl | 65 +++++++++++++--------- collects/unstable/2d/private/lexer.rkt | 22 +++++--- 6 files changed, 113 insertions(+), 63 deletions(-) diff --git a/collects/framework/private/color.rkt b/collects/framework/private/color.rkt index 39e57b1c88..2164b0c3ea 100644 --- a/collects/framework/private/color.rkt +++ b/collects/framework/private/color.rkt @@ -11,6 +11,7 @@ added get-regions syntax-color/token-tree syntax-color/paren-tree syntax-color/default-lexer + syntax-color/lexer-contract string-constants "../preferences.rkt" "sig.rkt" @@ -303,7 +304,7 @@ added get-regions (set-lexer-state-invalid-tokens-mode! ls mode)) (sync-invalid ls)))) - (define/private (re-tokenize-move-to-next-ls start-time did-something?) + (define/private (re-tokenize-move-to-next-ls start-time ok-to-stop?) (cond [(null? re-tokenize-lses) ;; done: return #t @@ -317,25 +318,29 @@ added get-regions (lexer-state-end-pos ls) (λ (x) #f))) (port-count-lines! in) - (continue-re-tokenize start-time did-something? ls in + (continue-re-tokenize start-time ok-to-stop? ls in (lexer-state-current-pos ls) (lexer-state-current-lexer-mode ls))])) (define re-tokenize-lses #f) - (define/private (continue-re-tokenize start-time did-something? ls in in-start-pos lexer-mode) + (define/private (continue-re-tokenize start-time ok-to-stop? ls in in-start-pos lexer-mode) (cond - [(and did-something? ((+ start-time 20.0) . <= . (current-inexact-milliseconds))) + [(and ok-to-stop? ((+ start-time 20.0) . <= . (current-inexact-milliseconds))) #f] [else (define-values (_line1 _col1 pos-before) (port-next-location in)) - (define-values (lexeme type data new-token-start new-token-end backup-delta new-lexer-mode) + (define-values (lexeme type data new-token-start new-token-end backup-delta new-lexer-mode/cont) (get-token in in-start-pos lexer-mode)) (define-values (_line2 _col2 pos-after) (port-next-location in)) + (define new-lexer-mode (if (dont-stop? new-lexer-mode/cont) + (dont-stop-val new-lexer-mode/cont) + new-lexer-mode/cont)) + (define next-ok-to-stop? (not (dont-stop? new-lexer-mode/cont))) (cond [(eq? 'eof type) (set-lexer-state-up-to-date?! ls #t) - (re-tokenize-move-to-next-ls start-time #t)] + (re-tokenize-move-to-next-ls start-time next-ok-to-stop?)] [else (unless (<= pos-before new-token-start pos-after) (error 'color:text<%> @@ -369,9 +374,9 @@ added get-regions (lexer-state-invalid-tokens ls)) (set-lexer-state-invalid-tokens-start! ls +inf.0) (set-lexer-state-up-to-date?! ls #t) - (re-tokenize-move-to-next-ls start-time #t)] + (re-tokenize-move-to-next-ls start-time next-ok-to-stop?)] [else - (continue-re-tokenize start-time #t ls in in-start-pos new-lexer-mode)]))])])) + (continue-re-tokenize start-time next-ok-to-stop? ls in in-start-pos new-lexer-mode)]))])])) (define/private (add-colorings type in-start-pos new-token-start new-token-end) (define sp (+ in-start-pos (sub1 new-token-start))) @@ -511,7 +516,11 @@ added get-regions [else (if (lexer-state-up-to-date? (car lexer-states)) (loop (cdr lexer-states)) lexer-states)]))) - (define finished? (re-tokenize-move-to-next-ls (current-inexact-milliseconds) #f)) + (define finished? (re-tokenize-move-to-next-ls + (current-inexact-milliseconds) + ;; #f initially here ensures we do at least + ;; one step of tokenization before giving up + #f)) (c-log (format "coloring stopped ~a" (if finished? "because it finished" "with more to do"))) (when finished? (update-lexer-state-observers) diff --git a/collects/scribblings/framework/color.scrbl b/collects/scribblings/framework/color.scrbl index 184fb0efbf..ef15abaf7a 100644 --- a/collects/scribblings/framework/color.scrbl +++ b/collects/scribblings/framework/color.scrbl @@ -1,7 +1,6 @@ #lang scribble/doc @(require scribble/manual scribble/extract) -@(require (for-label framework)) -@(require (for-label scheme/gui)) +@(require (for-label framework scheme/gui syntax-color/lexer-contract)) @title{Color} @definterface[color:text<%> (text:basic<%>)]{ @@ -17,7 +16,7 @@ (or/c exact-positive-integer? #f))) (-> input-port? exact-nonnegative-integer? - any/c + (not/c dont-stop?) (values any/c symbol? (or/c symbol? #f) @@ -57,20 +56,36 @@ is also relative to the port's location, just like the previous value.}] When @racket[get-token] accepts an offset and mode value in addition to an - input port, it must also return two extra results, which are a backup - distance and new mode. The offset given to @racket[get-token] can be added + input port, it must also return two extra results. + The offset given to @racket[get-token] can be added to the position of the input port to obtain absolute coordinates within a - text stream. The mode argument allows @racket[get-token] to communicate + text stream. The extra two results are + @itemize[@item{a new mode; + The mode argument allows @racket[get-token] to communicate information from earlier parsing to later. When @racket[get-token] is called for the beginning on a stream, the mode argument is @racket[#f]; thereafter, the mode returned for the previous token is provided to - @racket[get-token] for the next token. The mode should not be a mutable + @racket[get-token] for the next token. + + If the mode result is a @racket[dont-stop] struct, then the value inside + the struct is considered the new mode, and the colorer is guaranteed + not to be interrupted until at least the next call to this tokenizing + function that does not return a @racket[dont-stop] struct (unless, of course, + it returns an eof token, in which case the new mode result is ignored). + This is useful, for example, when a lexer has to read ahead in the buffer + to decide on the tokens at this point; then that read-ahead will be + inconsistent if an edit happens; returning a @racket[dont-stop] + struct ensures that no changes to the buffer happen. + + The mode should not be a mutable value; if part of the stream is re-tokenized, the mode saved from the immediately preceding token is given again to the @racket[get-token] - function. The backup distance returned by @racket[get-token] indicates the + function.} + @item{a backup distance; + The backup distance returned by @racket[get-token] indicates the maximum number of characters to back up (counting from the start of the token) and for re-parsing after a change to the editor within the token's - region. + region.}] The @racket[get-token] function must obey the following invariants: @itemize[ diff --git a/collects/syntax-color/lexer-contract.rkt b/collects/syntax-color/lexer-contract.rkt index 96e266f579..d67a8dcf10 100644 --- a/collects/syntax-color/lexer-contract.rkt +++ b/collects/syntax-color/lexer-contract.rkt @@ -1,17 +1,9 @@ #lang racket/base (require racket/contract/base unstable/options) -(provide lexer/c) +(provide lexer/c (struct-out dont-stop)) -(define (end/c start type) - (cond - [(equal? 'eof type) - (or/c exact-positive-integer? #f)] - [start - (and/c exact-positive-integer? - (>/c start))] - [else - #f])) +(struct dont-stop (val) #:transparent) (define lexer/c (option/c @@ -23,7 +15,7 @@ [end (start type) (end/c start type)])) (->i ([in (and/c input-port? port-counts-lines?)] [offset exact-nonnegative-integer?] - [mode any/c]) + [mode (not/c dont-stop?)]) (values [txt any/c] [type symbol?] [paren (or/c symbol? #f)] @@ -34,3 +26,13 @@ #:tester (λ (x) (and (procedure? x) (or (procedure-arity-includes? x 1) (procedure-arity-includes? x 3)))))) + +(define (end/c start type) + (cond + [(equal? 'eof type) + (or/c exact-positive-integer? #f)] + [start + (and/c exact-positive-integer? + (>/c start))] + [else + #f])) diff --git a/collects/syntax-color/module-lexer.rkt b/collects/syntax-color/module-lexer.rkt index fa0e702eff..0747e3b1f0 100644 --- a/collects/syntax-color/module-lexer.rkt +++ b/collects/syntax-color/module-lexer.rkt @@ -108,7 +108,10 @@ to delegate to the scheme-lexer (in the 'no-lang-line mode). ;; #lang-selected language consumes and produces a mode: (let-values ([(lexeme type data new-token-start new-token-end backup-delta new-mode) ((car mode) in offset (cdr mode))]) - (values lexeme type data new-token-start new-token-end backup-delta (cons (car mode) new-mode)))] + (values lexeme type data new-token-start new-token-end backup-delta + (if (dont-stop? new-mode) + (dont-stop (cons (car mode) (dont-stop-val new-mode))) + (cons (car mode) new-mode))))] [else ;; #lang-selected language (or default) doesn't deal with modes: (let-values ([(lexeme type data new-token-start new-token-end) diff --git a/collects/syntax-color/syntax-color.scrbl b/collects/syntax-color/syntax-color.scrbl index d5b863d76d..5154e00f60 100644 --- a/collects/syntax-color/syntax-color.scrbl +++ b/collects/syntax-color/syntax-color.scrbl @@ -6,7 +6,7 @@ syntax-color/module-lexer syntax-color/scribble-lexer syntax-color/default-lexer - framework/framework + framework framework/private/color racket)) @@ -31,6 +31,21 @@ Parenthesis matching code built on top of @racket[token-tree%]. @; ---------------------------------------------------------------------- +@section{Lexer Contract & the Don't Stop struct} + +@defmodule[syntax-color/lexer-contract] + +@defthing[lexer/c contract?]{ + Checks to be sure a lexing function is well-behaved. For more + details, see @xmethod[color:text<%> start-colorer]. +} + +@defstruct[dont-stop ([val any/c])]{ + A struct used to indicate to the lexer that it should not + allow itself to be interrupted. For more details, + see @xmethod[color:text<%> start-colorer]. +} + @section{Racket Lexer} @defmodule[syntax-color/racket-lexer] @@ -38,9 +53,9 @@ Parenthesis matching code built on top of @racket[token-tree%]. @defproc[(racket-lexer [in input-port?]) (values (or/c string? eof-object?) symbol? - (or/c symbol? false/c) - (or/c number? false/c) - (or/c number? false/c))]{ + (or/c symbol? #f) + (or/c number? #f) + (or/c number? #f))]{ A lexer for Racket, including reader extensions (@secref[#:doc'(lib "scribblings/reference/reference.scrbl")]{Reader_Extension}), built @@ -67,9 +82,9 @@ The @racket[racket-lexer] function returns 5 values: @defproc[(racket-lexer/status [in input-port?]) (values (or/c string? eof-object?) symbol? - (or/c symbol? false/c) - (or/c number? false/c) - (or/c number? false/c) + (or/c symbol? #f) + (or/c number? #f) + (or/c number? #f) (or/c 'datum 'open 'close 'continue))]{ Like @racket[racket-lexer], but returns an extra value. The last @@ -81,9 +96,9 @@ as whitespace) on a datum.} @defproc[(racket-nobar-lexer/status [in input-port?]) (values (or/c string? eof-object?) symbol? - (or/c symbol? false/c) - (or/c number? false/c) - (or/c number? false/c) + (or/c symbol? #f) + (or/c number? #f) + (or/c number? #f) (or/c 'datum 'open 'close 'continue))]{ Like @racket[racket-lexer/status], except it treats @@ -97,9 +112,9 @@ This function is used by @racket[scribble-lexer].} @defproc[(default-lexer [in input-port?]) (values (or/c string? eof-object?) symbol? - (or/c symbol? false/c) - (or/c number? false/c) - (or/c number? false/c))] + (or/c symbol? #f) + (or/c number? #f) + (or/c number? #f))] A lexer that only identifies @litchar{(}, @litchar{)}, @litchar{[}, @litchar{]}, @litchar["{"], and @litchar["}"] built specifically for @@ -132,9 +147,9 @@ A lexer that only identifies @litchar{(}, @litchar{)}, @litchar{[}, (cons/c (-> input-port? any/c any) any/c))]) (values (or/c string? eof-object?) symbol? - (or/c symbol? false/c) - (or/c number? false/c) - (or/c number? false/c) + (or/c symbol? #f) + (or/c number? #f) + (or/c number? #f) exact-nonnegative-integer? (or/c #f (-> input-port? any) @@ -191,9 +206,9 @@ Like @racket[racket-lexer], but with several differences: [mode any/c]) (values (or/c string? eof-object?) symbol? - (or/c symbol? false/c) - (or/c number? false/c) - (or/c number? false/c) + (or/c symbol? #f) + (or/c number? #f) + (or/c number? #f) exact-nonnegative-integer? any/c)]{ @@ -206,9 +221,9 @@ Like @racket[racket-lexer], but for Racket extended with Scribble's [mode any/c]) (values (or/c string? eof-object?) symbol? - (or/c symbol? false/c) - (or/c number? false/c) - (or/c number? false/c) + (or/c symbol? #f) + (or/c number? #f) + (or/c number? #f) exact-nonnegative-integer? any/c)]{ @@ -234,7 +249,7 @@ FIXME: many methods are not yet documented. Creates a token tree with a single element. } - @defmethod[(get-root) (or/c node? false/c)]{ + @defmethod[(get-root) (or/c node? #f)]{ Returns the root node in the tree. } @@ -251,8 +266,8 @@ FIXME: many methods are not yet documented. @defproc[(node-token-length [n node?]) natural-number/c] @defproc[(node-token-data [n node?]) any/c] @defproc[(node-left-subtree-length [n node?]) natural-number/c] -@defproc[(node-left [n node?]) (or/c node? false/c)] -@defproc[(node-right [n node?]) (or/c node? false/c)] +@defproc[(node-left [n node?]) (or/c node? #f)] +@defproc[(node-right [n node?]) (or/c node? #f)] )]{ Functions for working with nodes in a @racket[token-tree%].} diff --git a/collects/unstable/2d/private/lexer.rkt b/collects/unstable/2d/private/lexer.rkt index 35e2c02fb3..7140475eab 100644 --- a/collects/unstable/2d/private/lexer.rkt +++ b/collects/unstable/2d/private/lexer.rkt @@ -78,15 +78,20 @@ todo: (loop (+ i 1) str-offset)]))) + (define next-tokens + (cdr (2d-lexer-state-pending-tokens + a-2d-lexer-state))) + (define new-state + (struct-copy 2d-lexer-state + a-2d-lexer-state + [pending-tokens next-tokens])) (values val tok paren pos (+ (- end start) pos) start - (struct-copy 2d-lexer-state - a-2d-lexer-state - [pending-tokens - (cdr (2d-lexer-state-pending-tokens - a-2d-lexer-state))]))] + (if (null? next-tokens) + new-state + (dont-stop new-state)))] [(equal? #\# (peek-char port)) (define pp (peeking-input-port port)) (define chars (list (read-char pp) (read-char pp) (read-char pp))) @@ -347,9 +352,10 @@ todo: (values first-tok-string 'hash-colon-keyword #f pos (+ pos (string-length first-tok-string)) 0 - (2d-lexer-state final-tokens - #t - (2d-lexer-state-chained-state a-2d-lexer-state)))])])) + (dont-stop + (2d-lexer-state final-tokens + #t + (2d-lexer-state-chained-state a-2d-lexer-state))))])])) (define (cropped-regions start end regions) (define result-regions '())