From d807421a07e3d86b6ebf0802b4000d34af472372 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Wed, 29 Jul 2009 03:31:29 +0000 Subject: [PATCH] extended syntax colorer to support lexer-specific backup; fix problems with new color lexers svn: r15617 --- collects/framework/private/color.ss | 56 ++++-- collects/framework/private/scheme.ss | 10 +- collects/scribblings/framework/color.scrbl | 30 ++-- collects/syntax-color/module-lexer.ss | 30 ++-- collects/syntax-color/scribble-lexer.ss | 161 ++++++++++++------ collects/syntax-color/syntax-color.scrbl | 15 +- collects/tests/syntax-color/scribble-lexer.ss | 36 ++-- 7 files changed, 224 insertions(+), 114 deletions(-) diff --git a/collects/framework/private/color.ss b/collects/framework/private/color.ss index 3aee99a9f2..4fb414cd7c 100644 --- a/collects/framework/private/color.ss +++ b/collects/framework/private/color.ss @@ -32,9 +32,13 @@ added get-regions (define (should-color-type? type) (not (memq type '(white-space no-color)))) -(define (make-data type mode) (cons type mode)) -(define (data-type data) (car data)) -(define (data-lexer-mode data) (cdr data)) +(define (make-data type mode backup-delta) + (if (zero? backup-delta) + (cons type mode) + (vector type mode backup-delta))) +(define (data-type data) (if (pair? data) (car data) (vector-ref data 0))) +(define (data-lexer-mode data) (if (pair? data) (cdr data) (vector-ref data 1))) +(define (data-backup-delta data) (if (vector? data) (vector-ref data 2) 0)) (define -text<%> (interface (text:basic<%>) @@ -274,11 +278,11 @@ added get-regions (sync-invalid ls)))) (define/private (re-tokenize ls in in-start-pos in-lexer-mode enable-suspend) - (let-values ([(lexeme type data new-token-start new-token-end new-lexer-mode) + (let-values ([(lexeme type data new-token-start new-token-end backup-delta new-lexer-mode) (begin (enable-suspend #f) (begin0 - (get-token in in-lexer-mode) + (get-token in in-start-pos in-lexer-mode) (enable-suspend #t)))]) (unless (eq? 'eof type) (enable-suspend #f) @@ -302,7 +306,8 @@ added get-regions ;; version. In other words, the new greatly outweighs the tree ;; operations. ;;(insert-last! tokens (new token-tree% (length len) (data type))) - (insert-last-spec! (lexer-state-tokens ls) len (make-data type new-lexer-mode)) + (insert-last-spec! (lexer-state-tokens ls) len (make-data type new-lexer-mode backup-delta)) + #; (show-tree (lexer-state-tokens ls)) (send (lexer-state-parens ls) add-token data len) (cond ((and (not (send (lexer-state-invalid-tokens ls) is-empty?)) @@ -320,6 +325,29 @@ added get-regions (else (enable-suspend #t) (re-tokenize ls in in-start-pos new-lexer-mode enable-suspend))))))) + + (define/private (show-tree t) + (printf "Tree:\n") + (send t search-min!) + (let loop ([old-s -inf.0]) + (let ([s (send t get-root-start-position)] + [e (send t get-root-end-position)]) + (unless (= s old-s) + (printf " ~s\n" (list s e)) + (send t search! e) + (loop s))))) + + (define/private (split-backward ls valid-tree pos) + (let loop ([pos pos][valid-tree valid-tree][old-invalid-tree #f]) + (let-values (((orig-token-start orig-token-end valid-tree invalid-tree orig-data) + (send valid-tree split/data (- pos (lexer-state-start-pos ls))))) + (let ([backup-pos (- pos (data-backup-delta orig-data))] + [invalid-tree (or old-invalid-tree invalid-tree)]) + (if (backup-pos . < . pos) + ;; back up more: + (loop pos valid-tree invalid-tree) + ;; that was far enough: + (values orig-token-start orig-token-end valid-tree invalid-tree orig-data)))))) (define/private (do-insert/delete/ls ls edit-start-pos change-length) (unless (lexer-state-up-to-date? ls) @@ -327,7 +355,7 @@ added get-regions (cond ((lexer-state-up-to-date? ls) (let-values (((orig-token-start orig-token-end valid-tree invalid-tree orig-data) - (send (lexer-state-tokens ls) split/data (- edit-start-pos (lexer-state-start-pos ls))))) + (split-backward ls (lexer-state-tokens ls) edit-start-pos))) (send (lexer-state-parens ls) split-tree orig-token-start) (set-lexer-state-invalid-tokens! ls invalid-tree) (set-lexer-state-tokens! ls valid-tree) @@ -349,8 +377,7 @@ added get-regions (queue-callback (λ () (colorer-callback)) #f))) ((>= edit-start-pos (lexer-state-invalid-tokens-start ls)) (let-values (((tok-start tok-end valid-tree invalid-tree orig-data) - (send (lexer-state-invalid-tokens ls) split/data - (- edit-start-pos (lexer-state-start-pos ls))))) + (split-backward ls (lexer-state-invalid-tokens ls) edit-start-pos))) (set-lexer-state-invalid-tokens! ls invalid-tree) (set-lexer-state-invalid-tokens-start! ls @@ -362,8 +389,7 @@ added get-regions (+ change-length (lexer-state-invalid-tokens-start ls)))) (else (let-values (((tok-start tok-end valid-tree invalid-tree data) - (send (lexer-state-tokens ls) split/data - (- edit-start-pos (lexer-state-start-pos ls))))) + (split-backward ls (lexer-state-tokens ls) edit-start-pos))) (send (lexer-state-parens ls) truncate tok-start) (set-lexer-state-tokens! ls valid-tree) (set-lexer-state-invalid-tokens-start! ls (+ change-length (lexer-state-invalid-tokens-start ls))) @@ -463,14 +489,14 @@ added get-regions (reset-tokens) (set! should-color? (preferences:get 'framework:coloring-active)) (set! token-sym->style token-sym->style-) - (set! get-token (if (procedure-arity-includes? get-token- 2) + (set! get-token (if (procedure-arity-includes? get-token- 3) ;; New interface: thread through a mode: get-token- - ;; Old interface: no mode - (lambda (in mode) + ;; Old interface: no offset, backup delta, or mode + (lambda (in offset mode) (let-values ([(lexeme type data new-token-start new-token-end) (get-token- in)]) - (values lexeme type data new-token-start new-token-end #f))))) + (values lexeme type data new-token-start new-token-end 0 #f))))) (set! pairs pairs-) (for-each (lambda (ls) diff --git a/collects/framework/private/scheme.ss b/collects/framework/private/scheme.ss index 6af2a469c4..6ee835ba87 100644 --- a/collects/framework/private/scheme.ss +++ b/collects/framework/private/scheme.ss @@ -1172,14 +1172,14 @@ (preferences:add-callback 'framework:tabify (lambda (k v) (set! tabify-pref v))) - (define/private (scheme-lexer-wrapper in mode) - (let-values (((lexeme type paren start end mode) (module-lexer in mode))) + (define/private (scheme-lexer-wrapper in offset mode) + (let-values (((lexeme type paren start end backup-delta mode) (module-lexer in offset mode))) (cond ((and (eq? type 'symbol) (get-keyword-type lexeme tabify-pref)) - (values lexeme 'keyword paren start end mode)) + (values lexeme 'keyword paren start end backup-delta mode)) (else - (values lexeme type paren start end mode))))) + (values lexeme type paren start end backup-delta mode))))) (define/override (put-file text sup directory default-name) (parameterize ([finder:default-extension "ss"] @@ -1188,7 +1188,7 @@ ;; don't call the surrogate's super, since it sets the default extension (sup directory default-name))) - (super-new (get-token (lambda (in mode) (scheme-lexer-wrapper in mode))) + (super-new (get-token (lambda (in offset mode) (scheme-lexer-wrapper in offset mode))) (token-sym->style short-sym->style-name) (matches '((|(| |)|) (|[| |]|) diff --git a/collects/scribblings/framework/color.scrbl b/collects/scribblings/framework/color.scrbl index 9d2e66f72f..9f33186ab7 100644 --- a/collects/scribblings/framework/color.scrbl +++ b/collects/scribblings/framework/color.scrbl @@ -16,12 +16,14 @@ exact-nonnegative-integer? exact-nonnegative-integer?)) (-> input-port? + exact-nonnegative-integer? any/c (values any/c symbol? (or/c false? symbol?) exact-nonnegative-integer? exact-nonnegative-integer? + exact-nonnegative-integer? any/c)))) (pairs (listof (list/p symbol? symbol?)))) void))]{ Starts tokenizing the buffer for coloring and parenthesis matching. @@ -29,7 +31,7 @@ The @scheme[token-sym->style] argument will be passed the first return symbol from @scheme[get-token], and it should return the style-name that the token should be colored. - The @scheme[get-token] argument takes an input port and optionally a mode value. + The @scheme[get-token] argument takes an input port and optionally an offset and mode value. When it accepts just an input port, @scheme[get-token] returns the next token as 5 values: @itemize[ @@ -52,15 +54,21 @@ @item{ The ending position of the token.}] - When @scheme[get-token] accepts a mode value in addition to an - input port, it must also return an extra result, which is a new - mode. When @scheme[get-token] is called for the beginning on a - stream, the mode argument is @scheme[#f]. Thereafter, the mode + When @scheme[get-token] accepts an offset and mode value in addition to an + input port, it must also return two extra results, which are a backup + distance and new mode. The offset given to @scheme[get-token] can be added + to the position of the input port to obtain absolute coordinates within a + text stream. The mode argument allows @scheme[get-token] to communicate information + from earlier parsing to later. + When @scheme[get-token] is called for the beginning on a + stream, the mode argument is @scheme[#f]; thereafter, the mode returned for the previous token is provided to @scheme[get-token] for the next token. The mode should not be a mutable value; if part of the stream is re-tokenized, the mode saved from the immediately preceding token is given again to the - @scheme[get-token] function. + @scheme[get-token] function. The backup distance returned by @scheme[get-token] + indicates the maximum number of characters to back up (counting from the start of the token) + and for re-parsing after a change to the editor within the token's region. The @scheme[get-token] function is usually be implemented with a lexer using the @scheme[parser-tools/lex] library. The @@ -68,7 +76,7 @@ @itemize[ @item{ Every position in the buffer must be accounted for in exactly one - token.} + token, and every token must have a non-zero width.} @item{ The token returned by @scheme[get-token] must rely only on the contents of the input port argument plus the mode argument. This constraint means that the @@ -77,16 +85,16 @@ for tokens).} @item{ A change to the stream must not change the tokenization of the stream prior - to the token immediately preceding the change. In the following - example this invariant does not hold. If the buffer contains + to the token immediately preceding the change plus the backup distance. In the following + example, this invariant does not hold for a zero backup distance: If the buffer contains @verbatim[#:indent 2]{" 1 2 3} and the tokenizer treats the unmatched " as its own token (a string error token), and separately tokenizes the 1 2 and 3, an edit to make the buffer look like @verbatim[#:indent 2]{" 1 2 3"} would result in a single string token modifying previous tokens. To - handle these situations, @scheme[get-token] must treat the first line as a - single token.}] + handle these situations, @scheme[get-token] can treat the first line as a + single token, or it can precisely track backup distances.}] The @scheme[pairs] argument is a list of different kinds of matching parens. The second value returned by @scheme[get-token] is compared to this list to see how the diff --git a/collects/syntax-color/module-lexer.ss b/collects/syntax-color/module-lexer.ss index b40350ee0d..d4c1f69840 100644 --- a/collects/syntax-color/module-lexer.ss +++ b/collects/syntax-color/module-lexer.ss @@ -4,7 +4,7 @@ (provide module-lexer) -(define (module-lexer in mode) +(define (module-lexer in offset mode) (cond [(not mode) ;; Starting out: look for #lang: @@ -21,39 +21,41 @@ (old g))))]) ;; FIXME: set the reader guard to disable access to ;; untrusted planet packages. - (read-language in (lambda () #f))))]) + (read-language p (lambda () #f))))] + [sync-ports (lambda () + (read-bytes (- (file-position p) init) in))]) (cond [(procedure? get-info) ;; Produce language as first token: - (let*-values ([(bytes-len) (- (file-position p) init)] - [(bstr) (read-bytes bytes-len in)] - [(end-line end-col end-pos) (port-next-location in)]) + (sync-ports) + (let-values ([(end-line end-col end-pos) (port-next-location in)]) (values - bstr + "#lang" 'other #f start-pos end-pos + 0 (or (let ([v (get-info 'color-lexer)]) (and v - (if (procedure-arity-includes? v 2) + (if (procedure-arity-includes? v 3) (cons v #f) v))) scheme-lexer)))] [(eq? 'fail get-info) + (sync-ports) (let*-values ([(end-line end-col end-pos) (port-next-location in)]) - (values #f 'error #f start-pos end-pos - scheme-lexer))] + (values #f 'error #f start-pos end-pos 0 scheme-lexer))] [else ;; Start over using the Scheme lexer - (module-lexer in scheme-lexer)])))] + (module-lexer in offset scheme-lexer)])))] [(pair? mode) ;; #lang-selected language consumes and produces a mode: - (let-values ([(lexeme type data new-token-start new-token-end new-mode) - ((car mode) in (cdr mode))]) - (values lexeme type data new-token-start new-token-end (cons (car mode) new-mode)))] + (let-values ([(lexeme type data new-token-start new-token-end backup-delta new-mode) + ((car mode) in offset (cdr mode))]) + (values lexeme type data new-token-start new-token-end backup-delta (cons (car mode) new-mode)))] [else ;; #lang-selected language (or default) doesn't deal with modes: (let-values ([(lexeme type data new-token-start new-token-end) (mode in)]) - (values lexeme type data new-token-start new-token-end mode))])) + (values lexeme type data new-token-start new-token-end 0 mode))])) diff --git a/collects/syntax-color/scribble-lexer.ss b/collects/syntax-color/scribble-lexer.ss index b85e2d0d33..0d56024dc7 100644 --- a/collects/syntax-color/scribble-lexer.ss +++ b/collects/syntax-color/scribble-lexer.ss @@ -4,14 +4,25 @@ (provide scribble-inside-lexer scribble-lexer) -(define-struct text (scheme-rx end-rx sub-rx string-rx open-paren close-paren)) -(define-struct scheme (status)) -(define-struct args ()) -(define-struct text-args ()) +(define-struct text (scheme-rx end-rx sub-rx string-rx open-paren close-paren) #:transparent) +(define-struct scheme (status backup) #:transparent) +(define-struct args () #:transparent) +(define-struct text-args () #:transparent) (define rx:opener #rx"^[|]([^a-zA-Z0-9 \t\r\n\f@\\\177-\377{]*){") -(define (scribble-inside-lexer in mode) +(define rxes (make-weak-hash)) +(define rx-keys (make-weak-hasheq)) + +(define (intern-byte-regexp bstr) + (let ([v (hash-ref rxes bstr #f)]) + (or v + (let ([rx (byte-regexp bstr)]) + (hash-set! rxes bstr rx) + (hash-set! rx-keys rx (make-ephemeron rx bstr)) + rx)))) + +(define (scribble-inside-lexer in offset mode) (let ([mode (or mode (list (make-text #rx"^@" @@ -22,6 +33,31 @@ #f)))]) (let-values ([(line col pos) (port-next-location in)] [(l) (car mode)]) + + ;; If we don't match rx:opener in a place where we might otherwise + ;; match, and there is a "|" at that point, then a change later + ;; could turn the non-match into a match, AND there could be + ;; arbitrarily many Scheme tokens in between. So we carry the backup + ;; position, use it as necessary (from places that might be between a "|" + ;; and a potential match creator), and cancel it when it's clearly + ;; not needed anymore (which includes after any token that isn't a + ;; Scheme token). + (define (backup-if-needed pos) + (if (and (scheme? (car mode)) + (scheme-backup (car mode))) + (- (+ pos offset) (scheme-backup (car mode))) + 0)) + (define (no-backup mode) + (if (and (scheme? (car mode)) + (scheme-backup (car mode))) + (cons (make-scheme (scheme-status (car mode)) #f) + (cdr mode)) + mode)) + (define (maybe-no-backup type mode) + (if (eq? type 'white-space) + ;; white space definitely ends the need for backup + (no-backup mode) + mode)) (define (enter-@ comment-k) (cond @@ -37,8 +73,9 @@ #f pos end-pos + (backup-if-needed pos) (cons (make-text-args) - mode))) + (no-backup mode)))) ;; Line comment: (begin (regexp-match? #rx".*?(?=[\r\n])" in) @@ -48,7 +85,8 @@ #f pos end-pos - mode))))] + (backup-if-needed pos) + (no-backup mode)))))] [(regexp-try-match rx:opener in) => (lambda (m) (enter-opener m mode))] [(regexp-try-match #rx"^{" in) @@ -58,18 +96,19 @@ (cond [(equal? #\| (peek-char in)) (read-char in) - (list* (make-scheme 'bar) - mode)] + (list* (make-scheme 'bar (+ offset pos)) + (no-backup mode))] [else - (list* (make-scheme 'one) + (list* (make-scheme 'one #f) (make-args) - mode)])]) + (no-backup mode))])]) (let-values ([(end-line end-col end-pos) (port-next-location in)]) (values "@" 'parenthesis #f pos end-pos + (backup-if-needed pos) new-mode)))])) (define (enter-simple-opener mode) @@ -79,13 +118,14 @@ '|{| pos end-pos + (backup-if-needed pos) (cons (make-text #rx"^@" #rx"^}" #rx"^{" #rx".*?(?:(?=[@{}\r\n])|$)" '|{| '|}|) - mode)))) + (no-backup mode))))) (define (enter-opener m mode) (let-values ([(end-line end-col end-pos) (port-next-location in)]) @@ -94,23 +134,24 @@ '|{| ;; Better complex paren? pos end-pos + (backup-if-needed pos) (let ([closer (regexp-quote (bytes-append #"}" (flip (cadr m)) #"|"))] [re-opener (regexp-quote (cadr m))]) - (cons (make-text (byte-regexp (bytes-append #"^[|]" re-opener #"@")) - (byte-regexp (bytes-append #"^" closer)) - (byte-regexp (bytes-append #"^[|]" re-opener #"{")) - (byte-regexp (bytes-append - #".*?(?:(?=[|]" - re-opener - #"[@{])|(?=" - closer - #")|(?=[\r\n])|$)")) + (cons (make-text (intern-byte-regexp (bytes-append #"^[|]" re-opener #"@")) + (intern-byte-regexp (bytes-append #"^" closer)) + (intern-byte-regexp (bytes-append #"^[|]" re-opener #"{")) + (intern-byte-regexp (bytes-append + #".*?(?:(?=[|]" + re-opener + #"[@{])|(?=" + closer + #")|(?=[\r\n])|$)")) '|{| ;; Better complex paren? '|}|) ;; Better complex paren? - mode))))) + (no-backup mode)))))) (if (eof-object? (peek-char in)) (values eof @@ -118,6 +159,7 @@ #f pos pos + 0 #f) (cond [(text? l) @@ -134,6 +176,7 @@ (text-close-paren l) pos end-pos + 0 (cdr mode)))] [(and (text-sub-rx l) (regexp-try-match (text-sub-rx l) in)) @@ -143,6 +186,7 @@ (text-open-paren l) pos end-pos + 0 (cons (car mode) mode)))] [(regexp-try-match #px"^(?:[\r\n])\\s*" in) ;; Treat a newline and leading whitespace in text mode as whitespace @@ -153,6 +197,7 @@ #f pos end-pos + 0 mode))] [else ;; Read string up to @, }, or newline @@ -163,6 +208,7 @@ #f pos end-pos + 0 mode))])] [(scheme? l) (let ([status (scheme-status l)]) @@ -175,6 +221,7 @@ '|]| pos end-pos + 0 (cdr mode)))] [(and (memq status '(bar bar-no-more)) (regexp-try-match #px"^\\s*?[|]" in)) @@ -184,9 +231,12 @@ #f pos end-pos + (backup-if-needed pos) (cdr mode)))] [(regexp-try-match #rx"^@" in) - (enter-@ (lambda (lexeme type paren start end mode) + ;; If we have a backup at this point, we can drop it, because + ;; edits after here cannot lead to a rx:opener match. + (enter-@ (lambda (lexeme type paren start end backup mode) (values lexeme (if (eq? status 'one) 'error @@ -194,6 +244,7 @@ paren start end + backup mode)))] [(and (eq? status 'one) (regexp-try-match rx:opener in)) @@ -212,6 +263,7 @@ #f pos end-pos + (backup-if-needed pos) mode))] [(and (eq? status 'one) (regexp-try-match #rx"^#?,@?" in)) @@ -222,6 +274,7 @@ #f pos end-pos + (backup-if-needed pos) mode))] [else (let-values ([(lexeme type paren start end adj) @@ -233,9 +286,11 @@ [(many) mode] [(one) (cdr mode)] [(bracket bar-no-more) - (cons (make-scheme status) (cdr mode))] - [(bar) (cons (make-scheme 'bar-no-more) (cdr mode))] - [else (error "bad status")]))]) + (cons (make-scheme status (scheme-backup l)) + (cdr mode))] + [(bar) (cons (make-scheme 'bar-no-more (scheme-backup l)) + (cdr mode))] + [else (error "bad status" status)]))]) (values lexeme (cond [(or (eq? type 'comment) @@ -250,24 +305,29 @@ paren start end - (case adj - [(continue) mode] - [(datum) - (cond - [(pair? status) mode] - [else (consume status)])] - [(open) - (cons (make-scheme (cons #t status)) - (cdr mode))] - [(close) - (if (pair? status) - (let ([v (cdr status)]) - (if (symbol? v) - (consume v) - (cons (make-scheme v) (cdr mode)))) - (consume status))] - [(bad) (consume status)] - [else (error "bad adj")])))]))] + (backup-if-needed start) + (maybe-no-backup + type + (case adj + [(continue) mode] + [(datum) + (cond + [(pair? status) mode] + [else (consume status)])] + [(open) + (cons (make-scheme (cons #t status) (scheme-backup l)) + (cdr mode))] + [(close) + (if (pair? status) + (let ([v (cdr status)]) + (if (symbol? v) + (consume v) + (cons (make-scheme v (scheme-backup l)) (cdr mode)))) + (consume status))] + [(bad) (if (pair? status) + mode + (consume status))] + [else (error "bad adj")]))))]))] [(args? l) (cond [(regexp-try-match #rx"^\\[" in) @@ -277,10 +337,11 @@ '|[| pos end-pos - (list* (make-scheme 'bracket) + 0 + (list* (make-scheme 'bracket #f) mode)))] [else - (scribble-lexer in (cons (make-text-args) (cdr mode)))])] + (scribble-inside-lexer in offset (cons (make-text-args) (cdr mode)))])] [(text-args? l) (cond [(regexp-try-match rx:opener in) @@ -288,11 +349,11 @@ [(regexp-try-match #rx"^{" in) (enter-simple-opener (cdr mode))] [else - (scribble-lexer in (cdr mode))])] + (scribble-inside-lexer in offset (cdr mode))])] [else (error "bad mode")]))))) -(define (scribble-lexer in mode) - (scribble-inside-lexer in (or mode (list (make-scheme 'many))))) +(define (scribble-lexer in offset mode) + (scribble-inside-lexer in offset (or mode (list (make-scheme 'many #f))))) (define (flip s) (list->bytes @@ -300,10 +361,8 @@ (cond [(equal? c (char->integer #\()) (char->integer #\))] [(equal? c (char->integer #\[)) (char->integer #\])] - [(equal? c (char->integer #\{)) (char->integer #\})] [(equal? c (char->integer #\<)) (char->integer #\>)] [(equal? c (char->integer #\))) (char->integer #\()] [(equal? c (char->integer #\])) (char->integer #\[)] - [(equal? c (char->integer #\})) (char->integer #\{)] [(equal? c (char->integer #\>)) (char->integer #\<)] [else c])))) diff --git a/collects/syntax-color/syntax-color.scrbl b/collects/syntax-color/syntax-color.scrbl index 3f528ab8db..59f6b369a8 100644 --- a/collects/syntax-color/syntax-color.scrbl +++ b/collects/syntax-color/syntax-color.scrbl @@ -126,6 +126,7 @@ A lexer that only identifies @litchar{(}, @litchar{)}, @litchar{[}, @defmodule[syntax-color/module-lexer] @defproc[(module-lexer [in input-port?] + [offset exact-nonnegative-integer?] [mode (or/c #f (-> input-port? any) (cons/c (-> input-port? any/c any) any/c))]) @@ -134,6 +135,7 @@ A lexer that only identifies @litchar{(}, @litchar{)}, @litchar{[}, (or/c symbol? false/c) (or/c number? false/c) (or/c number? false/c) + exact-nonnegative-integer? (or/c #f (-> input-port? any) (cons/c (-> input-port? any/c any) any/c)))]{ @@ -142,8 +144,9 @@ Like @scheme[scheme-lexer], but @itemize[ - @item{A @scheme[module-lexer] accepts (and returns) a lexer mode, - instead of just an input port.} + @item{A @scheme[module-lexer] accepts an offset and lexer mode, + instead of just an input port, and it returns a backup distance + and a new lexer mode.} @item{When @scheme[mode] is @scheme[#f] (indicating the start of the stream), the lexer checks @scheme[in] for a @hash-lang[] @@ -158,7 +161,7 @@ Like @scheme[scheme-lexer], but @scheme['color-lexer]. If the result is not @scheme[#f], then it should be a lexer function for use with @scheme[color:text%]. The result mode is the lexer---paired - with @scheme[#f] if the lexer is a procedure arity 2---so that + with @scheme[#f] if the lexer is a procedure arity 3---so that future calls will dispatch to the language-supplied lexer. If the language is specified but it provides no @@ -170,7 +173,7 @@ Like @scheme[scheme-lexer], but lexer again as the mode.} @item{When @scheme[mode] is a pair, then the lexer procedure in the - @scheme[car] is applied to @scheme[in] and the mode in the + @scheme[car] is applied to @scheme[in], @scheme[offset], and the mode in the @scheme[cdr]. The lexer's results are returned, except that its mode result is paired back with the lexer procedure.} @@ -181,12 +184,14 @@ Like @scheme[scheme-lexer], but @defmodule[syntax-color/scribble-lexer] @defproc[(scribble-lexer [in input-port?] + [offset exact-nonnegative-integer?] [mode any/c]) (values (or/c string? eof-object?) symbol? (or/c symbol? false/c) (or/c number? false/c) (or/c number? false/c) + exact-nonnegative-integer? any/c)]{ Like @scheme[scheme-lexer], but for Scheme extended with Scribbles @@ -194,12 +199,14 @@ Like @scheme[scheme-lexer], but for Scheme extended with Scribbles "scribblings/scribble/scribble.scrbl") "reader"]).} @defproc[(scribble-inside-lexer [in input-port?] + [offset exact-nonnegative-integer?] [mode any/c]) (values (or/c string? eof-object?) symbol? (or/c symbol? false/c) (or/c number? false/c) (or/c number? false/c) + exact-nonnegative-integer? any/c)]{ Like @scheme[scribble-lexer], but starting in ``text'' mode instead of diff --git a/collects/tests/syntax-color/scribble-lexer.ss b/collects/tests/syntax-color/scribble-lexer.ss index 9437cc1b07..98c3758733 100644 --- a/collects/tests/syntax-color/scribble-lexer.ss +++ b/collects/tests/syntax-color/scribble-lexer.ss @@ -6,10 +6,10 @@ (define (color str) (let ([in (open-input-string str)]) (let loop ([mode #f]) - (let-values ([(lexeme type paren start end mode) (scribble-inside-lexer in mode)]) + (let-values ([(lexeme type paren start end backup mode) (scribble-inside-lexer in 0 mode)]) (if (eq? type 'eof) null - (cons (list start end type) + (cons (list start end type backup) (loop mode))))))) (define (test* str len-val line) @@ -17,7 +17,10 @@ [val (let loop ([pos 1][l len-val]) (if (null? l) null - (cons (list pos (+ pos (caar l)) (cadar l)) + (cons (list pos (+ pos (caar l)) (cadar l) + (if (null? (cddar l)) + 0 + (caddar l))) (loop (+ (+ pos (caar l))) (cdr l)))))]) (unless (equal? v val) (printf "FAILED, line ~s\n" line) @@ -91,27 +94,27 @@ (1 parenthesis))) (test "@|x|str" '((2 parenthesis) - (1 symbol) - (1 parenthesis) + (1 symbol 2) + (1 parenthesis 3) (3 string))) (test "@|x #|ok|#|str" '((2 parenthesis) - (1 symbol) - (1 white-space) + (1 symbol 2) + (1 white-space 3) (6 comment) (1 parenthesis) (3 string))) (test "@| x ; c\n| str" '((2 parenthesis) - (1 white-space) + (1 white-space 2) (1 symbol) (1 white-space) (3 comment) (2 parenthesis) (4 string))) (test "@|(a|b|)|str" '((2 parenthesis) - (1 parenthesis) - (4 symbol) - (1 parenthesis) - (1 parenthesis) + (1 parenthesis 2) + (4 symbol 3) + (1 parenthesis 7) + (1 parenthesis 8) (3 string))) (test "@#|bad|#x str" '((1 parenthesis) @@ -122,7 +125,7 @@ (1 parenthesis) (1 symbol))) (test "@|@x|z" '((2 parenthesis) - (1 parenthesis) + (1 parenthesis 2) (1 symbol) (1 parenthesis) (1 string))) @@ -232,4 +235,9 @@ (1 string) (1 parenthesis))) -; (test "@|=@=|}" null) +(test "@|()|})|" '((2 parenthesis) + (1 parenthesis 2) + (1 parenthesis 3) + (1 parenthesis 4) + (3 string))) +