extended syntax colorer to support lexer-specific backup; fix problems with new color lexers

svn: r15617
This commit is contained in:
Matthew Flatt 2009-07-29 03:31:29 +00:00
parent 6dcc67cb25
commit d807421a07
7 changed files with 224 additions and 114 deletions

View File

@ -32,9 +32,13 @@ added get-regions
(define (should-color-type? type)
(not (memq type '(white-space no-color))))
(define (make-data type mode) (cons type mode))
(define (data-type data) (car data))
(define (data-lexer-mode data) (cdr data))
(define (make-data type mode backup-delta)
(if (zero? backup-delta)
(cons type mode)
(vector type mode backup-delta)))
(define (data-type data) (if (pair? data) (car data) (vector-ref data 0)))
(define (data-lexer-mode data) (if (pair? data) (cdr data) (vector-ref data 1)))
(define (data-backup-delta data) (if (vector? data) (vector-ref data 2) 0))
(define -text<%>
(interface (text:basic<%>)
@ -274,11 +278,11 @@ added get-regions
(sync-invalid ls))))
(define/private (re-tokenize ls in in-start-pos in-lexer-mode enable-suspend)
(let-values ([(lexeme type data new-token-start new-token-end new-lexer-mode)
(let-values ([(lexeme type data new-token-start new-token-end backup-delta new-lexer-mode)
(begin
(enable-suspend #f)
(begin0
(get-token in in-lexer-mode)
(get-token in in-start-pos in-lexer-mode)
(enable-suspend #t)))])
(unless (eq? 'eof type)
(enable-suspend #f)
@ -302,7 +306,8 @@ added get-regions
;; version. In other words, the new greatly outweighs the tree
;; operations.
;;(insert-last! tokens (new token-tree% (length len) (data type)))
(insert-last-spec! (lexer-state-tokens ls) len (make-data type new-lexer-mode))
(insert-last-spec! (lexer-state-tokens ls) len (make-data type new-lexer-mode backup-delta))
#; (show-tree (lexer-state-tokens ls))
(send (lexer-state-parens ls) add-token data len)
(cond
((and (not (send (lexer-state-invalid-tokens ls) is-empty?))
@ -320,6 +325,29 @@ added get-regions
(else
(enable-suspend #t)
(re-tokenize ls in in-start-pos new-lexer-mode enable-suspend)))))))
(define/private (show-tree t)
(printf "Tree:\n")
(send t search-min!)
(let loop ([old-s -inf.0])
(let ([s (send t get-root-start-position)]
[e (send t get-root-end-position)])
(unless (= s old-s)
(printf " ~s\n" (list s e))
(send t search! e)
(loop s)))))
(define/private (split-backward ls valid-tree pos)
(let loop ([pos pos][valid-tree valid-tree][old-invalid-tree #f])
(let-values (((orig-token-start orig-token-end valid-tree invalid-tree orig-data)
(send valid-tree split/data (- pos (lexer-state-start-pos ls)))))
(let ([backup-pos (- pos (data-backup-delta orig-data))]
[invalid-tree (or old-invalid-tree invalid-tree)])
(if (backup-pos . < . pos)
;; back up more:
(loop pos valid-tree invalid-tree)
;; that was far enough:
(values orig-token-start orig-token-end valid-tree invalid-tree orig-data))))))
(define/private (do-insert/delete/ls ls edit-start-pos change-length)
(unless (lexer-state-up-to-date? ls)
@ -327,7 +355,7 @@ added get-regions
(cond
((lexer-state-up-to-date? ls)
(let-values (((orig-token-start orig-token-end valid-tree invalid-tree orig-data)
(send (lexer-state-tokens ls) split/data (- edit-start-pos (lexer-state-start-pos ls)))))
(split-backward ls (lexer-state-tokens ls) edit-start-pos)))
(send (lexer-state-parens ls) split-tree orig-token-start)
(set-lexer-state-invalid-tokens! ls invalid-tree)
(set-lexer-state-tokens! ls valid-tree)
@ -349,8 +377,7 @@ added get-regions
(queue-callback (λ () (colorer-callback)) #f)))
((>= edit-start-pos (lexer-state-invalid-tokens-start ls))
(let-values (((tok-start tok-end valid-tree invalid-tree orig-data)
(send (lexer-state-invalid-tokens ls) split/data
(- edit-start-pos (lexer-state-start-pos ls)))))
(split-backward ls (lexer-state-invalid-tokens ls) edit-start-pos)))
(set-lexer-state-invalid-tokens! ls invalid-tree)
(set-lexer-state-invalid-tokens-start!
ls
@ -362,8 +389,7 @@ added get-regions
(+ change-length (lexer-state-invalid-tokens-start ls))))
(else
(let-values (((tok-start tok-end valid-tree invalid-tree data)
(send (lexer-state-tokens ls) split/data
(- edit-start-pos (lexer-state-start-pos ls)))))
(split-backward ls (lexer-state-tokens ls) edit-start-pos)))
(send (lexer-state-parens ls) truncate tok-start)
(set-lexer-state-tokens! ls valid-tree)
(set-lexer-state-invalid-tokens-start! ls (+ change-length (lexer-state-invalid-tokens-start ls)))
@ -463,14 +489,14 @@ added get-regions
(reset-tokens)
(set! should-color? (preferences:get 'framework:coloring-active))
(set! token-sym->style token-sym->style-)
(set! get-token (if (procedure-arity-includes? get-token- 2)
(set! get-token (if (procedure-arity-includes? get-token- 3)
;; New interface: thread through a mode:
get-token-
;; Old interface: no mode
(lambda (in mode)
;; Old interface: no offset, backup delta, or mode
(lambda (in offset mode)
(let-values ([(lexeme type data new-token-start new-token-end)
(get-token- in)])
(values lexeme type data new-token-start new-token-end #f)))))
(values lexeme type data new-token-start new-token-end 0 #f)))))
(set! pairs pairs-)
(for-each
(lambda (ls)

View File

@ -1172,14 +1172,14 @@
(preferences:add-callback
'framework:tabify
(lambda (k v) (set! tabify-pref v)))
(define/private (scheme-lexer-wrapper in mode)
(let-values (((lexeme type paren start end mode) (module-lexer in mode)))
(define/private (scheme-lexer-wrapper in offset mode)
(let-values (((lexeme type paren start end backup-delta mode) (module-lexer in offset mode)))
(cond
((and (eq? type 'symbol)
(get-keyword-type lexeme tabify-pref))
(values lexeme 'keyword paren start end mode))
(values lexeme 'keyword paren start end backup-delta mode))
(else
(values lexeme type paren start end mode)))))
(values lexeme type paren start end backup-delta mode)))))
(define/override (put-file text sup directory default-name)
(parameterize ([finder:default-extension "ss"]
@ -1188,7 +1188,7 @@
;; don't call the surrogate's super, since it sets the default extension
(sup directory default-name)))
(super-new (get-token (lambda (in mode) (scheme-lexer-wrapper in mode)))
(super-new (get-token (lambda (in offset mode) (scheme-lexer-wrapper in offset mode)))
(token-sym->style short-sym->style-name)
(matches '((|(| |)|)
(|[| |]|)

View File

@ -16,12 +16,14 @@
exact-nonnegative-integer?
exact-nonnegative-integer?))
(-> input-port?
exact-nonnegative-integer?
any/c
(values any/c
symbol?
(or/c false? symbol?)
exact-nonnegative-integer?
exact-nonnegative-integer?
exact-nonnegative-integer?
any/c))))
(pairs (listof (list/p symbol? symbol?)))) void))]{
Starts tokenizing the buffer for coloring and parenthesis matching.
@ -29,7 +31,7 @@
The @scheme[token-sym->style] argument will be passed the first return symbol from @scheme[get-token],
and it should return the style-name that the token should be colored.
The @scheme[get-token] argument takes an input port and optionally a mode value.
The @scheme[get-token] argument takes an input port and optionally an offset and mode value.
When it accepts just an input port, @scheme[get-token] returns the next token as 5 values:
@itemize[
@ -52,15 +54,21 @@
@item{
The ending position of the token.}]
When @scheme[get-token] accepts a mode value in addition to an
input port, it must also return an extra result, which is a new
mode. When @scheme[get-token] is called for the beginning on a
stream, the mode argument is @scheme[#f]. Thereafter, the mode
When @scheme[get-token] accepts an offset and mode value in addition to an
input port, it must also return two extra results, which are a backup
distance and new mode. The offset given to @scheme[get-token] can be added
to the position of the input port to obtain absolute coordinates within a
text stream. The mode argument allows @scheme[get-token] to communicate information
from earlier parsing to later.
When @scheme[get-token] is called for the beginning on a
stream, the mode argument is @scheme[#f]; thereafter, the mode
returned for the previous token is provided to @scheme[get-token]
for the next token. The mode should not be a mutable value; if
part of the stream is re-tokenized, the mode saved from the
immediately preceding token is given again to the
@scheme[get-token] function.
@scheme[get-token] function. The backup distance returned by @scheme[get-token]
indicates the maximum number of characters to back up (counting from the start of the token)
and for re-parsing after a change to the editor within the token's region.
The @scheme[get-token] function is usually be implemented with a lexer using the
@scheme[parser-tools/lex] library. The
@ -68,7 +76,7 @@
@itemize[
@item{
Every position in the buffer must be accounted for in exactly one
token.}
token, and every token must have a non-zero width.}
@item{
The token returned by @scheme[get-token] must rely only on the contents of the
input port argument plus the mode argument. This constraint means that the
@ -77,16 +85,16 @@
for tokens).}
@item{
A change to the stream must not change the tokenization of the stream prior
to the token immediately preceding the change. In the following
example this invariant does not hold. If the buffer contains
to the token immediately preceding the change plus the backup distance. In the following
example, this invariant does not hold for a zero backup distance: If the buffer contains
@verbatim[#:indent 2]{" 1 2 3}
and the tokenizer treats the unmatched " as its own token (a string
error token), and separately tokenizes the 1 2 and 3, an edit to make
the buffer look like
@verbatim[#:indent 2]{" 1 2 3"}
would result in a single string token modifying previous tokens. To
handle these situations, @scheme[get-token] must treat the first line as a
single token.}]
handle these situations, @scheme[get-token] can treat the first line as a
single token, or it can precisely track backup distances.}]
The @scheme[pairs] argument is a list of different kinds of matching parens. The second
value returned by @scheme[get-token] is compared to this list to see how the

View File

@ -4,7 +4,7 @@
(provide module-lexer)
(define (module-lexer in mode)
(define (module-lexer in offset mode)
(cond
[(not mode)
;; Starting out: look for #lang:
@ -21,39 +21,41 @@
(old g))))])
;; FIXME: set the reader guard to disable access to
;; untrusted planet packages.
(read-language in (lambda () #f))))])
(read-language p (lambda () #f))))]
[sync-ports (lambda ()
(read-bytes (- (file-position p) init) in))])
(cond
[(procedure? get-info)
;; Produce language as first token:
(let*-values ([(bytes-len) (- (file-position p) init)]
[(bstr) (read-bytes bytes-len in)]
[(end-line end-col end-pos) (port-next-location in)])
(sync-ports)
(let-values ([(end-line end-col end-pos) (port-next-location in)])
(values
bstr
"#lang"
'other
#f
start-pos
end-pos
0
(or (let ([v (get-info 'color-lexer)])
(and v
(if (procedure-arity-includes? v 2)
(if (procedure-arity-includes? v 3)
(cons v #f)
v)))
scheme-lexer)))]
[(eq? 'fail get-info)
(sync-ports)
(let*-values ([(end-line end-col end-pos) (port-next-location in)])
(values #f 'error #f start-pos end-pos
scheme-lexer))]
(values #f 'error #f start-pos end-pos 0 scheme-lexer))]
[else
;; Start over using the Scheme lexer
(module-lexer in scheme-lexer)])))]
(module-lexer in offset scheme-lexer)])))]
[(pair? mode)
;; #lang-selected language consumes and produces a mode:
(let-values ([(lexeme type data new-token-start new-token-end new-mode)
((car mode) in (cdr mode))])
(values lexeme type data new-token-start new-token-end (cons (car mode) new-mode)))]
(let-values ([(lexeme type data new-token-start new-token-end backup-delta new-mode)
((car mode) in offset (cdr mode))])
(values lexeme type data new-token-start new-token-end backup-delta (cons (car mode) new-mode)))]
[else
;; #lang-selected language (or default) doesn't deal with modes:
(let-values ([(lexeme type data new-token-start new-token-end)
(mode in)])
(values lexeme type data new-token-start new-token-end mode))]))
(values lexeme type data new-token-start new-token-end 0 mode))]))

View File

@ -4,14 +4,25 @@
(provide scribble-inside-lexer
scribble-lexer)
(define-struct text (scheme-rx end-rx sub-rx string-rx open-paren close-paren))
(define-struct scheme (status))
(define-struct args ())
(define-struct text-args ())
(define-struct text (scheme-rx end-rx sub-rx string-rx open-paren close-paren) #:transparent)
(define-struct scheme (status backup) #:transparent)
(define-struct args () #:transparent)
(define-struct text-args () #:transparent)
(define rx:opener #rx"^[|]([^a-zA-Z0-9 \t\r\n\f@\\\177-\377{]*){")
(define (scribble-inside-lexer in mode)
(define rxes (make-weak-hash))
(define rx-keys (make-weak-hasheq))
(define (intern-byte-regexp bstr)
(let ([v (hash-ref rxes bstr #f)])
(or v
(let ([rx (byte-regexp bstr)])
(hash-set! rxes bstr rx)
(hash-set! rx-keys rx (make-ephemeron rx bstr))
rx))))
(define (scribble-inside-lexer in offset mode)
(let ([mode (or mode
(list
(make-text #rx"^@"
@ -22,6 +33,31 @@
#f)))])
(let-values ([(line col pos) (port-next-location in)]
[(l) (car mode)])
;; If we don't match rx:opener in a place where we might otherwise
;; match, and there is a "|" at that point, then a change later
;; could turn the non-match into a match, AND there could be
;; arbitrarily many Scheme tokens in between. So we carry the backup
;; position, use it as necessary (from places that might be between a "|"
;; and a potential match creator), and cancel it when it's clearly
;; not needed anymore (which includes after any token that isn't a
;; Scheme token).
(define (backup-if-needed pos)
(if (and (scheme? (car mode))
(scheme-backup (car mode)))
(- (+ pos offset) (scheme-backup (car mode)))
0))
(define (no-backup mode)
(if (and (scheme? (car mode))
(scheme-backup (car mode)))
(cons (make-scheme (scheme-status (car mode)) #f)
(cdr mode))
mode))
(define (maybe-no-backup type mode)
(if (eq? type 'white-space)
;; white space definitely ends the need for backup
(no-backup mode)
mode))
(define (enter-@ comment-k)
(cond
@ -37,8 +73,9 @@
#f
pos
end-pos
(backup-if-needed pos)
(cons (make-text-args)
mode)))
(no-backup mode))))
;; Line comment:
(begin
(regexp-match? #rx".*?(?=[\r\n])" in)
@ -48,7 +85,8 @@
#f
pos
end-pos
mode))))]
(backup-if-needed pos)
(no-backup mode)))))]
[(regexp-try-match rx:opener in)
=> (lambda (m) (enter-opener m mode))]
[(regexp-try-match #rx"^{" in)
@ -58,18 +96,19 @@
(cond
[(equal? #\| (peek-char in))
(read-char in)
(list* (make-scheme 'bar)
mode)]
(list* (make-scheme 'bar (+ offset pos))
(no-backup mode))]
[else
(list* (make-scheme 'one)
(list* (make-scheme 'one #f)
(make-args)
mode)])])
(no-backup mode))])])
(let-values ([(end-line end-col end-pos) (port-next-location in)])
(values "@"
'parenthesis
#f
pos
end-pos
(backup-if-needed pos)
new-mode)))]))
(define (enter-simple-opener mode)
@ -79,13 +118,14 @@
'|{|
pos
end-pos
(backup-if-needed pos)
(cons (make-text #rx"^@"
#rx"^}"
#rx"^{"
#rx".*?(?:(?=[@{}\r\n])|$)"
'|{|
'|}|)
mode))))
(no-backup mode)))))
(define (enter-opener m mode)
(let-values ([(end-line end-col end-pos) (port-next-location in)])
@ -94,23 +134,24 @@
'|{| ;; Better complex paren?
pos
end-pos
(backup-if-needed pos)
(let ([closer (regexp-quote
(bytes-append #"}"
(flip (cadr m))
#"|"))]
[re-opener (regexp-quote (cadr m))])
(cons (make-text (byte-regexp (bytes-append #"^[|]" re-opener #"@"))
(byte-regexp (bytes-append #"^" closer))
(byte-regexp (bytes-append #"^[|]" re-opener #"{"))
(byte-regexp (bytes-append
#".*?(?:(?=[|]"
re-opener
#"[@{])|(?="
closer
#")|(?=[\r\n])|$)"))
(cons (make-text (intern-byte-regexp (bytes-append #"^[|]" re-opener #"@"))
(intern-byte-regexp (bytes-append #"^" closer))
(intern-byte-regexp (bytes-append #"^[|]" re-opener #"{"))
(intern-byte-regexp (bytes-append
#".*?(?:(?=[|]"
re-opener
#"[@{])|(?="
closer
#")|(?=[\r\n])|$)"))
'|{| ;; Better complex paren?
'|}|) ;; Better complex paren?
mode)))))
(no-backup mode))))))
(if (eof-object? (peek-char in))
(values eof
@ -118,6 +159,7 @@
#f
pos
pos
0
#f)
(cond
[(text? l)
@ -134,6 +176,7 @@
(text-close-paren l)
pos
end-pos
0
(cdr mode)))]
[(and (text-sub-rx l)
(regexp-try-match (text-sub-rx l) in))
@ -143,6 +186,7 @@
(text-open-paren l)
pos
end-pos
0
(cons (car mode) mode)))]
[(regexp-try-match #px"^(?:[\r\n])\\s*" in)
;; Treat a newline and leading whitespace in text mode as whitespace
@ -153,6 +197,7 @@
#f
pos
end-pos
0
mode))]
[else
;; Read string up to @, }, or newline
@ -163,6 +208,7 @@
#f
pos
end-pos
0
mode))])]
[(scheme? l)
(let ([status (scheme-status l)])
@ -175,6 +221,7 @@
'|]|
pos
end-pos
0
(cdr mode)))]
[(and (memq status '(bar bar-no-more))
(regexp-try-match #px"^\\s*?[|]" in))
@ -184,9 +231,12 @@
#f
pos
end-pos
(backup-if-needed pos)
(cdr mode)))]
[(regexp-try-match #rx"^@" in)
(enter-@ (lambda (lexeme type paren start end mode)
;; If we have a backup at this point, we can drop it, because
;; edits after here cannot lead to a rx:opener match.
(enter-@ (lambda (lexeme type paren start end backup mode)
(values lexeme
(if (eq? status 'one)
'error
@ -194,6 +244,7 @@
paren
start
end
backup
mode)))]
[(and (eq? status 'one)
(regexp-try-match rx:opener in))
@ -212,6 +263,7 @@
#f
pos
end-pos
(backup-if-needed pos)
mode))]
[(and (eq? status 'one)
(regexp-try-match #rx"^#?,@?" in))
@ -222,6 +274,7 @@
#f
pos
end-pos
(backup-if-needed pos)
mode))]
[else
(let-values ([(lexeme type paren start end adj)
@ -233,9 +286,11 @@
[(many) mode]
[(one) (cdr mode)]
[(bracket bar-no-more)
(cons (make-scheme status) (cdr mode))]
[(bar) (cons (make-scheme 'bar-no-more) (cdr mode))]
[else (error "bad status")]))])
(cons (make-scheme status (scheme-backup l))
(cdr mode))]
[(bar) (cons (make-scheme 'bar-no-more (scheme-backup l))
(cdr mode))]
[else (error "bad status" status)]))])
(values lexeme
(cond
[(or (eq? type 'comment)
@ -250,24 +305,29 @@
paren
start
end
(case adj
[(continue) mode]
[(datum)
(cond
[(pair? status) mode]
[else (consume status)])]
[(open)
(cons (make-scheme (cons #t status))
(cdr mode))]
[(close)
(if (pair? status)
(let ([v (cdr status)])
(if (symbol? v)
(consume v)
(cons (make-scheme v) (cdr mode))))
(consume status))]
[(bad) (consume status)]
[else (error "bad adj")])))]))]
(backup-if-needed start)
(maybe-no-backup
type
(case adj
[(continue) mode]
[(datum)
(cond
[(pair? status) mode]
[else (consume status)])]
[(open)
(cons (make-scheme (cons #t status) (scheme-backup l))
(cdr mode))]
[(close)
(if (pair? status)
(let ([v (cdr status)])
(if (symbol? v)
(consume v)
(cons (make-scheme v (scheme-backup l)) (cdr mode))))
(consume status))]
[(bad) (if (pair? status)
mode
(consume status))]
[else (error "bad adj")]))))]))]
[(args? l)
(cond
[(regexp-try-match #rx"^\\[" in)
@ -277,10 +337,11 @@
'|[|
pos
end-pos
(list* (make-scheme 'bracket)
0
(list* (make-scheme 'bracket #f)
mode)))]
[else
(scribble-lexer in (cons (make-text-args) (cdr mode)))])]
(scribble-inside-lexer in offset (cons (make-text-args) (cdr mode)))])]
[(text-args? l)
(cond
[(regexp-try-match rx:opener in)
@ -288,11 +349,11 @@
[(regexp-try-match #rx"^{" in)
(enter-simple-opener (cdr mode))]
[else
(scribble-lexer in (cdr mode))])]
(scribble-inside-lexer in offset (cdr mode))])]
[else (error "bad mode")])))))
(define (scribble-lexer in mode)
(scribble-inside-lexer in (or mode (list (make-scheme 'many)))))
(define (scribble-lexer in offset mode)
(scribble-inside-lexer in offset (or mode (list (make-scheme 'many #f)))))
(define (flip s)
(list->bytes
@ -300,10 +361,8 @@
(cond
[(equal? c (char->integer #\()) (char->integer #\))]
[(equal? c (char->integer #\[)) (char->integer #\])]
[(equal? c (char->integer #\{)) (char->integer #\})]
[(equal? c (char->integer #\<)) (char->integer #\>)]
[(equal? c (char->integer #\))) (char->integer #\()]
[(equal? c (char->integer #\])) (char->integer #\[)]
[(equal? c (char->integer #\})) (char->integer #\{)]
[(equal? c (char->integer #\>)) (char->integer #\<)]
[else c]))))

View File

@ -126,6 +126,7 @@ A lexer that only identifies @litchar{(}, @litchar{)}, @litchar{[},
@defmodule[syntax-color/module-lexer]
@defproc[(module-lexer [in input-port?]
[offset exact-nonnegative-integer?]
[mode (or/c #f
(-> input-port? any)
(cons/c (-> input-port? any/c any) any/c))])
@ -134,6 +135,7 @@ A lexer that only identifies @litchar{(}, @litchar{)}, @litchar{[},
(or/c symbol? false/c)
(or/c number? false/c)
(or/c number? false/c)
exact-nonnegative-integer?
(or/c #f
(-> input-port? any)
(cons/c (-> input-port? any/c any) any/c)))]{
@ -142,8 +144,9 @@ Like @scheme[scheme-lexer], but
@itemize[
@item{A @scheme[module-lexer] accepts (and returns) a lexer mode,
instead of just an input port.}
@item{A @scheme[module-lexer] accepts an offset and lexer mode,
instead of just an input port, and it returns a backup distance
and a new lexer mode.}
@item{When @scheme[mode] is @scheme[#f] (indicating the start of the
stream), the lexer checks @scheme[in] for a @hash-lang[]
@ -158,7 +161,7 @@ Like @scheme[scheme-lexer], but
@scheme['color-lexer]. If the result is not @scheme[#f], then
it should be a lexer function for use with
@scheme[color:text%]. The result mode is the lexer---paired
with @scheme[#f] if the lexer is a procedure arity 2---so that
with @scheme[#f] if the lexer is a procedure arity 3---so that
future calls will dispatch to the language-supplied lexer.
If the language is specified but it provides no
@ -170,7 +173,7 @@ Like @scheme[scheme-lexer], but
lexer again as the mode.}
@item{When @scheme[mode] is a pair, then the lexer procedure in the
@scheme[car] is applied to @scheme[in] and the mode in the
@scheme[car] is applied to @scheme[in], @scheme[offset], and the mode in the
@scheme[cdr]. The lexer's results are returned, except that its
mode result is paired back with the lexer procedure.}
@ -181,12 +184,14 @@ Like @scheme[scheme-lexer], but
@defmodule[syntax-color/scribble-lexer]
@defproc[(scribble-lexer [in input-port?]
[offset exact-nonnegative-integer?]
[mode any/c])
(values (or/c string? eof-object?)
symbol?
(or/c symbol? false/c)
(or/c number? false/c)
(or/c number? false/c)
exact-nonnegative-integer?
any/c)]{
Like @scheme[scheme-lexer], but for Scheme extended with Scribbles
@ -194,12 +199,14 @@ Like @scheme[scheme-lexer], but for Scheme extended with Scribbles
"scribblings/scribble/scribble.scrbl") "reader"]).}
@defproc[(scribble-inside-lexer [in input-port?]
[offset exact-nonnegative-integer?]
[mode any/c])
(values (or/c string? eof-object?)
symbol?
(or/c symbol? false/c)
(or/c number? false/c)
(or/c number? false/c)
exact-nonnegative-integer?
any/c)]{
Like @scheme[scribble-lexer], but starting in ``text'' mode instead of

View File

@ -6,10 +6,10 @@
(define (color str)
(let ([in (open-input-string str)])
(let loop ([mode #f])
(let-values ([(lexeme type paren start end mode) (scribble-inside-lexer in mode)])
(let-values ([(lexeme type paren start end backup mode) (scribble-inside-lexer in 0 mode)])
(if (eq? type 'eof)
null
(cons (list start end type)
(cons (list start end type backup)
(loop mode)))))))
(define (test* str len-val line)
@ -17,7 +17,10 @@
[val (let loop ([pos 1][l len-val])
(if (null? l)
null
(cons (list pos (+ pos (caar l)) (cadar l))
(cons (list pos (+ pos (caar l)) (cadar l)
(if (null? (cddar l))
0
(caddar l)))
(loop (+ (+ pos (caar l))) (cdr l)))))])
(unless (equal? v val)
(printf "FAILED, line ~s\n" line)
@ -91,27 +94,27 @@
(1 parenthesis)))
(test "@|x|str" '((2 parenthesis)
(1 symbol)
(1 parenthesis)
(1 symbol 2)
(1 parenthesis 3)
(3 string)))
(test "@|x #|ok|#|str" '((2 parenthesis)
(1 symbol)
(1 white-space)
(1 symbol 2)
(1 white-space 3)
(6 comment)
(1 parenthesis)
(3 string)))
(test "@| x ; c\n| str" '((2 parenthesis)
(1 white-space)
(1 white-space 2)
(1 symbol)
(1 white-space)
(3 comment)
(2 parenthesis)
(4 string)))
(test "@|(a|b|)|str" '((2 parenthesis)
(1 parenthesis)
(4 symbol)
(1 parenthesis)
(1 parenthesis)
(1 parenthesis 2)
(4 symbol 3)
(1 parenthesis 7)
(1 parenthesis 8)
(3 string)))
(test "@#|bad|#x str" '((1 parenthesis)
@ -122,7 +125,7 @@
(1 parenthesis)
(1 symbol)))
(test "@|@x|z" '((2 parenthesis)
(1 parenthesis)
(1 parenthesis 2)
(1 symbol)
(1 parenthesis)
(1 string)))
@ -232,4 +235,9 @@
(1 string)
(1 parenthesis)))
; (test "@|=@=|}" null)
(test "@|()|})|" '((2 parenthesis)
(1 parenthesis 2)
(1 parenthesis 3)
(1 parenthesis 4)
(3 string)))