extended syntax colorer to support lexer-specific backup; fix problems with new color lexers
svn: r15617
This commit is contained in:
parent
6dcc67cb25
commit
d807421a07
|
@ -32,9 +32,13 @@ added get-regions
|
|||
(define (should-color-type? type)
|
||||
(not (memq type '(white-space no-color))))
|
||||
|
||||
(define (make-data type mode) (cons type mode))
|
||||
(define (data-type data) (car data))
|
||||
(define (data-lexer-mode data) (cdr data))
|
||||
(define (make-data type mode backup-delta)
|
||||
(if (zero? backup-delta)
|
||||
(cons type mode)
|
||||
(vector type mode backup-delta)))
|
||||
(define (data-type data) (if (pair? data) (car data) (vector-ref data 0)))
|
||||
(define (data-lexer-mode data) (if (pair? data) (cdr data) (vector-ref data 1)))
|
||||
(define (data-backup-delta data) (if (vector? data) (vector-ref data 2) 0))
|
||||
|
||||
(define -text<%>
|
||||
(interface (text:basic<%>)
|
||||
|
@ -274,11 +278,11 @@ added get-regions
|
|||
(sync-invalid ls))))
|
||||
|
||||
(define/private (re-tokenize ls in in-start-pos in-lexer-mode enable-suspend)
|
||||
(let-values ([(lexeme type data new-token-start new-token-end new-lexer-mode)
|
||||
(let-values ([(lexeme type data new-token-start new-token-end backup-delta new-lexer-mode)
|
||||
(begin
|
||||
(enable-suspend #f)
|
||||
(begin0
|
||||
(get-token in in-lexer-mode)
|
||||
(get-token in in-start-pos in-lexer-mode)
|
||||
(enable-suspend #t)))])
|
||||
(unless (eq? 'eof type)
|
||||
(enable-suspend #f)
|
||||
|
@ -302,7 +306,8 @@ added get-regions
|
|||
;; version. In other words, the new greatly outweighs the tree
|
||||
;; operations.
|
||||
;;(insert-last! tokens (new token-tree% (length len) (data type)))
|
||||
(insert-last-spec! (lexer-state-tokens ls) len (make-data type new-lexer-mode))
|
||||
(insert-last-spec! (lexer-state-tokens ls) len (make-data type new-lexer-mode backup-delta))
|
||||
#; (show-tree (lexer-state-tokens ls))
|
||||
(send (lexer-state-parens ls) add-token data len)
|
||||
(cond
|
||||
((and (not (send (lexer-state-invalid-tokens ls) is-empty?))
|
||||
|
@ -320,6 +325,29 @@ added get-regions
|
|||
(else
|
||||
(enable-suspend #t)
|
||||
(re-tokenize ls in in-start-pos new-lexer-mode enable-suspend)))))))
|
||||
|
||||
(define/private (show-tree t)
|
||||
(printf "Tree:\n")
|
||||
(send t search-min!)
|
||||
(let loop ([old-s -inf.0])
|
||||
(let ([s (send t get-root-start-position)]
|
||||
[e (send t get-root-end-position)])
|
||||
(unless (= s old-s)
|
||||
(printf " ~s\n" (list s e))
|
||||
(send t search! e)
|
||||
(loop s)))))
|
||||
|
||||
(define/private (split-backward ls valid-tree pos)
|
||||
(let loop ([pos pos][valid-tree valid-tree][old-invalid-tree #f])
|
||||
(let-values (((orig-token-start orig-token-end valid-tree invalid-tree orig-data)
|
||||
(send valid-tree split/data (- pos (lexer-state-start-pos ls)))))
|
||||
(let ([backup-pos (- pos (data-backup-delta orig-data))]
|
||||
[invalid-tree (or old-invalid-tree invalid-tree)])
|
||||
(if (backup-pos . < . pos)
|
||||
;; back up more:
|
||||
(loop pos valid-tree invalid-tree)
|
||||
;; that was far enough:
|
||||
(values orig-token-start orig-token-end valid-tree invalid-tree orig-data))))))
|
||||
|
||||
(define/private (do-insert/delete/ls ls edit-start-pos change-length)
|
||||
(unless (lexer-state-up-to-date? ls)
|
||||
|
@ -327,7 +355,7 @@ added get-regions
|
|||
(cond
|
||||
((lexer-state-up-to-date? ls)
|
||||
(let-values (((orig-token-start orig-token-end valid-tree invalid-tree orig-data)
|
||||
(send (lexer-state-tokens ls) split/data (- edit-start-pos (lexer-state-start-pos ls)))))
|
||||
(split-backward ls (lexer-state-tokens ls) edit-start-pos)))
|
||||
(send (lexer-state-parens ls) split-tree orig-token-start)
|
||||
(set-lexer-state-invalid-tokens! ls invalid-tree)
|
||||
(set-lexer-state-tokens! ls valid-tree)
|
||||
|
@ -349,8 +377,7 @@ added get-regions
|
|||
(queue-callback (λ () (colorer-callback)) #f)))
|
||||
((>= edit-start-pos (lexer-state-invalid-tokens-start ls))
|
||||
(let-values (((tok-start tok-end valid-tree invalid-tree orig-data)
|
||||
(send (lexer-state-invalid-tokens ls) split/data
|
||||
(- edit-start-pos (lexer-state-start-pos ls)))))
|
||||
(split-backward ls (lexer-state-invalid-tokens ls) edit-start-pos)))
|
||||
(set-lexer-state-invalid-tokens! ls invalid-tree)
|
||||
(set-lexer-state-invalid-tokens-start!
|
||||
ls
|
||||
|
@ -362,8 +389,7 @@ added get-regions
|
|||
(+ change-length (lexer-state-invalid-tokens-start ls))))
|
||||
(else
|
||||
(let-values (((tok-start tok-end valid-tree invalid-tree data)
|
||||
(send (lexer-state-tokens ls) split/data
|
||||
(- edit-start-pos (lexer-state-start-pos ls)))))
|
||||
(split-backward ls (lexer-state-tokens ls) edit-start-pos)))
|
||||
(send (lexer-state-parens ls) truncate tok-start)
|
||||
(set-lexer-state-tokens! ls valid-tree)
|
||||
(set-lexer-state-invalid-tokens-start! ls (+ change-length (lexer-state-invalid-tokens-start ls)))
|
||||
|
@ -463,14 +489,14 @@ added get-regions
|
|||
(reset-tokens)
|
||||
(set! should-color? (preferences:get 'framework:coloring-active))
|
||||
(set! token-sym->style token-sym->style-)
|
||||
(set! get-token (if (procedure-arity-includes? get-token- 2)
|
||||
(set! get-token (if (procedure-arity-includes? get-token- 3)
|
||||
;; New interface: thread through a mode:
|
||||
get-token-
|
||||
;; Old interface: no mode
|
||||
(lambda (in mode)
|
||||
;; Old interface: no offset, backup delta, or mode
|
||||
(lambda (in offset mode)
|
||||
(let-values ([(lexeme type data new-token-start new-token-end)
|
||||
(get-token- in)])
|
||||
(values lexeme type data new-token-start new-token-end #f)))))
|
||||
(values lexeme type data new-token-start new-token-end 0 #f)))))
|
||||
(set! pairs pairs-)
|
||||
(for-each
|
||||
(lambda (ls)
|
||||
|
|
|
@ -1172,14 +1172,14 @@
|
|||
(preferences:add-callback
|
||||
'framework:tabify
|
||||
(lambda (k v) (set! tabify-pref v)))
|
||||
(define/private (scheme-lexer-wrapper in mode)
|
||||
(let-values (((lexeme type paren start end mode) (module-lexer in mode)))
|
||||
(define/private (scheme-lexer-wrapper in offset mode)
|
||||
(let-values (((lexeme type paren start end backup-delta mode) (module-lexer in offset mode)))
|
||||
(cond
|
||||
((and (eq? type 'symbol)
|
||||
(get-keyword-type lexeme tabify-pref))
|
||||
(values lexeme 'keyword paren start end mode))
|
||||
(values lexeme 'keyword paren start end backup-delta mode))
|
||||
(else
|
||||
(values lexeme type paren start end mode)))))
|
||||
(values lexeme type paren start end backup-delta mode)))))
|
||||
|
||||
(define/override (put-file text sup directory default-name)
|
||||
(parameterize ([finder:default-extension "ss"]
|
||||
|
@ -1188,7 +1188,7 @@
|
|||
;; don't call the surrogate's super, since it sets the default extension
|
||||
(sup directory default-name)))
|
||||
|
||||
(super-new (get-token (lambda (in mode) (scheme-lexer-wrapper in mode)))
|
||||
(super-new (get-token (lambda (in offset mode) (scheme-lexer-wrapper in offset mode)))
|
||||
(token-sym->style short-sym->style-name)
|
||||
(matches '((|(| |)|)
|
||||
(|[| |]|)
|
||||
|
|
|
@ -16,12 +16,14 @@
|
|||
exact-nonnegative-integer?
|
||||
exact-nonnegative-integer?))
|
||||
(-> input-port?
|
||||
exact-nonnegative-integer?
|
||||
any/c
|
||||
(values any/c
|
||||
symbol?
|
||||
(or/c false? symbol?)
|
||||
exact-nonnegative-integer?
|
||||
exact-nonnegative-integer?
|
||||
exact-nonnegative-integer?
|
||||
any/c))))
|
||||
(pairs (listof (list/p symbol? symbol?)))) void))]{
|
||||
Starts tokenizing the buffer for coloring and parenthesis matching.
|
||||
|
@ -29,7 +31,7 @@
|
|||
The @scheme[token-sym->style] argument will be passed the first return symbol from @scheme[get-token],
|
||||
and it should return the style-name that the token should be colored.
|
||||
|
||||
The @scheme[get-token] argument takes an input port and optionally a mode value.
|
||||
The @scheme[get-token] argument takes an input port and optionally an offset and mode value.
|
||||
When it accepts just an input port, @scheme[get-token] returns the next token as 5 values:
|
||||
|
||||
@itemize[
|
||||
|
@ -52,15 +54,21 @@
|
|||
@item{
|
||||
The ending position of the token.}]
|
||||
|
||||
When @scheme[get-token] accepts a mode value in addition to an
|
||||
input port, it must also return an extra result, which is a new
|
||||
mode. When @scheme[get-token] is called for the beginning on a
|
||||
stream, the mode argument is @scheme[#f]. Thereafter, the mode
|
||||
When @scheme[get-token] accepts an offset and mode value in addition to an
|
||||
input port, it must also return two extra results, which are a backup
|
||||
distance and new mode. The offset given to @scheme[get-token] can be added
|
||||
to the position of the input port to obtain absolute coordinates within a
|
||||
text stream. The mode argument allows @scheme[get-token] to communicate information
|
||||
from earlier parsing to later.
|
||||
When @scheme[get-token] is called for the beginning on a
|
||||
stream, the mode argument is @scheme[#f]; thereafter, the mode
|
||||
returned for the previous token is provided to @scheme[get-token]
|
||||
for the next token. The mode should not be a mutable value; if
|
||||
part of the stream is re-tokenized, the mode saved from the
|
||||
immediately preceding token is given again to the
|
||||
@scheme[get-token] function.
|
||||
@scheme[get-token] function. The backup distance returned by @scheme[get-token]
|
||||
indicates the maximum number of characters to back up (counting from the start of the token)
|
||||
and for re-parsing after a change to the editor within the token's region.
|
||||
|
||||
The @scheme[get-token] function is usually be implemented with a lexer using the
|
||||
@scheme[parser-tools/lex] library. The
|
||||
|
@ -68,7 +76,7 @@
|
|||
@itemize[
|
||||
@item{
|
||||
Every position in the buffer must be accounted for in exactly one
|
||||
token.}
|
||||
token, and every token must have a non-zero width.}
|
||||
@item{
|
||||
The token returned by @scheme[get-token] must rely only on the contents of the
|
||||
input port argument plus the mode argument. This constraint means that the
|
||||
|
@ -77,16 +85,16 @@
|
|||
for tokens).}
|
||||
@item{
|
||||
A change to the stream must not change the tokenization of the stream prior
|
||||
to the token immediately preceding the change. In the following
|
||||
example this invariant does not hold. If the buffer contains
|
||||
to the token immediately preceding the change plus the backup distance. In the following
|
||||
example, this invariant does not hold for a zero backup distance: If the buffer contains
|
||||
@verbatim[#:indent 2]{" 1 2 3}
|
||||
and the tokenizer treats the unmatched " as its own token (a string
|
||||
error token), and separately tokenizes the 1 2 and 3, an edit to make
|
||||
the buffer look like
|
||||
@verbatim[#:indent 2]{" 1 2 3"}
|
||||
would result in a single string token modifying previous tokens. To
|
||||
handle these situations, @scheme[get-token] must treat the first line as a
|
||||
single token.}]
|
||||
handle these situations, @scheme[get-token] can treat the first line as a
|
||||
single token, or it can precisely track backup distances.}]
|
||||
|
||||
The @scheme[pairs] argument is a list of different kinds of matching parens. The second
|
||||
value returned by @scheme[get-token] is compared to this list to see how the
|
||||
|
|
|
@ -4,7 +4,7 @@
|
|||
|
||||
(provide module-lexer)
|
||||
|
||||
(define (module-lexer in mode)
|
||||
(define (module-lexer in offset mode)
|
||||
(cond
|
||||
[(not mode)
|
||||
;; Starting out: look for #lang:
|
||||
|
@ -21,39 +21,41 @@
|
|||
(old g))))])
|
||||
;; FIXME: set the reader guard to disable access to
|
||||
;; untrusted planet packages.
|
||||
(read-language in (lambda () #f))))])
|
||||
(read-language p (lambda () #f))))]
|
||||
[sync-ports (lambda ()
|
||||
(read-bytes (- (file-position p) init) in))])
|
||||
(cond
|
||||
[(procedure? get-info)
|
||||
;; Produce language as first token:
|
||||
(let*-values ([(bytes-len) (- (file-position p) init)]
|
||||
[(bstr) (read-bytes bytes-len in)]
|
||||
[(end-line end-col end-pos) (port-next-location in)])
|
||||
(sync-ports)
|
||||
(let-values ([(end-line end-col end-pos) (port-next-location in)])
|
||||
(values
|
||||
bstr
|
||||
"#lang"
|
||||
'other
|
||||
#f
|
||||
start-pos
|
||||
end-pos
|
||||
0
|
||||
(or (let ([v (get-info 'color-lexer)])
|
||||
(and v
|
||||
(if (procedure-arity-includes? v 2)
|
||||
(if (procedure-arity-includes? v 3)
|
||||
(cons v #f)
|
||||
v)))
|
||||
scheme-lexer)))]
|
||||
[(eq? 'fail get-info)
|
||||
(sync-ports)
|
||||
(let*-values ([(end-line end-col end-pos) (port-next-location in)])
|
||||
(values #f 'error #f start-pos end-pos
|
||||
scheme-lexer))]
|
||||
(values #f 'error #f start-pos end-pos 0 scheme-lexer))]
|
||||
[else
|
||||
;; Start over using the Scheme lexer
|
||||
(module-lexer in scheme-lexer)])))]
|
||||
(module-lexer in offset scheme-lexer)])))]
|
||||
[(pair? mode)
|
||||
;; #lang-selected language consumes and produces a mode:
|
||||
(let-values ([(lexeme type data new-token-start new-token-end new-mode)
|
||||
((car mode) in (cdr mode))])
|
||||
(values lexeme type data new-token-start new-token-end (cons (car mode) new-mode)))]
|
||||
(let-values ([(lexeme type data new-token-start new-token-end backup-delta new-mode)
|
||||
((car mode) in offset (cdr mode))])
|
||||
(values lexeme type data new-token-start new-token-end backup-delta (cons (car mode) new-mode)))]
|
||||
[else
|
||||
;; #lang-selected language (or default) doesn't deal with modes:
|
||||
(let-values ([(lexeme type data new-token-start new-token-end)
|
||||
(mode in)])
|
||||
(values lexeme type data new-token-start new-token-end mode))]))
|
||||
(values lexeme type data new-token-start new-token-end 0 mode))]))
|
||||
|
|
|
@ -4,14 +4,25 @@
|
|||
(provide scribble-inside-lexer
|
||||
scribble-lexer)
|
||||
|
||||
(define-struct text (scheme-rx end-rx sub-rx string-rx open-paren close-paren))
|
||||
(define-struct scheme (status))
|
||||
(define-struct args ())
|
||||
(define-struct text-args ())
|
||||
(define-struct text (scheme-rx end-rx sub-rx string-rx open-paren close-paren) #:transparent)
|
||||
(define-struct scheme (status backup) #:transparent)
|
||||
(define-struct args () #:transparent)
|
||||
(define-struct text-args () #:transparent)
|
||||
|
||||
(define rx:opener #rx"^[|]([^a-zA-Z0-9 \t\r\n\f@\\\177-\377{]*){")
|
||||
|
||||
(define (scribble-inside-lexer in mode)
|
||||
(define rxes (make-weak-hash))
|
||||
(define rx-keys (make-weak-hasheq))
|
||||
|
||||
(define (intern-byte-regexp bstr)
|
||||
(let ([v (hash-ref rxes bstr #f)])
|
||||
(or v
|
||||
(let ([rx (byte-regexp bstr)])
|
||||
(hash-set! rxes bstr rx)
|
||||
(hash-set! rx-keys rx (make-ephemeron rx bstr))
|
||||
rx))))
|
||||
|
||||
(define (scribble-inside-lexer in offset mode)
|
||||
(let ([mode (or mode
|
||||
(list
|
||||
(make-text #rx"^@"
|
||||
|
@ -22,6 +33,31 @@
|
|||
#f)))])
|
||||
(let-values ([(line col pos) (port-next-location in)]
|
||||
[(l) (car mode)])
|
||||
|
||||
;; If we don't match rx:opener in a place where we might otherwise
|
||||
;; match, and there is a "|" at that point, then a change later
|
||||
;; could turn the non-match into a match, AND there could be
|
||||
;; arbitrarily many Scheme tokens in between. So we carry the backup
|
||||
;; position, use it as necessary (from places that might be between a "|"
|
||||
;; and a potential match creator), and cancel it when it's clearly
|
||||
;; not needed anymore (which includes after any token that isn't a
|
||||
;; Scheme token).
|
||||
(define (backup-if-needed pos)
|
||||
(if (and (scheme? (car mode))
|
||||
(scheme-backup (car mode)))
|
||||
(- (+ pos offset) (scheme-backup (car mode)))
|
||||
0))
|
||||
(define (no-backup mode)
|
||||
(if (and (scheme? (car mode))
|
||||
(scheme-backup (car mode)))
|
||||
(cons (make-scheme (scheme-status (car mode)) #f)
|
||||
(cdr mode))
|
||||
mode))
|
||||
(define (maybe-no-backup type mode)
|
||||
(if (eq? type 'white-space)
|
||||
;; white space definitely ends the need for backup
|
||||
(no-backup mode)
|
||||
mode))
|
||||
|
||||
(define (enter-@ comment-k)
|
||||
(cond
|
||||
|
@ -37,8 +73,9 @@
|
|||
#f
|
||||
pos
|
||||
end-pos
|
||||
(backup-if-needed pos)
|
||||
(cons (make-text-args)
|
||||
mode)))
|
||||
(no-backup mode))))
|
||||
;; Line comment:
|
||||
(begin
|
||||
(regexp-match? #rx".*?(?=[\r\n])" in)
|
||||
|
@ -48,7 +85,8 @@
|
|||
#f
|
||||
pos
|
||||
end-pos
|
||||
mode))))]
|
||||
(backup-if-needed pos)
|
||||
(no-backup mode)))))]
|
||||
[(regexp-try-match rx:opener in)
|
||||
=> (lambda (m) (enter-opener m mode))]
|
||||
[(regexp-try-match #rx"^{" in)
|
||||
|
@ -58,18 +96,19 @@
|
|||
(cond
|
||||
[(equal? #\| (peek-char in))
|
||||
(read-char in)
|
||||
(list* (make-scheme 'bar)
|
||||
mode)]
|
||||
(list* (make-scheme 'bar (+ offset pos))
|
||||
(no-backup mode))]
|
||||
[else
|
||||
(list* (make-scheme 'one)
|
||||
(list* (make-scheme 'one #f)
|
||||
(make-args)
|
||||
mode)])])
|
||||
(no-backup mode))])])
|
||||
(let-values ([(end-line end-col end-pos) (port-next-location in)])
|
||||
(values "@"
|
||||
'parenthesis
|
||||
#f
|
||||
pos
|
||||
end-pos
|
||||
(backup-if-needed pos)
|
||||
new-mode)))]))
|
||||
|
||||
(define (enter-simple-opener mode)
|
||||
|
@ -79,13 +118,14 @@
|
|||
'|{|
|
||||
pos
|
||||
end-pos
|
||||
(backup-if-needed pos)
|
||||
(cons (make-text #rx"^@"
|
||||
#rx"^}"
|
||||
#rx"^{"
|
||||
#rx".*?(?:(?=[@{}\r\n])|$)"
|
||||
'|{|
|
||||
'|}|)
|
||||
mode))))
|
||||
(no-backup mode)))))
|
||||
|
||||
(define (enter-opener m mode)
|
||||
(let-values ([(end-line end-col end-pos) (port-next-location in)])
|
||||
|
@ -94,23 +134,24 @@
|
|||
'|{| ;; Better complex paren?
|
||||
pos
|
||||
end-pos
|
||||
(backup-if-needed pos)
|
||||
(let ([closer (regexp-quote
|
||||
(bytes-append #"}"
|
||||
(flip (cadr m))
|
||||
#"|"))]
|
||||
[re-opener (regexp-quote (cadr m))])
|
||||
(cons (make-text (byte-regexp (bytes-append #"^[|]" re-opener #"@"))
|
||||
(byte-regexp (bytes-append #"^" closer))
|
||||
(byte-regexp (bytes-append #"^[|]" re-opener #"{"))
|
||||
(byte-regexp (bytes-append
|
||||
#".*?(?:(?=[|]"
|
||||
re-opener
|
||||
#"[@{])|(?="
|
||||
closer
|
||||
#")|(?=[\r\n])|$)"))
|
||||
(cons (make-text (intern-byte-regexp (bytes-append #"^[|]" re-opener #"@"))
|
||||
(intern-byte-regexp (bytes-append #"^" closer))
|
||||
(intern-byte-regexp (bytes-append #"^[|]" re-opener #"{"))
|
||||
(intern-byte-regexp (bytes-append
|
||||
#".*?(?:(?=[|]"
|
||||
re-opener
|
||||
#"[@{])|(?="
|
||||
closer
|
||||
#")|(?=[\r\n])|$)"))
|
||||
'|{| ;; Better complex paren?
|
||||
'|}|) ;; Better complex paren?
|
||||
mode)))))
|
||||
(no-backup mode))))))
|
||||
|
||||
(if (eof-object? (peek-char in))
|
||||
(values eof
|
||||
|
@ -118,6 +159,7 @@
|
|||
#f
|
||||
pos
|
||||
pos
|
||||
0
|
||||
#f)
|
||||
(cond
|
||||
[(text? l)
|
||||
|
@ -134,6 +176,7 @@
|
|||
(text-close-paren l)
|
||||
pos
|
||||
end-pos
|
||||
0
|
||||
(cdr mode)))]
|
||||
[(and (text-sub-rx l)
|
||||
(regexp-try-match (text-sub-rx l) in))
|
||||
|
@ -143,6 +186,7 @@
|
|||
(text-open-paren l)
|
||||
pos
|
||||
end-pos
|
||||
0
|
||||
(cons (car mode) mode)))]
|
||||
[(regexp-try-match #px"^(?:[\r\n])\\s*" in)
|
||||
;; Treat a newline and leading whitespace in text mode as whitespace
|
||||
|
@ -153,6 +197,7 @@
|
|||
#f
|
||||
pos
|
||||
end-pos
|
||||
0
|
||||
mode))]
|
||||
[else
|
||||
;; Read string up to @, }, or newline
|
||||
|
@ -163,6 +208,7 @@
|
|||
#f
|
||||
pos
|
||||
end-pos
|
||||
0
|
||||
mode))])]
|
||||
[(scheme? l)
|
||||
(let ([status (scheme-status l)])
|
||||
|
@ -175,6 +221,7 @@
|
|||
'|]|
|
||||
pos
|
||||
end-pos
|
||||
0
|
||||
(cdr mode)))]
|
||||
[(and (memq status '(bar bar-no-more))
|
||||
(regexp-try-match #px"^\\s*?[|]" in))
|
||||
|
@ -184,9 +231,12 @@
|
|||
#f
|
||||
pos
|
||||
end-pos
|
||||
(backup-if-needed pos)
|
||||
(cdr mode)))]
|
||||
[(regexp-try-match #rx"^@" in)
|
||||
(enter-@ (lambda (lexeme type paren start end mode)
|
||||
;; If we have a backup at this point, we can drop it, because
|
||||
;; edits after here cannot lead to a rx:opener match.
|
||||
(enter-@ (lambda (lexeme type paren start end backup mode)
|
||||
(values lexeme
|
||||
(if (eq? status 'one)
|
||||
'error
|
||||
|
@ -194,6 +244,7 @@
|
|||
paren
|
||||
start
|
||||
end
|
||||
backup
|
||||
mode)))]
|
||||
[(and (eq? status 'one)
|
||||
(regexp-try-match rx:opener in))
|
||||
|
@ -212,6 +263,7 @@
|
|||
#f
|
||||
pos
|
||||
end-pos
|
||||
(backup-if-needed pos)
|
||||
mode))]
|
||||
[(and (eq? status 'one)
|
||||
(regexp-try-match #rx"^#?,@?" in))
|
||||
|
@ -222,6 +274,7 @@
|
|||
#f
|
||||
pos
|
||||
end-pos
|
||||
(backup-if-needed pos)
|
||||
mode))]
|
||||
[else
|
||||
(let-values ([(lexeme type paren start end adj)
|
||||
|
@ -233,9 +286,11 @@
|
|||
[(many) mode]
|
||||
[(one) (cdr mode)]
|
||||
[(bracket bar-no-more)
|
||||
(cons (make-scheme status) (cdr mode))]
|
||||
[(bar) (cons (make-scheme 'bar-no-more) (cdr mode))]
|
||||
[else (error "bad status")]))])
|
||||
(cons (make-scheme status (scheme-backup l))
|
||||
(cdr mode))]
|
||||
[(bar) (cons (make-scheme 'bar-no-more (scheme-backup l))
|
||||
(cdr mode))]
|
||||
[else (error "bad status" status)]))])
|
||||
(values lexeme
|
||||
(cond
|
||||
[(or (eq? type 'comment)
|
||||
|
@ -250,24 +305,29 @@
|
|||
paren
|
||||
start
|
||||
end
|
||||
(case adj
|
||||
[(continue) mode]
|
||||
[(datum)
|
||||
(cond
|
||||
[(pair? status) mode]
|
||||
[else (consume status)])]
|
||||
[(open)
|
||||
(cons (make-scheme (cons #t status))
|
||||
(cdr mode))]
|
||||
[(close)
|
||||
(if (pair? status)
|
||||
(let ([v (cdr status)])
|
||||
(if (symbol? v)
|
||||
(consume v)
|
||||
(cons (make-scheme v) (cdr mode))))
|
||||
(consume status))]
|
||||
[(bad) (consume status)]
|
||||
[else (error "bad adj")])))]))]
|
||||
(backup-if-needed start)
|
||||
(maybe-no-backup
|
||||
type
|
||||
(case adj
|
||||
[(continue) mode]
|
||||
[(datum)
|
||||
(cond
|
||||
[(pair? status) mode]
|
||||
[else (consume status)])]
|
||||
[(open)
|
||||
(cons (make-scheme (cons #t status) (scheme-backup l))
|
||||
(cdr mode))]
|
||||
[(close)
|
||||
(if (pair? status)
|
||||
(let ([v (cdr status)])
|
||||
(if (symbol? v)
|
||||
(consume v)
|
||||
(cons (make-scheme v (scheme-backup l)) (cdr mode))))
|
||||
(consume status))]
|
||||
[(bad) (if (pair? status)
|
||||
mode
|
||||
(consume status))]
|
||||
[else (error "bad adj")]))))]))]
|
||||
[(args? l)
|
||||
(cond
|
||||
[(regexp-try-match #rx"^\\[" in)
|
||||
|
@ -277,10 +337,11 @@
|
|||
'|[|
|
||||
pos
|
||||
end-pos
|
||||
(list* (make-scheme 'bracket)
|
||||
0
|
||||
(list* (make-scheme 'bracket #f)
|
||||
mode)))]
|
||||
[else
|
||||
(scribble-lexer in (cons (make-text-args) (cdr mode)))])]
|
||||
(scribble-inside-lexer in offset (cons (make-text-args) (cdr mode)))])]
|
||||
[(text-args? l)
|
||||
(cond
|
||||
[(regexp-try-match rx:opener in)
|
||||
|
@ -288,11 +349,11 @@
|
|||
[(regexp-try-match #rx"^{" in)
|
||||
(enter-simple-opener (cdr mode))]
|
||||
[else
|
||||
(scribble-lexer in (cdr mode))])]
|
||||
(scribble-inside-lexer in offset (cdr mode))])]
|
||||
[else (error "bad mode")])))))
|
||||
|
||||
(define (scribble-lexer in mode)
|
||||
(scribble-inside-lexer in (or mode (list (make-scheme 'many)))))
|
||||
(define (scribble-lexer in offset mode)
|
||||
(scribble-inside-lexer in offset (or mode (list (make-scheme 'many #f)))))
|
||||
|
||||
(define (flip s)
|
||||
(list->bytes
|
||||
|
@ -300,10 +361,8 @@
|
|||
(cond
|
||||
[(equal? c (char->integer #\()) (char->integer #\))]
|
||||
[(equal? c (char->integer #\[)) (char->integer #\])]
|
||||
[(equal? c (char->integer #\{)) (char->integer #\})]
|
||||
[(equal? c (char->integer #\<)) (char->integer #\>)]
|
||||
[(equal? c (char->integer #\))) (char->integer #\()]
|
||||
[(equal? c (char->integer #\])) (char->integer #\[)]
|
||||
[(equal? c (char->integer #\})) (char->integer #\{)]
|
||||
[(equal? c (char->integer #\>)) (char->integer #\<)]
|
||||
[else c]))))
|
||||
|
|
|
@ -126,6 +126,7 @@ A lexer that only identifies @litchar{(}, @litchar{)}, @litchar{[},
|
|||
@defmodule[syntax-color/module-lexer]
|
||||
|
||||
@defproc[(module-lexer [in input-port?]
|
||||
[offset exact-nonnegative-integer?]
|
||||
[mode (or/c #f
|
||||
(-> input-port? any)
|
||||
(cons/c (-> input-port? any/c any) any/c))])
|
||||
|
@ -134,6 +135,7 @@ A lexer that only identifies @litchar{(}, @litchar{)}, @litchar{[},
|
|||
(or/c symbol? false/c)
|
||||
(or/c number? false/c)
|
||||
(or/c number? false/c)
|
||||
exact-nonnegative-integer?
|
||||
(or/c #f
|
||||
(-> input-port? any)
|
||||
(cons/c (-> input-port? any/c any) any/c)))]{
|
||||
|
@ -142,8 +144,9 @@ Like @scheme[scheme-lexer], but
|
|||
|
||||
@itemize[
|
||||
|
||||
@item{A @scheme[module-lexer] accepts (and returns) a lexer mode,
|
||||
instead of just an input port.}
|
||||
@item{A @scheme[module-lexer] accepts an offset and lexer mode,
|
||||
instead of just an input port, and it returns a backup distance
|
||||
and a new lexer mode.}
|
||||
|
||||
@item{When @scheme[mode] is @scheme[#f] (indicating the start of the
|
||||
stream), the lexer checks @scheme[in] for a @hash-lang[]
|
||||
|
@ -158,7 +161,7 @@ Like @scheme[scheme-lexer], but
|
|||
@scheme['color-lexer]. If the result is not @scheme[#f], then
|
||||
it should be a lexer function for use with
|
||||
@scheme[color:text%]. The result mode is the lexer---paired
|
||||
with @scheme[#f] if the lexer is a procedure arity 2---so that
|
||||
with @scheme[#f] if the lexer is a procedure arity 3---so that
|
||||
future calls will dispatch to the language-supplied lexer.
|
||||
|
||||
If the language is specified but it provides no
|
||||
|
@ -170,7 +173,7 @@ Like @scheme[scheme-lexer], but
|
|||
lexer again as the mode.}
|
||||
|
||||
@item{When @scheme[mode] is a pair, then the lexer procedure in the
|
||||
@scheme[car] is applied to @scheme[in] and the mode in the
|
||||
@scheme[car] is applied to @scheme[in], @scheme[offset], and the mode in the
|
||||
@scheme[cdr]. The lexer's results are returned, except that its
|
||||
mode result is paired back with the lexer procedure.}
|
||||
|
||||
|
@ -181,12 +184,14 @@ Like @scheme[scheme-lexer], but
|
|||
@defmodule[syntax-color/scribble-lexer]
|
||||
|
||||
@defproc[(scribble-lexer [in input-port?]
|
||||
[offset exact-nonnegative-integer?]
|
||||
[mode any/c])
|
||||
(values (or/c string? eof-object?)
|
||||
symbol?
|
||||
(or/c symbol? false/c)
|
||||
(or/c number? false/c)
|
||||
(or/c number? false/c)
|
||||
exact-nonnegative-integer?
|
||||
any/c)]{
|
||||
|
||||
Like @scheme[scheme-lexer], but for Scheme extended with Scribbles
|
||||
|
@ -194,12 +199,14 @@ Like @scheme[scheme-lexer], but for Scheme extended with Scribbles
|
|||
"scribblings/scribble/scribble.scrbl") "reader"]).}
|
||||
|
||||
@defproc[(scribble-inside-lexer [in input-port?]
|
||||
[offset exact-nonnegative-integer?]
|
||||
[mode any/c])
|
||||
(values (or/c string? eof-object?)
|
||||
symbol?
|
||||
(or/c symbol? false/c)
|
||||
(or/c number? false/c)
|
||||
(or/c number? false/c)
|
||||
exact-nonnegative-integer?
|
||||
any/c)]{
|
||||
|
||||
Like @scheme[scribble-lexer], but starting in ``text'' mode instead of
|
||||
|
|
|
@ -6,10 +6,10 @@
|
|||
(define (color str)
|
||||
(let ([in (open-input-string str)])
|
||||
(let loop ([mode #f])
|
||||
(let-values ([(lexeme type paren start end mode) (scribble-inside-lexer in mode)])
|
||||
(let-values ([(lexeme type paren start end backup mode) (scribble-inside-lexer in 0 mode)])
|
||||
(if (eq? type 'eof)
|
||||
null
|
||||
(cons (list start end type)
|
||||
(cons (list start end type backup)
|
||||
(loop mode)))))))
|
||||
|
||||
(define (test* str len-val line)
|
||||
|
@ -17,7 +17,10 @@
|
|||
[val (let loop ([pos 1][l len-val])
|
||||
(if (null? l)
|
||||
null
|
||||
(cons (list pos (+ pos (caar l)) (cadar l))
|
||||
(cons (list pos (+ pos (caar l)) (cadar l)
|
||||
(if (null? (cddar l))
|
||||
0
|
||||
(caddar l)))
|
||||
(loop (+ (+ pos (caar l))) (cdr l)))))])
|
||||
(unless (equal? v val)
|
||||
(printf "FAILED, line ~s\n" line)
|
||||
|
@ -91,27 +94,27 @@
|
|||
(1 parenthesis)))
|
||||
|
||||
(test "@|x|str" '((2 parenthesis)
|
||||
(1 symbol)
|
||||
(1 parenthesis)
|
||||
(1 symbol 2)
|
||||
(1 parenthesis 3)
|
||||
(3 string)))
|
||||
(test "@|x #|ok|#|str" '((2 parenthesis)
|
||||
(1 symbol)
|
||||
(1 white-space)
|
||||
(1 symbol 2)
|
||||
(1 white-space 3)
|
||||
(6 comment)
|
||||
(1 parenthesis)
|
||||
(3 string)))
|
||||
(test "@| x ; c\n| str" '((2 parenthesis)
|
||||
(1 white-space)
|
||||
(1 white-space 2)
|
||||
(1 symbol)
|
||||
(1 white-space)
|
||||
(3 comment)
|
||||
(2 parenthesis)
|
||||
(4 string)))
|
||||
(test "@|(a|b|)|str" '((2 parenthesis)
|
||||
(1 parenthesis)
|
||||
(4 symbol)
|
||||
(1 parenthesis)
|
||||
(1 parenthesis)
|
||||
(1 parenthesis 2)
|
||||
(4 symbol 3)
|
||||
(1 parenthesis 7)
|
||||
(1 parenthesis 8)
|
||||
(3 string)))
|
||||
|
||||
(test "@#|bad|#x str" '((1 parenthesis)
|
||||
|
@ -122,7 +125,7 @@
|
|||
(1 parenthesis)
|
||||
(1 symbol)))
|
||||
(test "@|@x|z" '((2 parenthesis)
|
||||
(1 parenthesis)
|
||||
(1 parenthesis 2)
|
||||
(1 symbol)
|
||||
(1 parenthesis)
|
||||
(1 string)))
|
||||
|
@ -232,4 +235,9 @@
|
|||
(1 string)
|
||||
(1 parenthesis)))
|
||||
|
||||
; (test "@|=@=|}" null)
|
||||
(test "@|()|})|" '((2 parenthesis)
|
||||
(1 parenthesis 2)
|
||||
(1 parenthesis 3)
|
||||
(1 parenthesis 4)
|
||||
(3 string)))
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user