#lang syntax coloring; Scribble syntax coloring

svn: r15607
parent 2425917a33
commit 1ba7cf0926
|
@ -31,8 +31,15 @@
|
|||
(bad (cadr spec) #f)))))
|
||||
|
||||
(define (get-info inp mod line col pos)
|
||||
(at-get inp 'get-info (object-name inp) line col pos
|
||||
(lambda (spec) (lambda () (lambda (tag) #f)))))
|
||||
(let ([r (at-get inp 'get-info (object-name inp) line col pos
|
||||
(lambda (spec) (lambda () (lambda (inp mod line col pos)
|
||||
(lambda (tag) #f)))))])
|
||||
(let ([proc (r inp mod line col pos)])
|
||||
(lambda (key)
|
||||
(case key
|
||||
[(color-lexer)
|
||||
(dynamic-require 'syntax-color/scribble-lexer 'scribble-lexer)]
|
||||
[else (and proc (proc key))])))))
|
||||
|
||||
(define at-readtable (make-at-readtable))
|
||||
|
||||
|
|
|
@ -32,6 +32,10 @@ added get-regions
|
|||
(define (should-color-type? type)
|
||||
(not (memq type '(white-space no-color))))
|
||||
|
||||
(define (make-data type mode) (cons type mode))
|
||||
(define (data-type data) (car data))
|
||||
(define (data-lexer-mode data) (cdr data))
|
||||
|
||||
(define -text<%>
|
||||
(interface (text:basic<%>)
|
||||
start-colorer
|
||||
|
@ -102,6 +106,11 @@ added get-regions
|
|||
invalid-tokens-start ; = +inf.0
|
||||
;; The position right before the next token to be read
|
||||
current-pos
|
||||
;; Thread a mode through lexing, and remember the mode
|
||||
;; at each token boundary, so that lexing can depend on
|
||||
;; previous tokens. This is the mode for lexing at
|
||||
;; current-pos:
|
||||
current-lexer-mode
|
||||
;; Paren-matching
|
||||
parens
|
||||
)
|
||||
|
@ -118,6 +127,7 @@ added get-regions
|
|||
(new token-tree%)
|
||||
+inf.0
|
||||
start
|
||||
#f
|
||||
(new paren-tree% (matches pairs))))
|
||||
|
||||
(define lexer-states (list (make-new-lexer-state 0 'end)))
|
||||
|
@ -228,6 +238,7 @@ added get-regions
|
|||
(set-lexer-state-invalid-tokens-start! ls +inf.0)
|
||||
(set-lexer-state-up-to-date?! ls #t)
|
||||
(set-lexer-state-current-pos! ls (lexer-state-start-pos ls))
|
||||
(set-lexer-state-current-lexer-mode! ls #f)
|
||||
(set-lexer-state-parens! ls (new paren-tree% (matches pairs))))
|
||||
lexer-states)
|
||||
(set! restart-callback #f)
|
||||
|
@ -258,12 +269,12 @@ added get-regions
|
|||
(set-lexer-state-invalid-tokens-start! ls (+ invalid-tokens-start length)))
|
||||
(sync-invalid ls))))
|
||||
|
||||
(define/private (re-tokenize ls in in-start-pos enable-suspend)
|
||||
(let-values ([(lexeme type data new-token-start new-token-end)
|
||||
(define/private (re-tokenize ls in in-start-pos in-lexer-mode enable-suspend)
|
||||
(let-values ([(lexeme type data new-token-start new-token-end new-lexer-mode)
|
||||
(begin
|
||||
(enable-suspend #f)
|
||||
(begin0
|
||||
(get-token in)
|
||||
(get-token in in-lexer-mode)
|
||||
(enable-suspend #t)))])
|
||||
(unless (eq? 'eof type)
|
||||
(enable-suspend #f)
|
||||
|
@ -271,6 +282,7 @@ added get-regions
|
|||
(+ in-start-pos (sub1 new-token-end)))
|
||||
(let ((len (- new-token-end new-token-start)))
|
||||
(set-lexer-state-current-pos! ls (+ len (lexer-state-current-pos ls)))
|
||||
(set-lexer-state-current-lexer-mode! ls new-lexer-mode)
|
||||
(sync-invalid ls)
|
||||
(when (and should-color? (should-color-type? type) (not frozen?))
|
||||
(set! colors
|
||||
|
@ -286,7 +298,7 @@ added get-regions
|
|||
;; version. In other words, the new greatly outweighs the tree
|
||||
;; operations.
|
||||
;;(insert-last! tokens (new token-tree% (length len) (data type)))
|
||||
(insert-last-spec! (lexer-state-tokens ls) len type)
|
||||
(insert-last-spec! (lexer-state-tokens ls) len (make-data type new-lexer-mode))
|
||||
(send (lexer-state-parens ls) add-token data len)
|
||||
(cond
|
||||
((and (not (send (lexer-state-invalid-tokens ls) is-empty?))
|
||||
|
@ -301,7 +313,7 @@ added get-regions
|
|||
(enable-suspend #t))
|
||||
(else
|
||||
(enable-suspend #t)
|
||||
(re-tokenize ls in in-start-pos enable-suspend)))))))
|
||||
(re-tokenize ls in in-start-pos new-lexer-mode enable-suspend)))))))
|
||||
|
||||
(define/private (do-insert/delete/ls ls edit-start-pos change-length)
|
||||
(unless (lexer-state-up-to-date? ls)
|
||||
|
@ -318,7 +330,14 @@ added get-regions
|
|||
(if (send (lexer-state-invalid-tokens ls) is-empty?)
|
||||
+inf.0
|
||||
(+ (lexer-state-start-pos ls) orig-token-end change-length)))
|
||||
(set-lexer-state-current-pos! ls (+ (lexer-state-start-pos ls) orig-token-start))
|
||||
(let ([start (+ (lexer-state-start-pos ls) orig-token-start)])
|
||||
(set-lexer-state-current-pos! ls start)
|
||||
(set-lexer-state-current-lexer-mode! ls
|
||||
(if (= start (lexer-state-start-pos ls))
|
||||
#f
|
||||
(begin
|
||||
(send valid-tree search-max!)
|
||||
(data-lexer-mode (send valid-tree get-root-data))))))
|
||||
(set-lexer-state-up-to-date?! ls #f)
|
||||
(queue-callback (λ () (colorer-callback)) #f)))
|
||||
((>= edit-start-pos (lexer-state-invalid-tokens-start ls))
|
||||
|
@ -340,7 +359,13 @@ added get-regions
|
|||
(send (lexer-state-parens ls) truncate tok-start)
|
||||
(set-lexer-state-tokens! ls valid-tree)
|
||||
(set-lexer-state-invalid-tokens-start! ls (+ change-length (lexer-state-invalid-tokens-start ls)))
|
||||
(set-lexer-state-current-pos! ls (+ (lexer-state-start-pos ls) tok-start))))))
|
||||
(let ([start (+ (lexer-state-start-pos ls) tok-start)])
|
||||
(set-lexer-state-current-pos! ls start)
|
||||
(if (= start (lexer-state-start-pos ls))
|
||||
#f
|
||||
(begin
|
||||
(send valid-tree search-max!)
|
||||
(data-lexer-mode (send valid-tree get-root-data)))))))))
|
||||
|
||||
(define/private (do-insert/delete edit-start-pos change-length)
|
||||
(unless (or stopped? force-stop?)
|
||||
|
@ -378,6 +403,7 @@ added get-regions
|
|||
(λ (x) #f))
|
||||
(enable-suspend #t)))
|
||||
(lexer-state-current-pos ls)
|
||||
(lexer-state-current-lexer-mode ls)
|
||||
enable-suspend))
|
||||
lexer-states)))))
|
||||
(set! rev (get-revision-number)))
|
||||
|
@ -427,7 +453,14 @@ added get-regions
|
|||
(reset-tokens)
|
||||
(set! should-color? (preferences:get 'framework:coloring-active))
|
||||
(set! token-sym->style token-sym->style-)
|
||||
(set! get-token get-token-)
|
||||
(set! get-token (if (procedure-arity-includes? get-token- 2)
|
||||
;; New interface: thread through a mode:
|
||||
get-token-
|
||||
;; Old interface: no mode
|
||||
(lambda (in mode)
|
||||
(let-values ([(lexeme type data new-token-start new-token-end)
|
||||
(get-token- in)])
|
||||
(values lexeme type data new-token-start new-token-end #f)))))
|
||||
(set! pairs pairs-)
|
||||
(for-each
|
||||
(lambda (ls)
|
||||
|
@ -739,7 +772,7 @@ added get-regions
|
|||
(let ([tokens (lexer-state-tokens ls)])
|
||||
(tokenize-to-pos ls position)
|
||||
(send tokens search! (- position (lexer-state-start-pos ls)))
|
||||
(send tokens get-root-data)))))
|
||||
(data-type (send tokens get-root-data))))))
|
||||
|
||||
(define/private (tokenize-to-pos ls position)
|
||||
(when (and (not (lexer-state-up-to-date? ls))
|
||||
|
@ -768,8 +801,8 @@ added get-regions
|
|||
(send tokens search! (- (if (eq? direction 'backward) (sub1 position) position)
|
||||
start-pos))
|
||||
(cond
|
||||
((or (eq? 'white-space (send tokens get-root-data))
|
||||
(and comments? (eq? 'comment (send tokens get-root-data))))
|
||||
((or (eq? 'white-space (data-type (send tokens get-root-data)))
|
||||
(and comments? (eq? 'comment (data-type (send tokens get-root-data)))))
|
||||
(skip-whitespace (+ start-pos
|
||||
(if (eq? direction 'forward)
|
||||
(send tokens get-root-end-position)
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
(require string-constants
|
||||
scheme/class
|
||||
mred/mred-sig
|
||||
syntax-color/scheme-lexer
|
||||
syntax-color/module-lexer
|
||||
"collapsed-snipclass-helpers.ss"
|
||||
"sig.ss"
|
||||
"../gui-utils.ss"
|
||||
|
@ -1172,14 +1172,14 @@
|
|||
(preferences:add-callback
|
||||
'framework:tabify
|
||||
(lambda (k v) (set! tabify-pref v)))
|
||||
(define/private (scheme-lexer-wrapper in)
|
||||
(let-values (((lexeme type paren start end) (scheme-lexer in)))
|
||||
(define/private (scheme-lexer-wrapper in mode)
|
||||
(let-values (((lexeme type paren start end mode) (module-lexer in mode)))
|
||||
(cond
|
||||
((and (eq? type 'symbol)
|
||||
(get-keyword-type lexeme tabify-pref))
|
||||
(values lexeme 'keyword paren start end))
|
||||
(values lexeme 'keyword paren start end mode))
|
||||
(else
|
||||
(values lexeme type paren start end)))))
|
||||
(values lexeme type paren start end mode)))))
|
||||
|
||||
(define/override (put-file text sup directory default-name)
|
||||
(parameterize ([finder:default-extension "ss"]
|
||||
|
@ -1188,7 +1188,7 @@
|
|||
;; don't call the surrogate's super, since it sets the default extension
|
||||
(sup directory default-name)))
|
||||
|
||||
(super-new (get-token (lambda (in) (scheme-lexer-wrapper in)))
|
||||
(super-new (get-token (lambda (in mode) (scheme-lexer-wrapper in mode)))
|
||||
(token-sym->style short-sym->style-name)
|
||||
(matches '((|(| |)|)
|
||||
(|[| |]|)
|
||||
|
|
|
@ -259,6 +259,7 @@
|
|||
(->* () () #:rest (listof pre-content?) element?))
|
||||
|
||||
(provide/contract
|
||||
[linebreak (-> element?)]
|
||||
[hspace (-> exact-nonnegative-integer? element?)]
|
||||
[elem (->* ()
|
||||
(#:style element-style?)
|
||||
|
@ -291,6 +292,9 @@
|
|||
h))
|
||||
(make-element 'hspace (list (make-string n #\space)))))
|
||||
|
||||
(define (linebreak)
|
||||
(make-element 'newline '("\n")))
|
||||
|
||||
(define (elem #:style [style plain] . str)
|
||||
(make-element style (decode-content str)))
|
||||
|
||||
|
|
|
@ -6,5 +6,10 @@ scribble/base/lang
|
|||
#:read-syntax scribble:read-syntax-inside
|
||||
#:whole-body-readers? #t
|
||||
#:wrapper1 (lambda (t) (list* 'doc 'values '() (t)))
|
||||
#:info (lambda (key default)
|
||||
(case key
|
||||
[(color-lexer)
|
||||
(dynamic-require 'syntax-color/scribble-lexer 'scribble-inside-lexer)]
|
||||
[else (default key)]))
|
||||
|
||||
(require (prefix-in scribble: "../../reader.ss"))
|
||||
|
|
|
@ -1,3 +1,11 @@
|
|||
#lang scheme/base
|
||||
(require (prefix-in doc: scribble/doc/reader))
|
||||
(provide (rename-out [doc:read read] [doc:read-syntax read-syntax]))
|
||||
(provide (rename-out [doc:read read] [doc:read-syntax read-syntax])
|
||||
get-info)
|
||||
|
||||
(define (get-info . args)
|
||||
(lambda (key)
|
||||
(case key
|
||||
[(color-lexer)
|
||||
(dynamic-require 'syntax-color/scribble-lexer 'scribble-inside-lexer)]
|
||||
[else #f])))
|
||||
|
|
|
@ -6,5 +6,10 @@ scribble/manual/lang
|
|||
#:read-syntax scribble:read-syntax-inside
|
||||
#:whole-body-readers? #t
|
||||
#:wrapper1 (lambda (t) (cons 'doc (t)))
|
||||
#:info (lambda (key default)
|
||||
(case key
|
||||
[(color-lexer)
|
||||
(dynamic-require 'syntax-color/scribble-lexer 'scribble-inside-lexer)]
|
||||
[else (default key)]))
|
||||
|
||||
(require (prefix-in scribble: "../../reader.ss"))
|
||||
|
|
|
@ -47,6 +47,7 @@
|
|||
(struct-out var-id)
|
||||
(struct-out shaped-parens)
|
||||
(struct-out just-context)
|
||||
(struct-out alternate-display)
|
||||
(struct-out literal-syntax)
|
||||
(for-syntax make-variable-id
|
||||
variable-id?
|
||||
|
@ -178,11 +179,12 @@
|
|||
(memq (syntax-e c) (current-variable-list)))]
|
||||
[(s it? sub?)
|
||||
(let ([sc (syntax-e c)])
|
||||
(let ([s (format "~s" (if (literal-syntax? sc)
|
||||
(literal-syntax-stx sc)
|
||||
(if (var-id? sc)
|
||||
(var-id-sym sc)
|
||||
sc)))])
|
||||
(let ([s (or (syntax-property c 'display-string)
|
||||
(format "~s" (if (literal-syntax? sc)
|
||||
(literal-syntax-stx sc)
|
||||
(if (var-id? sc)
|
||||
(var-id-sym sc)
|
||||
sc))))])
|
||||
(if (and (symbol? sc)
|
||||
((string-length s) . > . 1)
|
||||
(char=? (string-ref s 0) #\_)
|
||||
|
@ -763,6 +765,7 @@
|
|||
(define-struct var-id (sym))
|
||||
(define-struct shaped-parens (val shape))
|
||||
(define-struct just-context (val ctx))
|
||||
(define-struct alternate-display (id string))
|
||||
(define-struct literal-syntax (stx))
|
||||
|
||||
(define-struct graph-reference (bx))
|
||||
|
@ -792,6 +795,11 @@
|
|||
s
|
||||
s
|
||||
(just-context-ctx v)))]
|
||||
[(alternate-display? v)
|
||||
(let ([s (do-syntax-ize (alternate-display-id v) col line ht #f)])
|
||||
(syntax-property s
|
||||
'display-string
|
||||
(alternate-display-string v)))]
|
||||
[(hash-ref (unbox ht) v #f)
|
||||
=> (lambda (m)
|
||||
(unless (unbox m)
|
||||
|
|
|
@ -6,5 +6,10 @@ scribble/sigplan/lang
|
|||
#:read-syntax scribble:read-syntax-inside
|
||||
#:whole-body-readers? #t
|
||||
#:wrapper1 (lambda (t) (cons 'doc (t)))
|
||||
#:info (lambda (key default)
|
||||
(case key
|
||||
[(color-lexer)
|
||||
(dynamic-require 'syntax-color/scribble-lexer 'scribble-inside-lexer)]
|
||||
[else (default key)]))
|
||||
|
||||
(require (prefix-in scribble: "../../reader.ss"))
|
||||
|
|
|
@ -9,19 +9,29 @@
|
|||
that knows how to color itself. It also describes how to query the
|
||||
lexical and s-expression structure of the text.
|
||||
@defmethod*[(((start-colorer (token-sym->style (-> symbol? string?))
|
||||
(get-token (-> input-port? (values any/c
|
||||
symbol?
|
||||
(or/c false? symbol?)
|
||||
exact-nonnegative-integer?
|
||||
exact-nonnegative-integer?)))
|
||||
(get-token (or/c (-> input-port?
|
||||
(values any/c
|
||||
symbol?
|
||||
(or/c false? symbol?)
|
||||
exact-nonnegative-integer?
|
||||
exact-nonnegative-integer?))
|
||||
(-> input-port?
|
||||
any/c
|
||||
(values any/c
|
||||
symbol?
|
||||
(or/c false? symbol?)
|
||||
exact-nonnegative-integer?
|
||||
exact-nonnegative-integer?
|
||||
any/c))))
|
||||
(pairs (listof (list/p symbol? symbol?)))) void))]{
|
||||
Starts tokenizing the buffer for coloring and parenthesis matching.
|
||||
|
||||
The @scheme[token-sym->style] argument will be passed the first return symbol from @scheme[get-token],
|
||||
and it should return the style-name that the token should be colored.
|
||||
|
||||
The @scheme[token-sym->style] argument will be passed the first return symbol from @scheme[get-token]
|
||||
and should return the style-name that the token should be colored.
|
||||
The @scheme[get-token] argument takes an input port and optionally a mode value.
|
||||
When it accepts just an input port, @scheme[get-token] returns the next token as 5 values:
|
||||
|
||||
The @scheme[get-token] argument takes an input port and returns the next token as 5 values:
|
||||
@itemize[
|
||||
@item{
|
||||
An unused value. This value is intended to represent the textual
|
||||
|
@ -42,32 +52,44 @@
|
|||
@item{
|
||||
The ending position of the token.}]
|
||||
|
||||
The @scheme[get-token] function will usually be implemented with a lexer using the
|
||||
@scheme[parser-tools/lex] library.
|
||||
get-token must obey the following invariants:
|
||||
When @scheme[get-token] accepts a mode value in addition to an
|
||||
input port, it must also return an extra result, which is a new
|
||||
mode. When @scheme[get-token] is called for the beginning of a
|
||||
stream, the mode argument is @scheme[#f]. Thereafter, the mode
|
||||
returned for the previous token is provided to @scheme[get-token]
|
||||
for the next token. The mode should not be a mutable value; if
|
||||
part of the stream is re-tokenized, the mode saved from the
|
||||
immediately preceding token is given again to the
|
||||
@scheme[get-token] function.
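As a rough sketch (not part of this commit's diff), a one-argument get-token can be adapted to the two-argument, mode-threading interface in the same way the compatibility shim added to color.ss above does; the helper name add-trivial-mode is invented here for illustration:

(define (add-trivial-mode get-token)
  ;; get-token : input-port -> (values lexeme type data start end)
  (lambda (in mode)
    (let-values ([(lexeme type data start end) (get-token in)])
      ;; a mode-less lexer cannot depend on history, so any returned mode
      ;; will do; #f matches what color.ss uses for the old interface
      (values lexeme type data start end #f))))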
|
||||
|
||||
The @scheme[get-token] function is usually implemented with a lexer using the
|
||||
@scheme[parser-tools/lex] library. The
|
||||
@scheme[get-token] function must obey the following invariants:
|
||||
@itemize[
|
||||
@item{
|
||||
Every position in the buffer must be accounted for in exactly one
|
||||
token.}
|
||||
@item{
|
||||
The token returned by @scheme[get-token] must rely only on the contents of the
|
||||
input port argument. This means that the tokenization of some part of
|
||||
the input cannot depend on earlier parts of the input.}
|
||||
input port argument plus the mode argument. This constraint means that the
|
||||
tokenization of some part of the input cannot depend on earlier parts of the
|
||||
input except through the mode (and implicitly through the starting positions
|
||||
for tokens).}
|
||||
@item{
|
||||
No edit to the buffer can change the tokenization of the buffer prior
|
||||
to the token immediately preceding the edit. In the following
|
||||
example this invariant does not hold. If the buffer contains:
|
||||
@verbatim{" 1 2 3}
|
||||
A change to the stream must not change the tokenization of the stream prior
|
||||
to the token immediately preceding the change. In the following
|
||||
example this invariant does not hold. If the buffer contains
|
||||
@verbatim[#:indent 2]{" 1 2 3}
|
||||
and the tokenizer treats the unmatched " as its own token (a string
|
||||
error token), and separately tokenizes the 1 2 and 3, an edit to make
|
||||
the buffer look like:
|
||||
@verbatim{" 1 2 3"}
|
||||
the buffer look like
|
||||
@verbatim[#:indent 2]{" 1 2 3"}
|
||||
would result in a single string token modifying previous tokens. To
|
||||
handle these situations, @scheme[get-token] must treat the first line as a
|
||||
single token.}]
|
||||
|
||||
The @scheme[pairs] argument is a list of different kinds of matching parens. The second
|
||||
value returned by get-token is compared to this list to see how the
|
||||
value returned by @scheme[get-token] is compared to this list to see how the
|
||||
paren matcher should treat the token. An example: Suppose pairs is
|
||||
@scheme['((|(| |)|) (|[| |]|) (begin end))]. This means that there
|
||||
are three kinds of parens. Any token which has @scheme['begin] as its second
|
||||
|
|
|
@ -272,6 +272,9 @@ gets progressively larger.}
|
|||
@defproc[(emph [pre-content pre-content?] ...) element?]{
|
||||
The same as @scheme[italic].}
|
||||
|
||||
@defproc[(linebreak) element?]{
|
||||
Produces an element that forces a line break.}
|
||||
|
||||
@defproc[(hspace [n exact-nonnegative-integer?]) element?]{
|
||||
|
||||
Produces an element containing @scheme[n] spaces and style
|
||||
|
|
|
@ -60,7 +60,7 @@ in the case of Latex).
|
|||
To add a mapping from your own style name to a CSS configuration, add
|
||||
a @scheme[css-addition] structure instance to a style's @tech{style property}
|
||||
list. To map a style name to a Latex macro or environment, add a
|
||||
scheme[tex-addition] structure instance. A @scheme[css-addition] or
|
||||
@scheme[tex-addition] structure instance. A @scheme[css-addition] or
|
||||
@scheme[tex-addition] is normally associated with the style whose name
|
||||
is implemented by the addition, but it can also be added to the style
|
||||
for an enclosing part.
|
||||
|
|
|
@ -167,7 +167,7 @@ needed.
|
|||
@{blah @|foo|: blah}
|
||||
}===|
|
||||
|
||||
Actually, the command part can be any Scheme expression (that does
|
||||
Actually, the command part can be any Scheme expression (that does not
|
||||
start with @litchar["["], @litchar["{"], or @litchar["|"]), which is
|
||||
particularly useful with such escapes since they can be used with any
|
||||
expression.
|
||||
|
|
collects/syntax-color/module-lexer.ss (new file, 54 lines)
|
@ -0,0 +1,54 @@
|
|||
#lang scheme/base
|
||||
(require scheme/port
|
||||
"scheme-lexer.ss")
|
||||
|
||||
(provide module-lexer)
|
||||
|
||||
(define (module-lexer in mode)
|
||||
(cond
|
||||
[(not mode)
|
||||
;; Starting out: look for #lang:
|
||||
(let*-values ([(p) (peeking-input-port in)]
|
||||
[(init) (file-position p)]
|
||||
[(start-line start-col start-pos) (port-next-location p)])
|
||||
(let ([get-info (with-handlers ([exn:fail? (lambda (exn) 'fail)])
|
||||
;; FIXME: set the reader guard to disable access to
|
||||
;; untrusted planet packages.
|
||||
(read-language in (lambda () #f)))])
|
||||
(cond
|
||||
[(procedure? get-info)
|
||||
;; Produce language as first token:
|
||||
(let*-values ([(bytes-len) (- (file-position p) init)]
|
||||
[(bstr) (read-bytes bytes-len in)]
|
||||
[(end-line end-col end-pos) (port-next-location in)])
|
||||
(values
|
||||
bstr
|
||||
'other
|
||||
#f
|
||||
start-pos
|
||||
end-pos
|
||||
(or (let ([v (get-info 'color-lexer)])
|
||||
(and v
|
||||
(if (procedure-arity-includes? v 2)
|
||||
(cons v #f)
|
||||
v)))
|
||||
scheme-lexer)))]
|
||||
[(eq? 'fail get-info)
|
||||
(copy-port in (open-output-nowhere))
|
||||
(let*-values ([(end-line end-col end-pos) (port-next-location in)])
|
||||
(values #f 'error #f start-pos end-pos
|
||||
(lambda (in)
|
||||
(values #f 'eof #f end-pos end-pos))))]
|
||||
[else
|
||||
;; Start over using the Scheme lexer
|
||||
(module-lexer in scheme-lexer)])))]
|
||||
[(pair? mode)
|
||||
;; #lang-selected language consumes and produces a mode:
|
||||
(let-values ([(lexeme type data new-token-start new-token-end new-mode)
|
||||
((car mode) in (cdr mode))])
|
||||
(values lexeme type data new-token-start new-token-end (cons (car mode) new-mode)))]
|
||||
[else
|
||||
;; #lang-selected language (or default) doesn't deal with modes:
|
||||
(let-values ([(lexeme type data new-token-start new-token-end)
|
||||
(mode in)])
|
||||
(values lexeme type data new-token-start new-token-end mode))]))
|
|
@ -3,7 +3,9 @@
|
|||
(require parser-tools/lex
|
||||
(prefix : parser-tools/lex-sre))
|
||||
|
||||
(provide scheme-lexer)
|
||||
(provide scheme-lexer
|
||||
scheme-lexer/status
|
||||
scheme-nobar-lexer/status)
|
||||
|
||||
(define-lex-abbrevs
|
||||
|
||||
|
@ -120,6 +122,13 @@
|
|||
[identifier (:: identifier-start
|
||||
(:* identifier-escapes identifier-chars))]
|
||||
|
||||
[nobar-identifier-escapes (:: "\\" any-char)]
|
||||
[nobar-identifier-start (:or nobar-identifier-escapes
|
||||
(:~ identifier-delims "\\" "|" "#")
|
||||
"#%")]
|
||||
[nobar-identifier (:: nobar-identifier-start
|
||||
(:* nobar-identifier-escapes identifier-chars))]
|
||||
|
||||
[bad-id-start (:or identifier-escapes
|
||||
(:~ identifier-delims "\\" "|"))]
|
||||
[bad-id-escapes (:or identifier-escapes
|
||||
|
@ -129,8 +138,17 @@
|
|||
(:? "\\" bad-id-escapes))
|
||||
"\\"
|
||||
bad-id-escapes)]
|
||||
|
||||
|
||||
[nobar-bad-id-escapes nobar-identifier-escapes]
|
||||
[nobar-bad-id (:or (:: bad-id-start
|
||||
(:* nobar-identifier-escapes identifier-chars)
|
||||
(:? "\\" nobar-bad-id-escapes))
|
||||
"\\"
|
||||
nobar-bad-id-escapes)]
|
||||
|
||||
[keyword (:: "#:" (:* identifier-escapes identifier-chars))]
|
||||
[nobar-keyword (:: "#:" (:* nobar-identifier-escapes identifier-chars))]
|
||||
|
||||
[reader-command (:or (:: "#" c s) (:: "#" c i))]
|
||||
[sharing (:or (:: "#" (make-uinteger digit10) "=")
|
||||
|
@ -189,8 +207,8 @@
|
|||
((_ digit) (:or "" (:: exponent-marker (:? sign) (:+ digit))))))
|
||||
|
||||
|
||||
(define (ret lexeme type paren start-pos end-pos)
|
||||
(values lexeme type paren (position-offset start-pos) (position-offset end-pos)))
|
||||
(define (ret lexeme type paren start-pos end-pos status)
|
||||
(values lexeme type paren (position-offset start-pos) (position-offset end-pos) status))
|
||||
|
||||
|
||||
(define get-next-comment
|
||||
|
@ -208,11 +226,11 @@
|
|||
(define (read-nested-comment num-opens start-pos input)
|
||||
(let-values (((diff end) (get-next-comment input)))
|
||||
(cond
|
||||
((eq? 'eof diff) (ret "" 'error #f start-pos end))
|
||||
((eq? 'eof diff) (ret "" 'error #f start-pos end 'continue))
|
||||
(else
|
||||
(let ((next-num-opens (+ diff num-opens)))
|
||||
(cond
|
||||
((= 0 next-num-opens) (ret "" 'comment #f start-pos end))
|
||||
((= 0 next-num-opens) (ret "" 'comment #f start-pos end 'continue))
|
||||
(else (read-nested-comment next-num-opens start-pos input))))))))
|
||||
|
||||
(define (get-offset i)
|
||||
|
@ -253,7 +271,7 @@
|
|||
(next-char (peek-char-or-special i)))
|
||||
(cond
|
||||
((or (equal? ender "") (not (eq? #\newline next-char)))
|
||||
(values (string-append "#<<" ender) 'error #f start-pos (get-offset i)))
|
||||
(values (string-append "#<<" ender) 'error #f start-pos (get-offset i) 'datum))
|
||||
(else
|
||||
(read-char i)
|
||||
(let loop ((acc (list (string-append "#<<" ender "\n"))))
|
||||
|
@ -262,85 +280,96 @@
|
|||
(cond
|
||||
((not (or (char? next-char) (eof-object? next-char))) ;; a special
|
||||
(values (apply string-append (reverse (cons next-line acc)))
|
||||
'error #f start-pos (get-offset i)))
|
||||
'error #f start-pos (get-offset i)
|
||||
'datum))
|
||||
((equal? next-line ender) ;; end of string
|
||||
(values (apply string-append (reverse (cons next-line acc)))
|
||||
'string #f start-pos (get-offset i)))
|
||||
'string #f start-pos (get-offset i)
|
||||
'datum))
|
||||
((eof-object? next-char)
|
||||
(values (apply string-append (reverse (cons next-line acc)))
|
||||
'error #f start-pos (get-offset i)))
|
||||
'error #f start-pos (get-offset i)
|
||||
'datum))
|
||||
(else
|
||||
(read-char i)
|
||||
(loop (cons (string-append next-line "\n") acc))))))))))
|
||||
|
||||
(define (scheme-lexer in)
|
||||
(let-values ([(lexeme type paren start end adj) (scheme-lexer/status in)])
|
||||
(values lexeme type paren start end)))
|
||||
|
||||
(define scheme-lexer
|
||||
(define-syntax-rule (lexer/status identifier keyword bad-id)
|
||||
(lexer
|
||||
[(:+ scheme-whitespace)
|
||||
(ret lexeme 'white-space #f start-pos end-pos)]
|
||||
(ret lexeme 'white-space #f start-pos end-pos 'continue)]
|
||||
[(:or "#t" "#f" "#T" "#F" character
|
||||
(make-num digit2 radix2)
|
||||
(make-num digit8 radix8)
|
||||
(make-num digit10 (:? radix10))
|
||||
(make-num digit16 radix16))
|
||||
(ret lexeme 'constant #f start-pos end-pos)]
|
||||
[keyword (ret lexeme 'parenthesis #f start-pos end-pos)]
|
||||
[str (ret lexeme 'string #f start-pos end-pos)]
|
||||
(ret lexeme 'constant #f start-pos end-pos 'datum)]
|
||||
[keyword (ret lexeme 'parenthesis #f start-pos end-pos 'datum)]
|
||||
[str (ret lexeme 'string #f start-pos end-pos 'datum)]
|
||||
[";"
|
||||
(values (apply string (read-line/skip-over-specials input-port)) 'comment #f
|
||||
(position-offset start-pos)
|
||||
(get-offset input-port))]
|
||||
(get-offset input-port)
|
||||
'continue)]
|
||||
#;
|
||||
[line-comment
|
||||
(ret lexeme 'comment #f start-pos end-pos)]
|
||||
["#;"
|
||||
(ret lexeme 'sexp-comment #f start-pos end-pos)]
|
||||
(ret lexeme 'sexp-comment #f start-pos end-pos 'continue)]
|
||||
["#|" (read-nested-comment 1 start-pos input-port)]
|
||||
[script
|
||||
(ret lexeme 'comment #f start-pos end-pos)]
|
||||
(ret lexeme 'comment #f start-pos end-pos 'continue)]
|
||||
[(:: list-prefix "(")
|
||||
(ret lexeme 'parenthesis '|(| start-pos end-pos)]
|
||||
(ret lexeme 'parenthesis '|(| start-pos end-pos 'open)]
|
||||
[(:: list-prefix "[")
|
||||
(ret lexeme 'parenthesis '|[| start-pos end-pos)]
|
||||
(ret lexeme 'parenthesis '|[| start-pos end-pos 'open)]
|
||||
[(:: list-prefix "{")
|
||||
(ret lexeme 'parenthesis '|{| start-pos end-pos)]
|
||||
(ret lexeme 'parenthesis '|{| start-pos end-pos 'open)]
|
||||
[(:or ")" "]" "}")
|
||||
(ret lexeme 'parenthesis (string->symbol lexeme) start-pos end-pos)]
|
||||
(ret lexeme 'parenthesis (string->symbol lexeme) start-pos end-pos 'close)]
|
||||
[(:or "'" "`" "#'" "#`" "#&")
|
||||
(ret lexeme 'constant #f start-pos end-pos)]
|
||||
(ret lexeme 'constant #f start-pos end-pos 'continue)]
|
||||
[(:or sharing reader-command "." "," ",@" "#," "#,@")
|
||||
(ret lexeme 'other #f start-pos end-pos)]
|
||||
(ret lexeme 'other #f start-pos end-pos 'continue)]
|
||||
|
||||
[(:: (:or "#lang " "#!")
|
||||
(:or langchar
|
||||
(:: langchar (:* (:or langchar "/")) langchar)))
|
||||
(ret lexeme 'other #f start-pos end-pos)]
|
||||
(ret lexeme 'other #f start-pos end-pos 'continue)]
|
||||
[(:: (:or "#lang " "#!") (:* (:& any-char (complement whitespace))))
|
||||
(ret lexeme 'error #f start-pos end-pos)]
|
||||
(ret lexeme 'error #f start-pos end-pos 'continue)]
|
||||
|
||||
[identifier
|
||||
(ret lexeme 'symbol #f start-pos end-pos)]
|
||||
(ret lexeme 'symbol #f start-pos end-pos 'datum)]
|
||||
["#<<"
|
||||
(get-here-string (position-offset start-pos) input-port)]
|
||||
[(special)
|
||||
(ret "" 'no-color #f start-pos end-pos)]
|
||||
(ret "" 'no-color #f start-pos end-pos 'datum)]
|
||||
[(special-comment)
|
||||
(ret "" 'comment #f start-pos end-pos)]
|
||||
[(eof) (values lexeme 'eof #f #f #f)]
|
||||
(ret "" 'comment #f start-pos end-pos 'continue)]
|
||||
[(eof) (values lexeme 'eof #f #f #f #f)]
|
||||
[(:or bad-char bad-str
|
||||
(:& bad-id
|
||||
(complement (:: (:or (:: "#" (:or f t)) reader-command sharing "#<<" "#\\" "#|" "#;" "#&" script)
|
||||
any-string))))
|
||||
(ret lexeme 'error #f start-pos end-pos)]
|
||||
(ret lexeme 'error #f start-pos end-pos 'bad)]
|
||||
[any-char (extend-error lexeme start-pos end-pos input-port)]))
|
||||
|
||||
(define scheme-lexer/status (lexer/status identifier keyword bad-id))
|
||||
(define scheme-nobar-lexer/status (lexer/status nobar-identifier nobar-keyword nobar-bad-id))
|
||||
|
||||
(define (extend-error lexeme start end in)
|
||||
(if (memq (peek-char-or-special in)
|
||||
`(special #\newline #\return #\tab #\space #\vtab
|
||||
#\" #\, #\' #\` #\( #\) #\[ #\] #\{ #\} #\;
|
||||
,eof))
|
||||
(ret lexeme 'error #f start end)
|
||||
(ret lexeme 'error #f start end 'bad)
|
||||
(let-values (((rest end-pos) (get-chunk in)))
|
||||
(ret (string-append lexeme rest) 'error #f start end-pos))))
|
||||
(ret (string-append lexeme rest) 'error #f start end-pos 'bad))))
|
||||
|
||||
(define get-chunk
|
||||
(lexer
|
||||
|
|
collects/syntax-color/scribble-lexer.ss (new file, 253 lines)
|
@ -0,0 +1,253 @@
|
|||
#lang scheme/base
|
||||
(require "scheme-lexer.ss")
|
||||
|
||||
(provide scribble-inside-lexer
|
||||
scribble-lexer)
|
||||
|
||||
(define-struct text (scheme-rx end-rx sub-rx string-rx open-paren close-paren))
|
||||
(define-struct scheme (status))
|
||||
(define-struct args ())
|
||||
(define-struct text-args ())
|
||||
|
||||
(define (scribble-inside-lexer in mode)
|
||||
(let ([mode (or mode
|
||||
(list
|
||||
(make-text #rx"^@"
|
||||
#f
|
||||
#f
|
||||
#rx".*?(?:(?=@)|$)"
|
||||
#f
|
||||
#f)))])
|
||||
(let-values ([(line col pos) (port-next-location in)]
|
||||
[(l) (car mode)])
|
||||
(define (enter-@ comment-k)
|
||||
(if (equal? #\; (peek-char in))
|
||||
;; Comment
|
||||
(begin
|
||||
(read-char in)
|
||||
(if (or (equal? #\{ (peek-char in))
|
||||
(equal? #\| (peek-char in)))
|
||||
;; Bracketed comment:
|
||||
(let-values ([(end-line end-col end-pos) (port-next-location in)])
|
||||
(comment-k "@;"
|
||||
'comment
|
||||
#f
|
||||
pos
|
||||
end-pos
|
||||
(cons (make-text-args)
|
||||
mode)))
|
||||
;; Line comment:
|
||||
(begin
|
||||
(regexp-match #rx"\r\n|\r|\n" in)
|
||||
(let-values ([(end-line end-col end-pos) (port-next-location in)])
|
||||
(comment-k "@;"
|
||||
'comment
|
||||
#f
|
||||
pos
|
||||
end-pos
|
||||
mode)))))
|
||||
(let ([new-mode
|
||||
(cond
|
||||
[(equal? #\| (peek-char in))
|
||||
(read-char in)
|
||||
(list* (make-scheme 'bar)
|
||||
mode)]
|
||||
[else
|
||||
(list* (make-scheme 'one)
|
||||
(make-args)
|
||||
mode)])])
|
||||
(let-values ([(end-line end-col end-pos) (port-next-location in)])
|
||||
(values "@"
|
||||
'other
|
||||
#f
|
||||
pos
|
||||
end-pos
|
||||
new-mode)))))
|
||||
|
||||
(if (eof-object? (peek-char in))
|
||||
(values eof
|
||||
'eof
|
||||
#f
|
||||
pos
|
||||
pos
|
||||
#f)
|
||||
(cond
|
||||
[(text? l)
|
||||
(cond
|
||||
[(and (text-scheme-rx l)
|
||||
(regexp-try-match (text-scheme-rx l) in))
|
||||
;; Found @
|
||||
(enter-@ values)]
|
||||
[(and (text-end-rx l)
|
||||
(regexp-try-match (text-end-rx l) in))
|
||||
(let-values ([(end-line end-col end-pos) (port-next-location in)])
|
||||
(values "}"
|
||||
'other
|
||||
(text-close-paren l)
|
||||
pos
|
||||
end-pos
|
||||
(cdr mode)))]
|
||||
[(and (text-sub-rx l)
|
||||
(regexp-try-match (text-sub-rx l) in))
|
||||
(let-values ([(end-line end-col end-pos) (port-next-location in)])
|
||||
(values "{"
|
||||
'other
|
||||
(text-open-paren l)
|
||||
pos
|
||||
end-pos
|
||||
(cons (car mode) mode)))]
|
||||
[else
|
||||
;; Read string up to @ or }
|
||||
(regexp-match? (text-string-rx l) in)
|
||||
(let-values ([(end-line end-col end-pos) (port-next-location in)])
|
||||
(values 'string
|
||||
'string
|
||||
#f
|
||||
pos
|
||||
end-pos
|
||||
mode))])]
|
||||
[(scheme? l)
|
||||
(let ([status (scheme-status l)])
|
||||
(cond
|
||||
[(and (eq? status 'bracket)
|
||||
(regexp-try-match #px"^\\s*?[]]" in))
|
||||
(let-values ([(end-line end-col end-pos) (port-next-location in)])
|
||||
(values "]"
|
||||
'other
|
||||
'|]|
|
||||
pos
|
||||
end-pos
|
||||
(cdr mode)))]
|
||||
[(and (memq status '(bar bar-no-more))
|
||||
(regexp-try-match #px"^\\s*?[|]" in))
|
||||
(let-values ([(end-line end-col end-pos) (port-next-location in)])
|
||||
(values "|"
|
||||
'other
|
||||
#f
|
||||
pos
|
||||
end-pos
|
||||
(cdr mode)))]
|
||||
[(regexp-try-match #rx"^@" in)
|
||||
(enter-@ (lambda (lexeme type paren start end mode)
|
||||
(values lexeme
|
||||
(if (eq? status 'one)
|
||||
'error
|
||||
type)
|
||||
paren
|
||||
start
|
||||
end
|
||||
mode)))]
|
||||
[else
|
||||
(let-values ([(lexeme type paren start end adj)
|
||||
(case status
|
||||
[(bar bar-no-more one) (scheme-nobar-lexer/status in)]
|
||||
[else (scheme-lexer/status in)])]
|
||||
[(consume) (lambda (status)
|
||||
(case status
|
||||
[(many) mode]
|
||||
[(one) (cdr mode)]
|
||||
[(bracket bar-no-more)
|
||||
(cons (make-scheme status) (cdr mode))]
|
||||
[(bar) (cons (make-scheme 'bar-no-more) (cdr mode))]
|
||||
[else (error "bad status")]))])
|
||||
(values lexeme
|
||||
(cond
|
||||
[(or (eq? type 'comment)
|
||||
(eq? type 'white-space))
|
||||
(if (eq? status 'one)
|
||||
'error
|
||||
type)]
|
||||
[(eq? status 'bar-no-more)
|
||||
;; Too many S-expressions in @| ... |
|
||||
'error]
|
||||
[else type])
|
||||
paren
|
||||
start
|
||||
end
|
||||
(case adj
|
||||
[(continue) mode]
|
||||
[(datum)
|
||||
(cond
|
||||
[(pair? status) mode]
|
||||
[else (consume status)])]
|
||||
[(open)
|
||||
(cons (make-scheme (cons #t status))
|
||||
(cdr mode))]
|
||||
[(close)
|
||||
(if (pair? status)
|
||||
(let ([v (cdr status)])
|
||||
(if (symbol? v)
|
||||
(consume v)
|
||||
(cons (make-scheme v) (cdr mode))))
|
||||
(consume status))]
|
||||
[(bad) (consume status)]
|
||||
[else (error "bad adj")])))]))]
|
||||
[(args? l)
|
||||
(cond
|
||||
[(regexp-try-match #rx"^\\[" in)
|
||||
(let-values ([(end-line end-col end-pos) (port-next-location in)])
|
||||
(values "["
|
||||
'other
|
||||
'|[|
|
||||
pos
|
||||
end-pos
|
||||
(list* (make-scheme 'bracket)
|
||||
mode)))]
|
||||
[else
|
||||
(scribble-lexer in (cons (make-text-args) (cdr mode)))])]
|
||||
[(text-args? l)
|
||||
(cond
|
||||
[(regexp-try-match #rx"^[|]([^a-zA-Z0-9 \t\r\n\f@\\\177-\377{]*){" in)
|
||||
=> (lambda (m)
|
||||
(let-values ([(end-line end-col end-pos) (port-next-location in)])
|
||||
(values (cadr m)
|
||||
'other
|
||||
#f
|
||||
pos
|
||||
end-pos
|
||||
(let ([closer (regexp-quote
|
||||
(bytes-append #"}"
|
||||
(regexp-replace** (list #rx"[(]" #rx"[[]" #rx"{" #rx"<")
|
||||
(cadr m)
|
||||
(list #")" #"]" #"}" #">"))
|
||||
#"|"))]
|
||||
[re-opener (regexp-quote (cadr m))])
|
||||
(cons (make-text (byte-regexp (bytes-append #"^[|]" re-opener #"@"))
|
||||
(byte-regexp (bytes-append #"^" closer))
|
||||
(byte-regexp (bytes-append #"^[|]" re-opener #"{"))
|
||||
(byte-regexp (bytes-append
|
||||
#".*?(?:(?=[|]"
|
||||
re-opener
|
||||
#"[@{])|(?="
|
||||
closer
|
||||
#")|$)"))
|
||||
#f
|
||||
#f)
|
||||
(cdr mode))))))]
|
||||
[(regexp-try-match #rx"^{" in)
|
||||
(let-values ([(end-line end-col end-pos) (port-next-location in)])
|
||||
(values "{"
|
||||
'other
|
||||
'|{|
|
||||
pos
|
||||
end-pos
|
||||
(cons (make-text #rx"^@"
|
||||
#rx"^}"
|
||||
#rx"^{"
|
||||
#rx".*?(?:(?=[@{}])|$)"
|
||||
'|{|
|
||||
'|}|)
|
||||
(cdr mode))))]
|
||||
[else
|
||||
(scribble-lexer in (cdr mode))])]
|
||||
[else (error "bad mode")])))))
|
||||
|
||||
(define (scribble-lexer in mode)
|
||||
(scribble-inside-lexer in (or mode (list (make-scheme 'many)))))
|
||||
|
||||
(define (regexp-replace** rxs str strs)
|
||||
(if (null? rxs)
|
||||
str
|
||||
(regexp-replace** (cdr rxs)
|
||||
(regexp-replace* (car rxs) str (car strs))
|
||||
(cdr strs))))
|
|
@ -3,6 +3,8 @@
|
|||
(for-label syntax-color/token-tree
|
||||
syntax-color/paren-tree
|
||||
syntax-color/scheme-lexer
|
||||
syntax-color/module-lexer
|
||||
syntax-color/scribble-lexer
|
||||
syntax-color/default-lexer
|
||||
framework/framework
|
||||
framework/private/color
|
||||
|
@ -38,7 +40,7 @@ Parenthesis matching code built on top of @scheme[token-tree%].
|
|||
symbol?
|
||||
(or/c symbol? false/c)
|
||||
(or/c number? false/c)
|
||||
(or/c number? false/c))]
|
||||
(or/c number? false/c))]{
|
||||
|
||||
A lexer for Scheme, including reader extensions (@secref[#:doc'(lib
|
||||
"scribblings/reference/reference.scrbl")]{Reader_Extension}), built
|
||||
|
@ -60,6 +62,35 @@ The @scheme[scheme-lexer] function returns 5 values:
|
|||
|
||||
@item{A number representing the ending position of the match (or @scheme[#f] if eof).}]
|
||||
|
||||
}
|
||||
|
||||
@defproc[(scheme-lexer/status [in input-port?])
|
||||
(values (or/c string? eof-object?)
|
||||
symbol?
|
||||
(or/c symbol? false/c)
|
||||
(or/c number? false/c)
|
||||
(or/c number? false/c)
|
||||
(or/c 'datum 'open 'close 'continue))]{
|
||||
|
||||
Like @scheme[scheme-lexer], but returns an extra value. The last
|
||||
return value indicates whether the consumed token should count as a
|
||||
datum, an opening parenthesis (or similar starting token to group
|
||||
other tokens), a closing parenthesis (or similar), or a prefix (such
|
||||
as whitespace) on a datum.}
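A rough usage sketch (assumed behavior inferred from the scheme-lexer.ss changes above, not text from this commit): collect the extra status value for each token of a string.

(require syntax-color/scheme-lexer)

(define (token-statuses str)
  (let ([in (open-input-string str)])
    (let loop ()
      (let-values ([(lexeme type paren start end status)
                    (scheme-lexer/status in)])
        (if (eq? type 'eof)
            '()
            (cons status (loop)))))))

;; (token-statuses "(x 1)") would be expected to produce
;; '(open datum continue datum close)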
|
||||
|
||||
@defproc[(scheme-nobar-lexer/status [in input-port?])
|
||||
(values (or/c string? eof-object?)
|
||||
symbol?
|
||||
(or/c symbol? false/c)
|
||||
(or/c number? false/c)
|
||||
(or/c number? false/c)
|
||||
(or/c 'datum 'open 'close 'continue))]{
|
||||
|
||||
Like @scheme[scheme-lexer/status], but for a dialect of Scheme where
|
||||
@litchar{|} is a delimiter instead of quoting syntax for a symbol.
|
||||
This function is used by @scheme[scribble-lexer].}
|
||||
|
||||
|
||||
@section{Default lexer}
|
||||
@defmodule[syntax-color/default-lexer]
|
||||
|
||||
|
@ -90,6 +121,90 @@ A lexer that only identifies @litchar{(}, @litchar{)}, @litchar{[},
|
|||
@item{A number representing the ending position of the match (or @scheme[#f] if eof).}]
|
||||
|
||||
|
||||
@section{Module Lexer}
|
||||
|
||||
@defmodule[syntax-color/module-lexer]
|
||||
|
||||
@defproc[(module-lexer [in input-port?]
|
||||
[mode (or/c #f
|
||||
(-> input-port? any)
|
||||
(cons/c (-> input-port? any/c any) any/c))])
|
||||
(values (or/c string? eof-object?)
|
||||
symbol?
|
||||
(or/c symbol? false/c)
|
||||
(or/c number? false/c)
|
||||
(or/c number? false/c)
|
||||
(or/c #f
|
||||
(-> input-port? any)
|
||||
(cons/c (-> input-port? any/c any) any/c)))]{
|
||||
|
||||
Like @scheme[scheme-lexer], but
|
||||
|
||||
@itemize[
|
||||
|
||||
@item{A @scheme[module-lexer] accepts (and returns) a lexer mode,
|
||||
instead of just an input port.}
|
||||
|
||||
@item{When @scheme[mode] is @scheme[#f] (indicating the start of the
|
||||
stream), the lexer checks @scheme[in] for a @hash-lang[]
|
||||
specification.
|
||||
|
||||
If a @hash-lang[] line is present but the specified
|
||||
language does not exist, the entire @scheme[in] input is
|
||||
consumed and colored as @scheme['error].
|
||||
|
||||
If the language exists and the language provides a
|
||||
@scheme[get-info] function, then it is called with
|
||||
@scheme['color-lexer]. If the result is not @scheme[#f], then
|
||||
it should be a lexer function for use with
|
||||
@scheme[color:text%]. The result mode is the lexer---paired
|
||||
with @scheme[#f] if the lexer is a procedure of arity 2---so that
|
||||
future calls will dispatch to the language-supplied lexer.
|
||||
|
||||
If the language is specified but it provides no
|
||||
@scheme[get-info] or @scheme['color-lexer] result, then
|
||||
@scheme[scheme-lexer] is returned as the mode.}
|
||||
|
||||
@item{When @scheme[mode] is a lexer procedure, the lexer is applied
|
||||
to @scheme[in]. The lexer's results are returned, plus the
|
||||
lexer again as the mode.}
|
||||
|
||||
@item{When @scheme[mode] is a pair, then the lexer procedure in the
|
||||
@scheme[car] is applied to @scheme[in] and the mode in the
|
||||
@scheme[cdr]. The lexer's results are returned, except that its
|
||||
mode result is paired back with the lexer procedure.}
|
||||
|
||||
]}
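A hypothetical driver loop (not part of this commit) showing the intended calling convention: the mode returned for one token is fed back in for the next call, starting from @scheme[#f].

(require syntax-color/module-lexer)

(define (lex-whole-port in)
  (let loop ([mode #f])
    (let-values ([(lexeme type data start end new-mode)
                  (module-lexer in mode)])
      (if (eq? type 'eof)
          '()
          ;; keep just the token type and its span
          (cons (list type start end) (loop new-mode))))))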
|
||||
|
||||
@section{Scribble Lexer}
|
||||
|
||||
@defmodule[syntax-color/scribble-lexer]
|
||||
|
||||
@defproc[(scribble-lexer [in input-port?]
|
||||
[mode any/c])
|
||||
(values (or/c string? eof-object?)
|
||||
symbol?
|
||||
(or/c symbol? false/c)
|
||||
(or/c number? false/c)
|
||||
(or/c number? false/c)
|
||||
any/c)]{
|
||||
|
||||
Like @scheme[scheme-lexer], but for Scheme extended with Scribble's
|
||||
@"@" notation (see @secref[#:doc '(lib
|
||||
"scribblings/scribble/scribble.scrbl") "reader"]).}
|
||||
|
||||
@defproc[(scribble-inside-lexer [in input-port?]
|
||||
[mode any/c])
|
||||
(values (or/c string? eof-object?)
|
||||
symbol?
|
||||
(or/c symbol? false/c)
|
||||
(or/c number? false/c)
|
||||
(or/c number? false/c)
|
||||
any/c)]{
|
||||
|
||||
Like @scheme[scribble-lexer], but starting in ``text'' mode instead of
|
||||
Scheme mode.}
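A small sketch of the difference in starting modes (expected results, mirroring the new test file below): the same input lexes as text under scribble-inside-lexer but as Scheme under scribble-lexer. The helper first-token-type is invented for illustration.

(require syntax-color/scribble-lexer)

(define (first-token-type lex str)
  (let-values ([(lexeme type paren start end mode)
                (lex (open-input-string str) #f)])
    type))

;; (first-token-type scribble-inside-lexer "hello") ; expected: 'string
;; (first-token-type scribble-lexer "hello")        ; expected: 'symbol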
|
||||
|
||||
@; ----------------------------------------------------------------------
|
||||
|
||||
@section{Splay Tree for Tokenization}
|
||||
|
|
collects/tests/syntax-color/scribble-lexer.ss (new file, 201 lines)
|
@ -0,0 +1,201 @@
|
|||
#lang scheme
|
||||
(require syntax-color/scribble-lexer)
|
||||
|
||||
(define in (open-input-string "@|x #|10|#| @me[1 2 #| comment |# ]{10}"))
|
||||
|
||||
(define (color str)
|
||||
(let ([in (open-input-string str)])
|
||||
(let loop ([mode #f])
|
||||
(let-values ([(lexeme type paren start end mode) (scribble-inside-lexer in mode)])
|
||||
(if (eq? type 'eof)
|
||||
null
|
||||
(cons (list start end type)
|
||||
(loop mode)))))))
|
||||
|
||||
(define (test* str len-val line)
|
||||
(let ([v (color str)]
|
||||
[val (let loop ([pos 1][l len-val])
|
||||
(if (null? l)
|
||||
null
|
||||
(cons (list pos (+ pos (caar l)) (cadar l))
|
||||
(loop (+ (+ pos (caar l))) (cdr l)))))])
|
||||
(unless (equal? v val)
|
||||
(printf "FAILED, line ~s\n" line)
|
||||
(printf " result\n")
|
||||
(pretty-print v)
|
||||
(printf " is not expected\n")
|
||||
(pretty-print val)
|
||||
(printf "\n"))))
|
||||
|
||||
(define-syntax-rule (test str len-val)
|
||||
(test* str len-val (syntax-line #'str)))
|
||||
|
||||
(test "x" '((1 string)))
|
||||
(test "x{}" '((3 string)))
|
||||
(test "@x" '((1 other)
|
||||
(1 symbol)))
|
||||
|
||||
(test "@x str" '((1 other)
|
||||
(1 symbol)
|
||||
(4 string)))
|
||||
|
||||
(test "@x[] str" '((1 other)
|
||||
(1 symbol)
|
||||
(1 other)
|
||||
(1 other)
|
||||
(4 string)))
|
||||
|
||||
(test "@x[z] str" '((1 other)
|
||||
(1 symbol)
|
||||
(1 other)
|
||||
(1 symbol)
|
||||
(1 other)
|
||||
(4 string)))
|
||||
|
||||
(test "@x[z +1.5] str" '((1 other)
|
||||
(1 symbol)
|
||||
(1 other)
|
||||
(1 symbol)
|
||||
(1 white-space)
|
||||
(4 constant)
|
||||
(1 other)
|
||||
(4 string)))
|
||||
|
||||
(test "@x[z @w{10}] str" '((1 other)
|
||||
(1 symbol) ; x
|
||||
(1 other)
|
||||
(1 symbol) ; z
|
||||
(1 white-space)
|
||||
(1 other)
|
||||
(1 symbol) ; w
|
||||
(1 other)
|
||||
(2 string)
|
||||
(1 other)
|
||||
(1 other)
|
||||
(4 string)))
|
||||
|
||||
(test "@x[a@b]{a}{b}" '((1 other)
|
||||
(1 symbol)
|
||||
(1 other)
|
||||
(3 symbol)
|
||||
(1 other)
|
||||
(1 other)
|
||||
(1 string)
|
||||
(1 other)
|
||||
(3 string)))
|
||||
(test "@x{{}}" '((1 other)
|
||||
(1 symbol)
|
||||
(1 other)
|
||||
(1 other) ; {
|
||||
(1 other) ; }
|
||||
(1 other)))
|
||||
|
||||
(test "@|x|str" '((2 other)
|
||||
(1 symbol)
|
||||
(1 other)
|
||||
(3 string)))
|
||||
(test "@|x #|ok|#|str" '((2 other)
|
||||
(1 symbol)
|
||||
(1 white-space)
|
||||
(6 comment)
|
||||
(1 other)
|
||||
(3 string)))
|
||||
(test "@| x ; c\n| str" '((2 other)
|
||||
(1 white-space)
|
||||
(1 symbol)
|
||||
(1 white-space)
|
||||
(3 comment)
|
||||
(2 other)
|
||||
(4 string)))
|
||||
(test "@|(a|b|)|str" '((2 other)
|
||||
(1 parenthesis)
|
||||
(4 symbol)
|
||||
(1 parenthesis)
|
||||
(1 other)
|
||||
(3 string)))
|
||||
|
||||
(test "@#|bad|#x str" '((1 other)
|
||||
(7 error)
|
||||
(1 symbol)
|
||||
(4 string)))
|
||||
(test "@@x" '((1 other)
|
||||
(1 other)
|
||||
(1 symbol)))
|
||||
(test "@|@x|z" '((2 other)
|
||||
(1 other)
|
||||
(1 symbol)
|
||||
(1 other)
|
||||
(1 string)))
|
||||
(test "@@x[1 2][3]" '((1 other)
|
||||
(1 other)
|
||||
(1 symbol)
|
||||
(1 other)
|
||||
(1 constant)
|
||||
(1 white-space)
|
||||
(1 constant)
|
||||
(1 other)
|
||||
(1 other)
|
||||
(1 constant)
|
||||
(1 other)))
|
||||
|
||||
(test "@x|{10}|" '((1 other)
|
||||
(1 symbol)
|
||||
(2 other)
|
||||
(2 string)
|
||||
(2 other)))
|
||||
(test "@x|{@q}|" '((1 other)
|
||||
(1 symbol)
|
||||
(2 other)
|
||||
(2 string)
|
||||
(2 other)))
|
||||
(test "@x|!!{@q}!!|" '((1 other)
|
||||
(1 symbol)
|
||||
(4 other)
|
||||
(2 string)
|
||||
(4 other)))
|
||||
(test "@x|(({@q}))|" '((1 other)
|
||||
(1 symbol)
|
||||
(4 other)
|
||||
(2 string)
|
||||
(4 other)))
|
||||
(test "@x|<<{a|<<@a[10]}>>|" '((1 other)
|
||||
(1 symbol)
|
||||
(4 other)
|
||||
(1 string)
|
||||
(4 other)
|
||||
(1 symbol)
|
||||
(1 other)
|
||||
(2 constant)
|
||||
(1 other)
|
||||
(4 other)))
|
||||
(test "@x|{ |{ } }|}|" '((1 other)
|
||||
(1 symbol)
|
||||
(2 other)
|
||||
(1 string)
|
||||
(2 other) ; |{
|
||||
(3 string)
|
||||
(2 other) ; }|
|
||||
(2 other)))
|
||||
|
||||
(test "@`',@foo{blah}" '((1 other)
|
||||
(1 constant) ; `
|
||||
(1 constant) ; '
|
||||
(2 other) ; ,@
|
||||
(3 symbol)
|
||||
(1 other)
|
||||
(4 string)
|
||||
(1 other)))
|
||||
|
||||
(test "@; 1" '((4 comment)))
|
||||
(test "@; 1\nv" '((5 comment)
|
||||
(1 string)))
|
||||
(test "@;{1}v" '((2 comment)
|
||||
(1 other)
|
||||
(1 string)
|
||||
(1 other)
|
||||
(1 string)))
|
||||
(test "@;|{1 }} }|v" '((2 comment)
|
||||
(2 other)
|
||||
(5 string)
|
||||
(2 other)
|
||||
(1 string)))
|
|
@ -1953,7 +1953,7 @@ static jit_insn *generate_proc_struct_retry(mz_jit_state *jitter, int num_rands,
|
|||
return ref2;
|
||||
}
|
||||
|
||||
/* Support for interectpting direct calls to primitives: */
|
||||
/* Support for intercepting direct calls to primitives: */
|
||||
#if 1
|
||||
# define mz_prepare_direct_prim(n) mz_prepare(n)
|
||||
# define mz_finishr_direct_prim(reg, proc) mz_finishr(reg)
|
||||
|
|
|
@ -263,7 +263,7 @@ scheme_init_port_fun(Scheme_Env *env)
|
|||
GLOBAL_NONCM_PRIM("read-honu/recursive", read_honu_recur_f, 0, 1, env);
|
||||
GLOBAL_NONCM_PRIM("read-honu-syntax", read_honu_syntax_f, 0, 2, env);
|
||||
GLOBAL_NONCM_PRIM("read-honu-syntax/recursive", read_honu_syntax_recur_f, 0, 2, env);
|
||||
GLOBAL_NONCM_PRIM("read-language", read_language, 0, 2, env);
|
||||
GLOBAL_PRIM_W_ARITY2("read-language", read_language, 0, 2, 0, -1, env);
|
||||
GLOBAL_NONCM_PRIM("read-char", read_char, 0, 1, env);
|
||||
GLOBAL_NONCM_PRIM("read-char-or-special", read_char_spec, 0, 1, env);
|
||||
GLOBAL_NONCM_PRIM("read-byte", read_byte, 0, 1, env);
|
||||
|
|