#lang syntax coloring; Scribble syntax coloring

svn: r15607
This commit is contained in:
Matthew Flatt 2009-07-28 18:06:14 +00:00
parent 2425917a33
commit 1ba7cf0926
20 changed files with 834 additions and 82 deletions

View File

@ -31,8 +31,15 @@
(bad (cadr spec) #f)))))
(define (get-info inp mod line col pos)
(at-get inp 'get-info (object-name inp) line col pos
(lambda (spec) (lambda () (lambda (tag) #f)))))
(let ([r (at-get inp 'get-info (object-name inp) line col pos
(lambda (spec) (lambda () (lambda (inp mod line col pos)
(lambda (tag) #f)))))])
(let ([proc (r inp mod line col pos)])
(lambda (key)
(case key
[(color-lexer)
(dynamic-require 'syntax-color/scribble-lexer 'scribble-lexer)]
[else (and proc (proc key))])))))
(define at-readtable (make-at-readtable))

View File

@ -32,6 +32,10 @@ added get-regions
(define (should-color-type? type)
(not (memq type '(white-space no-color))))
(define (make-data type mode) (cons type mode))
(define (data-type data) (car data))
(define (data-lexer-mode data) (cdr data))
(define -text<%>
(interface (text:basic<%>)
start-colorer
@ -102,6 +106,11 @@ added get-regions
invalid-tokens-start ; = +inf.0
;; The position right before the next token to be read
current-pos
;; Thread a mode through lexing, and remember the mode
;; at each token boundary, so that lexing can depend on
;; previous tokens. This is the mode for lexing at
;; current-pos:
current-lexer-mode
;; Paren-matching
parens
)
@ -118,6 +127,7 @@ added get-regions
(new token-tree%)
+inf.0
start
#f
(new paren-tree% (matches pairs))))
(define lexer-states (list (make-new-lexer-state 0 'end)))
@ -228,6 +238,7 @@ added get-regions
(set-lexer-state-invalid-tokens-start! ls +inf.0)
(set-lexer-state-up-to-date?! ls #t)
(set-lexer-state-current-pos! ls (lexer-state-start-pos ls))
(set-lexer-state-current-lexer-mode! ls #f)
(set-lexer-state-parens! ls (new paren-tree% (matches pairs))))
lexer-states)
(set! restart-callback #f)
@ -258,12 +269,12 @@ added get-regions
(set-lexer-state-invalid-tokens-start! ls (+ invalid-tokens-start length)))
(sync-invalid ls))))
(define/private (re-tokenize ls in in-start-pos enable-suspend)
(let-values ([(lexeme type data new-token-start new-token-end)
(define/private (re-tokenize ls in in-start-pos in-lexer-mode enable-suspend)
(let-values ([(lexeme type data new-token-start new-token-end new-lexer-mode)
(begin
(enable-suspend #f)
(begin0
(get-token in)
(get-token in in-lexer-mode)
(enable-suspend #t)))])
(unless (eq? 'eof type)
(enable-suspend #f)
@ -271,6 +282,7 @@ added get-regions
(+ in-start-pos (sub1 new-token-end)))
(let ((len (- new-token-end new-token-start)))
(set-lexer-state-current-pos! ls (+ len (lexer-state-current-pos ls)))
(set-lexer-state-current-lexer-mode! ls new-lexer-mode)
(sync-invalid ls)
(when (and should-color? (should-color-type? type) (not frozen?))
(set! colors
@ -286,7 +298,7 @@ added get-regions
;; version. In other words, the new greatly outweighs the tree
;; operations.
;;(insert-last! tokens (new token-tree% (length len) (data type)))
(insert-last-spec! (lexer-state-tokens ls) len type)
(insert-last-spec! (lexer-state-tokens ls) len (make-data type new-lexer-mode))
(send (lexer-state-parens ls) add-token data len)
(cond
((and (not (send (lexer-state-invalid-tokens ls) is-empty?))
@ -301,7 +313,7 @@ added get-regions
(enable-suspend #t))
(else
(enable-suspend #t)
(re-tokenize ls in in-start-pos enable-suspend)))))))
(re-tokenize ls in in-start-pos new-lexer-mode enable-suspend)))))))
(define/private (do-insert/delete/ls ls edit-start-pos change-length)
(unless (lexer-state-up-to-date? ls)
@ -318,7 +330,14 @@ added get-regions
(if (send (lexer-state-invalid-tokens ls) is-empty?)
+inf.0
(+ (lexer-state-start-pos ls) orig-token-end change-length)))
(set-lexer-state-current-pos! ls (+ (lexer-state-start-pos ls) orig-token-start))
(let ([start (+ (lexer-state-start-pos ls) orig-token-start)])
(set-lexer-state-current-pos! ls start)
(set-lexer-state-current-lexer-mode! ls
(if (= start (lexer-state-start-pos ls))
#f
(begin
(send valid-tree search-max!)
(data-lexer-mode (send valid-tree get-root-data))))))
(set-lexer-state-up-to-date?! ls #f)
(queue-callback (λ () (colorer-callback)) #f)))
((>= edit-start-pos (lexer-state-invalid-tokens-start ls))
@ -340,7 +359,13 @@ added get-regions
(send (lexer-state-parens ls) truncate tok-start)
(set-lexer-state-tokens! ls valid-tree)
(set-lexer-state-invalid-tokens-start! ls (+ change-length (lexer-state-invalid-tokens-start ls)))
(set-lexer-state-current-pos! ls (+ (lexer-state-start-pos ls) tok-start))))))
(let ([start (+ (lexer-state-start-pos ls) tok-start)])
(set-lexer-state-current-pos! ls start)
(if (= start (lexer-state-start-pos ls))
#f
(begin
(send valid-tree search-max!)
(data-lexer-mode (send valid-tree get-root-data)))))))))
(define/private (do-insert/delete edit-start-pos change-length)
(unless (or stopped? force-stop?)
@ -378,6 +403,7 @@ added get-regions
(λ (x) #f))
(enable-suspend #t)))
(lexer-state-current-pos ls)
(lexer-state-current-lexer-mode ls)
enable-suspend))
lexer-states)))))
(set! rev (get-revision-number)))
@ -427,7 +453,14 @@ added get-regions
(reset-tokens)
(set! should-color? (preferences:get 'framework:coloring-active))
(set! token-sym->style token-sym->style-)
(set! get-token get-token-)
(set! get-token (if (procedure-arity-includes? get-token- 2)
;; New interface: thread through a mode:
get-token-
;; Old interface: no mode
(lambda (in mode)
(let-values ([(lexeme type data new-token-start new-token-end)
(get-token- in)])
(values lexeme type data new-token-start new-token-end #f)))))
(set! pairs pairs-)
(for-each
(lambda (ls)
@ -739,7 +772,7 @@ added get-regions
(let ([tokens (lexer-state-tokens ls)])
(tokenize-to-pos ls position)
(send tokens search! (- position (lexer-state-start-pos ls)))
(send tokens get-root-data)))))
(data-type (send tokens get-root-data))))))
(define/private (tokenize-to-pos ls position)
(when (and (not (lexer-state-up-to-date? ls))
@ -768,8 +801,8 @@ added get-regions
(send tokens search! (- (if (eq? direction 'backward) (sub1 position) position)
start-pos))
(cond
((or (eq? 'white-space (send tokens get-root-data))
(and comments? (eq? 'comment (send tokens get-root-data))))
((or (eq? 'white-space (data-type (send tokens get-root-data)))
(and comments? (eq? 'comment (data-type (send tokens get-root-data)))))
(skip-whitespace (+ start-pos
(if (eq? direction 'forward)
(send tokens get-root-end-position)

View File

@ -6,7 +6,7 @@
(require string-constants
scheme/class
mred/mred-sig
syntax-color/scheme-lexer
syntax-color/module-lexer
"collapsed-snipclass-helpers.ss"
"sig.ss"
"../gui-utils.ss"
@ -1172,14 +1172,14 @@
(preferences:add-callback
'framework:tabify
(lambda (k v) (set! tabify-pref v)))
(define/private (scheme-lexer-wrapper in)
(let-values (((lexeme type paren start end) (scheme-lexer in)))
(define/private (scheme-lexer-wrapper in mode)
(let-values (((lexeme type paren start end mode) (module-lexer in mode)))
(cond
((and (eq? type 'symbol)
(get-keyword-type lexeme tabify-pref))
(values lexeme 'keyword paren start end))
(values lexeme 'keyword paren start end mode))
(else
(values lexeme type paren start end)))))
(values lexeme type paren start end mode)))))
(define/override (put-file text sup directory default-name)
(parameterize ([finder:default-extension "ss"]
@ -1188,7 +1188,7 @@
;; don't call the surrogate's super, since it sets the default extension
(sup directory default-name)))
(super-new (get-token (lambda (in) (scheme-lexer-wrapper in)))
(super-new (get-token (lambda (in mode) (scheme-lexer-wrapper in mode)))
(token-sym->style short-sym->style-name)
(matches '((|(| |)|)
(|[| |]|)

View File

@ -259,6 +259,7 @@
(->* () () #:rest (listof pre-content?) element?))
(provide/contract
[linebreak (-> element?)]
[hspace (-> exact-nonnegative-integer? element?)]
[elem (->* ()
(#:style element-style?)
@ -291,6 +292,9 @@
h))
(make-element 'hspace (list (make-string n #\space)))))
(define (linebreak)
(make-element 'newline '("\n")))
(define (elem #:style [style plain] . str)
(make-element style (decode-content str)))

View File

@ -6,5 +6,10 @@ scribble/base/lang
#:read-syntax scribble:read-syntax-inside
#:whole-body-readers? #t
#:wrapper1 (lambda (t) (list* 'doc 'values '() (t)))
#:info (lambda (key default)
(case key
[(color-lexer)
(dynamic-require 'syntax-color/scribble-lexer 'scribble-inside-lexer)]
[else (default key)]))
(require (prefix-in scribble: "../../reader.ss"))

View File

@ -1,3 +1,11 @@
#lang scheme/base
(require (prefix-in doc: scribble/doc/reader))
(provide (rename-out [doc:read read] [doc:read-syntax read-syntax]))
(provide (rename-out [doc:read read] [doc:read-syntax read-syntax])
get-info)
(define (get-info . args)
(lambda (key)
(case key
[(color-lexer)
(dynamic-require 'syntax-color/scribble-lexer 'scribble-inside-lexer)]
[else #f])))

View File

@ -6,5 +6,10 @@ scribble/manual/lang
#:read-syntax scribble:read-syntax-inside
#:whole-body-readers? #t
#:wrapper1 (lambda (t) (cons 'doc (t)))
#:info (lambda (key default)
(case key
[(color-lexer)
(dynamic-require 'syntax-color/scribble-lexer 'scribble-inside-lexer)]
[else (default key)]))
(require (prefix-in scribble: "../../reader.ss"))

View File

@ -47,6 +47,7 @@
(struct-out var-id)
(struct-out shaped-parens)
(struct-out just-context)
(struct-out alternate-display)
(struct-out literal-syntax)
(for-syntax make-variable-id
variable-id?
@ -178,11 +179,12 @@
(memq (syntax-e c) (current-variable-list)))]
[(s it? sub?)
(let ([sc (syntax-e c)])
(let ([s (format "~s" (if (literal-syntax? sc)
(literal-syntax-stx sc)
(if (var-id? sc)
(var-id-sym sc)
sc)))])
(let ([s (or (syntax-property c 'display-string)
(format "~s" (if (literal-syntax? sc)
(literal-syntax-stx sc)
(if (var-id? sc)
(var-id-sym sc)
sc))))])
(if (and (symbol? sc)
((string-length s) . > . 1)
(char=? (string-ref s 0) #\_)
@ -763,6 +765,7 @@
(define-struct var-id (sym))
(define-struct shaped-parens (val shape))
(define-struct just-context (val ctx))
(define-struct alternate-display (id string))
(define-struct literal-syntax (stx))
(define-struct graph-reference (bx))
@ -792,6 +795,11 @@
s
s
(just-context-ctx v)))]
[(alternate-display? v)
(let ([s (do-syntax-ize (alternate-display-id v) col line ht #f)])
(syntax-property s
'display-string
(alternate-display-string v)))]
[(hash-ref (unbox ht) v #f)
=> (lambda (m)
(unless (unbox m)

View File

@ -6,5 +6,10 @@ scribble/sigplan/lang
#:read-syntax scribble:read-syntax-inside
#:whole-body-readers? #t
#:wrapper1 (lambda (t) (cons 'doc (t)))
#:info (lambda (key default)
(case key
[(color-lexer)
(dynamic-require 'syntax-color/scribble-lexer 'scribble-inside-lexer)]
[else (default key)]))
(require (prefix-in scribble: "../../reader.ss"))

View File

@ -9,19 +9,29 @@
that knows how to color itself. It also describes how to query the
lexical and s-expression structure of the text.
@defmethod*[(((start-colorer (token-sym->style (-> symbol? string?))
(get-token (-> input-port? (values any/c
symbol?
(or/c false? symbol?)
exact-nonnegative-integer?
exact-nonnegative-integer?)))
(get-token (or/c (-> input-port?
(values any/c
symbol?
(or/c false? symbol?)
exact-nonnegative-integer?
exact-nonnegative-integer?))
(-> input-port?
any/c
(values any/c
symbol?
(or/c false? symbol?)
exact-nonnegative-integer?
exact-nonnegative-integer?
any/c))))
(pairs (listof (list/p symbol? symbol?)))) void))]{
Starts tokenizing the buffer for coloring and parenthesis matching.
The @scheme[token-sym->style] argument will be passed the first return symbol from @scheme[get-token],
and it should return the style-name that the token should be colored.
The @scheme[token-sym->style] argument will be passed the first return symbol from @scheme[get-token]
and should return the style-name that the token should be colored.
The @scheme[get-token] argument takes an input port and optionally a mode value.
When it accepts just an input port, @scheme[get-token] returns the next token as 5 values:
The @scheme[get-token] argument takes an input port and returns the next token as 5 values:
@itemize[
@item{
An unused value. This value is intended to represent the textual
@ -42,32 +52,44 @@
@item{
The ending position of the token.}]
The @scheme[get-token] function will usually be implemented with a lexer using the
@scheme[parser-tools/lex] library.
get-token must obey the following invariants:
When @scheme[get-token] accepts a mode value in addition to an
input port, it must also return an extra result, which is a new
mode. When @scheme[get-token] is called for the beginning on a
stream, the mode argument is @scheme[#f]. Thereafter, the mode
returned for the previous token is provided to @scheme[get-token]
for the next token. The mode should not be a mutable value; if
part of the stream is re-tokenized, the mode saved from the
immediately preceding token is given again to the
@scheme[get-token] function.
The @scheme[get-token] function is usually be implemented with a lexer using the
@scheme[parser-tools/lex] library. The
@scheme[get-token] function must obey the following invariants:
@itemize[
@item{
Every position in the buffer must be accounted for in exactly one
token.}
@item{
The token returned by @scheme[get-token] must rely only on the contents of the
input port argument. This means that the tokenization of some part of
the input cannot depend on earlier parts of the input.}
input port argument plus the mode argument. This constraint means that the
tokenization of some part of the input cannot depend on earlier parts of the
input except through the mode (and implicitly through the starting positions
for tokens).}
@item{
No edit to the buffer can change the tokenization of the buffer prior
to the token immediately preceding the edit. In the following
example this invariant does not hold. If the buffer contains:
@verbatim{" 1 2 3}
A change to the stream must not change the tokenization of the stream prior
to the token immediately preceding the change. In the following
example this invariant does not hold. If the buffer contains
@verbatim[#:indent 2]{" 1 2 3}
and the tokenizer treats the unmatched " as its own token (a string
error token), and separately tokenizes the 1 2 and 3, an edit to make
the buffer look like:
@verbatim{" 1 2 3"}
the buffer look like
@verbatim[#:indent 2]{" 1 2 3"}
would result in a single string token modifying previous tokens. To
handle these situations, @scheme[get-token] must treat the first line as a
single token.}]
The @scheme[pairs] argument is a list of different kinds of matching parens. The second
value returned by get-token is compared to this list to see how the
value returned by @scheme[get-token] is compared to this list to see how the
paren matcher should treat the token. An example: Suppose pairs is
@scheme['((|(| |)|) (|[| |]|) (begin end))]. This means that there
are three kinds of parens. Any token which has @scheme['begin] as its second

View File

@ -272,6 +272,9 @@ gets progressively larger.}
@defproc[(emph [pre-content pre-content?] ...) element?]{
The same as @scheme[italic].}
@defproc[(linebreak) element?]{
Produces an element that forces a line break.}
@defproc[(hspace [n exact-nonnegative-integer?]) element?]{
Produces an element containing @scheme[n] spaces and style

View File

@ -60,7 +60,7 @@ in the case of Latex).
To add a mapping from your own style name to a CSS configuration, add
a @scheme[css-addition] structure instance to a style's @tech{style property}
list. To map a style name to a Latex macro or environment, add a
scheme[tex-addition] structure instance. A @scheme[css-addition] or
@scheme[tex-addition] structure instance. A @scheme[css-addition] or
@scheme[tex-addition] is normally associated with the style whose name
is implemented by the adition, but it can also be added to the style
for an enclosing part.

View File

@ -167,7 +167,7 @@ needed.
@{blah @|foo|: blah}
}===|
Actually, the command part can be any Scheme expression (that does
Actually, the command part can be any Scheme expression (that does not
start with @litchar["["], @litchar["{"], or @litchar["|"]), which is
particularly useful with such escapes since they can be used with any
expression.

View File

@ -0,0 +1,54 @@
#lang scheme/base
(require scheme/port
"scheme-lexer.ss")
(provide module-lexer)
(define (module-lexer in mode)
(cond
[(not mode)
;; Starting out: look for #lang:
(let*-values ([(p) (peeking-input-port in)]
[(init) (file-position p)]
[(start-line start-col start-pos) (port-next-location p)])
(let ([get-info (with-handlers ([exn:fail? (lambda (exn) 'fail)])
;; FIXME: set the reader guard to disable access to
;; untrusted planet packages.
(read-language in (lambda () #f)))])
(cond
[(procedure? get-info)
;; Produce language as first token:
(let*-values ([(bytes-len) (- (file-position p) init)]
[(bstr) (read-bytes bytes-len in)]
[(end-line end-col end-pos) (port-next-location in)])
(values
bstr
'other
#f
start-pos
end-pos
(or (let ([v (get-info 'color-lexer)])
(and v
(if (procedure-arity-includes? v 2)
(cons v #f)
v)))
scheme-lexer)))]
[(eq? 'fail get-info)
(copy-port in (open-output-nowhere))
(let*-values ([(end-line end-col end-pos) (port-next-location in)])
(values #f 'error #f start-pos end-pos
(lambda (in)
(values #f 'eof #f end-pos end-pos))))]
[else
;; Start over using the Scheme lexer
(module-lexer in scheme-lexer)])))]
[(pair? mode)
;; #lang-selected language consumes and produces a mode:
(let-values ([(lexeme type data new-token-start new-token-end new-mode)
((car mode) in (cdr mode))])
(values lexeme type data new-token-start new-token-end (cons (car mode) new-mode)))]
[else
;; #lang-selected language (or default) doesn't deal with modes:
(let-values ([(lexeme type data new-token-start new-token-end)
(mode in)])
(values lexeme type data new-token-start new-token-end mode))]))

View File

@ -3,7 +3,9 @@
(require parser-tools/lex
(prefix : parser-tools/lex-sre))
(provide scheme-lexer)
(provide scheme-lexer
scheme-lexer/status
scheme-nobar-lexer/status)
(define-lex-abbrevs
@ -120,6 +122,13 @@
[identifier (:: identifier-start
(:* identifier-escapes identifier-chars))]
[nobar-identifier-escapes (:: "\\" any-char)]
[nobar-identifier-start (:or nobar-identifier-escapes
(:~ identifier-delims "\\" "|" "#")
"#%")]
[nobar-identifier (:: nobar-identifier-start
(:* nobar-identifier-escapes identifier-chars))]
[bad-id-start (:or identifier-escapes
(:~ identifier-delims "\\" "|"))]
[bad-id-escapes (:or identifier-escapes
@ -129,8 +138,17 @@
(:? "\\" bad-id-escapes))
"\\"
bad-id-escapes)]
[nobar-bad-id-escapes nobar-identifier-escapes]
[nobar-bad-id (:or (:: bad-id-start
(:* nobar-identifier-escapes identifier-chars)
(:? "\\" nobar-bad-id-escapes))
"\\"
nobar-bad-id-escapes)]
[keyword (:: "#:" (:* identifier-escapes identifier-chars))]
[nobar-keyword (:: "#:" (:* nobar-identifier-escapes identifier-chars))]
[reader-command (:or (:: "#" c s) (:: "#" c i))]
[sharing (:or (:: "#" (make-uinteger digit10) "=")
@ -189,8 +207,8 @@
((_ digit) (:or "" (:: exponent-marker (:? sign) (:+ digit))))))
(define (ret lexeme type paren start-pos end-pos)
(values lexeme type paren (position-offset start-pos) (position-offset end-pos)))
(define (ret lexeme type paren start-pos end-pos status)
(values lexeme type paren (position-offset start-pos) (position-offset end-pos) status))
(define get-next-comment
@ -208,11 +226,11 @@
(define (read-nested-comment num-opens start-pos input)
(let-values (((diff end) (get-next-comment input)))
(cond
((eq? 'eof diff) (ret "" 'error #f start-pos end))
((eq? 'eof diff) (ret "" 'error #f start-pos end 'continue))
(else
(let ((next-num-opens (+ diff num-opens)))
(cond
((= 0 next-num-opens) (ret "" 'comment #f start-pos end))
((= 0 next-num-opens) (ret "" 'comment #f start-pos end 'continue))
(else (read-nested-comment next-num-opens start-pos input))))))))
(define (get-offset i)
@ -253,7 +271,7 @@
(next-char (peek-char-or-special i)))
(cond
((or (equal? ender "") (not (eq? #\newline next-char)))
(values (string-append "#<<" ender) 'error #f start-pos (get-offset i)))
(values (string-append "#<<" ender) 'error #f start-pos (get-offset i) 'datum))
(else
(read-char i)
(let loop ((acc (list (string-append "#<<" ender "\n"))))
@ -262,85 +280,96 @@
(cond
((not (or (char? next-char) (eof-object? next-char))) ;; a special
(values (apply string-append (reverse (cons next-line acc)))
'error #f start-pos (get-offset i)))
'error #f start-pos (get-offset i)
'datum))
((equal? next-line ender) ;; end of string
(values (apply string-append (reverse (cons next-line acc)))
'string #f start-pos (get-offset i)))
'string #f start-pos (get-offset i)
'datum))
((eof-object? next-char)
(values (apply string-append (reverse (cons next-line acc)))
'error #f start-pos (get-offset i)))
'error #f start-pos (get-offset i)
'datum))
(else
(read-char i)
(loop (cons (string-append next-line "\n") acc))))))))))
(define (scheme-lexer in)
(let-values ([(lexeme type paren start end adj) (scheme-lexer/status in)])
(values lexeme type paren start end)))
(define scheme-lexer
(define-syntax-rule (lexer/status identifier keyword bad-id)
(lexer
[(:+ scheme-whitespace)
(ret lexeme 'white-space #f start-pos end-pos)]
(ret lexeme 'white-space #f start-pos end-pos 'continue)]
[(:or "#t" "#f" "#T" "#F" character
(make-num digit2 radix2)
(make-num digit8 radix8)
(make-num digit10 (:? radix10))
(make-num digit16 radix16))
(ret lexeme 'constant #f start-pos end-pos)]
[keyword (ret lexeme 'parenthesis #f start-pos end-pos)]
[str (ret lexeme 'string #f start-pos end-pos)]
(ret lexeme 'constant #f start-pos end-pos 'datum)]
[keyword (ret lexeme 'parenthesis #f start-pos end-pos 'datum)]
[str (ret lexeme 'string #f start-pos end-pos 'datum)]
[";"
(values (apply string (read-line/skip-over-specials input-port)) 'comment #f
(position-offset start-pos)
(get-offset input-port))]
(get-offset input-port)
'continue)]
#;
[line-comment
(ret lexeme 'comment #f start-pos end-pos)]
["#;"
(ret lexeme 'sexp-comment #f start-pos end-pos)]
(ret lexeme 'sexp-comment #f start-pos end-pos 'continue)]
["#|" (read-nested-comment 1 start-pos input-port)]
[script
(ret lexeme 'comment #f start-pos end-pos)]
(ret lexeme 'comment #f start-pos end-pos 'continue)]
[(:: list-prefix "(")
(ret lexeme 'parenthesis '|(| start-pos end-pos)]
(ret lexeme 'parenthesis '|(| start-pos end-pos 'open)]
[(:: list-prefix "[")
(ret lexeme 'parenthesis '|[| start-pos end-pos)]
(ret lexeme 'parenthesis '|[| start-pos end-pos 'open)]
[(:: list-prefix "{")
(ret lexeme 'parenthesis '|{| start-pos end-pos)]
(ret lexeme 'parenthesis '|{| start-pos end-pos 'open)]
[(:or ")" "]" "}")
(ret lexeme 'parenthesis (string->symbol lexeme) start-pos end-pos)]
(ret lexeme 'parenthesis (string->symbol lexeme) start-pos end-pos 'close)]
[(:or "'" "`" "#'" "#`" "#&")
(ret lexeme 'constant #f start-pos end-pos)]
(ret lexeme 'constant #f start-pos end-pos 'continue)]
[(:or sharing reader-command "." "," ",@" "#," "#,@")
(ret lexeme 'other #f start-pos end-pos)]
(ret lexeme 'other #f start-pos end-pos 'continue)]
[(:: (:or "#lang " "#!")
(:or langchar
(:: langchar (:* (:or langchar "/")) langchar)))
(ret lexeme 'other #f start-pos end-pos)]
(ret lexeme 'other #f start-pos end-pos 'continue)]
[(:: (:or "#lang " "#!") (:* (:& any-char (complement whitespace))))
(ret lexeme 'error #f start-pos end-pos)]
(ret lexeme 'error #f start-pos end-pos 'continue)]
[identifier
(ret lexeme 'symbol #f start-pos end-pos)]
(ret lexeme 'symbol #f start-pos end-pos 'datum)]
["#<<"
(get-here-string (position-offset start-pos) input-port)]
[(special)
(ret "" 'no-color #f start-pos end-pos)]
(ret "" 'no-color #f start-pos end-pos 'datum)]
[(special-comment)
(ret "" 'comment #f start-pos end-pos)]
[(eof) (values lexeme 'eof #f #f #f)]
(ret "" 'comment #f start-pos end-pos 'continue)]
[(eof) (values lexeme 'eof #f #f #f #f)]
[(:or bad-char bad-str
(:& bad-id
(complement (:: (:or (:: "#" (:or f t)) reader-command sharing "#<<" "#\\" "#|" "#;" "#&" script)
any-string))))
(ret lexeme 'error #f start-pos end-pos)]
(ret lexeme 'error #f start-pos end-pos 'bad)]
[any-char (extend-error lexeme start-pos end-pos input-port)]))
(define scheme-lexer/status (lexer/status identifier keyword bad-id))
(define scheme-nobar-lexer/status (lexer/status nobar-identifier nobar-keyword nobar-bad-id))
(define (extend-error lexeme start end in)
(if (memq (peek-char-or-special in)
`(special #\newline #\return #\tab #\space #\vtab
#\" #\, #\' #\` #\( #\) #\[ #\] #\{ #\} #\;
,eof))
(ret lexeme 'error #f start end)
(ret lexeme 'error #f start end 'bad)
(let-values (((rest end-pos) (get-chunk in)))
(ret (string-append lexeme rest) 'error #f start end-pos))))
(ret (string-append lexeme rest) 'error #f start end-pos 'bad))))
(define get-chunk
(lexer

View File

@ -0,0 +1,253 @@
#lang scheme/base
(require "scheme-lexer.ss")
(provide scribble-inside-lexer
scribble-lexer)
(define-struct text (scheme-rx end-rx sub-rx string-rx open-paren close-paren))
(define-struct scheme (status))
(define-struct args ())
(define-struct text-args ())
(define (scribble-inside-lexer in mode)
(let ([mode (or mode
(list
(make-text #rx"^@"
#f
#f
#rx".*?(?:(?=@)|$)"
#f
#f)))])
(let-values ([(line col pos) (port-next-location in)]
[(l) (car mode)])
(define (enter-@ comment-k)
(if (equal? #\; (peek-char in))
;; Comment
(begin
(read-char in)
(if (or (equal? #\{ (peek-char in))
(equal? #\| (peek-char in)))
;; Bracketed comment:
(let-values ([(end-line end-col end-pos) (port-next-location in)])
(comment-k "@;"
'comment
#f
pos
end-pos
(cons (make-text-args)
mode)))
;; Line comment:
(begin
(regexp-match #rx"\r\n|\r|\n" in)
(let-values ([(end-line end-col end-pos) (port-next-location in)])
(comment-k "@;"
'comment
#f
pos
end-pos
mode)))))
(let ([new-mode
(cond
[(equal? #\| (peek-char in))
(read-char in)
(list* (make-scheme 'bar)
mode)]
[else
(list* (make-scheme 'one)
(make-args)
mode)])])
(let-values ([(end-line end-col end-pos) (port-next-location in)])
(values "@"
'other
#f
pos
end-pos
new-mode)))))
(if (eof-object? (peek-char in))
(values eof
'eof
#f
pos
pos
#f)
(cond
[(text? l)
(cond
[(and (text-scheme-rx l)
(regexp-try-match (text-scheme-rx l) in))
;; Found @
(enter-@ values)]
[(and (text-end-rx l)
(regexp-try-match (text-end-rx l) in))
(let-values ([(end-line end-col end-pos) (port-next-location in)])
(values "}"
'other
(text-close-paren l)
pos
end-pos
(cdr mode)))]
[(and (text-sub-rx l)
(regexp-try-match (text-sub-rx l) in))
(let-values ([(end-line end-col end-pos) (port-next-location in)])
(values "{"
'other
(text-open-paren l)
pos
end-pos
(cons (car mode) mode)))]
[else
;; Read string up to @ or }
(regexp-match? (text-string-rx l) in)
(let-values ([(end-line end-col end-pos) (port-next-location in)])
(values 'string
'string
#f
pos
end-pos
mode))])]
[(scheme? l)
(let ([status (scheme-status l)])
(cond
[(and (eq? status 'bracket)
(regexp-try-match #px"^\\s*?[]]" in))
(let-values ([(end-line end-col end-pos) (port-next-location in)])
(values "]"
'other
'|]|
pos
end-pos
(cdr mode)))]
[(and (memq status '(bar bar-no-more))
(regexp-try-match #px"^\\s*?[|]" in))
(let-values ([(end-line end-col end-pos) (port-next-location in)])
(values "|"
'other
#f
pos
end-pos
(cdr mode)))]
[(regexp-try-match #rx"^@" in)
(enter-@ (lambda (lexeme type paren start end mode)
(values lexeme
(if (eq? status 'one)
'error
type)
paren
start
end
mode)))]
[else
(let-values ([(lexeme type paren start end adj)
(case status
[(bar bar-no-more one) (scheme-nobar-lexer/status in)]
[else (scheme-lexer/status in)])]
[(consume) (lambda (status)
(case status
[(many) mode]
[(one) (cdr mode)]
[(bracket bar-no-more)
(cons (make-scheme status) (cdr mode))]
[(bar) (cons (make-scheme 'bar-no-more) (cdr mode))]
[else (error "bad status")]))])
(values lexeme
(cond
[(or (eq? type 'comment)
(eq? type 'white-space))
(if (eq? status 'one)
'error
type)]
[(eq? status 'bar-no-more)
;; Too many S-expressions in @| ... |
'error]
[else type])
paren
start
end
(case adj
[(continue) mode]
[(datum)
(cond
[(pair? status) mode]
[else (consume status)])]
[(open)
(cons (make-scheme (cons #t status))
(cdr mode))]
[(close)
(if (pair? status)
(let ([v (cdr status)])
(if (symbol? v)
(consume v)
(cons (make-scheme v) (cdr mode))))
(consume status))]
[(bad) (consume status)]
[else (error "bad adj")])))]))]
[(args? l)
(cond
[(regexp-try-match #rx"^\\[" in)
(let-values ([(end-line end-col end-pos) (port-next-location in)])
(values "["
'other
'|[|
pos
end-pos
(list* (make-scheme 'bracket)
mode)))]
[else
(scribble-lexer in (cons (make-text-args) (cdr mode)))])]
[(text-args? l)
(cond
[(regexp-try-match #rx"^[|]([^a-zA-Z0-9 \t\r\n\f@\\\177-\377{]*){" in)
=> (lambda (m)
(let-values ([(end-line end-col end-pos) (port-next-location in)])
(values (cadr m)
'other
#f
pos
end-pos
(let ([closer (regexp-quote
(bytes-append #"}"
(regexp-replace** (list #rx"[(]" #rx"[[]" #rx"{" #rx"<")
(cadr m)
(list #")" #"]" #"}" #">"))
#"|"))]
[re-opener (regexp-quote (cadr m))])
(cons (make-text (byte-regexp (bytes-append #"^[|]" re-opener #"@"))
(byte-regexp (bytes-append #"^" closer))
(byte-regexp (bytes-append #"^[|]" re-opener #"{"))
(byte-regexp (bytes-append
#".*?(?:(?=[|]"
re-opener
#"[@{])|(?="
closer
#")|$)"))
#f
#f)
(cdr mode))))))]
[(regexp-try-match #rx"^{" in)
(let-values ([(end-line end-col end-pos) (port-next-location in)])
(values "{"
'other
'|{|
pos
end-pos
(cons (make-text #rx"^@"
#rx"^}"
#rx"^{"
#rx".*?(?:(?=[@{}])|$)"
'|{|
'|}|)
(cdr mode))))]
[else
(scribble-lexer in (cdr mode))])]
[else (error "bad mode")])))))
(define (scribble-lexer in mode)
(scribble-inside-lexer in (or mode (list (make-scheme 'many)))))
(define (regexp-replace** rxs str strs)
(if (null? rxs)
str
(regexp-replace** (cdr rxs)
(regexp-replace* (car rxs) str (car strs))
(cdr strs))))

View File

@ -3,6 +3,8 @@
(for-label syntax-color/token-tree
syntax-color/paren-tree
syntax-color/scheme-lexer
syntax-color/module-lexer
syntax-color/scribble-lexer
syntax-color/default-lexer
framework/framework
framework/private/color
@ -38,7 +40,7 @@ Parenthesis matching code built on top of @scheme[token-tree%].
symbol?
(or/c symbol? false/c)
(or/c number? false/c)
(or/c number? false/c))]
(or/c number? false/c))]{
A lexer for Scheme, including reader extensions (@secref[#:doc'(lib
"scribblings/reference/reference.scrbl")]{Reader_Extension}), built
@ -60,6 +62,35 @@ The @scheme[scheme-lexer] function returns 5 values:
@item{A number representing the ending position of the match (or @scheme[#f] if eof).}]
}
@defproc[(scheme-lexer/status [in input-port?])
(values (or/c string? eof-object?)
symbol?
(or/c symbol? false/c)
(or/c number? false/c)
(or/c number? false/c)
(or/c 'datum 'open 'close 'continue))]{
Like @scheme[scheme-lexer], but returns an extra value. The last
return value indicates whether the consumed token should count as a
datum, an opening parenthesis (or similar starting token to group
other tokens), a closing parenthesis (or similar), or a prefix (such
as whitespace) on a datum.}
@defproc[(scheme-nobar-lexer/status [in input-port?])
(values (or/c string? eof-object?)
symbol?
(or/c symbol? false/c)
(or/c number? false/c)
(or/c number? false/c)
(or/c 'datum 'open 'close 'continue))]{
Like @scheme[scheme-lexer/status], but for a dialect of Scheme where
@litchar{|} is a delimiter instead of quoting syntax for a symbol.
This function is used by @scheme[scribble-lexer].}
@section{Default lexer}
@defmodule[syntax-color/default-lexer]
@ -90,6 +121,90 @@ A lexer that only identifies @litchar{(}, @litchar{)}, @litchar{[},
@item{A number representing the ending position of the match (or @scheme[#f] if eof).}]
@section{Module Lexer}
@defmodule[syntax-color/module-lexer]
@defproc[(module-lexer [in input-port?]
[mode (or/c #f
(-> input-port? any)
(cons/c (-> input-port? any/c any) any/c))])
(values (or/c string? eof-object?)
symbol?
(or/c symbol? false/c)
(or/c number? false/c)
(or/c number? false/c)
(or/c #f
(-> input-port? any)
(cons/c (-> input-port? any/c any) any/c)))]{
Like @scheme[scheme-lexer], but
@itemize[
@item{A @scheme[module-lexer] accepts (and returns) a lexer mode,
instead of just an input port.}
@item{When @scheme[mode] is @scheme[#f] (indicating the start of the
stream), the lexer checks @scheme[in] for a @hash-lang[]
specification.
If a @hash-lang[] line is present but the specified
language does not exist, the entire @scheme[in] input is
consumed and colored as @scheme['error].
If the language exists and the language provides a
@scheme[get-info] function, then it is called with
@scheme['color-lexer]. If the result is not @scheme[#f], then
it should be a lexer function for use with
@scheme[color:text%]. The result mode is the lexer---paired
with @scheme[#f] if the lexer is a procedure arity 2---so that
future calls will dispatch to the language-supplied lexer.
If the language is specified but it provides no
@scheme[get-info] or @scheme['color-lexer] result, then
@scheme[scheme-lexer] is returned as the mode.}
@item{When @scheme[mode] is a lexer procedure, the lexer is applied
to @scheme[in]. The lexer's results are returned, plus the
lexer again as the mode.}
@item{When @scheme[mode] is a pair, then the lexer procedure in the
@scheme[car] is applied to @scheme[in] and the mode in the
@scheme[cdr]. The lexer's results are returned, except that its
mode result is paired back with the lexer procedure.}
]}
@section{Scribble Lexer}
@defmodule[syntax-color/scribble-lexer]
@defproc[(scribble-lexer [in input-port?]
[mode any/c])
(values (or/c string? eof-object?)
symbol?
(or/c symbol? false/c)
(or/c number? false/c)
(or/c number? false/c)
any/c)]{
Like @scheme[scheme-lexer], but for Scheme extended with Scribbles
@"@" notation (see @secref[#:doc '(lib
"scribblings/scribble/scribble.scrbl") "reader"]).}
@defproc[(scribble-inside-lexer [in input-port?]
[mode any/c])
(values (or/c string? eof-object?)
symbol?
(or/c symbol? false/c)
(or/c number? false/c)
(or/c number? false/c)
any/c)]{
Like @scheme[scribble-lexer], but starting in ``text'' mode instead of
Scheme mode.}
@; ----------------------------------------------------------------------
@section{Splay Tree for Tokenization}

View File

@ -0,0 +1,201 @@
#lang scheme
(require syntax-color/scribble-lexer)
(define in (open-input-string "@|x #|10|#| @me[1 2 #| comment |# ]{10}"))
(define (color str)
(let ([in (open-input-string str)])
(let loop ([mode #f])
(let-values ([(lexeme type paren start end mode) (scribble-inside-lexer in mode)])
(if (eq? type 'eof)
null
(cons (list start end type)
(loop mode)))))))
(define (test* str len-val line)
(let ([v (color str)]
[val (let loop ([pos 1][l len-val])
(if (null? l)
null
(cons (list pos (+ pos (caar l)) (cadar l))
(loop (+ (+ pos (caar l))) (cdr l)))))])
(unless (equal? v val)
(printf "FAILED, line ~s\n" line)
(printf " result\n")
(pretty-print v)
(printf " is not expected\n")
(pretty-print val)
(printf "\n"))))
(define-syntax-rule (test str len-val)
(test* str len-val (syntax-line #'str)))
(test "x" '((1 string)))
(test "x{}" '((3 string)))
(test "@x" '((1 other)
(1 symbol)))
(test "@x str" '((1 other)
(1 symbol)
(4 string)))
(test "@x[] str" '((1 other)
(1 symbol)
(1 other)
(1 other)
(4 string)))
(test "@x[z] str" '((1 other)
(1 symbol)
(1 other)
(1 symbol)
(1 other)
(4 string)))
(test "@x[z +1.5] str" '((1 other)
(1 symbol)
(1 other)
(1 symbol)
(1 white-space)
(4 constant)
(1 other)
(4 string)))
(test "@x[z @w{10}] str" '((1 other)
(1 symbol) ; x
(1 other)
(1 symbol) ; z
(1 white-space)
(1 other)
(1 symbol) ; w
(1 other)
(2 string)
(1 other)
(1 other)
(4 string)))
(test "@x[a@b]{a}{b}" '((1 other)
(1 symbol)
(1 other)
(3 symbol)
(1 other)
(1 other)
(1 string)
(1 other)
(3 string)))
(test "@x{{}}" '((1 other)
(1 symbol)
(1 other)
(1 other) ; {
(1 other) ; }
(1 other)))
(test "@|x|str" '((2 other)
(1 symbol)
(1 other)
(3 string)))
(test "@|x #|ok|#|str" '((2 other)
(1 symbol)
(1 white-space)
(6 comment)
(1 other)
(3 string)))
(test "@| x ; c\n| str" '((2 other)
(1 white-space)
(1 symbol)
(1 white-space)
(3 comment)
(2 other)
(4 string)))
(test "@|(a|b|)|str" '((2 other)
(1 parenthesis)
(4 symbol)
(1 parenthesis)
(1 other)
(3 string)))
(test "@#|bad|#x str" '((1 other)
(7 error)
(1 symbol)
(4 string)))
(test "@@x" '((1 other)
(1 other)
(1 symbol)))
(test "@|@x|z" '((2 other)
(1 other)
(1 symbol)
(1 other)
(1 string)))
(test "@@x[1 2][3]" '((1 other)
(1 other)
(1 symbol)
(1 other)
(1 constant)
(1 white-space)
(1 constant)
(1 other)
(1 other)
(1 constant)
(1 other)))
(test "@x|{10}|" '((1 other)
(1 symbol)
(2 other)
(2 string)
(2 other)))
(test "@x|{@q}|" '((1 other)
(1 symbol)
(2 other)
(2 string)
(2 other)))
(test "@x|!!{@q}!!|" '((1 other)
(1 symbol)
(4 other)
(2 string)
(4 other)))
(test "@x|(({@q}))|" '((1 other)
(1 symbol)
(4 other)
(2 string)
(4 other)))
(test "@x|<<{a|<<@a[10]}>>|" '((1 other)
(1 symbol)
(4 other)
(1 string)
(4 other)
(1 symbol)
(1 other)
(2 constant)
(1 other)
(4 other)))
(test "@x|{ |{ } }|}|" '((1 other)
(1 symbol)
(2 other)
(1 string)
(2 other) ; |{
(3 string)
(2 other) ; }|
(2 other)))
(test "@`',@foo{blah}" '((1 other)
(1 constant) ; `
(1 constant) ; '
(2 other) ; ,@
(3 symbol)
(1 other)
(4 string)
(1 other)))
(test "@; 1" '((4 comment)))
(test "@; 1\nv" '((5 comment)
(1 string)))
(test "@;{1}v" '((2 comment)
(1 other)
(1 string)
(1 other)
(1 string)))
(test "@;|{1 }} }|v" '((2 comment)
(2 other)
(5 string)
(2 other)
(1 string)))

View File

@ -1953,7 +1953,7 @@ static jit_insn *generate_proc_struct_retry(mz_jit_state *jitter, int num_rands,
return ref2;
}
/* Support for interectpting direct calls to primitives: */
/* Support for intercepting direct calls to primitives: */
#if 1
# define mz_prepare_direct_prim(n) mz_prepare(n)
# define mz_finishr_direct_prim(reg, proc) mz_finishr(reg)

View File

@ -263,7 +263,7 @@ scheme_init_port_fun(Scheme_Env *env)
GLOBAL_NONCM_PRIM("read-honu/recursive", read_honu_recur_f, 0, 1, env);
GLOBAL_NONCM_PRIM("read-honu-syntax", read_honu_syntax_f, 0, 2, env);
GLOBAL_NONCM_PRIM("read-honu-syntax/recursive", read_honu_syntax_recur_f, 0, 2, env);
GLOBAL_NONCM_PRIM("read-language", read_language, 0, 2, env);
GLOBAL_PRIM_W_ARITY2("read-language", read_language, 0, 2, 0, -1, env);
GLOBAL_NONCM_PRIM("read-char", read_char, 0, 1, env);
GLOBAL_NONCM_PRIM("read-char-or-special", read_char_spec, 0, 1, env);
GLOBAL_NONCM_PRIM("read-byte", read_byte, 0, 1, env);