racket/collects/syntax-color/module-lexer.rkt
2012-02-29 00:28:11 -05:00

101 lines
4.1 KiB
Racket

#lang scheme/base
(require scheme/port
"scheme-lexer.rkt")
(provide module-lexer)
#|
mode : (or/c #f 'before-lang-line
'no-lang-line
(cons lexer mode)
lexer)
the module lexer tracks any white-space and comments before
the #lang line (if any) explicitly by wrapping calls to the
scheme-lexer (in #f or 'before-lang-line mode).
Once it finds a non-white-space and non-comment
token, it checks to see if there is a #lang line and, if so
changes the mode to be the lexer that the #lang indicates,
delegating to it (the last two modes listed above).
If there is no #lang line, then it continues
to delegate to the scheme-lexer (in the 'no-lang-line mode).
|#
(define (module-lexer in offset mode)
(cond
[(or (not mode) (eq? mode 'before-lang-line))
(define lexer-port (peeking-input-port in #:init-position (+ 1 (file-position in))))
(let-values ([(line col pos) (port-next-location in)])
(when line
(port-count-lines! lexer-port)))
(set-port-next-location-from in lexer-port)
(define-values (lexeme type data new-token-start new-token-end) (scheme-lexer lexer-port))
(cond
[(or (eq? type 'comment) (eq? type 'white-space))
(define lexer-end (file-position lexer-port))
;; sync ports
(for/list ([i (in-range (file-position in) (file-position lexer-port))])
(read-byte-or-special in))
(values lexeme type data new-token-start new-token-end 0 'before-lang-line)]
[else
;; look for #lang:
(define p (peeking-input-port in #:init-position (+ 1 (file-position in))))
(let-values ([(line col pos) (port-next-location in)])
(when line
(port-count-lines! p)))
(set-port-next-location-from in p)
(define-values (_1 _2 start-pos) (port-next-location p))
(define get-info (with-handlers ([exn:fail? values]) (read-language p (λ () 'fail))))
(define-values (_3 _4 end-pos) (port-next-location p))
(cond
[(procedure? get-info)
;; sync ports
(for ([i (in-range (file-position in) (file-position p))])
(read-byte-or-special in))
;; Produce language as first token:
(values
"#lang"
'other
#f
start-pos
end-pos
0
(or (let ([v (get-info 'color-lexer #f)])
(and v
(if (procedure-arity-includes? v 3)
(cons v #f)
v)))
scheme-lexer))]
[(and (eq? type 'other)
(string? lexeme)
;; the read-language docs say that this is all it takes to commit to a #lang
(regexp-match #rx"^#[!l]" lexeme))
;; sync ports
(for ([i (in-range (file-position in) (file-position p))])
(read-byte-or-special in))
(values lexeme 'error data 1 end-pos 0 'no-lang-line)]
[else
(for ([i (in-range (file-position in) (file-position lexer-port))])
(read-byte-or-special in))
(values lexeme type data new-token-start new-token-end 0 'no-lang-line)])])]
[(eq? mode 'no-lang-line)
(let-values ([(lexeme type data new-token-start new-token-end)
(scheme-lexer in)])
(values lexeme type data new-token-start new-token-end 0 'no-lang-line))]
[(pair? mode)
;; #lang-selected language consumes and produces a mode:
(let-values ([(lexeme type data new-token-start new-token-end backup-delta new-mode)
((car mode) in offset (cdr mode))])
(values lexeme type data new-token-start new-token-end backup-delta (cons (car mode) new-mode)))]
[else
;; #lang-selected language (or default) doesn't deal with modes:
(let-values ([(lexeme type data new-token-start new-token-end)
(mode in)])
(values lexeme type data new-token-start new-token-end 0 mode))]))
(define (set-port-next-location-from src dest)
(define-values (line col pos) (port-next-location src))
(set-port-next-location! dest line col pos))