132 lines
5.2 KiB
Racket
132 lines
5.2 KiB
Racket
#lang racket/base
|
|
(require racket/port
|
|
"racket-lexer.rkt"
|
|
"lexer-contract.rkt"
|
|
racket/contract
|
|
unstable/options)
|
|
(provide
|
|
(contract-out [module-lexer lexer/c]))
|
|
|
|
#|
|
|
|
|
mode : (or/c #f 'before-lang-line
|
|
'no-lang-line
|
|
(cons lexer mode)
|
|
lexer)
|
|
|
|
the module lexer tracks any white-space and comments before
|
|
the #lang line (if any) explicitly by wrapping calls to the
|
|
scheme-lexer (in #f or 'before-lang-line mode).
|
|
Once it finds a non-white-space and non-comment
|
|
token, it checks to see if there is a #lang line and, if so
|
|
changes the mode to be the lexer that the #lang indicates,
|
|
delegating to it (the last two modes listed above).
|
|
If there is no #lang line, then it continues
|
|
to delegate to the scheme-lexer (in the 'no-lang-line mode).
|
|
|
|
|#
|
|
|
|
|
|
(define (module-lexer in offset mode)
|
|
(cond
|
|
[(or (not mode) (eq? mode 'before-lang-line))
|
|
(define lexer-port (peeking-input-port in #:init-position (+ 1 (file-position in))))
|
|
(let-values ([(line col pos) (port-next-location in)])
|
|
(when line
|
|
(port-count-lines! lexer-port)))
|
|
(set-port-next-location-from in lexer-port)
|
|
(define-values (lexeme type data new-token-start new-token-end) (racket-lexer lexer-port))
|
|
(cond
|
|
[(or (eq? type 'comment) (eq? type 'white-space))
|
|
(define lexer-end (file-position lexer-port))
|
|
;; sync ports
|
|
(for/list ([i (in-range (file-position in) (file-position lexer-port))])
|
|
(read-byte-or-special in))
|
|
(values lexeme type data new-token-start new-token-end 0 'before-lang-line)]
|
|
[else
|
|
;; look for #lang:
|
|
(define p (peeking-input-port in #:init-position (+ 1 (file-position in))))
|
|
(define name-p (peeking-input-port in #:init-position (+ 1 (file-position in))))
|
|
(let-values ([(line col pos) (port-next-location in)])
|
|
(when line
|
|
(port-count-lines! p)
|
|
(port-count-lines! name-p)))
|
|
(set-port-next-location-from in p)
|
|
(set-port-next-location-from in name-p)
|
|
(define-values (_1 _2 start-pos) (port-next-location p))
|
|
(define get-info (with-handlers ([exn:fail? values]) (read-language p (λ () 'fail))))
|
|
(define-values (_3 _4 end-pos) (port-next-location p))
|
|
(cond
|
|
[(procedure? get-info)
|
|
(define lang-name
|
|
(apply string
|
|
(filter
|
|
char?
|
|
(for/list ([i (in-range (file-position in) (file-position p))])
|
|
(read-char name-p)))))
|
|
|
|
;; sync ports
|
|
(for ([i (in-range (file-position in) (file-position p))])
|
|
(read-byte-or-special in))
|
|
|
|
(define the-lexer
|
|
(let ([raw-lexer (or (get-info 'color-lexer #f) racket-lexer)])
|
|
(if (trusted-lexer? raw-lexer)
|
|
(waive-option raw-lexer)
|
|
(exercise-option raw-lexer))))
|
|
|
|
;; Produce language as first token:
|
|
(values
|
|
lang-name
|
|
'other
|
|
#f
|
|
start-pos
|
|
end-pos
|
|
0
|
|
(if (procedure-arity-includes? the-lexer 3)
|
|
(cons the-lexer #f)
|
|
the-lexer))]
|
|
|
|
[(and (eq? type 'other)
|
|
(string? lexeme)
|
|
;; the read-language docs say that this is all it takes to commit to a #lang
|
|
(regexp-match #rx"^#[!l]" lexeme))
|
|
;; sync ports
|
|
(for ([i (in-range (file-position in) (file-position p))])
|
|
(read-byte-or-special in))
|
|
(values lexeme 'error data 1 end-pos 0 'no-lang-line)]
|
|
[else
|
|
(for ([i (in-range (file-position in) (file-position lexer-port))])
|
|
(read-byte-or-special in))
|
|
(values lexeme type data new-token-start new-token-end 0 'no-lang-line)])])]
|
|
[(eq? mode 'no-lang-line)
|
|
(let-values ([(lexeme type data new-token-start new-token-end)
|
|
(racket-lexer in)])
|
|
(values lexeme type data new-token-start new-token-end 0 'no-lang-line))]
|
|
[(pair? mode)
|
|
;; #lang-selected language consumes and produces a mode:
|
|
(let-values ([(lexeme type data new-token-start new-token-end backup-delta new-mode)
|
|
((car mode) in offset (cdr mode))])
|
|
(values lexeme type data new-token-start new-token-end backup-delta
|
|
(if (dont-stop? new-mode)
|
|
(dont-stop (cons (car mode) (dont-stop-val new-mode)))
|
|
(cons (car mode) new-mode))))]
|
|
[else
|
|
;; #lang-selected language (or default) doesn't deal with modes:
|
|
(let-values ([(lexeme type data new-token-start new-token-end)
|
|
(mode in)])
|
|
(values lexeme type data new-token-start new-token-end 0 mode))]))
|
|
|
|
(define (set-port-next-location-from src dest)
|
|
(define-values (line col pos) (port-next-location src))
|
|
(set-port-next-location! dest line col pos))
|
|
|
|
|
|
(define (trusted-lexer? the-lexer)
|
|
(member (object-name the-lexer)
|
|
'(racket-lexer
|
|
racket-lexer/status
|
|
racket-nobar-lexer/status
|
|
scribble-inside-lexer
|
|
scribble-lexer)))
|