#lang scheme/base (require scheme/port "scheme-lexer.rkt") (provide module-lexer) #| mode : (or/c #f 'before-lang-line 'no-lang-line (cons lexer mode) lexer) the module lexer tracks any white-space and comments before the #lang line (if any) explicitly by wrapping calls to the scheme-lexer (in #f or 'before-lang-line mode). Once it finds a non-white-space and non-comment token, it checks to see if there is a #lang line and, if so changes the mode to be the lexer that the #lang indicates, delegating to it (the last two modes listed above). If there is no #lang line, then it continues to delegate to the scheme-lexer (in the 'no-lang-line mode). |# (define (module-lexer in offset mode) (cond [(or (not mode) (eq? mode 'before-lang-line)) (define lexer-port (peeking-input-port in #:init-position (+ 1 (file-position in)))) (let-values ([(line col pos) (port-next-location in)]) (when line (port-count-lines! lexer-port))) (set-port-next-location-from in lexer-port) (define-values (lexeme type data new-token-start new-token-end) (scheme-lexer lexer-port)) (cond [(or (eq? type 'comment) (eq? type 'white-space)) (define lexer-end (file-position lexer-port)) ;; sync ports (for/list ([i (in-range (file-position in) (file-position lexer-port))]) (read-byte-or-special in)) (values lexeme type data new-token-start new-token-end 0 'before-lang-line)] [else ;; look for #lang: (define p (peeking-input-port in #:init-position (+ 1 (file-position in)))) (let-values ([(line col pos) (port-next-location in)]) (when line (port-count-lines! p))) (set-port-next-location-from in p) (define-values (_1 _2 start-pos) (port-next-location p)) (define get-info (with-handlers ([exn:fail? values]) (read-language p (λ () 'fail)))) (define-values (_3 _4 end-pos) (port-next-location p)) (cond [(procedure? get-info) ;; sync ports (for ([i (in-range (file-position in) (file-position p))]) (read-byte-or-special in)) ;; Produce language as first token: (values "#lang" 'other #f start-pos end-pos 0 (or (let ([v (get-info 'color-lexer #f)]) (and v (if (procedure-arity-includes? v 3) (cons v #f) v))) scheme-lexer))] [(and (eq? type 'other) (string? lexeme) ;; the read-language docs say that this is all it takes to commit to a #lang (regexp-match #rx"^#[!l]" lexeme)) ;; sync ports (for ([i (in-range (file-position in) (file-position p))]) (read-byte-or-special in)) (values lexeme 'error data 1 end-pos 0 'no-lang-line)] [else (for ([i (in-range (file-position in) (file-position lexer-port))]) (read-byte-or-special in)) (values lexeme type data new-token-start new-token-end 0 'no-lang-line)])])] [(eq? mode 'no-lang-line) (let-values ([(lexeme type data new-token-start new-token-end) (scheme-lexer in)]) (values lexeme type data new-token-start new-token-end 0 'no-lang-line))] [(pair? mode) ;; #lang-selected language consumes and produces a mode: (let-values ([(lexeme type data new-token-start new-token-end backup-delta new-mode) ((car mode) in offset (cdr mode))]) (values lexeme type data new-token-start new-token-end backup-delta (cons (car mode) new-mode)))] [else ;; #lang-selected language (or default) doesn't deal with modes: (let-values ([(lexeme type data new-token-start new-token-end) (mode in)]) (values lexeme type data new-token-start new-token-end 0 mode))])) (define (set-port-next-location-from src dest) (define-values (line col pos) (port-next-location src)) (set-port-next-location! dest line col pos))