started to fix the colorer problems (now that I undestand how!)

plus some extra checking in the colorer
This commit is contained in:
Robby Findler 2011-01-30 08:57:38 -06:00 committed by Robby Findler
parent eb45a6f15b
commit 49c3011f49
2 changed files with 66 additions and 45 deletions

View File

@ -297,6 +297,14 @@ added get-regions
(get-token in in-start-pos in-lexer-mode) (get-token in in-start-pos in-lexer-mode)
(enable-suspend #t)))]) (enable-suspend #t)))])
(unless (eq? 'eof type) (unless (eq? 'eof type)
(unless (exact-nonnegative-integer? new-token-start)
(error 'color:text<%> "expected an exact nonnegative integer for the token start, got ~e" new-token-start))
(unless (exact-nonnegative-integer? new-token-end)
(error 'color:text<%> "expected an exact nonnegative integer for the token end, got ~e" new-token-end))
(unless (exact-nonnegative-integer? backup-delta)
(error 'color:text<%> "expected an exact nonnegative integer for the backup delta, got ~e" backup-delta))
(unless (0 . < . (- new-token-end new-token-start))
(error 'color:text<%> "expected the distance between the start and end position for each token to be positive, but start was ~e and end was ~e" new-token-start new-token-end))
(enable-suspend #f) (enable-suspend #f)
#; (printf "~a at ~a to ~a\n" lexeme (+ in-start-pos (sub1 new-token-start)) #; (printf "~a at ~a to ~a\n" lexeme (+ in-start-pos (sub1 new-token-start))
(+ in-start-pos (sub1 new-token-end))) (+ in-start-pos (sub1 new-token-end)))

View File

@ -1,52 +1,65 @@
#lang scheme/base #lang racket/base
(require scheme/port (require racket/port
"scheme-lexer.rkt") "scheme-lexer.rkt")
(provide module-lexer) (provide module-lexer)
;; mode = (or/c #f 'before-lang-line --
;; 'no-lang-line
;; (cons lexer mode)
;; lexer)
(define count 0)
(define (module-lexer in offset mode) (define (module-lexer in offset mode)
(set! count (+ count 1))
(printf "~a ~s\n" count (list 'module-lexer in offset mode))
(cond (cond
[(not mode) [(or (not mode) (eq? mode 'before-lang-line))
;; Starting out: look for #lang: (define-values (lexeme type data new-token-start new-token-end) (scheme-lexer in))
(let*-values ([(p) (peeking-input-port in)] (printf "before-lang-line lexeme ~s type ~s\n" lexeme type)
[(init) (file-position p)] (cond
[(start-line start-col start-pos) (port-next-location p)]) [(or (eq? type 'comment) (eq? type 'whitespace))
(let ([get-info (with-handlers ([exn:fail? (lambda (exn) 'fail)]) (values lexeme 'other data new-token-start new-token-end 0 'before-lang-line)]
(read-language p (lambda () #f)))] [else
[sync-ports (lambda () ;; look for #lang:
(read-bytes (- (file-position p) init) in))]) (define p (peeking-input-port in))
(port-count-lines! p)
(define init (file-position p))
(define get-info (with-handlers ([exn:fail:read? values]) (read-language p (λ () 'fail))))
(printf "get-info ~s\n" get-info)
(cond (cond
[(procedure? get-info) [(not (procedure? get-info))
;; Produce language as first token: ;(or (exn? get-info) (eq? get-info 'fail))
(sync-ports) (values lexeme type data new-token-start new-token-end 0 'no-lang-line)]
(let-values ([(end-line end-col end-pos) (port-next-location in)]) [(procedure? get-info)
(values (define end-pos (file-position p))
"#lang" (read-bytes (- end-pos init) in) ;; sync ports
'other ;; Produce language as first token:
#f (values
start-pos "#lang"
end-pos 'other
0 #f
(or (let ([v (get-info 'color-lexer #f)]) 1 ;; start-pos
(and v (+ end-pos 1)
(if (procedure-arity-includes? v 3) 0
(cons v #f) (or (let ([v (get-info 'color-lexer #f)])
v))) (and v
scheme-lexer)))] (if (procedure-arity-includes? v 3)
[(eq? 'fail get-info) (cons v #f)
(sync-ports) v)))
(let*-values ([(end-line end-col end-pos) (port-next-location in)]) scheme-lexer))])])]
(values #f 'error #f start-pos end-pos 0 scheme-lexer))] [(eq? mode 'no-lang-line)
[else (let-values ([(lexeme type data new-token-start new-token-end)
;; Start over using the Scheme lexer (scheme-lexer in)])
(module-lexer in offset scheme-lexer)])))] (values lexeme type data new-token-start new-token-end 0 'no-lang-line))]
[(pair? mode) [(pair? mode)
;; #lang-selected language consumes and produces a mode: ;; #lang-selected language consumes and produces a mode:
(let-values ([(lexeme type data new-token-start new-token-end backup-delta new-mode) (let-values ([(lexeme type data new-token-start new-token-end backup-delta new-mode)
((car mode) in offset (cdr mode))]) ((car mode) in offset (cdr mode))])
(values lexeme type data new-token-start new-token-end backup-delta (cons (car mode) new-mode)))] (values lexeme type data new-token-start new-token-end backup-delta (cons (car mode) new-mode)))]
[else [else
;; #lang-selected language (or default) doesn't deal with modes: ;; #lang-selected language (or default) doesn't deal with modes:
(let-values ([(lexeme type data new-token-start new-token-end) (let-values ([(lexeme type data new-token-start new-token-end)
(mode in)]) (mode in)])
(values lexeme type data new-token-start new-token-end 0 mode))])) (values lexeme type data new-token-start new-token-end 0 mode))]))