adjust the module lexer so that it treats the entire range that

'read-language' uses as a single token in the case that read-language
fails. This helps it to deal with things like s-exp and at-exp
properly

closes PR 12260
This commit is contained in:
Robby Findler 2011-10-06 10:48:58 -05:00
parent f6e5468dbb
commit 1eaf53d4cb
2 changed files with 57 additions and 9 deletions

View File

@ -22,7 +22,6 @@ to delegate to the scheme-lexer (in the 'no-lang-line mode).
|#
(define (module-lexer in offset mode)
(cond
[(or (not mode) (eq? mode 'before-lang-line))
@ -43,9 +42,9 @@ to delegate to the scheme-lexer (in the 'no-lang-line mode).
(define p (peeking-input-port in))
(port-count-lines! p)
(define get-info (with-handlers ([exn:fail? values]) (read-language p (λ () 'fail))))
(define end-pos (file-position p))
(cond
[(procedure? get-info)
(define end-pos (file-position p))
;; sync ports
(for ([i (in-range 0 end-pos)])
(read-char-or-special in))
@ -63,16 +62,19 @@ to delegate to the scheme-lexer (in the 'no-lang-line mode).
(cons v #f)
v)))
scheme-lexer))]
[else
[(and (eq? type 'other)
(string? lexeme)
;; the read-language docs say that this is all it takes to commit to a #lang
(regexp-match #rx"^#[!l]" lexeme))
;; sync ports
(for ([i (in-range 0 end-pos)])
(read-char-or-special in))
(values lexeme 'error data 1 (+ end-pos 1) 0 'no-lang-line)]
[else
(for ([i (in-range 0 (file-position lexer-port))])
(read-char-or-special in))
(if (and (eq? type 'other)
(string? lexeme)
;; the read-language docs say that this is all it takes to commit to a #lang
(regexp-match #rx"^#[!l]" lexeme))
(values lexeme 'error data new-token-start new-token-end 0 'no-lang-line)
(values lexeme type data new-token-start new-token-end 0 'no-lang-line))])])]
(values lexeme type data new-token-start new-token-end 0 'no-lang-line)])])]
[(eq? mode 'no-lang-line)
(let-values ([(lexeme type data new-token-start new-token-end)
(scheme-lexer in)])

View File

@ -0,0 +1,46 @@
#lang racket/base
(require syntax-color/module-lexer
rackunit)
(define (lex str)
(define p (open-input-string str))
(let loop ([mode #f]
[n 0])
(define-values (lexeme type data token-start token-end backup new-mode)
(module-lexer p
0
mode))
(define one (list lexeme
type token-start token-end
(cond
[(procedure? mode)
`(proc ,(object-name mode))]
[(and (pair? mode)
(procedure? (car mode)))
(cons `(proc ,(object-name (car mode)))
(cdr mode))]
[else mode])))
(cond
[(eof-object? lexeme) (list one)]
[(= n 1000) '()] ;; watch out for loops
[else (cons one (loop new-mode (+ n 1)))])))
(check-equal? (lex "#lang racket/base")
`(("#lang" other 1 18 #f)
(,eof eof #f #f (proc scheme-lexer))))
(check-equal? (lex "#lang racket/base\n1")
`(("#lang" other 1 18 #f)
("\n" white-space 18 19 (proc scheme-lexer))
("1" constant 19 20 (proc scheme-lexer))
(,eof eof #f #f (proc scheme-lexer))))
(check-equal? (lex ";; a\n#lang racket/base")
`(("; a" comment 1 5 #f)
("\n" white-space 5 6 before-lang-line)
("#lang" other 1 18 before-lang-line)
(,eof eof #f #f (proc scheme-lexer))))
(check-equal? (lex "#lang at-exp racket/base")
`(("#lang" other 1 25 #f)
(,eof eof 25 25 ((proc scribble-lexer) . #f))))
(check-equal? (lex "#lang at-exp racket/baseBOGUS")
`(("#lang at-exp" error 1 30 #f)
(,eof eof #f #f no-lang-line)))