adjust the module lexer so that it treats the entire range that
'read-language' uses as a single token in the case that read-language fails. This helps it to deal with things like s-exp and at-exp properly closes PR 12260
This commit is contained in:
parent
f6e5468dbb
commit
1eaf53d4cb
|
@ -22,7 +22,6 @@ to delegate to the scheme-lexer (in the 'no-lang-line mode).
|
|||
|
||||
|#
|
||||
|
||||
|
||||
(define (module-lexer in offset mode)
|
||||
(cond
|
||||
[(or (not mode) (eq? mode 'before-lang-line))
|
||||
|
@ -43,9 +42,9 @@ to delegate to the scheme-lexer (in the 'no-lang-line mode).
|
|||
(define p (peeking-input-port in))
|
||||
(port-count-lines! p)
|
||||
(define get-info (with-handlers ([exn:fail? values]) (read-language p (λ () 'fail))))
|
||||
(define end-pos (file-position p))
|
||||
(cond
|
||||
[(procedure? get-info)
|
||||
(define end-pos (file-position p))
|
||||
;; sync ports
|
||||
(for ([i (in-range 0 end-pos)])
|
||||
(read-char-or-special in))
|
||||
|
@ -63,16 +62,19 @@ to delegate to the scheme-lexer (in the 'no-lang-line mode).
|
|||
(cons v #f)
|
||||
v)))
|
||||
scheme-lexer))]
|
||||
[else
|
||||
|
||||
[(and (eq? type 'other)
|
||||
(string? lexeme)
|
||||
;; the read-language docs say that this is all it takes to commit to a #lang
|
||||
(regexp-match #rx"^#[!l]" lexeme))
|
||||
;; sync ports
|
||||
(for ([i (in-range 0 end-pos)])
|
||||
(read-char-or-special in))
|
||||
(values lexeme 'error data 1 (+ end-pos 1) 0 'no-lang-line)]
|
||||
[else
|
||||
(for ([i (in-range 0 (file-position lexer-port))])
|
||||
(read-char-or-special in))
|
||||
(if (and (eq? type 'other)
|
||||
(string? lexeme)
|
||||
;; the read-language docs say that this is all it takes to commit to a #lang
|
||||
(regexp-match #rx"^#[!l]" lexeme))
|
||||
(values lexeme 'error data new-token-start new-token-end 0 'no-lang-line)
|
||||
(values lexeme type data new-token-start new-token-end 0 'no-lang-line))])])]
|
||||
(values lexeme type data new-token-start new-token-end 0 'no-lang-line)])])]
|
||||
[(eq? mode 'no-lang-line)
|
||||
(let-values ([(lexeme type data new-token-start new-token-end)
|
||||
(scheme-lexer in)])
|
||||
|
|
46
collects/tests/syntax-color/module-lexer.rkt
Normal file
46
collects/tests/syntax-color/module-lexer.rkt
Normal file
|
@ -0,0 +1,46 @@
|
|||
#lang racket/base
|
||||
(require syntax-color/module-lexer
|
||||
rackunit)
|
||||
|
||||
(define (lex str)
|
||||
(define p (open-input-string str))
|
||||
(let loop ([mode #f]
|
||||
[n 0])
|
||||
(define-values (lexeme type data token-start token-end backup new-mode)
|
||||
(module-lexer p
|
||||
0
|
||||
mode))
|
||||
(define one (list lexeme
|
||||
type token-start token-end
|
||||
(cond
|
||||
[(procedure? mode)
|
||||
`(proc ,(object-name mode))]
|
||||
[(and (pair? mode)
|
||||
(procedure? (car mode)))
|
||||
(cons `(proc ,(object-name (car mode)))
|
||||
(cdr mode))]
|
||||
[else mode])))
|
||||
(cond
|
||||
[(eof-object? lexeme) (list one)]
|
||||
[(= n 1000) '()] ;; watch out for loops
|
||||
[else (cons one (loop new-mode (+ n 1)))])))
|
||||
|
||||
(check-equal? (lex "#lang racket/base")
|
||||
`(("#lang" other 1 18 #f)
|
||||
(,eof eof #f #f (proc scheme-lexer))))
|
||||
(check-equal? (lex "#lang racket/base\n1")
|
||||
`(("#lang" other 1 18 #f)
|
||||
("\n" white-space 18 19 (proc scheme-lexer))
|
||||
("1" constant 19 20 (proc scheme-lexer))
|
||||
(,eof eof #f #f (proc scheme-lexer))))
|
||||
(check-equal? (lex ";; a\n#lang racket/base")
|
||||
`(("; a" comment 1 5 #f)
|
||||
("\n" white-space 5 6 before-lang-line)
|
||||
("#lang" other 1 18 before-lang-line)
|
||||
(,eof eof #f #f (proc scheme-lexer))))
|
||||
(check-equal? (lex "#lang at-exp racket/base")
|
||||
`(("#lang" other 1 25 #f)
|
||||
(,eof eof 25 25 ((proc scribble-lexer) . #f))))
|
||||
(check-equal? (lex "#lang at-exp racket/baseBOGUS")
|
||||
`(("#lang at-exp" error 1 30 #f)
|
||||
(,eof eof #f #f no-lang-line)))
|
Loading…
Reference in New Issue
Block a user