From 1eaf53d4cb1c817fdcf6d3a32783b99530621a9a Mon Sep 17 00:00:00 2001 From: Robby Findler Date: Thu, 6 Oct 2011 10:48:58 -0500 Subject: [PATCH] adjust the module lexer so that it treats the entire range that 'read-language' uses as a single token in the case that read-language fails. This helps it to deal with things like s-exp and at-exp properly closes PR 12260 --- collects/syntax-color/module-lexer.rkt | 20 +++++---- collects/tests/syntax-color/module-lexer.rkt | 46 ++++++++++++++++++++ 2 files changed, 57 insertions(+), 9 deletions(-) create mode 100644 collects/tests/syntax-color/module-lexer.rkt diff --git a/collects/syntax-color/module-lexer.rkt b/collects/syntax-color/module-lexer.rkt index 9d7a52d6c4..98962ef0da 100644 --- a/collects/syntax-color/module-lexer.rkt +++ b/collects/syntax-color/module-lexer.rkt @@ -22,7 +22,6 @@ to delegate to the scheme-lexer (in the 'no-lang-line mode). |# - (define (module-lexer in offset mode) (cond [(or (not mode) (eq? mode 'before-lang-line)) @@ -43,9 +42,9 @@ to delegate to the scheme-lexer (in the 'no-lang-line mode). (define p (peeking-input-port in)) (port-count-lines! p) (define get-info (with-handlers ([exn:fail? values]) (read-language p (λ () 'fail)))) + (define end-pos (file-position p)) (cond [(procedure? get-info) - (define end-pos (file-position p)) ;; sync ports (for ([i (in-range 0 end-pos)]) (read-char-or-special in)) @@ -63,16 +62,19 @@ to delegate to the scheme-lexer (in the 'no-lang-line mode). (cons v #f) v))) scheme-lexer))] - [else + + [(and (eq? type 'other) + (string? lexeme) + ;; the read-language docs say that this is all it takes to commit to a #lang + (regexp-match #rx"^#[!l]" lexeme)) ;; sync ports + (for ([i (in-range 0 end-pos)]) + (read-char-or-special in)) + (values lexeme 'error data 1 (+ end-pos 1) 0 'no-lang-line)] + [else (for ([i (in-range 0 (file-position lexer-port))]) (read-char-or-special in)) - (if (and (eq? type 'other) - (string? lexeme) - ;; the read-language docs say that this is all it takes to commit to a #lang - (regexp-match #rx"^#[!l]" lexeme)) - (values lexeme 'error data new-token-start new-token-end 0 'no-lang-line) - (values lexeme type data new-token-start new-token-end 0 'no-lang-line))])])] + (values lexeme type data new-token-start new-token-end 0 'no-lang-line)])])] [(eq? mode 'no-lang-line) (let-values ([(lexeme type data new-token-start new-token-end) (scheme-lexer in)]) diff --git a/collects/tests/syntax-color/module-lexer.rkt b/collects/tests/syntax-color/module-lexer.rkt new file mode 100644 index 0000000000..5be0e6df76 --- /dev/null +++ b/collects/tests/syntax-color/module-lexer.rkt @@ -0,0 +1,46 @@ +#lang racket/base +(require syntax-color/module-lexer + rackunit) + +(define (lex str) + (define p (open-input-string str)) + (let loop ([mode #f] + [n 0]) + (define-values (lexeme type data token-start token-end backup new-mode) + (module-lexer p + 0 + mode)) + (define one (list lexeme + type token-start token-end + (cond + [(procedure? mode) + `(proc ,(object-name mode))] + [(and (pair? mode) + (procedure? (car mode))) + (cons `(proc ,(object-name (car mode))) + (cdr mode))] + [else mode]))) + (cond + [(eof-object? lexeme) (list one)] + [(= n 1000) '()] ;; watch out for loops + [else (cons one (loop new-mode (+ n 1)))]))) + +(check-equal? (lex "#lang racket/base") + `(("#lang" other 1 18 #f) + (,eof eof #f #f (proc scheme-lexer)))) +(check-equal? (lex "#lang racket/base\n1") + `(("#lang" other 1 18 #f) + ("\n" white-space 18 19 (proc scheme-lexer)) + ("1" constant 19 20 (proc scheme-lexer)) + (,eof eof #f #f (proc scheme-lexer)))) +(check-equal? (lex ";; a\n#lang racket/base") + `(("; a" comment 1 5 #f) + ("\n" white-space 5 6 before-lang-line) + ("#lang" other 1 18 before-lang-line) + (,eof eof #f #f (proc scheme-lexer)))) +(check-equal? (lex "#lang at-exp racket/base") + `(("#lang" other 1 25 #f) + (,eof eof 25 25 ((proc scribble-lexer) . #f)))) +(check-equal? (lex "#lang at-exp racket/baseBOGUS") + `(("#lang at-exp" error 1 30 #f) + (,eof eof #f #f no-lang-line)))