syntax-color/racket-lexer: fix problem with unclosed "| "

The problem occurs when the whitespace character after "|" is non-ASCII (for example, U+00A0, as in the `"|\uA0x"` test case).

This bug was found by random testing in `syntax-color/lexer-contract`.
This commit is contained in:
Matthew Flatt 2014-10-20 16:22:56 -05:00
parent a1bac35b60
commit 95b083165c
2 changed files with 9 additions and 5 deletions

View File

@ -402,14 +402,16 @@
(define racket-nobar-lexer/status (lexer/status nobar-identifier nobar-keyword nobar-bad-id)) (define racket-nobar-lexer/status (lexer/status nobar-identifier nobar-keyword nobar-bad-id))
(define (extend-error lexeme start end in) (define (extend-error lexeme start end in)
(if (memq (peek-char-or-special in) (define next (peek-char-or-special in))
`(special #\newline #\return #\tab #\space #\vtab (if (or (char-whitespace? next)
(memq next
`(special
#\" #\, #\' #\` #\( #\) #\[ #\] #\{ #\} #\; #\" #\, #\' #\` #\( #\) #\[ #\] #\{ #\} #\;
,eof)) ,eof)))
(ret lexeme 'error #f start end 'bad) (ret lexeme 'error #f start end 'bad)
(let-values (((rest end-pos) (get-chunk in))) (let-values (((rest end-pos) (get-chunk in)))
(ret (string-append lexeme rest) 'error #f start end-pos 'bad)))) (ret (string-append lexeme rest) 'error #f start end-pos 'bad))))
(define get-chunk (define get-chunk
(lexer (lexer
((:+ (:~ identifier-delims)) (values lexeme end-pos)))) [(:+ (:~ identifier-delims)) (values lexeme end-pos)]))

View File

@ -592,3 +592,5 @@ end-string
(test "#:a|" "xxxx") (test "#:a|" "xxxx")
(test "#:a#|" "xxxxx") (test "#:a#|" "xxxxx")
(test "#:a||a|, a\n\"" "xxxxxxxxxxxx") (test "#:a||a|, a\n\"" "xxxxxxxxxxxx")
(test "|\uA0x" "xxx")