racket/collects/tests/syntax-color/scheme-lexer.rkt
Robby Findler 65fd0234ad add a unicode test to scheme-lexer.rkt
and change the tests so they all run with port line
counting enabled (or else the unicode test fails)

adjust module-lexer.rkt tests so they can run in either
port-counting mode or not (but currently run them all in
port-counting mode because scheme-lexer doesn't work without it)

also make a first stab at what needs to change in the module
lexer to make it work in non port line-counting mode
2011-11-03 22:41:27 -05:00

574 lines
12 KiB
Racket
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#lang scheme
;; Not well tested:
;; Non-character input data (i.e. specials)
;; The R5RS part of numbers
;;
;; Not supported
;; Honu (#hx, #sx, #honu)
;; #reader
(require syntax-color/scheme-lexer)
(define (char kind)
(case kind
((white-space) #\space)
((symbol) #\i)
((constant) #\c)
((comment) #\;)
((sexp-comment) #\;)
((string) #\s)
((parenthesis) #\p)
((other) #\o)
((no-color) #\n)
((error) #\x)
(else
(error 'char "Given ~a" kind))))
(define (chunks x)
(cond
((null? x) 0)
((null? (cdr x)) 1)
((char=? (car x) (cadr x))
(chunks (cdr x)))
(else (add1 (chunks (cdr x))))))
(define (lex f p)
(define-values (lexeme kind paren? start end)
(f p))
(cond
((eq? 'eof kind) null)
(else
(cons (make-string (- end start) (char kind))
(lex f p)))))
(define-syntax (test stx)
(syntax-case stx ()
[(_ args ...)
(with-syntax ([line (syntax-line stx)])
#'(test/proc line args ...))]))
(define (test/proc line input expected [e-n (chunks (string->list expected))])
(define p (input->port input))
(port-count-lines! p)
(define l (lex scheme-lexer p))
(define s (apply string-append l))
(close-input-port p)
(unless (string=? s expected)
(eprintf "test on line ~a failed:\n" line)
(eprintf " input : ~s\n" input)
(eprintf " output : ~s\n" s)
(eprintf " expected: ~s\n\n" expected))
(let ((a-n (length l)))
(unless (= e-n a-n)
(eprintf "test on line ~a failed:\n" line)
(eprintf " input : ~a\n" input)
(eprintf " expected: ~a tokens\n" e-n)
(eprintf " got : ~a tokens\n\n" a-n))))
(define (input->port input)
(let-values ([(in out) (make-pipe-with-specials)])
(thread
(λ ()
(let loop ([input input])
(cond
[(list? input)
(for-each loop input)]
[(string? input)
(display input out)]
[else
(write-special input out)]))
(close-output-port out)))
in))
;; Delimiters
(test " " " ")
(test "\t" " ")
(test "\n" " ")
(test "\"" "x")
(test "," "o")
(test "'" "c")
(test "`" "c")
(test ";" ";")
(test "(" "p")
(test ")" "p")
(test "[" "p")
(test "]" "p")
(test "{" "p")
(test "}" "p")
;; #
(test "#fq" "xxx")
(test "#Fq" "xxx")
(test "#tq" "xxx")
(test "#Tq" "xxx")
(test "#true" "ccccc")
(test "#false" "cccccc")
(test "#f q" "cc i")
(test "#F q" "cc i")
(test "#t q" "cc i")
(test "#T q" "cc i")
(test "#true q" "ccccc i")
(test "#false q" "cccccc i")
(test "#f(q" "ccpi")
(test "#T(q" "ccpi")
(test "#true{q" "cccccpi")
(test "#012423(a" "ppppppppi")
(test "#1{a" "pppi")
(test "#1[a" "pppi")
(test "#(a" "ppi")
(test "#{a" "ppi")
(test "#[a" "ppi")
(test "#&a" "cci")
(test "#'a" "cci")
(test "#`a" "cci")
(test "#,a" "ooi")
(test "#,@a" "oooi")
(test "#CsA" "oooi")
(test "#cIA" "oooi")
(test "#hash(a" "ppppppi")
(test "#hasheq(a" "ppppppppi")
(test "#hash[a" "ppppppi")
(test "#hasheq[a" "ppppppppi")
(test "#hash{a" "ppppppi")
(test "#hasheq{a" "ppppppppi")
(test "#135#a" "oooooi")
(test "#453=a" "oooooi")
(test #<<string-end
#! a de \
ase
aa
string-end
";;;;;;;;;;;;; ii")
(test "#;z" ";;i")
;; Bad #
(test "#hashe(" "xxxxxxp")
(test "#HASH(" "xxxxxp")
(test "#HASHEQ(" "xxxxxxxp")
(test "#a#a" "xxxx")
(test "#a=a" "xxxx")
;; Identifier delimiters
(test "a" "i")
(test "a a" "i i")
(test "a\ta" "i i")
(test "a\na" "i i")
(test "a\"a" "ixx")
(test "a,a" "ioi")
(test "a'a" "ici")
(test "a`a" "ici")
(test "a;a" "i;;")
(test "a(a" "ipi")
(test "a)a" "ipi")
(test "a[a" "ipi")
(test "a]a" "ipi")
(test "a{a" "ipi")
(test "a}a" "ipi")
(test "a#a" "iii")
(test "a.a" "iii")
(test "a@a" "iii")
(test "a/a" "iii")
(test "a\"a\"a" "isssi")
(test "a1a" "iii")
(test "a%a" "iii")
;; Bad identidier delimiters
(test "#a\t#a" "xx xx")
(test "#a\n#a" "xx xx")
(test "#a\"#a" "xxxxx" 2)
(test "#a,#a" "xxoxx")
(test "#a'#a" "xxcxx")
(test "#a`#a" "xxcxx")
(test "#a;#a" "xx;;;")
(test "#a(#a" "xxpxx")
(test "#a)#a" "xxpxx")
(test "#a[#a" "xxpxx")
(test "#a]#a" "xxpxx")
(test "#a{#a" "xxpxx")
(test "#a}#a" "xxpxx")
(test "#a#a" "xxxx")
(test "#a.a" "xxxx")
(test "#a@a" "xxxx")
(test "#a/a" "xxxx")
(test "#a\"a\"#a" "xxsssxx")
(test "#a1a" "xxxx")
(test "#a%a" "xxxx")
;; Strange identifiers
(test "1a" "ii")
(test "#%a" "iii")
(test "%a" "ii")
(test "\\8" "ii")
(test "\\a" "ii")
(test "\\\\" "ii")
(test "a\\ a" "iiii")
(test "a\\\ta" "iiii")
(test "a\\\na" "iiii")
(test "a\\\"a" "iiii")
(test "a\\,a" "iiii")
(test "a\\'a" "iiii")
(test "a\\`a" "iiii")
(test "a\\;a" "iiii")
(test "a\\(a" "iiii")
(test "a\\)a" "iiii")
(test "a\\[a" "iiii")
(test "a\\]a" "iiii")
(test "a\\{a" "iiii")
(test "a\\}a" "iiii")
(test "\\|" "ii")
(test "a\\|a" "iiii")
(test #<<end-string
\ \8\ \a\\
end-string
"iiiiiiiiii")
(test "||" "ii")
(test "|a|" "iii")
(test "|8|" "iii")
(test "|\\|" "iii")
(test "a| |a" "iiiii")
(test "a|\t|a" "iiiii")
(test "a|\n|a" "iiiii")
(test "a|\"|a" "iiiii")
(test "a|,|a" "iiiii")
(test "a|'|a" "iiiii")
(test "a|`|a" "iiiii")
(test "a|;|a" "iiiii")
(test "a|(|a" "iiiii")
(test "a|)|a" "iiiii")
(test "a|[|a" "iiiii")
(test "a|]|a" "iiiii")
(test "a|{|a" "iiiii")
(test "a|}|a" "iiiii")
(test "a|,|\\ |||\\|| se \\|\\\\|\\\\|" "iiiiiiiiiiiiiiiiiiiiiiii")
(test "\\ ,a" "iioi")
(test "| |a,a" "iiiioi")
(test "a#||#" "iiiii")
(test "a#;1 2" "ii;;;;")
(test "a#<<a" "iiiii")
(test "1#!1" "iiii")
(test "1+nan.0" "iiiiiii")
(test "-inf.0+1" "iiiiiiii")
(test "\\#:a" "iiii")
(test "#\\:a" "ccci")
;; Bad identifiers
(test "#a" "xx")
(test "#a1.1" "xxxxx")
(test "#a#\\a" "xxxxx")
(test "a|" "xx")
(test "a#|" "xxx")
(test "a||a|, a\n\"" "xxxxxxxxxx")
;; Characters
(test "#\\nul" "ccccc")
(test "#\\NUL" "ccccc")
(test "#\\NuL" "ccccc")
(test "#\\baCKspaCE" "ccccccccccc")
(test "#\\TaB" "ccccc")
(test "#\\NEwline" "ccccccccc")
(test "#\\liNEFEED" "cccccccccc")
(test "#\\vtAb" "cccccc")
(test "#\\PAgE" "cccccc")
(test "#\\reTURn" "cccccccc")
(test "#\\SPACe" "ccccccc")
(test "#\\rubouT" "cccccccc")
(test "#\\nul1a" "cccccii")
(test "#\\18a" "cccii")
(test "#\\411a" "ccciii")
(test "#\\377a" "ccccci")
(test "#\\1#\\a" "cccccc" 2)
(test "#\\uaF12a" "ccccccci")
(test "#\\uaF1g" "cccccci")
(test "#\\UaF12a" "cccccccc")
(test "#\\UaF12aaa" "ccccccccci")
(test "#\\UA" "cccc")
;; Bad Characters
(test "#\\abcsd1a" "xxxxxxxii")
(test "#\\nuls1a" "xxxxxxii")
(test "#\\17a" "xxxxi")
(test "#\\179" "xxxxc")
;; String
(test #<<end-string
""
end-string
"ss")
(test #<<end-string
"\a\b\t\n\v\f\r\e\'\\\"x"
end-string
"sssssssssssssssssssssssss")
(test #<<end-string
"\1\12\123"
end-string
"sssssssssss")
(test #<<end-string
"\xA\xa\x11\xFF\xfff"
end-string
"sssssssssssssssssssss")
(test #<<end-string
"\xA\xa\x11\xFF\xfff"
end-string
"sssssssssssssssssssss")
(test #<<end-string
"
a
a \
"
end-string
"sssssssss")
(test #<<end-string
"\ua\uaA\uAAA\uAAAAa"
end-string
"sssssssssssssssssssss")
(test #<<end-string
"\Ua\UaA\UAAA\UAAAAa\UAAAAA\Uaaaaaa\Uaaaaaaaaaa"
end-string
"ssssssssssssssssssssssssssssssssssssssssssssssss")
(test #<<end-string
#""
end-string
"sss")
(test #<<end-string
#"\a\b\t\n\v\f\r\e\'\\\"x"
end-string
"ssssssssssssssssssssssssss")
(test #<<end-string
#"\1\12\123"
end-string
"ssssssssssss")
(test #<<end-string
#"\xA\xa\x11\xFF\xfff"
end-string
"ssssssssssssssssssssss")
(test #<<end-string
#"\xA\xa\x11\xFF\xfff"
end-string
"ssssssssssssssssssssss")
(test #<<end-string
#"
a
a \
"
end-string
"ssssssssss")
(test #<<end-string
#rx"a"
end-string
"ssssss")
(test #<<end-string
#rx#"a"
end-string
"sssssss")
;; String Errors
(test #<<end-string
#"\u1"a
end-string
"xxxxxxi")
(test #<<end-string
#"\U1"a
end-string
"xxxxxxi")
(test #<<end-string
"\p"a
end-string
"xxxxi")
(test #<<end-string
"\ug"a
end-string
"xxxxxi")
(test #<<end-string
"\Ug"a
end-string
"xxxxxi")
(test #<<end-string
"a
\
s
end-string
"xxxxxx")
(test #<<end-string
"\p
a
a
end-string
"xxxxxxx")
(test #<<end-string
#"\Ua
a
end-string
"xxxxxxx")
(test #<<end-string
#rx"\Ua
a
end-string
"xxxxxxxxx")
(test #<<end-string
#rx"a
a
end-string
"xxxxxxx")
;; Here strings
(test #<<end-string
#<<x
x a
x
x
a
end-string
"sssssssssssss i")
(test #<<end-string
#<<a
as
a
end-string
"xxxxxxxxxx")
(test #<<end-string
#<<
a
end-string
"xxx i")
;; Comments
(test ";ab" ";;;")
(test #<<end-string
1 a; asd\
1 ;a
end-string
"c i;;;;;; c ;;")
(test '(";a" 1 "b") ";;;;" 1) ;; a special comment
(test '(";a" 1 "b\n1" 1) ";;;; cn" 4)
(test ";αα" ";;;")
(test "#||#" ";;;;")
(test "#|#||#|#" ";;;;;;;;")
(test "#| #| \n|# |#" ";;;;;;;;;;;;")
(test "a #|a#|a|#a|#a" "i ;;;;;;;;;;;i")
(test "#|# |#" ";;;;;;")
(test "#|| |#" ";;;;;;")
;; Bad Comments
(test "#|#|#" "xxxxx")
(test "#|" "xx")
(test "a #| a" "i xxxx")
(test "a |# a|" "i iiiii")
(test "#|#| |# a\na" "xxxxxxxxxxx")
(test "#|\n#| |# a\na" "xxxxxxxxxxxx")
;; Numbers (mzscheme extensions)
(test "#b1.1" "ccccc")
(test "#o1.1" "ccccc")
(test "#d1.1" "ccccc")
(test "#xa.F" "ccccc")
(test "#b1e+1" "cccccc")
(test "#o1E+2" "cccccc")
(test "#d1E+2" "cccccc")
(test "#b1D+1" "cccccc")
(test "#o1d+2" "cccccc")
(test "#d1d+2" "cccccc")
(test "1/2e2" "ccccc")
(test "+iNf.0" "cccccc")
(test "-InF.0" "cccccc")
(test "+naN.0" "cccccc")
(test "-nAN.0" "cccccc")
(test "-inf.0+1i" "ccccccccc")
(test "1-inf.0I" "cccccccc")
;; Bad numbers
;; these next two tests are WRONG; if you
;; fix something that makes these tests return
;; the proper results, great! They are this
;; way now so we notice later.
(test "#x1E+2" "cccccc") ;; should be "xxxxxx"
(test "#x1d+2" "cccccc") ;; should be "xxxxxx"
;; Keywords
(test "#:" "pp")
(test "#:a#:a" "pppppp")
(test "#:a #:a" "ppp ppp")
(test "#:a\t#:a" "ppp ppp")
(test "#:a\n#:a" "ppp ppp")
(test "#:a\"#:a" "pppxxxx")
(test "#:a,#:a" "pppoppp")
(test "#:a'#:a" "pppcppp" 3)
(test "#:a`#:a" "pppcppp" 3)
(test "#:a;#:a" "ppp;;;;")
(test "#:a(#:a" "ppppppp" 3)
(test "#:a)#:a" "ppppppp" 3)
(test "#:a[#:a" "ppppppp" 3)
(test "#:a]#:a" "ppppppp" 3)
(test "#:a{#:a" "ppppppp" 3)
(test "#:a}#:a" "ppppppp" 3)
(test "#:a##:a" "ppppppp")
(test "#:a.#:a" "ppppppp")
(test "#:a@#:a" "ppppppp")
(test "#:a/#:a" "ppppppp")
(test "#:a\"#:a\"#:a" "pppsssssppp")
(test "#:a1#:a" "ppppppp")
(test "#:a%#:a" "ppppppp")
(test "#:1a" "pppp")
(test "#:\\8" "pppp")
(test "#:\\a" "pppp")
(test "#:\\\\" "pppp")
(test "#:a\\ a" "pppppp")
(test "#:a\\\ta" "pppppp")
(test "#:a\\\na" "pppppp")
(test "#:a\\\"a" "pppppp")
(test "#:a\\,a" "pppppp")
(test "#:a\\'a" "pppppp")
(test "#:a\\`a" "pppppp")
(test "#:a\\;a" "pppppp")
(test "#:a\\(a" "pppppp")
(test "#:a\\)a" "pppppp")
(test "#:a\\[a" "pppppp")
(test "#:a\\]a" "pppppp")
(test "#:a\\{a" "pppppp")
(test "#:a\\}a" "pppppp")
(test "#:\\|" "pppp")
(test "#:a\\|a" "pppppp")
(test #<<end-string
#:\ \8\ \a\\
end-string
"pppppppppppp")
(test "#:||" "pppp")
(test "#:|a|" "ppppp")
(test "#:|8|" "ppppp")
(test "#:|\\|" "ppppp")
(test "#:a| |a" "ppppppp")
(test "#:a|\t|a" "ppppppp")
(test "#:a|\n|a" "ppppppp")
(test "#:a|\"|a" "ppppppp")
(test "#:a|,|a" "ppppppp")
(test "#:a|'|a" "ppppppp")
(test "#:a|`|a" "ppppppp")
(test "#:a|;|a" "ppppppp")
(test "#:a|(|a" "ppppppp")
(test "#:a|)|a" "ppppppp")
(test "#:a|[|a" "ppppppp")
(test "#:a|]|a" "ppppppp")
(test "#:a|{|a" "ppppppp")
(test "#:a|}|a" "ppppppp")
(test "#:a|,|\\ |||\\|| se \\|\\\\|\\\\|" "pppppppppppppppppppppppppp")
(test "#:\\ ,#:a" "ppppoppp")
(test "#:| |a,#:a" "ppppppoppp")
(test "#:a#||#" "ppppppp")
(test "#:a#;1 2" "pppp;;;;")
(test "#:a#<<a" "ppppppp")
(test "#:1#!1" "pppppp")
(test "#:1+nan.0" "ppppppppp")
(test "#:-inf.0+1" "pppppppppp")
(test "#:\\#:a" "pppppp")
(test "#:#\\:a" "pppppp")
(test "#:1.1" "ppppp")
(test "#:a|" "xxxx")
(test "#:a#|" "xxxxx")
(test "#:a||a|, a\n\"" "xxxxxxxxxxxx")