racket/collects/profj/parsers/lexer.ss
Kathy Gray 38ae4e952b Fixed a bug where interfaces were not being properly mirrored for dynamic use,
also adds ability to use graphics in the interactions window for Full Java

svn: r506
2005-07-30 18:31:11 +00:00

439 lines
16 KiB
Scheme

#cs
(module lexer mzscheme
;; Lexical Analysis according to the Java Language Specification First Edition
;; chapter 3.
;; Lacks all Unicode support
(require (lib "class.ss")
(lib "lex.ss" "parser-tools")
(prefix re: (lib "lex-sre.ss" "parser-tools"))
(lib "parameters.ss" "profj"))
(define (image-snip%)
(if (mred?)
(dynamic-require '(lib "mred.ss" "mred") 'image-snip%)
(class object% (super-instantiate ()))))
(provide (all-defined-except image-snip%))
(define-struct test-case (test))
(define-struct example-box (contents))
(define-struct interact-case (box))
(define-struct class-case (box))
(define-empty-tokens Operators
(PIPE OR OREQUAL
= > < ! ~ ? :
== <= >= != && ++ --
+ - * / & ^ % << >> >>>
+= -= *= /= &= ^= %= <<= >>= >>>=))
(define-empty-tokens Separators
(O_PAREN C_PAREN O_BRACE C_BRACE O_BRACKET C_BRACKET SEMI_COLON PERIOD COMMA))
(define-empty-tokens EmptyLiterals (NULL_LIT TRUE_LIT FALSE_LIT EOF))
(define-empty-tokens Keywords
(abstract default if private this
boolean do implements protected throw
break double import public throws
byte else instanceof return transient
case extends int short try
catch final interface static void
char finally long strictfp volatile
class float native super while
const for new switch
continue goto package synchronized))
(define-empty-tokens ExtraKeywords (dynamic))
(define-tokens java-vals
(STRING_LIT CHAR_LIT INTEGER_LIT LONG_LIT FLOAT_LIT DOUBLE_LIT
IDENTIFIER STRING_ERROR NUMBER_ERROR HEX_LIT OCT_LIT HEXL_LIT OCTL_LIT))
(define-tokens special-toks (CLASS_BOX INTERACTIONS_BOX EXAMPLE TEST_SUITE
IMAGE_SPECIAL OTHER_SPECIAL))
(define (trim-string s f l)
(substring s f (- (string-length s) l)))
(define-lex-abbrevs
;; 3.4
(CR #\015)
(LF #\012)
(LineTerminator (re:or CR
LF
(re:: CR LF)))
(InputCharacter (re:~ CR LF))
;; 3.6
(FF #\014)
(TAB #\011)
(WhiteSpace (re:or #\space
TAB
FF
LineTerminator))
;; 3.7 (Had to transform CommentTail and CommentTailStar into one RE)
;; (DocumentationComment only appears in version 1 of the spec)
(Comment (re:or TraditionalComment
EndOfLineComment
DocumentationComment))
(TraditionalComment (re:: "/*" NotStar CommentTail))
(EndOfLineComment (re:: "//" (re:* InputCharacter)))
(DocumentationComment (re:: "/**" CommentTailStar))
(CommentTail (re:: (re:* (re:: (re:* NotStar) (re:+ "*") NotStarNotSlash))
(re:* NotStar)
(re:+ "*")
"/"))
(CommentTailStar (re:: (re:* (re:: (re:* "*") NotStarNotSlash (re:* NotStar) "*"))
(re:* "*")
"/"))
(NotStar (re:~ "*"))
(NotStarNotSlash (re:~ "*" "/"))
(SyntaxComment (re:or TraditionalCommentEOF
EndOfLineComment))
(TraditionalCommentEOF (re:: "/*" CommentTailEOF))
(CommentTailEOF (re:or (re:: (re:* (re:: (re:* NotStar) (re:+ "*") NotStarNotSlash))
(re:* NotStar)
(re:+ "*")
"/")
(re:: (re:* (re:: (re:* NotStar) (re:+ "*") NotStarNotSlash))
(re:* NotStar)
(re:* "*"))))
;; 3.8 (No need to worry about excluding keywords and such. They will
;; appear first in the lexer spec)
;Not UNICODE compliant
(Identifier (re:: JavaLetter (re:* JavaLetterOrDigit)))
(JavaLetter (re:or (re:/ "AZ" "az") "_" "$"))
(JavaLetterOrDigit (re:or JavaLetter (re:/ "09")))
;; 3.9
(Keyword (re:or "abstract" "default" "if" "private" "this"
"boolean" "do" "implements" "protected" "throw"
"break" "double" "import" "public" "throws"
"byte" "else" "instanceof" "return" "transient"
"case" "extends" "int" "short" "try"
"catch" "final" "interface" "static" "void"
"char" "finally" "long" "strictfp" "volatile"
"class" "float" "native" "super" "while"
"const" "for" "new" "switch"
"continue" "goto" "package" "synchronized"))
;; 3.10.1
(Digits (re:+ (re:/ "09")))
(DigitsOpt (re:* (re:/ "09")))
(IntegerTypeSuffix (char-set "lL"))
(DecimalNumeral (re:or #\0
(re:: (re:/ "19") (re:* (re:/ "09")))))
(HexDigit (re:/ "09" "af" "AF"))
(HexNumeral (re:: #\0 (char-set "xX") (re:+ HexDigit)))
(OctalNumeral (re:: #\0 (re:+ (re:/ "07"))))
;; 3.10.2
(FloatTypeSuffix (char-set "fF"))
(DoubleTypeSuffix (char-set "dD"))
(FloatA (re:: Digits #\. DigitsOpt (re:? ExponentPart)))
(FloatB (re:: #\. Digits (re:? ExponentPart)))
(FloatC (re:: Digits ExponentPart))
(FloatD (re:: Digits (re:? ExponentPart)))
(ExponentPart (re:: (char-set "eE") (re:? (char-set "+-")) Digits))
;; MORE
;; 3.10.6
(EscapeSequence (re:or "\\b" "\\t" "\\n" "\\f" "\\r" "\\\"" "\\'" "\\\\"
(re:: #\\ (re:? (re:/ "03")) (re:/ "07") (re:/ "07"))
(re:: #\\ (re:/ "07"))))
;; 3.12
(Operator (re:or "=" ">" "<" "!" "~" "?" ":"
"==" "<=" ">=" "!=" "&&" "||" "++" "--"
"+" "-" "*" "/" "&" "|" "^" "%" "<<" ">>" ">>>"
"+=" "-=" "*=" "/=" "&=" "|=" "^=" "%=" "<<=" ">>=" ">>>=")))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;Comment lexers
(define read-line-comment
(lexer
[(re:~ #\newline) (read-line-comment input-port)]
[#\newline end-pos]
[(eof) end-pos]
[(special) (read-line-comment input-port)]
[(special-comment) (read-line-comment input-port)]
))
(define read-block-comment
(lexer
["*/" end-pos]
[(eof) end-pos]
[(re:or "*" "/" (complement (re:: any-string (re:or "*" "/") any-string))) (read-block-comment input-port)]
[(special) (read-block-comment input-port)]
[(special-comment) (read-block-comment input-port)]
))
#;(define read-document-comment
(lexer
["**/" end-pos]
[(eof) end-pos]
[(re:or "*" "/" (~ (any-string))) (read-document-comment input-port)]
[(special) (read-document-comment input-port)]
[(special-comment) (read-document-comment input-port)]
[(special-error) (read-document-comment input-port)]))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;String lexer
;get-string: input-port -> (U STRING_LIT STRING_ERROR tokens)
(define (get-string input-port)
(letrec ((tokens (get-string-tokens input-port))
(last-token (list-ref tokens (sub1 (length tokens))))
(tokens->string
(lambda (toks)
;Stops before the last element, which does not have a string
(if (null? (cdr toks))
""
(string-append (string (token-value (position-token-token (car toks))))
(tokens->string (cdr toks)))))))
(if (eq? 'STRING_END (token-name (position-token-token last-token)))
(token-STRING_LIT (list (tokens->string tokens) (position-token-end-pos last-token)))
(token-STRING_ERROR
(list (tokens->string tokens)
(position-token-end-pos last-token)
(position-token-token last-token))))))
;get-string-tokens: input-port -> (list position-token)
(define (get-string-tokens input-port)
(let ((tok (get-str-tok input-port)))
(case (token-name (position-token-token tok))
((STRING_EOF STRING_END STRING_NEWLINE) (list tok))
(else (cons tok (get-string-tokens input-port))))))
(define-tokens str-tok (STRING_CHAR))
(define-empty-tokens err (STRING_END STRING_EOF STRING_NEWLINE))
(define get-str-tok
(lexer-src-pos
(#\" (token-STRING_END))
(EscapeSequence (token-STRING_CHAR (EscapeSequence->char lexeme)))
(InputCharacter (token-STRING_CHAR (string-ref lexeme 0)))
((re:or CR LF) (token-STRING_NEWLINE))
(#\032 (token-STRING_EOF))
((eof) (token-STRING_EOF))))
;; 3.10.6
(define (EscapeSequence->char es)
(cond
((string=? es "\\b") #\010)
((string=? es "\\t") #\011)
((string=? es "\\n") #\012)
((string=? es "\\f") #\014)
((string=? es "\\r") #\015)
((string=? es "\\\"") #\")
((string=? es "\\'") #\')
((string=? es "\\\\") #\\)
(else (integer->char (string->number (trim-string es 1 0) 8)))))
(define get-token
(lexer-src-pos
;; 3.12
(Operator (let ((l lexeme))
(cond
((string=? l "|") (token-PIPE))
((string=? l "||") (token-OR))
((string=? l "|=") (token-OREQUAL))
(else (string->symbol l)))))
;; 3.11
("(" (token-O_PAREN))
(")" (token-C_PAREN))
("{" (token-O_BRACE))
("}" (token-C_BRACE))
("[" (token-O_BRACKET))
("]" (token-C_BRACKET))
(";" (token-SEMI_COLON))
("," (token-COMMA))
("." (token-PERIOD))
;; 3.10.7
("null" (token-NULL_LIT))
;; 3.10.5
(#\" (get-string input-port))
;(token-STRING_LIT (list->string (get-string input-port))))
;; 3.10.4
((re:: #\' (re:~ CR LF #\' #\\) #\')
(token-CHAR_LIT (string-ref lexeme 1)))
((re:: #\' EscapeSequence #\')
(token-CHAR_LIT (EscapeSequence->char
(trim-string lexeme 1 1))))
;; 3.10.3
("true" (token-TRUE_LIT))
("false" (token-FALSE_LIT))
;; 3.10.2
((re:or FloatA FloatB FloatC)
(token-DOUBLE_LIT (string->number lexeme)))
((re:: (re:or FloatA FloatB FloatC FloatD) FloatTypeSuffix)
(token-FLOAT_LIT (string->number (trim-string lexeme 0 1))))
((re:: (re:or FloatA FloatB FloatC FloatD) DoubleTypeSuffix)
(token-DOUBLE_LIT (string->number (trim-string lexeme 0 1))))
;; 3.10.1
(DecimalNumeral
(token-INTEGER_LIT (string->number lexeme 10)))
((re:: DecimalNumeral IntegerTypeSuffix)
(token-LONG_LIT (string->number (trim-string lexeme 0 1) 10)))
((re:: HexNumeral IntegerTypeSuffix)
(token-HEXL_LIT (string->number (trim-string lexeme 2 1) 16)))
(HexNumeral
(token-HEX_LIT (string->number (trim-string lexeme 2 0) 16)))
(OctalNumeral
(token-OCT_LIT (string->number (trim-string lexeme 1 0) 8)))
((re:: OctalNumeral IntegerTypeSuffix)
(token-OCTL_LIT (string->number (trim-string lexeme 1 1) 8)))
("dynamic"
(cond
((dynamic?) (string->symbol lexeme))
(else (token-IDENTIFIER lexeme))))
;; 3.9
(Keyword (string->symbol lexeme))
;; 3.8
(Identifier (token-IDENTIFIER lexeme))
;; 3.7
("//" (begin (read-line-comment input-port) (return-without-pos (get-token input-port))))
("/*" (begin (read-block-comment input-port) (return-without-pos (get-token input-port))))
#;("/**" (begin (read-document-comment input-port) (return-without-pos (get-token input-port))))
((special)
(syntax-case lexeme ()
((parse-example-box examples) (token-EXAMPLE (make-example-box (syntax examples))))
(_
(cond
((and (syntax? lexeme) (syntax-property lexeme 'test-case-box))
(token-TEST_SUITE (make-test-case lexeme)))
((is-a? lexeme (image-snip%))
(token-IMAGE_SPECIAL lexeme))
((token-OTHER_SPECIAL (list lexeme start-pos end-pos)))))))
#;(cond
((class-case? lexeme) (token-CLASS_BOX lexeme))
((interact-case? lexeme) (token-INTERACTIONS_BOX lexeme))
((test-case? lexeme) (token-TEST_SUITE lexeme))
(else (token-OTHER_SPECIAL (list lexeme start-pos end-pos))))
;; 3.6
((re:+ WhiteSpace) (return-without-pos (get-token input-port)))
;; 3.5
(#\032 'EOF)
((eof) 'EOF)
((re:+ (re:/ "09" "az" "AZ")) (token-NUMBER_ERROR lexeme))
))
(define (syn-val lex a b c d)
(values lex a b (position-offset c) (position-offset d)))
(define get-syn-string
(lexer
((re:or CR LF #\") (position-offset end-pos))
((eof) (position-offset end-pos))
(EscapeSequence (get-syn-string input-port))
(InputCharacter (get-syn-string input-port))))
(define (colorize-string my-start-pos)
(lexer
(#\" (syn-val "" 'string #f my-start-pos end-pos))
((re:or CR LF) (syn-val "" 'error #f my-start-pos end-pos))
((eof) (syn-val "" 'error #f my-start-pos end-pos))
(EscapeSequence ((colorize-string my-start-pos) input-port))
(InputCharacter ((colorize-string my-start-pos) input-port))))
(define get-syntax-token
(lexer
;; 3.12
(Operator
(syn-val lexeme 'keyword #f start-pos end-pos))
;; 3.11
((char-set "(){}[]")
(syn-val lexeme 'keyword (string->symbol lexeme) start-pos end-pos))
;; 3.11
((char-set ";,.")
(syn-val lexeme 'default #f start-pos end-pos))
;; 3.10.7, 3.10.4, 3.10.3, 3.10.1
((re:or "null" "true" "false"
;char-lit
(re:: #\' (re:~ CR LF #\' #\\) #\')
(re:: #\' EscapeSequence #\')
;Doubles and Floats
FloatA FloatB FloatC
(re:: (re:or FloatA FloatB FloatC FloatD) FloatTypeSuffix)
(re:: (re:or FloatA FloatB FloatC FloatD) FloatTypeSuffix)
;Decimal numbers
DecimalNumeral
HexNumeral
OctalNumeral
(re:: DecimalNumeral IntegerTypeSuffix)
(re:: HexNumeral IntegerTypeSuffix)
(re:: OctalNumeral IntegerTypeSuffix))
(syn-val lexeme 'literal #f start-pos end-pos))
((re:: #\' InputCharacter) (syn-val lexeme 'literal #f start-pos end-pos))
((re:: #\' InputCharacter (re:~ #\')) (syn-val lexeme 'error #f start-pos end-pos))
((re:: #\' EscapeSequence) (syn-val lexeme 'literal #f start-pos end-pos))
((re:: #\' EscapeSequence (re:~ #\')) (syn-val lexeme 'error #f start-pos end-pos))
((re:: #\' #\\) (syn-val lexeme 'error #f start-pos end-pos))
;; 3.10.5
(#\" ((colorize-string start-pos) input-port))
("dynamic"
(cond
((dynamic?) (syn-val lexeme 'keyword #f start-pos end-pos))
(else (syn-val lexeme 'identifier #f start-pos end-pos))))
;; 3.9
(Keyword (syn-val lexeme 'keyword #f start-pos end-pos))
;; 3.8
(Identifier (syn-val lexeme 'identifier #f start-pos end-pos))
;; 3.7
("//" (syn-val lexeme 'comment #f start-pos (read-line-comment input-port)))
("/*" (syn-val lexeme 'comment #f start-pos (read-block-comment input-port)))
#;("/**" (syn-val lexeme 'comment #f start-pos (read-document-comment input-port)))
;; 3.6
((re:+ WhiteSpace) (syn-val lexeme 'white-space #f start-pos end-pos))
;; 3.5
(#\032 (values lexeme 'eof #f start-pos end-pos))
((eof) (values lexeme 'eof #f start-pos end-pos))
((special) (syn-val "" 'error #f start-pos end-pos))
((special-comment) (syn-val "" 'comment #f start-pos end-pos))
((re:+ (re:/ "09" "az" "AZ")) (syn-val lexeme 'error #f start-pos end-pos))
(any-char (syn-val lexeme 'error #f start-pos end-pos))
))
)