Merging in r6487:6512 from the branch kathyg/parser2

Adds the new parser to professorj

svn: r6515
This commit is contained in:
Kathy Gray 2007-06-07 15:09:10 +00:00
parent 6741e4d3e7
commit a33bbe23e6
8 changed files with 1773 additions and 10 deletions

View File

@ -0,0 +1,2 @@
(module info (lib "infotab.ss" "setup")
(define name "ProfessorJ: parser"))

View File

@ -0,0 +1,390 @@
(module java-signatures mzscheme
(require (lib "unit.ss"))
(require (lib "lex.ss" "parser-tools")
(lib "combinator-unit.ss" "combinator-parser")
(lib "string.ss"))
(provide (all-defined))
;Terminal signatures
(define-signature java-operators^
((terminals Operators
(PIPE OR OREQUAL GT LT ! ~ ? : == LTEQ GTEQ != && ++ -- PLUS
MINUS DIVIDE & ^T % << >> >>> += -= *= /= &= ^= %= <<= >>= >>>=))))
(define-signature java-separators^
((terminals Separators
(O_PAREN C_PAREN O_BRACE C_BRACE O_BRACKET C_BRACKET SEMI_COLON
PERIOD COMMA EQUAL TIMES))))
(define-signature java-literals^
((terminals EmptyLiterals (NULL_LIT TRUE_LIT FALSE_LIT EOF))))
(define-signature java-expression-keywords^
((terminals ExpressionKeywords (instanceof new))))
(define-signature java-statement-keywords^
((terminals StatementKeywords (break caseT catch continue ifT doT elseT
for return switch throw try while finally))))
(define-signature java-definition-keywords^
((terminals DefinitionKeywords (abstract class extends final private implements import
interface native package protected public static
strictfp synchronized throws transient volatile))))
(define-signature java-type-keywords^
((terminals TypeKeywords (boolean byte char double float int long short voidT))))
(define-signature java-reserved^
((terminals ReservedWords (default const goto))))
(define-signature java-extras^
((terminals ExtraKeywords (dynamic check expect within -> ->> ->>> test tests testcase))))
(define-signature java-ids^ ((terminals java-vals (IDENTIFIER this super))))
(define-signature java-vals^
((terminals java-vals (STRING_LIT CHAR_LIT INTEGER_LIT LONG_LIT FLOAT_LIT DOUBLE_LIT
STRING_ERROR NUMBER_ERROR HEX_LIT OCT_LIT
HEXL_LIT OCTL_LIT))))
(define-signature java-specials^
((terminals special-toks (EXAMPLE TEST_SUITE IMAGE_SPECIAL OTHER_SPECIAL))))
;General purpose signatures
(define-signature general-productions^ (comma-sep variable-declaration name))
;Types, modifiers, operator signatures
(define-signature java-types^ (integer-types inexact-types numeric-type prim-type
other-type-base value+name-type method-type array-type))
(define-signature java-access^ (access-mods global-mods method-mods))
(define-signature java-ops^ (math-ops shift-ops compare-ops bool-ops bit-ops assignment-ops
bin-ops un-assignment un-op))
;Expression signatures
(define-signature expression-maker^ (simple-expression))
(define-signature expr-lits^ (boolean-lits textual-lits prim-numeric-lits null-lit numeric-lits
double-lits literals all-literals))
(define-signature expr-terms^ (new-class new-array simple-method-call
assignment unary-assignment-front cast
super-call))
(define-signature expr-tails^ (field-access-end
array-access-end array-init
binary-expression-end if-expr-end
method-call-end unary-assignment-back instanceof-back))
(define-signature expr-terms+^ extends expr-terms^ (checks))
;Statement signatures
(define-signature statements^ (statement if-s return-s this-call super-ctor-call
block expression-stmt while-l do-while for-l
break-s cont-s init))
;Member signatures
(define-signature fields^ (field arg args))
(define-signature methods^ (method-signature method-header method))
(define-signature ctors^ (constructor))
;Definition signatures
(define-signature interfaces^ (interface-body interface-def))
(define-signature classes^ (class-body implements-dec extend-dec class-def))
(define-signature top-forms^ (top-member import-dec program))
)
;
; ;
; ;
; ; ;;;;
; ; ;
; ; ;
; ; ; ;;;;; ;; ;;; ;;;; ;; ;; ;;; ;;;;; ;;;; ;; ;;;;; ;;;; ;
; ; ; ; ;; ; ; ;; ; ; ; ; ;; ; ; ; ;;
; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ;
; ; ; ;;;;;; ; ; ; ; ; ; ;;;;;; ; ; ;;;;;;;;; ;;;
; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ;;;
; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ;
; ; ; ; ;; ; ; ; ;; ; ;; ; ;; ; ;; ; ;; ;; ;
; ; ;;;;;;; ;;;; ;; ;;; ;;; ;;;; ; ;;; ;; ;;;; ;; ;;;; ; ;;;;; ; ;;;;
; ; ; ;
; ; ; ;
; ; ;;;;; ;;;;;
;
; (define beginner-unique-base
; (simple-expression
; (list (literals (list boolean-lits textual-lits prim-numeric-lits double-lits))
; this
; IDENTIFIER
; (new-class IDENTIFIER (eta beginner-expression))
; (simple-method-call (eta beginner-expression))
; (sequence (O_PAREN (eta beginner-expression) C_PAREN) id "expression")
; (sequence (! (eta beginner-expression)) id "unary expression")
; (checks (eta beginner-expression)))))
;
; (define beginner-unique-end
; (simple-expression
; (list field-access-end
; (method-call-end (eta beginner-expression))
; (binary-expression-end (bin-ops (list math-ops compare-ops bool-ops))
; (eta beginner-expression)))))
;
; (define beginner-expression
; (sequence (beginner-unique-base (repeat beginner-unique-end)) id "expression"))
;
; (define beginner-statement
; (statement (list (if-s beginner-expression (eta beginner-statement) #f)
; (return-s beginner-expression #f))))
;
; (define beginner-field (field #f value-type beginner-expression #f))
;
; (define beginner-method-sig
; (method-signature #f value-type args))
;
; (define beginner-method
; (method beginner-method-sig beginner-statement))
;
; (define beginner-constructor (constructor #f init*))
;
; (define beginner-interface
; (interface-def #f #f (method-header* beginner-method-sig)))
;
; (define beginner-class
; (class-def #f #f (implements-dec IDENTIFIER)
; (repeat (class-body (list beginner-field beginner-method beginner-constructor)))))
;
; (define beginner-program
; (program #f (repeat import-dec)
; (repeat (top-member (list beginner-class beginner-interface)))))
;
; (define parse-beginner (parser beginner-program))
;
; (define intermediate-unique-base
; (simple-expression
; (list (literals (list boolean-lits textual-lits prim-numeric-lits double-lits))
; this
; IDENTIFIER
; (new-class IDENTIFIER (eta intermediate-expression))
; (simple-method-call (eta intermediate-expression))
; (sequence (O_PAREN (eta intermediate-expression) C_PAREN) id "expression")
; (sequence (! (eta intermediate-expression)) id "unary expression")
; (cast value-type (eta intermediate-expression))
; (super-call (eta intermediate-expression))
; (checks (eta intermediate-expression)))))
;
; (define intermediate-unique-end
; (simple-expression
; (list field-access-end
; (method-call-end (eta intermediate-expression))
; (binary-expression-end (bin-ops (list math-ops compare-ops bool-ops bit-ops))
; (eta intermediate-expression)))))
;
; (define intermediate-expression
; (sequence (intermediate-unique-base (repeat intermediate-unique-end))
; id "expression"))
;
; (define intermediate-stmt-expr
; (simple-expression (list (new-class IDENTIFIER intermediate-expression)
; (super-call intermediate-expression)
; (sequence (intermediate-expression
; (method-call-end intermediate-expression))
; id "method call")
; (assignment IDENTIFIER EQUAL intermediate-expression))))
;
; (define intermediate-statement
; (statement (list (if-s intermediate-expression (eta intermediate-statement) #f)
; (return-s intermediate-expression #t)
; (variable-declaration value-type intermediate-expression #f "local variable")
; (block (repeat (eta intermediate-statement)))
; (sequence (intermediate-stmt-expr SEMI_COLON) id "statement"))))
;
; (define intermediate-field (field access-mods value-type intermediate-expression #t))
;
; (define intermediate-method-sig-no-abs
; (method-signature access-mods
; (method-type value-type)
; args))
; (define intermediate-method-sig-abs
; (method-signature (method-mods access-mods)
; (method-type value-type)
; args))
;
; (define intermediate-method
; (choose ((method intermediate-method-sig-no-abs intermediate-statement)
; (method-header intermediate-method-sig-abs)) "method definition top"))
;
; (define intermediate-constructor
; (constructor access-mods
; (choose ((sequence ((super-call intermediate-expression) (repeat intermediate-statement)) id)
; (sequence ((this-call intermediate-expression) (repeat intermediate-statement)) id)
; (repeat intermediate-statement)) "constructor body")))
;
; (define intermediate-interface
; (interface-def
; #f
; (sequence (extends (comma-sep IDENTIFIER "interfaces")) id "extends")
; (method-header* intermediate-method-sig-no-abs)))
;
; (define intermediate-class
; (class-def abstract (extend-dec IDENTIFIER) (implements-dec (comma-sep IDENTIFIER "interfaces"))
; (repeat (class-body (list intermediate-field intermediate-method intermediate-constructor)))))
;
; (define intermediate-program
; (program #f (repeat import-dec)
; (repeat (top-member (list intermediate-class intermediate-interface)))))
;
; (define parse-intermediate (parser intermediate-program))
;
; (define advanced-unique-base
; (simple-expression
; (list (literals (list boolean-lits textual-lits prim-numeric-lits double-lits))
; this
; IDENTIFIER
; (new-class IDENTIFIER (eta advanced-expression))
; (simple-method-call (eta advanced-expression))
; (new-array value-type (eta advanced-expression))
; (sequence (O_PAREN (eta advanced-expression) C_PAREN) id "expression")
; (sequence (! (eta advanced-expression)) id "unary expression")
; (cast value-type (eta advanced-expression))
; (super-call (eta advanced-expression))
; (checks (eta advanced-expression)))))
;
; (define advanced-unique-end
; (simple-expression
; (list field-access-end
; (array-access-end (eta advanced-expression))
; (method-call-end (eta advanced-expression))
; (if-expr-end (eta advanced-expression))
; (binary-expression-end (bin-ops (list math-ops compare-ops bool-ops))
; (eta advanced-expression)))))
;
; (define advanced-expression
; (sequence (advanced-unique-base (repeat advanced-unique-end)) id "expression"))
;
;
; (define advanced-stmt-expr
; (simple-expression (list (new-class IDENTIFIER advanced-expression)
; (super-call advanced-expression)
; (sequence (advanced-expression
; (method-call-end advanced-expression)) id "method call")
; (assignment IDENTIFIER assignment-ops advanced-expression)
; (sequence (advanced-expression ++) id "unary mutation")
; (sequence (advanced-expression --) id "unary mutation")
; (sequence (++ advanced-expression) id "unary mutation")
; (sequence (-- advanced-expression) id "unary mutation"))))
;
; (define advanced-statement
; (statement (list (if-s advanced-expression (eta advanced-statement) #t)
; (return-s advanced-expression #t)
; (variable-declaration value-type advanced-expression #t "local variable")
; (block (repeat (eta advanced-statement)))
; (sequence (advanced-stmt-expr SEMI_COLON) id "statement")
; (for-l (choose ((variable-declaration value-type advanced-expression #t "for loop variable")
; (comma-sep advanced-stmt-expr "initializations")) "for loop initialization")
; #t
; advanced-expression #t
; (comma-sep advanced-stmt-expr "for loop increments") #t (eta advanced-statement))
; (while-l advanced-expression (eta advanced-statement))
; (do-while advanced-expression (eta advanced-statement))
; (break-s #f)
; (cont-s #f))))
;
; (define advanced-field (field (global-mods access-mods) value-type advanced-expression #t))
;
; (define advanced-method-sig-no-abs
; (method-signature (global-mods access-mods)
; (method-type value-type)
; args))
; (define advanced-method-sig-abs
; (method-signature (method-mods (global-mods access-mods))
; (method-type value-type)
; args))
;
; (define advanced-method
; (choose ((method advanced-method-sig-no-abs advanced-statement)
; (method-header advanced-method-sig-abs)) "method definition"))
;
; (define advanced-constructor
; (constructor access-mods
; (choose ((sequence ((super-call advanced-expression) (repeat advanced-statement)) id)
; (sequence ((this-call advanced-expression) (repeat advanced-statement)) id)
; (repeat advanced-statement)) "constructor body")))
;
; (define advanced-interface
; (interface-def
; #f
; (sequence (extends (comma-sep IDENTIFIER "interfaces")) id "extends")
; (method-header* advanced-method-sig-no-abs)))
;
; (define advanced-class
; (class-def abstract (extend-dec IDENTIFIER) (implements-dec (comma-sep IDENTIFIER "interfaces"))
; (repeat (class-body (list advanced-field advanced-method advanced-constructor
; (method-header advanced-method-sig-abs))))))
;
; (define advanced-program
; (program (sequence (package name SEMI_COLON) id "package specification")
; (repeat import-dec)
; (repeat (top-member (list advanced-class advanced-interface)))))
;
; (define parse-advanced
; (parser advanced-program))
;
; (define (old-tokens->new tok-list)
; (cond
; [(null? tok-list) null]
; [else
; (cons
; (make-position-token
; (case (token-name (position-token-token (car tok-list)))
; [(=) (token-EQUAL)]
; ((<) (token-LT))
; ((>) (token-GT))
; ((<=) (token-LTEQ))
; ((>=) (token-GTEQ))
; ((+) (token-PLUS))
; ((-) (token-MINUS))
; ((*) (token-TIMES))
; ((/) (token-DIVIDE))
; ((^) (token-^T))
; ((if) (token-ifT))
; ((do) (token-doT))
; ((case) (token-caseT))
; ((else) (token-elseT))
; ((void) (token-voidT))
; (else (position-token-token (car tok-list))))
; (position-token-start-pos (car tok-list))
; (position-token-end-pos (car tok-list)))
; (old-tokens->new (cdr tok-list)))]))
;
; )
;
; (define-unit constants@
; (import)
; (export error-format-parameters^)
; (define src? #t)
; (define input-type "file")
; (define show-options #f)
; (define max-depth 1)
; (define max-choice-depth 3))
;
; (define-compound-unit/infer java-parsers@
; (import)
; (export teaching-languages^)
; (link java-dictionary@ combinator-parser-tools@ constants@ java-grammars@))
;
; (provide java-parsers@ teaching-languages^)
;
; )
;

View File

@ -0,0 +1,993 @@
(module parser-units (lib "lazy.ss" "lazy")
(require (lib "unit.ss"))
(require (lib "lex.ss" "parser-tools")
(lib "combinator-unit.ss" "combinator-parser")
"java-signatures.scm"
(lib "string.ss"))
(define-unit java-dictionary@
(import)
(export language-dictionary^
(rename language-format-parameters^
(output-map input->output-name)))
(define class-type "keyword")
(define (output-map x)
(!!! (when (position-token? x)
(set! x (position-token-token x))))
(!!! (case (token-name x)
[(PIPE) "|"]
[(OR) "||"]
[(OREQUAL) "|="]
[(EQUAL) "="]
[(GT) ">"]
[(LT) "<"]
[(LTEQ) "<="]
[(GTEQ) ">="]
[(PLUS) "+"]
[(MINUS) "-"]
[(TIMES) "*"]
[(DIVIDE) "/"]
[(^T) "^"]
[(O_PAREN) "("]
[(C_PAREN) ")"]
[(O_BRACE) "{"]
[(C_BRACE) "}"]
[(O_BRACKET) "["]
[(C_BRACKET) "]"]
[(SEMI_COLON) ";"]
[(PERIOD) "."]
[(COMMA) ","]
[(NULL_LIT) "null"]
[(TRUE_LIT) "true"]
[(FALSE_LIT) "false"]
[(EOF) "end of input"]
[(caseT) "case"]
[(doT) "do"]
[(elseT) "else"]
[(ifT) "if"]
[(voidT) "void"]
[(STRING_LIT) (format "\"~a\"" (token-value x))]
[(CHAR_LIT) (format "'~a'" (token-value x))]
[(INTEGER_LIT LONG_LIT FLOAT_LIT DOUBLE_LIT
HEX_LIT OCT_LIT HEXL_LIT OCTL_LIT) (token-value x)]
[(IDENTIFIER) (format "identifier ~a" (token-value x))]
[(STRING_ERROR) (format "misformatted string ~a" (token-value x))]
[else (token-name x)])))
(define (java-keyword? t)
(memq t `(? this super new instanceof while try throw synchronized switch return ifT goto for finally
elseT doT default continue catch case break voidT throws const interface implements extends
class import package EQUAL += -= *= /= &= ^= %= <<= >>= >>>=
boolean byte char double float int long short
abstract native private protected public static strictfp transient volatile)))
(define (close-to-keyword? t arg)
(printf "close-to-keyword ~a ~a~n" t arg)
(and (string? t)
(member t (select-words (string->symbol arg)))))
(define (miscapitalized? t key)
(and (string? t)
(let ((s (string-copy t)))
(string-lowercase! s)
(equal? s key))))
(define misspelled-list '((import "mport" "iport" "imort" "imprt" "impot" "impor" "improt" "impourt")
(class "lass" "cass" "clss" "clas" "calss")
(abstract
"bstract" "astract" "abtract" "absract" "abstact" "abstrct" "abstrat" "abstract" "abstarct" "abstracts")
(extends "xtends" "etends" "exends" "extnds" "exteds" "extens" "extneds" "extend")
(new "nw" "ne" "nwe")
(this "his" "tis" "ths" "thi" "tihs" "thsi")
(instanceof "instancef" "instanceo" "intsanceof")
(if "fi")
(else "lse" "ese" "els" "eles")
(return "eturn" "rturn" "reurn" "retrn" "retun" "retur" "reutrn" "retrun" "returns")
(true "rue" "tue" "tre" "tru" "ture" "treu")
(false "flse" "fase" "fale" "fals" "flase" "fasle")
(interface
"nterface" "iterface" "inerface" "intrface" "inteface" "interace" "interfce" "interfae" "intreface")
(implements
"mplements" "iplements" "impements" "implments" "impleents" "implemnts" "implemets" "implemens"
"implement")
(void "oid" "vid" "voi" "viod")
(super "uper" "sper" "supr" "supe" "supper")
(public "ublic" "pblic" "pulic" "pubic" "publc" "publi" "pubilc")
(private "rivate" "pivate" "prvate" "priate" "privte" "privae" "privat" "pravite")
(package "ackage" "pckage" "pakage" "pacage" "packge" "packae" "packag")
(protected "rotected" "portected")
(final "inal" "fnal" "fial" "finl" "finale" "fianl")
(check "chek" "cehck" "chck" "chack")
(expect "expct" "expeet" "expec" "exect")
(within "with" "withi" "withen" "wihtin")
))
(define (select-words key)
(safe-car (filter (lambda (x) (eq? (car x) key)) misspelled-list)))
(define (safe-car f)
(if (null? f) null (car f)))
(define all-words (filter string? (apply append misspelled-list)))
(define misspelled (lambda (id token-v) (if (close-to-keyword? token-v id) 1 0)))
(define misscap (lambda (id token-v) (miscapitalized? token-v id)))
(define missclass (lambda (id token-n) (and (eq? 'IDENTIFIER id) (java-keyword? token-n))))
)
(define-signature teaching-languages^
(parse-beginner parse-beginner-interactions
parse-intermediate parse-intermediate-interactions parse-intermediate+access
parse-advanced parse-advanced-interactions
old-tokens->new))
(define-signature id^ (id))
;Terminals unit
(define-unit java-terminals@
(import combinator-parser^ id^)
(export java-operators^ java-separators^ java-literals^ java-expression-keywords^
java-statement-keywords^ java-definition-keywords^
java-type-keywords^ java-reserved^ java-extras^ java-vals^ java-ids^ java-specials^)
(define-simple-terminals Operators
((PIPE "|") (OR "||") (OREQUAL "|=")
(EQUAL "=") (GT ">") (LT "<") ! ~ ? :
== (LTEQ "<=") (GTEQ ">=") != && ++ --
(PLUS "+") (MINUS "-")
(TIMES "*") (DIVIDE "/") & (^T "^") % << >> >>>
+= -= *= /= &= ^= %= <<= >>= >>>=))
(define-simple-terminals Separators
((O_PAREN "(") (C_PAREN ")") (O_BRACE "{") (C_BRACE "}")
(O_BRACKET "[") (C_BRACKET "]") (SEMI_COLON ";") (PERIOD ".") (COMMA ",")))
(define-simple-terminals EmptyLiterals ((NULL_LIT "null") (TRUE_LIT "true") (FALSE_LIT "false") EOF))
(define-simple-terminals Keywords
(abstract default (ifT "if") private this
boolean (doT "do") implements protected throw
break double import public throws
byte (elseT "else") instanceof return transient
(caseT "case") extends int short try
catch final interface static (voidT "void")
char finally long strictfp volatile
class float native super while
const for new switch
continue goto package synchronized))
(define-simple-terminals ExtraKeywords (dynamic check expect within -> ->> ->>> test tests testcase))
(define-terminals java-vals
((STRING_LIT "String literal" id) (CHAR_LIT "character" id) (INTEGER_LIT "integer" id)
(LONG_LIT "long" id) (FLOAT_LIT "float" id) (DOUBLE_LIT "double" id)
(IDENTIFIER "identifer" id) (STRING_ERROR id)
(NUMBER_ERROR id) (HEX_LIT id) (OCT_LIT id) (HEXL_LIT id) (OCTL_LIT id)))
(define-terminals special-toks ((EXAMPLE id) (TEST_SUITE id) (IMAGE_SPECIAL id) (OTHER_SPECIAL id)))
)
;---------------------------------------------------------------------------------------------------
; Types, modifiers, operators
(define-unit types@
(import combinator-parser^ java-type-keywords^ java-ids^ java-separators^ id^)
(export java-types^)
(define integer-types
(choose (byte int short long) "type"))
(define inexact-types
(choose (float double) "type"))
(define numeric-type
(choose (integer-types inexact-types) "numeric type"))
(define prim-type
(choose (boolean double byte int short char long float) "type"))
(define (other-type-base types) (choice types "type"))
(define (value+name-type base-type name)
(choose (base-type name) "type"))
(define (method-type base-t)
(choice (list base-t voidT) "method return"))
(define (array-type base-t)
(choice (base-t (sequence (base-t O_BRACKET C_BRACKET (repeat (sequence (O_BRACKET C_BRACKET) id))) id
"array type")) "type"))
)
(define-unit mods@
(import combinator-parser^ java-definition-keywords^)
(export java-access^)
(define access-mods
(choose (public private protected) "access modifier"))
(define (global-mods base-mods)
(choice (list base-mods static) "modifier"))
(define (method-mods base-mods)
(choice (list base-mods abstract) "modifier"))
)
(define-unit operators@
(import combinator-parser^ java-operators^ java-separators^)
(export java-ops^)
(define math-ops
(choose (PLUS MINUS TIMES DIVIDE %) "binary operation"))
(define shift-ops
(choose (<< >> >>>) "shift operation"))
(define compare-ops
(choose (== GT LT LTEQ GTEQ !=) "binary operation"))
(define bool-ops
(choose (&& OR) "binary operation"))
(define bit-ops
(choose (^T PIPE &) "binary operation"))
(define assignment-ops
(choose (EQUAL OREQUAL += -= *= /= &= ^= %= <<= >>= >>>=) "assignment"))
(define (bin-ops ops)
(choice ops "binary operation"))
(define un-assignment
(choose (++ --) "unary operation"))
(define un-op
(choose (~ + -) "unary operation"))
)
(define-unit general@
(import combinator-parser^ java-separators^ java-operators^ java-ids^ id^)
(export general-productions^)
(define (comma-sep term name)
(sequence (term (repeat (sequence (COMMA term) id))) id (string-append "list of " name)))
(define (variable-declaration type expr share-type? name)
(let* ([f (choose (IDENTIFIER (sequence ((^ IDENTIFIER) EQUAL expr) id)) (string-append name " declaration"))]
[s&e (sequence (type (comma-sep f name) SEMI_COLON) id (string-append name " definition"))]
[s (sequence (type (comma-sep IDENTIFIER name) SEMI_COLON) id (string-append name " definition"))]
[e (sequence (type (^ IDENTIFIER) EQUAL expr SEMI_COLON) id (string-append name " definition"))]
[base (sequence (type (^ IDENTIFIER) SEMI_COLON) id (string-append name " definition"))])
(cond
[(and expr share-type?) s&e]
[share-type? s]
[expr (choose (e base) (string-append name " definition"))]
[else base])))
(define name
(sequence (IDENTIFIER (repeat (sequence (PERIOD IDENTIFIER) id))) id "name"))
)
(define-unit expressions@
(import combinator-parser^ general-productions^ id^
java-literals^ java-expression-keywords^ java-vals^ java-ids^ java-separators^
java-operators^ java-extras^)
(export expression-maker^ expr-lits^ expr-terms+^ expr-tails^)
(define (simple-expression exprs)
(choice exprs "expression"))
(define boolean-lits
(choose (TRUE_LIT FALSE_LIT) "boolean literal"))
(define textual-lits
(choose (STRING_LIT CHAR_LIT) "literal expression"))
(define prim-numeric-lits
(choose (INTEGER_LIT LONG_LIT) "literal expression"))
(define numeric-lits
(choose (HEX_LIT HEXL_LIT OCTL_LIT OCT_LIT) "literal expression"))
(define double-lits
(choose (FLOAT_LIT DOUBLE_LIT) "literal expression"))
(define null-lit NULL_LIT)
(define (literals lits)
(choice lits "literal expression"))
(define all-literals
(choose (NULL_LIT boolean-lits textual-lits prim-numeric-lits double-lits numeric-lits)
"literal expression"))
(define (new-class class-name expr)
(choose ((sequence (new class-name O_PAREN C_PAREN) id)
(sequence (new class-name O_PAREN (comma-sep expr "arguments") C_PAREN) id))
"class instantiation"))
(define (new-array type-name expr)
(sequence (new type-name O_BRACKET expr C_BRACKET (repeat (sequence (O_BRACKET expr C_BRACKET) id)))
id "array instantiation"))
(define field-access-end
(sequence (PERIOD IDENTIFIER) id "field access"))
(define (array-access-end expr)
(sequence (O_BRACKET expr C_BRACKET) id "array access"))
(define (array-init-maker contents)
(sequence (O_BRACE (comma-sep contents "array elements") C_BRACE) id "array initializations"))
(define (array-init type-name expr)
(letrec ([base-init (array-init-maker expr)]
[simple-init (array-init-maker (choose (expr base-init (eta init)) "array initializations"))]
[init (array-init-maker (choose (expr simple-init) "array initializations"))])
(sequence (new type-name init) "array initialization")))
(define (binary-expression-end op expr)
(sequence ((^ op) expr) id "binary expression"))
(define (if-expr-end expr)
(sequence (? expr : expr) id "conditional expression"))
(define (simple-method-call expr)
(choose
((sequence ((^ IDENTIFIER) O_PAREN C_PAREN) id "method invocation")
(sequence ((^ IDENTIFIER) O_PAREN (comma-sep expr "argument list") C_PAREN) id "method invocation"))
"method invocation"))
(define (method-call-end expr)
(choose
((sequence (PERIOD (^ IDENTIFIER) O_PAREN C_PAREN) id "method invocation")
(sequence (PERIOD (^ IDENTIFIER) O_PAREN (comma-sep expr "argument list") C_PAREN) id "method invocation"))
"method invocation"))
(define (assignment name op expr)
(sequence ((^ name) op expr) id "assignment"))
(define (unary-assignment-front expr)
(choose ((sequence (++ expr) id "unary modification")
(sequence (-- expr) id "unary modification")) "unary modification"))
(define (unary-assignment-back base)
(choose ((sequence (base ++) id "unary modification")
(sequence (base --) id "unary modification")) "unary modification"))
(define (cast type expr)
(sequence (O_PAREN type C_PAREN expr) "cast expression"))
(define (instanceof-back name)
(sequence (instanceof name) "instanceof expression"))
(define (super-call expr)
(choose ((sequence (super PERIOD IDENTIFIER O_PAREN C_PAREN) id "super method invocation")
(sequence (super PERIOD IDENTIFIER O_PAREN (comma-sep expr "arguments") C_PAREN) id "super method invocation"))
"super method invocation"))
(define (checks expr)
(choose
((sequence (check expr expect expr) id "check expression")
(sequence (check expr expect expr within expr) id "check expression"))
"check expression"))
)
(define-unit statements@
(import combinator-parser^ general-productions^ id^
java-statement-keywords^ java-separators^ java-ids^ java-operators^)
(export statements^)
(define (if-s expr statement else?)
(cond
[else?
(choose ((sequence (ifT O_PAREN expr C_PAREN statement elseT statement) id)
(sequence (ifT O_PAREN expr C_PAREN statement) id)) "if")]
[else (sequence (ifT O_PAREN expr C_PAREN statement elseT statement) id "if")]))
(define (return-s expr opt?)
(cond
[opt? (choose ((sequence (return expr SEMI_COLON) id "return statement")
(sequence (return SEMI_COLON) id "return statement")) "return statement")]
[else (sequence (return expr SEMI_COLON) id "return statement")]))
(define (this-call expr)
(choose ((sequence (this O_PAREN C_PAREN SEMI_COLON) id)
(sequence (this O_PAREN (comma-sep expr "arguments") C_PAREN SEMI_COLON) id)) "this constructor call"))
(define (super-ctor-call expr)
(choose ((sequence (super O_PAREN C_PAREN SEMI_COLON) id)
(sequence (super O_PAREN (comma-sep expr "arguments") C_PAREN SEMI_COLON) id)) "super constructor call"))
(define (block statement)
(sequence (O_BRACE statement C_BRACE) id "block statement"))
(define (expression-stmt expr)
(sequence (expr SEMI_COLON) id "statement"))
(define (while-l expr statement)
(sequence (while O_PAREN expr C_PAREN statement) id "while loop"))
(define (do-while expr statement)
(sequence (doT statement while O_PAREN expr C_PAREN SEMI_COLON) id "do loop"))
(define (for-l init i-op? expr t-op? update up-op? statement)
(let ([full (sequence (for O_PAREN init SEMI_COLON expr SEMI_COLON update C_PAREN statement) id "for loop")]
[no-init (sequence (for O_PAREN SEMI_COLON expr SEMI_COLON update C_PAREN statement) id "for loop")]
[no-tst (sequence (for O_PAREN init SEMI_COLON SEMI_COLON update C_PAREN statement) id "for loop")]
[no-up (sequence (for O_PAREN init SEMI_COLON expr SEMI_COLON C_PAREN statement) id "for loop")]
[no-it (sequence (for O_PAREN SEMI_COLON SEMI_COLON update C_PAREN statement) id "for loop")]
[no-iu (sequence (for O_PAREN SEMI_COLON expr SEMI_COLON C_PAREN statement) id "for loop")]
[no-tu (sequence (for O_PAREN init SEMI_COLON SEMI_COLON C_PAREN statement) id "for loop")]
[none (sequence (for O_PAREN SEMI_COLON SEMI_COLON C_PAREN statement) id "for loop")])
(cond
[(and i-op? t-op? up-op?)
(choice (list full no-init no-tst no-up no-it no-iu no-tu none) "for loop")]
[(and t-op? up-op?)
(choice (list full no-tst no-up no-tu) "for loop")]
[(and i-op? t-op?)
(choice (list full no-init no-tst no-it) "for loop")]
[(and i-op? up-op?)
(choice (list full no-init no-up no-iu) "for loop")]
[i-op? (choice (list full no-init) "for loop")]
[t-op? (choice (list full no-tst) "for loop")]
[up-op? (choice (list full no-up) "for loop")]
[else full])))
(define (break-s label)
(cond
[label (choose ((sequence (break SEMI_COLON) id)
(sequence (break label SEMI_COLON) id)) "break statement")]
[else (sequence (break SEMI_COLON) id "break statement")]))
(define (cont-s label)
(cond
[label (choose ((sequence (continue SEMI_COLON) id)
(sequence (continue label SEMI_COLON) id)) "continue statement")]
[else (sequence (continue SEMI_COLON) id "continue statement")]))
(define init
(sequence (this PERIOD IDENTIFIER EQUAL IDENTIFIER SEMI_COLON) id "field initialization"))
(define (statement statements)
(choice statements "statement"))
)
(define-unit members@
(import combinator-parser^ general-productions^ id^ java-types^
java-separators^ java-ids^ java-definition-keywords^)
(export fields^ methods^ ctors^)
(define (field mods type expr share-types?)
(cond
[mods (sequence ((repeat mods) (variable-declaration type expr share-types? "field"))
id "field definition")]
[else (variable-declaration type expr share-types? "field")]))
(define arg (sequence ((value+name-type prim-type IDENTIFIER) IDENTIFIER) id "argument"))
(define args (comma-sep arg "parameter list"))
;method-signature: {U parser #f} [U parser #f] [U parser #f] bool bool parser -> parser
(define (method-signature m ret a t? n)
(let* ([method-parms (if a
(choose ((sequence (O_PAREN C_PAREN) id)
(sequence (O_PAREN a C_PAREN) id)) "method parameter list")
(sequence (O_PAREN C_PAREN) id "method parameter list"))]
[full (sequence ((repeat m) ret (^ IDENTIFIER) method-parms throws (comma-sep n "thrown type")) id "method signature")]
[full-no-t (sequence ((repeat m) ret (^ IDENTIFIER) method-parms) id "method signature")]
[no-mods-t (sequence (ret (^ IDENTIFIER) method-parms throws (comma-sep n "thrown type")) id "method signature")]
[no-mods (sequence (ret (^ IDENTIFIER) method-parms) id "method signature")])
(cond
[(and m t?) (choose (full full-no-t) "method signature")]
[m full-no-t]
[t? (choose (no-mods-t no-mods) "method signature")]
[else no-mods])))
(define (method-header method-sig)
(sequence (method-sig SEMI_COLON) id "method declaration"))
(define (method signature statement)
(sequence ((^ signature) O_BRACE statement C_BRACE) id "method definition"))
(define (constructor mod body)
(let ([ctor (choose
((sequence ((^ IDENTIFIER) O_PAREN C_PAREN O_BRACE body C_BRACE) id)
(sequence ((^ IDENTIFIER) O_PAREN args C_PAREN O_BRACE body C_BRACE) id))
"constructor definition")])
(cond
[mod (sequence ((repeat mod) ctor) id "constructor definition")]
[else ctor])))
)
(define-unit interface@
(import combinator-parser^ id^ java-definition-keywords^ java-ids^ java-separators^)
(export interfaces^)
(define (interface-body members)
(repeat (choice members "interface member")))
(define (interface-def modifier extends body)
(let ([m&e (sequence ((repeat modifier) interface (^ IDENTIFIER) extends O_BRACE body C_BRACE)
id "interface definition")]
[m (sequence ((repeat modifier) interface (^ IDENTIFIER) O_BRACE body C_BRACE) id "interface definition")]
[e (sequence (interface (^ IDENTIFIER) extends O_BRACE body C_BRACE id) "interface definition")]
[always (sequence (interface (^ IDENTIFIER) O_BRACE body C_BRACE) id "interface definition")])
(choice (cond
[(and modifier extends) (list m&e m e always)]
[modifier (list m always)]
[extends (list e always)]
[else (list always)])
"interface definition")))
)
(define-unit class@
(import combinator-parser^ id^ java-definition-keywords^ java-ids^ java-separators^)
(export classes^)
(define (class-body members)
(choice members "class member"))
(define (implements-dec name)
(sequence (implements name) id "implementation declaration"))
(define (extend-dec name)
(sequence (extends name) id "extends declaration"))
(define (class-def mods extends implements body)
(let ([e&i (sequence (class (^ IDENTIFIER) extends implements O_BRACE body C_BRACE) id "class definition")]
[e (sequence (class (^ IDENTIFIER) extends O_BRACE body C_BRACE) id "class definition")]
[i (sequence (class (^ IDENTIFIER) implements O_BRACE body C_BRACE) id "class definition")]
[base (sequence (class (^ IDENTIFIER) O_BRACE body C_BRACE) id "class definition")])
(let ([base-choice
(cond
[(and extends implements)
(choice (list e&i e i base) "class definition")]
[extends (choice (list e base) "class definition")]
[implements (choice (list i base) "class definition")]
[else base])])
(cond
[mods (choose ((sequence (mods base-choice) id) base-choice) "class definition")]
[else base-choice]))))
)
(define-unit top-forms@
(import combinator-parser^ id^ java-definition-keywords^ java-separators^
general-productions^)
(export top-forms^)
(define (top-member mems)
(choice mems "program body"))
(define import-dec
(choose
((sequence (import name PERIOD TIMES SEMI_COLON) id)
(sequence (import name SEMI_COLON) id)) "import declaration"))
(define (program package import body)
(let ([p&i (sequence (package import body) id "program")]
[p (sequence (package body) id "program")]
[i (sequence (import body) id "program")])
(cond
[(and package import)
(choice (list p&i p i body) "program")]
[package
(choice (list p body) "program")]
[import
(choice (list i body) "program")]
[else body])))
)
(define-signature language-forms^
(beginner-program beginner-statement beginner-expression beginner-field
intermediate-program intermediate+access-program intermediate-statement intermediate-expression
advanced-program advanced-statement advanced-expression
))
(define-signature token-proc^ (old-tokens->new))
(define-signature parsers^
(parse-beginner parse-intermediate parse-intermediate+access parse-advanced))
(define-unit java-grammars@
(import combinator-parser^ java-operators^ java-separators^
java-statement-keywords^ java-definition-keywords^
java-type-keywords^ java-ids^
java-types^ java-access^ java-ops^ general-productions^
expression-maker^ expr-lits^ expr-terms+^ expr-tails^ statements^
fields^ methods^ ctors^ interfaces^ classes^ top-forms^ id^)
(export language-forms^ token-proc^)
;Remembered Unsupported Features
;throws clause
;strictfp
;allowing static fields in interface
;Beginner definition
(define beginner-unique-base
(simple-expression
(list (literals (list boolean-lits textual-lits prim-numeric-lits double-lits))
this
IDENTIFIER
(new-class IDENTIFIER (eta beginner-expression))
(simple-method-call (eta beginner-expression))
(sequence (O_PAREN (eta beginner-expression) C_PAREN) id "expression")
(sequence (! (eta beginner-expression)) id "conditional expression")
(sequence (MINUS (eta beginner-expression)) id "negation expression")
(checks (eta beginner-expression)))))
(define beginner-unique-end
(simple-expression
(list field-access-end
(method-call-end (eta beginner-expression))
(binary-expression-end (bin-ops (list math-ops compare-ops bool-ops))
(eta beginner-expression)))))
(define beginner-expression
(sequence (beginner-unique-base (repeat beginner-unique-end)) id "expression"))
(define beginner-statement
(statement (list (if-s beginner-expression (eta beginner-statement) #f)
(return-s beginner-expression #f))))
(define beginner-field (field #f (value+name-type prim-type IDENTIFIER) beginner-expression #f))
(define beginner-method-sig
(method-signature #f (value+name-type prim-type IDENTIFIER) args #f IDENTIFIER))
(define beginner-method
(method beginner-method-sig beginner-statement))
(define beginner-constructor (constructor #f (repeat init)))
(define beginner-interface
(interface-def #f #f (repeat beginner-method-sig)))
(define beginner-class
(class-def #f #f (implements-dec IDENTIFIER)
(repeat (class-body (list beginner-field beginner-method beginner-constructor)))))
(define beginner-program
(program #f (repeat import-dec)
(repeat (top-member (list beginner-class beginner-interface)))))
;
;Intermediate definition
;
(define intermediate-unique-base
(simple-expression
(list (literals (list null-lit boolean-lits textual-lits prim-numeric-lits double-lits))
this
IDENTIFIER
(new-class IDENTIFIER (eta intermediate-expression))
(simple-method-call (eta intermediate-expression))
(sequence (O_PAREN (eta intermediate-expression) C_PAREN) id "expression")
(sequence (! (eta intermediate-expression)) id "conditional expression")
(sequence (MINUS (eta intermediate-expression)) id "negation expression")
(cast (value+name-type prim-type IDENTIFIER) (eta intermediate-expression))
(super-call (eta intermediate-expression))
(checks (eta intermediate-expression)))))
(define intermediate-unique-end
(simple-expression
(list field-access-end
(method-call-end (eta intermediate-expression))
(binary-expression-end (bin-ops (list math-ops compare-ops bool-ops bit-ops))
(eta intermediate-expression))
(instanceof-back (value+name-type prim-type IDENTIFIER)))))
(define intermediate-expression
(sequence (intermediate-unique-base (repeat intermediate-unique-end))
id "expression"))
(define intermediate-stmt-expr
(simple-expression (list (new-class IDENTIFIER intermediate-expression)
(super-call intermediate-expression)
(sequence (intermediate-expression
(method-call-end intermediate-expression))
id "method call")
(assignment
(choose (IDENTIFIER
(sequence (intermediate-unique-base field-access-end) id))
"assignee")
EQUAL intermediate-expression))))
(define intermediate-statement
(statement (list (if-s intermediate-expression (eta intermediate-statement) #f)
(return-s intermediate-expression #t)
(variable-declaration (value+name-type prim-type IDENTIFIER) intermediate-expression #f "local variable")
(block (repeat (eta intermediate-statement)))
(sequence (intermediate-stmt-expr SEMI_COLON) id "statement"))))
(define intermediate-field (field #f (value+name-type prim-type IDENTIFIER) intermediate-expression #t))
(define intermediate+access-field (field access-mods (value+name-type prim-type IDENTIFIER) intermediate-expression #t))
(define intermediate-method-sig-no-abs
(method-signature #f (method-type (value+name-type prim-type IDENTIFIER)) args #f IDENTIFIER))
(define intermediate-method-sig-abs
(method-signature abstract (method-type (value+name-type prim-type IDENTIFIER)) args #f IDENTIFIER))
(define intermediate+access-method-sig-no-abs
(method-signature access-mods (method-type (value+name-type prim-type IDENTIFIER)) args #f IDENTIFIER))
(define intermediate+access-method-sig-abs
(method-signature (method-mods access-mods) (method-type (value+name-type prim-type IDENTIFIER)) args #f IDENTIFIER))
(define intermediate-method
(choose ((method intermediate-method-sig-no-abs intermediate-statement)
(method-header intermediate-method-sig-abs)) "method definition"))
(define intermediate+access-method
(choose ((method intermediate+access-method-sig-no-abs intermediate-statement)
(method-header intermediate+access-method-sig-abs)) "method definition"))
(define intermediate-constructor
(constructor #f
(choose ((sequence ((super-call intermediate-expression) (repeat intermediate-statement)) id)
(sequence ((this-call intermediate-expression) (repeat intermediate-statement)) id)
(repeat intermediate-statement)) "constructor body")))
(define intermediate+access-constructor
(constructor access-mods
(choose ((sequence ((super-call intermediate-expression) (repeat intermediate-statement)) id)
(sequence ((this-call intermediate-expression) (repeat intermediate-statement)) id)
(repeat intermediate-statement)) "constructor body")))
(define intermediate-interface
(interface-def
#f
(sequence (extends (comma-sep IDENTIFIER "interfaces")) id "extends")
(repeat intermediate-method-sig-no-abs)))
(define intermediate-class
(class-def abstract (extend-dec IDENTIFIER) (implements-dec (comma-sep IDENTIFIER "interfaces"))
(repeat (class-body (list intermediate-field intermediate-method intermediate-constructor)))))
(define intermediate+access-class
(class-def abstract (extend-dec IDENTIFIER) (implements-dec (comma-sep IDENTIFIER "interfaces"))
(repeat (class-body (list intermediate+access-field
intermediate+access-method
intermediate+access-constructor)))))
(define intermediate-program
(program #f (repeat import-dec)
(repeat (top-member (list intermediate-class intermediate-interface)))))
(define intermediate+access-program
(program #f (repeat import-dec)
(repeat (top-member (list intermediate+access-class intermediate-interface)))))
(define advanced-unique-base
(simple-expression
(list (literals (list null-lit boolean-lits textual-lits prim-numeric-lits double-lits))
this
IDENTIFIER
(new-class IDENTIFIER (eta advanced-expression))
(simple-method-call (eta advanced-expression))
(new-array (value+name-type prim-type IDENTIFIER) (eta advanced-expression))
(sequence (O_PAREN (eta advanced-expression) C_PAREN) id "expression")
(sequence (! (eta advanced-expression)) id "conditional expression")
(sequence (MINUS (eta advanced-expression)) id "negation exxpression")
(cast (value+name-type prim-type IDENTIFIER) (eta advanced-expression))
(super-call (eta advanced-expression))
(checks (eta advanced-expression)))))
(define advanced-unique-end
(simple-expression
(list field-access-end
(array-access-end (eta advanced-expression))
(method-call-end (eta advanced-expression))
(if-expr-end (eta advanced-expression))
(binary-expression-end (bin-ops (list math-ops compare-ops bool-ops bit-ops))
(eta advanced-expression))
(instanceof-back (value+name-type prim-type IDENTIFIER)))))
(define advanced-expression
(sequence (advanced-unique-base (repeat advanced-unique-end)) id "expression"))
(define advanced-stmt-expr
(simple-expression (list (new-class IDENTIFIER advanced-expression)
(super-call advanced-expression)
(sequence (advanced-expression
(method-call-end advanced-expression)) id "method call")
(assignment
(choose (IDENTIFIER
(sequence (advanced-expression field-access-end) id)
(sequence (advanced-expression array-access-end) id))
"asignee")
assignment-ops advanced-expression)
(sequence (advanced-expression ++) id "unary mutation")
(sequence (advanced-expression --) id "unary mutation")
(sequence (++ advanced-expression) id "unary mutation")
(sequence (-- advanced-expression) id "unary mutation"))))
(define advanced-statement
(statement (list (if-s advanced-expression (eta advanced-statement) #t)
(return-s advanced-expression #t)
(variable-declaration (value+name-type prim-type IDENTIFIER) advanced-expression #t "local variable")
(block (repeat (eta advanced-statement)))
(sequence (advanced-stmt-expr SEMI_COLON) id "statement")
(for-l (choose ((variable-declaration (value+name-type prim-type IDENTIFIER) advanced-expression #t "for loop variable")
(comma-sep advanced-stmt-expr "initializations")) "for loop initialization")
#t
advanced-expression #t
(comma-sep advanced-stmt-expr "for loop increments") #t (eta advanced-statement))
(while-l advanced-expression (eta advanced-statement))
(do-while advanced-expression (eta advanced-statement))
(break-s #f)
(cont-s #f))))
(define advanced-field (field (global-mods access-mods) (value+name-type prim-type IDENTIFIER) advanced-expression #t))
(define advanced-method-sig-no-abs
(method-signature (global-mods access-mods)
(method-type (value+name-type prim-type IDENTIFIER)) args #f IDENTIFIER))
(define advanced-method-sig-abs
(method-signature (method-mods (global-mods access-mods))
(method-type (value+name-type prim-type IDENTIFIER)) args #f IDENTIFIER))
(define advanced-method
(choose ((method advanced-method-sig-no-abs advanced-statement)
(method-header advanced-method-sig-abs)) "method definition"))
(define advanced-constructor
(constructor access-mods
(choose ((sequence ((super-call advanced-expression) (repeat advanced-statement)) id)
(sequence ((this-call advanced-expression) (repeat advanced-statement)) id)
(repeat advanced-statement)) "constructor body")))
(define advanced-interface
(interface-def
#f
(sequence (extends (comma-sep IDENTIFIER "interfaces")) id "extends")
(repeat (choose (advanced-method-sig-no-abs
(field (global-mods access-mods) (value+name-type prim-type IDENTIFIER) advanced-expression #t))
"interface member definition"))))
(define advanced-class
(class-def (choose (abstract public) "class modifier")
(extend-dec IDENTIFIER) (implements-dec (comma-sep IDENTIFIER "interfaces"))
(repeat (class-body (list advanced-field advanced-method advanced-constructor
(method-header advanced-method-sig-abs))))))
(define advanced-program
(program (sequence (package name SEMI_COLON) id "package specification")
(repeat import-dec)
(repeat (top-member (list advanced-class advanced-interface)))))
(define (old-tokens->new tok-list)
(cond
[(null? tok-list) null]
[(eq? (token-name (position-token-token (car tok-list))) 'EOF) null]
[else
(cons
(make-position-token
(case (token-name (position-token-token (car tok-list)))
[(=) (token-EQUAL)]
((<) (token-LT))
((>) (token-GT))
((<=) (token-LTEQ))
((>=) (token-GTEQ))
((+) (token-PLUS))
((-) (token-MINUS))
((*) (token-TIMES))
((/) (token-DIVIDE))
((^) (token-^T))
((if) (token-ifT))
((do) (token-doT))
((case) (token-caseT))
((else) (token-elseT))
((void) (token-voidT))
(else (position-token-token (car tok-list))))
(position-token-start-pos (car tok-list))
(position-token-end-pos (car tok-list)))
(old-tokens->new (cdr tok-list)))]))
)
(define-unit full-program-parsers@
(import language-forms^ combinator-parser^)
(export parsers^)
(define parse-beginner (parser beginner-program))
(define parse-intermediate (parser intermediate-program))
(define parse-intermediate+access (parser intermediate+access-program))
(define parse-advanced (parser advanced-program))
)
(define-unit interaction-parsers@
(import language-forms^ combinator-parser^)
(export parsers^)
(define parse-beginner (parser (choose (beginner-expression beginner-statement beginner-field)
"interactions program")))
(define parse-intermediate (parser (choose (intermediate-expression intermediate-statement)
"interactions program")))
(define parse-intermediate+access parse-intermediate)
(define parse-advanced
(parser (choose (advanced-expression advanced-statement) "interactions program")))
)
(define-unit file-constants@
(import)
(export error-format-parameters^)
(define src? #t)
(define input-type "file")
(define show-options #f)
(define max-depth 2)
(define max-choice-depth 3))
(define-unit de-constants@
(import)
(export error-format-parameters^)
(define src? #t)
(define input-type "definitions window")
(define show-options #f)
(define max-depth 1)
(define max-choice-depth 3))
(define-unit interact-constants@
(import)
(export error-format-parameters^)
(define src? #t)
(define input-type "interactions-window")
(define show-options #f)
(define max-depth 0)
(define max-choice-depth 3))
(define-unit id@
(import)
(export id^)
(define (id x . args) x))
(define-compound-unit/infer java-file-parsers@
(import)
(export parsers^ token-proc^ err^)
(link java-dictionary@ combinator-parser-tools@ file-constants@ id@
java-terminals@ types@ mods@ operators@ general@
expressions@ statements@ members@ interface@ class@ top-forms@
java-grammars@ full-program-parsers@))
(define-compound-unit/infer java-definitions-parsers@
(import)
(export parsers^ token-proc^ err^)
(link java-dictionary@ combinator-parser-tools@ de-constants@ id@
java-terminals@ types@ mods@ operators@ general@
expressions@ statements@ members@ interface@ class@ top-forms@
java-grammars@ full-program-parsers@))
(define-compound-unit/infer java-interactions-parsers@
(import)
(export parsers^ token-proc^ err^)
(link java-dictionary@ combinator-parser-tools@ interact-constants@ id@
java-terminals@ types@ mods@ operators@ general@
expressions@ statements@ members@ interface@ class@ top-forms@
java-grammars@ interaction-parsers@))
(provide java-definitions-parsers@ java-interactions-parsers@ parsers^ token-proc^)
)

View File

@ -0,0 +1,355 @@
(module parsers mzscheme
(require "parser-units.scm"
(only (lib "force.ss" "lazy") !!!)
(only (lib "combinator-unit.ss" "combinator-parser") err^)
(lib "unit.ss")
#;(lib "lex.ss" "parser-tools")
#;(prefix re: (lib "lex-sre.ss" "parser-tools")))
(provide parse-beginner parse-intermediate parse-intermediate+access parse-advanced
parse-beginner-interact parse-intermediate-interact parse-advanced-interact)
(define (trim-string s f l)
(substring s f (- (string-length s) l)))
; (define-lex-abbrevs
; ;; 3.4
; (CR #\015)
; (LF #\012)
; (LineTerminator (re:or CR
; LF
; (re:: CR LF)))
; (InputCharacter (re:~ CR LF))
;
; ;; 3.6
; (FF #\014)
; (TAB #\011)
; (WhiteSpace (re:or #\space
; TAB
; FF
; LineTerminator))
;
; ;; 3.7 (Had to transform CommentTail and CommentTailStar into one RE)
; ;; (DocumentationComment only appears in version 1 of the spec)
; (Comment (re:or TraditionalComment
; EndOfLineComment
; DocumentationComment))
; (TraditionalComment (re:: "/*" NotStar CommentTail))
; (EndOfLineComment (re:: "//" (re:* InputCharacter)))
; (DocumentationComment (re:: "/**" CommentTailStar))
; (CommentTail (re:: (re:* (re:: (re:* NotStar) (re:+ "*") NotStarNotSlash))
; (re:* NotStar)
; (re:+ "*")
; "/"))
; (CommentTailStar (re:: (re:* (re:: (re:* "*") NotStarNotSlash (re:* NotStar) "*"))
; (re:* "*")
; "/"))
; (NotStar (re:~ "*"))
; (NotStarNotSlash (re:~ "*" "/"))
;
; (SyntaxComment (re:or TraditionalCommentEOF
; EndOfLineComment))
; (TraditionalCommentEOF (re:: "/*" CommentTailEOF))
; (CommentTailEOF (re:or (re:: (re:* (re:: (re:* NotStar) (re:+ "*") NotStarNotSlash))
; (re:* NotStar)
; (re:+ "*")
; "/")
; (re:: (re:* (re:: (re:* NotStar) (re:+ "*") NotStarNotSlash))
; (re:* NotStar)
; (re:* "*"))))
;
; ;; 3.8 (No need to worry about excluding keywords and such. They will
; ;; appear first in the lexer spec)
; ;Not UNICODE compliant
; (Identifier (re:: JavaLetter (re:* JavaLetterOrDigit)))
; (JavaLetter (re:or (re:/ "AZ" "az") "_" "$"))
; (JavaLetterOrDigit (re:or JavaLetter (re:/ "09")))
;
; (KnownTypes (re:or "boolean" "byte" "char" "double" "float" "int" "long" "short"
; "String" "Object"))
;
; ;; 3.9
; (Keyword (re:or "abstract" "default" "if" "private" "this"
; "boolean" "do" "implements" "protected" "throw"
; "break" "double" "import" "public" "throws"
; "byte" "else" "instanceof" "return" "transient"
; "case" "extends" "int" "short" "try"
; "catch" "final" "interface" "static" "void"
; "char" "finally" "long" "strictfp" "volatile"
; "class" "float" "native" "super" "while"
; "const" "for" "new" "switch"
; "continue" "goto" "package" "synchronized"))
;
; ;; 3.10.1
; (Digits (re:+ (re:/ "09")))
; (DigitsOpt (re:* (re:/ "09")))
;
; (IntegerTypeSuffix (char-set "lL"))
; (DecimalNumeral (re:or #\0
; (re:: (re:/ "19") (re:* (re:/ "09")))))
; (HexDigit (re:/ "09" "af" "AF"))
; (HexNumeral (re:: #\0 (char-set "xX") (re:+ HexDigit)))
; (OctalNumeral (re:: #\0 (re:+ (re:/ "07"))))
;
; ;; 3.10.2
; (FloatTypeSuffix (char-set "fF"))
; (DoubleTypeSuffix (char-set "dD"))
;
; (FloatA (re:: Digits #\. DigitsOpt (re:? ExponentPart)))
; (FloatB (re:: #\. Digits (re:? ExponentPart)))
; (FloatC (re:: Digits ExponentPart))
; (FloatD (re:: Digits (re:? ExponentPart)))
;
; (ExponentPart (re:: (char-set "eE") (re:? (char-set "+-")) Digits))
;
; ;; MORE
;
; ;; 3.10.6
; (EscapeSequence (re:or "\\b" "\\t" "\\n" "\\f" "\\r" "\\\"" "\\'" "\\\\"
; (re:: #\\ (re:? (re:/ "03")) (re:/ "07") (re:/ "07"))
; (re:: #\\ (re:/ "07"))))
;
; ;; 3.12
; (Operator (re:or "=" ">" "<" "!" "~" "?" ":"
; "==" "<=" ">=" "!=" "&&" "||" "++" "--"
; "+" "-" "*" "/" "&" "|" "^" "%" "<<" ">>" ">>>"
; "+=" "-=" "*=" "/=" "&=" "|=" "^=" "%=" "<<=" ">>=" ">>>=")))
;
; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; ;;Comment lexers
;
; (define read-line-comment
; (lexer
; [(re:~ #\newline) (read-line-comment input-port)]
; [#\newline end-pos]
; [(eof) end-pos]
; [(special) (read-line-comment input-port)]
; [(special-comment) (read-line-comment input-port)]
; ))
;
; (define read-block-comment
; (lexer
; ["*/" end-pos]
; [(eof) end-pos]
; [(re:or "*" "/" (complement (re:: any-string (re:or "*" "/") any-string))) (read-block-comment input-port)]
; [(special) (read-block-comment input-port)]
; [(special-comment) (read-block-comment input-port)]
; ))
;
; #;(define read-document-comment
; (lexer
; ["**/" end-pos]
; [(eof) end-pos]
; [(re:or "*" "/" (~ (any-string))) (read-document-comment input-port)]
; [(special) (read-document-comment input-port)]
; [(special-comment) (read-document-comment input-port)]
; [(special-error) (read-document-comment input-port)]))
;
; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; ;String lexer
;
; ;get-string: input-port -> (U STRING_LIT STRING_ERROR tokens)
; (define (get-string input-port)
; (letrec ((tokens (get-string-tokens input-port))
; (last-token (list-ref tokens (sub1 (length tokens))))
; (tokens->string
; (lambda (toks)
; ;Stops before the last element, which does not have a string
; (if (null? (cdr toks))
; ""
; (string-append (string (token-value (position-token-token (car toks))))
; (tokens->string (cdr toks)))))))
; (if (eq? 'STRING_END (token-name (position-token-token last-token)))
; (token-STRING_LIT (list (tokens->string tokens) (position-token-end-pos last-token)))
; (token-STRING_ERROR
; (list (tokens->string tokens)
; (position-token-end-pos last-token)
; (position-token-token last-token))))))
;
; ;get-string-tokens: input-port -> (list position-token)
; (define (get-string-tokens input-port)
; (let ((tok (get-str-tok input-port)))
; (case (token-name (position-token-token tok))
; ((STRING_EOF STRING_END STRING_NEWLINE) (list tok))
; (else (cons tok (get-string-tokens input-port))))))
;
; (define-tokens str-tok (STRING_CHAR))
; (define-empty-tokens err (STRING_END STRING_EOF STRING_NEWLINE))
;
; (define get-str-tok
; (lexer-src-pos
; (#\" (token-STRING_END))
; (EscapeSequence (token-STRING_CHAR (EscapeSequence->char lexeme)))
; (InputCharacter (token-STRING_CHAR (string-ref lexeme 0)))
; ((re:or CR LF) (token-STRING_NEWLINE))
; (#\032 (token-STRING_EOF))
; ((eof) (token-STRING_EOF))))
;
; ;; 3.10.6
; (define (EscapeSequence->char es)
; (cond
; ((string=? es "\\b") #\010)
; ((string=? es "\\t") #\011)
; ((string=? es "\\n") #\012)
; ((string=? es "\\f") #\014)
; ((string=? es "\\r") #\015)
; ((string=? es "\\\"") #\")
; ((string=? es "\\'") #\')
; ((string=? es "\\\\") #\\)
; (else (integer->char (string->number (trim-string es 1 0) 8)))))
;
; (define get-token
; (lexer-src-pos
; ;; 3.12
; (Operator (let ((l lexeme))
; (cond
; ((string=? l "|") (token-PIPE))
; ((string=? l "||") (token-OR))
; ((string=? l "|=") (token-OREQUAL))
; ((string=? l "=") (token-EQUAL))
; ((string=? l "<") (token-LT))
; ((string=? l ">") (token-GT))
; ((string=? l "<=") (token-LTEQ))
; ((string=? l ">=") (token-GTEQ))
; ((string=? l "+") (token-PLUS))
; ((string=? l "-") (token-MINUS))
; ((string=? l "*") (token-TIMES))
; ((string=? l "/") (token-DIVIDE))
; ((string=? l "^") (token-^T))
; (else (string->symbol l)))))
;
; ("->" (string->symbol lexeme))
; ("->>" (string->symbol lexeme))
; ("->>>" (string->symbol lexeme))
;
; ;; 3.11
; ("(" (token-O_PAREN))
; (")" (token-C_PAREN))
; ("{" (token-O_BRACE))
; ("}" (token-C_BRACE))
; ("[" (token-O_BRACKET))
; ("]" (token-C_BRACKET))
; (";" (token-SEMI_COLON))
; ("," (token-COMMA))
; ("." (token-PERIOD))
;
; ;; 3.10.7
; ("null" (token-NULL_LIT))
;
; ;; 3.10.5
; (#\" (get-string input-port))
; ;(token-STRING_LIT (list->string (get-string input-port))))
;
; ;; 3.10.4
; ((re:: #\' (re:~ CR LF #\' #\\) #\')
; (token-CHAR_LIT (string-ref lexeme 1)))
; ((re:: #\' EscapeSequence #\')
; (token-CHAR_LIT (EscapeSequence->char
; (trim-string lexeme 1 1))))
;
; ;; 3.10.3
; ("true" (token-TRUE_LIT))
; ("false" (token-FALSE_LIT))
;
; ;; 3.10.2
; ((re:or FloatA FloatB FloatC)
; (token-DOUBLE_LIT (string->number lexeme)))
; ((re:: (re:or FloatA FloatB FloatC FloatD) FloatTypeSuffix)
; (token-FLOAT_LIT (string->number (trim-string lexeme 0 1))))
; ((re:: (re:or FloatA FloatB FloatC FloatD) DoubleTypeSuffix)
; (token-DOUBLE_LIT (string->number (trim-string lexeme 0 1))))
;
;
; ;; 3.10.1
; (DecimalNumeral
; (token-INTEGER_LIT (string->number lexeme 10)))
; ((re:: DecimalNumeral IntegerTypeSuffix)
; (token-LONG_LIT (string->number (trim-string lexeme 0 1) 10)))
; ((re:: HexNumeral IntegerTypeSuffix)
; (token-HEXL_LIT (string->number (trim-string lexeme 2 1) 16)))
; (HexNumeral
; (token-HEX_LIT (string->number (trim-string lexeme 2 0) 16)))
; (OctalNumeral
; (token-OCT_LIT (string->number (trim-string lexeme 1 0) 8)))
; ((re:: OctalNumeral IntegerTypeSuffix)
; (token-OCTL_LIT (string->number (trim-string lexeme 1 1) 8)))
;
; #;("dynamic"
; (cond
; ((dynamic?) (string->symbol lexeme))
; (else (token-IDENTIFIER lexeme))))
;
; #;((re:or "check" "expect" "within")
; (cond
; ((test-ext?) (string->symbol lexeme))
; (else (token-IDENTIFIER lexeme))))
;
; #;((re:or "test" "tests" "testcase")
; (cond
; ((testcase-ext?) (string->symbol lexeme))
; (else (token-IDENTIFIER lexeme))))
;
; ;; 3.9
; (Keyword (string->symbol lexeme))
;
; ;; 3.8
; (Identifier (token-IDENTIFIER lexeme))
;
; ;; 3.7
; ("//" (begin (read-line-comment input-port) (return-without-pos (get-token input-port))))
; ("/*" (begin (read-block-comment input-port) (return-without-pos (get-token input-port))))
; #;("/**" (begin (read-document-comment input-port) (return-without-pos (get-token input-port))))
;
; #;((special)
; (cond
; ((and (syntax? lexeme) (syntax-property lexeme 'test-case-box))
; (token-TEST_SUITE (make-test-case lexeme)))
; ((and (syntax? lexeme) (syntax-property lexeme 'example-box))
; (syntax-case lexeme ()
; ((parse-example-box examples) (token-EXAMPLE (make-example-box (syntax examples))))))
; ((is-a? lexeme (image-snip%))
; (token-IMAGE_SPECIAL lexeme))
; (else
; (token-OTHER_SPECIAL (list lexeme start-pos end-pos)))))
;
; ;; 3.6
; ((re:+ WhiteSpace) (return-without-pos (get-token input-port)))
;
; ;; 3.5
; (#\032 'EOF)
; ((eof) 'EOF)
;
; ((re:+ (re:/ "09" "az" "AZ")) (token-NUMBER_ERROR lexeme))
;
; ))
;
(define-values/invoke-unit java-definitions-parsers@
(import)
(export (prefix def: parsers^) (prefix def: err^) token-proc^))
(define-values/invoke-unit java-interactions-parsers@
(import)
(export (prefix int: parsers^) (prefix int: err^)))
(define (parse parser err? err-src err-msg)
(lambda (program-stream location)
(let ([output
;(with-handlers ((exn?
; (lambda (e)
; (string-append "parse function failed with this internal exception:"
; (exn-message e)))))
(!!! ((!!! parser) (old-tokens->new program-stream) location))]);)])
(if (err? output) (list (err-msg output) (!!! (err-src output)))))))
(define parse-beginner (parse def:parse-beginner def:err? def:err-msg def:err-src))
(define parse-intermediate (parse def:parse-intermediate def:err? def:err-msg def:err-src))
(define parse-intermediate+access (parse def:parse-intermediate+access def:err? def:err-msg def:err-src))
(define parse-advanced (parse def:parse-advanced def:err? def:err-msg def:err-src))
(define parse-beginner-interact (parse int:parse-beginner int:err? int:err-msg int:err-src))
(define parse-intermediate-interact (parse int:parse-intermediate int:err? int:err-msg int:err-src))
(define parse-advanced-interact (parse int:parse-advanced int:err? int:err-msg int:err-src))
)

View File

@ -8,6 +8,7 @@
(define pre-install-collection "pre-installer.ss")
(define compile-subcollections
'(("profj" "parsers")
("profj" "comb-parsers")
("profj" "libs" "java" "lang")
("profj" "libs" "java" "io")
("profj" "libs" "java" "util")))

View File

@ -31,6 +31,8 @@
;Stores the error function to trigger for parsing
(define determine-error (make-parameter (lambda () #t)))
(define new-parser? (make-parameter #t))
;Stores a function which when called will produce (->token) of lexed tokens
(define lex-stream (make-parameter (lambda () null)))

View File

@ -7,11 +7,13 @@
"parsers/general-parsing.ss"
"parsers/parse-error.ss"
"parsers/lexer.ss"
(prefix err: "comb-parsers/parsers.scm")
"ast.ss"
"parameters.ss")
(require (all-except (lib "lex.ss" "parser-tools") input-port)
(lib "readerr.ss" "syntax"))
(lib "readerr.ss" "syntax")
(lib "force.ss" "lazy"))
(provide parse parse-interactions parse-expression parse-type parse-name lex-stream)
;function to lex in the entire port
@ -32,6 +34,21 @@
(begin0 (car token-list)
(unless (null? (cdr token-list))
(set! token-list (cdr token-list))))))
(define (error-builder parser old-parser lexed loc)
(if (new-parser?)
(lambda ()
(let ([result (!!! (parser lexed loc))])
(printf "Calling new parser ~a ~n" result)
(if (list? result)
(raise-read-error (cadr result)
(car (car result))
(cadr (car result))
(caddr (car result))
(cadddr (car result))
(car (cddddr (car result))))
(old-parser))))
old-parser))
;main parsing function
@ -42,13 +59,16 @@
(lex-stream (lambda () (getter lexed)))
(case level
((beginner)
(determine-error find-beginner-error)
(determine-error (error-builder err:parse-beginner find-beginner-error lexed filename))
(parse-beginner my-get))
((intermediate)
(determine-error find-intermediate-error)
(determine-error (error-builder err:parse-intermediate find-intermediate-error lexed filename))
(parse-intermediate my-get))
((intermediate+access)
(determine-error (error-builder err:parse-intermediate+access (lambda () #t) lexed filename))
(error))
((advanced)
(determine-error find-advanced-error)
(determine-error (error-builder err:parse-advanced find-advanced-error lexed filename))
(parse-advanced my-get))
((full) (parse-full my-get)))))
@ -59,13 +79,13 @@
(lex-stream (lambda () (getter lexed)))
(case level
((beginner)
(determine-error find-beginner-error-interactions)
(determine-error (error-builder err:parse-beginner-interact find-beginner-error-interactions lexed loc))
(parse-beginner-interactions my-get))
((intermediate)
(determine-error find-intermediate-error-interactions)
((intermediate intermediate+access)
(determine-error (error-builder err:parse-intermediate-interact find-intermediate-error-interactions lexed loc))
(parse-intermediate-interactions my-get))
((advanced)
(determine-error find-advanced-error-interactions)
(determine-error (error-builder err:parse-advanced-interact find-advanced-error-interactions lexed loc))
(parse-advanced-interactions my-get))
((full) (parse-full-interactions my-get)))))

View File

@ -582,11 +582,11 @@
[(new ClassOrInterfaceType DimExprs)
(make-array-alloc #f (build-src 3) (make-type-spec $2 0 (build-src 2 2)) (reverse $3) 0)]
;; 1.1
[(new PrimitiveType Dims ArrayInitializer)
#;[(new PrimitiveType Dims ArrayInitializer)
(begin (display $2)
(error 'unimplemented-1.1))]
;; 1.1
[(new ClassOrInterfaceType Dims ArrayInitializer) (error 'unimplemented-1.1)])
#;[(new ClassOrInterfaceType Dims ArrayInitializer) (error 'unimplemented-1.1)])
(DimExprs
[(DimExpr) (list $1)]