diff --git a/collects/profj/comb-parsers/info.ss b/collects/profj/comb-parsers/info.ss new file mode 100644 index 0000000000..8b5296b1b4 --- /dev/null +++ b/collects/profj/comb-parsers/info.ss @@ -0,0 +1,2 @@ +(module info (lib "infotab.ss" "setup") + (define name "ProfessorJ: parser")) diff --git a/collects/profj/comb-parsers/java-signatures.scm b/collects/profj/comb-parsers/java-signatures.scm new file mode 100644 index 0000000000..0e6d2dbf7f --- /dev/null +++ b/collects/profj/comb-parsers/java-signatures.scm @@ -0,0 +1,390 @@ +(module java-signatures mzscheme + + (require (lib "unit.ss")) + + (require (lib "lex.ss" "parser-tools") + (lib "combinator-unit.ss" "combinator-parser") + (lib "string.ss")) + + (provide (all-defined)) + + ;Terminal signatures + + (define-signature java-operators^ + ((terminals Operators + (PIPE OR OREQUAL GT LT ! ~ ? : == LTEQ GTEQ != && ++ -- PLUS + MINUS DIVIDE & ^T % << >> >>> += -= *= /= &= ^= %= <<= >>= >>>=)))) + + (define-signature java-separators^ + ((terminals Separators + (O_PAREN C_PAREN O_BRACE C_BRACE O_BRACKET C_BRACKET SEMI_COLON + PERIOD COMMA EQUAL TIMES)))) + + (define-signature java-literals^ + ((terminals EmptyLiterals (NULL_LIT TRUE_LIT FALSE_LIT EOF)))) + + (define-signature java-expression-keywords^ + ((terminals ExpressionKeywords (instanceof new)))) + + (define-signature java-statement-keywords^ + ((terminals StatementKeywords (break caseT catch continue ifT doT elseT + for return switch throw try while finally)))) + + (define-signature java-definition-keywords^ + ((terminals DefinitionKeywords (abstract class extends final private implements import + interface native package protected public static + strictfp synchronized throws transient volatile)))) + + (define-signature java-type-keywords^ + ((terminals TypeKeywords (boolean byte char double float int long short voidT)))) + + (define-signature java-reserved^ + ((terminals ReservedWords (default const goto)))) + + (define-signature java-extras^ + ((terminals ExtraKeywords (dynamic check expect within -> ->> ->>> test tests testcase)))) + + (define-signature java-ids^ ((terminals java-vals (IDENTIFIER this super)))) + + (define-signature java-vals^ + ((terminals java-vals (STRING_LIT CHAR_LIT INTEGER_LIT LONG_LIT FLOAT_LIT DOUBLE_LIT + STRING_ERROR NUMBER_ERROR HEX_LIT OCT_LIT + HEXL_LIT OCTL_LIT)))) + + (define-signature java-specials^ + ((terminals special-toks (EXAMPLE TEST_SUITE IMAGE_SPECIAL OTHER_SPECIAL)))) + + ;General purpose signatures + (define-signature general-productions^ (comma-sep variable-declaration name)) + + ;Types, modifiers, operator signatures + + (define-signature java-types^ (integer-types inexact-types numeric-type prim-type + other-type-base value+name-type method-type array-type)) + (define-signature java-access^ (access-mods global-mods method-mods)) + (define-signature java-ops^ (math-ops shift-ops compare-ops bool-ops bit-ops assignment-ops + bin-ops un-assignment un-op)) + + ;Expression signatures + (define-signature expression-maker^ (simple-expression)) + + (define-signature expr-lits^ (boolean-lits textual-lits prim-numeric-lits null-lit numeric-lits + double-lits literals all-literals)) + + (define-signature expr-terms^ (new-class new-array simple-method-call + assignment unary-assignment-front cast + super-call)) + + (define-signature expr-tails^ (field-access-end + array-access-end array-init + binary-expression-end if-expr-end + method-call-end unary-assignment-back instanceof-back)) + + (define-signature expr-terms+^ extends expr-terms^ (checks)) + + ;Statement signatures + + (define-signature statements^ (statement if-s return-s this-call super-ctor-call + block expression-stmt while-l do-while for-l + break-s cont-s init)) + + ;Member signatures + + (define-signature fields^ (field arg args)) + + (define-signature methods^ (method-signature method-header method)) + + (define-signature ctors^ (constructor)) + + ;Definition signatures + + (define-signature interfaces^ (interface-body interface-def)) + + (define-signature classes^ (class-body implements-dec extend-dec class-def)) + + (define-signature top-forms^ (top-member import-dec program)) + + ) +; +; ; +; ; +; ; ;;;; +; ; ; +; ; ; +; ; ; ;;;;; ;; ;;; ;;;; ;; ;; ;;; ;;;;; ;;;; ;; ;;;;; ;;;; ; +; ; ; ; ;; ; ; ;; ; ; ; ; ;; ; ; ; ;; +; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; +; ; ; ;;;;;; ; ; ; ; ; ; ;;;;;; ; ; ;;;;;;;;; ;;; +; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ;;; +; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; ; +; ; ; ; ;; ; ; ; ;; ; ;; ; ;; ; ;; ; ;; ;; ; +; ; ;;;;;;; ;;;; ;; ;;; ;;; ;;;; ; ;;; ;; ;;;; ;; ;;;; ; ;;;;; ; ;;;; +; ; ; ; +; ; ; ; +; ; ;;;;; ;;;;; +; +; (define beginner-unique-base +; (simple-expression +; (list (literals (list boolean-lits textual-lits prim-numeric-lits double-lits)) +; this +; IDENTIFIER +; (new-class IDENTIFIER (eta beginner-expression)) +; (simple-method-call (eta beginner-expression)) +; (sequence (O_PAREN (eta beginner-expression) C_PAREN) id "expression") +; (sequence (! (eta beginner-expression)) id "unary expression") +; (checks (eta beginner-expression))))) +; +; (define beginner-unique-end +; (simple-expression +; (list field-access-end +; (method-call-end (eta beginner-expression)) +; (binary-expression-end (bin-ops (list math-ops compare-ops bool-ops)) +; (eta beginner-expression))))) +; +; (define beginner-expression +; (sequence (beginner-unique-base (repeat beginner-unique-end)) id "expression")) +; +; (define beginner-statement +; (statement (list (if-s beginner-expression (eta beginner-statement) #f) +; (return-s beginner-expression #f)))) +; +; (define beginner-field (field #f value-type beginner-expression #f)) +; +; (define beginner-method-sig +; (method-signature #f value-type args)) +; +; (define beginner-method +; (method beginner-method-sig beginner-statement)) +; +; (define beginner-constructor (constructor #f init*)) +; +; (define beginner-interface +; (interface-def #f #f (method-header* beginner-method-sig))) +; +; (define beginner-class +; (class-def #f #f (implements-dec IDENTIFIER) +; (repeat (class-body (list beginner-field beginner-method beginner-constructor))))) +; +; (define beginner-program +; (program #f (repeat import-dec) +; (repeat (top-member (list beginner-class beginner-interface))))) +; +; (define parse-beginner (parser beginner-program)) +; +; (define intermediate-unique-base +; (simple-expression +; (list (literals (list boolean-lits textual-lits prim-numeric-lits double-lits)) +; this +; IDENTIFIER +; (new-class IDENTIFIER (eta intermediate-expression)) +; (simple-method-call (eta intermediate-expression)) +; (sequence (O_PAREN (eta intermediate-expression) C_PAREN) id "expression") +; (sequence (! (eta intermediate-expression)) id "unary expression") +; (cast value-type (eta intermediate-expression)) +; (super-call (eta intermediate-expression)) +; (checks (eta intermediate-expression))))) +; +; (define intermediate-unique-end +; (simple-expression +; (list field-access-end +; (method-call-end (eta intermediate-expression)) +; (binary-expression-end (bin-ops (list math-ops compare-ops bool-ops bit-ops)) +; (eta intermediate-expression))))) +; +; (define intermediate-expression +; (sequence (intermediate-unique-base (repeat intermediate-unique-end)) +; id "expression")) +; +; (define intermediate-stmt-expr +; (simple-expression (list (new-class IDENTIFIER intermediate-expression) +; (super-call intermediate-expression) +; (sequence (intermediate-expression +; (method-call-end intermediate-expression)) +; id "method call") +; (assignment IDENTIFIER EQUAL intermediate-expression)))) +; +; (define intermediate-statement +; (statement (list (if-s intermediate-expression (eta intermediate-statement) #f) +; (return-s intermediate-expression #t) +; (variable-declaration value-type intermediate-expression #f "local variable") +; (block (repeat (eta intermediate-statement))) +; (sequence (intermediate-stmt-expr SEMI_COLON) id "statement")))) +; +; (define intermediate-field (field access-mods value-type intermediate-expression #t)) +; +; (define intermediate-method-sig-no-abs +; (method-signature access-mods +; (method-type value-type) +; args)) +; (define intermediate-method-sig-abs +; (method-signature (method-mods access-mods) +; (method-type value-type) +; args)) +; +; (define intermediate-method +; (choose ((method intermediate-method-sig-no-abs intermediate-statement) +; (method-header intermediate-method-sig-abs)) "method definition top")) +; +; (define intermediate-constructor +; (constructor access-mods +; (choose ((sequence ((super-call intermediate-expression) (repeat intermediate-statement)) id) +; (sequence ((this-call intermediate-expression) (repeat intermediate-statement)) id) +; (repeat intermediate-statement)) "constructor body"))) +; +; (define intermediate-interface +; (interface-def +; #f +; (sequence (extends (comma-sep IDENTIFIER "interfaces")) id "extends") +; (method-header* intermediate-method-sig-no-abs))) +; +; (define intermediate-class +; (class-def abstract (extend-dec IDENTIFIER) (implements-dec (comma-sep IDENTIFIER "interfaces")) +; (repeat (class-body (list intermediate-field intermediate-method intermediate-constructor))))) +; +; (define intermediate-program +; (program #f (repeat import-dec) +; (repeat (top-member (list intermediate-class intermediate-interface))))) +; +; (define parse-intermediate (parser intermediate-program)) +; +; (define advanced-unique-base +; (simple-expression +; (list (literals (list boolean-lits textual-lits prim-numeric-lits double-lits)) +; this +; IDENTIFIER +; (new-class IDENTIFIER (eta advanced-expression)) +; (simple-method-call (eta advanced-expression)) +; (new-array value-type (eta advanced-expression)) +; (sequence (O_PAREN (eta advanced-expression) C_PAREN) id "expression") +; (sequence (! (eta advanced-expression)) id "unary expression") +; (cast value-type (eta advanced-expression)) +; (super-call (eta advanced-expression)) +; (checks (eta advanced-expression))))) +; +; (define advanced-unique-end +; (simple-expression +; (list field-access-end +; (array-access-end (eta advanced-expression)) +; (method-call-end (eta advanced-expression)) +; (if-expr-end (eta advanced-expression)) +; (binary-expression-end (bin-ops (list math-ops compare-ops bool-ops)) +; (eta advanced-expression))))) +; +; (define advanced-expression +; (sequence (advanced-unique-base (repeat advanced-unique-end)) id "expression")) +; +; +; (define advanced-stmt-expr +; (simple-expression (list (new-class IDENTIFIER advanced-expression) +; (super-call advanced-expression) +; (sequence (advanced-expression +; (method-call-end advanced-expression)) id "method call") +; (assignment IDENTIFIER assignment-ops advanced-expression) +; (sequence (advanced-expression ++) id "unary mutation") +; (sequence (advanced-expression --) id "unary mutation") +; (sequence (++ advanced-expression) id "unary mutation") +; (sequence (-- advanced-expression) id "unary mutation")))) +; +; (define advanced-statement +; (statement (list (if-s advanced-expression (eta advanced-statement) #t) +; (return-s advanced-expression #t) +; (variable-declaration value-type advanced-expression #t "local variable") +; (block (repeat (eta advanced-statement))) +; (sequence (advanced-stmt-expr SEMI_COLON) id "statement") +; (for-l (choose ((variable-declaration value-type advanced-expression #t "for loop variable") +; (comma-sep advanced-stmt-expr "initializations")) "for loop initialization") +; #t +; advanced-expression #t +; (comma-sep advanced-stmt-expr "for loop increments") #t (eta advanced-statement)) +; (while-l advanced-expression (eta advanced-statement)) +; (do-while advanced-expression (eta advanced-statement)) +; (break-s #f) +; (cont-s #f)))) +; +; (define advanced-field (field (global-mods access-mods) value-type advanced-expression #t)) +; +; (define advanced-method-sig-no-abs +; (method-signature (global-mods access-mods) +; (method-type value-type) +; args)) +; (define advanced-method-sig-abs +; (method-signature (method-mods (global-mods access-mods)) +; (method-type value-type) +; args)) +; +; (define advanced-method +; (choose ((method advanced-method-sig-no-abs advanced-statement) +; (method-header advanced-method-sig-abs)) "method definition")) +; +; (define advanced-constructor +; (constructor access-mods +; (choose ((sequence ((super-call advanced-expression) (repeat advanced-statement)) id) +; (sequence ((this-call advanced-expression) (repeat advanced-statement)) id) +; (repeat advanced-statement)) "constructor body"))) +; +; (define advanced-interface +; (interface-def +; #f +; (sequence (extends (comma-sep IDENTIFIER "interfaces")) id "extends") +; (method-header* advanced-method-sig-no-abs))) +; +; (define advanced-class +; (class-def abstract (extend-dec IDENTIFIER) (implements-dec (comma-sep IDENTIFIER "interfaces")) +; (repeat (class-body (list advanced-field advanced-method advanced-constructor +; (method-header advanced-method-sig-abs)))))) +; +; (define advanced-program +; (program (sequence (package name SEMI_COLON) id "package specification") +; (repeat import-dec) +; (repeat (top-member (list advanced-class advanced-interface))))) +; +; (define parse-advanced +; (parser advanced-program)) +; +; (define (old-tokens->new tok-list) +; (cond +; [(null? tok-list) null] +; [else +; (cons +; (make-position-token +; (case (token-name (position-token-token (car tok-list))) +; [(=) (token-EQUAL)] +; ((<) (token-LT)) +; ((>) (token-GT)) +; ((<=) (token-LTEQ)) +; ((>=) (token-GTEQ)) +; ((+) (token-PLUS)) +; ((-) (token-MINUS)) +; ((*) (token-TIMES)) +; ((/) (token-DIVIDE)) +; ((^) (token-^T)) +; ((if) (token-ifT)) +; ((do) (token-doT)) +; ((case) (token-caseT)) +; ((else) (token-elseT)) +; ((void) (token-voidT)) +; (else (position-token-token (car tok-list)))) +; (position-token-start-pos (car tok-list)) +; (position-token-end-pos (car tok-list))) +; (old-tokens->new (cdr tok-list)))])) +; +; ) +; +; (define-unit constants@ +; (import) +; (export error-format-parameters^) +; (define src? #t) +; (define input-type "file") +; (define show-options #f) +; (define max-depth 1) +; (define max-choice-depth 3)) +; +; (define-compound-unit/infer java-parsers@ +; (import) +; (export teaching-languages^) +; (link java-dictionary@ combinator-parser-tools@ constants@ java-grammars@)) +; +; (provide java-parsers@ teaching-languages^) +; +; ) +; diff --git a/collects/profj/comb-parsers/parser-units.scm b/collects/profj/comb-parsers/parser-units.scm new file mode 100644 index 0000000000..f08e9ed549 --- /dev/null +++ b/collects/profj/comb-parsers/parser-units.scm @@ -0,0 +1,993 @@ +(module parser-units (lib "lazy.ss" "lazy") + + (require (lib "unit.ss")) + + (require (lib "lex.ss" "parser-tools") + (lib "combinator-unit.ss" "combinator-parser") + "java-signatures.scm" + (lib "string.ss")) + + (define-unit java-dictionary@ + (import) + (export language-dictionary^ + (rename language-format-parameters^ + (output-map input->output-name))) + + (define class-type "keyword") + + (define (output-map x) + (!!! (when (position-token? x) + (set! x (position-token-token x)))) + (!!! (case (token-name x) + [(PIPE) "|"] + [(OR) "||"] + [(OREQUAL) "|="] + [(EQUAL) "="] + [(GT) ">"] + [(LT) "<"] + [(LTEQ) "<="] + [(GTEQ) ">="] + [(PLUS) "+"] + [(MINUS) "-"] + [(TIMES) "*"] + [(DIVIDE) "/"] + [(^T) "^"] + [(O_PAREN) "("] + [(C_PAREN) ")"] + [(O_BRACE) "{"] + [(C_BRACE) "}"] + [(O_BRACKET) "["] + [(C_BRACKET) "]"] + [(SEMI_COLON) ";"] + [(PERIOD) "."] + [(COMMA) ","] + [(NULL_LIT) "null"] + [(TRUE_LIT) "true"] + [(FALSE_LIT) "false"] + [(EOF) "end of input"] + [(caseT) "case"] + [(doT) "do"] + [(elseT) "else"] + [(ifT) "if"] + [(voidT) "void"] + [(STRING_LIT) (format "\"~a\"" (token-value x))] + [(CHAR_LIT) (format "'~a'" (token-value x))] + [(INTEGER_LIT LONG_LIT FLOAT_LIT DOUBLE_LIT + HEX_LIT OCT_LIT HEXL_LIT OCTL_LIT) (token-value x)] + [(IDENTIFIER) (format "identifier ~a" (token-value x))] + [(STRING_ERROR) (format "misformatted string ~a" (token-value x))] + [else (token-name x)]))) + + (define (java-keyword? t) + (memq t `(? this super new instanceof while try throw synchronized switch return ifT goto for finally + elseT doT default continue catch case break voidT throws const interface implements extends + class import package EQUAL += -= *= /= &= ^= %= <<= >>= >>>= + boolean byte char double float int long short + abstract native private protected public static strictfp transient volatile))) + + (define (close-to-keyword? t arg) + (printf "close-to-keyword ~a ~a~n" t arg) + (and (string? t) + (member t (select-words (string->symbol arg))))) + + (define (miscapitalized? t key) + (and (string? t) + (let ((s (string-copy t))) + (string-lowercase! s) + (equal? s key)))) + + (define misspelled-list '((import "mport" "iport" "imort" "imprt" "impot" "impor" "improt" "impourt") + (class "lass" "cass" "clss" "clas" "calss") + (abstract + "bstract" "astract" "abtract" "absract" "abstact" "abstrct" "abstrat" "abstract" "abstarct" "abstracts") + (extends "xtends" "etends" "exends" "extnds" "exteds" "extens" "extneds" "extend") + (new "nw" "ne" "nwe") + (this "his" "tis" "ths" "thi" "tihs" "thsi") + (instanceof "instancef" "instanceo" "intsanceof") + (if "fi") + (else "lse" "ese" "els" "eles") + (return "eturn" "rturn" "reurn" "retrn" "retun" "retur" "reutrn" "retrun" "returns") + (true "rue" "tue" "tre" "tru" "ture" "treu") + (false "flse" "fase" "fale" "fals" "flase" "fasle") + (interface + "nterface" "iterface" "inerface" "intrface" "inteface" "interace" "interfce" "interfae" "intreface") + (implements + "mplements" "iplements" "impements" "implments" "impleents" "implemnts" "implemets" "implemens" + "implement") + (void "oid" "vid" "voi" "viod") + (super "uper" "sper" "supr" "supe" "supper") + (public "ublic" "pblic" "pulic" "pubic" "publc" "publi" "pubilc") + (private "rivate" "pivate" "prvate" "priate" "privte" "privae" "privat" "pravite") + (package "ackage" "pckage" "pakage" "pacage" "packge" "packae" "packag") + (protected "rotected" "portected") + (final "inal" "fnal" "fial" "finl" "finale" "fianl") + (check "chek" "cehck" "chck" "chack") + (expect "expct" "expeet" "expec" "exect") + (within "with" "withi" "withen" "wihtin") + )) + + (define (select-words key) + (safe-car (filter (lambda (x) (eq? (car x) key)) misspelled-list))) + (define (safe-car f) + (if (null? f) null (car f))) + + (define all-words (filter string? (apply append misspelled-list))) + + + (define misspelled (lambda (id token-v) (if (close-to-keyword? token-v id) 1 0))) + (define misscap (lambda (id token-v) (miscapitalized? token-v id))) + (define missclass (lambda (id token-n) (and (eq? 'IDENTIFIER id) (java-keyword? token-n)))) + + ) + + (define-signature teaching-languages^ + (parse-beginner parse-beginner-interactions + parse-intermediate parse-intermediate-interactions parse-intermediate+access + parse-advanced parse-advanced-interactions + old-tokens->new)) + + (define-signature id^ (id)) + + ;Terminals unit + (define-unit java-terminals@ + (import combinator-parser^ id^) + (export java-operators^ java-separators^ java-literals^ java-expression-keywords^ + java-statement-keywords^ java-definition-keywords^ + java-type-keywords^ java-reserved^ java-extras^ java-vals^ java-ids^ java-specials^) + + (define-simple-terminals Operators + ((PIPE "|") (OR "||") (OREQUAL "|=") + (EQUAL "=") (GT ">") (LT "<") ! ~ ? : + == (LTEQ "<=") (GTEQ ">=") != && ++ -- + (PLUS "+") (MINUS "-") + (TIMES "*") (DIVIDE "/") & (^T "^") % << >> >>> + += -= *= /= &= ^= %= <<= >>= >>>=)) + + (define-simple-terminals Separators + ((O_PAREN "(") (C_PAREN ")") (O_BRACE "{") (C_BRACE "}") + (O_BRACKET "[") (C_BRACKET "]") (SEMI_COLON ";") (PERIOD ".") (COMMA ","))) + + (define-simple-terminals EmptyLiterals ((NULL_LIT "null") (TRUE_LIT "true") (FALSE_LIT "false") EOF)) + + (define-simple-terminals Keywords + (abstract default (ifT "if") private this + boolean (doT "do") implements protected throw + break double import public throws + byte (elseT "else") instanceof return transient + (caseT "case") extends int short try + catch final interface static (voidT "void") + char finally long strictfp volatile + class float native super while + const for new switch + continue goto package synchronized)) + + (define-simple-terminals ExtraKeywords (dynamic check expect within -> ->> ->>> test tests testcase)) + + (define-terminals java-vals + ((STRING_LIT "String literal" id) (CHAR_LIT "character" id) (INTEGER_LIT "integer" id) + (LONG_LIT "long" id) (FLOAT_LIT "float" id) (DOUBLE_LIT "double" id) + (IDENTIFIER "identifer" id) (STRING_ERROR id) + (NUMBER_ERROR id) (HEX_LIT id) (OCT_LIT id) (HEXL_LIT id) (OCTL_LIT id))) + + (define-terminals special-toks ((EXAMPLE id) (TEST_SUITE id) (IMAGE_SPECIAL id) (OTHER_SPECIAL id))) + + ) + + ;--------------------------------------------------------------------------------------------------- + ; Types, modifiers, operators + + + + (define-unit types@ + (import combinator-parser^ java-type-keywords^ java-ids^ java-separators^ id^) + (export java-types^) + + (define integer-types + (choose (byte int short long) "type")) + (define inexact-types + (choose (float double) "type")) + (define numeric-type + (choose (integer-types inexact-types) "numeric type")) + (define prim-type + (choose (boolean double byte int short char long float) "type")) + + (define (other-type-base types) (choice types "type")) + + (define (value+name-type base-type name) + (choose (base-type name) "type")) + + (define (method-type base-t) + (choice (list base-t voidT) "method return")) + + (define (array-type base-t) + (choice (base-t (sequence (base-t O_BRACKET C_BRACKET (repeat (sequence (O_BRACKET C_BRACKET) id))) id + "array type")) "type")) + ) + + + (define-unit mods@ + (import combinator-parser^ java-definition-keywords^) + (export java-access^) + + (define access-mods + (choose (public private protected) "access modifier")) + + (define (global-mods base-mods) + (choice (list base-mods static) "modifier")) + + (define (method-mods base-mods) + (choice (list base-mods abstract) "modifier")) + + ) + + (define-unit operators@ + (import combinator-parser^ java-operators^ java-separators^) + (export java-ops^) + + (define math-ops + (choose (PLUS MINUS TIMES DIVIDE %) "binary operation")) + + (define shift-ops + (choose (<< >> >>>) "shift operation")) + + (define compare-ops + (choose (== GT LT LTEQ GTEQ !=) "binary operation")) + + (define bool-ops + (choose (&& OR) "binary operation")) + + (define bit-ops + (choose (^T PIPE &) "binary operation")) + + (define assignment-ops + (choose (EQUAL OREQUAL += -= *= /= &= ^= %= <<= >>= >>>=) "assignment")) + + (define (bin-ops ops) + (choice ops "binary operation")) + + (define un-assignment + (choose (++ --) "unary operation")) + + (define un-op + (choose (~ + -) "unary operation")) + + ) + + (define-unit general@ + (import combinator-parser^ java-separators^ java-operators^ java-ids^ id^) + (export general-productions^) + + (define (comma-sep term name) + (sequence (term (repeat (sequence (COMMA term) id))) id (string-append "list of " name))) + + (define (variable-declaration type expr share-type? name) + (let* ([f (choose (IDENTIFIER (sequence ((^ IDENTIFIER) EQUAL expr) id)) (string-append name " declaration"))] + [s&e (sequence (type (comma-sep f name) SEMI_COLON) id (string-append name " definition"))] + [s (sequence (type (comma-sep IDENTIFIER name) SEMI_COLON) id (string-append name " definition"))] + [e (sequence (type (^ IDENTIFIER) EQUAL expr SEMI_COLON) id (string-append name " definition"))] + [base (sequence (type (^ IDENTIFIER) SEMI_COLON) id (string-append name " definition"))]) + (cond + [(and expr share-type?) s&e] + [share-type? s] + [expr (choose (e base) (string-append name " definition"))] + [else base]))) + + (define name + (sequence (IDENTIFIER (repeat (sequence (PERIOD IDENTIFIER) id))) id "name")) + + ) + + (define-unit expressions@ + (import combinator-parser^ general-productions^ id^ + java-literals^ java-expression-keywords^ java-vals^ java-ids^ java-separators^ + java-operators^ java-extras^) + (export expression-maker^ expr-lits^ expr-terms+^ expr-tails^) + + (define (simple-expression exprs) + (choice exprs "expression")) + + (define boolean-lits + (choose (TRUE_LIT FALSE_LIT) "boolean literal")) + + (define textual-lits + (choose (STRING_LIT CHAR_LIT) "literal expression")) + + (define prim-numeric-lits + (choose (INTEGER_LIT LONG_LIT) "literal expression")) + + (define numeric-lits + (choose (HEX_LIT HEXL_LIT OCTL_LIT OCT_LIT) "literal expression")) + + (define double-lits + (choose (FLOAT_LIT DOUBLE_LIT) "literal expression")) + + (define null-lit NULL_LIT) + + (define (literals lits) + (choice lits "literal expression")) + + (define all-literals + (choose (NULL_LIT boolean-lits textual-lits prim-numeric-lits double-lits numeric-lits) + "literal expression")) + + (define (new-class class-name expr) + (choose ((sequence (new class-name O_PAREN C_PAREN) id) + (sequence (new class-name O_PAREN (comma-sep expr "arguments") C_PAREN) id)) + "class instantiation")) + + (define (new-array type-name expr) + (sequence (new type-name O_BRACKET expr C_BRACKET (repeat (sequence (O_BRACKET expr C_BRACKET) id))) + id "array instantiation")) + + (define field-access-end + (sequence (PERIOD IDENTIFIER) id "field access")) + + (define (array-access-end expr) + (sequence (O_BRACKET expr C_BRACKET) id "array access")) + + (define (array-init-maker contents) + (sequence (O_BRACE (comma-sep contents "array elements") C_BRACE) id "array initializations")) + + (define (array-init type-name expr) + (letrec ([base-init (array-init-maker expr)] + [simple-init (array-init-maker (choose (expr base-init (eta init)) "array initializations"))] + [init (array-init-maker (choose (expr simple-init) "array initializations"))]) + (sequence (new type-name init) "array initialization"))) + + (define (binary-expression-end op expr) + (sequence ((^ op) expr) id "binary expression")) + + (define (if-expr-end expr) + (sequence (? expr : expr) id "conditional expression")) + + (define (simple-method-call expr) + (choose + ((sequence ((^ IDENTIFIER) O_PAREN C_PAREN) id "method invocation") + (sequence ((^ IDENTIFIER) O_PAREN (comma-sep expr "argument list") C_PAREN) id "method invocation")) + "method invocation")) + + (define (method-call-end expr) + (choose + ((sequence (PERIOD (^ IDENTIFIER) O_PAREN C_PAREN) id "method invocation") + (sequence (PERIOD (^ IDENTIFIER) O_PAREN (comma-sep expr "argument list") C_PAREN) id "method invocation")) + "method invocation")) + + (define (assignment name op expr) + (sequence ((^ name) op expr) id "assignment")) + + (define (unary-assignment-front expr) + (choose ((sequence (++ expr) id "unary modification") + (sequence (-- expr) id "unary modification")) "unary modification")) + + (define (unary-assignment-back base) + (choose ((sequence (base ++) id "unary modification") + (sequence (base --) id "unary modification")) "unary modification")) + + (define (cast type expr) + (sequence (O_PAREN type C_PAREN expr) "cast expression")) + + (define (instanceof-back name) + (sequence (instanceof name) "instanceof expression")) + + (define (super-call expr) + (choose ((sequence (super PERIOD IDENTIFIER O_PAREN C_PAREN) id "super method invocation") + (sequence (super PERIOD IDENTIFIER O_PAREN (comma-sep expr "arguments") C_PAREN) id "super method invocation")) + "super method invocation")) + + (define (checks expr) + (choose + ((sequence (check expr expect expr) id "check expression") + (sequence (check expr expect expr within expr) id "check expression")) + "check expression")) + + ) + + (define-unit statements@ + (import combinator-parser^ general-productions^ id^ + java-statement-keywords^ java-separators^ java-ids^ java-operators^) + (export statements^) + + (define (if-s expr statement else?) + (cond + [else? + (choose ((sequence (ifT O_PAREN expr C_PAREN statement elseT statement) id) + (sequence (ifT O_PAREN expr C_PAREN statement) id)) "if")] + [else (sequence (ifT O_PAREN expr C_PAREN statement elseT statement) id "if")])) + + (define (return-s expr opt?) + (cond + [opt? (choose ((sequence (return expr SEMI_COLON) id "return statement") + (sequence (return SEMI_COLON) id "return statement")) "return statement")] + [else (sequence (return expr SEMI_COLON) id "return statement")])) + + (define (this-call expr) + (choose ((sequence (this O_PAREN C_PAREN SEMI_COLON) id) + (sequence (this O_PAREN (comma-sep expr "arguments") C_PAREN SEMI_COLON) id)) "this constructor call")) + + (define (super-ctor-call expr) + (choose ((sequence (super O_PAREN C_PAREN SEMI_COLON) id) + (sequence (super O_PAREN (comma-sep expr "arguments") C_PAREN SEMI_COLON) id)) "super constructor call")) + + (define (block statement) + (sequence (O_BRACE statement C_BRACE) id "block statement")) + + (define (expression-stmt expr) + (sequence (expr SEMI_COLON) id "statement")) + + + (define (while-l expr statement) + (sequence (while O_PAREN expr C_PAREN statement) id "while loop")) + + (define (do-while expr statement) + (sequence (doT statement while O_PAREN expr C_PAREN SEMI_COLON) id "do loop")) + + (define (for-l init i-op? expr t-op? update up-op? statement) + (let ([full (sequence (for O_PAREN init SEMI_COLON expr SEMI_COLON update C_PAREN statement) id "for loop")] + [no-init (sequence (for O_PAREN SEMI_COLON expr SEMI_COLON update C_PAREN statement) id "for loop")] + [no-tst (sequence (for O_PAREN init SEMI_COLON SEMI_COLON update C_PAREN statement) id "for loop")] + [no-up (sequence (for O_PAREN init SEMI_COLON expr SEMI_COLON C_PAREN statement) id "for loop")] + [no-it (sequence (for O_PAREN SEMI_COLON SEMI_COLON update C_PAREN statement) id "for loop")] + [no-iu (sequence (for O_PAREN SEMI_COLON expr SEMI_COLON C_PAREN statement) id "for loop")] + [no-tu (sequence (for O_PAREN init SEMI_COLON SEMI_COLON C_PAREN statement) id "for loop")] + [none (sequence (for O_PAREN SEMI_COLON SEMI_COLON C_PAREN statement) id "for loop")]) + (cond + [(and i-op? t-op? up-op?) + (choice (list full no-init no-tst no-up no-it no-iu no-tu none) "for loop")] + [(and t-op? up-op?) + (choice (list full no-tst no-up no-tu) "for loop")] + [(and i-op? t-op?) + (choice (list full no-init no-tst no-it) "for loop")] + [(and i-op? up-op?) + (choice (list full no-init no-up no-iu) "for loop")] + [i-op? (choice (list full no-init) "for loop")] + [t-op? (choice (list full no-tst) "for loop")] + [up-op? (choice (list full no-up) "for loop")] + [else full]))) + + (define (break-s label) + (cond + [label (choose ((sequence (break SEMI_COLON) id) + (sequence (break label SEMI_COLON) id)) "break statement")] + [else (sequence (break SEMI_COLON) id "break statement")])) + + (define (cont-s label) + (cond + [label (choose ((sequence (continue SEMI_COLON) id) + (sequence (continue label SEMI_COLON) id)) "continue statement")] + [else (sequence (continue SEMI_COLON) id "continue statement")])) + + (define init + (sequence (this PERIOD IDENTIFIER EQUAL IDENTIFIER SEMI_COLON) id "field initialization")) + + (define (statement statements) + (choice statements "statement")) + + ) + + (define-unit members@ + (import combinator-parser^ general-productions^ id^ java-types^ + java-separators^ java-ids^ java-definition-keywords^) + (export fields^ methods^ ctors^) + + (define (field mods type expr share-types?) + (cond + [mods (sequence ((repeat mods) (variable-declaration type expr share-types? "field")) + id "field definition")] + [else (variable-declaration type expr share-types? "field")])) + + (define arg (sequence ((value+name-type prim-type IDENTIFIER) IDENTIFIER) id "argument")) + + (define args (comma-sep arg "parameter list")) + + ;method-signature: {U parser #f} [U parser #f] [U parser #f] bool bool parser -> parser + (define (method-signature m ret a t? n) + (let* ([method-parms (if a + (choose ((sequence (O_PAREN C_PAREN) id) + (sequence (O_PAREN a C_PAREN) id)) "method parameter list") + (sequence (O_PAREN C_PAREN) id "method parameter list"))] + [full (sequence ((repeat m) ret (^ IDENTIFIER) method-parms throws (comma-sep n "thrown type")) id "method signature")] + [full-no-t (sequence ((repeat m) ret (^ IDENTIFIER) method-parms) id "method signature")] + [no-mods-t (sequence (ret (^ IDENTIFIER) method-parms throws (comma-sep n "thrown type")) id "method signature")] + [no-mods (sequence (ret (^ IDENTIFIER) method-parms) id "method signature")]) + (cond + [(and m t?) (choose (full full-no-t) "method signature")] + [m full-no-t] + [t? (choose (no-mods-t no-mods) "method signature")] + [else no-mods]))) + + (define (method-header method-sig) + (sequence (method-sig SEMI_COLON) id "method declaration")) + + (define (method signature statement) + (sequence ((^ signature) O_BRACE statement C_BRACE) id "method definition")) + + (define (constructor mod body) + (let ([ctor (choose + ((sequence ((^ IDENTIFIER) O_PAREN C_PAREN O_BRACE body C_BRACE) id) + (sequence ((^ IDENTIFIER) O_PAREN args C_PAREN O_BRACE body C_BRACE) id)) + "constructor definition")]) + (cond + [mod (sequence ((repeat mod) ctor) id "constructor definition")] + [else ctor]))) + + ) + + (define-unit interface@ + (import combinator-parser^ id^ java-definition-keywords^ java-ids^ java-separators^) + (export interfaces^) + + (define (interface-body members) + (repeat (choice members "interface member"))) + + (define (interface-def modifier extends body) + (let ([m&e (sequence ((repeat modifier) interface (^ IDENTIFIER) extends O_BRACE body C_BRACE) + id "interface definition")] + [m (sequence ((repeat modifier) interface (^ IDENTIFIER) O_BRACE body C_BRACE) id "interface definition")] + [e (sequence (interface (^ IDENTIFIER) extends O_BRACE body C_BRACE id) "interface definition")] + [always (sequence (interface (^ IDENTIFIER) O_BRACE body C_BRACE) id "interface definition")]) + (choice (cond + [(and modifier extends) (list m&e m e always)] + [modifier (list m always)] + [extends (list e always)] + [else (list always)]) + "interface definition"))) + + ) + + (define-unit class@ + (import combinator-parser^ id^ java-definition-keywords^ java-ids^ java-separators^) + (export classes^) + + (define (class-body members) + (choice members "class member")) + + (define (implements-dec name) + (sequence (implements name) id "implementation declaration")) + + (define (extend-dec name) + (sequence (extends name) id "extends declaration")) + + (define (class-def mods extends implements body) + (let ([e&i (sequence (class (^ IDENTIFIER) extends implements O_BRACE body C_BRACE) id "class definition")] + [e (sequence (class (^ IDENTIFIER) extends O_BRACE body C_BRACE) id "class definition")] + [i (sequence (class (^ IDENTIFIER) implements O_BRACE body C_BRACE) id "class definition")] + [base (sequence (class (^ IDENTIFIER) O_BRACE body C_BRACE) id "class definition")]) + (let ([base-choice + (cond + [(and extends implements) + (choice (list e&i e i base) "class definition")] + [extends (choice (list e base) "class definition")] + [implements (choice (list i base) "class definition")] + [else base])]) + (cond + [mods (choose ((sequence (mods base-choice) id) base-choice) "class definition")] + [else base-choice])))) + + ) + + (define-unit top-forms@ + (import combinator-parser^ id^ java-definition-keywords^ java-separators^ + general-productions^) + (export top-forms^) + + (define (top-member mems) + (choice mems "program body")) + + (define import-dec + (choose + ((sequence (import name PERIOD TIMES SEMI_COLON) id) + (sequence (import name SEMI_COLON) id)) "import declaration")) + + (define (program package import body) + (let ([p&i (sequence (package import body) id "program")] + [p (sequence (package body) id "program")] + [i (sequence (import body) id "program")]) + (cond + [(and package import) + (choice (list p&i p i body) "program")] + [package + (choice (list p body) "program")] + [import + (choice (list i body) "program")] + [else body]))) + + ) + + (define-signature language-forms^ + (beginner-program beginner-statement beginner-expression beginner-field + intermediate-program intermediate+access-program intermediate-statement intermediate-expression + advanced-program advanced-statement advanced-expression + )) + (define-signature token-proc^ (old-tokens->new)) + + (define-signature parsers^ + (parse-beginner parse-intermediate parse-intermediate+access parse-advanced)) + + (define-unit java-grammars@ + (import combinator-parser^ java-operators^ java-separators^ + java-statement-keywords^ java-definition-keywords^ + java-type-keywords^ java-ids^ + java-types^ java-access^ java-ops^ general-productions^ + expression-maker^ expr-lits^ expr-terms+^ expr-tails^ statements^ + fields^ methods^ ctors^ interfaces^ classes^ top-forms^ id^) + (export language-forms^ token-proc^) + + ;Remembered Unsupported Features + ;throws clause + ;strictfp + ;allowing static fields in interface + + ;Beginner definition + + (define beginner-unique-base + (simple-expression + (list (literals (list boolean-lits textual-lits prim-numeric-lits double-lits)) + this + IDENTIFIER + (new-class IDENTIFIER (eta beginner-expression)) + (simple-method-call (eta beginner-expression)) + (sequence (O_PAREN (eta beginner-expression) C_PAREN) id "expression") + (sequence (! (eta beginner-expression)) id "conditional expression") + (sequence (MINUS (eta beginner-expression)) id "negation expression") + (checks (eta beginner-expression))))) + + (define beginner-unique-end + (simple-expression + (list field-access-end + (method-call-end (eta beginner-expression)) + (binary-expression-end (bin-ops (list math-ops compare-ops bool-ops)) + (eta beginner-expression))))) + + (define beginner-expression + (sequence (beginner-unique-base (repeat beginner-unique-end)) id "expression")) + + (define beginner-statement + (statement (list (if-s beginner-expression (eta beginner-statement) #f) + (return-s beginner-expression #f)))) + + (define beginner-field (field #f (value+name-type prim-type IDENTIFIER) beginner-expression #f)) + + (define beginner-method-sig + (method-signature #f (value+name-type prim-type IDENTIFIER) args #f IDENTIFIER)) + + (define beginner-method + (method beginner-method-sig beginner-statement)) + + (define beginner-constructor (constructor #f (repeat init))) + + (define beginner-interface + (interface-def #f #f (repeat beginner-method-sig))) + + (define beginner-class + (class-def #f #f (implements-dec IDENTIFIER) + (repeat (class-body (list beginner-field beginner-method beginner-constructor))))) + + (define beginner-program + (program #f (repeat import-dec) + (repeat (top-member (list beginner-class beginner-interface))))) + + + ; + ;Intermediate definition + ; + + (define intermediate-unique-base + (simple-expression + (list (literals (list null-lit boolean-lits textual-lits prim-numeric-lits double-lits)) + this + IDENTIFIER + (new-class IDENTIFIER (eta intermediate-expression)) + (simple-method-call (eta intermediate-expression)) + (sequence (O_PAREN (eta intermediate-expression) C_PAREN) id "expression") + (sequence (! (eta intermediate-expression)) id "conditional expression") + (sequence (MINUS (eta intermediate-expression)) id "negation expression") + (cast (value+name-type prim-type IDENTIFIER) (eta intermediate-expression)) + (super-call (eta intermediate-expression)) + (checks (eta intermediate-expression))))) + + (define intermediate-unique-end + (simple-expression + (list field-access-end + (method-call-end (eta intermediate-expression)) + (binary-expression-end (bin-ops (list math-ops compare-ops bool-ops bit-ops)) + (eta intermediate-expression)) + (instanceof-back (value+name-type prim-type IDENTIFIER))))) + + (define intermediate-expression + (sequence (intermediate-unique-base (repeat intermediate-unique-end)) + id "expression")) + + (define intermediate-stmt-expr + (simple-expression (list (new-class IDENTIFIER intermediate-expression) + (super-call intermediate-expression) + (sequence (intermediate-expression + (method-call-end intermediate-expression)) + id "method call") + (assignment + (choose (IDENTIFIER + (sequence (intermediate-unique-base field-access-end) id)) + "assignee") + EQUAL intermediate-expression)))) + + (define intermediate-statement + (statement (list (if-s intermediate-expression (eta intermediate-statement) #f) + (return-s intermediate-expression #t) + (variable-declaration (value+name-type prim-type IDENTIFIER) intermediate-expression #f "local variable") + (block (repeat (eta intermediate-statement))) + (sequence (intermediate-stmt-expr SEMI_COLON) id "statement")))) + + (define intermediate-field (field #f (value+name-type prim-type IDENTIFIER) intermediate-expression #t)) + (define intermediate+access-field (field access-mods (value+name-type prim-type IDENTIFIER) intermediate-expression #t)) + + (define intermediate-method-sig-no-abs + (method-signature #f (method-type (value+name-type prim-type IDENTIFIER)) args #f IDENTIFIER)) + (define intermediate-method-sig-abs + (method-signature abstract (method-type (value+name-type prim-type IDENTIFIER)) args #f IDENTIFIER)) + + (define intermediate+access-method-sig-no-abs + (method-signature access-mods (method-type (value+name-type prim-type IDENTIFIER)) args #f IDENTIFIER)) + (define intermediate+access-method-sig-abs + (method-signature (method-mods access-mods) (method-type (value+name-type prim-type IDENTIFIER)) args #f IDENTIFIER)) + + (define intermediate-method + (choose ((method intermediate-method-sig-no-abs intermediate-statement) + (method-header intermediate-method-sig-abs)) "method definition")) + + (define intermediate+access-method + (choose ((method intermediate+access-method-sig-no-abs intermediate-statement) + (method-header intermediate+access-method-sig-abs)) "method definition")) + + (define intermediate-constructor + (constructor #f + (choose ((sequence ((super-call intermediate-expression) (repeat intermediate-statement)) id) + (sequence ((this-call intermediate-expression) (repeat intermediate-statement)) id) + (repeat intermediate-statement)) "constructor body"))) + + (define intermediate+access-constructor + (constructor access-mods + (choose ((sequence ((super-call intermediate-expression) (repeat intermediate-statement)) id) + (sequence ((this-call intermediate-expression) (repeat intermediate-statement)) id) + (repeat intermediate-statement)) "constructor body"))) + + (define intermediate-interface + (interface-def + #f + (sequence (extends (comma-sep IDENTIFIER "interfaces")) id "extends") + (repeat intermediate-method-sig-no-abs))) + + (define intermediate-class + (class-def abstract (extend-dec IDENTIFIER) (implements-dec (comma-sep IDENTIFIER "interfaces")) + (repeat (class-body (list intermediate-field intermediate-method intermediate-constructor))))) + + (define intermediate+access-class + (class-def abstract (extend-dec IDENTIFIER) (implements-dec (comma-sep IDENTIFIER "interfaces")) + (repeat (class-body (list intermediate+access-field + intermediate+access-method + intermediate+access-constructor))))) + + (define intermediate-program + (program #f (repeat import-dec) + (repeat (top-member (list intermediate-class intermediate-interface))))) + + (define intermediate+access-program + (program #f (repeat import-dec) + (repeat (top-member (list intermediate+access-class intermediate-interface))))) + + + (define advanced-unique-base + (simple-expression + (list (literals (list null-lit boolean-lits textual-lits prim-numeric-lits double-lits)) + this + IDENTIFIER + (new-class IDENTIFIER (eta advanced-expression)) + (simple-method-call (eta advanced-expression)) + (new-array (value+name-type prim-type IDENTIFIER) (eta advanced-expression)) + (sequence (O_PAREN (eta advanced-expression) C_PAREN) id "expression") + (sequence (! (eta advanced-expression)) id "conditional expression") + (sequence (MINUS (eta advanced-expression)) id "negation exxpression") + (cast (value+name-type prim-type IDENTIFIER) (eta advanced-expression)) + (super-call (eta advanced-expression)) + (checks (eta advanced-expression))))) + + (define advanced-unique-end + (simple-expression + (list field-access-end + (array-access-end (eta advanced-expression)) + (method-call-end (eta advanced-expression)) + (if-expr-end (eta advanced-expression)) + (binary-expression-end (bin-ops (list math-ops compare-ops bool-ops bit-ops)) + (eta advanced-expression)) + (instanceof-back (value+name-type prim-type IDENTIFIER))))) + + + (define advanced-expression + (sequence (advanced-unique-base (repeat advanced-unique-end)) id "expression")) + + + (define advanced-stmt-expr + (simple-expression (list (new-class IDENTIFIER advanced-expression) + (super-call advanced-expression) + (sequence (advanced-expression + (method-call-end advanced-expression)) id "method call") + (assignment + (choose (IDENTIFIER + (sequence (advanced-expression field-access-end) id) + (sequence (advanced-expression array-access-end) id)) + "asignee") + assignment-ops advanced-expression) + (sequence (advanced-expression ++) id "unary mutation") + (sequence (advanced-expression --) id "unary mutation") + (sequence (++ advanced-expression) id "unary mutation") + (sequence (-- advanced-expression) id "unary mutation")))) + + (define advanced-statement + (statement (list (if-s advanced-expression (eta advanced-statement) #t) + (return-s advanced-expression #t) + (variable-declaration (value+name-type prim-type IDENTIFIER) advanced-expression #t "local variable") + (block (repeat (eta advanced-statement))) + (sequence (advanced-stmt-expr SEMI_COLON) id "statement") + (for-l (choose ((variable-declaration (value+name-type prim-type IDENTIFIER) advanced-expression #t "for loop variable") + (comma-sep advanced-stmt-expr "initializations")) "for loop initialization") + #t + advanced-expression #t + (comma-sep advanced-stmt-expr "for loop increments") #t (eta advanced-statement)) + (while-l advanced-expression (eta advanced-statement)) + (do-while advanced-expression (eta advanced-statement)) + (break-s #f) + (cont-s #f)))) + + (define advanced-field (field (global-mods access-mods) (value+name-type prim-type IDENTIFIER) advanced-expression #t)) + + (define advanced-method-sig-no-abs + (method-signature (global-mods access-mods) + (method-type (value+name-type prim-type IDENTIFIER)) args #f IDENTIFIER)) + (define advanced-method-sig-abs + (method-signature (method-mods (global-mods access-mods)) + (method-type (value+name-type prim-type IDENTIFIER)) args #f IDENTIFIER)) + + (define advanced-method + (choose ((method advanced-method-sig-no-abs advanced-statement) + (method-header advanced-method-sig-abs)) "method definition")) + + (define advanced-constructor + (constructor access-mods + (choose ((sequence ((super-call advanced-expression) (repeat advanced-statement)) id) + (sequence ((this-call advanced-expression) (repeat advanced-statement)) id) + (repeat advanced-statement)) "constructor body"))) + + (define advanced-interface + (interface-def + #f + (sequence (extends (comma-sep IDENTIFIER "interfaces")) id "extends") + (repeat (choose (advanced-method-sig-no-abs + (field (global-mods access-mods) (value+name-type prim-type IDENTIFIER) advanced-expression #t)) + "interface member definition")))) + + (define advanced-class + (class-def (choose (abstract public) "class modifier") + (extend-dec IDENTIFIER) (implements-dec (comma-sep IDENTIFIER "interfaces")) + (repeat (class-body (list advanced-field advanced-method advanced-constructor + (method-header advanced-method-sig-abs)))))) + + (define advanced-program + (program (sequence (package name SEMI_COLON) id "package specification") + (repeat import-dec) + (repeat (top-member (list advanced-class advanced-interface))))) + + (define (old-tokens->new tok-list) + (cond + [(null? tok-list) null] + [(eq? (token-name (position-token-token (car tok-list))) 'EOF) null] + [else + (cons + (make-position-token + (case (token-name (position-token-token (car tok-list))) + [(=) (token-EQUAL)] + ((<) (token-LT)) + ((>) (token-GT)) + ((<=) (token-LTEQ)) + ((>=) (token-GTEQ)) + ((+) (token-PLUS)) + ((-) (token-MINUS)) + ((*) (token-TIMES)) + ((/) (token-DIVIDE)) + ((^) (token-^T)) + ((if) (token-ifT)) + ((do) (token-doT)) + ((case) (token-caseT)) + ((else) (token-elseT)) + ((void) (token-voidT)) + (else (position-token-token (car tok-list)))) + (position-token-start-pos (car tok-list)) + (position-token-end-pos (car tok-list))) + (old-tokens->new (cdr tok-list)))])) + + ) + + + (define-unit full-program-parsers@ + (import language-forms^ combinator-parser^) + (export parsers^) + + (define parse-beginner (parser beginner-program)) + (define parse-intermediate (parser intermediate-program)) + (define parse-intermediate+access (parser intermediate+access-program)) + (define parse-advanced (parser advanced-program)) + + ) + + (define-unit interaction-parsers@ + (import language-forms^ combinator-parser^) + (export parsers^) + + (define parse-beginner (parser (choose (beginner-expression beginner-statement beginner-field) + "interactions program"))) + + (define parse-intermediate (parser (choose (intermediate-expression intermediate-statement) + "interactions program"))) + (define parse-intermediate+access parse-intermediate) + + (define parse-advanced + (parser (choose (advanced-expression advanced-statement) "interactions program"))) + ) + + (define-unit file-constants@ + (import) + (export error-format-parameters^) + (define src? #t) + (define input-type "file") + (define show-options #f) + (define max-depth 2) + (define max-choice-depth 3)) + + (define-unit de-constants@ + (import) + (export error-format-parameters^) + (define src? #t) + (define input-type "definitions window") + (define show-options #f) + (define max-depth 1) + (define max-choice-depth 3)) + + (define-unit interact-constants@ + (import) + (export error-format-parameters^) + (define src? #t) + (define input-type "interactions-window") + (define show-options #f) + (define max-depth 0) + (define max-choice-depth 3)) + + (define-unit id@ + (import) + (export id^) + (define (id x . args) x)) + + (define-compound-unit/infer java-file-parsers@ + (import) + (export parsers^ token-proc^ err^) + (link java-dictionary@ combinator-parser-tools@ file-constants@ id@ + java-terminals@ types@ mods@ operators@ general@ + expressions@ statements@ members@ interface@ class@ top-forms@ + java-grammars@ full-program-parsers@)) + + (define-compound-unit/infer java-definitions-parsers@ + (import) + (export parsers^ token-proc^ err^) + (link java-dictionary@ combinator-parser-tools@ de-constants@ id@ + java-terminals@ types@ mods@ operators@ general@ + expressions@ statements@ members@ interface@ class@ top-forms@ + java-grammars@ full-program-parsers@)) + + (define-compound-unit/infer java-interactions-parsers@ + (import) + (export parsers^ token-proc^ err^) + (link java-dictionary@ combinator-parser-tools@ interact-constants@ id@ + java-terminals@ types@ mods@ operators@ general@ + expressions@ statements@ members@ interface@ class@ top-forms@ + java-grammars@ interaction-parsers@)) + + (provide java-definitions-parsers@ java-interactions-parsers@ parsers^ token-proc^) + + ) + diff --git a/collects/profj/comb-parsers/parsers.scm b/collects/profj/comb-parsers/parsers.scm new file mode 100644 index 0000000000..5db4828950 --- /dev/null +++ b/collects/profj/comb-parsers/parsers.scm @@ -0,0 +1,355 @@ +(module parsers mzscheme + (require "parser-units.scm" + (only (lib "force.ss" "lazy") !!!) + (only (lib "combinator-unit.ss" "combinator-parser") err^) + (lib "unit.ss") + #;(lib "lex.ss" "parser-tools") + #;(prefix re: (lib "lex-sre.ss" "parser-tools"))) + + (provide parse-beginner parse-intermediate parse-intermediate+access parse-advanced + parse-beginner-interact parse-intermediate-interact parse-advanced-interact) + (define (trim-string s f l) + (substring s f (- (string-length s) l))) + +; (define-lex-abbrevs +; ;; 3.4 +; (CR #\015) +; (LF #\012) +; (LineTerminator (re:or CR +; LF +; (re:: CR LF))) +; (InputCharacter (re:~ CR LF)) +; +; ;; 3.6 +; (FF #\014) +; (TAB #\011) +; (WhiteSpace (re:or #\space +; TAB +; FF +; LineTerminator)) +; +; ;; 3.7 (Had to transform CommentTail and CommentTailStar into one RE) +; ;; (DocumentationComment only appears in version 1 of the spec) +; (Comment (re:or TraditionalComment +; EndOfLineComment +; DocumentationComment)) +; (TraditionalComment (re:: "/*" NotStar CommentTail)) +; (EndOfLineComment (re:: "//" (re:* InputCharacter))) +; (DocumentationComment (re:: "/**" CommentTailStar)) +; (CommentTail (re:: (re:* (re:: (re:* NotStar) (re:+ "*") NotStarNotSlash)) +; (re:* NotStar) +; (re:+ "*") +; "/")) +; (CommentTailStar (re:: (re:* (re:: (re:* "*") NotStarNotSlash (re:* NotStar) "*")) +; (re:* "*") +; "/")) +; (NotStar (re:~ "*")) +; (NotStarNotSlash (re:~ "*" "/")) +; +; (SyntaxComment (re:or TraditionalCommentEOF +; EndOfLineComment)) +; (TraditionalCommentEOF (re:: "/*" CommentTailEOF)) +; (CommentTailEOF (re:or (re:: (re:* (re:: (re:* NotStar) (re:+ "*") NotStarNotSlash)) +; (re:* NotStar) +; (re:+ "*") +; "/") +; (re:: (re:* (re:: (re:* NotStar) (re:+ "*") NotStarNotSlash)) +; (re:* NotStar) +; (re:* "*")))) +; +; ;; 3.8 (No need to worry about excluding keywords and such. They will +; ;; appear first in the lexer spec) +; ;Not UNICODE compliant +; (Identifier (re:: JavaLetter (re:* JavaLetterOrDigit))) +; (JavaLetter (re:or (re:/ "AZ" "az") "_" "$")) +; (JavaLetterOrDigit (re:or JavaLetter (re:/ "09"))) +; +; (KnownTypes (re:or "boolean" "byte" "char" "double" "float" "int" "long" "short" +; "String" "Object")) +; +; ;; 3.9 +; (Keyword (re:or "abstract" "default" "if" "private" "this" +; "boolean" "do" "implements" "protected" "throw" +; "break" "double" "import" "public" "throws" +; "byte" "else" "instanceof" "return" "transient" +; "case" "extends" "int" "short" "try" +; "catch" "final" "interface" "static" "void" +; "char" "finally" "long" "strictfp" "volatile" +; "class" "float" "native" "super" "while" +; "const" "for" "new" "switch" +; "continue" "goto" "package" "synchronized")) +; +; ;; 3.10.1 +; (Digits (re:+ (re:/ "09"))) +; (DigitsOpt (re:* (re:/ "09"))) +; +; (IntegerTypeSuffix (char-set "lL")) +; (DecimalNumeral (re:or #\0 +; (re:: (re:/ "19") (re:* (re:/ "09"))))) +; (HexDigit (re:/ "09" "af" "AF")) +; (HexNumeral (re:: #\0 (char-set "xX") (re:+ HexDigit))) +; (OctalNumeral (re:: #\0 (re:+ (re:/ "07")))) +; +; ;; 3.10.2 +; (FloatTypeSuffix (char-set "fF")) +; (DoubleTypeSuffix (char-set "dD")) +; +; (FloatA (re:: Digits #\. DigitsOpt (re:? ExponentPart))) +; (FloatB (re:: #\. Digits (re:? ExponentPart))) +; (FloatC (re:: Digits ExponentPart)) +; (FloatD (re:: Digits (re:? ExponentPart))) +; +; (ExponentPart (re:: (char-set "eE") (re:? (char-set "+-")) Digits)) +; +; ;; MORE +; +; ;; 3.10.6 +; (EscapeSequence (re:or "\\b" "\\t" "\\n" "\\f" "\\r" "\\\"" "\\'" "\\\\" +; (re:: #\\ (re:? (re:/ "03")) (re:/ "07") (re:/ "07")) +; (re:: #\\ (re:/ "07")))) +; +; ;; 3.12 +; (Operator (re:or "=" ">" "<" "!" "~" "?" ":" +; "==" "<=" ">=" "!=" "&&" "||" "++" "--" +; "+" "-" "*" "/" "&" "|" "^" "%" "<<" ">>" ">>>" +; "+=" "-=" "*=" "/=" "&=" "|=" "^=" "%=" "<<=" ">>=" ">>>="))) +; +; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; ;;Comment lexers +; +; (define read-line-comment +; (lexer +; [(re:~ #\newline) (read-line-comment input-port)] +; [#\newline end-pos] +; [(eof) end-pos] +; [(special) (read-line-comment input-port)] +; [(special-comment) (read-line-comment input-port)] +; )) +; +; (define read-block-comment +; (lexer +; ["*/" end-pos] +; [(eof) end-pos] +; [(re:or "*" "/" (complement (re:: any-string (re:or "*" "/") any-string))) (read-block-comment input-port)] +; [(special) (read-block-comment input-port)] +; [(special-comment) (read-block-comment input-port)] +; )) +; +; #;(define read-document-comment +; (lexer +; ["**/" end-pos] +; [(eof) end-pos] +; [(re:or "*" "/" (~ (any-string))) (read-document-comment input-port)] +; [(special) (read-document-comment input-port)] +; [(special-comment) (read-document-comment input-port)] +; [(special-error) (read-document-comment input-port)])) +; +; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; ;String lexer +; +; ;get-string: input-port -> (U STRING_LIT STRING_ERROR tokens) +; (define (get-string input-port) +; (letrec ((tokens (get-string-tokens input-port)) +; (last-token (list-ref tokens (sub1 (length tokens)))) +; (tokens->string +; (lambda (toks) +; ;Stops before the last element, which does not have a string +; (if (null? (cdr toks)) +; "" +; (string-append (string (token-value (position-token-token (car toks)))) +; (tokens->string (cdr toks))))))) +; (if (eq? 'STRING_END (token-name (position-token-token last-token))) +; (token-STRING_LIT (list (tokens->string tokens) (position-token-end-pos last-token))) +; (token-STRING_ERROR +; (list (tokens->string tokens) +; (position-token-end-pos last-token) +; (position-token-token last-token)))))) +; +; ;get-string-tokens: input-port -> (list position-token) +; (define (get-string-tokens input-port) +; (let ((tok (get-str-tok input-port))) +; (case (token-name (position-token-token tok)) +; ((STRING_EOF STRING_END STRING_NEWLINE) (list tok)) +; (else (cons tok (get-string-tokens input-port)))))) +; +; (define-tokens str-tok (STRING_CHAR)) +; (define-empty-tokens err (STRING_END STRING_EOF STRING_NEWLINE)) +; +; (define get-str-tok +; (lexer-src-pos +; (#\" (token-STRING_END)) +; (EscapeSequence (token-STRING_CHAR (EscapeSequence->char lexeme))) +; (InputCharacter (token-STRING_CHAR (string-ref lexeme 0))) +; ((re:or CR LF) (token-STRING_NEWLINE)) +; (#\032 (token-STRING_EOF)) +; ((eof) (token-STRING_EOF)))) +; +; ;; 3.10.6 +; (define (EscapeSequence->char es) +; (cond +; ((string=? es "\\b") #\010) +; ((string=? es "\\t") #\011) +; ((string=? es "\\n") #\012) +; ((string=? es "\\f") #\014) +; ((string=? es "\\r") #\015) +; ((string=? es "\\\"") #\") +; ((string=? es "\\'") #\') +; ((string=? es "\\\\") #\\) +; (else (integer->char (string->number (trim-string es 1 0) 8))))) +; +; (define get-token +; (lexer-src-pos +; ;; 3.12 +; (Operator (let ((l lexeme)) +; (cond +; ((string=? l "|") (token-PIPE)) +; ((string=? l "||") (token-OR)) +; ((string=? l "|=") (token-OREQUAL)) +; ((string=? l "=") (token-EQUAL)) +; ((string=? l "<") (token-LT)) +; ((string=? l ">") (token-GT)) +; ((string=? l "<=") (token-LTEQ)) +; ((string=? l ">=") (token-GTEQ)) +; ((string=? l "+") (token-PLUS)) +; ((string=? l "-") (token-MINUS)) +; ((string=? l "*") (token-TIMES)) +; ((string=? l "/") (token-DIVIDE)) +; ((string=? l "^") (token-^T)) +; (else (string->symbol l))))) +; +; ("->" (string->symbol lexeme)) +; ("->>" (string->symbol lexeme)) +; ("->>>" (string->symbol lexeme)) +; +; ;; 3.11 +; ("(" (token-O_PAREN)) +; (")" (token-C_PAREN)) +; ("{" (token-O_BRACE)) +; ("}" (token-C_BRACE)) +; ("[" (token-O_BRACKET)) +; ("]" (token-C_BRACKET)) +; (";" (token-SEMI_COLON)) +; ("," (token-COMMA)) +; ("." (token-PERIOD)) +; +; ;; 3.10.7 +; ("null" (token-NULL_LIT)) +; +; ;; 3.10.5 +; (#\" (get-string input-port)) +; ;(token-STRING_LIT (list->string (get-string input-port)))) +; +; ;; 3.10.4 +; ((re:: #\' (re:~ CR LF #\' #\\) #\') +; (token-CHAR_LIT (string-ref lexeme 1))) +; ((re:: #\' EscapeSequence #\') +; (token-CHAR_LIT (EscapeSequence->char +; (trim-string lexeme 1 1)))) +; +; ;; 3.10.3 +; ("true" (token-TRUE_LIT)) +; ("false" (token-FALSE_LIT)) +; +; ;; 3.10.2 +; ((re:or FloatA FloatB FloatC) +; (token-DOUBLE_LIT (string->number lexeme))) +; ((re:: (re:or FloatA FloatB FloatC FloatD) FloatTypeSuffix) +; (token-FLOAT_LIT (string->number (trim-string lexeme 0 1)))) +; ((re:: (re:or FloatA FloatB FloatC FloatD) DoubleTypeSuffix) +; (token-DOUBLE_LIT (string->number (trim-string lexeme 0 1)))) +; +; +; ;; 3.10.1 +; (DecimalNumeral +; (token-INTEGER_LIT (string->number lexeme 10))) +; ((re:: DecimalNumeral IntegerTypeSuffix) +; (token-LONG_LIT (string->number (trim-string lexeme 0 1) 10))) +; ((re:: HexNumeral IntegerTypeSuffix) +; (token-HEXL_LIT (string->number (trim-string lexeme 2 1) 16))) +; (HexNumeral +; (token-HEX_LIT (string->number (trim-string lexeme 2 0) 16))) +; (OctalNumeral +; (token-OCT_LIT (string->number (trim-string lexeme 1 0) 8))) +; ((re:: OctalNumeral IntegerTypeSuffix) +; (token-OCTL_LIT (string->number (trim-string lexeme 1 1) 8))) +; +; #;("dynamic" +; (cond +; ((dynamic?) (string->symbol lexeme)) +; (else (token-IDENTIFIER lexeme)))) +; +; #;((re:or "check" "expect" "within") +; (cond +; ((test-ext?) (string->symbol lexeme)) +; (else (token-IDENTIFIER lexeme)))) +; +; #;((re:or "test" "tests" "testcase") +; (cond +; ((testcase-ext?) (string->symbol lexeme)) +; (else (token-IDENTIFIER lexeme)))) +; +; ;; 3.9 +; (Keyword (string->symbol lexeme)) +; +; ;; 3.8 +; (Identifier (token-IDENTIFIER lexeme)) +; +; ;; 3.7 +; ("//" (begin (read-line-comment input-port) (return-without-pos (get-token input-port)))) +; ("/*" (begin (read-block-comment input-port) (return-without-pos (get-token input-port)))) +; #;("/**" (begin (read-document-comment input-port) (return-without-pos (get-token input-port)))) +; +; #;((special) +; (cond +; ((and (syntax? lexeme) (syntax-property lexeme 'test-case-box)) +; (token-TEST_SUITE (make-test-case lexeme))) +; ((and (syntax? lexeme) (syntax-property lexeme 'example-box)) +; (syntax-case lexeme () +; ((parse-example-box examples) (token-EXAMPLE (make-example-box (syntax examples)))))) +; ((is-a? lexeme (image-snip%)) +; (token-IMAGE_SPECIAL lexeme)) +; (else +; (token-OTHER_SPECIAL (list lexeme start-pos end-pos))))) +; +; ;; 3.6 +; ((re:+ WhiteSpace) (return-without-pos (get-token input-port))) +; +; ;; 3.5 +; (#\032 'EOF) +; ((eof) 'EOF) +; +; ((re:+ (re:/ "09" "az" "AZ")) (token-NUMBER_ERROR lexeme)) +; +; )) +; + + (define-values/invoke-unit java-definitions-parsers@ + (import) + (export (prefix def: parsers^) (prefix def: err^) token-proc^)) + + (define-values/invoke-unit java-interactions-parsers@ + (import) + (export (prefix int: parsers^) (prefix int: err^))) + + (define (parse parser err? err-src err-msg) + (lambda (program-stream location) + (let ([output + ;(with-handlers ((exn? + ; (lambda (e) + ; (string-append "parse function failed with this internal exception:" + ; (exn-message e))))) + (!!! ((!!! parser) (old-tokens->new program-stream) location))]);)]) + (if (err? output) (list (err-msg output) (!!! (err-src output))))))) + + (define parse-beginner (parse def:parse-beginner def:err? def:err-msg def:err-src)) + (define parse-intermediate (parse def:parse-intermediate def:err? def:err-msg def:err-src)) + (define parse-intermediate+access (parse def:parse-intermediate+access def:err? def:err-msg def:err-src)) + (define parse-advanced (parse def:parse-advanced def:err? def:err-msg def:err-src)) + (define parse-beginner-interact (parse int:parse-beginner int:err? int:err-msg int:err-src)) + (define parse-intermediate-interact (parse int:parse-intermediate int:err? int:err-msg int:err-src)) + (define parse-advanced-interact (parse int:parse-advanced int:err? int:err-msg int:err-src)) + + + + + ) \ No newline at end of file diff --git a/collects/profj/info.ss b/collects/profj/info.ss index 16ba7ec36e..53b1b365bb 100644 --- a/collects/profj/info.ss +++ b/collects/profj/info.ss @@ -8,6 +8,7 @@ (define pre-install-collection "pre-installer.ss") (define compile-subcollections '(("profj" "parsers") + ("profj" "comb-parsers") ("profj" "libs" "java" "lang") ("profj" "libs" "java" "io") ("profj" "libs" "java" "util"))) diff --git a/collects/profj/parameters.ss b/collects/profj/parameters.ss index 79acf7d061..fe85c55578 100644 --- a/collects/profj/parameters.ss +++ b/collects/profj/parameters.ss @@ -31,6 +31,8 @@ ;Stores the error function to trigger for parsing (define determine-error (make-parameter (lambda () #t))) + (define new-parser? (make-parameter #t)) + ;Stores a function which when called will produce (->token) of lexed tokens (define lex-stream (make-parameter (lambda () null))) diff --git a/collects/profj/parser.ss b/collects/profj/parser.ss index dd00037547..cb5bb239ea 100644 --- a/collects/profj/parser.ss +++ b/collects/profj/parser.ss @@ -7,11 +7,13 @@ "parsers/general-parsing.ss" "parsers/parse-error.ss" "parsers/lexer.ss" + (prefix err: "comb-parsers/parsers.scm") "ast.ss" "parameters.ss") (require (all-except (lib "lex.ss" "parser-tools") input-port) - (lib "readerr.ss" "syntax")) + (lib "readerr.ss" "syntax") + (lib "force.ss" "lazy")) (provide parse parse-interactions parse-expression parse-type parse-name lex-stream) ;function to lex in the entire port @@ -32,6 +34,21 @@ (begin0 (car token-list) (unless (null? (cdr token-list)) (set! token-list (cdr token-list)))))) + + (define (error-builder parser old-parser lexed loc) + (if (new-parser?) + (lambda () + (let ([result (!!! (parser lexed loc))]) + (printf "Calling new parser ~a ~n" result) + (if (list? result) + (raise-read-error (cadr result) + (car (car result)) + (cadr (car result)) + (caddr (car result)) + (cadddr (car result)) + (car (cddddr (car result)))) + (old-parser)))) + old-parser)) ;main parsing function @@ -42,13 +59,16 @@ (lex-stream (lambda () (getter lexed))) (case level ((beginner) - (determine-error find-beginner-error) + (determine-error (error-builder err:parse-beginner find-beginner-error lexed filename)) (parse-beginner my-get)) ((intermediate) - (determine-error find-intermediate-error) + (determine-error (error-builder err:parse-intermediate find-intermediate-error lexed filename)) (parse-intermediate my-get)) + ((intermediate+access) + (determine-error (error-builder err:parse-intermediate+access (lambda () #t) lexed filename)) + (error)) ((advanced) - (determine-error find-advanced-error) + (determine-error (error-builder err:parse-advanced find-advanced-error lexed filename)) (parse-advanced my-get)) ((full) (parse-full my-get))))) @@ -59,13 +79,13 @@ (lex-stream (lambda () (getter lexed))) (case level ((beginner) - (determine-error find-beginner-error-interactions) + (determine-error (error-builder err:parse-beginner-interact find-beginner-error-interactions lexed loc)) (parse-beginner-interactions my-get)) - ((intermediate) - (determine-error find-intermediate-error-interactions) + ((intermediate intermediate+access) + (determine-error (error-builder err:parse-intermediate-interact find-intermediate-error-interactions lexed loc)) (parse-intermediate-interactions my-get)) ((advanced) - (determine-error find-advanced-error-interactions) + (determine-error (error-builder err:parse-advanced-interact find-advanced-error-interactions lexed loc)) (parse-advanced-interactions my-get)) ((full) (parse-full-interactions my-get))))) diff --git a/collects/profj/parsers/advanced-parser.ss b/collects/profj/parsers/advanced-parser.ss index 5b2dd1b8d4..eb31ab41d8 100644 --- a/collects/profj/parsers/advanced-parser.ss +++ b/collects/profj/parsers/advanced-parser.ss @@ -582,11 +582,11 @@ [(new ClassOrInterfaceType DimExprs) (make-array-alloc #f (build-src 3) (make-type-spec $2 0 (build-src 2 2)) (reverse $3) 0)] ;; 1.1 - [(new PrimitiveType Dims ArrayInitializer) + #;[(new PrimitiveType Dims ArrayInitializer) (begin (display $2) (error 'unimplemented-1.1))] ;; 1.1 - [(new ClassOrInterfaceType Dims ArrayInitializer) (error 'unimplemented-1.1)]) + #;[(new ClassOrInterfaceType Dims ArrayInitializer) (error 'unimplemented-1.1)]) (DimExprs [(DimExpr) (list $1)]