392 lines
16 KiB
Scheme
392 lines
16 KiB
Scheme
(module parsers mzscheme
|
|
(require "parser-units.scm"
|
|
(only (lib "force.ss" "lazy") !!!)
|
|
(only (lib "combinator-unit.ss" "combinator-parser") err^)
|
|
(lib "unit.ss")
|
|
#;(lib "lex.ss" "parser-tools")
|
|
#;(prefix re: (lib "lex-sre.ss" "parser-tools")))
|
|
|
|
(provide parse-beginner parse-intermediate parse-intermediate+access parse-advanced
|
|
parse-beginner-interact parse-intermediate-interact parse-advanced-interact)
|
|
(define (trim-string s f l)
|
|
(substring s f (- (string-length s) l)))
|
|
|
|
; (define-lex-abbrevs
|
|
; ;; 3.4
|
|
; (CR #\015)
|
|
; (LF #\012)
|
|
; (LineTerminator (re:or CR
|
|
; LF
|
|
; (re:: CR LF)))
|
|
; (InputCharacter (re:~ CR LF))
|
|
;
|
|
; ;; 3.6
|
|
; (FF #\014)
|
|
; (TAB #\011)
|
|
; (WhiteSpace (re:or #\space
|
|
; TAB
|
|
; FF
|
|
; LineTerminator))
|
|
;
|
|
; ;; 3.7 (Had to transform CommentTail and CommentTailStar into one RE)
|
|
; ;; (DocumentationComment only appears in version 1 of the spec)
|
|
; (Comment (re:or TraditionalComment
|
|
; EndOfLineComment
|
|
; DocumentationComment))
|
|
; (TraditionalComment (re:: "/*" NotStar CommentTail))
|
|
; (EndOfLineComment (re:: "//" (re:* InputCharacter)))
|
|
; (DocumentationComment (re:: "/**" CommentTailStar))
|
|
; (CommentTail (re:: (re:* (re:: (re:* NotStar) (re:+ "*") NotStarNotSlash))
|
|
; (re:* NotStar)
|
|
; (re:+ "*")
|
|
; "/"))
|
|
; (CommentTailStar (re:: (re:* (re:: (re:* "*") NotStarNotSlash (re:* NotStar) "*"))
|
|
; (re:* "*")
|
|
; "/"))
|
|
; (NotStar (re:~ "*"))
|
|
; (NotStarNotSlash (re:~ "*" "/"))
|
|
;
|
|
; (SyntaxComment (re:or TraditionalCommentEOF
|
|
; EndOfLineComment))
|
|
; (TraditionalCommentEOF (re:: "/*" CommentTailEOF))
|
|
; (CommentTailEOF (re:or (re:: (re:* (re:: (re:* NotStar) (re:+ "*") NotStarNotSlash))
|
|
; (re:* NotStar)
|
|
; (re:+ "*")
|
|
; "/")
|
|
; (re:: (re:* (re:: (re:* NotStar) (re:+ "*") NotStarNotSlash))
|
|
; (re:* NotStar)
|
|
; (re:* "*"))))
|
|
;
|
|
; ;; 3.8 (No need to worry about excluding keywords and such. They will
|
|
; ;; appear first in the lexer spec)
|
|
; ;Not UNICODE compliant
|
|
; (Identifier (re:: JavaLetter (re:* JavaLetterOrDigit)))
|
|
; (JavaLetter (re:or (re:/ "AZ" "az") "_" "$"))
|
|
; (JavaLetterOrDigit (re:or JavaLetter (re:/ "09")))
|
|
;
|
|
; (KnownTypes (re:or "boolean" "byte" "char" "double" "float" "int" "long" "short"
|
|
; "String" "Object"))
|
|
;
|
|
; ;; 3.9
|
|
; (Keyword (re:or "abstract" "default" "if" "private" "this"
|
|
; "boolean" "do" "implements" "protected" "throw"
|
|
; "break" "double" "import" "public" "throws"
|
|
; "byte" "else" "instanceof" "return" "transient"
|
|
; "case" "extends" "int" "short" "try"
|
|
; "catch" "final" "interface" "static" "void"
|
|
; "char" "finally" "long" "strictfp" "volatile"
|
|
; "class" "float" "native" "super" "while"
|
|
; "const" "for" "new" "switch"
|
|
; "continue" "goto" "package" "synchronized"))
|
|
;
|
|
; ;; 3.10.1
|
|
; (Digits (re:+ (re:/ "09")))
|
|
; (DigitsOpt (re:* (re:/ "09")))
|
|
;
|
|
; (IntegerTypeSuffix (char-set "lL"))
|
|
; (DecimalNumeral (re:or #\0
|
|
; (re:: (re:/ "19") (re:* (re:/ "09")))))
|
|
; (HexDigit (re:/ "09" "af" "AF"))
|
|
; (HexNumeral (re:: #\0 (char-set "xX") (re:+ HexDigit)))
|
|
; (OctalNumeral (re:: #\0 (re:+ (re:/ "07"))))
|
|
;
|
|
; ;; 3.10.2
|
|
; (FloatTypeSuffix (char-set "fF"))
|
|
; (DoubleTypeSuffix (char-set "dD"))
|
|
;
|
|
; (FloatA (re:: Digits #\. DigitsOpt (re:? ExponentPart)))
|
|
; (FloatB (re:: #\. Digits (re:? ExponentPart)))
|
|
; (FloatC (re:: Digits ExponentPart))
|
|
; (FloatD (re:: Digits (re:? ExponentPart)))
|
|
;
|
|
; (ExponentPart (re:: (char-set "eE") (re:? (char-set "+-")) Digits))
|
|
;
|
|
; ;; MORE
|
|
;
|
|
; ;; 3.10.6
|
|
; (EscapeSequence (re:or "\\b" "\\t" "\\n" "\\f" "\\r" "\\\"" "\\'" "\\\\"
|
|
; (re:: #\\ (re:? (re:/ "03")) (re:/ "07") (re:/ "07"))
|
|
; (re:: #\\ (re:/ "07"))))
|
|
;
|
|
; ;; 3.12
|
|
; (Operator (re:or "=" ">" "<" "!" "~" "?" ":"
|
|
; "==" "<=" ">=" "!=" "&&" "||" "++" "--"
|
|
; "+" "-" "*" "/" "&" "|" "^" "%" "<<" ">>" ">>>"
|
|
; "+=" "-=" "*=" "/=" "&=" "|=" "^=" "%=" "<<=" ">>=" ">>>=")))
|
|
;
|
|
; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
; ;;Comment lexers
|
|
;
|
|
; (define read-line-comment
|
|
; (lexer
|
|
; [(re:~ #\newline) (read-line-comment input-port)]
|
|
; [#\newline end-pos]
|
|
; [(eof) end-pos]
|
|
; [(special) (read-line-comment input-port)]
|
|
; [(special-comment) (read-line-comment input-port)]
|
|
; ))
|
|
;
|
|
; (define read-block-comment
|
|
; (lexer
|
|
; ["*/" end-pos]
|
|
; [(eof) end-pos]
|
|
; [(re:or "*" "/" (complement (re:: any-string (re:or "*" "/") any-string))) (read-block-comment input-port)]
|
|
; [(special) (read-block-comment input-port)]
|
|
; [(special-comment) (read-block-comment input-port)]
|
|
; ))
|
|
;
|
|
; #;(define read-document-comment
|
|
; (lexer
|
|
; ["**/" end-pos]
|
|
; [(eof) end-pos]
|
|
; [(re:or "*" "/" (~ (any-string))) (read-document-comment input-port)]
|
|
; [(special) (read-document-comment input-port)]
|
|
; [(special-comment) (read-document-comment input-port)]
|
|
; [(special-error) (read-document-comment input-port)]))
|
|
;
|
|
; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
; ;String lexer
|
|
;
|
|
; ;get-string: input-port -> (U STRING_LIT STRING_ERROR tokens)
|
|
; (define (get-string input-port)
|
|
; (letrec ((tokens (get-string-tokens input-port))
|
|
; (last-token (list-ref tokens (sub1 (length tokens))))
|
|
; (tokens->string
|
|
; (lambda (toks)
|
|
; ;Stops before the last element, which does not have a string
|
|
; (if (null? (cdr toks))
|
|
; ""
|
|
; (string-append (string (token-value (position-token-token (car toks))))
|
|
; (tokens->string (cdr toks)))))))
|
|
; (if (eq? 'STRING_END (token-name (position-token-token last-token)))
|
|
; (token-STRING_LIT (list (tokens->string tokens) (position-token-end-pos last-token)))
|
|
; (token-STRING_ERROR
|
|
; (list (tokens->string tokens)
|
|
; (position-token-end-pos last-token)
|
|
; (position-token-token last-token))))))
|
|
;
|
|
; ;get-string-tokens: input-port -> (list position-token)
|
|
; (define (get-string-tokens input-port)
|
|
; (let ((tok (get-str-tok input-port)))
|
|
; (case (token-name (position-token-token tok))
|
|
; ((STRING_EOF STRING_END STRING_NEWLINE) (list tok))
|
|
; (else (cons tok (get-string-tokens input-port))))))
|
|
;
|
|
; (define-tokens str-tok (STRING_CHAR))
|
|
; (define-empty-tokens err (STRING_END STRING_EOF STRING_NEWLINE))
|
|
;
|
|
; (define get-str-tok
|
|
; (lexer-src-pos
|
|
; (#\" (token-STRING_END))
|
|
; (EscapeSequence (token-STRING_CHAR (EscapeSequence->char lexeme)))
|
|
; (InputCharacter (token-STRING_CHAR (string-ref lexeme 0)))
|
|
; ((re:or CR LF) (token-STRING_NEWLINE))
|
|
; (#\032 (token-STRING_EOF))
|
|
; ((eof) (token-STRING_EOF))))
|
|
;
|
|
; ;; 3.10.6
|
|
; (define (EscapeSequence->char es)
|
|
; (cond
|
|
; ((string=? es "\\b") #\010)
|
|
; ((string=? es "\\t") #\011)
|
|
; ((string=? es "\\n") #\012)
|
|
; ((string=? es "\\f") #\014)
|
|
; ((string=? es "\\r") #\015)
|
|
; ((string=? es "\\\"") #\")
|
|
; ((string=? es "\\'") #\')
|
|
; ((string=? es "\\\\") #\\)
|
|
; (else (integer->char (string->number (trim-string es 1 0) 8)))))
|
|
;
|
|
; (define get-token
|
|
; (lexer-src-pos
|
|
; ;; 3.12
|
|
; (Operator (let ((l lexeme))
|
|
; (cond
|
|
; ((string=? l "|") (token-PIPE))
|
|
; ((string=? l "||") (token-OR))
|
|
; ((string=? l "|=") (token-OREQUAL))
|
|
; ((string=? l "=") (token-EQUAL))
|
|
; ((string=? l "<") (token-LT))
|
|
; ((string=? l ">") (token-GT))
|
|
; ((string=? l "<=") (token-LTEQ))
|
|
; ((string=? l ">=") (token-GTEQ))
|
|
; ((string=? l "+") (token-PLUS))
|
|
; ((string=? l "-") (token-MINUS))
|
|
; ((string=? l "*") (token-TIMES))
|
|
; ((string=? l "/") (token-DIVIDE))
|
|
; ((string=? l "^") (token-^T))
|
|
; (else (string->symbol l)))))
|
|
;
|
|
; ("->" (string->symbol lexeme))
|
|
; ("->>" (string->symbol lexeme))
|
|
; ("->>>" (string->symbol lexeme))
|
|
;
|
|
; ;; 3.11
|
|
; ("(" (token-O_PAREN))
|
|
; (")" (token-C_PAREN))
|
|
; ("{" (token-O_BRACE))
|
|
; ("}" (token-C_BRACE))
|
|
; ("[" (token-O_BRACKET))
|
|
; ("]" (token-C_BRACKET))
|
|
; (";" (token-SEMI_COLON))
|
|
; ("," (token-COMMA))
|
|
; ("." (token-PERIOD))
|
|
;
|
|
; ;; 3.10.7
|
|
; ("null" (token-NULL_LIT))
|
|
;
|
|
; ;; 3.10.5
|
|
; (#\" (get-string input-port))
|
|
; ;(token-STRING_LIT (list->string (get-string input-port))))
|
|
;
|
|
; ;; 3.10.4
|
|
; ((re:: #\' (re:~ CR LF #\' #\\) #\')
|
|
; (token-CHAR_LIT (string-ref lexeme 1)))
|
|
; ((re:: #\' EscapeSequence #\')
|
|
; (token-CHAR_LIT (EscapeSequence->char
|
|
; (trim-string lexeme 1 1))))
|
|
;
|
|
; ;; 3.10.3
|
|
; ("true" (token-TRUE_LIT))
|
|
; ("false" (token-FALSE_LIT))
|
|
;
|
|
; ;; 3.10.2
|
|
; ((re:or FloatA FloatB FloatC)
|
|
; (token-DOUBLE_LIT (string->number lexeme)))
|
|
; ((re:: (re:or FloatA FloatB FloatC FloatD) FloatTypeSuffix)
|
|
; (token-FLOAT_LIT (string->number (trim-string lexeme 0 1))))
|
|
; ((re:: (re:or FloatA FloatB FloatC FloatD) DoubleTypeSuffix)
|
|
; (token-DOUBLE_LIT (string->number (trim-string lexeme 0 1))))
|
|
;
|
|
;
|
|
; ;; 3.10.1
|
|
; (DecimalNumeral
|
|
; (token-INTEGER_LIT (string->number lexeme 10)))
|
|
; ((re:: DecimalNumeral IntegerTypeSuffix)
|
|
; (token-LONG_LIT (string->number (trim-string lexeme 0 1) 10)))
|
|
; ((re:: HexNumeral IntegerTypeSuffix)
|
|
; (token-HEXL_LIT (string->number (trim-string lexeme 2 1) 16)))
|
|
; (HexNumeral
|
|
; (token-HEX_LIT (string->number (trim-string lexeme 2 0) 16)))
|
|
; (OctalNumeral
|
|
; (token-OCT_LIT (string->number (trim-string lexeme 1 0) 8)))
|
|
; ((re:: OctalNumeral IntegerTypeSuffix)
|
|
; (token-OCTL_LIT (string->number (trim-string lexeme 1 1) 8)))
|
|
;
|
|
; #;("dynamic"
|
|
; (cond
|
|
; ((dynamic?) (string->symbol lexeme))
|
|
; (else (token-IDENTIFIER lexeme))))
|
|
;
|
|
; #;((re:or "check" "expect" "within")
|
|
; (cond
|
|
; ((test-ext?) (string->symbol lexeme))
|
|
; (else (token-IDENTIFIER lexeme))))
|
|
;
|
|
; #;((re:or "test" "tests" "testcase")
|
|
; (cond
|
|
; ((testcase-ext?) (string->symbol lexeme))
|
|
; (else (token-IDENTIFIER lexeme))))
|
|
;
|
|
; ;; 3.9
|
|
; (Keyword (string->symbol lexeme))
|
|
;
|
|
; ;; 3.8
|
|
; (Identifier (token-IDENTIFIER lexeme))
|
|
;
|
|
; ;; 3.7
|
|
; ("//" (begin (read-line-comment input-port) (return-without-pos (get-token input-port))))
|
|
; ("/*" (begin (read-block-comment input-port) (return-without-pos (get-token input-port))))
|
|
; #;("/**" (begin (read-document-comment input-port) (return-without-pos (get-token input-port))))
|
|
;
|
|
; #;((special)
|
|
; (cond
|
|
; ((and (syntax? lexeme) (syntax-property lexeme 'test-case-box))
|
|
; (token-TEST_SUITE (make-test-case lexeme)))
|
|
; ((and (syntax? lexeme) (syntax-property lexeme 'example-box))
|
|
; (syntax-case lexeme ()
|
|
; ((parse-example-box examples) (token-EXAMPLE (make-example-box (syntax examples))))))
|
|
; ((is-a? lexeme (image-snip%))
|
|
; (token-IMAGE_SPECIAL lexeme))
|
|
; (else
|
|
; (token-OTHER_SPECIAL (list lexeme start-pos end-pos)))))
|
|
;
|
|
; ;; 3.6
|
|
; ((re:+ WhiteSpace) (return-without-pos (get-token input-port)))
|
|
;
|
|
; ;; 3.5
|
|
; (#\032 'EOF)
|
|
; ((eof) 'EOF)
|
|
;
|
|
; ((re:+ (re:/ "09" "az" "AZ")) (token-NUMBER_ERROR lexeme))
|
|
;
|
|
; ))
|
|
;
|
|
|
|
; (define-values/invoke-unit java-definitions-parsers@
|
|
; (import)
|
|
; (export (prefix def: parsers^) (prefix def: err^) token-proc^))
|
|
;
|
|
; (define-values/invoke-unit java-interactions-parsers@
|
|
; (import)
|
|
; (export (prefix int: parsers^) (prefix int: err^)))
|
|
|
|
(define-values/invoke-unit beginner-definitions-parser@
|
|
(import)
|
|
(export (prefix beginner-def: parsers^) (prefix beginner-def: err^) token-proc^))
|
|
(define-values/invoke-unit beginner-interactions-parsers@
|
|
(import)
|
|
(export (prefix beginner-int: parsers^) (prefix beginner-int: err^)))
|
|
|
|
(define-values/invoke-unit intermediate-definitions-parser@
|
|
(import)
|
|
(export (prefix intermediate-def: parsers^) (prefix intermediate-def: err^)))
|
|
(define-values/invoke-unit intermediate-interactions-parsers@
|
|
(import)
|
|
(export (prefix intermediate-int: parsers^) (prefix intermediate-int: err^)))
|
|
|
|
(define-values/invoke-unit intermediate+access-definitions-parser@
|
|
(import)
|
|
(export (prefix intermediate+acc-def: parsers^) (prefix intermediate+acc-def: err^)))
|
|
(define-values/invoke-unit intermediate+access-interactions-parsers@
|
|
(import)
|
|
(export (prefix intermediate+acc-int: parsers^) (prefix intermediate+acc-int: err^)))
|
|
|
|
|
|
(define-values/invoke-unit advanced-definitions-parser@
|
|
(import)
|
|
(export (prefix advanced-def: parsers^) (prefix advanced-def: err^)))
|
|
(define-values/invoke-unit advanced-interactions-parsers@
|
|
(import)
|
|
(export (prefix advanced-int: parsers^) (prefix advanced-int: err^) ))
|
|
|
|
|
|
(define (parse parser err? err-src err-msg)
|
|
(lambda (program-stream location)
|
|
(let ([output
|
|
;(with-handlers ((exn?
|
|
; (lambda (e)
|
|
; (string-append "parse function failed with this internal exception:"
|
|
; (exn-message e)))))
|
|
(!!! ((!!! parser) (old-tokens->new program-stream) location))]);)])
|
|
(if (err? output) (list (err-msg output) (!!! (err-src output)))))))
|
|
|
|
(define parse-beginner (parse beginner-def:parse-program
|
|
beginner-def:err? beginner-def:err-msg beginner-def:err-src))
|
|
(define parse-intermediate (parse intermediate-def:parse-program
|
|
intermediate-def:err? intermediate-def:err-msg intermediate-def:err-src))
|
|
(define parse-intermediate+access (parse intermediate+acc-def:parse-program
|
|
intermediate+acc-def:err? intermediate+acc-def:err-msg intermediate+acc-def:err-src))
|
|
(define parse-advanced (parse advanced-def:parse-program
|
|
advanced-def:err? advanced-def:err-msg advanced-def:err-src))
|
|
(define parse-beginner-interact (parse beginner-int:parse-program
|
|
beginner-int:err? beginner-int:err-msg beginner-int:err-src))
|
|
(define parse-intermediate-interact (parse intermediate-int:parse-program
|
|
intermediate-int:err? intermediate-int:err-msg intermediate-int:err-src))
|
|
(define parse-advanced-interact (parse advanced-int:parse-program
|
|
advanced-int:err? advanced-int:err-msg advanced-int:err-src))
|
|
|
|
|
|
|
|
|
|
) |