racket/collects/profj/comb-parsers/parsers.scm
2007-07-02 22:26:01 +00:00

392 lines
16 KiB
Scheme

(module parsers mzscheme
(require "parser-units.scm"
(only (lib "force.ss" "lazy") !!!)
(only (lib "combinator-unit.ss" "combinator-parser") err^)
(lib "unit.ss")
#;(lib "lex.ss" "parser-tools")
#;(prefix re: (lib "lex-sre.ss" "parser-tools")))
(provide parse-beginner parse-intermediate parse-intermediate+access parse-advanced
parse-beginner-interact parse-intermediate-interact parse-advanced-interact)
(define (trim-string s f l)
(substring s f (- (string-length s) l)))
; (define-lex-abbrevs
; ;; 3.4
; (CR #\015)
; (LF #\012)
; (LineTerminator (re:or CR
; LF
; (re:: CR LF)))
; (InputCharacter (re:~ CR LF))
;
; ;; 3.6
; (FF #\014)
; (TAB #\011)
; (WhiteSpace (re:or #\space
; TAB
; FF
; LineTerminator))
;
; ;; 3.7 (Had to transform CommentTail and CommentTailStar into one RE)
; ;; (DocumentationComment only appears in version 1 of the spec)
; (Comment (re:or TraditionalComment
; EndOfLineComment
; DocumentationComment))
; (TraditionalComment (re:: "/*" NotStar CommentTail))
; (EndOfLineComment (re:: "//" (re:* InputCharacter)))
; (DocumentationComment (re:: "/**" CommentTailStar))
; (CommentTail (re:: (re:* (re:: (re:* NotStar) (re:+ "*") NotStarNotSlash))
; (re:* NotStar)
; (re:+ "*")
; "/"))
; (CommentTailStar (re:: (re:* (re:: (re:* "*") NotStarNotSlash (re:* NotStar) "*"))
; (re:* "*")
; "/"))
; (NotStar (re:~ "*"))
; (NotStarNotSlash (re:~ "*" "/"))
;
; (SyntaxComment (re:or TraditionalCommentEOF
; EndOfLineComment))
; (TraditionalCommentEOF (re:: "/*" CommentTailEOF))
; (CommentTailEOF (re:or (re:: (re:* (re:: (re:* NotStar) (re:+ "*") NotStarNotSlash))
; (re:* NotStar)
; (re:+ "*")
; "/")
; (re:: (re:* (re:: (re:* NotStar) (re:+ "*") NotStarNotSlash))
; (re:* NotStar)
; (re:* "*"))))
;
; ;; 3.8 (No need to worry about excluding keywords and such. They will
; ;; appear first in the lexer spec)
; ;Not UNICODE compliant
; (Identifier (re:: JavaLetter (re:* JavaLetterOrDigit)))
; (JavaLetter (re:or (re:/ "AZ" "az") "_" "$"))
; (JavaLetterOrDigit (re:or JavaLetter (re:/ "09")))
;
; (KnownTypes (re:or "boolean" "byte" "char" "double" "float" "int" "long" "short"
; "String" "Object"))
;
; ;; 3.9
; (Keyword (re:or "abstract" "default" "if" "private" "this"
; "boolean" "do" "implements" "protected" "throw"
; "break" "double" "import" "public" "throws"
; "byte" "else" "instanceof" "return" "transient"
; "case" "extends" "int" "short" "try"
; "catch" "final" "interface" "static" "void"
; "char" "finally" "long" "strictfp" "volatile"
; "class" "float" "native" "super" "while"
; "const" "for" "new" "switch"
; "continue" "goto" "package" "synchronized"))
;
; ;; 3.10.1
; (Digits (re:+ (re:/ "09")))
; (DigitsOpt (re:* (re:/ "09")))
;
; (IntegerTypeSuffix (char-set "lL"))
; (DecimalNumeral (re:or #\0
; (re:: (re:/ "19") (re:* (re:/ "09")))))
; (HexDigit (re:/ "09" "af" "AF"))
; (HexNumeral (re:: #\0 (char-set "xX") (re:+ HexDigit)))
; (OctalNumeral (re:: #\0 (re:+ (re:/ "07"))))
;
; ;; 3.10.2
; (FloatTypeSuffix (char-set "fF"))
; (DoubleTypeSuffix (char-set "dD"))
;
; (FloatA (re:: Digits #\. DigitsOpt (re:? ExponentPart)))
; (FloatB (re:: #\. Digits (re:? ExponentPart)))
; (FloatC (re:: Digits ExponentPart))
; (FloatD (re:: Digits (re:? ExponentPart)))
;
; (ExponentPart (re:: (char-set "eE") (re:? (char-set "+-")) Digits))
;
; ;; MORE
;
; ;; 3.10.6
; (EscapeSequence (re:or "\\b" "\\t" "\\n" "\\f" "\\r" "\\\"" "\\'" "\\\\"
; (re:: #\\ (re:? (re:/ "03")) (re:/ "07") (re:/ "07"))
; (re:: #\\ (re:/ "07"))))
;
; ;; 3.12
; (Operator (re:or "=" ">" "<" "!" "~" "?" ":"
; "==" "<=" ">=" "!=" "&&" "||" "++" "--"
; "+" "-" "*" "/" "&" "|" "^" "%" "<<" ">>" ">>>"
; "+=" "-=" "*=" "/=" "&=" "|=" "^=" "%=" "<<=" ">>=" ">>>=")))
;
; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; ;;Comment lexers
;
; (define read-line-comment
; (lexer
; [(re:~ #\newline) (read-line-comment input-port)]
; [#\newline end-pos]
; [(eof) end-pos]
; [(special) (read-line-comment input-port)]
; [(special-comment) (read-line-comment input-port)]
; ))
;
; (define read-block-comment
; (lexer
; ["*/" end-pos]
; [(eof) end-pos]
; [(re:or "*" "/" (complement (re:: any-string (re:or "*" "/") any-string))) (read-block-comment input-port)]
; [(special) (read-block-comment input-port)]
; [(special-comment) (read-block-comment input-port)]
; ))
;
; #;(define read-document-comment
; (lexer
; ["**/" end-pos]
; [(eof) end-pos]
; [(re:or "*" "/" (~ (any-string))) (read-document-comment input-port)]
; [(special) (read-document-comment input-port)]
; [(special-comment) (read-document-comment input-port)]
; [(special-error) (read-document-comment input-port)]))
;
; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; ;String lexer
;
; ;get-string: input-port -> (U STRING_LIT STRING_ERROR tokens)
; (define (get-string input-port)
; (letrec ((tokens (get-string-tokens input-port))
; (last-token (list-ref tokens (sub1 (length tokens))))
; (tokens->string
; (lambda (toks)
; ;Stops before the last element, which does not have a string
; (if (null? (cdr toks))
; ""
; (string-append (string (token-value (position-token-token (car toks))))
; (tokens->string (cdr toks)))))))
; (if (eq? 'STRING_END (token-name (position-token-token last-token)))
; (token-STRING_LIT (list (tokens->string tokens) (position-token-end-pos last-token)))
; (token-STRING_ERROR
; (list (tokens->string tokens)
; (position-token-end-pos last-token)
; (position-token-token last-token))))))
;
; ;get-string-tokens: input-port -> (list position-token)
; (define (get-string-tokens input-port)
; (let ((tok (get-str-tok input-port)))
; (case (token-name (position-token-token tok))
; ((STRING_EOF STRING_END STRING_NEWLINE) (list tok))
; (else (cons tok (get-string-tokens input-port))))))
;
; (define-tokens str-tok (STRING_CHAR))
; (define-empty-tokens err (STRING_END STRING_EOF STRING_NEWLINE))
;
; (define get-str-tok
; (lexer-src-pos
; (#\" (token-STRING_END))
; (EscapeSequence (token-STRING_CHAR (EscapeSequence->char lexeme)))
; (InputCharacter (token-STRING_CHAR (string-ref lexeme 0)))
; ((re:or CR LF) (token-STRING_NEWLINE))
; (#\032 (token-STRING_EOF))
; ((eof) (token-STRING_EOF))))
;
; ;; 3.10.6
; (define (EscapeSequence->char es)
; (cond
; ((string=? es "\\b") #\010)
; ((string=? es "\\t") #\011)
; ((string=? es "\\n") #\012)
; ((string=? es "\\f") #\014)
; ((string=? es "\\r") #\015)
; ((string=? es "\\\"") #\")
; ((string=? es "\\'") #\')
; ((string=? es "\\\\") #\\)
; (else (integer->char (string->number (trim-string es 1 0) 8)))))
;
; (define get-token
; (lexer-src-pos
; ;; 3.12
; (Operator (let ((l lexeme))
; (cond
; ((string=? l "|") (token-PIPE))
; ((string=? l "||") (token-OR))
; ((string=? l "|=") (token-OREQUAL))
; ((string=? l "=") (token-EQUAL))
; ((string=? l "<") (token-LT))
; ((string=? l ">") (token-GT))
; ((string=? l "<=") (token-LTEQ))
; ((string=? l ">=") (token-GTEQ))
; ((string=? l "+") (token-PLUS))
; ((string=? l "-") (token-MINUS))
; ((string=? l "*") (token-TIMES))
; ((string=? l "/") (token-DIVIDE))
; ((string=? l "^") (token-^T))
; (else (string->symbol l)))))
;
; ("->" (string->symbol lexeme))
; ("->>" (string->symbol lexeme))
; ("->>>" (string->symbol lexeme))
;
; ;; 3.11
; ("(" (token-O_PAREN))
; (")" (token-C_PAREN))
; ("{" (token-O_BRACE))
; ("}" (token-C_BRACE))
; ("[" (token-O_BRACKET))
; ("]" (token-C_BRACKET))
; (";" (token-SEMI_COLON))
; ("," (token-COMMA))
; ("." (token-PERIOD))
;
; ;; 3.10.7
; ("null" (token-NULL_LIT))
;
; ;; 3.10.5
; (#\" (get-string input-port))
; ;(token-STRING_LIT (list->string (get-string input-port))))
;
; ;; 3.10.4
; ((re:: #\' (re:~ CR LF #\' #\\) #\')
; (token-CHAR_LIT (string-ref lexeme 1)))
; ((re:: #\' EscapeSequence #\')
; (token-CHAR_LIT (EscapeSequence->char
; (trim-string lexeme 1 1))))
;
; ;; 3.10.3
; ("true" (token-TRUE_LIT))
; ("false" (token-FALSE_LIT))
;
; ;; 3.10.2
; ((re:or FloatA FloatB FloatC)
; (token-DOUBLE_LIT (string->number lexeme)))
; ((re:: (re:or FloatA FloatB FloatC FloatD) FloatTypeSuffix)
; (token-FLOAT_LIT (string->number (trim-string lexeme 0 1))))
; ((re:: (re:or FloatA FloatB FloatC FloatD) DoubleTypeSuffix)
; (token-DOUBLE_LIT (string->number (trim-string lexeme 0 1))))
;
;
; ;; 3.10.1
; (DecimalNumeral
; (token-INTEGER_LIT (string->number lexeme 10)))
; ((re:: DecimalNumeral IntegerTypeSuffix)
; (token-LONG_LIT (string->number (trim-string lexeme 0 1) 10)))
; ((re:: HexNumeral IntegerTypeSuffix)
; (token-HEXL_LIT (string->number (trim-string lexeme 2 1) 16)))
; (HexNumeral
; (token-HEX_LIT (string->number (trim-string lexeme 2 0) 16)))
; (OctalNumeral
; (token-OCT_LIT (string->number (trim-string lexeme 1 0) 8)))
; ((re:: OctalNumeral IntegerTypeSuffix)
; (token-OCTL_LIT (string->number (trim-string lexeme 1 1) 8)))
;
; #;("dynamic"
; (cond
; ((dynamic?) (string->symbol lexeme))
; (else (token-IDENTIFIER lexeme))))
;
; #;((re:or "check" "expect" "within")
; (cond
; ((test-ext?) (string->symbol lexeme))
; (else (token-IDENTIFIER lexeme))))
;
; #;((re:or "test" "tests" "testcase")
; (cond
; ((testcase-ext?) (string->symbol lexeme))
; (else (token-IDENTIFIER lexeme))))
;
; ;; 3.9
; (Keyword (string->symbol lexeme))
;
; ;; 3.8
; (Identifier (token-IDENTIFIER lexeme))
;
; ;; 3.7
; ("//" (begin (read-line-comment input-port) (return-without-pos (get-token input-port))))
; ("/*" (begin (read-block-comment input-port) (return-without-pos (get-token input-port))))
; #;("/**" (begin (read-document-comment input-port) (return-without-pos (get-token input-port))))
;
; #;((special)
; (cond
; ((and (syntax? lexeme) (syntax-property lexeme 'test-case-box))
; (token-TEST_SUITE (make-test-case lexeme)))
; ((and (syntax? lexeme) (syntax-property lexeme 'example-box))
; (syntax-case lexeme ()
; ((parse-example-box examples) (token-EXAMPLE (make-example-box (syntax examples))))))
; ((is-a? lexeme (image-snip%))
; (token-IMAGE_SPECIAL lexeme))
; (else
; (token-OTHER_SPECIAL (list lexeme start-pos end-pos)))))
;
; ;; 3.6
; ((re:+ WhiteSpace) (return-without-pos (get-token input-port)))
;
; ;; 3.5
; (#\032 'EOF)
; ((eof) 'EOF)
;
; ((re:+ (re:/ "09" "az" "AZ")) (token-NUMBER_ERROR lexeme))
;
; ))
;
; (define-values/invoke-unit java-definitions-parsers@
; (import)
; (export (prefix def: parsers^) (prefix def: err^) token-proc^))
;
; (define-values/invoke-unit java-interactions-parsers@
; (import)
; (export (prefix int: parsers^) (prefix int: err^)))
(define-values/invoke-unit beginner-definitions-parser@
(import)
(export (prefix beginner-def: parsers^) (prefix beginner-def: err^) token-proc^))
(define-values/invoke-unit beginner-interactions-parsers@
(import)
(export (prefix beginner-int: parsers^) (prefix beginner-int: err^)))
(define-values/invoke-unit intermediate-definitions-parser@
(import)
(export (prefix intermediate-def: parsers^) (prefix intermediate-def: err^)))
(define-values/invoke-unit intermediate-interactions-parsers@
(import)
(export (prefix intermediate-int: parsers^) (prefix intermediate-int: err^)))
(define-values/invoke-unit intermediate+access-definitions-parser@
(import)
(export (prefix intermediate+acc-def: parsers^) (prefix intermediate+acc-def: err^)))
(define-values/invoke-unit intermediate+access-interactions-parsers@
(import)
(export (prefix intermediate+acc-int: parsers^) (prefix intermediate+acc-int: err^)))
(define-values/invoke-unit advanced-definitions-parser@
(import)
(export (prefix advanced-def: parsers^) (prefix advanced-def: err^)))
(define-values/invoke-unit advanced-interactions-parsers@
(import)
(export (prefix advanced-int: parsers^) (prefix advanced-int: err^) ))
(define (parse parser err? err-src err-msg)
(lambda (program-stream location)
(let ([output
;(with-handlers ((exn?
; (lambda (e)
; (string-append "parse function failed with this internal exception:"
; (exn-message e)))))
(!!! ((!!! parser) (old-tokens->new program-stream) location))]);)])
(if (err? output) (list (err-msg output) (!!! (err-src output)))))))
(define parse-beginner (parse beginner-def:parse-program
beginner-def:err? beginner-def:err-msg beginner-def:err-src))
(define parse-intermediate (parse intermediate-def:parse-program
intermediate-def:err? intermediate-def:err-msg intermediate-def:err-src))
(define parse-intermediate+access (parse intermediate+acc-def:parse-program
intermediate+acc-def:err? intermediate+acc-def:err-msg intermediate+acc-def:err-src))
(define parse-advanced (parse advanced-def:parse-program
advanced-def:err? advanced-def:err-msg advanced-def:err-src))
(define parse-beginner-interact (parse beginner-int:parse-program
beginner-int:err? beginner-int:err-msg beginner-int:err-src))
(define parse-intermediate-interact (parse intermediate-int:parse-program
intermediate-int:err? intermediate-int:err-msg intermediate-int:err-src))
(define parse-advanced-interact (parse advanced-int:parse-program
advanced-int:err? advanced-int:err-msg advanced-int:err-src))
)