parse tokens into a tree
This commit is contained in:
parent
783ee2cf88
commit
1f9b9b4c51
|
@ -3,22 +3,27 @@
|
||||||
(require rackunit)
|
(require rackunit)
|
||||||
(require parser-tools/lex
|
(require parser-tools/lex
|
||||||
(prefix-in : parser-tools/lex-sre))
|
(prefix-in : parser-tools/lex-sre))
|
||||||
(require racket/match)
|
(require racket/match
|
||||||
|
(for-syntax racket/base
|
||||||
|
syntax/parse))
|
||||||
|
|
||||||
(define-tokens honu-tokens (number identifier))
|
(define-tokens honu-tokens (number identifier))
|
||||||
|
|
||||||
(define-empty-tokens honu-empty-tokens (eof fail whitespace
|
(define-empty-tokens honu-empty-tokens
|
||||||
|
[eof fail whitespace
|
||||||
left-parens right-parens
|
left-parens right-parens
|
||||||
left-bracket right-bracket
|
left-bracket right-bracket
|
||||||
left-brace right-brace
|
left-brace right-brace
|
||||||
end-of-line-comment))
|
end-of-line-comment])
|
||||||
|
|
||||||
|
(define-lex-abbrev digit (:/ #\0 #\9))
|
||||||
(define-lex-abbrev identifier-first-character (:or (:/ #\a #\z)
|
(define-lex-abbrev identifier-first-character (:or (:/ #\a #\z)
|
||||||
(:/ #\A #\Z)))
|
(:/ #\A #\Z)))
|
||||||
(define-lex-abbrev identifier-character identifier-first-character)
|
(define-lex-abbrev identifier-character (:or identifier-first-character
|
||||||
|
digit))
|
||||||
(define-lex-abbrev identifier (:: identifier-first-character
|
(define-lex-abbrev identifier (:: identifier-first-character
|
||||||
(:+ identifier-character)))
|
(:* identifier-character)))
|
||||||
(define-lex-abbrev number (:+ (:/ #\0 #\9)))
|
(define-lex-abbrev number (:+ digit))
|
||||||
|
|
||||||
(define honu-lexer
|
(define honu-lexer
|
||||||
(lexer-src-pos
|
(lexer-src-pos
|
||||||
|
@ -26,27 +31,33 @@
|
||||||
[(:or "#" "//") (token-end-of-line-comment)]
|
[(:or "#" "//") (token-end-of-line-comment)]
|
||||||
[number (token-number (string->number lexeme))]
|
[number (token-number (string->number lexeme))]
|
||||||
["." (token-identifier '|.|)]
|
["." (token-identifier '|.|)]
|
||||||
["(" (token-left-parens)]
|
["(" (token-left-parens)] [")" (token-right-parens)]
|
||||||
[")" (token-right-parens)]
|
["[" (token-left-bracket)] ["]" (token-right-bracket)]
|
||||||
["[" (token-left-bracket)]
|
["{" (token-left-brace)] ["}" (token-right-brace)]
|
||||||
["]" (token-right-bracket)]
|
|
||||||
["{" (token-left-brace)]
|
|
||||||
["}" (token-right-brace)]
|
|
||||||
[identifier (token-identifier (string->symbol lexeme))]
|
[identifier (token-identifier (string->symbol lexeme))]
|
||||||
[(union " " "\t") (token-whitespace)]
|
[(union " " "\t") (token-whitespace)]
|
||||||
))
|
))
|
||||||
|
|
||||||
(define (token-eof? token)
|
(define-syntax (define-token? stx)
|
||||||
(equal? 'eof (token-name token)))
|
(syntax-parse stx
|
||||||
|
[(_ name)
|
||||||
|
(define name? (datum->syntax #'name (string->symbol
|
||||||
|
(format "token-~a?"
|
||||||
|
(symbol->string
|
||||||
|
(syntax->datum #'name))))
|
||||||
|
#'name))
|
||||||
|
(with-syntax ([name? name?])
|
||||||
|
#'(define (name? token)
|
||||||
|
(equal? 'name (token-name token))))]))
|
||||||
|
|
||||||
(define (token-whitespace? token)
|
(define-syntax-rule (define-tokens? name ...)
|
||||||
(equal? 'whitespace (token-name token)))
|
(begin
|
||||||
|
(define-token? name) ...))
|
||||||
|
|
||||||
(define (token-end-of-line-comment? token)
|
(define-tokens? eof whitespace end-of-line-comment number
|
||||||
(equal? 'end-of-line-comment (token-name token)))
|
identifier left-parens right-parens)
|
||||||
|
|
||||||
(define (read-until-end-of-line input)
|
(define (read-until-end-of-line input) (define (finish? what)
|
||||||
(define (finish? what)
|
|
||||||
(or (eof-object? what)
|
(or (eof-object? what)
|
||||||
(= (char->integer #\newline) what)))
|
(= (char->integer #\newline) what)))
|
||||||
(let loop ()
|
(let loop ()
|
||||||
|
@ -54,8 +65,7 @@
|
||||||
(when (not (finish? what))
|
(when (not (finish? what))
|
||||||
(loop))))
|
(loop))))
|
||||||
|
|
||||||
(define (lex-string input)
|
(define (read-tokens port)
|
||||||
(define port (open-input-string input))
|
|
||||||
(let loop ([tokens '()])
|
(let loop ([tokens '()])
|
||||||
(define next (honu-lexer port))
|
(define next (honu-lexer port))
|
||||||
;; (printf "next ~a\n" next)
|
;; (printf "next ~a\n" next)
|
||||||
|
@ -70,32 +80,88 @@
|
||||||
(loop tokens)]
|
(loop tokens)]
|
||||||
[(struct* position-token ([token (? token-whitespace?)] [start-pos start] [end-pos end]))
|
[(struct* position-token ([token (? token-whitespace?)] [start-pos start] [end-pos end]))
|
||||||
(loop tokens)]
|
(loop tokens)]
|
||||||
|
[else (loop (cons next tokens))]
|
||||||
|
#;
|
||||||
[(position-token token start end)
|
[(position-token token start end)
|
||||||
;; (printf "next is ~a eof? ~a\n" token (token-eof? token))
|
;; (printf "next is ~a eof? ~a\n" token (token-eof? token))
|
||||||
(loop (cons token tokens))])))
|
(loop (cons token tokens))])))
|
||||||
|
|
||||||
|
;; convert a string to a stream of tokens
|
||||||
|
(define (lex-string input)
|
||||||
|
(read-tokens (open-input-string input)))
|
||||||
|
|
||||||
|
;; converts a stream of tokens to a tree
|
||||||
|
(define (parse tokens)
|
||||||
|
(let loop ([current '()]
|
||||||
|
[tokens tokens]
|
||||||
|
[stop '()])
|
||||||
|
(if (null? tokens)
|
||||||
|
(reverse current)
|
||||||
|
(match (car tokens)
|
||||||
|
[(position-token token start end)
|
||||||
|
(cond
|
||||||
|
[(or (token-number? token)
|
||||||
|
(token-identifier? token))
|
||||||
|
(loop (cons token current)
|
||||||
|
(cdr tokens)
|
||||||
|
stop)]
|
||||||
|
[(token-right-parens? token)
|
||||||
|
(match stop
|
||||||
|
[(list 'right-parens rest ...)
|
||||||
|
(values (reverse current)
|
||||||
|
rest)]
|
||||||
|
[else (error 'parse "expected a left parentheses before the right parentheses")])]
|
||||||
|
[(token-left-parens? token)
|
||||||
|
(define-values (parsed more-tokens)
|
||||||
|
(loop '(#%parens)
|
||||||
|
(cdr tokens)
|
||||||
|
(cons 'right-parens stop)))
|
||||||
|
(loop (cons parsed current)
|
||||||
|
more-tokens
|
||||||
|
stop)]
|
||||||
|
[else (error 'parse "cannot parse ~a" token)])]))))
|
||||||
|
|
||||||
|
;; strip the source location from the position tokens
|
||||||
|
(define (strip tokens)
|
||||||
|
(for/list ([token tokens])
|
||||||
|
(match token
|
||||||
|
[(position-token token start end) token])))
|
||||||
|
|
||||||
|
(define (honu-read-syntax [port (current-input-port)])
|
||||||
|
(read-tokens (parse port)))
|
||||||
|
|
||||||
|
(define (honu-read [port (current-input-port)])
|
||||||
|
(syntax->datum (honu-read-syntax port)))
|
||||||
|
|
||||||
(test-case
|
(test-case
|
||||||
"Basic tests"
|
"Basic tests"
|
||||||
(check-equal? (lex-string "5")
|
(check-equal? (strip (lex-string "5"))
|
||||||
(list (token-number 5)))
|
(list (token-number 5)))
|
||||||
(check-equal? (lex-string "5 8")
|
(check-equal? (strip (lex-string "5 8"))
|
||||||
(list (token-number 5) (token-number 8)))
|
(list (token-number 5) (token-number 8)))
|
||||||
(check-equal? (lex-string "hello")
|
(check-equal? (strip (lex-string "hello"))
|
||||||
(list (token-identifier 'hello)))
|
(list (token-identifier 'hello)))
|
||||||
(check-equal? (lex-string "()")
|
(check-equal? (strip (lex-string "()"))
|
||||||
(list (token-left-parens)
|
(list (token-left-parens)
|
||||||
(token-right-parens)))
|
(token-right-parens)))
|
||||||
(check-equal? (lex-string "()[]{}")
|
(check-equal? (strip (lex-string "()[]{}"))
|
||||||
(list (token-left-parens)
|
(list (token-left-parens)
|
||||||
(token-right-parens)
|
(token-right-parens)
|
||||||
(token-left-bracket)
|
(token-left-bracket)
|
||||||
(token-right-bracket)
|
(token-right-bracket)
|
||||||
(token-left-brace)
|
(token-left-brace)
|
||||||
(token-right-brace)))
|
(token-right-brace)))
|
||||||
(check-equal? (lex-string "foo // 5")
|
(check-equal? (strip (lex-string "foo // 5"))
|
||||||
(list (token-identifier 'foo)))
|
(list (token-identifier 'foo)))
|
||||||
(check-equal? (lex-string "foo // 5
|
(check-equal? (strip (lex-string "foo // 5
|
||||||
bar")
|
bar"))
|
||||||
(list (token-identifier 'foo)
|
(list (token-identifier 'foo)
|
||||||
(token-identifier 'bar)))
|
(token-identifier 'bar)))
|
||||||
|
(check-equal? (strip (lex-string "f(2)"))
|
||||||
|
(list (token-identifier 'f)
|
||||||
|
(token-left-parens)
|
||||||
|
(token-number 2)
|
||||||
|
(token-right-parens)))
|
||||||
)
|
)
|
||||||
|
|
||||||
|
(parse (lex-string "f(5)"))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user