From 19780023125d88c2eebfef9d46c2d9fd01b731bf Mon Sep 17 00:00:00 2001 From: Jon Rafkind Date: Sun, 24 Jul 2011 23:04:13 -0400 Subject: [PATCH] handle end of line comments --- collects/honu/core/read.rkt | 34 ++++++++++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/collects/honu/core/read.rkt b/collects/honu/core/read.rkt index c49b885b8d..c8385ab0b8 100644 --- a/collects/honu/core/read.rkt +++ b/collects/honu/core/read.rkt @@ -10,19 +10,22 @@ (define-empty-tokens honu-empty-tokens (eof fail whitespace left-parens right-parens left-bracket right-bracket - left-brace right-brace)) + left-brace right-brace + end-of-line-comment)) (define-lex-abbrev identifier-first-character (:or (:/ #\a #\z) (:/ #\A #\Z))) (define-lex-abbrev identifier-character identifier-first-character) (define-lex-abbrev identifier (:: identifier-first-character (:+ identifier-character))) +(define-lex-abbrev number (:+ (:/ #\0 #\9))) (define honu-lexer (lexer-src-pos [(eof) (token-eof)] - [(char-range #\0 #\9) - (token-number (string->number lexeme))] + [(:or "#" "//") (token-end-of-line-comment)] + [number (token-number (string->number lexeme))] + ["." (token-identifier '|.|)] ["(" (token-left-parens)] [")" (token-right-parens)] ["[" (token-left-bracket)] @@ -30,7 +33,7 @@ ["{" (token-left-brace)] ["}" (token-right-brace)] [identifier (token-identifier (string->symbol lexeme))] - [(union " ") (token-whitespace)] + [(union " " "\t") (token-whitespace)] )) (define (token-eof? token) @@ -39,6 +42,18 @@ (define (token-whitespace? token) (equal? 'whitespace (token-name token))) +(define (token-end-of-line-comment? token) + (equal? 'end-of-line-comment (token-name token))) + +(define (read-until-end-of-line input) + (define (finish? what) + (or (eof-object? what) + (= (char->integer #\newline) what))) + (let loop () + (define what (read-byte input)) + (when (not (finish? what)) + (loop)))) + (define (lex-string input) (define port (open-input-string input)) (let loop ([tokens '()]) @@ -48,6 +63,11 @@ [(struct* position-token ([token (? token-eof?)] [start-pos start] [end-pos end])) ;; (printf "done lexing\n") (reverse tokens)] + [(struct* position-token ([token (? token-end-of-line-comment?)] + [start-pos start] + [end-pos end])) + (read-until-end-of-line port) + (loop tokens)] [(struct* position-token ([token (? token-whitespace?)] [start-pos start] [end-pos end])) (loop tokens)] [(position-token token start end) @@ -72,4 +92,10 @@ (token-right-bracket) (token-left-brace) (token-right-brace))) + (check-equal? (lex-string "foo // 5") + (list (token-identifier 'foo))) + (check-equal? (lex-string "foo // 5 + bar") + (list (token-identifier 'foo) + (token-identifier 'bar))) )