From 13c969528e1ff0012f9f85d193c81e25c3138da3 Mon Sep 17 00:00:00 2001 From: Robby Findler Date: Thu, 14 Jul 2016 21:29:04 -0500 Subject: [PATCH] do less invariant checking, because we cannot rely on chained lexers the way the code was trying to closes #2 --- 2d-lib/private/lexer.rkt | 33 +++++++++++++++------------------ 2d-test/tests/lexer-test.rkt | 20 ++++++++++++++++++++ 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/2d-lib/private/lexer.rkt b/2d-lib/private/lexer.rkt index 07532d9..53b7a49 100644 --- a/2d-lib/private/lexer.rkt +++ b/2d-lib/private/lexer.rkt @@ -39,24 +39,18 @@ todo: ;; 'val' -- it isn't necessary for correct operation, but ;; helps find bugs earlier (define (check-char i c2) - ;; here we want to check to make sure we're in sync, but: - - ;; 1) some lexers don't return strings (or return strings - ;; of the wrong sizes); these will be non-strings here - (when (string? val) - ;; 2) whitespace is not always right - ;; (the ones outside the table, - ;; specifically, are always just spaces) - (unless (eq? tok 'white-space) - - ;; 3) sometimes we get specials in the port - (when (char? c2) - - (define c1 (string-ref val i)) - (unless (equal? c1 c2) - (error '2d/lexer.rkt "expected a ~s, got ~s while feeding token ~s" - c1 c2 - (car (2d-lexer-state-pending-tokens a-2d-lexer-state)))))))) + ;; here we want to check to make sure we're in sync, but + ;; we cannot count on the lexers to return the same strings + ;; as we saw in the port in general. So, instead we check only + ;; when the token is a parenthesis and the characters are + ;; the double-barred chars (since we made that token) + (when (and (equal? tok 'parenthesis) + (regexp-match? all-double-barred-chars-regexp val)) + (define c1 (string-ref val i)) + (unless (equal? c1 c2) + (error '2d/lexer.rkt "expected a ~s, got ~s while feeding token ~s" + c1 c2 + (car (2d-lexer-state-pending-tokens a-2d-lexer-state)))))) ;; actually read the characters in (define last-i (- end start)) @@ -114,6 +108,9 @@ todo: (define double-barred-chars-regexp (regexp (format "[~a]" (apply string double-barred-chars)))) +(define all-double-barred-chars-regexp + (regexp + (format "^[~a]*$" (apply string double-barred-chars)))) (define (call-chained-lexer uniform-chained-lexer port offset a-2d-lexer-state) (define-values (a b c d e f new-mode) diff --git a/2d-test/tests/lexer-test.rkt b/2d-test/tests/lexer-test.rkt index b50e49d..4e9748b 100644 --- a/2d-test/tests/lexer-test.rkt +++ b/2d-test/tests/lexer-test.rkt @@ -367,3 +367,23 @@ (" ║\n╚═══╝" error #f 20 31 20) (,eof eof #f #f #f 0))) +(check-equal? + (run-lexer "#2dwhatever\n" + "╔════╦════╗\n" + "║123;║5678║\n" + "╚════╩════╝\n") + `(("#2dwhatever" hash-colon-keyword #f 1 12 0) + ("\n" white-space #f 12 13 12) + ("╔════╦════╗" parenthesis #f 13 24 13) + (" " white-space #f 24 25 24) + ("║" parenthesis #f 25 26 25) + ("123" constant #f 26 29 26) + (" " comment #f 29 30 29) + ("║" parenthesis #f 30 31 30) + ("5678" constant #f 31 35 31) + ("║" parenthesis #f 35 36 35) + (" " white-space #f 36 37 36) + ("╚════╩════╝" parenthesis #f 37 48 37) + ("\n" white-space #f 48 49 0) + (,eof eof #f #f #f 0))) +