fix bugs in unstable/2d lexer

original commit: fe515e3ac779d04d25304c0f0bae6b291df828e3
This commit is contained in:
Robby Findler 2013-02-23 11:33:20 -06:00
parent 076fa0c267
commit 670f9e7c51

View File

@ -37,17 +37,23 @@ todo:
(apply values (car (2d-lexer-state-pending-tokens a-2d-lexer-state)))) (apply values (car (2d-lexer-state-pending-tokens a-2d-lexer-state))))
;; read the characters in (expecting the same string as in 'val') ;; read the characters in (expecting the same string as in 'val')
(for ([c1 (in-string val)] (for ([i (in-range (- end start))])
[i (in-naturals)])
(define c2 (read-char port)) (define c2 (read-char port))
(unless (or
;; don't check these, as they are not always ;; here we want to check to make sure we're in sync, but:
;; right (the ones outside the table, specifically
;; are always just spaces) ;; 1) some lexers don't return strings (or return strings
(eq? tok 'white-space) ;; of the wrong sizes); these will be non-strings here
(equal? c1 c2)) (when (string? val)
(error '2d/lexer.rkt "expected a ~s, got ~s while feeding token ~s" c1 c2 ;; 2) whitespace is not always right
(car (2d-lexer-state-pending-tokens a-2d-lexer-state))))) ;; (the ones outside the table,
;; specifically, are always just spaces)
(unless (eq? tok 'white-space)
(define c1 (string-ref val i))
(unless (equal? c1 c2)
(error '2d/lexer.rkt "expected a ~s, got ~s while feeding token ~s"
c1 c2
(car (2d-lexer-state-pending-tokens a-2d-lexer-state)))))))
(values val tok paren (values val tok paren
pos pos
@ -210,14 +216,18 @@ todo:
(define port (open-input-string scratch-string)) (define port (open-input-string scratch-string))
(let loop ([mode (2d-lexer-state-chained-state a-2d-lexer-state)]) (let loop ([mode (2d-lexer-state-chained-state a-2d-lexer-state)])
(define-values (_1 _2 current-pos) (port-next-location port)) (define-values (_1 _2 current-pos) (port-next-location port))
(define-values (str tok paren start end backup new-mode) (define-values (tok-str tok paren start end backup new-mode)
(uniform-chained-lexer port (+ pos offset) mode)) (uniform-chained-lexer port (+ pos offset) mode))
(unless (equal? 'eof tok) (unless (equal? 'eof tok)
(for ([sub-region (in-list (cropped-regions start end regions))]) (for ([sub-region (in-list (cropped-regions start end regions))])
(define start (- (car sub-region) current-pos))
(define end (- (cdr sub-region) current-pos))
(set! collected-tokens (set! collected-tokens
(cons (list (substring str (cons (list (if (and (string? tok-str)
(- (car sub-region) current-pos) (< start (string-length tok-str))
(- (cdr sub-region) current-pos)) (<= end (string-length tok-str)))
(substring tok-str start end)
(list 'strange-token tok-str))
tok tok
paren paren
(+ base-position (car sub-region)) (+ base-position (car sub-region))