fix unstable/2d's lexer to handle eof better
original commit: 87a8e6f677d9538001766910ba119dde8066b400
This commit is contained in:
parent
1b75e51175
commit
b49c680dd5
|
@ -156,17 +156,15 @@ todo:
|
||||||
|
|
||||||
(define the-state (make-state line pos (string-length first-tok-string)))
|
(define the-state (make-state line pos (string-length first-tok-string)))
|
||||||
(setup-state the-state)
|
(setup-state the-state)
|
||||||
|
|
||||||
;; would like to be able to stop this loop
|
;; would like to be able to stop this loop
|
||||||
;; and process only part of the table,
|
;; and process only part of the table,
|
||||||
;; but that works only when there are no broken
|
;; but that works only when there are no broken
|
||||||
;; edges of the table that span the place I want to stop.
|
;; edges of the table that span the place I want to stop.
|
||||||
(define failed
|
(define failed
|
||||||
(with-handlers ((exn:fail:read?
|
(with-handlers ((exn:fail:read? values))
|
||||||
(λ (exn) exn)))
|
|
||||||
(let loop ([map #f])
|
(let loop ([map #f])
|
||||||
(define new-map
|
(define new-map
|
||||||
;; this might raise a read exception: what then?
|
|
||||||
(parse-2dcond-one-step peek-port (object-name peek-port) #f #f pos the-state map))
|
(parse-2dcond-one-step peek-port (object-name peek-port) #f #f pos the-state map))
|
||||||
(when new-map
|
(when new-map
|
||||||
(loop new-map)))))
|
(loop new-map)))))
|
||||||
|
@ -177,153 +175,181 @@ todo:
|
||||||
;; no matter how long eol-string is, it counts for 1 position only.
|
;; no matter how long eol-string is, it counts for 1 position only.
|
||||||
(+ pos (string-length first-tok-string) 1)))
|
(+ pos (string-length first-tok-string) 1)))
|
||||||
|
|
||||||
(define final-tokens
|
(cond
|
||||||
(cond
|
[(exn:fail:read:eof? failed)
|
||||||
[(exn:fail:read? failed)
|
;; in this case, the source location for the error
|
||||||
(define error-pos (- (srcloc-position (car (exn:fail:read-srclocs failed)))
|
;; should be the beginning of the #2d token,
|
||||||
base-position)) ;; account for the newline
|
;; so we just turn the whole thing red in a single token
|
||||||
(define peek-port2 (peeking-input-port port))
|
(define tok-string
|
||||||
(port-count-lines! peek-port2)
|
(string-append
|
||||||
|
first-tok-string
|
||||||
;; pull the newline out of peek-port2
|
(apply string
|
||||||
(for ([x (in-range (string-length eol-string))]) (read-char peek-port2))
|
(let loop ()
|
||||||
|
(define c (read-char port))
|
||||||
(define (pull-chars n)
|
(cond
|
||||||
(apply
|
[(eof-object? c) '()]
|
||||||
string
|
[else (cons c (loop))])))))
|
||||||
(let loop ([n n])
|
(values tok-string 'error #f
|
||||||
(cond
|
pos (+ pos (string-length tok-string))
|
||||||
[(zero? n) '()]
|
0
|
||||||
[else (cons (read-char peek-port2) (loop (- n 1)))]))))
|
#f)]
|
||||||
(define before-token (list (pull-chars error-pos)
|
[else
|
||||||
'no-color
|
(define final-tokens
|
||||||
#f
|
(cond
|
||||||
(+ base-position 1)
|
[(exn:fail:read? failed)
|
||||||
(+ base-position 1 error-pos)))
|
(define error-pos (- (srcloc-position (car (exn:fail:read-srclocs failed)))
|
||||||
(define end-of-table-approx
|
base-position)) ;; account for the newline
|
||||||
(let ([peek-port3 (peeking-input-port peek-port2)])
|
(when (< error-pos 0)
|
||||||
(port-count-lines! peek-port3)
|
(error 'unstable/2d/lexer.rkt "got error-pos < 0: ~s ~s\n"
|
||||||
(let loop ()
|
(srcloc-position (car (exn:fail:read-srclocs failed)))
|
||||||
(define l (read-line peek-port3))
|
base-position))
|
||||||
(define-values (line col pos) (port-next-location peek-port3))
|
(define peek-port2 (peeking-input-port port))
|
||||||
(cond
|
(port-count-lines! peek-port2)
|
||||||
[(and (string? l)
|
(define (pull-chars n)
|
||||||
(regexp-match double-barred-chars-regexp l))
|
(apply
|
||||||
(loop)]
|
string
|
||||||
[else pos]))))
|
(let loop ([n n])
|
||||||
(define after-token
|
(cond
|
||||||
(list (pull-chars (- end-of-table-approx 1))
|
[(zero? n) '()]
|
||||||
'error
|
[else (cons (read-char peek-port2) (loop (- n 1)))]))))
|
||||||
#f
|
(cond
|
||||||
(+ base-position 1 error-pos)
|
|
||||||
(+ base-position 1 error-pos end-of-table-approx -1)))
|
[else
|
||||||
(list newline-token before-token after-token)]
|
|
||||||
[else
|
;; pull the newline out of peek-port2
|
||||||
|
(for ([x (in-range (string-length eol-string))]) (read-char peek-port2))
|
||||||
(define lhses (close-cell-graph cell-connections (length table-column-breaks) (length rows)))
|
|
||||||
(define scratch-string (make-string (for/sum ([ss (in-list rows)])
|
(define before-token (list (pull-chars error-pos)
|
||||||
(for/sum ([s (in-list ss)])
|
'no-color
|
||||||
(string-length s)))
|
#f
|
||||||
#\space))
|
(+ base-position 1)
|
||||||
(define collected-tokens '())
|
(+ base-position 1 error-pos)))
|
||||||
(define rows-as-vector (apply vector (reverse rows)))
|
(define end-of-table-approx
|
||||||
(for ([set-of-indicies (in-list (sort (set->list lhses) compare/xy
|
(let ([peek-port3 (peeking-input-port peek-port2)])
|
||||||
#:key smallest-representative))])
|
(port-count-lines! peek-port3)
|
||||||
(define regions
|
(let loop ()
|
||||||
(fill-scratch-string set-of-indicies
|
(define l (read-line peek-port3))
|
||||||
rows-as-vector
|
(define-values (line col pos) (port-next-location peek-port3))
|
||||||
scratch-string
|
(cond
|
||||||
table-column-breaks
|
[(and (string? l)
|
||||||
initial-space-count
|
(regexp-match double-barred-chars-regexp l))
|
||||||
#t))
|
(loop)]
|
||||||
(define port (open-input-string scratch-string))
|
[else pos]))))
|
||||||
(port-count-lines! port)
|
(define after-token
|
||||||
(let loop ([mode (2d-lexer-state-chained-state a-2d-lexer-state)])
|
(list (pull-chars (- end-of-table-approx 1))
|
||||||
(define-values (_1 _2 current-pos) (port-next-location port))
|
'error
|
||||||
(define-values (tok-str tok paren start end backup new-mode)
|
#f
|
||||||
(uniform-chained-lexer port (+ pos offset) mode))
|
(+ base-position 1 error-pos)
|
||||||
(unless (equal? 'eof tok)
|
(+ base-position 1 error-pos end-of-table-approx -1)))
|
||||||
(for ([sub-region (in-list (cropped-regions start end regions))])
|
(if (zero? error-pos)
|
||||||
(define start (- (car sub-region) current-pos))
|
(list newline-token after-token)
|
||||||
(define end (- (cdr sub-region) current-pos))
|
(list newline-token before-token after-token))])]
|
||||||
(set! collected-tokens
|
[else
|
||||||
(cons (list (if (and (string? tok-str)
|
|
||||||
(< start (string-length tok-str))
|
(define lhses (close-cell-graph cell-connections (length table-column-breaks) (length rows)))
|
||||||
(<= end (string-length tok-str)))
|
(define scratch-string (make-string (for/sum ([ss (in-list rows)])
|
||||||
(substring tok-str start end)
|
(for/sum ([s (in-list ss)])
|
||||||
(list 'strange-token tok-str))
|
(string-length s)))
|
||||||
tok
|
#\space))
|
||||||
paren
|
(define collected-tokens '())
|
||||||
(+ base-position (car sub-region))
|
(define rows-as-vector (apply vector (reverse rows)))
|
||||||
(+ base-position (cdr sub-region)))
|
(for ([set-of-indicies (in-list (sort (set->list lhses) compare/xy
|
||||||
collected-tokens)))
|
#:key smallest-representative))])
|
||||||
(loop new-mode))))
|
(define regions
|
||||||
|
(fill-scratch-string set-of-indicies
|
||||||
(define (collect-double-barred-token pending-start i offset str)
|
rows-as-vector
|
||||||
(when pending-start
|
scratch-string
|
||||||
(set! collected-tokens (cons (list (substring str pending-start i)
|
table-column-breaks
|
||||||
'parenthesis
|
initial-space-count
|
||||||
#f
|
#t))
|
||||||
(+ base-position offset pending-start)
|
(define port (open-input-string scratch-string))
|
||||||
(+ base-position offset i))
|
(port-count-lines! port)
|
||||||
collected-tokens))))
|
(let loop ([mode (2d-lexer-state-chained-state a-2d-lexer-state)])
|
||||||
|
(define-values (_1 _2 current-pos) (port-next-location port))
|
||||||
(for/fold ([offset 1]) ([strs (in-list (reverse (cons (list current-line) rows)))])
|
(define-values (tok-str tok paren start end backup new-mode)
|
||||||
(for/fold ([offset offset]) ([str (in-list strs)])
|
(uniform-chained-lexer port (+ pos offset) mode))
|
||||||
(let loop ([i 0]
|
(unless (equal? 'eof tok)
|
||||||
[pending-start #f])
|
(for ([sub-region (in-list (cropped-regions start end regions))])
|
||||||
(cond
|
(define start (- (car sub-region) current-pos))
|
||||||
[(< i (string-length str))
|
(define end (- (cdr sub-region) current-pos))
|
||||||
(define c (string-ref str i))
|
(set! collected-tokens
|
||||||
|
(cons (list (if (and (string? tok-str)
|
||||||
|
(< start (string-length tok-str))
|
||||||
|
(<= end (string-length tok-str)))
|
||||||
|
(substring tok-str start end)
|
||||||
|
(list 'strange-token tok-str))
|
||||||
|
tok
|
||||||
|
paren
|
||||||
|
(+ base-position (car sub-region))
|
||||||
|
(+ base-position (cdr sub-region)))
|
||||||
|
collected-tokens)))
|
||||||
|
(loop new-mode))))
|
||||||
|
|
||||||
|
(define (collect-double-barred-token pending-start i offset str)
|
||||||
|
(when pending-start
|
||||||
|
(set! collected-tokens (cons (list (substring str pending-start i)
|
||||||
|
'parenthesis
|
||||||
|
#f
|
||||||
|
(+ base-position offset pending-start)
|
||||||
|
(+ base-position offset i))
|
||||||
|
collected-tokens))))
|
||||||
|
|
||||||
|
(for/fold ([offset 1]) ([strs (in-list (reverse (cons (list current-line) rows)))])
|
||||||
|
(for/fold ([offset offset]) ([str (in-list strs)])
|
||||||
|
(let loop ([i 0]
|
||||||
|
[pending-start #f])
|
||||||
(cond
|
(cond
|
||||||
[(member c double-barred-chars)
|
[(< i (string-length str))
|
||||||
(loop (+ i 1)
|
(define c (string-ref str i))
|
||||||
(if pending-start pending-start i))]
|
(cond
|
||||||
|
[(member c double-barred-chars)
|
||||||
|
(loop (+ i 1)
|
||||||
|
(if pending-start pending-start i))]
|
||||||
|
[else
|
||||||
|
(collect-double-barred-token pending-start i offset str)
|
||||||
|
(loop (+ i 1) #f)])]
|
||||||
[else
|
[else
|
||||||
(collect-double-barred-token pending-start i offset str)
|
(collect-double-barred-token pending-start i offset str)]))
|
||||||
(loop (+ i 1) #f)])]
|
(+ (string-length str) offset)))
|
||||||
[else
|
|
||||||
(collect-double-barred-token pending-start i offset str)]))
|
(define sorted-tokens (sort collected-tokens <
|
||||||
(+ (string-length str) offset)))
|
#:key (λ (x) (list-ref x 3))))
|
||||||
|
|
||||||
(define sorted-tokens (sort collected-tokens <
|
;; there will be gaps that correspond to the places outside of the
|
||||||
#:key (λ (x) (list-ref x 3))))
|
;; outermost rectangle (at a minimum, newlines); this fills those
|
||||||
|
;; in with whitespace tokens
|
||||||
;; there will be gaps that correspond to the places outside of the
|
;; NOTE: this code does not deal properly with \r\n newline combinations
|
||||||
;; outermost rectangle (at a minimum, newlines); this fills those
|
(define cracks-filled-in-tokens
|
||||||
;; in with whitespace tokens
|
(let loop ([fst newline-token]
|
||||||
;; NOTE: this code does not deal properly with \r\n newline combinations
|
[tokens sorted-tokens])
|
||||||
(define cracks-filled-in-tokens
|
|
||||||
(let loop ([fst newline-token]
|
|
||||||
[tokens sorted-tokens])
|
|
||||||
(cond
|
|
||||||
[(null? tokens) (list fst)]
|
|
||||||
[else
|
|
||||||
(define snd (car tokens))
|
|
||||||
(cond
|
(cond
|
||||||
[(= (list-ref fst 4)
|
[(null? tokens) (list fst)]
|
||||||
(list-ref snd 3))
|
|
||||||
(cons fst (loop snd (cdr tokens)))]
|
|
||||||
[else
|
[else
|
||||||
(define new-start (list-ref fst 4))
|
(define snd (car tokens))
|
||||||
(define new-end (list-ref snd 3))
|
(cond
|
||||||
(list* fst
|
[(= (list-ref fst 4)
|
||||||
(list
|
(list-ref snd 3))
|
||||||
; these are not the real characters ...
|
(cons fst (loop snd (cdr tokens)))]
|
||||||
(make-string (- new-end new-start) #\space)
|
[else
|
||||||
'white-space
|
(define new-start (list-ref fst 4))
|
||||||
#f
|
(define new-end (list-ref snd 3))
|
||||||
new-start
|
(list* fst
|
||||||
new-end)
|
(list
|
||||||
(loop snd (cdr tokens)))])])))
|
; these are not the real characters ...
|
||||||
cracks-filled-in-tokens]))
|
(make-string (- new-end new-start) #\space)
|
||||||
|
'white-space
|
||||||
|
#f
|
||||||
|
new-start
|
||||||
|
new-end)
|
||||||
|
(loop snd (cdr tokens)))])])))
|
||||||
|
cracks-filled-in-tokens]))
|
||||||
|
|
||||||
(values first-tok-string 'hash-colon-keyword #f
|
(values first-tok-string 'hash-colon-keyword #f
|
||||||
pos (+ pos (string-length first-tok-string))
|
pos (+ pos (string-length first-tok-string))
|
||||||
0
|
0
|
||||||
(2d-lexer-state final-tokens
|
(2d-lexer-state final-tokens
|
||||||
#t
|
#t
|
||||||
(2d-lexer-state-chained-state a-2d-lexer-state)))]))
|
(2d-lexer-state-chained-state a-2d-lexer-state)))])]))
|
||||||
|
|
||||||
(define (cropped-regions start end regions)
|
(define (cropped-regions start end regions)
|
||||||
(define result-regions '())
|
(define result-regions '())
|
||||||
|
|
|
@ -314,9 +314,9 @@ example uses:
|
||||||
(cond
|
(cond
|
||||||
[(eof-object? c)
|
[(eof-object? c)
|
||||||
(raise-read-eof-error
|
(raise-read-eof-error
|
||||||
"expected eof; "
|
"unexpected eof; "
|
||||||
source _line _col _pos
|
source _line _col _pos
|
||||||
(and _pos (- _pos (+ current-line-start-position chars-read))))]
|
(and _pos (- (+ current-line-start-position chars-read) _pos)))]
|
||||||
[(equal? c #\return)
|
[(equal? c #\return)
|
||||||
(cond
|
(cond
|
||||||
[(equal? #\newline (peek-char port))
|
[(equal? #\newline (peek-char port))
|
||||||
|
|
|
@ -31,7 +31,14 @@ example uses:
|
||||||
(case-lambda
|
(case-lambda
|
||||||
[(char port)
|
[(char port)
|
||||||
(define-values (line col pos) (port-next-location port))
|
(define-values (line col pos) (port-next-location port))
|
||||||
(dispatch-proc char port #f line col pos read/recursive previous-readtable)]
|
|
||||||
|
;; the "-2"s here are because the initial line and column
|
||||||
|
;; are supposed be at the beginning of the thing read, not
|
||||||
|
;; after the "#2" has been consumed.
|
||||||
|
(dispatch-proc char port #f line
|
||||||
|
(and col (- col 2))
|
||||||
|
(and pos (- pos 2))
|
||||||
|
read/recursive previous-readtable)]
|
||||||
[(char port source _line _col _pos)
|
[(char port source _line _col _pos)
|
||||||
(dispatch-proc char port source _line _col _pos
|
(dispatch-proc char port source _line _col _pos
|
||||||
(λ (a b c) (read-syntax/recursive source a b c))
|
(λ (a b c) (read-syntax/recursive source a b c))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user