fix unstable/2d reader for ports with specials

(eg drracket files with comment boxes or images)
This commit is contained in:
Robby Findler 2013-10-24 21:34:44 -05:00
parent 27caccdbb1
commit aa3bef0468
3 changed files with 118 additions and 32 deletions

View File

@ -48,11 +48,15 @@ todo:
;; (the ones outside the table,
;; specifically, are always just spaces)
(unless (eq? tok 'white-space)
(define c1 (string-ref val i))
(unless (equal? c1 c2)
(error '2d/lexer.rkt "expected a ~s, got ~s while feeding token ~s"
c1 c2
(car (2d-lexer-state-pending-tokens a-2d-lexer-state)))))))
;; 3) sometimes we get specials in the port
(when (char? c2)
(define c1 (string-ref val i))
(unless (equal? c1 c2)
(error '2d/lexer.rkt "expected a ~s, got ~s while feeding token ~s"
c1 c2
(car (2d-lexer-state-pending-tokens a-2d-lexer-state))))))))
;; actually read the characters in
(define last-i (- end start))
@ -65,12 +69,12 @@ todo:
;; of those and then use str-offset when indexing into the string
[str-offset 0])
(unless (= i last-i)
(define c2 (read-char port))
(define c2 (read-char-or-special port))
(check-char (+ str-offset i) c2)
(cond
[(and (equal? c2 #\return)
(equal? (peek-char port) #\newline))
(read-char port)
(equal? (peek-char-or-special port) #\newline))
(read-char-or-special port)
(check-char (+ str-offset i 1) #\newline)
(loop (+ i 1)
(+ str-offset 1))]
@ -92,9 +96,11 @@ todo:
(if (null? next-tokens)
new-state
(dont-stop new-state)))]
[(equal? #\# (peek-char port))
[(equal? #\# (peek-char-or-special port))
(define pp (peeking-input-port port))
(define chars (list (read-char pp) (read-char pp) (read-char pp)))
(define chars (list (read-char-or-special pp)
(read-char-or-special pp)
(read-char-or-special pp)))
(cond
[(equal? chars '(#\# #\2 #\d))
(start-new-2d-cond-lexing port a-2d-lexer-state uniform-chained-lexer offset)]
@ -119,9 +125,9 @@ todo:
(define (start-new-2d-cond-lexing port a-2d-lexer-state uniform-chained-lexer offset)
(define-values (line col pos) (port-next-location port))
;; consume #\# #\2 and #\d that must be there (peeked them earlier)
(read-char port)
(read-char port)
(read-char port)
(read-char-or-special port)
(read-char-or-special port)
(read-char-or-special port)
;; read in the keyword and get those tokens
(define-values (backwards-chars eol-string)
@ -135,7 +141,7 @@ todo:
(equal? c #\newline))
(values kwd-chars (string c))]
[else
(read-char port) ;; actually get the char
(read-char-or-special port) ;; actually get the char
(loop (cons c kwd-chars))])))
(define first-tok-string
(apply string (reverse backwards-chars)))
@ -157,7 +163,11 @@ todo:
c-pos))
(define peek-port (peeking-input-port port))
;; pull the newline out of the peek-port
(for ([x (in-range (string-length eol-string))]) (read-char peek-port))
(for ([x (in-range (string-length eol-string))]
[c1 (in-string eol-string)])
(define c2 (read-char-or-special peek-port))
(unless (equal? c1 c2)
(error 'unstable/2d/lexer.rkt "got an unexpected char.1 ~s vs ~s" c1 c2)))
(define the-state (make-state line pos (string-length first-tok-string)))
(setup-state the-state)
@ -205,7 +215,7 @@ todo:
(define error-pos (- (srcloc-position (car (exn:fail:read-srclocs failed)))
base-position)) ;; account for the newline
(when (< error-pos 0)
(error 'unstable/2d/lexer.rkt "got error-pos < 0: ~s ~s\n"
(error 'unstable/2d/lexer.rkt "got error-pos < 0: ~s ~s"
(srcloc-position (car (exn:fail:read-srclocs failed)))
base-position))
(define peek-port2 (peeking-input-port port))
@ -216,13 +226,20 @@ todo:
(let loop ([n n])
(cond
[(zero? n) '()]
[else (cons (read-char peek-port2) (loop (- n 1)))]))))
[else
(define c (read-char-or-special peek-port2))
(cond
[(char? c)
(cons c (loop (- n 1)))]
[else
;; drop replace specials with spaces
(cons #\space (loop (- n 1)))])]))))
(cond
[else
;; pull the newline out of peek-port2
(for ([x (in-range (string-length eol-string))]) (read-char peek-port2))
(for ([x (in-range (string-length eol-string))]) (read-char-or-special peek-port2))
(define before-token (list (pull-chars error-pos)
'no-color
@ -232,14 +249,22 @@ todo:
(define end-of-table-approx
(let ([peek-port3 (peeking-input-port peek-port2)])
(port-count-lines! peek-port3)
(define (read-line/check-double-barred)
(let loop ([found-double-barred? #f])
(define c (read-char-or-special peek-port3))
(cond
[(or (equal? c #\n) (eof-object? c))
found-double-barred?]
[else (loop (or found-double-barred?
(member c double-barred-chars)))])))
(let loop ()
(define l (read-line peek-port3))
(define-values (line col pos) (port-next-location peek-port3))
(define found-double-barred? (read-line/check-double-barred))
(cond
[(and (string? l)
(regexp-match double-barred-chars-regexp l))
[found-double-barred?
(loop)]
[else pos]))))
[else
(define-values (line col pos) (port-next-location peek-port3))
pos]))))
(define after-token
(list (pull-chars (- end-of-table-approx 1))
'error
@ -251,7 +276,9 @@ todo:
(list newline-token before-token after-token))])]
[else
(define lhses (close-cell-graph cell-connections (length table-column-breaks) (length rows)))
(define lhses (close-cell-graph cell-connections
(length table-column-breaks)
(length rows)))
(define scratch-string (make-string (for/sum ([ss (in-list rows)])
(for/sum ([s (in-list ss)])
(string-length s)))

View File

@ -312,18 +312,20 @@ example uses:
(set! current-line-number (+ current-line-number 1)))
(define chars
(let loop ([chars-read 0])
(define c (read-char port))
(define c (read-char-or-special port))
(cond
[(eof-object? c)
(raise-read-eof-error
"unexpected eof; "
source _line _col _pos
(and _pos (- (+ current-line-start-position chars-read) _pos)))]
[(not (char? c))
(readerr "unexpected special" chars-read)]
[(equal? c #\return)
(cond
[(equal? #\newline (peek-char port))
[(equal? #\newline (peek-char-or-special port))
(set! newline-char-count 2)
(list c (read-char port))]
(list c (read-char-or-special port))]
[else
(set! newline-char-count 1)
(list c)])]

View File

@ -4,7 +4,8 @@
syntax-color/scribble-lexer
syntax-color/lexer-contract
unstable/options
unstable/2d/private/lexer)
unstable/2d/private/lexer
racket/port)
(check-equal? (cropped-regions 0 10 '()) '())
(check-equal? (cropped-regions 0 10 '((0 . 10))) '((0 . 10)))
@ -16,16 +17,26 @@
(check-equal? (cropped-regions 0 10 '((-5 . 10))) '((0 . 10)))
(check-equal? (cropped-regions 13 37 '((11 . 13))) '())
(define (run-lexer #:sub-lexer [sub-lexer/no-ex racket-lexer] . strs)
(define (run-lexer #:sub-lexer [sub-lexer/no-ex racket-lexer] . strs/specials)
(define sub-lexer (if (has-option? sub-lexer/no-ex)
(exercise-option sub-lexer/no-ex)
sub-lexer/no-ex))
(define port (open-input-string (apply string-append strs)))
(port-count-lines! port)
(define-values (in out) (make-pipe-with-specials))
(thread
(λ ()
(let loop ([s strs/specials])
(cond
[(list? s)
(for ([s (in-list strs/specials)])
(loop s))]
[(string? s) (display s out)]
[else (write-special s out)]))
(close-output-port out)))
(port-count-lines! in)
(define the-lexer (exercise-option (2d-lexer sub-lexer)))
(let loop ([mode #f])
(define-values (val tok paren start end backup new-mode)
(the-lexer port 0 mode))
(the-lexer in 0 mode))
(cons (list val tok paren start end backup)
(cond
[(equal? tok 'eof) '()]
@ -310,3 +321,49 @@
"╠═════╬═══════╣\n"
"║@h{z}║ @i{w} ║\n"
"╚═════╩═══════╝\n"))
(check-equal?
(run-lexer "#2" 'not-a-char)
`(("#2" error #f 1 3 0)
("" no-color #f 3 4 0)
(,eof eof #f #f #f 0)))
(check-equal?
(run-lexer "#2d\n" 'not-a-char)
`(("#2d" hash-colon-keyword #f 1 4 0)
("\n" white-space #f 4 5 4)
(" " error #f 5 6 5)
(,eof eof #f #f #f 0)))
(check-equal?
(run-lexer "#2d\n" 'not-a-char)
`(("#2d" hash-colon-keyword #f 1 4 0)
("\n" white-space #f 4 5 4)
("" no-color #f 5 6 5)
(" " error #f 6 7 6)
(,eof eof #f #f #f 0)))
(check-equal?
(run-lexer "#2dsomething\n"
"╔═══╗\n"
"" 'special "\n"
"╚═══╝")
`(("#2dsomething" hash-colon-keyword #f 1 13 0)
("\n" white-space #f 13 14 13)
("╔═══╗\n" no-color #f 14 22 14)
("\n╚═══╝" error #f 22 31 22)
(,eof eof #f #f #f 0)))
(check-equal?
(run-lexer "#2dsomething\n"
"╔═══╗\n"
'special
"\n"
"╚═══╝")
`(("#2dsomething" hash-colon-keyword #f 1 13 0)
("\n" white-space #f 13 14 13)
("╔═══╗\n" no-color #f 14 20 14)
("\n╚═══╝" error #f 20 31 20)
(,eof eof #f #f #f 0)))