more 2d lexer repairs
This commit is contained in:
parent
250880d2a4
commit
a0059f791a
|
@ -1,6 +1,8 @@
|
|||
#lang at-exp racket/base
|
||||
(require rackunit
|
||||
syntax-color/racket-lexer
|
||||
syntax-color/scribble-lexer
|
||||
unstable/options
|
||||
unstable/2d/private/lexer)
|
||||
|
||||
(check-equal? (cropped-regions 0 10 '()) '())
|
||||
|
@ -13,12 +15,16 @@
|
|||
(check-equal? (cropped-regions 0 10 '((-5 . 10))) '((0 . 10)))
|
||||
(check-equal? (cropped-regions 13 37 '((11 . 13))) '())
|
||||
|
||||
(define (run-lexer . strs)
|
||||
(define (run-lexer #:sub-lexer [sub-lexer/no-ex racket-lexer] . strs)
|
||||
(define sub-lexer (if (has-option? sub-lexer/no-ex)
|
||||
(exercise-option sub-lexer/no-ex)
|
||||
sub-lexer/no-ex))
|
||||
(define port (open-input-string (apply string-append strs)))
|
||||
(port-count-lines! port)
|
||||
(define the-lexer (exercise-option (2d-lexer sub-lexer)))
|
||||
(let loop ([mode #f])
|
||||
(define-values (val tok paren start end backup new-mode)
|
||||
((lexer racket-lexer) port 0 mode))
|
||||
(the-lexer port 0 mode))
|
||||
(cons (list val tok paren start end backup)
|
||||
(cond
|
||||
[(equal? tok 'eof) '()]
|
||||
|
@ -81,6 +87,37 @@
|
|||
("╚══╩═══╝" parenthesis #f 41 49 41)
|
||||
(,eof eof #f #f #f 0)))
|
||||
|
||||
(printf "skipping the \\r\\n test: see the cracks-filled-in-tokens definition for where the bug lies\n")
|
||||
#;
|
||||
(check-equal?
|
||||
(run-lexer "#2d\r\n"
|
||||
"╔══╦═══╗\r\n"
|
||||
"║+ ║abc║\r\n"
|
||||
"╠══╬═══╣\r\n"
|
||||
"║34║def║\r\n"
|
||||
"╚══╩═══╝\r\n")
|
||||
`(("#2d" hash-colon-keyword #f 1 4 0)
|
||||
("\r\n" white-space #f 4 5 4)
|
||||
("╔══╦═══╗" parenthesis #f 5 13 5)
|
||||
(" " white-space #f 13 14 13)
|
||||
("║" parenthesis #f 14 15 14)
|
||||
("+" symbol #f 15 16 15)
|
||||
(" " white-space #f 16 17 16)
|
||||
("║" parenthesis #f 17 18 17)
|
||||
("\"a\"" string #f 18 21 18)
|
||||
("║" parenthesis #f 21 22 21)
|
||||
(" " white-space #f 22 23 22)
|
||||
("╠══╬═══╣" parenthesis #f 23 31 23)
|
||||
(" " white-space #f 31 32 31)
|
||||
("║" parenthesis #f 32 33 32)
|
||||
("34" constant #f 33 35 33)
|
||||
("║" parenthesis #f 35 36 35)
|
||||
("\"b\"" string #f 36 39 36)
|
||||
("║" parenthesis #f 39 40 39)
|
||||
(" " white-space #f 40 41 40)
|
||||
("╚══╩═══╝" parenthesis #f 41 49 41)
|
||||
(,eof eof #f #f #f 0)))
|
||||
|
||||
;; test tokens that cross lines (and thus need cropping)
|
||||
(check-equal?
|
||||
@run-lexer{#2d
|
||||
|
@ -174,3 +211,71 @@
|
|||
("\n" white-space #f 52 53 0)
|
||||
(,eof eof #f #f #f 0)))
|
||||
|
||||
(define-values (dont-care dont-care?)
|
||||
(let ()
|
||||
(struct dont-care ())
|
||||
(values (dont-care) dont-care?)))
|
||||
|
||||
(define (equal?/dont-care x y)
|
||||
(let loop ([x x][y y])
|
||||
(cond
|
||||
[(or (dont-care? x) (dont-care? y))
|
||||
#t]
|
||||
[(and (pair? x) (pair? y))
|
||||
(and (loop (car x) (car y))
|
||||
(loop (cdr x) (cdr y)))]
|
||||
[else (equal? x y)])))
|
||||
|
||||
(check-pred
|
||||
(λ (x)
|
||||
(equal?/dont-care
|
||||
x
|
||||
`(("#2d" hash-colon-keyword #f 1 4 0)
|
||||
("\n" white-space #f 4 5 4)
|
||||
("╔═════╦═══════╗" parenthesis #f 5 20 5)
|
||||
(" " white-space #f 20 21 20)
|
||||
("║" parenthesis #f 21 22 21)
|
||||
("@" parenthesis #f 22 23 22)
|
||||
("f" symbol #f 23 24 23)
|
||||
("{" parenthesis |{| 24 25 24)
|
||||
(,dont-care string #f 25 26 25)
|
||||
("}" parenthesis |}| 26 27 26)
|
||||
("║" parenthesis #f 27 28 27)
|
||||
(" " white-space #f 28 29 28)
|
||||
("@" parenthesis #f 29 30 29)
|
||||
("g" symbol #f 30 31 30)
|
||||
("{" parenthesis |{| 31 32 31)
|
||||
(,dont-care string #f 32 33 32)
|
||||
("}" parenthesis |}| 33 34 33)
|
||||
(" " white-space #f 34 35 34)
|
||||
("║" parenthesis #f 35 36 35)
|
||||
(" " white-space #f 36 37 36)
|
||||
("╠═════╬═══════╣" parenthesis #f 37 52 37)
|
||||
(" " white-space #f 52 53 52)
|
||||
("║" parenthesis #f 53 54 53)
|
||||
("@" parenthesis #f 54 55 54)
|
||||
("h" symbol #f 55 56 55)
|
||||
("{" parenthesis |{| 56 57 56)
|
||||
(,dont-care string #f 57 58 57)
|
||||
("}" parenthesis |}| 58 59 58)
|
||||
("║" parenthesis #f 59 60 59)
|
||||
(" " white-space #f 60 61 60)
|
||||
("@" parenthesis #f 61 62 61)
|
||||
("i" symbol #f 62 63 62)
|
||||
("{" parenthesis |{| 63 64 63)
|
||||
(,dont-care string #f 64 65 64)
|
||||
("}" parenthesis |}| 65 66 65)
|
||||
(" " white-space #f 66 67 66)
|
||||
("║" parenthesis #f 67 68 67)
|
||||
(" " white-space #f 68 69 68)
|
||||
("╚═════╩═══════╝" parenthesis #f 69 84 69)
|
||||
("\n" white-space #f 84 85 0)
|
||||
(,eof eof #f 85 85 0))))
|
||||
(run-lexer #:sub-lexer scribble-lexer
|
||||
"#2d\n"
|
||||
"╔═════╦═══════╗\n"
|
||||
"║@f{x}║ @g{y} ║\n"
|
||||
"╠═════╬═══════╣\n"
|
||||
"║@h{z}║ @i{w} ║\n"
|
||||
"╚═════╩═══════╝\n"))
|
||||
|
||||
|
|
|
@ -35,5 +35,5 @@
|
|||
(define theirs
|
||||
(or (and proc (proc key #f))
|
||||
(dynamic-require 'syntax-color/racket-lexer 'racket-lexer)))
|
||||
((dynamic-require 'unstable/2d/lexer 'lexer) theirs)]
|
||||
((dynamic-require 'unstable/2d/lexer '2d-lexer) theirs)]
|
||||
[else (if proc (proc key defval) defval)])))))
|
||||
|
|
|
@ -1,3 +1,3 @@
|
|||
#lang racket/base
|
||||
(require "private/lexer.rkt")
|
||||
(provide lexer)
|
||||
(provide 2d-lexer)
|
||||
|
|
|
@ -2,24 +2,22 @@
|
|||
(require "read-util.rkt"
|
||||
"../dir-chars.rkt"
|
||||
racket/set
|
||||
racket/port)
|
||||
racket/port
|
||||
racket/contract
|
||||
syntax-color/lexer-contract)
|
||||
|
||||
#|
|
||||
|
||||
todo:
|
||||
- backup delta
|
||||
- errors
|
||||
- do I need absolute positions? (start & end)? yes, for filling gaps.
|
||||
- break up the table into two pieces
|
||||
... build test suite
|
||||
|
||||
- break up the table into pieces
|
||||
to better cope with edits
|
||||
|
||||
|#
|
||||
|
||||
(provide lexer
|
||||
(provide (contract-out [2d-lexer (-> lexer/c lexer/c)])
|
||||
cropped-regions)
|
||||
|
||||
(define (lexer chained-lexer)
|
||||
(define (2d-lexer chained-lexer)
|
||||
(define uniform-chained-lexer
|
||||
(cond
|
||||
[(procedure-arity-includes? chained-lexer 3)
|
||||
|
@ -36,10 +34,11 @@ todo:
|
|||
(define-values (val tok paren start end)
|
||||
(apply values (car (2d-lexer-state-pending-tokens a-2d-lexer-state))))
|
||||
|
||||
;; read the characters in (expecting the same string as in 'val')
|
||||
(for ([i (in-range (- end start))])
|
||||
(define c2 (read-char port))
|
||||
|
||||
;; this helper function checks to make sure that what's
|
||||
;; in the port is actually what was predicted by the
|
||||
;; 'val' -- it isn't necessary for correct operation, but
|
||||
;; helps find bugs earlier
|
||||
(define (check-char i c2)
|
||||
;; here we want to check to make sure we're in sync, but:
|
||||
|
||||
;; 1) some lexers don't return strings (or return strings
|
||||
|
@ -55,6 +54,30 @@ todo:
|
|||
c1 c2
|
||||
(car (2d-lexer-state-pending-tokens a-2d-lexer-state)))))))
|
||||
|
||||
;; actually read the characters in
|
||||
(define last-i (- end start))
|
||||
(let loop ([i 0]
|
||||
|
||||
;; str-offset helps deal with the way line-counting ports handle
|
||||
;; \r\n combinations. That is, (- end start) will be a number that
|
||||
;; doesn't match the length of the string in the case that there
|
||||
;; are \r\n pairs in the port. We'll increment str-offset for each
|
||||
;; of those and then use str-offset when indexing into the string
|
||||
[str-offset 0])
|
||||
(unless (= i last-i)
|
||||
(define c2 (read-char port))
|
||||
(check-char (+ str-offset i) c2)
|
||||
(cond
|
||||
[(and (equal? c2 #\return)
|
||||
(equal? (peek-char port) #\newline))
|
||||
(read-char port)
|
||||
(check-char (+ str-offset i 1) #\newline)
|
||||
(loop (+ i 1)
|
||||
(+ str-offset 1))]
|
||||
[else
|
||||
(loop (+ i 1)
|
||||
str-offset)])))
|
||||
|
||||
(values val tok paren
|
||||
pos
|
||||
(+ (- end start) pos)
|
||||
|
@ -214,6 +237,7 @@ todo:
|
|||
initial-space-count
|
||||
#t))
|
||||
(define port (open-input-string scratch-string))
|
||||
(port-count-lines! port)
|
||||
(let loop ([mode (2d-lexer-state-chained-state a-2d-lexer-state)])
|
||||
(define-values (_1 _2 current-pos) (port-next-location port))
|
||||
(define-values (tok-str tok paren start end backup new-mode)
|
||||
|
@ -268,6 +292,7 @@ todo:
|
|||
;; there will be gaps that correspond to the places outside of the
|
||||
;; outermost rectangle (at a minimum, newlines); this fills those
|
||||
;; in with whitespace tokens
|
||||
;; NOTE: this code does not deal properly with \r\n newline combinations
|
||||
(define cracks-filled-in-tokens
|
||||
(let loop ([fst newline-token]
|
||||
[tokens sorted-tokens])
|
||||
|
|
Loading…
Reference in New Issue
Block a user