more 2d lexer repairs
original commit: a0059f791a929bdb91a043d9419a622912ea65ee
This commit is contained in:
parent
670f9e7c51
commit
1b75e51175
|
@ -35,5 +35,5 @@
|
||||||
(define theirs
|
(define theirs
|
||||||
(or (and proc (proc key #f))
|
(or (and proc (proc key #f))
|
||||||
(dynamic-require 'syntax-color/racket-lexer 'racket-lexer)))
|
(dynamic-require 'syntax-color/racket-lexer 'racket-lexer)))
|
||||||
((dynamic-require 'unstable/2d/lexer 'lexer) theirs)]
|
((dynamic-require 'unstable/2d/lexer '2d-lexer) theirs)]
|
||||||
[else (if proc (proc key defval) defval)])))))
|
[else (if proc (proc key defval) defval)])))))
|
||||||
|
|
|
@ -1,3 +1,3 @@
|
||||||
#lang racket/base
|
#lang racket/base
|
||||||
(require "private/lexer.rkt")
|
(require "private/lexer.rkt")
|
||||||
(provide lexer)
|
(provide 2d-lexer)
|
||||||
|
|
|
@ -2,24 +2,22 @@
|
||||||
(require "read-util.rkt"
|
(require "read-util.rkt"
|
||||||
"../dir-chars.rkt"
|
"../dir-chars.rkt"
|
||||||
racket/set
|
racket/set
|
||||||
racket/port)
|
racket/port
|
||||||
|
racket/contract
|
||||||
|
syntax-color/lexer-contract)
|
||||||
|
|
||||||
#|
|
#|
|
||||||
|
|
||||||
todo:
|
todo:
|
||||||
- backup delta
|
- break up the table into pieces
|
||||||
- errors
|
to better cope with edits
|
||||||
- do I need absolute positions? (start & end)? yes, for filling gaps.
|
|
||||||
- break up the table into two pieces
|
|
||||||
... build test suite
|
|
||||||
|
|
||||||
|
|
||||||
|#
|
|#
|
||||||
|
|
||||||
(provide lexer
|
(provide (contract-out [2d-lexer (-> lexer/c lexer/c)])
|
||||||
cropped-regions)
|
cropped-regions)
|
||||||
|
|
||||||
(define (lexer chained-lexer)
|
(define (2d-lexer chained-lexer)
|
||||||
(define uniform-chained-lexer
|
(define uniform-chained-lexer
|
||||||
(cond
|
(cond
|
||||||
[(procedure-arity-includes? chained-lexer 3)
|
[(procedure-arity-includes? chained-lexer 3)
|
||||||
|
@ -36,10 +34,11 @@ todo:
|
||||||
(define-values (val tok paren start end)
|
(define-values (val tok paren start end)
|
||||||
(apply values (car (2d-lexer-state-pending-tokens a-2d-lexer-state))))
|
(apply values (car (2d-lexer-state-pending-tokens a-2d-lexer-state))))
|
||||||
|
|
||||||
;; read the characters in (expecting the same string as in 'val')
|
;; this helper function checks to make sure that what's
|
||||||
(for ([i (in-range (- end start))])
|
;; in the port is actually what was predicted by the
|
||||||
(define c2 (read-char port))
|
;; 'val' -- it isn't necessary for correct operation, but
|
||||||
|
;; helps find bugs earlier
|
||||||
|
(define (check-char i c2)
|
||||||
;; here we want to check to make sure we're in sync, but:
|
;; here we want to check to make sure we're in sync, but:
|
||||||
|
|
||||||
;; 1) some lexers don't return strings (or return strings
|
;; 1) some lexers don't return strings (or return strings
|
||||||
|
@ -55,6 +54,30 @@ todo:
|
||||||
c1 c2
|
c1 c2
|
||||||
(car (2d-lexer-state-pending-tokens a-2d-lexer-state)))))))
|
(car (2d-lexer-state-pending-tokens a-2d-lexer-state)))))))
|
||||||
|
|
||||||
|
;; actually read the characters in
|
||||||
|
(define last-i (- end start))
|
||||||
|
(let loop ([i 0]
|
||||||
|
|
||||||
|
;; str-offset helps deal with the way line-counting ports handle
|
||||||
|
;; \r\n combinations. That is, (- end start) will be a number that
|
||||||
|
;; doesn't match the length of the string in the case that there
|
||||||
|
;; are \r\n pairs in the port. We'll increment str-offset for each
|
||||||
|
;; of those and then use str-offset when indexing into the string
|
||||||
|
[str-offset 0])
|
||||||
|
(unless (= i last-i)
|
||||||
|
(define c2 (read-char port))
|
||||||
|
(check-char (+ str-offset i) c2)
|
||||||
|
(cond
|
||||||
|
[(and (equal? c2 #\return)
|
||||||
|
(equal? (peek-char port) #\newline))
|
||||||
|
(read-char port)
|
||||||
|
(check-char (+ str-offset i 1) #\newline)
|
||||||
|
(loop (+ i 1)
|
||||||
|
(+ str-offset 1))]
|
||||||
|
[else
|
||||||
|
(loop (+ i 1)
|
||||||
|
str-offset)])))
|
||||||
|
|
||||||
(values val tok paren
|
(values val tok paren
|
||||||
pos
|
pos
|
||||||
(+ (- end start) pos)
|
(+ (- end start) pos)
|
||||||
|
@ -214,6 +237,7 @@ todo:
|
||||||
initial-space-count
|
initial-space-count
|
||||||
#t))
|
#t))
|
||||||
(define port (open-input-string scratch-string))
|
(define port (open-input-string scratch-string))
|
||||||
|
(port-count-lines! port)
|
||||||
(let loop ([mode (2d-lexer-state-chained-state a-2d-lexer-state)])
|
(let loop ([mode (2d-lexer-state-chained-state a-2d-lexer-state)])
|
||||||
(define-values (_1 _2 current-pos) (port-next-location port))
|
(define-values (_1 _2 current-pos) (port-next-location port))
|
||||||
(define-values (tok-str tok paren start end backup new-mode)
|
(define-values (tok-str tok paren start end backup new-mode)
|
||||||
|
@ -268,6 +292,7 @@ todo:
|
||||||
;; there will be gaps that correspond to the places outside of the
|
;; there will be gaps that correspond to the places outside of the
|
||||||
;; outermost rectangle (at a minimum, newlines); this fills those
|
;; outermost rectangle (at a minimum, newlines); this fills those
|
||||||
;; in with whitespace tokens
|
;; in with whitespace tokens
|
||||||
|
;; NOTE: this code does not deal properly with \r\n newline combinations
|
||||||
(define cracks-filled-in-tokens
|
(define cracks-filled-in-tokens
|
||||||
(let loop ([fst newline-token]
|
(let loop ([fst newline-token]
|
||||||
[tokens sorted-tokens])
|
[tokens sorted-tokens])
|
||||||
|
|
Loading…
Reference in New Issue
Block a user