From 1b75e5117565134b56f8f70bd9e7cd856d6c5dad Mon Sep 17 00:00:00 2001
From: Robby Findler <robby@racket-lang.org>
Date: Sun, 24 Feb 2013 16:36:16 -0600
Subject: [PATCH] more 2d lexer repairs

original commit: a0059f791a929bdb91a043d9419a622912ea65ee
---
 collects/unstable/2d/lang/reader.rkt   |  2 +-
 collects/unstable/2d/lexer.rkt         |  2 +-
 collects/unstable/2d/private/lexer.rkt | 51 +++++++++++++++++++-------
 3 files changed, 40 insertions(+), 15 deletions(-)

diff --git a/collects/unstable/2d/lang/reader.rkt b/collects/unstable/2d/lang/reader.rkt
index 1cf56e6..8a5ecbd 100644
--- a/collects/unstable/2d/lang/reader.rkt
+++ b/collects/unstable/2d/lang/reader.rkt
@@ -35,5 +35,5 @@
           (define theirs 
             (or (and proc (proc key #f))
                 (dynamic-require 'syntax-color/racket-lexer 'racket-lexer)))
-          ((dynamic-require 'unstable/2d/lexer 'lexer) theirs)]
+          ((dynamic-require 'unstable/2d/lexer '2d-lexer) theirs)]
          [else (if proc (proc key defval) defval)])))))
diff --git a/collects/unstable/2d/lexer.rkt b/collects/unstable/2d/lexer.rkt
index 466c853..b40285c 100644
--- a/collects/unstable/2d/lexer.rkt
+++ b/collects/unstable/2d/lexer.rkt
@@ -1,3 +1,3 @@
 #lang racket/base
 (require "private/lexer.rkt")
-(provide lexer)
+(provide 2d-lexer)
diff --git a/collects/unstable/2d/private/lexer.rkt b/collects/unstable/2d/private/lexer.rkt
index 2e7431e..3ce7359 100644
--- a/collects/unstable/2d/private/lexer.rkt
+++ b/collects/unstable/2d/private/lexer.rkt
@@ -2,24 +2,22 @@
 (require "read-util.rkt"
          "../dir-chars.rkt"
          racket/set
-         racket/port)
+         racket/port
+         racket/contract
+         syntax-color/lexer-contract)
 
 #|
 
 todo:
- - backup delta
- - errors
- - do I need absolute positions? (start & end)? yes, for filling gaps.
- - break up the table into two pieces
- ... build test suite
-
+ - break up the table into pieces
+   to better cope with edits
 
 |#
 
-(provide lexer
+(provide (contract-out [2d-lexer (-> lexer/c lexer/c)])
          cropped-regions)
 
-(define (lexer chained-lexer)
+(define (2d-lexer chained-lexer)
   (define uniform-chained-lexer
     (cond
       [(procedure-arity-includes? chained-lexer 3)
@@ -36,10 +34,11 @@ todo:
        (define-values (val tok paren start end)
          (apply values (car (2d-lexer-state-pending-tokens a-2d-lexer-state))))
        
-       ;; read the characters in (expecting the same string as in 'val')
-       (for ([i (in-range (- end start))])
-         (define c2 (read-char port))
-         
+       ;; this helper function checks to make sure that what's
+       ;; in the port is actually what was predicted by the
+       ;; 'val' -- it isn't necessary for correct operation, but
+       ;; helps find bugs earlier
+       (define (check-char i c2)
          ;; here we want to check to make sure we're in sync, but:
          
          ;; 1) some lexers don't return strings (or return strings
@@ -55,6 +54,30 @@ todo:
                       c1 c2 
                       (car (2d-lexer-state-pending-tokens a-2d-lexer-state)))))))
        
+       ;; actually read the characters in
+       (define last-i (- end start))
+       (let loop ([i 0]
+                  
+                  ;; str-offset helps deal with the way line-counting ports handle 
+                  ;; \r\n combinations. That is, (- end start) will be a number that
+                  ;; doesn't match the length of the string in the case that there 
+                  ;; are \r\n pairs in the port. We'll increment str-offset for each 
+                  ;; of those and then use str-offset when indexing into the string
+                  [str-offset 0])
+         (unless (= i last-i)
+           (define c2 (read-char port))
+           (check-char (+ str-offset i) c2)
+           (cond
+             [(and (equal? c2 #\return)
+                   (equal? (peek-char port) #\newline))
+              (read-char port)
+              (check-char (+ str-offset i 1) #\newline)
+              (loop (+ i 1)
+                    (+ str-offset 1))]
+             [else 
+              (loop (+ i 1)
+                    str-offset)])))
+       
        (values val tok paren 
                pos
                (+ (- end start) pos)
@@ -214,6 +237,7 @@ todo:
                                    initial-space-count
                                    #t))
             (define port (open-input-string scratch-string))
+            (port-count-lines! port)
             (let loop ([mode (2d-lexer-state-chained-state a-2d-lexer-state)])
               (define-values (_1 _2 current-pos) (port-next-location port))
               (define-values (tok-str tok paren start end backup new-mode)
@@ -268,6 +292,7 @@ todo:
           ;; there will be gaps that correspond to the places outside of the
           ;; outermost rectangle (at a minimum, newlines); this fills those 
           ;; in with whitespace tokens
+          ;; NOTE: this code does not deal properly with \r\n newline combinations
           (define cracks-filled-in-tokens
             (let loop ([fst newline-token]
                        [tokens sorted-tokens])