diff --git a/collects/tests/unstable/2d/lexer-test.rkt b/collects/tests/unstable/2d/lexer-test.rkt index ed5455cda8..9c6b5c03bb 100644 --- a/collects/tests/unstable/2d/lexer-test.rkt +++ b/collects/tests/unstable/2d/lexer-test.rkt @@ -1,6 +1,8 @@ #lang at-exp racket/base (require rackunit syntax-color/racket-lexer + syntax-color/scribble-lexer + unstable/options unstable/2d/private/lexer) (check-equal? (cropped-regions 0 10 '()) '()) @@ -13,12 +15,16 @@ (check-equal? (cropped-regions 0 10 '((-5 . 10))) '((0 . 10))) (check-equal? (cropped-regions 13 37 '((11 . 13))) '()) -(define (run-lexer . strs) +(define (run-lexer #:sub-lexer [sub-lexer/no-ex racket-lexer] . strs) + (define sub-lexer (if (has-option? sub-lexer/no-ex) + (exercise-option sub-lexer/no-ex) + sub-lexer/no-ex)) (define port (open-input-string (apply string-append strs))) (port-count-lines! port) + (define the-lexer (exercise-option (2d-lexer sub-lexer))) (let loop ([mode #f]) (define-values (val tok paren start end backup new-mode) - ((lexer racket-lexer) port 0 mode)) + (the-lexer port 0 mode)) (cons (list val tok paren start end backup) (cond [(equal? tok 'eof) '()] @@ -81,6 +87,37 @@ ("╚══╩═══╝" parenthesis #f 41 49 41) (,eof eof #f #f #f 0))) +(printf "skipping the \\r\\n test: see the cracks-filled-in-tokens definition for where the bug lies\n") +#; +(check-equal? + (run-lexer "#2d\r\n" + "╔══╦═══╗\r\n" + "║+ ║abc║\r\n" + "╠══╬═══╣\r\n" + "║34║def║\r\n" + "╚══╩═══╝\r\n") + `(("#2d" hash-colon-keyword #f 1 4 0) + ("\r\n" white-space #f 4 5 4) + ("╔══╦═══╗" parenthesis #f 5 13 5) + (" " white-space #f 13 14 13) + ("║" parenthesis #f 14 15 14) + ("+" symbol #f 15 16 15) + (" " white-space #f 16 17 16) + ("║" parenthesis #f 17 18 17) + ("\"a\"" string #f 18 21 18) + ("║" parenthesis #f 21 22 21) + (" " white-space #f 22 23 22) + ("╠══╬═══╣" parenthesis #f 23 31 23) + (" " white-space #f 31 32 31) + ("║" parenthesis #f 32 33 32) + ("34" constant #f 33 35 33) + ("║" parenthesis #f 35 36 35) + ("\"b\"" string #f 36 39 36) + ("║" parenthesis #f 39 40 39) + (" " white-space #f 40 41 40) + ("╚══╩═══╝" parenthesis #f 41 49 41) + (,eof eof #f #f #f 0))) + ;; test tokens that cross lines (and thus need cropping) (check-equal? @run-lexer{#2d @@ -174,3 +211,71 @@ ("\n" white-space #f 52 53 0) (,eof eof #f #f #f 0))) +(define-values (dont-care dont-care?) + (let () + (struct dont-care ()) + (values (dont-care) dont-care?))) + +(define (equal?/dont-care x y) + (let loop ([x x][y y]) + (cond + [(or (dont-care? x) (dont-care? y)) + #t] + [(and (pair? x) (pair? y)) + (and (loop (car x) (car y)) + (loop (cdr x) (cdr y)))] + [else (equal? x y)]))) + +(check-pred + (λ (x) + (equal?/dont-care + x + `(("#2d" hash-colon-keyword #f 1 4 0) + ("\n" white-space #f 4 5 4) + ("╔═════╦═══════╗" parenthesis #f 5 20 5) + (" " white-space #f 20 21 20) + ("║" parenthesis #f 21 22 21) + ("@" parenthesis #f 22 23 22) + ("f" symbol #f 23 24 23) + ("{" parenthesis |{| 24 25 24) + (,dont-care string #f 25 26 25) + ("}" parenthesis |}| 26 27 26) + ("║" parenthesis #f 27 28 27) + (" " white-space #f 28 29 28) + ("@" parenthesis #f 29 30 29) + ("g" symbol #f 30 31 30) + ("{" parenthesis |{| 31 32 31) + (,dont-care string #f 32 33 32) + ("}" parenthesis |}| 33 34 33) + (" " white-space #f 34 35 34) + ("║" parenthesis #f 35 36 35) + (" " white-space #f 36 37 36) + ("╠═════╬═══════╣" parenthesis #f 37 52 37) + (" " white-space #f 52 53 52) + ("║" parenthesis #f 53 54 53) + ("@" parenthesis #f 54 55 54) + ("h" symbol #f 55 56 55) + ("{" parenthesis |{| 56 57 56) + (,dont-care string #f 57 58 57) + ("}" parenthesis |}| 58 59 58) + ("║" parenthesis #f 59 60 59) + (" " white-space #f 60 61 60) + ("@" parenthesis #f 61 62 61) + ("i" symbol #f 62 63 62) + ("{" parenthesis |{| 63 64 63) + (,dont-care string #f 64 65 64) + ("}" parenthesis |}| 65 66 65) + (" " white-space #f 66 67 66) + ("║" parenthesis #f 67 68 67) + (" " white-space #f 68 69 68) + ("╚═════╩═══════╝" parenthesis #f 69 84 69) + ("\n" white-space #f 84 85 0) + (,eof eof #f 85 85 0)))) + (run-lexer #:sub-lexer scribble-lexer + "#2d\n" + "╔═════╦═══════╗\n" + "║@f{x}║ @g{y} ║\n" + "╠═════╬═══════╣\n" + "║@h{z}║ @i{w} ║\n" + "╚═════╩═══════╝\n")) + diff --git a/collects/unstable/2d/lang/reader.rkt b/collects/unstable/2d/lang/reader.rkt index 1cf56e6c7b..8a5ecbdb56 100644 --- a/collects/unstable/2d/lang/reader.rkt +++ b/collects/unstable/2d/lang/reader.rkt @@ -35,5 +35,5 @@ (define theirs (or (and proc (proc key #f)) (dynamic-require 'syntax-color/racket-lexer 'racket-lexer))) - ((dynamic-require 'unstable/2d/lexer 'lexer) theirs)] + ((dynamic-require 'unstable/2d/lexer '2d-lexer) theirs)] [else (if proc (proc key defval) defval)]))))) diff --git a/collects/unstable/2d/lexer.rkt b/collects/unstable/2d/lexer.rkt index 466c8532b9..b40285cb19 100644 --- a/collects/unstable/2d/lexer.rkt +++ b/collects/unstable/2d/lexer.rkt @@ -1,3 +1,3 @@ #lang racket/base (require "private/lexer.rkt") -(provide lexer) +(provide 2d-lexer) diff --git a/collects/unstable/2d/private/lexer.rkt b/collects/unstable/2d/private/lexer.rkt index 2e7431e286..3ce73597f6 100644 --- a/collects/unstable/2d/private/lexer.rkt +++ b/collects/unstable/2d/private/lexer.rkt @@ -2,24 +2,22 @@ (require "read-util.rkt" "../dir-chars.rkt" racket/set - racket/port) + racket/port + racket/contract + syntax-color/lexer-contract) #| todo: - - backup delta - - errors - - do I need absolute positions? (start & end)? yes, for filling gaps. - - break up the table into two pieces - ... build test suite - + - break up the table into pieces + to better cope with edits |# -(provide lexer +(provide (contract-out [2d-lexer (-> lexer/c lexer/c)]) cropped-regions) -(define (lexer chained-lexer) +(define (2d-lexer chained-lexer) (define uniform-chained-lexer (cond [(procedure-arity-includes? chained-lexer 3) @@ -36,10 +34,11 @@ todo: (define-values (val tok paren start end) (apply values (car (2d-lexer-state-pending-tokens a-2d-lexer-state)))) - ;; read the characters in (expecting the same string as in 'val') - (for ([i (in-range (- end start))]) - (define c2 (read-char port)) - + ;; this helper function checks to make sure that what's + ;; in the port is actually what was predicted by the + ;; 'val' -- it isn't necessary for correct operation, but + ;; helps find bugs earlier + (define (check-char i c2) ;; here we want to check to make sure we're in sync, but: ;; 1) some lexers don't return strings (or return strings @@ -55,6 +54,30 @@ todo: c1 c2 (car (2d-lexer-state-pending-tokens a-2d-lexer-state))))))) + ;; actually read the characters in + (define last-i (- end start)) + (let loop ([i 0] + + ;; str-offset helps deal with the way line-counting ports handle + ;; \r\n combinations. That is, (- end start) will be a number that + ;; doesn't match the length of the string in the case that there + ;; are \r\n pairs in the port. We'll increment str-offset for each + ;; of those and then use str-offset when indexing into the string + [str-offset 0]) + (unless (= i last-i) + (define c2 (read-char port)) + (check-char (+ str-offset i) c2) + (cond + [(and (equal? c2 #\return) + (equal? (peek-char port) #\newline)) + (read-char port) + (check-char (+ str-offset i 1) #\newline) + (loop (+ i 1) + (+ str-offset 1))] + [else + (loop (+ i 1) + str-offset)]))) + (values val tok paren pos (+ (- end start) pos) @@ -214,6 +237,7 @@ todo: initial-space-count #t)) (define port (open-input-string scratch-string)) + (port-count-lines! port) (let loop ([mode (2d-lexer-state-chained-state a-2d-lexer-state)]) (define-values (_1 _2 current-pos) (port-next-location port)) (define-values (tok-str tok paren start end backup new-mode) @@ -268,6 +292,7 @@ todo: ;; there will be gaps that correspond to the places outside of the ;; outermost rectangle (at a minimum, newlines); this fills those ;; in with whitespace tokens + ;; NOTE: this code does not deal properly with \r\n newline combinations (define cracks-filled-in-tokens (let loop ([fst newline-token] [tokens sorted-tokens])