2d/2d-lib/private/read-util.rkt
2015-09-07 17:47:55 -05:00

699 lines
26 KiB
Racket

#lang racket/base
#|
ideas:
- 2dcond
- 2dmatch
- literal tables in scribble layout?
- something for 2d graphics?
example uses:
- unifier
- subtyping relation
- merge (from merge-sort)
|#
(require racket/port
syntax/readerr
racket/match
racket/set
;syntax/rect
"../dir-chars.rkt"
(for-syntax racket/base
racket/list))
(provide parse-2dcond
parse-2dcond-one-step
setup-state
make-state
copy-state
chars->desc
smallest-representative
all-line-of-interest
current-lines
close-cell-graph
compare/xy
fill-scratch-string)
(define all-line-of-interest (make-hash))
(define current-lines (make-parameter #f))
(define-syntax (line-of-interest stx)
(with-syntax ([line (syntax-line stx)])
(syntax-local-lift-expression #'(hash-set! all-line-of-interest line #t))
#'(visited line)))
(define (visited line)
(define t (current-lines))
(when t
(hash-remove! t line)))
;; fill-scratch-string : (setof (list/c number? number?))
;; (vector (listof string?))
;; (or/c string? #f)
;; (listof number)
;; number
;; [boolean?]
;; -> (if scratch-string
;; (listof (cons/c number? number?))
;; void?)
;; scratch-string gets filled in from the 'lines' argument.
;; If compute-regions? is #t, then this function constructs the regions of the
;; string that are have been filled in (as a list of pairs of start/end coordinates)
;; and returns that (not counting the regions outside of the table to the right--
;; these get filled in to the string, but the regions are not included)
;; the resulting regions are sorted (smaller to bigger values) and non-overlapping
(define (fill-scratch-string set-of-indicies
lines
scratch-string
table-column-breaks
initial-space-count
[compute-regions? #f])
(define scratch-pos 0)
(define eols '())
(define segments '())
(define cur-seg-start #f)
(define cur-seg-end #f)
(define (add-keeper)
(cond
[(equal? cur-seg-end scratch-pos)
(set! cur-seg-end (+ cur-seg-end 1))]
[else
(record-position)
(set! cur-seg-start scratch-pos)
(set! cur-seg-end (+ scratch-pos 1))]))
(define (record-position)
(when (and cur-seg-start cur-seg-end)
;; port positions count from 1, but here
;; we're counting from 0 in the string, so inc
(set! segments (cons (cons (+ cur-seg-start 1)
(+ cur-seg-end 1))
segments))))
(define-syntax-rule
(set-scratch! in? c)
(begin
(let ([x c])
;(unless (char-whitespace? x) (printf "putting ~s @ ~s\n" x scratch-pos))
(string-set! scratch-string scratch-pos x))
(when in? (when compute-regions? (add-keeper)))))
(define-syntax-rule
(clear-scratch!)
(when scratch-string (string-set! scratch-string scratch-pos #\space)))
(define-syntax-rule
(inc-scratch-pos! e)
(set! scratch-pos (+ scratch-pos e)))
(for ([lines (in-vector lines)]
[y (in-naturals)])
(for ([line (in-list lines)]
[l-num (in-naturals)])
(define first-line? (zero? l-num))
;; skip over initial spaces: we know that the string is already right here
;; because it is initialized with spaces and never changed
;; the +1 is for the first character (in the current line)
;; of the table, which is always a table edge character
(inc-scratch-pos! (+ initial-space-count 1))
(define end-of-table-position
(for/fold ([start-pos-in-line (+ initial-space-count 1)])
([table-column-break (in-list table-column-breaks)]
[x (in-naturals)])
(cond
[(and (set-member? set-of-indicies (list x y))
(or (not first-line?)
(set-member? set-of-indicies (list x (- y 1)))))
(for ([j (in-range table-column-break)])
(set-scratch! #t (string-ref line (+ j start-pos-in-line)))
(inc-scratch-pos! 1))
(if (if first-line?
(and (set-member? set-of-indicies (list (+ x 1) (- y 1)))
(set-member? set-of-indicies (list (+ x 1) y))
(set-member? set-of-indicies (list x (- y 1))))
(set-member? set-of-indicies (list (+ x 1) y)))
(set-scratch! #t (string-ref line (+ table-column-break start-pos-in-line)))
(clear-scratch!))
(inc-scratch-pos! 1)]
[else
(for ([j (in-range table-column-break)])
(clear-scratch!)
(inc-scratch-pos! 1))
(clear-scratch!)
(inc-scratch-pos! 1)])
(+ start-pos-in-line table-column-break 1)))
(set! eols (cons (cons end-of-table-position (string-length line))
eols))
(for ([j (in-range end-of-table-position (string-length line))])
(set-scratch! #f (string-ref line j))
(inc-scratch-pos! 1))))
(when compute-regions?
(record-position)
(reverse segments)))
(define (compare/xy p1 p2)
(cond
[(= (list-ref p1 0) (list-ref p2 0))
(< (list-ref p1 1) (list-ref p2 1))]
[else
(< (list-ref p1 0) (list-ref p2 0))]))
(define (smallest-representative set)
(define lst (set->list set))
(let loop ([best (car lst)]
[rest (cdr lst)])
(cond
[(null? rest) best]
[else
(cond
[(compare/xy best (car rest))
(loop best (cdr rest))]
[else
(loop (car rest) (cdr rest))])])))
(define (close-cell-graph edges width height)
(define res (make-hash))
(for ([x (in-range width)])
(for ([y (in-range height)])
(hash-set! res (list x y) (set (list x y)))))
(let loop ()
(define something-changed? #f)
(define (add-all n1 n2)
(define in-n1 (hash-ref res n1))
(define in-n2 (hash-ref res n2))
(for ([new-node (in-set in-n1)])
(unless (set-member? in-n2 new-node)
(set! something-changed? #t)
(hash-set! res n2 (set-add in-n2 new-node)))))
(for ([(node-src nodes) (in-hash edges)])
(for ([node-dest (in-set nodes)])
(add-all node-dest node-src)
(add-all node-src node-dest)))
(when something-changed? (loop)))
(apply set (hash-map res (λ (x y) y))))
(begin-for-syntax
(define state-components
;; these are the state variables for the parse-2d-cond procedure
'((current-line-number _line)
(current-line-start-position (+ (or _pos 0) chars-read))
(current-line #f)
(current-line-length 0)
(initial-space-count 0)
(initial-column-guide #f)
(newline-char-count 0)
(table-column-breaks '())
(table-column-guides '())
(right-edge-column #f)
(pending-row '())
(rows '())
(current-row 0)
(cell-connections (make-hash))
(position-of-first-cell (hash)))))
(define-syntax (setup-state stx)
(syntax-case stx ()
[(_ state-struct-id #;state-accessor #;state-mutator)
#`(begin
#,@(for/list ([state-component (in-list state-components)]
[i (in-naturals)])
(with-syntax ([id (datum->syntax #'state-struct-id (car state-component))]
[i i])
#'(define-syntax id
(make-set!-transformer
(λ (stx)
(syntax-case stx (set!)
[(set! x e)
#'(state-mutator state-struct-id i e)]
[x
(identifier? #'x)
#'(state-accessor state-struct-id i)])))))))]))
(define-syntax (state-struct stx)
(syntax-case stx ()
[(_ make-state state-accessor state-mutator copy-state)
#`(begin
(define-values (state-type state-constructor state? state-accessor state-mutator)
(make-struct-type 'parse-2d-cond-state #f #,(length state-components) 0 #f '() #f))
(define (make-state _line _pos chars-read)
(state-constructor #,@(for/list ([state-component (in-list state-components)])
(list-ref state-component 1))))
(define (copy-state the-state)
(state-constructor #,@(for/list ([state-component (in-list state-components)]
[i (in-naturals)])
#`(state-accessor the-state #,i)))))]))
(state-struct make-state state-accessor state-mutator copy-state)
(define (parse-2dcond port source _line _col _pos chars-read)
(define the-state (make-state _line _pos chars-read))
(let loop ([map #f])
(define new-map
(parse-2dcond-one-step port source _line _col _pos the-state map))
(cond
[new-map
(loop new-map)]
[else
(setup-state the-state)
(values cell-connections
(apply vector (reverse rows))
table-column-breaks
initial-space-count
position-of-first-cell)])))
(struct guide (char srcloc) #:transparent)
;; parse-2dcond returns four values:
;; - a hash table encoding a graph that shows where the
;; broken walls are in the 2d
;; - a vector of lists of strings containing the all of the line
;; of the table except the last one; the first string in each
;; list is the boundary line between the two rows
;; - a list of numbers showing the size of each column, not
;; counting the separator character (and not taking into
;; acount broken walls)
;; - the number of spaces to the left of the 2d (same for all lines)
(define (parse-2dcond-one-step port source _line _col _pos the-state last-left-map)
;; this sets up all of the state variables so they
;; look up the fields of 'the-state' and mutate
;; the fields of 'the-state'; state-components lists
;; of the state variables and their initial values
(setup-state the-state)
(define (add-node col row)
(define k (list col row))
(unless (hash-ref cell-connections k #f)
(hash-set! cell-connections k (set))))
(define (add-edge col1 row1 col2 row2)
(define (add-->edge col1 row1 col2 row2)
(add-node col1 row1)
(define k (list col1 row1))
(hash-set! cell-connections k (set-add (hash-ref cell-connections k) (list col2 row2))))
(add-->edge col1 row1 col2 row2)
(add-->edge col2 row2 col1 row1))
(define (fetch-next-line)
(when current-line
(set! pending-row (cons current-line pending-row)))
(set! current-line-start-position
(+ current-line-start-position
current-line-length
newline-char-count))
(when current-line-number
(set! current-line-number (+ current-line-number 1)))
(define chars
(let loop ([chars-read 0])
(define c (read-char-or-special port))
(cond
[(eof-object? c)
(raise-read-eof-error
"unexpected eof; "
source _line _col _pos
(and _pos (- (+ current-line-start-position chars-read) _pos)))]
[(not (char? c))
(readerr "unexpected special" chars-read)]
[(equal? c #\return)
(cond
[(equal? #\newline (peek-char-or-special port))
(set! newline-char-count 2)
(list c (read-char-or-special port))]
[else
(set! newline-char-count 1)
(list c)])]
[(equal? c #\newline)
(set! newline-char-count 1)
(list c)]
[(and (equal? c #\╝) (equal? right-edge-column chars-read))
;; if we find a ╝ at the width of the table,
;; then we don't want
;; to consume any more characters and
;; instead to allow subsequent characters
;; to be part of some other thing that's
;; being read (presumably a close paren)
(set! newline-char-count 0)
(list c)]
[else
(cons c (loop (+ chars-read 1)))])))
(set! current-line (apply string chars))
(set! current-line-length (- (string-length current-line) newline-char-count)))
(define (process-first-line)
(fetch-next-line)
(let loop ([pos 0])
(cond
[(< pos current-line-length)
(cond
[(equal? #\space (string-ref current-line pos))
(loop (+ pos 1))]
[(equal? #\╔ (string-ref current-line pos))
(set! initial-column-guide (make-a-guide pos))
(set! initial-space-count pos)]
[else
(line-of-interest)
(readerr "expected the first non-whitespace character in the table to be ╔"
pos)])]
[else
(line-of-interest)
(readerr "expected some non-whitespace characters in the first line of the table"
0
pos)]))
(let loop ([pos (+ initial-space-count 1)]
[current-column-width 0]
[column 0]
[column-breaks '()]
[column-guides '()])
(cond
[(< pos current-line-length)
(case (string-ref current-line pos)
[(#\╦)
(add-node column 0)
(loop (+ pos 1) 0 (+ column 1)
(cons current-column-width column-breaks)
(cons (make-a-guide pos) column-guides))]
[(#\═) (loop (+ pos 1) (+ current-column-width 1) column
column-breaks column-guides)]
[(#\╗)
(add-node column 0)
(whitespace-to-end (+ pos 1))
(set! table-column-breaks (reverse (cons current-column-width column-breaks)))
(set! right-edge-column pos)
(set! table-column-guides (reverse (cons (make-a-guide pos) column-guides)))]
[else
(line-of-interest)
(readerr "expected only ═ ╦ and ╗ characters along the top of the grid" pos)])]
[else
(line-of-interest)
(readerr "expected ╗ to terminate the first line" pos)])))
(define (process-a-line current-map previous-line-separator?)
(fetch-next-line)
;; check leading space
(let loop ([n 0])
(cond
[(= n initial-space-count) (void)]
[(and (< n current-line-length)
(equal? #\space (string-ref current-line n)))
(loop (+ n 1))]
[else
(line-of-interest)
(readerr "expected leading space" n)]))
(case (string-ref current-line initial-space-count)
[(#\║) (values (continue-line current-map previous-line-separator?) #t)]
[(#\╠) (values (start-new-block current-map) #f)]
[(#\╚) (values (finish-table current-map) #f)]
[else
(line-of-interest)
(readerr/expected '(#\║ #\╠ #\╚)
initial-space-count
#:guides (list initial-column-guide))]))
(define (start-new-block previous-map)
(set! current-row (+ current-row 1))
(add-node 0 current-row)
(set! rows (cons (reverse pending-row) rows))
(set! pending-row '())
(let loop ([current-cell-size (car table-column-breaks)]
[table-column-breaks (cdr table-column-breaks)]
[pos (+ initial-space-count 1)]
;; whether or not the section of the line
;; we're currently traversing is there (or not)
[cell-wall-broken? #f]
;; the srcloc of the spot that led us to the decision
;; of which boolean that cell-wall-broken? should be
[cell-wall-guide (make-a-guide initial-space-count)]
;; this is the result, being built up backwards
[map '()]
;; this is the map from the previous cell;
;; it tells us which characters here have to point upwards
[previous-map previous-map]
[current-column 0])
(cond
[(zero? current-cell-size)
(unless (< pos current-line-length)
(line-of-interest)
(readerr "line ended too soon" pos))
(define sep (string-ref current-line pos))
(cond
[(and cell-wall-broken? (not (car previous-map)))
(unless (equal? sep #\╔)
(when (double-barred-char? sep)
(line-of-interest)
(readerr "expected not to find a cell boundary character" pos)))]
[else
(define allowed-chars
(if (null? table-column-breaks)
(list (get-one (not cell-wall-broken?) (car previous-map) #f #f)
(get-one (not cell-wall-broken?) (car previous-map) #f #t))
(list (get-one (not cell-wall-broken?) (car previous-map) #f #f)
(get-one (not cell-wall-broken?) (car previous-map) #f #t)
(get-one (not cell-wall-broken?) (car previous-map) #t #f)
(get-one (not cell-wall-broken?) (car previous-map) #t #t))))
(unless (member sep allowed-chars)
(line-of-interest)
(readerr/expected (filter values allowed-chars) pos))])
(cond
[(null? table-column-breaks)
(whitespace-to-end (+ pos 1))
(reverse (cons #t map))]
[else
(define next-cell-wall-broken? (not (member sep rt-chars)))
(define edge-going-down? (and (member sep dn-chars) #t))
(define next-column (+ current-column 1))
(add-node next-column current-row)
(when next-cell-wall-broken?
(add-edge next-column current-row
next-column (- current-row 1)))
(unless edge-going-down?
(add-edge next-column current-row
(- next-column 1) current-row))
(loop (car table-column-breaks)
(cdr table-column-breaks)
(+ pos 1)
next-cell-wall-broken?
(make-a-guide pos)
(cons edge-going-down? map)
(cdr previous-map)
next-column)])]
[else
(unless (< pos current-line-length)
(line-of-interest)
(readerr "line ended in the middle of a cell" pos))
(cond
[cell-wall-broken?
(when (double-barred-char? (string-ref current-line pos))
(line-of-interest)
(readerr
(format "expected not to find a cell boundary character (based on earlier ~a)"
(guide-char cell-wall-guide))
pos
#:guides (list cell-wall-guide)))]
[else
(unless (equal? (string-ref current-line pos) #\═)
(line-of-interest)
(readerr/expected '(#\═) pos #:guides (list cell-wall-guide)))])
(loop (- current-cell-size 1)
table-column-breaks
(+ pos 1)
cell-wall-broken?
cell-wall-guide
map
previous-map
current-column)])))
(define (continue-line map previous-line-separator?)
(let loop ([current-cell-size (car table-column-breaks)]
[table-column-breaks (cdr table-column-breaks)]
[map map]
[pos (+ initial-space-count 1)]
[column-number 0]
[starting-a-new-cell? #t])
(cond
[(zero? current-cell-size)
(unless (< pos current-line-length)
(line-of-interest)
(readerr "line ended at the boundary of a cell, expected the edge of the cell" pos))
(cond
[(car map)
(unless (equal? (string-ref current-line pos) #\║)
(line-of-interest)
(readerr/expected '(#\║) pos))]
[else
(when (double-barred-char? (string-ref current-line pos))
(line-of-interest)
(readerr "expected not to find a cell boundary character" pos))])
(cond
[(null? table-column-breaks)
(whitespace-to-end (+ pos 1))]
[else
(loop (car table-column-breaks)
(cdr table-column-breaks)
(cdr map)
(+ pos 1)
(+ column-number 1)
#t)])]
[else
(unless (< pos current-line-length)
(line-of-interest)
(readerr "line ended in the middle of a cell" pos))
(when (double-barred-char? (string-ref current-line pos))
(line-of-interest)
(readerr "expected not to find a cell boundary character" pos))
(when previous-line-separator?
(when starting-a-new-cell?
(set! position-of-first-cell
(hash-set
position-of-first-cell
(list column-number current-row)
(guide-srcloc (make-a-guide pos))))))
(loop (- current-cell-size 1)
table-column-breaks
map
(+ pos 1)
column-number
#f)]))
map)
(define (finish-table map)
(set! rows (cons (reverse pending-row) rows))
(let loop ([current-cell-size (car table-column-breaks)]
[table-column-breaks (cdr table-column-breaks)]
[map map]
[pos (+ initial-space-count 1)])
(cond
[(zero? current-cell-size)
(unless (< pos current-line-length)
(line-of-interest)
(readerr "line ended in the middle of a cell" pos))
(define expected-char
(cond
[(null? table-column-breaks) #\╝]
[(car map) #\╩]
[else #\═]))
(unless (equal? (string-ref current-line pos) expected-char)
(line-of-interest)
(readerr/expected (list expected-char) pos))
(cond
[(null? table-column-breaks)
#f]
[else
(loop (car table-column-breaks)
(cdr table-column-breaks)
(cdr map)
(+ pos 1))])]
[else
(unless (< pos current-line-length)
(line-of-interest)
(readerr "line ended in the middle of a cell" pos))
(unless (equal? (string-ref current-line pos) #\═)
(line-of-interest)
(readerr/expected '(#\═) pos))
(loop (- current-cell-size 1)
table-column-breaks
map
(+ pos 1))])))
(define (whitespace-to-end pos)
(let loop ([pos pos])
(when (< pos current-line-length)
(define c (string-ref current-line pos))
(cond
[(equal? #\space c)
(loop (+ pos 1))]
[(equal? #\; c)
(void)]
[else
(line-of-interest)
(readerr "expected only whitespace outside of the table" pos)]))))
(define (make-a-guide pos-in-line)
(guide (string-ref current-line pos-in-line)
(srcloc source current-line-number pos-in-line
(+ current-line-start-position pos-in-line)
1)))
(define (readerr/expected chars pos-in-line #:guides [guides '()])
(readerr (format "expected ~a~a~a"
(if (null? (cdr chars))
""
"one of ")
(chars->desc chars "or")
(if (null? guides)
""
(format " (based on earlier ~a)"
(chars->desc (map guide-char guides)
"and"))))
pos-in-line
#:guides guides))
(define (readerr msg pos-in-line [span 1] #:guides [guides '()])
(raise-read-error msg
source
current-line-number
pos-in-line
(+ current-line-start-position pos-in-line)
span
#:extra-srclocs (map guide-srcloc guides)))
(let loop ([map (or last-left-map
(begin
(process-first-line)
(map (λ (x) #t) table-column-breaks)))]
[previous-line-separator? #t])
(define-values (next-map continue?) (process-a-line map previous-line-separator?))
(cond
[continue? (loop next-map #f)]
[next-map next-map]
[else #f])))
;; chars : non-empty-list-of-char -> string
(define (chars->desc chars sep)
(cond
[(null? (cdr chars))
(format "~a" (car chars))]
[else
(define commas? (pair? (cddr chars)))
(apply
string-append
(let loop ([chars chars]
[first? #t])
(cond
[(null? (cdr chars))
(list (format "~a~a ~a"
(if first? "" " ")
sep
(car chars)))]
[else
(cons (format "~a~a~a"
(if first? "" " ")
(car chars)
(if commas? "," ""))
(loop (cdr chars) #f))])))]))
(define (double-barred-char? c) (member c double-barred-chars))
(define (get-one lt? up? rt? dn?)
(define (cmb a b) (if a b (not b)))
(for/or ([c (in-list double-barred-chars)])
(and (cmb lt? (member c lt-chars))
(cmb up? (member c up-chars))
(cmb rt? (member c rt-chars))
(cmb dn? (member c dn-chars))
c)))