New wrap-line' function in a new
scribble/text/wrap' module.
This should probably become documented if it's stable enough. For now it's intended for building wrapping functions, so it's only documented in comments the file.
This commit is contained in:
parent
c161540751
commit
89b9ad0dde
105
collects/scribble/text/wrap.rkt
Normal file
105
collects/scribble/text/wrap.rkt
Normal file
|
@ -0,0 +1,105 @@
|
||||||
|
#lang racket/base
|
||||||
|
|
||||||
|
(provide wrap-width wrap-line)
|
||||||
|
|
||||||
|
;; The default wrap width for `wrape-line'.
|
||||||
|
(define wrap-width (make-parameter 79))
|
||||||
|
|
||||||
|
;; `wrap-line' wraps the `line' string into a list of strings that fit `width'
|
||||||
|
;; characters.
|
||||||
|
;;
|
||||||
|
;; This is intended to be a building block for fancier wrappers -- for
|
||||||
|
;; examples, ones that have a prefix for all lines, that indent/outdent of the
|
||||||
|
;; first line, that treat some prefix regexp as a known indentation, that
|
||||||
|
;; "squeeze" all whitespaces, that can be used as part of some other output and
|
||||||
|
;; use the starting column to set the width, or even ones that take a multiline
|
||||||
|
;; string and identify paragraphs to wrap individually. For now, it's not
|
||||||
|
;; intended to be used directly so it's only documented here -- this is likely
|
||||||
|
;; to change if the interface is stable enough.
|
||||||
|
;;
|
||||||
|
;; `width' can be either a width or an improper list of widths, where the last
|
||||||
|
;; element is used for the rest. For example, (cons 72 70) indicates a width
|
||||||
|
;; of 72 characters for the first line, and 70 for the rest.
|
||||||
|
;;
|
||||||
|
;; `split-word' controls what happens when a word is split: it is ivoked with
|
||||||
|
;; the word that is to be split, and an integer indicating how many characters
|
||||||
|
;; could fit on the first line. In most cases the string will be just a word,
|
||||||
|
;; but if it was the first/last word with only spaces before/after it, then the
|
||||||
|
;; string will have these spaces too. It is expected to return three values:
|
||||||
|
;; the first two are a string to put on the current line and a string that goes
|
||||||
|
;; first on the next one. Either of these can be `#f' to indicate nothing
|
||||||
|
;; (this is different from returning "", since that will keep the spaces next
|
||||||
|
;; to it). The third result is relevant when the second result is longer than
|
||||||
|
;; the wrapping width: #t indicates that the second result is considered a word
|
||||||
|
;; and should not be rewrapped, and #f indicates that it can. Spaces from the
|
||||||
|
;; original string are preserved, even if there are more than one space between
|
||||||
|
;; words; spaces between a word that moves to the next line and the preceding
|
||||||
|
;; word are usually dropped, but they can be preserved too if the first result
|
||||||
|
;; is "" rather than #f (and same for the space between a word that stays on
|
||||||
|
;; the same line when the following word moves doen). If the first result is
|
||||||
|
;; `#f' and it was the only word on the line, then the line that would
|
||||||
|
;; otherwise be empty is dropped. Note that depending on what `split-word'
|
||||||
|
;; chooses to do, the result may still have lines that are longer than `width'
|
||||||
|
;; characters. The default `split-word' returns (values #f the-word #t).
|
||||||
|
;;
|
||||||
|
;; Sidenote: never returns an empty list.
|
||||||
|
;;
|
||||||
|
;; Caveats: considers only spaces as whitespace; not too efficient since it
|
||||||
|
;; constructs intermediate strings for its processing; the need for the third
|
||||||
|
;; boolean result from `split-word' is suspicious and might be dropped or
|
||||||
|
;; changed.
|
||||||
|
;;
|
||||||
|
(define (wrap-line line [width* (wrap-width)] [split-word #f])
|
||||||
|
(define (loop str strs width*)
|
||||||
|
(define width (if (pair? width*) (car width*) width*))
|
||||||
|
(define strlen (string-length str))
|
||||||
|
(define (do-word s1 w1 w2 s2) ; s1--spaces--w1--word--w2--spaces--s2
|
||||||
|
(define w1* (if (eq? 0 s1) 0 w1)) ;\ w1*..w2* includes spaces
|
||||||
|
(define w2* (if (eq? strlen s2) s2 w2)) ;/ for first/last word
|
||||||
|
(define word (substring str w1* w2*))
|
||||||
|
(define-values [r1 r2 r3]
|
||||||
|
(if split-word (split-word word (- width w1*)) (values #f word #t)))
|
||||||
|
(let* ([1st (cond [r1 (string-append (substring str 0 w1*) r1)]
|
||||||
|
[(eq? w1* 0) #f]
|
||||||
|
[else (substring str 0 s1)])]
|
||||||
|
[strs (if 1st (cons 1st strs) strs)]
|
||||||
|
[width* (cond [(not r1) width*]
|
||||||
|
[(pair? width*) (cdr width*)]
|
||||||
|
[else width*])]
|
||||||
|
[2nd (and r2 r3
|
||||||
|
(>= (string-length r2)
|
||||||
|
(if (pair? width*) (car width*) width*))
|
||||||
|
r2)]
|
||||||
|
[strs (if 2nd (cons 2nd strs) strs)]
|
||||||
|
[width* (cond [(not r2) width*]
|
||||||
|
[(pair? width*) (cdr width*)]
|
||||||
|
[else width*])]
|
||||||
|
[rst (cond [(and (not 2nd) r2)
|
||||||
|
(string-append r2 (substring str w2*))]
|
||||||
|
[(eq? w2* strlen) #f]
|
||||||
|
[else (substring str s2)])])
|
||||||
|
(if rst (loop rst strs width*) (reverse strs))))
|
||||||
|
(cond
|
||||||
|
[(strlen . <= . width) (reverse (cons str strs))]
|
||||||
|
;; wrapping point is inside a word
|
||||||
|
[(regexp-match-positions #rx"^([^ ][^ ]+) *" str (sub1 width))
|
||||||
|
=> (λ (m2)
|
||||||
|
(define m1 (regexp-match-positions #rx" *([^ ]+)$" str 0 width))
|
||||||
|
(do-word (caar m1) (caadr m1) (cdadr m2) (cdar m2)))]
|
||||||
|
;; wrapping point is probably between words
|
||||||
|
[(regexp-match-positions #rx" *([^ ]+) *$" str 0 width)
|
||||||
|
=> (λ (m1)
|
||||||
|
(define m2 (regexp-match-positions #rx"[^ ]" str width))
|
||||||
|
(if m2 ; there is a following word, so we are between words
|
||||||
|
(loop (substring str (caar m2))
|
||||||
|
(cons (substring str 0 (cdadr m1)) strs)
|
||||||
|
(if (pair? width*) (cdr width*) width*))
|
||||||
|
;; no following word, so the following spaces are part of a word
|
||||||
|
;; that does need to be split
|
||||||
|
(do-word (caar m1) (caadr m1) strlen strlen)))]
|
||||||
|
;; all spaces in first line => part of the following word
|
||||||
|
[(regexp-match-positions #rx"([^ ]+) *" str width)
|
||||||
|
=> (λ (m) (do-word 0 (caar m) (cdadr m) (cdar m)))]
|
||||||
|
;; no word at all => split the whole "word" (which is all spaces)
|
||||||
|
[else (do-word 0 0 strlen strlen)]))
|
||||||
|
(loop line '() width*))
|
|
@ -1,9 +1,11 @@
|
||||||
#lang racket/base
|
#lang racket/base
|
||||||
|
|
||||||
(require tests/eli-tester
|
(require tests/eli-tester
|
||||||
"reader.rkt" "text-lang.rkt" "collect.rkt" "docs.rkt")
|
"reader.rkt" "collect.rkt" "text-lang.rkt" "text-wrap.rkt"
|
||||||
|
"docs.rkt")
|
||||||
|
|
||||||
(test do (reader-tests)
|
(test do (reader-tests)
|
||||||
do (begin/collect-tests)
|
do (begin/collect-tests)
|
||||||
do (text-lang-tests)
|
do (text-lang-tests)
|
||||||
|
do (wrap-tests)
|
||||||
do (docs-tests))
|
do (docs-tests))
|
||||||
|
|
121
collects/tests/scribble/text-wrap.rkt
Normal file
121
collects/tests/scribble/text-wrap.rkt
Normal file
|
@ -0,0 +1,121 @@
|
||||||
|
#lang racket/base
|
||||||
|
|
||||||
|
(require tests/eli-tester scribble/text/wrap)
|
||||||
|
|
||||||
|
#; ; generate test cases from Emacs
|
||||||
|
(defmacro wrap-line (str col &rest dummy)
|
||||||
|
(let ((r (with-temp-buffer
|
||||||
|
(fundamental-mode)
|
||||||
|
(insert str)
|
||||||
|
(let (; straightforward filling, no fanciness
|
||||||
|
(left-margin 0) (fill-column col)
|
||||||
|
(adaptive-fill-regexp "\\`\\'")
|
||||||
|
(adaptive-fill-first-line-regexp "\\`\\'")
|
||||||
|
(filladapt-mode nil))
|
||||||
|
(fill-region-as-paragraph (point-min) (point-max) nil t)
|
||||||
|
(format "'%S" (split-string
|
||||||
|
(buffer-substring (point-min) (point-max))
|
||||||
|
"\n"))))))
|
||||||
|
`(insert ,r)))
|
||||||
|
|
||||||
|
(provide wrap-tests)
|
||||||
|
(module+ main (wrap-tests))
|
||||||
|
(define (wrap-tests)
|
||||||
|
(define (w line width result1 result2)
|
||||||
|
(test (wrap-line line width) => result1
|
||||||
|
(wrap-line line width
|
||||||
|
(λ (w n) (values (substring w 0 n) (substring w n) #f)))
|
||||||
|
=> result2))
|
||||||
|
(test (w "eli barzilay maze is life" 25
|
||||||
|
'("eli barzilay maze is life")
|
||||||
|
'("eli barzilay maze is life"))
|
||||||
|
(w "eli barzilay maze is life" 10
|
||||||
|
'("eli" "barzilay" "maze is" "life")
|
||||||
|
'("eli barzil" "ay maze is" "life"))
|
||||||
|
(w "eli barzilay maze is life" 11
|
||||||
|
'("eli" "barzilay" "maze is" "life")
|
||||||
|
'("eli barzila" "y maze is l" "ife"))
|
||||||
|
(w "eli barzilay maze is life" 12
|
||||||
|
'("eli barzilay" "maze is life")
|
||||||
|
'("eli barzilay" "maze is life"))
|
||||||
|
(w "eli barzilay maze is life" 13
|
||||||
|
'("eli barzilay" "maze is life")
|
||||||
|
'("eli barzilay" "maze is life"))
|
||||||
|
(w "eli barzilay maze is life" 14
|
||||||
|
'("eli barzilay" "maze is life")
|
||||||
|
'("eli barzilay m" "aze is life"))
|
||||||
|
(w "eli barzilay maze is life" 15
|
||||||
|
'("eli barzilay" "maze is life")
|
||||||
|
'("eli barzilay m" "aze is life"))
|
||||||
|
(w "eli barzilay maze is life" 15
|
||||||
|
'("eli barzilay" "maze is life")
|
||||||
|
'("eli barzilay m" "aze is life"))
|
||||||
|
(w "eli barzilay maze is life" 15
|
||||||
|
'("eli barzilay" "maze is life")
|
||||||
|
'("eli barzilay ma" "ze is life"))
|
||||||
|
(w "0123456789" 10
|
||||||
|
'("0123456789")
|
||||||
|
'("0123456789"))
|
||||||
|
(w " xxxxx" 10
|
||||||
|
'(" xxxxx")
|
||||||
|
'(" xxxxx"))
|
||||||
|
(w " xxxxxx" 10
|
||||||
|
'(" xxxxxx")
|
||||||
|
'(" xxxxx" "x"))
|
||||||
|
(w " xxxxxxxxxxx" 10
|
||||||
|
'(" xxxxxxxxxxx")
|
||||||
|
'(" xxxxx" "xxxxxx"))
|
||||||
|
(w "xxxxx " 10
|
||||||
|
'("xxxxx ")
|
||||||
|
'("xxxxx "))
|
||||||
|
(w "xxxxxx " 10
|
||||||
|
'("xxxxxx ")
|
||||||
|
'("xxxxxx " " "))
|
||||||
|
(w " xxxxx " 10
|
||||||
|
'(" xxxxx ")
|
||||||
|
'(" xxxxx " " "))
|
||||||
|
(w " x " 10
|
||||||
|
'(" x ")
|
||||||
|
'(" x " " "))
|
||||||
|
(w "" 10
|
||||||
|
'("")
|
||||||
|
'(""))
|
||||||
|
(w " " 10
|
||||||
|
'(" ")
|
||||||
|
'(" "))
|
||||||
|
(w " " 10
|
||||||
|
'(" ")
|
||||||
|
'(" "))
|
||||||
|
(w " " 10
|
||||||
|
'(" ")
|
||||||
|
'(" " " "))
|
||||||
|
(w "xxxxxxxxxxx " 10
|
||||||
|
'("xxxxxxxxxxx ")
|
||||||
|
'("xxxxxxxxxx" "x "))
|
||||||
|
(w " xxxxxxxxxxx " 10
|
||||||
|
'(" xxxxxxxxxxx ")
|
||||||
|
'(" xxxxx" "xxxxxx "))
|
||||||
|
(w " xxxxxxxxxxx " 10
|
||||||
|
'(" xxxxxxxxxxx ")
|
||||||
|
'(" xxxxx" "xxxxxx "))
|
||||||
|
(w " xxxxxxxxxxx x" 10
|
||||||
|
'(" xxxxxxxxxxx" "x")
|
||||||
|
'(" xxxxx" "xxxxxx x"))
|
||||||
|
(w " xxxxxxxxxxx x" 10
|
||||||
|
'(" xxxxxxxxxxx" "x")
|
||||||
|
'(" xxxxx" "xxxxxx x"))
|
||||||
|
(w "x x x x x x x x x x x" '(10 . 8)
|
||||||
|
'("x x x x x" "x x x x" "x x")
|
||||||
|
'("x x x x x" "x x x x" "x x"))
|
||||||
|
(w "xx x x x x x x x x x x" '(10 . 8)
|
||||||
|
'("xx x x x x" "x x x x" "x x")
|
||||||
|
'("xx x x x x" "x x x x" "x x"))
|
||||||
|
(w "x x x x xxx x x x x x" '(10 . 8)
|
||||||
|
'("x x x x" "xxx x x" "x x x")
|
||||||
|
'("x x x x xx" "x x x x" "x x"))
|
||||||
|
(w "xxxx xxxx xxxx xxxx" '(10 . 5)
|
||||||
|
'("xxxx xxxx" "xxxx" "xxxx")
|
||||||
|
'("xxxx xxxx" "xxxx" "xxxx"))
|
||||||
|
(w "xxxx xxxxxxxx xxxx" '(10 . 5)
|
||||||
|
'("xxxx" "xxxxxxxx" "xxxx")
|
||||||
|
'("xxxx xxxxx" "xxx x" "xxx"))))
|
Loading…
Reference in New Issue
Block a user