From 89b9ad0dde5298516934f4df016c27b60e10f86d Mon Sep 17 00:00:00 2001 From: Eli Barzilay Date: Sun, 6 May 2012 04:36:09 -0400 Subject: [PATCH] New `wrap-line' function in a new `scribble/text/wrap' module. This should probably become documented if it's stable enough. For now it's intended for building wrapping functions, so it's only documented in comments the file. --- collects/scribble/text/wrap.rkt | 105 ++++++++++++++++++++++ collects/tests/scribble/main.rkt | 4 +- collects/tests/scribble/text-wrap.rkt | 121 ++++++++++++++++++++++++++ 3 files changed, 229 insertions(+), 1 deletion(-) create mode 100644 collects/scribble/text/wrap.rkt create mode 100644 collects/tests/scribble/text-wrap.rkt diff --git a/collects/scribble/text/wrap.rkt b/collects/scribble/text/wrap.rkt new file mode 100644 index 0000000000..6749a68568 --- /dev/null +++ b/collects/scribble/text/wrap.rkt @@ -0,0 +1,105 @@ +#lang racket/base + +(provide wrap-width wrap-line) + +;; The default wrap width for `wrape-line'. +(define wrap-width (make-parameter 79)) + +;; `wrap-line' wraps the `line' string into a list of strings that fit `width' +;; characters. +;; +;; This is intended to be a building block for fancier wrappers -- for +;; examples, ones that have a prefix for all lines, that indent/outdent of the +;; first line, that treat some prefix regexp as a known indentation, that +;; "squeeze" all whitespaces, that can be used as part of some other output and +;; use the starting column to set the width, or even ones that take a multiline +;; string and identify paragraphs to wrap individually. For now, it's not +;; intended to be used directly so it's only documented here -- this is likely +;; to change if the interface is stable enough. +;; +;; `width' can be either a width or an improper list of widths, where the last +;; element is used for the rest. For example, (cons 72 70) indicates a width +;; of 72 characters for the first line, and 70 for the rest. +;; +;; `split-word' controls what happens when a word is split: it is ivoked with +;; the word that is to be split, and an integer indicating how many characters +;; could fit on the first line. In most cases the string will be just a word, +;; but if it was the first/last word with only spaces before/after it, then the +;; string will have these spaces too. It is expected to return three values: +;; the first two are a string to put on the current line and a string that goes +;; first on the next one. Either of these can be `#f' to indicate nothing +;; (this is different from returning "", since that will keep the spaces next +;; to it). The third result is relevant when the second result is longer than +;; the wrapping width: #t indicates that the second result is considered a word +;; and should not be rewrapped, and #f indicates that it can. Spaces from the +;; original string are preserved, even if there are more than one space between +;; words; spaces between a word that moves to the next line and the preceding +;; word are usually dropped, but they can be preserved too if the first result +;; is "" rather than #f (and same for the space between a word that stays on +;; the same line when the following word moves doen). If the first result is +;; `#f' and it was the only word on the line, then the line that would +;; otherwise be empty is dropped. Note that depending on what `split-word' +;; chooses to do, the result may still have lines that are longer than `width' +;; characters. The default `split-word' returns (values #f the-word #t). +;; +;; Sidenote: never returns an empty list. +;; +;; Caveats: considers only spaces as whitespace; not too efficient since it +;; constructs intermediate strings for its processing; the need for the third +;; boolean result from `split-word' is suspicious and might be dropped or +;; changed. +;; +(define (wrap-line line [width* (wrap-width)] [split-word #f]) + (define (loop str strs width*) + (define width (if (pair? width*) (car width*) width*)) + (define strlen (string-length str)) + (define (do-word s1 w1 w2 s2) ; s1--spaces--w1--word--w2--spaces--s2 + (define w1* (if (eq? 0 s1) 0 w1)) ;\ w1*..w2* includes spaces + (define w2* (if (eq? strlen s2) s2 w2)) ;/ for first/last word + (define word (substring str w1* w2*)) + (define-values [r1 r2 r3] + (if split-word (split-word word (- width w1*)) (values #f word #t))) + (let* ([1st (cond [r1 (string-append (substring str 0 w1*) r1)] + [(eq? w1* 0) #f] + [else (substring str 0 s1)])] + [strs (if 1st (cons 1st strs) strs)] + [width* (cond [(not r1) width*] + [(pair? width*) (cdr width*)] + [else width*])] + [2nd (and r2 r3 + (>= (string-length r2) + (if (pair? width*) (car width*) width*)) + r2)] + [strs (if 2nd (cons 2nd strs) strs)] + [width* (cond [(not r2) width*] + [(pair? width*) (cdr width*)] + [else width*])] + [rst (cond [(and (not 2nd) r2) + (string-append r2 (substring str w2*))] + [(eq? w2* strlen) #f] + [else (substring str s2)])]) + (if rst (loop rst strs width*) (reverse strs)))) + (cond + [(strlen . <= . width) (reverse (cons str strs))] + ;; wrapping point is inside a word + [(regexp-match-positions #rx"^([^ ][^ ]+) *" str (sub1 width)) + => (λ (m2) + (define m1 (regexp-match-positions #rx" *([^ ]+)$" str 0 width)) + (do-word (caar m1) (caadr m1) (cdadr m2) (cdar m2)))] + ;; wrapping point is probably between words + [(regexp-match-positions #rx" *([^ ]+) *$" str 0 width) + => (λ (m1) + (define m2 (regexp-match-positions #rx"[^ ]" str width)) + (if m2 ; there is a following word, so we are between words + (loop (substring str (caar m2)) + (cons (substring str 0 (cdadr m1)) strs) + (if (pair? width*) (cdr width*) width*)) + ;; no following word, so the following spaces are part of a word + ;; that does need to be split + (do-word (caar m1) (caadr m1) strlen strlen)))] + ;; all spaces in first line => part of the following word + [(regexp-match-positions #rx"([^ ]+) *" str width) + => (λ (m) (do-word 0 (caar m) (cdadr m) (cdar m)))] + ;; no word at all => split the whole "word" (which is all spaces) + [else (do-word 0 0 strlen strlen)])) + (loop line '() width*)) diff --git a/collects/tests/scribble/main.rkt b/collects/tests/scribble/main.rkt index 6493534362..0c8226950e 100644 --- a/collects/tests/scribble/main.rkt +++ b/collects/tests/scribble/main.rkt @@ -1,9 +1,11 @@ #lang racket/base (require tests/eli-tester - "reader.rkt" "text-lang.rkt" "collect.rkt" "docs.rkt") + "reader.rkt" "collect.rkt" "text-lang.rkt" "text-wrap.rkt" + "docs.rkt") (test do (reader-tests) do (begin/collect-tests) do (text-lang-tests) + do (wrap-tests) do (docs-tests)) diff --git a/collects/tests/scribble/text-wrap.rkt b/collects/tests/scribble/text-wrap.rkt new file mode 100644 index 0000000000..0d2c489d23 --- /dev/null +++ b/collects/tests/scribble/text-wrap.rkt @@ -0,0 +1,121 @@ +#lang racket/base + +(require tests/eli-tester scribble/text/wrap) + +#; ; generate test cases from Emacs +(defmacro wrap-line (str col &rest dummy) + (let ((r (with-temp-buffer + (fundamental-mode) + (insert str) + (let (; straightforward filling, no fanciness + (left-margin 0) (fill-column col) + (adaptive-fill-regexp "\\`\\'") + (adaptive-fill-first-line-regexp "\\`\\'") + (filladapt-mode nil)) + (fill-region-as-paragraph (point-min) (point-max) nil t) + (format "'%S" (split-string + (buffer-substring (point-min) (point-max)) + "\n")))))) + `(insert ,r))) + +(provide wrap-tests) +(module+ main (wrap-tests)) +(define (wrap-tests) + (define (w line width result1 result2) + (test (wrap-line line width) => result1 + (wrap-line line width + (λ (w n) (values (substring w 0 n) (substring w n) #f))) + => result2)) + (test (w "eli barzilay maze is life" 25 + '("eli barzilay maze is life") + '("eli barzilay maze is life")) + (w "eli barzilay maze is life" 10 + '("eli" "barzilay" "maze is" "life") + '("eli barzil" "ay maze is" "life")) + (w "eli barzilay maze is life" 11 + '("eli" "barzilay" "maze is" "life") + '("eli barzila" "y maze is l" "ife")) + (w "eli barzilay maze is life" 12 + '("eli barzilay" "maze is life") + '("eli barzilay" "maze is life")) + (w "eli barzilay maze is life" 13 + '("eli barzilay" "maze is life") + '("eli barzilay" "maze is life")) + (w "eli barzilay maze is life" 14 + '("eli barzilay" "maze is life") + '("eli barzilay m" "aze is life")) + (w "eli barzilay maze is life" 15 + '("eli barzilay" "maze is life") + '("eli barzilay m" "aze is life")) + (w "eli barzilay maze is life" 15 + '("eli barzilay" "maze is life") + '("eli barzilay m" "aze is life")) + (w "eli barzilay maze is life" 15 + '("eli barzilay" "maze is life") + '("eli barzilay ma" "ze is life")) + (w "0123456789" 10 + '("0123456789") + '("0123456789")) + (w " xxxxx" 10 + '(" xxxxx") + '(" xxxxx")) + (w " xxxxxx" 10 + '(" xxxxxx") + '(" xxxxx" "x")) + (w " xxxxxxxxxxx" 10 + '(" xxxxxxxxxxx") + '(" xxxxx" "xxxxxx")) + (w "xxxxx " 10 + '("xxxxx ") + '("xxxxx ")) + (w "xxxxxx " 10 + '("xxxxxx ") + '("xxxxxx " " ")) + (w " xxxxx " 10 + '(" xxxxx ") + '(" xxxxx " " ")) + (w " x " 10 + '(" x ") + '(" x " " ")) + (w "" 10 + '("") + '("")) + (w " " 10 + '(" ") + '(" ")) + (w " " 10 + '(" ") + '(" ")) + (w " " 10 + '(" ") + '(" " " ")) + (w "xxxxxxxxxxx " 10 + '("xxxxxxxxxxx ") + '("xxxxxxxxxx" "x ")) + (w " xxxxxxxxxxx " 10 + '(" xxxxxxxxxxx ") + '(" xxxxx" "xxxxxx ")) + (w " xxxxxxxxxxx " 10 + '(" xxxxxxxxxxx ") + '(" xxxxx" "xxxxxx ")) + (w " xxxxxxxxxxx x" 10 + '(" xxxxxxxxxxx" "x") + '(" xxxxx" "xxxxxx x")) + (w " xxxxxxxxxxx x" 10 + '(" xxxxxxxxxxx" "x") + '(" xxxxx" "xxxxxx x")) + (w "x x x x x x x x x x x" '(10 . 8) + '("x x x x x" "x x x x" "x x") + '("x x x x x" "x x x x" "x x")) + (w "xx x x x x x x x x x x" '(10 . 8) + '("xx x x x x" "x x x x" "x x") + '("xx x x x x" "x x x x" "x x")) + (w "x x x x xxx x x x x x" '(10 . 8) + '("x x x x" "xxx x x" "x x x") + '("x x x x xx" "x x x x" "x x")) + (w "xxxx xxxx xxxx xxxx" '(10 . 5) + '("xxxx xxxx" "xxxx" "xxxx") + '("xxxx xxxx" "xxxx" "xxxx")) + (w "xxxx xxxxxxxx xxxx" '(10 . 5) + '("xxxx" "xxxxxxxx" "xxxx") + '("xxxx xxxxx" "xxx x" "xxx"))))