Add `string-split'.

2012-05-23 18:38:09 -04:00 · 2012-05-23 18:38:09 -04:00 · 784857e9fa
commit 784857e9fa
parent dcf2754a57
3 changed files with 55 additions and 15 deletions
--- a/collects/racket/string.rkt
+++ b/collects/racket/string.rkt
@ -1,6 +1,7 @@
 #lang racket/base

-(provide string-append* string-join string-trim string-normalize-spaces)
+(provide string-append* string-join string-trim string-normalize-spaces
+         string-split)

 (define string-append*
  (case-lambda [(strs) (apply string-append strs)] ; optimize common case
@ -67,11 +68,20 @@
        [r         (substring str 0 r)]
        [else      str]))

+(define (internal-split who str sep trim? +?)
+  (define rxs (get-rxs who sep +?))
+  (define-values [l r]
+    (if trim? (internal-trim who str sep #t #t (cdr rxs)) (values #f #f)))
+  (define strs (regexp-split (car rxs) str (or l 0) r))
+  ;; A result of just '("") is turned into '(): that seems to make more sense
+  ;; for these functions.  (It is like simple uses of Emacs's `split-string'
+  ;; with t for `omit-nulls', except that other empty strings are kept.)
+  (if (equal? strs '("")) '() strs))
+
+(define (string-split str [sep none] #:trim? [trim? #t] #:repeat? [+? #f])
+  (internal-split 'string-split str sep trim? +?))
+
 (define (string-normalize-spaces str [sep none] [space " "]
                                 #:trim? [trim? #t] #:repeat? [+? #f])
-  (define rxs (get-rxs 'string-normalize-spaces sep +?))
-  (define-values [l r]
-    (if trim?
-      (internal-trim 'string-normalize-spaces str sep #t #t (cdr rxs))
-      (values #f #f)))
-  (string-join (regexp-split (car rxs) str (or l 0) r) space))
+  (string-join (internal-split 'string-normalize-spaces str sep trim? +?)
+               space))
--- a/collects/scribblings/reference/strings.scrbl
+++ b/collects/scribblings/reference/strings.scrbl
@ -429,6 +429,29 @@ of matches is trimmed.  (Note that with a regexp separator you can use
  (string-trim "aaaxaayaa" "aa")
 ]}

+@defproc[(string-split [str string?]
+                       [sep (or/c string? regexp?) #px"\\s+"]
+                       [#:trim? trim? any/c #t]
+                       [#:repeat? repeat? any/c #f])
+         (listof string?)]{
+
+Splits the input @racket[str] on whitespace, returning a list of
+strings.  The input is trimmed first.
+
+Similarly to @racket[string-trim], @racket[sep] can be given as a string
+or a (p)regexp to use a different separator, and @racket[repeat?]
+controls matching repeated sequences.  @racket[trim?] determines whether
+trimming is done (the default).
+
+@mz-examples[#:eval string-eval
+  (string-split "  foo bar  baz \r\n\t")
+  (string-split "  ")
+  (string-split "  " #:trim? #f)
+]
+
+(Note that unlike @racket[regexp-split], an empty input string results
+in an empty list.)}
+
@defproc[(string-normalize-spaces [str string?]
                                  [sep (or/c string? regexp?) #px"\\s+"]
                                  [space string? " "]
@ -440,15 +463,14 @@ Normalizes spaces in the input @racket[str] by trimming it (using
@racket[string-trim]) and replacing all whitespace sequences in the
 result with a single space.

-Similarly to @racket[string-trim], @racket[sep] can be given as a string
-or a (p)regexp, and @racket[repeat?] controls matching repeated
-sequences.  In addition, you can specify @racket[space] for an alternate
-space replacement.  @racket[trim?] determines whether trimming is done
-(the default).
+You can specify @racket[space] for an alternate space replacement.

@mz-examples[#:eval string-eval
  (string-normalize-spaces "  foo bar  baz \r\n\t")
-]}
+]
+
+Note that this is the same as
+@racket[(string-join (string-split str sep ....) space)].}


@close-eval[string-eval]
--- a/collects/tests/racket/string.rktl
+++ b/collects/tests/racket/string.rktl
@ -430,7 +430,15 @@
  ;; this should return "" or "ba" (could also be "ab"), but it seems sensible
  ;; to do this (I haven't seen any existing trimmers that make any relevant
  ;; decision on this)
-  (test "" string-trim "ababa" "aba")
-  )
+  (test "" string-trim "ababa" "aba"))
+
+;; ---------- string-split ----------
+(let ()
+  (for ([s (in-list '("x y z" " x y z "  "\nx y z" "  \t x\r\r\ry    z\n"))])
+    (test '("x" "y" "z") string-split s))
+  (for ([s (in-list '(" " "   "  "\n\t\r"))])
+    (test '() string-split s))
+  (test '("x" "y" "z") string-split "axayaza" "a")
+  (test '("" "x" "y" "z" "") string-split "axayaza" "a" #:trim? #f))

 (report-errs)