Add `string-split'.

This commit is contained in:
Eli Barzilay 2012-05-23 18:38:09 -04:00
parent dcf2754a57
commit 784857e9fa
3 changed files with 55 additions and 15 deletions

View File

@ -1,6 +1,7 @@
#lang racket/base
(provide string-append* string-join string-trim string-normalize-spaces)
(provide string-append* string-join string-trim string-normalize-spaces
string-split)
(define string-append*
(case-lambda [(strs) (apply string-append strs)] ; optimize common case
@ -67,11 +68,20 @@
[r (substring str 0 r)]
[else str]))
;; Shared splitter behind `string-split' and `string-normalize-spaces'.
;; `who' is forwarded to `get-rxs' and `internal-trim' (presumably for error
;; reporting -- confirm against those helpers).  `get-rxs' yields a pair: its
;; car is the regexp used for splitting, its cdr is handed to `internal-trim'.
;; When `trim?' is true, `internal-trim' supplies the start/end positions that
;; bound the split; otherwise the whole string is split.
(define (internal-split who str sep trim? +?)
  (define rx-pair (get-rxs who sep +?))
  (define-values [start end]
    (cond [trim? (internal-trim who str sep #t #t (cdr rx-pair))]
          [else (values #f #f)]))
  (define pieces (regexp-split (car rx-pair) str (or start 0) end))
  ;; A lone empty string means there was nothing to split, so report no
  ;; pieces at all.  This matches simple uses of Emacs's `string-split' with
  ;; `omit-nulls' set to t (other empty pieces are still kept).
  (cond [(equal? pieces '("")) '()]
        [else pieces]))
;; Public entry point: splits `str' into a list of strings by delegating to
;; `internal-split' under the name 'string-split.  `sep' defaults to `none'
;; (defined elsewhere in this file), trimming is on by default, and
;; `#:repeat?' is off by default.
(define (string-split str
                      [sep none]
                      #:trim? [trim? #t]
                      #:repeat? [repeat? #f])
  (internal-split 'string-split str sep trim? repeat?))
;; Replaces every separator match in `str' with `space' by splitting the
;; string and re-joining the pieces with `string-join'.  When `trim?' is true
;; (the default) the ends are trimmed first.  `sep' and `+?' (the `#:repeat?'
;; keyword) are passed through to the regexp-building / splitting helpers.
;; NOTE(review): this span appears to carry two bodies back to back -- an
;; inline get-rxs / internal-trim / regexp-split version followed by one that
;; delegates to `internal-split' -- and ends with one more closing paren than
;; it opens.  Looks like diff residue; confirm which body is meant to remain.
(define (string-normalize-spaces str [sep none] [space " "]
#:trim? [trim? #t] #:repeat? [+? #f])
(define rxs (get-rxs 'string-normalize-spaces sep +?))
(define-values [l r]
(if trim?
(internal-trim 'string-normalize-spaces str sep #t #t (cdr rxs))
(values #f #f)))
(string-join (regexp-split (car rxs) str (or l 0) r) space))
(string-join (internal-split 'string-normalize-spaces str sep trim? +?)
space))

View File

@ -429,6 +429,29 @@ of matches is trimmed. (Note that with a regexp separator you can use
(string-trim "aaaxaayaa" "aa")
]}
@defproc[(string-split [str string?]
[sep (or/c string? regexp?) #px"\\s+"]
[#:trim? trim? any/c #t]
[#:repeat? repeat? any/c #f])
(listof string?)]{
Splits the input @racket[str] on whitespace, returning a list of
strings. The input is trimmed first.
Similarly to @racket[string-trim], @racket[sep] can be given as a string
or a (p)regexp to use a different separator, and @racket[repeat?]
controls matching repeated sequences. @racket[trim?] determines whether
trimming is done (the default).
@mz-examples[#:eval string-eval
(string-split " foo bar baz \r\n\t")
(string-split " ")
(string-split " " #:trim? #f)
]
(Note that unlike @racket[regexp-split], an empty input string results
in an empty list.)}
@defproc[(string-normalize-spaces [str string?]
[sep (or/c string? regexp?) #px"\\s+"]
[space string? " "]
@ -440,15 +463,14 @@ Normalizes spaces in the input @racket[str] by trimming it (using
@racket[string-trim]) and replacing all whitespace sequences in the
result with a single space.
Similarly to @racket[string-trim], @racket[sep] can be given as a string
or a (p)regexp, and @racket[repeat?] controls matching repeated
sequences. In addition, you can specify @racket[space] for an alternate
space replacement. @racket[trim?] determines whether trimming is done
(the default).
You can specify @racket[space] for an alternate space replacement.
@mz-examples[#:eval string-eval
(string-normalize-spaces " foo bar baz \r\n\t")
]}
]
Note that this is the same as
@racket[(string-join (string-split str sep ....) space)].}
@close-eval[string-eval]

View File

@ -430,7 +430,15 @@
;; this should return "" or "ba" (could also be "ab"), but it seems sensible
;; to do this (I haven't seen any existing trimmers that make any relevant
;; decision on this)
(test "" string-trim "ababa" "aba")
)
(test "" string-trim "ababa" "aba"))
;; ---------- string-split ----------
(let ()
  ;; Any mix of surrounding/inner whitespace gives the same three tokens.
  (for-each (lambda (input) (test '("x" "y" "z") string-split input))
            '("x y z" " x y z " "\nx y z" " \t x\r\r\ry z\n"))
  ;; Whitespace-only input splits to nothing.
  (for-each (lambda (input) (test '() string-split input))
            '(" " " " "\n\t\r"))
  ;; Explicit separator: trimmed by default, edge empties kept without trimming.
  (test '("x" "y" "z") string-split "axayaza" "a")
  (test '("" "x" "y" "z" "") string-split "axayaza" "a" #:trim? #f))
(report-errs)