Add `string-split'.
This commit is contained in:
parent
dcf2754a57
commit
784857e9fa
|
@ -1,6 +1,7 @@
|
|||
#lang racket/base
|
||||
|
||||
(provide string-append* string-join string-trim string-normalize-spaces)
|
||||
(provide string-append* string-join string-trim string-normalize-spaces
|
||||
string-split)
|
||||
|
||||
(define string-append*
|
||||
(case-lambda [(strs) (apply string-append strs)] ; optimize common case
|
||||
|
@ -67,11 +68,20 @@
|
|||
[r (substring str 0 r)]
|
||||
[else str]))
|
||||
|
||||
;; Shared splitting helper used by `string-split' and
;; `string-normalize-spaces'.
;;   who   -- symbol naming the public caller; passed on to `get-rxs' and
;;            `internal-trim' (presumably for error reporting -- confirm there)
;;   str   -- the string to split
;;   sep   -- separator specification, interpreted by `get-rxs'
;;   trim? -- when true, `internal-trim' supplies the start/end positions
;;            that bound the part of `str' handed to `regexp-split'
;;   +?    -- forwarded to `get-rxs' (this is the public #:repeat? flag)
;; Returns a list of strings; a result of exactly '("") is reported as '().
(define (internal-split who str sep trim? +?)
|
||||
;; `rxs' is used as a pair: its car is the regexp to split on, its cdr is
;; handed to `internal-trim'.
(define rxs (get-rxs who sep +?))
|
||||
;; `l' and `r' are the bounds of the region to split; both are #f when no
;; trimming is requested.
(define-values [l r]
|
||||
(if trim? (internal-trim who str sep #t #t (cdr rxs)) (values #f #f)))
|
||||
;; `(or l 0)' makes the split start at the beginning when `l' is #f; an `r'
;; of #f is passed through as the end argument of `regexp-split'.
(define strs (regexp-split (car rxs) str (or l 0) r))
|
||||
;; A lone empty string means there was nothing to split, so return '()
|
||||
;; instead.  This matches simple uses of Emacs's `string-split' with
|
||||
;; `omit-nulls' set to t, except that other empty strings are kept.
|
||||
(if (equal? strs '("")) '() strs))
|
||||
|
||||
;; Public entry point: splits `str' into a list of strings by delegating to
;; `internal-split' with 'string-split as the caller name.
;;   sep      -- optional separator; defaults to `none' (the documentation
;;               describes the default separator as whitespace)
;;   #:trim?  -- defaults to #t
;;   #:repeat? -- defaults to #f; bound locally as `+?'
(define (string-split str [sep none] #:trim? [trim? #t] #:repeat? [+? #f])
|
||||
(internal-split 'string-split str sep trim? +?))
|
||||
|
||||
;; Normalizes separators in `str': the string is split on `sep' and the
;; resulting pieces are joined back together with `space' (default " ").
;; #:trim? defaults to #t and #:repeat? (bound locally as `+?') to #f.
;; NOTE(review): this listing shows two closing `string-join' forms -- one
;; joining a direct `regexp-split' result and one joining the result of
;; `internal-split'.  Presumably the former is the pre-change body and the
;; latter its replacement; confirm against the repository.
(define (string-normalize-spaces str [sep none] [space " "]
|
||||
#:trim? [trim? #t] #:repeat? [+? #f])
|
||||
(define rxs (get-rxs 'string-normalize-spaces sep +?))
|
||||
(define-values [l r]
|
||||
(if trim?
|
||||
(internal-trim 'string-normalize-spaces str sep #t #t (cdr rxs))
|
||||
(values #f #f)))
|
||||
(string-join (regexp-split (car rxs) str (or l 0) r) space))
|
||||
(string-join (internal-split 'string-normalize-spaces str sep trim? +?)
|
||||
space))
|
||||
|
|
|
@ -429,6 +429,29 @@ of matches is trimmed. (Note that with a regexp separator you can use
|
|||
(string-trim "aaaxaayaa" "aa")
|
||||
]}
|
||||
|
||||
@defproc[(string-split [str string?]
|
||||
[sep (or/c string? regexp?) #px"\\s+"]
|
||||
[#:trim? trim? any/c #t]
|
||||
[#:repeat? repeat? any/c #f])
|
||||
(listof string?)]{
|
||||
|
||||
Splits the input @racket[str] on whitespace, returning a list of
|
||||
strings. The input is trimmed first.
|
||||
|
||||
Similarly to @racket[string-trim], @racket[sep] can be given as a string
|
||||
or a (p)regexp to use a different separator, and @racket[repeat?]
|
||||
controls matching repeated sequences. @racket[trim?] determines whether
|
||||
trimming is done (it is by default).
|
||||
|
||||
@mz-examples[#:eval string-eval
|
||||
(string-split " foo bar baz \r\n\t")
|
||||
(string-split " ")
|
||||
(string-split " " #:trim? #f)
|
||||
]
|
||||
|
||||
(Note that unlike @racket[regexp-split], an empty input string results
|
||||
in an empty list.)}
|
||||
|
||||
@defproc[(string-normalize-spaces [str string?]
|
||||
[sep (or/c string? regexp?) #px"\\s+"]
|
||||
[space string? " "]
|
||||
|
@ -440,15 +463,14 @@ Normalizes spaces in the input @racket[str] by trimming it (using
|
|||
@racket[string-trim]) and replacing all whitespace sequences in the
|
||||
result with a single space.
|
||||
|
||||
Similarly to @racket[string-trim], @racket[sep] can be given as a string
|
||||
or a (p)regexp, and @racket[repeat?] controls matching repeated
|
||||
sequences. In addition, you can specify @racket[space] for an alternate
|
||||
space replacement. @racket[trim?] determines whether trimming is done
|
||||
(the default).
|
||||
You can specify @racket[space] for an alternate space replacement.
|
||||
|
||||
@mz-examples[#:eval string-eval
|
||||
(string-normalize-spaces " foo bar baz \r\n\t")
|
||||
]}
|
||||
]
|
||||
|
||||
Note that this is the same as
|
||||
@racket[(string-join (string-split str sep ....) space)]}
|
||||
|
||||
|
||||
@close-eval[string-eval]
|
||||
|
|
|
@ -430,7 +430,15 @@
|
|||
;; this should return "" or "ba" (could also be "ab"), but it seems sensible
|
||||
;; to do this (I haven't seen any existing trimmers that make any relevant
|
||||
;; decision on this)
|
||||
(test "" string-trim "ababa" "aba")
|
||||
)
|
||||
(test "" string-trim "ababa" "aba"))
|
||||
|
||||
;; ---------- string-split ----------
|
||||
;; Tests for `string-split': default separator, inputs expected to produce
;; no pieces, and an explicit separator with and without trimming.
(let ()
|
||||
;; Each of these inputs is expected to split into exactly "x", "y", "z"
;; when no separator argument is given.
(for ([s (in-list '("x y z" " x y z " "\nx y z" " \t x\r\r\ry z\n"))])
|
||||
(test '("x" "y" "z") string-split s))
|
||||
;; Each of these inputs is expected to produce the empty list.
(for ([s (in-list '(" " " " "\n\t\r"))])
|
||||
(test '() string-split s))
|
||||
;; Explicit separator "a": with default trimming, the leading and trailing
;; separators leave no empty strings in the result.
(test '("x" "y" "z") string-split "axayaza" "a")
|
||||
;; With #:trim? #f the leading and trailing separators yield empty strings.
(test '("" "x" "y" "z" "") string-split "axayaza" "a" #:trim? #f))
|
||||
|
||||
(report-errs)
|
||||
|
|
Loading…
Reference in New Issue
Block a user