Add `string-split'.
This commit is contained in:
parent
dcf2754a57
commit
784857e9fa
|
@ -1,6 +1,7 @@
|
||||||
#lang racket/base
|
#lang racket/base
|
||||||
|
|
||||||
(provide string-append* string-join string-trim string-normalize-spaces)
|
(provide string-append* string-join string-trim string-normalize-spaces
|
||||||
|
string-split)
|
||||||
|
|
||||||
(define string-append*
|
(define string-append*
|
||||||
(case-lambda [(strs) (apply string-append strs)] ; optimize common case
|
(case-lambda [(strs) (apply string-append strs)] ; optimize common case
|
||||||
|
@ -67,11 +68,20 @@
|
||||||
[r (substring str 0 r)]
|
[r (substring str 0 r)]
|
||||||
[else str]))
|
[else str]))
|
||||||
|
|
||||||
|
;; Shared splitting worker: splits `str' using the regexp in (car rxs) and
;; returns the resulting list of strings.  When `trim?' is true, the two
;; values returned by `internal-trim' are used as the start/end positions
;; handed to `regexp-split'; otherwise the whole string is split.
;; `who' is passed through to `get-rxs' and `internal-trim' (presumably the
;; name of the calling function for error reporting -- confirm there).
(define (internal-split who str sep trim? +?)
|
||||||
|
;; `rxs' is a pair: its car is the splitting regexp, its cdr is passed on to
;; `internal-trim'.
(define rxs (get-rxs who sep +?))
|
||||||
|
(define-values [l r]
|
||||||
|
(if trim? (internal-trim who str sep #t #t (cdr rxs)) (values #f #f)))
|
||||||
|
;; `l' may be #f, in which case splitting starts at position 0.
(define strs (regexp-split (car rxs) str (or l 0) r))
|
||||||
|
;; Return an empty list rather than '("") when the split yields only a single
|
||||||
|
;; empty string; this mirrors simple uses of `string-split' in Emacs with
|
||||||
|
;; `omit-nulls' set to t (but other empty strings in the result are kept).
|
||||||
|
(if (equal? strs '("")) '() strs))
|
||||||
|
|
||||||
|
;; Splits `str' into a list of strings by delegating to `internal-split',
;; identifying itself as 'string-split.  `sep' defaults to `none' (a value
;; defined elsewhere in this file); trimming is on by default and `#:repeat?'
;; is off by default.
(define (string-split str [sep none] #:trim? [trim? #t] #:repeat? [+? #f])
|
||||||
|
(internal-split 'string-split str sep trim? +?))
|
||||||
|
|
||||||
(define (string-normalize-spaces str [sep none] [space " "]
|
(define (string-normalize-spaces str [sep none] [space " "]
|
||||||
#:trim? [trim? #t] #:repeat? [+? #f])
|
#:trim? [trim? #t] #:repeat? [+? #f])
|
||||||
(define rxs (get-rxs 'string-normalize-spaces sep +?))
|
(string-join (internal-split 'string-normalize-spaces str sep trim? +?)
|
||||||
(define-values [l r]
|
space))
|
||||||
(if trim?
|
|
||||||
(internal-trim 'string-normalize-spaces str sep #t #t (cdr rxs))
|
|
||||||
(values #f #f)))
|
|
||||||
(string-join (regexp-split (car rxs) str (or l 0) r) space))
|
|
||||||
|
|
|
@ -429,6 +429,29 @@ of matches is trimmed. (Note that with a regexp separator you can use
|
||||||
(string-trim "aaaxaayaa" "aa")
|
(string-trim "aaaxaayaa" "aa")
|
||||||
]}
|
]}
|
||||||
|
|
||||||
|
@defproc[(string-split [str string?]
|
||||||
|
[sep (or/c string? regexp?) #px"\\s+"]
|
||||||
|
[#:trim? trim? any/c #t]
|
||||||
|
[#:repeat? repeat? any/c #f])
|
||||||
|
(listof string?)]{
|
||||||
|
|
||||||
|
Splits the input @racket[str] on whitespace, returning a list of
|
||||||
|
strings. The input is trimmed first.
|
||||||
|
|
||||||
|
Similarly to @racket[string-trim], @racket[sep] can be given as a string
|
||||||
|
or a (p)regexp to use a different separator, and @racket[repeat?]
|
||||||
|
controls matching repeated sequences. @racket[trim?] determines whether
|
||||||
|
trimming is done (it is by default).
|
||||||
|
|
||||||
|
@mz-examples[#:eval string-eval
|
||||||
|
(string-split " foo bar baz \r\n\t")
|
||||||
|
(string-split " ")
|
||||||
|
(string-split " " #:trim? #f)
|
||||||
|
]
|
||||||
|
|
||||||
|
(Note that unlike @racket[regexp-split], an empty input string results
|
||||||
|
in an empty list.)}
|
||||||
|
|
||||||
@defproc[(string-normalize-spaces [str string?]
|
@defproc[(string-normalize-spaces [str string?]
|
||||||
[sep (or/c string? regexp?) #px"\\s+"]
|
[sep (or/c string? regexp?) #px"\\s+"]
|
||||||
[space string? " "]
|
[space string? " "]
|
||||||
|
@ -440,15 +463,14 @@ Normalizes spaces in the input @racket[str] by trimming it (using
|
||||||
@racket[string-trim]) and replacing all whitespace sequences in the
|
@racket[string-trim]) and replacing all whitespace sequences in the
|
||||||
result with a single space.
|
result with a single space.
|
||||||
|
|
||||||
Similarly to @racket[string-trim], @racket[sep] can be given as a string
|
You can specify @racket[space] for an alternate space replacement.
|
||||||
or a (p)regexp, and @racket[repeat?] controls matching repeated
|
|
||||||
sequences. In addition, you can specify @racket[space] for an alternate
|
|
||||||
space replacement. @racket[trim?] determines whether trimming is done
|
|
||||||
(the default).
|
|
||||||
|
|
||||||
@mz-examples[#:eval string-eval
|
@mz-examples[#:eval string-eval
|
||||||
(string-normalize-spaces " foo bar baz \r\n\t")
|
(string-normalize-spaces " foo bar baz \r\n\t")
|
||||||
]}
|
]
|
||||||
|
|
||||||
|
Note that this is the same as
|
||||||
|
@racket[(string-join (string-split str sep ....) space)]}
|
||||||
|
|
||||||
|
|
||||||
@close-eval[string-eval]
|
@close-eval[string-eval]
|
||||||
|
|
|
@ -430,7 +430,15 @@
|
||||||
;; this should return "" or "ba" (could also be "ab"), but it seems sensible
|
;; this should return "" or "ba" (could also be "ab"), but it seems sensible
|
||||||
;; to do this (I haven't seen any existing trimmers that make any relevant
|
;; to do this (I haven't seen any existing trimmers that make any relevant
|
||||||
;; decision on this)
|
;; decision on this)
|
||||||
(test "" string-trim "ababa" "aba")
|
(test "" string-trim "ababa" "aba"))
|
||||||
)
|
|
||||||
|
;; ---------- string-split ----------
|
||||||
|
(let ()
|
||||||
|
;; Default separator: surrounding and repeated whitespace is ignored.
(for ([s (in-list '("x y z" " x y z " "\nx y z" " \t x\r\r\ry z\n"))])
|
||||||
|
(test '("x" "y" "z") string-split s))
|
||||||
|
;; Whitespace-only inputs produce an empty list.
(for ([s (in-list '(" " " " "\n\t\r"))])
|
||||||
|
(test '() string-split s))
|
||||||
|
;; Explicit string separator, with trimming (the default) ...
(test '("x" "y" "z") string-split "axayaza" "a")
|
||||||
|
;; ... and without trimming, where the leading/trailing empty strings remain.
(test '("" "x" "y" "z" "") string-split "axayaza" "a" #:trim? #f))
|
||||||
|
|
||||||
(report-errs)
|
(report-errs)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user