From 784857e9faab18ded1de77b36fd2ada32796390f Mon Sep 17 00:00:00 2001 From: Eli Barzilay Date: Wed, 23 May 2012 18:38:09 -0400 Subject: [PATCH] Add `string-split'. --- collects/racket/string.rkt | 24 ++++++++++---- collects/scribblings/reference/strings.scrbl | 34 ++++++++++++++++---- collects/tests/racket/string.rktl | 12 +++++-- 3 files changed, 55 insertions(+), 15 deletions(-) diff --git a/collects/racket/string.rkt b/collects/racket/string.rkt index 6cae70ddb9..d1b762b038 100644 --- a/collects/racket/string.rkt +++ b/collects/racket/string.rkt @@ -1,6 +1,7 @@ #lang racket/base -(provide string-append* string-join string-trim string-normalize-spaces) +(provide string-append* string-join string-trim string-normalize-spaces + string-split) (define string-append* (case-lambda [(strs) (apply string-append strs)] ; optimize common case @@ -67,11 +68,20 @@ [r (substring str 0 r)] [else str])) +(define (internal-split who str sep trim? +?) + (define rxs (get-rxs who sep +?)) + (define-values [l r] + (if trim? (internal-trim who str sep #t #t (cdr rxs)) (values #f #f))) + (define strs (regexp-split (car rxs) str (or l 0) r)) + ;; Seems to make more sense for these functions (eg, this corresponds to + ;; simple uses where `string-split' in Emacs uses t for `omit-nulls' (but we + ;; don't do that for all nulls).) + (if (equal? strs '("")) '() strs)) + +(define (string-split str [sep none] #:trim? [trim? #t] #:repeat? [+? #f]) + (internal-split 'string-split str sep trim? +?)) + (define (string-normalize-spaces str [sep none] [space " "] #:trim? [trim? #t] #:repeat? [+? #f]) - (define rxs (get-rxs 'string-normalize-spaces sep +?)) - (define-values [l r] - (if trim? - (internal-trim 'string-normalize-spaces str sep #t #t (cdr rxs)) - (values #f #f))) - (string-join (regexp-split (car rxs) str (or l 0) r) space)) + (string-join (internal-split 'string-normalize-spaces str sep trim? +?) 
+ space)) diff --git a/collects/scribblings/reference/strings.scrbl b/collects/scribblings/reference/strings.scrbl index fe78cd2755..156e16c886 100644 --- a/collects/scribblings/reference/strings.scrbl +++ b/collects/scribblings/reference/strings.scrbl @@ -429,6 +429,29 @@ of matches is trimmed. (Note that with a regexp separator you can use (string-trim "aaaxaayaa" "aa") ]} +@defproc[(string-split [str string?] + [sep (or/c string? regexp?) #px"\\s+"] + [#:trim? trim? any/c #t] + [#:repeat? repeat? any/c #f]) + (listof string?)]{ + +Splits the input @racket[str] on whitespace, returning a list of +strings. The input is trimmed first. + +Similarly to @racket[string-trim], @racket[sep] can be given as a string +or a (p)regexp to use a different separator, and @racket[repeat?] +controls matching repeated sequences. @racket[trim?] determines whether +trimming is done (the default). + +@mz-examples[#:eval string-eval + (string-split " foo bar baz \r\n\t") + (string-split " ") + (string-split " " #:trim? #f) +] + +(Note that unlike @racket[regexp-split], an empty input string results +in an empty list.)} + @defproc[(string-normalize-spaces [str string?] [sep (or/c string? regexp?) #px"\\s+"] [space string? " "] @@ -440,15 +463,14 @@ Normalizes spaces in the input @racket[str] by trimming it (using @racket[string-trim]) and replacing all whitespace sequences in the result with a single space. -Similarly to @racket[string-trim], @racket[sep] can be given as a string -or a (p)regexp, and @racket[repeat?] controls matching repeated -sequences. In addition, you can specify @racket[space] for an alternate -space replacement. @racket[trim?] determines whether trimming is done -(the default). +You can specify @racket[space] for an alternate space replacement. @mz-examples[#:eval string-eval (string-normalize-spaces " foo bar baz \r\n\t") -]} +] + +Note that this is the same as +@racket[(string-join (string-split str sep ....) 
space)]} @close-eval[string-eval] diff --git a/collects/tests/racket/string.rktl b/collects/tests/racket/string.rktl index 1a57a2110d..cc79ebc569 100644 --- a/collects/tests/racket/string.rktl +++ b/collects/tests/racket/string.rktl @@ -430,7 +430,15 @@ ;; this should return "" or "ba" (could also be "ab"), but it seems sensible ;; to do this (I haven't seen any existing trimmers that make any relevant ;; decision on this) - (test "" string-trim "ababa" "aba") - ) + (test "" string-trim "ababa" "aba")) + +;; ---------- string-split ---------- +(let () + (for ([s (in-list '("x y z" " x y z " "\nx y z" " \t x\r\r\ry z\n"))]) + (test '("x" "y" "z") string-split s)) + (for ([s (in-list '(" " " " "\n\t\r"))]) + (test '() string-split s)) + (test '("x" "y" "z") string-split "axayaza" "a") + (test '("" "x" "y" "z" "") string-split "axayaza" "a" #:trim? #f)) (report-errs)