Add string-trim' and string-normalize-spaces'.

This commit is contained in:
Eli Barzilay 2012-04-18 15:19:36 -04:00
parent e2e2a1e3e1
commit 6f215759ae
3 changed files with 129 additions and 21 deletions

View File

@ -1,6 +1,6 @@
#lang scheme/base
#lang racket/base
(provide string-append* string-join)
(provide string-append* string-join string-trim string-normalize-spaces)
(define string-append*
(case-lambda [(strs) (apply string-append strs)] ; optimize common case
@ -20,3 +20,44 @@
[(null? strs) ""]
[(null? (cdr strs)) (car strs)]
[else (apply string-append (add-between strs sep))]))
;; See http://en.wikipedia.org/wiki/Trimming_(computer_programming) for a nice
;; overview of popular names etc for these functions;
;; http://blog.stevenlevithan.com/archives/faster-trim-javascript for some ways
;; to implement trimming.
(define (string-trim str [rx #px"\\s+"]
#:left? [left? #t] #:right? [right? #t])
(unless (string? str) (raise-type-error 'string-trim "string" str))
(unless (regexp? rx) (raise-type-error 'string-trim "regexp" rx))
(define len (string-length str))
(if (zero? len)
str
(let* ([start (if (and left? (regexp-match? rx (substring str 0 1)))
(cdar (regexp-match-positions rx str))
0)]
[end (and right? (< start len)
(regexp-match? rx (substring str (- len 1)))
(for/or ([i (in-range (- len 2) (- start 1) -1)])
(and (not (regexp-match? rx (substring str i (add1 i))))
(add1 i))))])
(if (and (not start) (not end))
str
(substring str (or start 0) (or end len))))))
(define (string-normalize-spaces str [rx #px"\\s+"]
#:space [space " "] #:trim? [trim? #t])
(define ps (regexp-match-positions* rx str))
(if (null? ps)
str
(let ([drop-first? (and trim? (zero? (caar ps)))]
[len (string-length str)])
(let loop ([ps (if drop-first? (cdr ps) ps)]
[i (if drop-first? (cdar ps) 0)]
[r '()])
(if (or (null? ps) (and trim? (= len (cdar ps))))
(apply string-append
(reverse (cons (substring str i (if (null? ps) len (caar ps)))
r)))
(loop (cdr ps)
(cdar ps)
(list* space (substring str i (caar ps)) r)))))))

View File

@ -398,4 +398,38 @@ each pair of strings in @racket[strs].
(string-join '("one" "two" "three" "four") " potato ")
]}
@defproc[(string-trim [str string?] [rx regexp? #px"\\s+"]
[#:left? left? any/c #t] [#:right? right? any/c #t])
string?]{
Trims the input @racket[str] by removing prefix and suffix matches of
@racket[rx]. Use @racket[#:left?] or @racket[#:right?] to suppress
trimming one of these sides.
The @racket[rx] regexp should match a whole (non-empty) sequence of
spaces and should not rely on surrounding context. This means that it
should usually end with a @litchar{+}, and that it should not use
@litchar{^}, @litchar{$}, or other lookaheads and lookbacks. (The
regexp is expected to both identify a whole sequence of spaces, and
match on a non-empty part of such a sequence.)
@mz-examples[#:eval string-eval
(string-trim " foo bar baz \r\n\t")
]}
@defproc[(string-normalize-spaces
[str string?] [rx regexp? #px"\\s+"]
[#:space space string? " "] [#:trim? trim? any/c #t])
string?]{
Normalizes spaces (matching @racket[rx]) in the input @racket[str] by
replacing them with @racket[space]. In the default configuration, this
will replace any sequence of whitespaces by a single space character.
In addition, prefix and suffix spaces are trimmed if @racket[trim?] is
true, otherwise they get normalized too.
@mz-examples[#:eval string-eval
(string-normalize-spaces " foo bar baz \r\n\t")
]}
@close-eval[string-eval]

View File

@ -337,25 +337,6 @@
(t " 12 34 " #f "" " 12 34 ")
)
;; ---------- string-append* ----------
(let ()
(test "" string-append* '())
(test "" string-append* '(""))
(test "" string-append* '("" ""))
(test "0123456789" string-append* '("0123456789"))
(test "0123456789" string-append* "0123456789" '())
(test "0123456789" string-append* "0123456789" '(""))
(test "0123456789" string-append* "0123456789" '("" ""))
(test "0123456789" string-append* "01234567" '("8" "9")))
;; ---------- string-join ----------
(let ()
(test "" string-join '() " ")
(test "" string-join '("") " ")
(test " " string-join '("" "") " ")
(test "x y" string-join '("x" "y") " ")
(test "x" string-join '("x") " "))
;; String splitting can take longer than byte-string splitting,
;; but it should have the same computational complexity.
(let ()
@ -373,5 +354,57 @@
(and ((* 100 (- bcpu bgc)) . < . (- scpu sgc))
"suspiciously long time for regexp string split")))
;; ---------- string-append* ----------
(let ([t (λ (x . xs) (test ))])
(test "" string-append* '())
(test "" string-append* '(""))
(test "" string-append* '("" ""))
(test "0123456789" string-append* '("0123456789"))
(test "0123456789" string-append* "0123456789" '())
(test "0123456789" string-append* "0123456789" '(""))
(test "0123456789" string-append* "0123456789" '("" ""))
(test "0123456789" string-append* "01234567" '("8" "9")))
;; ---------- string-join ----------
(let ()
(test "" string-join '() " ")
(test "" string-join '("") " ")
(test " " string-join '("" "") " ")
(test "x" string-join '("x") " ")
(test "x y" string-join '("x" "y") " ")
(test "x y z" string-join '("x" "y" "z") " ")
(test "x,y,z" string-join '("x" "y" "z") ","))
;; ---------- string-trim & string-normalize-spaces ----------
(let ()
(define spaces '("" " " " " "\r" "\r\n\t "))
(define ++ string-append)
(define-syntax-rule (with-spaces id E ...)
(for ([id (in-list spaces)]) E ...))
(define (both result arg)
(test result string-trim arg)
(test result string-normalize-spaces arg))
(define (norm s) (if (equal? "" s) s " "))
(with-spaces s1
(both "" s1)
(with-spaces s2
(both "x" (++ s1 "x" s2))
(both "xx" (++ s1 "xx" s2))
(with-spaces s3
(test (++ "x" s3 "x") string-trim (++ s1 "x" s3 "x" s2))
(test (++ "x" (norm s3) "x")
string-normalize-spaces (++ s1 "x" s3 "x" s2))
(test (++ (norm s1) "x" (norm s3) "x" (norm s2))
string-normalize-spaces (++ s1 "x" s3 "x" s2) #:trim? #f)
(with-spaces s4
(test (++ "x" (norm s3) "y" (norm s4) "z")
string-normalize-spaces (++ s1 "x" s3 "y" s4 "z" s2))
(test (++ (norm s1) "x" (norm s3) "y" (norm s4) "z" (norm s2))
string-normalize-spaces (++ s1 "x" s3 "y" s4 "z" s2)
#:trim? #f)))))
(test "\t x \t" string-trim " \t x \t " #px" +")
(test " x" string-trim " x " #:left? #f)
(test "x " string-trim " x " #:right? #f)
(test " x " string-trim " x " #:left? #f #:right? #f))
(report-errs)