Add string-trim' and
string-normalize-spaces'.
This commit is contained in:
parent
e2e2a1e3e1
commit
6f215759ae
|
@ -1,6 +1,6 @@
|
|||
#lang scheme/base
|
||||
#lang racket/base
|
||||
|
||||
(provide string-append* string-join)
|
||||
(provide string-append* string-join string-trim string-normalize-spaces)
|
||||
|
||||
(define string-append*
|
||||
(case-lambda [(strs) (apply string-append strs)] ; optimize common case
|
||||
|
@ -20,3 +20,44 @@
|
|||
[(null? strs) ""]
|
||||
[(null? (cdr strs)) (car strs)]
|
||||
[else (apply string-append (add-between strs sep))]))
|
||||
|
||||
;; See http://en.wikipedia.org/wiki/Trimming_(computer_programming) for a nice
|
||||
;; overview of popular names etc for these functions;
|
||||
;; http://blog.stevenlevithan.com/archives/faster-trim-javascript for some ways
|
||||
;; to implement trimming.
|
||||
(define (string-trim str [rx #px"\\s+"]
|
||||
#:left? [left? #t] #:right? [right? #t])
|
||||
(unless (string? str) (raise-type-error 'string-trim "string" str))
|
||||
(unless (regexp? rx) (raise-type-error 'string-trim "regexp" rx))
|
||||
(define len (string-length str))
|
||||
(if (zero? len)
|
||||
str
|
||||
(let* ([start (if (and left? (regexp-match? rx (substring str 0 1)))
|
||||
(cdar (regexp-match-positions rx str))
|
||||
0)]
|
||||
[end (and right? (< start len)
|
||||
(regexp-match? rx (substring str (- len 1)))
|
||||
(for/or ([i (in-range (- len 2) (- start 1) -1)])
|
||||
(and (not (regexp-match? rx (substring str i (add1 i))))
|
||||
(add1 i))))])
|
||||
(if (and (not start) (not end))
|
||||
str
|
||||
(substring str (or start 0) (or end len))))))
|
||||
|
||||
(define (string-normalize-spaces str [rx #px"\\s+"]
|
||||
#:space [space " "] #:trim? [trim? #t])
|
||||
(define ps (regexp-match-positions* rx str))
|
||||
(if (null? ps)
|
||||
str
|
||||
(let ([drop-first? (and trim? (zero? (caar ps)))]
|
||||
[len (string-length str)])
|
||||
(let loop ([ps (if drop-first? (cdr ps) ps)]
|
||||
[i (if drop-first? (cdar ps) 0)]
|
||||
[r '()])
|
||||
(if (or (null? ps) (and trim? (= len (cdar ps))))
|
||||
(apply string-append
|
||||
(reverse (cons (substring str i (if (null? ps) len (caar ps)))
|
||||
r)))
|
||||
(loop (cdr ps)
|
||||
(cdar ps)
|
||||
(list* space (substring str i (caar ps)) r)))))))
|
||||
|
|
|
@ -398,4 +398,38 @@ each pair of strings in @racket[strs].
|
|||
(string-join '("one" "two" "three" "four") " potato ")
|
||||
]}
|
||||
|
||||
@defproc[(string-trim [str string?] [rx regexp? #px"\\s+"]
|
||||
[#:left? left? any/c #t] [#:right? right? any/c #t])
|
||||
string?]{
|
||||
|
||||
Trims the input @racket[str] by removing prefix and suffix matches of
|
||||
@racket[rx]. Use @racket[#:left?] or @racket[#:right?] to suppress
|
||||
trimming one of these sides.
|
||||
|
||||
The @racket[rx] regexp should match a whole (non-empty) sequence of
|
||||
spaces and should not rely on surrounding context. This means that it
|
||||
should usually end with a @litchar{+}, and that it should not use
|
||||
@litchar{^}, @litchar{$}, or other lookaheads and lookbacks. (The
|
||||
regexp is expected to both identify a whole sequence of spaces, and
|
||||
match on a non-empty part of such a sequence.)
|
||||
|
||||
@mz-examples[#:eval string-eval
|
||||
(string-trim " foo bar baz \r\n\t")
|
||||
]}
|
||||
|
||||
@defproc[(string-normalize-spaces
|
||||
[str string?] [rx regexp? #px"\\s+"]
|
||||
[#:space space string? " "] [#:trim? trim? any/c #t])
|
||||
string?]{
|
||||
|
||||
Normalizes spaces (matching @racket[rx]) in the input @racket[str] by
|
||||
replacing them with @racket[space]. In the default configuration, this
|
||||
will replace any sequence of whitespaces by a single space character.
|
||||
In addition, prefix and suffix spaces are trimmed if @racket[trim?] is
|
||||
true, otherwise they get normalized too.
|
||||
|
||||
@mz-examples[#:eval string-eval
|
||||
(string-normalize-spaces " foo bar baz \r\n\t")
|
||||
]}
|
||||
|
||||
@close-eval[string-eval]
|
||||
|
|
|
@ -337,25 +337,6 @@
|
|||
(t " 12 34 " #f "" " 12 34 ")
|
||||
)
|
||||
|
||||
;; ---------- string-append* ----------
|
||||
(let ()
|
||||
(test "" string-append* '())
|
||||
(test "" string-append* '(""))
|
||||
(test "" string-append* '("" ""))
|
||||
(test "0123456789" string-append* '("0123456789"))
|
||||
(test "0123456789" string-append* "0123456789" '())
|
||||
(test "0123456789" string-append* "0123456789" '(""))
|
||||
(test "0123456789" string-append* "0123456789" '("" ""))
|
||||
(test "0123456789" string-append* "01234567" '("8" "9")))
|
||||
|
||||
;; ---------- string-join ----------
|
||||
(let ()
|
||||
(test "" string-join '() " ")
|
||||
(test "" string-join '("") " ")
|
||||
(test " " string-join '("" "") " ")
|
||||
(test "x y" string-join '("x" "y") " ")
|
||||
(test "x" string-join '("x") " "))
|
||||
|
||||
;; String splitting can take longer than byte-string splitting,
|
||||
;; but it should have the same computational complexity.
|
||||
(let ()
|
||||
|
@ -373,5 +354,57 @@
|
|||
(and ((* 100 (- bcpu bgc)) . < . (- scpu sgc))
|
||||
"suspiciously long time for regexp string split")))
|
||||
|
||||
;; ---------- string-append* ----------
|
||||
(let ([t (λ (x . xs) (test ))])
|
||||
(test "" string-append* '())
|
||||
(test "" string-append* '(""))
|
||||
(test "" string-append* '("" ""))
|
||||
(test "0123456789" string-append* '("0123456789"))
|
||||
(test "0123456789" string-append* "0123456789" '())
|
||||
(test "0123456789" string-append* "0123456789" '(""))
|
||||
(test "0123456789" string-append* "0123456789" '("" ""))
|
||||
(test "0123456789" string-append* "01234567" '("8" "9")))
|
||||
|
||||
;; ---------- string-join ----------
|
||||
(let ()
|
||||
(test "" string-join '() " ")
|
||||
(test "" string-join '("") " ")
|
||||
(test " " string-join '("" "") " ")
|
||||
(test "x" string-join '("x") " ")
|
||||
(test "x y" string-join '("x" "y") " ")
|
||||
(test "x y z" string-join '("x" "y" "z") " ")
|
||||
(test "x,y,z" string-join '("x" "y" "z") ","))
|
||||
|
||||
;; ---------- string-trim & string-normalize-spaces ----------
|
||||
(let ()
|
||||
(define spaces '("" " " " " "\r" "\r\n\t "))
|
||||
(define ++ string-append)
|
||||
(define-syntax-rule (with-spaces id E ...)
|
||||
(for ([id (in-list spaces)]) E ...))
|
||||
(define (both result arg)
|
||||
(test result string-trim arg)
|
||||
(test result string-normalize-spaces arg))
|
||||
(define (norm s) (if (equal? "" s) s " "))
|
||||
(with-spaces s1
|
||||
(both "" s1)
|
||||
(with-spaces s2
|
||||
(both "x" (++ s1 "x" s2))
|
||||
(both "xx" (++ s1 "xx" s2))
|
||||
(with-spaces s3
|
||||
(test (++ "x" s3 "x") string-trim (++ s1 "x" s3 "x" s2))
|
||||
(test (++ "x" (norm s3) "x")
|
||||
string-normalize-spaces (++ s1 "x" s3 "x" s2))
|
||||
(test (++ (norm s1) "x" (norm s3) "x" (norm s2))
|
||||
string-normalize-spaces (++ s1 "x" s3 "x" s2) #:trim? #f)
|
||||
(with-spaces s4
|
||||
(test (++ "x" (norm s3) "y" (norm s4) "z")
|
||||
string-normalize-spaces (++ s1 "x" s3 "y" s4 "z" s2))
|
||||
(test (++ (norm s1) "x" (norm s3) "y" (norm s4) "z" (norm s2))
|
||||
string-normalize-spaces (++ s1 "x" s3 "y" s4 "z" s2)
|
||||
#:trim? #f)))))
|
||||
(test "\t x \t" string-trim " \t x \t " #px" +")
|
||||
(test " x" string-trim " x " #:left? #f)
|
||||
(test "x " string-trim " x " #:right? #f)
|
||||
(test " x " string-trim " x " #:left? #f #:right? #f))
|
||||
|
||||
(report-errs)
|
||||
|
|
Loading…
Reference in New Issue
Block a user