Optimize splitting strings on whitespace

Use an optimized function, instead of regexp-split, when string-split
is called with the default separator and trim setting.
This commit is contained in:
Brian Adkins 2021-03-23 12:16:07 -04:00 committed by Matthew Flatt
parent 2a718d8162
commit d061df81ce
2 changed files with 27 additions and 2 deletions

View File

@ -482,7 +482,7 @@
;; ---------- string-split ----------
(let ()
(for ([s (in-list '("x y z" " x y z " "\nx y z" " \t x\r\r\ry z\n"))])
(for ([s (in-list '("x y z" " x y z " "\nx y z" " \t x\r\r\ry \n\f\n z\n"))])
(test '("x" "y" "z") string-split s))
(for ([s (in-list '(" " " " "\n\t\r"))])
(test '() string-split s))

View File

@ -124,8 +124,33 @@
;; don't do that for all nulls).)
(if (equal? strs '("")) '() strs))
;; A faster split implementation for the default case of splitting on
;; whitespace. Leading and trailing whitespace is dropped and runs of
;; whitespace never produce empty strings, so the result is already
;; trimmed.
;;
;; str : the string to split
;; who : symbol naming the public function in the argument error;
;;       defaults to 'string-split
;; Returns the maximal runs of non-whitespace characters of `str`, in
;; order, as freshly allocated strings; '() when `str` is empty or
;; consists only of whitespace.
;; Raises a contract error reported against `who` if `str` is not a string.
(define (internal-split-whitespace str [who 'string-split])
  ;; Check up front so that a bad argument is reported against `who`
  ;; rather than surfacing as a `string-length` contract violation.
  (unless (string? str)
    (raise-argument-error who "string?" str))
  ;; Exactly these five characters are treated as separators.
  (define (is-whitespace? c)
    (or (eq? c #\space)
        (eq? c #\tab)
        (eq? c #\newline)
        (eq? c #\return)
        (eq? c #\page)))
  (define len (string-length str))
  ;; `beg` is the start of the token being scanned, `end` is the scan
  ;; position; `beg` = `end` means no token characters have been seen yet.
  (let loop ([beg 0] [end 0])
    (cond [(= end len)
           ;; End of input: emit the pending token, if any.
           (if (= beg end)
               '()
               (list (substring str beg end)))]
          [(is-whitespace? (string-ref str end))
           (let ([pos (add1 end)])
             (if (= beg end)
                 ;; Whitespace with no pending token: just skip it.
                 (loop pos pos)
                 ;; Whitespace ends the pending token: emit and restart.
                 (cons (substring str beg end)
                       (loop pos pos))))]
          [else (loop beg (add1 end))])))
;; Splits `str` into a list of substrings.
;;
;; When `sep` is left at its default (`none`) and `trim?` is true, the
;; work is done by `internal-split-whitespace`, which splits on
;; whitespace and returns an already-trimmed result; `+?` is not
;; consulted on that path. Every other combination of arguments is
;; delegated to `internal-split`.
(define (string-split str [sep none] #:trim? [trim? #t] #:repeat? [+? #f])
  (if (and (eq? sep none) trim?)
      (internal-split-whitespace str)
      (internal-split 'string-split str sep trim? +?)))
(define (string-normalize-spaces str [sep none] [space " "]
#:trim? [trim? #t] #:repeat? [+? #f])