From d061df81ce4c7ee5d15388b5db48b7d1c24444e4 Mon Sep 17 00:00:00 2001
From: Brian Adkins
Date: Tue, 23 Mar 2021 12:16:07 -0400
Subject: [PATCH] Optimize splitting strings on whitespace

Use an optimized function, instead of regexp-split, when string-split
is called with the default separator and trim setting. Non-string
arguments still take the original path, so they are handled exactly
as before.
---
 .../racket-test-core/tests/racket/string.rktl |  2 +-
 racket/collects/racket/string.rkt             | 37 ++++++++++++++++++-
 2 files changed, 37 insertions(+), 2 deletions(-)

diff --git a/pkgs/racket-test-core/tests/racket/string.rktl b/pkgs/racket-test-core/tests/racket/string.rktl
index 868f0e7d9f..e8ddacf12f 100644
--- a/pkgs/racket-test-core/tests/racket/string.rktl
+++ b/pkgs/racket-test-core/tests/racket/string.rktl
@@ -482,7 +482,7 @@
 
 ;; ---------- string-split ----------
 (let ()
-  (for ([s (in-list '("x y z" " x y z " "\nx y z" " \t x\r\r\ry z\n"))])
+  (for ([s (in-list '("x y z" " x y z " "\nx y z" " \t x\r\r\ry \n\f\n z\n"))])
     (test '("x" "y" "z") string-split s))
   (for ([s (in-list '(" " " " "\n\t\r"))])
     (test '() string-split s))
diff --git a/racket/collects/racket/string.rkt b/racket/collects/racket/string.rkt
index c7622c5bd0..c22fe11bc5 100644
--- a/racket/collects/racket/string.rkt
+++ b/racket/collects/racket/string.rkt
@@ -124,8 +124,43 @@
   ;; don't do that for all nulls).)
   (if (equal? strs '("")) '() strs))
 
+;; A faster split implementation for the common case of splitting on
+;; whitespace with trimming enabled.  Scans `str` once and returns the
+;; list of maximal runs of non-whitespace characters, so leading,
+;; trailing, and repeated whitespace never produce empty strings.
+;; `str` must be a string; callers are responsible for checking.
+(define (internal-split-whitespace str)
+  ;; Whitespace is exactly space, tab, newline, return, and page
+  ;; (form feed).
+  (define (is-whitespace? c)
+    (or (eq? c #\space)
+        (eq? c #\tab)
+        (eq? c #\newline)
+        (eq? c #\return)
+        (eq? c #\page)))
+  (define len (string-length str))
+  ;; `beg` is where the current token starts; `end` is the scan position.
+  (let loop ([beg 0] [end 0])
+    (cond [(= end len)
+           (if (= beg end)
+               '()
+               (list (substring str beg end)))]
+          [(is-whitespace? (string-ref str end))
+           (let ([pos (add1 end)])
+             ;; An empty token means leading or repeated whitespace: skip.
+             (if (= beg end)
+                 (loop pos pos)
+                 (cons (substring str beg end)
+                       (loop pos pos))))]
+          [else (loop beg (add1 end))])))
+
 (define (string-split str [sep none] #:trim? [trim? #t] #:repeat? [+? #f])
-  (internal-split 'string-split str sep trim? +?))
+  ;; Use the fast path only for the default separator with trimming,
+  ;; and only when `str` is a string, so that any other argument is
+  ;; still handled by `internal-split` as it was before this change.
+  (if (and (string? str) (eq? sep none) trim?)
+      (internal-split-whitespace str)
+      (internal-split 'string-split str sep trim? +?)))
 
 (define (string-normalize-spaces str [sep none] [space " "]
                                  #:trim? [trim? #t] #:repeat? [+? #f])