From fd149ca1c0d45354f461e5c6a77e4120569fde87 Mon Sep 17 00:00:00 2001 From: Gustavo Massaccesi Date: Thu, 17 May 2018 23:15:24 -0300 Subject: [PATCH] fix cache of regexp in string-{split,trim,normalize-spaces} When the separator is a string, these function construct a regexp that is cached to make repeated calls faster. But when the string is mutated it is necessary to recalculate the regexp. --- .../racket-test-core/tests/racket/string.rktl | 19 +++++++ racket/collects/racket/string.rkt | 50 +++++++++++-------- 2 files changed, 47 insertions(+), 22 deletions(-) diff --git a/pkgs/racket-test-core/tests/racket/string.rktl b/pkgs/racket-test-core/tests/racket/string.rktl index 2eeabedf14..fb9a6a06a9 100644 --- a/pkgs/racket-test-core/tests/racket/string.rktl +++ b/pkgs/racket-test-core/tests/racket/string.rktl @@ -463,6 +463,18 @@ ;; decision on this) (test "" string-trim "ababa" "aba")) +;test that mutable strings are not cached incorrectly +(let ([str (string-copy "_1_")]) + (test "x_2_" string-trim "_1_x_2_" str) ;add str to the internal cache + (string-set! str 1 #\2) + (test "_1_x" string-trim "_1_x_2_" str) ;verify that the new str is used +) +(let ([str (string-copy "_1_")]) + (test "x x_2_x" string-normalize-spaces "x_1_x_2_x" str) ;add str to the internal cache + (string-set! str 1 #\2) + (test "x_1_x x" string-normalize-spaces "x_1_x_2_x" str) ;verify that the new str is used +) + ;; ---------- string-split ---------- (let () (for ([s (in-list '("x y z" " x y z " "\nx y z" " \t x\r\r\ry z\n"))]) @@ -473,6 +485,13 @@ (test '("" "x" "y" "z" "") string-split "axayaza" "a" #:trim? #f) (test '("foo" "bar" "baz") string-split "foo,bar;baz" #rx",|;")) +;test that mutable strings are not cached incorrectly +(let ([str (string-copy "_1_")]) + (test '("x" "x_2_x") string-split "x_1_x_2_x" str) ;add str to the internal cache + (string-set! str 1 #\2) + (test '("x_1_x" "x") string-split "x_1_x_2_x" str) ;verify that the new str is used +) + ;; ---------- string-replace/* ---------- (let () (test "" string-replace "" "|" "?") diff --git a/racket/collects/racket/string.rkt b/racket/collects/racket/string.rkt index 33df306f0f..1eb66810d6 100644 --- a/racket/collects/racket/string.rkt +++ b/racket/collects/racket/string.rkt @@ -29,8 +29,12 @@ #:after-last [after-last none]) (unless (and (list? strs) (andmap string? strs)) (raise-argument-error 'string-join "(listof string?)" strs)) + (unless (or (string? before-first) (eq? before-first none)) + (raise-argument-error 'string-join "string?" before-first)) (unless (string? sep) (raise-argument-error 'string-join "string?" sep)) + (unless (or (string? after-last) (eq? after-last none)) + (raise-argument-error 'string-join "string?" after-last)) (let* ([r (if (or (null? strs) (null? (cdr strs))) strs (add-between strs sep #:before-last before-last))] @@ -38,6 +42,24 @@ [r (if (eq? before-first none) r (cons before-first r))]) (apply string-append r))) +;; Cache for string-replace and string-split and similar functions: +;; A mutable string weakly holds a immutable copy until it is collected +;; or modified (and used as a argument of string-replace, string-split, ...). +;; The immutable copy weakly holds the regexp that is used in string-replace. +;; Using string->immutable-string directly in string-replace is not a useful +;; because the immutable copy could be immediately collected. + +(define immutable-cache (make-weak-hasheq)) +(define (string->immutable-string/cache str) + (if (immutable? str) + str + (let ([old (hash-ref immutable-cache str #f)]) + (if (and old (string=? str old)) + old + (let ([new (string->immutable-string str)]) + (hash-set! immutable-cache str new) + new))))) + ;; Utility for the functions below: get a string or a regexp and return a list ;; of the regexp (strings are converted using `regexp-quote'), and versions ;; that match at the beginning/end. @@ -47,8 +69,11 @@ (hash-set! t none spaces) (hash-set! t+ none spaces)) (λ (who rx +?) - (hash-ref! (if +? t+ t) rx - (λ () (let* ([s (cond [(string? rx) (regexp-quote rx)] + (define rx* (if (string? rx) + (string->immutable-string/cache rx) + rx)) + (hash-ref! (if +? t+ t) rx* + (λ () (let* ([s (cond [(string? rx) (regexp-quote rx*)] [(regexp? rx) (string-append "(?:" (object-name rx) ")")] [else (raise-argument-error @@ -104,26 +129,7 @@ (string-join (internal-split 'string-normalize-spaces str sep trim? +?) space)) - -;; Caches for string-replace: -;; A mutable string weakly holds a immutable copy until it is collected -;; or modified (and used as a argument of string-replace). -;; The immutable copy weakly holds the regexp that is used in string-replace. -;; Using string->immutable-string directly in string-replace is not a useful -;; because the immutable copy could be immediately collected. - -(define immutable-cache (make-weak-hasheq)) -(define (string->immutable-string/cache str) - (if (immutable? str) - str - (let ([old (hash-ref immutable-cache str #f)]) - (if (and old (string=? str old)) - old - (let ([new (string->immutable-string str)]) - (hash-set! immutable-cache str new) - new))))) - -(define replace-cache (make-weak-hash)) +(define replace-cache (make-weak-hasheq)) (define (string-replace str from to #:all? [all? #t]) (unless (string? str) (raise-argument-error 'string-replace "string?" str)) (unless (string? to) (raise-argument-error 'string-replace "string?" to))