From dcf2754a57aada6ff1b9fac545961b4becac414b Mon Sep 17 00:00:00 2001 From: Eli Barzilay Date: Wed, 23 May 2012 17:06:03 -0400 Subject: [PATCH] Clarify `regexp-split' on an empty input. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The text that says that (regexp-split #rx"whatever" "") returns '("") rather than '() is: "If `input' contains no matches [...] the result is a list containing input’s content [...] as a single element." This is a little implicit if you consider such an input as having nothing left to match over, so that it's as if there is no input (with a port this confusion is a little clearer). Clarify with an example in the docs, and also add tests. --- collects/scribblings/reference/regexps.scrbl | 1 + collects/tests/racket/string.rktl | 3 +++ 2 files changed, 4 insertions(+) diff --git a/collects/scribblings/reference/regexps.scrbl b/collects/scribblings/reference/regexps.scrbl index 4fe3307ce7..f25da08d15 100644 --- a/collects/scribblings/reference/regexps.scrbl +++ b/collects/scribblings/reference/regexps.scrbl @@ -785,6 +785,7 @@ an end-of-file if @racket[input] is an input port). (regexp-split #rx"" "12 34") (regexp-split #rx" *" "12 34") (regexp-split #px"\\b" "12, 13 and 14.") +(regexp-split #rx" +" "") ]} @;------------------------------------------------------------------------ diff --git a/collects/tests/racket/string.rktl b/collects/tests/racket/string.rktl index 602ee40c2e..1a57a2110d 100644 --- a/collects/tests/racket/string.rktl +++ b/collects/tests/racket/string.rktl @@ -311,6 +311,9 @@ (t '("" "" " " "" "" "") eof " *" "12 34") (t '(" " "" "" " " "" "" " " "") eof " *" " 12 34 ") (t '("" "" " " "" "" "") " " " *" " 12 34 " 1 6) + (t '("") eof " *" "") + (t '("") eof " *" "1234" 4) + (t '("") "34" " *" "1234" 2 2) (t regexp-match-positions*) (t '((0 . 0) (1 . 1) (2 . 2) (3 . 3)) eof "" "123") (t '((0 . 0) (1 . 1) (2 . 3) (3 . 3) (4 . 4) (5 . 5)) eof " *" "12 34")