diff --git a/collects/tests/mzscheme/string.ss b/collects/tests/mzscheme/string.ss
index 49e1439513..22d27e0d35 100644
--- a/collects/tests/mzscheme/string.ss
+++ b/collects/tests/mzscheme/string.ss
@@ -25,113 +25,212 @@
 
 ;; ---------- regexp-quote ----------
 (let ([s (list->string
-	  (let loop ([i 0])
-	    (if (= i 256)
-		null
-		(cons (integer->char i) (loop (add1 i))))))])
+          (let loop ([i 0])
+            (if (= i 256)
+              null
+              (cons (integer->char i) (loop (add1 i))))))])
   (test (list s) regexp-match (regexp-quote s) s)
   (test (string-append "!" s "!")
-	regexp-replace
+        regexp-replace
         (regexp-quote s) s (regexp-replace-quote (string-append "!" s "!"))))
 
-;; ---------- regexp-match* ----------
-(test '("a" "b" "c") regexp-match* "[abc]" "here's a buck")
-(test '("b" "c") regexp-match* "[abc]" "here's a buck" 8)
-(test '("a") regexp-match* "[abc]" "here's a buck" 0 8)
-(test '("a" "b" "c") regexp-match* "[abc]" "here's a buck" 0 #f)
-
-(test '(#"a" #"b" #"c") regexp-match* "[abc]" #"here's a buck")
-(test '(#"b" #"c") regexp-match* "[abc]" #"here's a buck" 8)
-(test '(#"a") regexp-match* "[abc]" #"here's a buck" 0 8)
-(test '(#"a" #"b" #"c") regexp-match* "[abc]" #"here's a buck" 0 #f)
-
-(test '(#"a" #"b" #"c") regexp-match* #"[abc]" "here's a buck")
-(test '(#"b" #"c") regexp-match* #"[abc]" "here's a buck" 8)
-(test '(#"a") regexp-match* #"[abc]" "here's a buck" 0 8)
-(test '(#"a" #"b" #"c") regexp-match* #"[abc]" "here's a buck" 0 #f)
-
-(test '(#"a" #"b" #"c")
-      regexp-match* "[abc]" (open-input-string "here's a buck"))
-(test '(#"b" #"c") regexp-match* "[abc]" (open-input-string "here's a buck") 8)
-(let ([s (open-input-string "here's a buck")])
-  (test '(#"a") regexp-match* "[abc]" s 0 8)
-  (test " buck" read-string 50 s))
-(let ([s (open-input-string "here's a buck")])
-  (test '(#"a" #"b" #"c") regexp-match* "[abc]" s 0 #f)
-  (test eof read-char s))
-
-;; ---------- regexp-match-positions* ----------
-(test '((7 . 8) (9 . 10) (11 . 12))
-      regexp-match-positions* "[abc]" "here's a buck")
-(test '((9 . 10) (11 . 12))
-      regexp-match-positions* "[abc]" "here's a buck" 8)
-(test '((7 . 8))
-      regexp-match-positions* "[abc]" "here's a buck" 0 8)
-(test '((7 . 8) (9 . 10) (11 . 12))
-      regexp-match-positions* "[abc]" "here's a buck" 0 #f)
-
-(test '((7 . 8) (9 . 10) (11 . 12))
-      regexp-match-positions* "[abc]" (open-input-string "here's a buck"))
-(test '((9 . 10) (11 . 12))
-      regexp-match-positions* "[abc]" (open-input-string "here's a buck") 8)
-(let ([s (open-input-string "here's a buck")])
-  (test '((7 . 8)) regexp-match-positions* "[abc]" s 0 8)
-  (test " buck" read-string 50 s))
-(let ([s (open-input-string "here's a buck")])
-  (test '((7 . 8) (9 . 10) (11 . 12)) regexp-match-positions* "[abc]" s 0 #f)
-  (test eof read-char s))
-
-;; ---------- regexp-match-peek-positions* ----------
-(let ([s (open-input-string "here's a buck")])
-  (test '((7 . 8) (9 . 10) (11 . 12))
-        regexp-match-peek-positions*
-        "[abc]" (open-input-string "here's a buck"))
-  (test '((9 . 10) (11 . 12))
-        regexp-match-peek-positions*
-        "[abc]" (open-input-string "here's a buck") 8)
-  (test '((7 . 8))
-        regexp-match-peek-positions* "[abc]" s 0 8)
-  (test '((7 . 8) (9 . 10) (11 . 12))
-        regexp-match-peek-positions* "[abc]" s 0 #f)
-  (test "here's a buck"
-        read-string 50 s))
-
-;; ---------- regexp-split ----------
-(test '("here's " " " "u" "k") regexp-split "[abc]" "here's a buck")
-(test '(" " "u" "k")           regexp-split "[abc]" "here's a buck" 8)
-(test '("" "u" "k")            regexp-split "[abc]" "here's a buck" 9)
-(test '("" "u" "")             regexp-split "[abc]" "here's a buck" 9 12)
-(test '("here's " "")          regexp-split "[abc]" "here's a buck" 0 8)
-(test '("here's " " ")         regexp-split "[abc]" "here's a buck" 0 9)
-(test '("here's " " " "u" "k") regexp-split "[abc]" "here's a buck" 0 #f)
-
-(test '(#"here's " #" " #"u" #"k")
-      regexp-split "[abc]" (open-input-string "here's a buck"))
-(test '(#" " #"u" #"k")
-      regexp-split "[abc]" (open-input-string "here's a buck") 8)
-(test '(#"" #"u" #"k")
-      regexp-split "[abc]" (open-input-string "here's a buck") 9)
-(let ([s (open-input-string "here's a buck")])
-  (test '(#"" #"u" #"") regexp-split "[abc]" s 9 12)
-  (test "k" read-string 50 s))
-(let ([s (open-input-string "here's a buck")])
-  (test '(#"here's " #"") regexp-split "[abc]" s 0 8)
-  (test " buck" read-string 50 s))
-(let ([s (open-input-string "here's a buck")])
-  (test '(#"here's " #" ") regexp-split "[abc]" s 0 9)
-  (test "buck" read-string 50 s))
-(let ([s (open-input-string "here's a buck")])
-  (test '(#"here's " #" " #"u" #"k") regexp-split "[abc]" s 0 #f)
-  (test eof read-char s))
-
-;; test with zero-length matches
-(test '("" "f" "o" "o" "") regexp-split #rx"" "foo")
-(test '("" "f" "o" "o" " " "b" "a" "r" "") regexp-split #rx"" "foo bar")
-(test '("" "f" "o" "o" "" "b" "a" "r" "") regexp-split #rx" *" "foo bar")
-(test '("f" "" "ar") regexp-split #rx"oo| b" "foo bar")
-(test '("foo bar" "") regexp-split #rx"$" "foo bar")
-;; this doesn't work (like in Emacs) because ^ matches the start pos
-;; (test '("" "foo bar") regexp-split #rx"^" "foo bar")
+;; ---------- regexp-<?>* functions ----------
+(let ()
+  (define (->b x)
+    (cond [(list? x) (map ->b x)]
+          [(string? x) (string->bytes/utf-8 x)]
+          [else x]))
+  (define fun* #f)
+  (define t
+    (case-lambda
+      [(fun) (set! fun* fun)]
+      [(res left rx str . args)
+       (unless (eq? fun* regexp-match-peek-positions*)
+         ;; test with a string
+         (apply test res fun* rx str args)
+         ;; test with a byte-regexp and/or a byte string
+         (apply test (->b res) fun* (->b rx) str args)
+         (apply test (->b res) fun* rx (->b str) args)
+         (apply test (->b res) fun* (->b rx) (->b str) args))
+       ;; test with a port, and test leftovers
+       (let ([p (open-input-string str)])
+         (apply test (->b res) fun* rx p args)
+         (when left (test left read-string 50 p)))]))
+  ;; --------------------
+  (t regexp-match*)
+  (t '("a" "b" "c") eof "[abc]" " a b c ")
+  (t '("b" "c") eof     "[abc]" " a b c " 2)
+  (t '("b" "c") eof     "[abc]" " a b c " 3)
+  (t '("a") " b c "     "[abc]" " a b c " 0 2)
+  (t '("a") "b c "      "[abc]" " a b c " 0 3)
+  (t '("a" "b" "c") eof "[abc]" " a b c " 0 #f)
+  (t '("a" "b" "c") eof "[abc]" " a b c " 0 7)
+  (t '("a" "b" "c") eof "[abc]" "a b c")
+  (t '("b" "c") eof     "[abc]" "a b c" 1)
+  (t '("b" "c") eof     "[abc]" "a b c" 2)
+  (t '("a") " b c"      "[abc]" "a b c" 0 1)
+  (t '("a") "b c"       "[abc]" "a b c" 0 2)
+  (t '("a" "b" "c") eof "[abc]" "a b c" 0)
+  (t '("a" "b" "c") eof "[abc]" "a b c" 0 #f)
+  (t '("a" "b" "c") eof "[abc]" "a b c" 0 5)
+  ;; --------------------
+  (t regexp-match-positions*)
+  (t '((1 . 2) (3 . 4) (5 . 6)) eof "[abc]" " a b c ")
+  (t '((3 . 4) (5 . 6)) eof         "[abc]" " a b c " 2)
+  (t '((3 . 4) (5 . 6)) eof         "[abc]" " a b c " 3)
+  (t '((1 . 2)) " b c "             "[abc]" " a b c " 0 2)
+  (t '((1 . 2)) "b c "              "[abc]" " a b c " 0 3)
+  (t '((1 . 2) (3 . 4) (5 . 6)) eof "[abc]" " a b c " 0 #f)
+  (t '((1 . 2) (3 . 4) (5 . 6)) eof "[abc]" " a b c " 0 7)
+  (t '((0 . 1) (2 . 3) (4 . 5)) eof "[abc]" "a b c")
+  (t '((2 . 3) (4 . 5)) eof     "[abc]" "a b c" 1)
+  (t '((2 . 3) (4 . 5)) eof     "[abc]" "a b c" 2)
+  (t '((0 . 1)) " b c"      "[abc]" "a b c" 0 1)
+  (t '((0 . 1)) "b c"       "[abc]" "a b c" 0 2)
+  (t '((0 . 1) (2 . 3) (4 . 5)) eof "[abc]" "a b c" 0)
+  (t '((0 . 1) (2 . 3) (4 . 5)) eof "[abc]" "a b c" 0 #f)
+  (t '((0 . 1) (2 . 3) (4 . 5)) eof "[abc]" "a b c" 0 5)
+  ;; --------------------
+  (t regexp-split)
+  (t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4")
+  (t '("2" "3" "4") eof     "[abc]" "1a2b3c4" 2)
+  (t '("" "3" "4") eof      "[abc]" "1a2b3c4" 3)
+  (t '("1" "") "2b3c4"      "[abc]" "1a2b3c4" 0 2)
+  (t '("1" "2") "b3c4"      "[abc]" "1a2b3c4" 0 3)
+  (t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4" 0 #f)
+  (t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4" 0 7)
+  (t '("" "1" "2" "") eof   "[abc]" "a1b2c")
+  (t '("1" "2" "") eof      "[abc]" "a1b2c" 1)
+  (t '("" "2" "") eof       "[abc]" "a1b2c" 2)
+  (t '("" "") "1b2c"        "[abc]" "a1b2c" 0 1)
+  (t '("" "1") "b2c"        "[abc]" "a1b2c" 0 2)
+  (t '("" "1" "2" "") eof   "[abc]" "a1b2c" 0)
+  (t '("" "1" "2" "") eof   "[abc]" "a1b2c" 0 #f)
+  (t '("" "1" "2" "") eof   "[abc]" "a1b2c" 0 5)
+  ;; --------------------
+  (t regexp-match-peek-positions*)
+  (err/rt-test (regexp-match-peek-positions* "[abc]" "a b c"))
+  (t '((1 . 2) (3 . 4) (5 . 6)) " a b c " "[abc]" " a b c ")
+  (t '((3 . 4) (5 . 6))         " a b c " "[abc]" " a b c " 2)
+  (t '((3 . 4) (5 . 6))         " a b c " "[abc]" " a b c " 3)
+  (t '((1 . 2))                 " a b c " "[abc]" " a b c " 0 2)
+  (t '((1 . 2))                 " a b c " "[abc]" " a b c " 0 3)
+  (t '((1 . 2) (3 . 4) (5 . 6)) " a b c " "[abc]" " a b c " 0 #f)
+  (t '((1 . 2) (3 . 4) (5 . 6)) " a b c " "[abc]" " a b c " 0 7)
+  (t '((0 . 1) (2 . 3) (4 . 5)) "a b c" "[abc]" "a b c")
+  (t '((2 . 3) (4 . 5))         "a b c" "[abc]" "a b c" 1)
+  (t '((2 . 3) (4 . 5))         "a b c" "[abc]" "a b c" 2)
+  (t '((0 . 1))                 "a b c" "[abc]" "a b c" 0 1)
+  (t '((0 . 1))                 "a b c" "[abc]" "a b c" 0 2)
+  (t '((0 . 1) (2 . 3) (4 . 5)) "a b c" "[abc]" "a b c" 0 #f)
+  (t '((0 . 1) (2 . 3) (4 . 5)) "a b c" "[abc]" "a b c" 0 5)
+  ;; ---------- tests with zero-length matches ----------
+  ;; Many of these tests can be repeated with Perl.  To try something
+  ;; in Perl, put this code in a file:
+  ;;   #!/usr/bin/perl
+  ;;   sub test {
+  ;;     my ($rx,$str) = @_; @words = split /$rx/, $str;
+  ;;     print "(t '(";
+  ;;     while (@words) {print '"'.shift(@words).'"'.(@words?" ":"");}
+  ;;     print ") eof \"$rx\" \"$str\")\n";
+  ;;   };
+  ;;   test("[abc]","1a2b3");
+  ;; and it will print a test that does what perl is doing.  Tests
+  ;; that differ from Perl have explanations.
+  ;;
+  (t regexp-split)
+  ;; test("a","a");
+  ;; (t '() eof "a" "a")
+  ;;   perl returns an empty list, we return '("" ""), and this is a
+  ;;   difference that is unrelated to dealing with empty matches,
+  ;;   just the way that perl's split throws out some empty matches.
+  (t '("" "") eof "a" "a")
+  ;; test("3","123");
+  ;; (t '("12") eof "3" "123")
+  ;;   a more straightforward demonstration of this
+  (t '("12" "") eof "3" "123")
+  ;; test("a","1a2a3");
+  (t '("1" "2" "3") eof "a" "1a2a3")
+  ;; test("","123");
+  (t '("1" "2" "3") eof "" "123")
+  ;; test("","12 34");
+  (t '("1" "2" " " "3" "4") eof "" "12 34")
+  ;; test(" *","123");
+  (t '("1" "2" "3") eof " *" "123")
+  ;; test(" *","12 34");
+  (t '("1" "2" "3" "4") eof " *" "12 34")
+  ;; test(" *"," 12 34 ");
+  ;; (t '("" "1" "2" "3" "4") eof " *" " 12 34 ")
+  ;;   perl drops the last empty string, we don't -- unrelated to
+  ;;   empty matches (same as the <"a","a"> case above)
+  (t '("" "1" "2" "3" "4" "") eof " *" " 12 34 ")
+  ;; test("2|", "1234");
+  (t '("1" "3" "4") eof "2|" "1234")
+  ;; test("1|", "1234");
+  (t '("" "2" "3" "4") eof "1|" "1234")
+  ;; test("4|", "1234");
+  ;;   perl drops the last empty string, we don't, same as above
+  (t '("1" "2" "3" "") eof "4|" "1234")
+  ;; test("|2", "1234");
+  ;; (t '("1" "" "3" "4") eof "|2" "1234")
+  ;;   perl will find the "2", we can't do that since we'll always
+  ;;   find the empty match first, so it's just like using "" (to do
+  ;;   the perl thing, we'll need a hook into the matcher's C code, or
+  ;;   some way of saying `match this pattern but prefer a non-empty
+  ;;   match if possible')
+  (t '("1" "2" "3" "4") eof "|2" "1234")
+  ;; test("2|3", "1234");
+  (t '("1" "" "4") eof "2|3" "1234")
+  ;; test("1|2", "1234");
+  (t '("" "" "34") eof "1|2" "1234")
+  ;; test("3|4", "1234");
+  ;; (t '("12") eof "3|4" "1234")
+  ;;   again, perl dumps empty matches at the end, even two
+  (t '("12" "" "") eof "3|4" "1234")
+  ;; test('$',"123");
+  (t '("123") eof "$" "123")
+  ;; test('^',"123");
+  ;; (t '("123") eof "^" "123")
+  ;;   this is a technicality: perl matches "^" once, but mzscheme
+  ;;   matches on whatever `start' may be; perl is treating it as a
+  ;;   beginning-of-line instead of a ...-of-string behind your back
+  ;;   "since it isn't much use otherwise"
+  ;;   (http://perldoc.perl.org/functions/split.html); so our correct
+  ;;   test is:
+  (t '("1" "2" "3") eof "^" "123")
+  ;;   and we can get the same with "(m?:^)":
+  (t '("123") eof "(m?:^)" "123")
+  ;; test("^|a", "abc");
+  ;; (t '("" "bc") eof "^|a" "abc")
+  ;;   same deal here, use "(m?:^)":
+  (t '("" "bc") eof "(m?:^|a)" "abc")
+  ;; some tests with bounds (these have no perl equivalences)
+  (t '("1" "2" " " "3" "4") eof "" "12 34" 0)
+  (t '("1" "2" " " "3" "4") eof "" "12 34" 0 #f)
+  (t '("1" "2" " " "3" "4") eof "" "12 34" 0 5)
+  (t '("" "1" "2" "3" "4" "") eof " *" " 12 34 " 0)
+  (t '("" "1" "2" "3" "4" "") eof " *" " 12 34 " 0 #f)
+  (t '("" "1" "2" "3" "4" "") eof " *" " 12 34 " 0 7)
+  (t '("2" "3") eof " *" "123" 1)
+  (t '("2" "3" "4") eof " *" "12 34" 1)
+  (t '("2" "3" "4" "") eof " *" " 12 34 " 2)
+  (t '("1") "23" "" "123" 0 1)
+  (t '("2") "3" "" "123" 1 2)
+  (t '("3") eof "" "123" 2 3)
+  (t '("1" "2") "3" " *" "123" 0 2)
+  (t '("2" "3") eof " *" "123" 1 3)
+  ;; more tests with match*
+  (t regexp-match*)
+  (t '("" "") eof "" "123")
+  (t '("" " " "") eof " *" "12 34")
+  (t '(" " "" " " "" " ") eof " *" " 12 34 ")
+  (t '("" " " "") " " " *" " 12 34 " 1 6)
+  (t regexp-match-positions*)
+  (t '((1 . 1) (2 . 2)) eof "" "123")
+  (t '((1 . 1) (2 . 3) (4 . 4)) eof " *" "12 34")
+  (t '((0 . 1) (2 . 2) (3 . 4) (5 . 5) (6 . 7)) eof " *" " 12 34 ")
+  (t '((2 . 2) (3 . 4) (5 . 5)) " " " *" " 12 34 " 1 6)
+  )
 
 ;; ---------- string-append* ----------
 (let ()