Add #:match-select' and #:gap-select' keyword arguments to

`regexp-match*', and the two `-positions' variants.
This commit is contained in:
Eli Barzilay 2012-03-12 02:06:22 -04:00
parent f538c2e076
commit 0eb5f09e23
3 changed files with 343 additions and 113 deletions

View File

@ -233,42 +233,91 @@
(port-success-k
(lambda (acc new-start new-end)
(loop acc new-start new-end ipre 0-ok?))
acc start end mstart mend)
acc start end m)
(loop (success-choose start m acc)
mend end ipre 0-ok?)))))))))))
;; Returns all the positions at which the pattern matched.
(define (regexp-match-positions* pattern string [start 0] [end #f] [ipre #""])
(regexp-loop regexp-match-positions* loop start end pattern string ipre
;; success-choose:
(lambda (start ms acc) (cons (car ms) acc))
;; failure-k:
(lambda (acc start end) acc)
;; port-success-k: need to shift index of rest as reading; cannot
;; do a tail call without adding another state variable to the
;; regexp loop, so this remains inefficient
(lambda (loop acc start end mstart mend)
(append (map (lambda (p)
(cons (+ mend (car p)) (+ mend (cdr p))))
(loop '() 0 (and end (- end mend))))
(cons (cons mstart mend) acc)))
;; other port functions: use string case
#f #f
;; flags
#f #f))
(define (regexp-match-positions* pattern string [start 0] [end #f] [ipre #""]
#:match-select [match-select car])
;; Note: no need for a #:gap-select, since it is easily inferred from the
;; resulting matches
(if (eq? match-select car)
;; common case
(regexp-loop regexp-match-positions* loop start end pattern string ipre
;; success-choose:
(lambda (start ms acc) (cons (car ms) acc))
;; failure-k:
(lambda (acc start end) acc)
;; port-success-k: need to shift index of rest as reading; cannot
;; do a tail call without adding another state variable to the
;; regexp loop, so this remains inefficient
(lambda (loop acc start end ms)
(let ([mstart (caar ms)] [mend (cdar ms)])
(append (map (lambda (p) (cons (+ mend (car p)) (+ mend (cdr p))))
(loop '() 0 (and end (- end mend))))
(cons (car ms) acc))))
;; other port functions: use string case
#f #f
;; flags
#f #f)
;; using some selector
(regexp-loop regexp-match-positions* loop start end pattern string ipre
;; success-choose:
(lambda (start ms acc) (cons (match-select ms) acc))
;; failure-k:
(lambda (acc start end) acc)
;; port-success-k: need to shift index of rest as reading; cannot
;; do a tail call without adding another state variable to the
;; regexp loop, so this remains inefficient
(lambda (loop acc start end ms)
(let* ([mend (cdar ms)]
[rest (loop '() 0 (and end (- end mend)))]
[s (match-select ms)])
;; assumes that it's a valid selector (and always works the same,
;; ie, does not return a single pair sometimes a list of pairs at
;; other times)
(append
(map (if (or (and (pair? s) (exact-integer? (car s))) (not s))
;; assume that it returns a single match
(lambda (p) (cons (+ mend (car p)) (+ mend (cdr p))))
;; assume that it returns a list
(lambda (ps)
(map (lambda (p)
(and p (cons (+ mend (car p)) (+ mend (cdr p)))))
ps)))
rest)
(cons s acc))))
;; other port functions: use string case
#f #f
;; flags
#f #f)))
;; Returns all the positions at which the pattern matched.
(define (regexp-match-peek-positions* pattern string [start 0] [end #f]
[ipre #""])
(regexp-loop regexp-match-peek-positions* loop start end pattern string ipre
;; success-choose:
(lambda (start ms acc) (cons (car ms) acc))
;; failure-k:
(lambda (acc start end) acc)
;; port functions: use string case
#f #f #f
;; flags
#f #t))
[ipre #""]
#:match-select [match-select car])
(if (eq? match-select car)
;; common case
(regexp-loop regexp-match-peek-positions* loop start end pattern string ipre
;; success-choose:
(lambda (start ms acc) (cons (car ms) acc))
;; failure-k:
(lambda (acc start end) acc)
;; port functions: use string case
#f #f #f
;; flags
#f #t)
;; using some selector
(regexp-loop regexp-match-peek-positions* loop start end pattern string ipre
;; success-choose:
(lambda (start ms acc) (cons (match-select ms) acc))
;; failure-k:
(lambda (acc start end) acc)
;; port functions: use string case
#f #f #f
;; flags
#f #t)))
;; Small helper for the functions below
(define (get-buf+sub string pattern)
@ -413,22 +462,91 @@
#t #f)))])
regexp-replace*))
;; Returns all the matches for the pattern in the string.
(define (regexp-match* pattern string [start 0] [end #f] [ipre #""])
(define-values [buf sub] (get-buf+sub string pattern))
(regexp-loop regexp-match* loop start end pattern buf ipre
;; success-choose:
(lambda (start ms acc) (cons (sub buf (caar ms) (cdar ms)) acc))
;; failure-k:
(lambda (acc start end) acc)
;; port-success-k:
#f
;; port-success-choose:
(lambda (leftovers ms acc) (cons (car ms) acc))
;; port-failure-k:
(lambda (acc leftover) acc)
;; flags
#f #f))
;; Returns all the matches for the pattern in the string, optionally
;; other submatches and/or gap strings too.
(define (regexp-match* pattern string [start 0] [end #f] [ipre #""]
#:match-select [match-select car]
#:gap-select [gap-select #f])
(cond
;; nonsensical case => throw an error
[(and (not match-select) (not gap-select))
;; An alternative would be to throw '(), but that's non-uniform in that
;; it wouldn't consume the contents of a port. Another alternative is
;; to do the full code to consume what's needed, but that's spending
;; cycles to always get a '() result.
(raise
(exn:fail:contract
"regexp-match*: one of `match-select' or `gap-select' must be non-#f"
(current-continuation-marks)))]
;; no match-select => same as `regexp-split'
[(not match-select) (regexp-split pattern string start end ipre)]
;; uncommon case => full code
[(not (eq? match-select car))
(define-values [buf sub] (get-buf+sub string pattern))
(regexp-loop regexp-explode loop start end pattern buf ipre
;; success-choose:
(lambda (start ms acc)
(cons (let ([s (match-select ms)])
;; assumes a valid selector
(cond [(not (pair? s)) s] ; #f or '()
[(integer? (car s)) (sub buf (car s) (cdr s))]
[else (map (lambda (m)
(and m (sub buf (car m) (cdr m))))
s)]))
(if gap-select (cons (sub buf start (caar ms)) acc) acc)))
;; failure-k:
(lambda (acc start end)
(if gap-select
(cons (if end (sub buf start end) (sub buf start)) acc)
acc))
;; port-success-k:
#f
;; port-success-choose:
(lambda (leftovers ms acc)
;; assumes a valid selector too: here it's used with a bytes list so
;; it's simple
(cons (match-select ms) (if gap-select (cons leftovers acc) acc)))
;; port-failure-k:
(lambda (acc leftover)
(if (and gap-select leftover) (cons leftover acc) acc))
;; flags
gap-select #f)]
;; default for matches, but also include gaps
[gap-select
(define-values [buf sub] (get-buf+sub string pattern))
(regexp-loop regexp-explode loop start end pattern buf ipre
;; success-choose:
(lambda (start ms acc)
(cons (sub buf (caar ms) (cdar ms))
(cons (sub buf start (caar ms)) acc)))
;; failure-k:
(lambda (acc start end)
(cons (if end (sub buf start end) (sub buf start)) acc))
;; port-success-k:
#f
;; port-success-choose:
(lambda (leftovers ms acc) (cons (car ms) (cons leftovers acc)))
;; port-failure-k:
(lambda (acc leftover) (if leftover (cons leftover acc) acc))
;; flags
gap-select #f)]
;; default case => optimized with specific code (this is the previous
;; functionality of `regexp-explode*' too)
[else
(define-values [buf sub] (get-buf+sub string pattern))
(regexp-loop regexp-match* loop start end pattern buf ipre
;; success-choose:
(lambda (start ms acc) (cons (sub buf (caar ms) (cdar ms)) acc))
;; failure-k:
(lambda (acc start end) acc)
;; port-success-k:
#f
;; port-success-choose:
(lambda (leftovers ms acc) (cons (car ms) acc))
;; port-failure-k:
(lambda (acc leftover) acc)
;; flags
#f #f)]))
(define (regexp-match-exact? p s)
(let ([m (regexp-match-positions p s)])

View File

@ -400,26 +400,29 @@ bytes. To avoid such interleaving, use @racket[regexp-match-peek]
[input (or/c string? bytes? path? input-port?)]
[start-pos exact-nonnegative-integer? 0]
[end-pos (or/c exact-nonnegative-integer? #f) #f]
[input-prefix bytes? #""])
[input-prefix bytes? #""]
[#:match-select match-select
(or/c ((listof any?) . -> . (or/c any? (listof any?)))
#f)
car]
[#:gap-select gap-select boolean? #f])
(if (and (or (string? pattern) (regexp? pattern))
(or (string? input) (path? input)))
(listof string?)
(listof bytes?))]{
(listof (or/c string? (listof (or/c #f string?))))
(listof (or/c bytes? (listof (or/c #f bytes?)))))]{
Like @racket[regexp-match], but the result is a list of strings or
byte strings corresponding to a sequence of matches of
@racket[pattern] in @racket[input]. (Unlike @racket[regexp-match],
results for parenthesized sub-patterns in @racket[pattern] are not
returned.)
@racket[pattern] in @racket[input].
The @racket[pattern] is used in order to find matches, where each
match attempt starts at the end of the last match, and @litchar{^} is
allowed to match the beginning of the input (if @racket[input-prefix]
is @racket[#""]) only for the first match. Empty matches are handled
like other matches, returning a zero-length string or byte sequence
(they are more useful in the complementing @racket[regexp-split]
function), but @racket[pattern] is restricted from matching an empty
sequence immediately after an empty match.
(they are more useful in making this a complement of
@racket[regexp-split]), but @racket[pattern] is restricted from
matching an empty sequence immediately after an empty match.
If @racket[input] contains no matches (in the range @racket[start-pos]
to @racket[end-pos]), @racket[null] is returned. Otherwise, each item
@ -432,6 +435,35 @@ port).
@examples[
(regexp-match* #rx"x." "12x4x6")
(regexp-match* #rx"x*" "12x4x6")
]
@racket[match-select] specifies the collected results. The default of
@racket[car] means that the result is the list of matches without
returning parenthesized sub-patterns. It can be given as a `selector'
function which chooses an item from a list, or it can choose a list of
items. For example, you can use @racket[cdr] to get a list of lists
of parenthesized sub-patterns matches, or @racket[values] (as an
identity function) to get the full matches as well. (Note that the
selector must choose an element of its input list or a list of
elements, but it must not inspect its input as they can be either a
list of strings or a list of position pairs. Furthermore, the
selector must be consistent in its choice(s).)
@examples[
(regexp-match* #rx"x(.)" "12x4x6" #:match-select cadr)
(regexp-match* #rx"x(.)" "12x4x6" #:match-select values)
]
In addition, specifying @racket[gap-select] as a non-@racket[#f] value
will make the result an interleaved list of the matches as well as the
separators between them matches, starting and ending with a separator.
In this case, @racket[match-select] can be given as @racket[#f] to
return @emph{only} the separators, making such uses equivalent to
@racket[regexp-split].
@examples[
(regexp-match* #rx"x(.)" "12x4x6" #:match-select cadr #:gap-select #t)
(regexp-match* #rx"x(.)" "12x4x6" #:match-select #f #:gap-select #t)
]}
@ -494,16 +526,27 @@ positions indicate the number of bytes that were read, including
[input (or/c string? bytes? path? input-port?)]
[start-pos exact-nonnegative-integer? 0]
[end-pos (or/c exact-nonnegative-integer? #f) #f]
[input-prefix bytes? #""])
(listof (cons/c exact-nonnegative-integer?
exact-nonnegative-integer?))]{
[input-prefix bytes? #""]
[#:match-select match-select
((listof any?) . -> . (or/c any? (listof any?)))
car])
(or/c (listof (cons/c exact-nonnegative-integer?
exact-nonnegative-integer?))
(listof (listof (or/c #f (cons/c exact-nonnegative-integer?
exact-nonnegative-integer?)))))]{
Like @racket[regexp-match-positions], but returns multiple matches
like @racket[regexp-match*].
@examples[
(regexp-match-positions* #rx"x." "12x4x6")
]}
(regexp-match-positions* #rx"x(.)" "12x4x6" #:match-select cadr)
]
Note that unlike @racket[regexp-match*], there is no
@racket[#:gap-select] input keyword, as this information can be easily
inferred from the resulting matches.
}
@defproc[(regexp-match? [pattern (or/c string? bytes? regexp? byte-regexp?)]
@ -623,11 +666,13 @@ used to match @racket[pattern].}
[start-pos exact-nonnegative-integer? 0]
[end-pos (or/c exact-nonnegative-integer? #f) #f]
[input-prefix bytes? #""])
(listof (cons/c exact-nonnegative-integer?
exact-nonnegative-integer?))]{
(or/c (listof (cons/c exact-nonnegative-integer?
exact-nonnegative-integer?))
(listof (listof (or/c #f (cons/c exact-nonnegative-integer?
exact-nonnegative-integer?)))))]{
Like @racket[regexp-match-peek-positions], but returns multiple matches like
@racket[regexp-match*].}
@racket[regexp-match-positions*].}
@defproc[(regexp-match/end [pattern (or/c string? bytes? regexp? byte-regexp?)]
[input (or/c string? bytes? path? input-port?)]

View File

@ -41,23 +41,28 @@
[(string? x) (string->bytes/utf-8 x)]
[(pregexp? x) (byte-pregexp (->b (object-name x)))]
[else x]))
(define fun* #f)
(define funs '())
(define (test-funs ks vs res left rx str . args)
(unless (memq regexp-match-peek-positions* funs)
(for ([fun (in-list funs)])
;; test with a string
(keyword-apply test ks vs res fun rx str args)
;; test with a byte-regexp and/or a byte string
(keyword-apply test ks vs (->b res) fun (->b rx) str args)
(keyword-apply test ks vs (->b res) fun rx (->b str) args)
(keyword-apply test ks vs (->b res) fun (->b rx) (->b str) args)))
(for ([fun (in-list funs)])
;; test with a port, and test leftovers
(when left
(let ([p (open-input-string str)])
(keyword-apply test ks vs (->b res) fun rx p args)
(keyword-apply test '() '() left read-string 50 p '())))))
(define t
(case-lambda
[(fun) (set! fun* fun)]
[(res left rx str . args)
(unless (eq? fun* regexp-match-peek-positions*)
;; test with a string
(apply test res fun* rx str args)
;; test with a byte-regexp and/or a byte string
(apply test (->b res) fun* (->b rx) str args)
(apply test (->b res) fun* rx (->b str) args)
(apply test (->b res) fun* (->b rx) (->b str) args))
;; test with a port, and test leftovers
(when left
(let ([p (open-input-string str)])
(apply test (->b res) fun* rx p args)
(test left read-string 50 p)))]))
(make-keyword-procedure
test-funs
(case-lambda [(fun*) (set! funs (if (list? fun*) fun* (list fun*)))]
[(res left rx str . args)
(apply test-funs '() '() res left rx str args)])))
;; --------------------
(t regexp-match*)
(t '("a" "b" "c") eof "[abc]" " a b c ")
@ -75,15 +80,65 @@
(t '("a" "b" "c") eof "[abc]" "a b c" 0)
(t '("a" "b" "c") eof "[abc]" "a b c" 0 #f)
(t '("a" "b" "c") eof "[abc]" "a b c" 0 5)
(t '("a") eof "^." "a b c" 0 5 #"")
(t '() eof "^." "a b c" 0 5 #"x")
(t '("a\n" "b\n" "c\n") eof "(?m:^.\n)" "a\nb\nc\n" 0 6)
(t '("b\n" "c\n") eof "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
(for-each (lambda (cvt)
(test '(#"\x80" #"\x80") regexp-match* (cvt #"\x80") #"a\x80z\x80q"))
(list values byte-regexp byte-pregexp))
(t '("a") eof "^." "a b c" 0 5 #"")
(t '() eof "^." "a b c" 0 5 #"x")
(t '("a\n" "b\n" "c\n") eof "(?m:^.\n)" "a\nb\nc\n" 0 6)
(t '("b\n" "c\n") eof "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
(for ([cvt (in-list (list values byte-regexp byte-pregexp))])
(test '(#"\x80" #"\x80") regexp-match* (cvt #"\x80") #"a\x80z\x80q"))
;; --------------------
(t regexp-match-positions*)
(define (regexp-explode . xs) (apply regexp-match* #:gap-select #t xs))
(t regexp-explode)
(t '(" " "a" " " "b" " " "c" " ") eof "[abc]" " a b c ")
(t '("" "a" "+" "b" " = " "c" " ") eof "[abc]" "a+b = c ")
(t '(" " "b" " " "c" " ") eof "[abc]" " a b c " 2)
(t '("" "b" " " "c" " ") eof "[abc]" " a b c " 3)
(t '(" " "a" "") " b c " "[abc]" " a b c " 0 2)
(t '(" " "a" " ") "b c " "[abc]" " a b c " 0 3)
(t '(" " "a" " " "b" " " "c" " ") eof "[abc]" " a b c " 0 #f)
(t '(" " "a" " " "b" " " "c" " ") eof "[abc]" " a b c " 0 7)
(t '("" "a" " " "b" " " "c" "") eof "[abc]" "a b c")
(t '(" " "b" " " "c" "") eof "[abc]" "a b c" 1)
(t '("" "b" " " "c" "") eof "[abc]" "a b c" 2)
(t '("" "a" "") " b c" "[abc]" "a b c" 0 1)
(t '("" "a" " ") "b c" "[abc]" "a b c" 0 2)
(t '("" "a" " " "b" " " "c" "") eof "[abc]" "a b c" 0)
(t '("" "a" " " "b" " " "c" "") eof "[abc]" "a b c" 0 #f)
(t '("" "a" " " "b" " " "c" "") eof "[abc]" "a b c" 0 5)
(t '("" "a" " b c") eof "^." "a b c" 0 5 #"")
(t '("a b c") eof "^." "a b c" 0 5 #"x")
(t '("" "a\n" "" "b\n" "" "c\n" "") eof "(?m:^.\n)" "a\nb\nc\n" 0 6)
(t '("a\n" "b\n" "" "c\n" "") eof "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
(for ([cvt (in-list (list values byte-regexp byte-pregexp))])
(test '(#"a" #"\x80" #"z" #"\x80" #"q")
regexp-explode (cvt #"\x80") #"a\x80z\x80q"))
;; --------------------
(t regexp-match*) ; some tests with a match-select etc
;; (tests with #f for #:match-select and #t for #:gap-select done below with
;; `regexp-split')
(err/rt-test (regexp-match* "[abc]" "a b c"
#:match-select #f #:gap-select #f))
(t '("a" "b" "c") eof
"<([abc])>" "<a> <b> <c>" #:match-select cadr)
(t '(("a") ("b") ("c")) eof
"<([abc])>" "<a> <b> <c>" #:match-select cdr)
(t '(("<a>" "a") ("<b>" "b") ("<c>" "c")) eof
"<([abc])>" "<a> <b> <c>" #:match-select values)
(t '("" "a" " + " "b" " = " "c" "") eof
"<([abc])>" "<a> + <b> = <c>" #:match-select cadr #:gap-select #t)
(t '("" ("<a>" "a") " + " ("<b>" "b") " = " ("<c>" "c") "") eof
"<([abc])>" "<a> + <b> = <c>" #:match-select values #:gap-select #t)
(t '("" ("<a" "a" #f) " + " ("<b" "b" #f) " = " ("<c" "c" #f) "") eof
"<([abc])(>)?" "<a + <b = <c" #:match-select values #:gap-select #t)
;; --------------------
(t (list regexp-match-positions*
;; also try the generic path
(lambda xs (apply regexp-match-positions* xs
#:match-select (lambda (x) (car x))))
;; and try it with a list result too
(lambda xs
(map car (apply regexp-match-positions* xs
#:match-select values)))))
(t '((1 . 2) (3 . 4) (5 . 6)) eof "[abc]" " a b c ")
(t '((3 . 4) (5 . 6)) eof "[abc]" " a b c " 2)
(t '((3 . 4) (5 . 6)) eof "[abc]" " a b c " 3)
@ -103,35 +158,20 @@
(t '() eof "^." "a b c" 0 5 #"x")
(t '((0 . 2) (2 . 4) (4 . 6)) eof "(?m:^.\n)" "a\nb\nc\n" 0 6)
(t '((2 . 4) (4 . 6)) eof "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
(for-each (lambda (cvt)
(test '((1 . 2) (3 . 4)) regexp-match-positions* (cvt #"\x80") #"a\x80z\x80q"))
(list values byte-regexp byte-pregexp))
(t '((0 . 2) (5 . 7) (10 . 12)) eof "<([abc])(>)?" "<a + <b = <c")
(for ([cvt (in-list (list values byte-regexp byte-pregexp))])
(test '((1 . 2) (3 . 4))
regexp-match-positions* (cvt #"\x80") #"a\x80z\x80q"))
;; --------------------
(t regexp-split)
(t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4")
(t '("2" "3" "4") eof "[abc]" "1a2b3c4" 2)
(t '("" "3" "4") eof "[abc]" "1a2b3c4" 3)
(t '("1" "") "2b3c4" "[abc]" "1a2b3c4" 0 2)
(t '("1" "2") "b3c4" "[abc]" "1a2b3c4" 0 3)
(t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4" 0 #f)
(t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4" 0 7)
(t '("" "1" "2" "") eof "[abc]" "a1b2c")
(t '("1" "2" "") eof "[abc]" "a1b2c" 1)
(t '("" "2" "") eof "[abc]" "a1b2c" 2)
(t '("" "") "1b2c" "[abc]" "a1b2c" 0 1)
(t '("" "1") "b2c" "[abc]" "a1b2c" 0 2)
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0)
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0 #f)
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0 5)
(t '("" " b c") eof "^." "a b c" 0 5 #"")
(t '("a b c") eof "^." "a b c" 0 5 #"x")
(t '("" "" "" "") eof "(?m:^.\n)" "a\nb\nc\n" 0 6)
(t '("a\n" "" "") eof "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
(for-each (lambda (cvt)
(test '(#"" #"a" #"z" #"q" #"") regexp-split (cvt #"\x80") #"\x80a\x80z\x80q\x80"))
(list values byte-regexp byte-pregexp))
;; --------------------
(t regexp-match-peek-positions*)
(t (list regexp-match-peek-positions*
;; also try the generic path
(lambda xs (apply regexp-match-peek-positions* xs
#:match-select (lambda (x) (car x))))
;; and try it with a list result too
(lambda xs
(map (lambda (x) (and (= 1 (length x)) (car x)))
(apply regexp-match-peek-positions* xs
#:match-select values)))))
(err/rt-test (regexp-match-peek-positions* "[abc]" "a b c"))
(t '((1 . 2) (3 . 4) (5 . 6)) " a b c " "[abc]" " a b c ")
(t '((3 . 4) (5 . 6)) " a b c " "[abc]" " a b c " 2)
@ -151,6 +191,33 @@
(t '() "a b c" "^." "a b c" 0 5 #"x")
(t '((0 . 2) (2 . 4) (4 . 6)) "a\nb\nc\n" "(?m:^.\n)" "a\nb\nc\n" 0 6)
(t '((2 . 4) (4 . 6)) "a\nb\nc\n" "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
;; --------------------
(t (list regexp-split
;; also via an equivalent `regexp-match*' configuration
(lambda xs
(apply regexp-match* xs #:match-select #f #:gap-select #t))))
(t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4")
(t '("2" "3" "4") eof "[abc]" "1a2b3c4" 2)
(t '("" "3" "4") eof "[abc]" "1a2b3c4" 3)
(t '("1" "") "2b3c4" "[abc]" "1a2b3c4" 0 2)
(t '("1" "2") "b3c4" "[abc]" "1a2b3c4" 0 3)
(t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4" 0 #f)
(t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4" 0 7)
(t '("" "1" "2" "") eof "[abc]" "a1b2c")
(t '("1" "2" "") eof "[abc]" "a1b2c" 1)
(t '("" "2" "") eof "[abc]" "a1b2c" 2)
(t '("" "") "1b2c" "[abc]" "a1b2c" 0 1)
(t '("" "1") "b2c" "[abc]" "a1b2c" 0 2)
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0)
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0 #f)
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0 5)
(t '("" " b c") eof "^." "a b c" 0 5 #"")
(t '("a b c") eof "^." "a b c" 0 5 #"x")
(t '("" "" "" "") eof "(?m:^.\n)" "a\nb\nc\n" 0 6)
(t '("a\n" "" "") eof "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
(for ([cvt (in-list (list values byte-regexp byte-pregexp))])
(test '(#"" #"a" #"z" #"q" #"")
regexp-split (cvt #"\x80") #"\x80a\x80z\x80q\x80"))
;; ---------- tests with zero-length matches ----------
;; Many of these tests can be repeated with Perl. To try something in Perl,
;; put this code in a file: