Perl-like handling of empty matches in regexp-match*, etc., though without Perl-like filtering of empty strings in the result of regexp-split (PR 10855)

This commit is contained in:
Matthew Flatt 2010-04-14 22:53:14 -04:00
parent 5abf8bb530
commit 4bc155905a
8 changed files with 217 additions and 98 deletions

View File

@ -16,9 +16,12 @@
call-with-input-file call-with-input-file
call-with-output-file call-with-output-file
with-input-from-file with-input-from-file
with-output-to-file) with-output-to-file
regexp-replace*)
(all-from "private/list.ss") (all-from "private/list.ss")
(all-from "private/string.ss") (all-from-except "private/string.ss"
-regexp-replace*)
(rename -regexp-replace* regexp-replace*)
identifier? identifier?
(all-from "private/stxcase-scheme.ss") (all-from "private/stxcase-scheme.ss")
(all-from "private/qqstx.ss") (all-from "private/qqstx.ss")

View File

@ -8,7 +8,8 @@
regexp-match-peek-positions* regexp-match-peek-positions*
regexp-split regexp-split
regexp-match-exact? regexp-match-exact?
regexp-try-match) regexp-try-match
-regexp-replace*)
(require (for-syntax "stxcase-scheme.ss")) (require (for-syntax "stxcase-scheme.ss"))
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -111,8 +112,11 @@
(define (no-empty-edge-matches n) (define (no-empty-edge-matches n)
(make-regexp-tweaker (lambda (rx) (make-regexp-tweaker (lambda (rx)
(if (bytes? rx) (if (bytes? rx)
(bytes-append #"(?=.)(?:" rx #")(?<=" (make-bytes n (char->integer #\.)) #")") (bytes-append #"(?:"
(format "(?=.)(?:~a)(?<=~a)" rx (make-bytes n (char->integer #\.))))))) rx
#")(?<=" (make-bytes n (char->integer #\.)) #")")
(format "(?:~a)(?<=~a)"
rx (make-bytes n (char->integer #\.)))))))
(define-syntax-rule (regexp-loop (define-syntax-rule (regexp-loop
name loop start end pattern string name loop start end pattern string
ipre ipre
@ -159,11 +163,11 @@
(format "ending offset index out of range [~a,~a]: " start len) (format "ending offset index out of range [~a,~a]: " start len)
end)) end))
(reverse (reverse
(let loop ([acc '()] [start start] [end end] [ipre ipre] [rx #f] [rx-lb 0]) (let loop ([acc '()] [start start] [end end] [ipre ipre] [0-ok? #t])
(let* ([new-rx-lb (add1 (bytes-length ipre))] (let* ([rx (if 0-ok?
[rx (if (= rx-lb new-rx-lb) orig-rx
rx ((no-empty-edge-matches (add1 (bytes-length ipre)))
((no-empty-edge-matches new-rx-lb) orig-rx))]) orig-rx))])
(if (and port-success-choose (input-port? string)) (if (and port-success-choose (input-port? string))
;; Input port match, get string ;; Input port match, get string
@ -187,41 +191,46 @@
c)) c))
void))] void))]
[end (and end (- end start))]) [end (and end (- end start))])
(let-values ([(m ipre) (regexp-match/end rx string 0 end spitout ipre (let-values ([(ms ipre) (regexp-match/end rx
string 0 end spitout ipre
max-lookbehind)]) max-lookbehind)])
(let* ([m (and m (car m))] (let* ([m (and ms (car ms))]
[discarded/leftovers (if need-leftover? [discarded/leftovers (if need-leftover?
(get-output-bytes spitout) (get-output-bytes spitout)
discarded/leftovers)] discarded/leftovers)]
[end (and end m [skipped (if need-leftover?
(- end (if need-leftover?
(bstring-length discarded/leftovers) (bstring-length discarded/leftovers)
discarded/leftovers) discarded/leftovers)]
(bstring-length m)))]) [got (and m (bstring-length m))]
[end (and end m
(- end skipped got))])
(if m (if m
(loop (cons (port-success-choose m discarded/leftovers) acc) (let ([0-ok? (not (zero? got))])
0 end ipre (loop (port-success-choose m discarded/leftovers ms acc)
rx new-rx-lb) 0 end ipre 0-ok?))
(port-failure-k acc discarded/leftovers))))) (port-failure-k acc discarded/leftovers)))))
;; String/port match, get positions ;; String/port match, get positions
(let-values ([(m ipre) (let-values ([(m ipre)
(if peek? (if peek?
(regexp-match-peek-positions/end rx string start end #f ipre (regexp-match-peek-positions/end rx
string start end #f ipre
max-lookbehind) max-lookbehind)
(regexp-match-positions/end rx string start end #f ipre (regexp-match-positions/end rx
string start end #f ipre
max-lookbehind))]) max-lookbehind))])
(if (not m) (if (not m)
(failure-k acc start end) (failure-k acc start end)
(let ([mstart (caar m)] [mend (cdar m)]) (let* ([mstart (caar m)]
[mend (cdar m)]
[0-ok? (not (= mstart mend))])
(if port-success-k (if port-success-k
(port-success-k (port-success-k
(lambda (acc new-start new-end) (lambda (acc new-start new-end)
(loop acc new-start new-end ipre rx new-rx-lb)) (loop acc new-start new-end ipre 0-ok?))
acc start end mstart mend) acc start end mstart mend)
(loop (cons (success-choose start mstart mend) acc) (loop (success-choose start mstart mend m acc)
mend end ipre rx new-rx-lb))))))))))) mend end ipre 0-ok?)))))))))))
;; Returns all the positions at which the pattern matched. ;; Returns all the positions at which the pattern matched.
(define (regexp-match-positions* pattern string [start 0] [end #f] [ipre #""]) (define (regexp-match-positions* pattern string [start 0] [end #f] [ipre #""])
@ -230,7 +239,7 @@
pattern string pattern string
ipre ipre
;; success-choose: ;; success-choose:
(lambda (start mstart mend) (cons mstart mend)) (lambda (start mstart mend ms acc) (cons (cons mstart mend) acc))
;; failure-k: ;; failure-k:
(lambda (acc start end) acc) (lambda (acc start end) acc)
;; port-success-k: need to shift index of rest as reading; cannot ;; port-success-k: need to shift index of rest as reading; cannot
@ -255,7 +264,7 @@
pattern string pattern string
ipre ipre
;; success-choose: ;; success-choose:
(lambda (start mstart mend) (cons mstart mend)) (lambda (start mstart mend ms acc) (cons (cons mstart mend) acc))
;; failure-k: ;; failure-k:
(lambda (acc start end) acc) (lambda (acc start end) acc)
;; port functions: use string case ;; port functions: use string case
@ -275,19 +284,133 @@
(define sub (if (bytes? buf) subbytes substring)) (define sub (if (bytes? buf) subbytes substring))
(regexp-loop regexp-split loop start end pattern buf ipre (regexp-loop regexp-split loop start end pattern buf ipre
;; success-choose: ;; success-choose:
(lambda (start mstart mend) (sub buf start mstart)) (lambda (start mstart mend ms acc) (cons (sub buf start mstart) acc))
;; failure-k: ;; failure-k:
(lambda (acc start end) (lambda (acc start end)
(cons (if end (sub buf start end) (sub buf start)) acc)) (cons (if end (sub buf start end) (sub buf start)) acc))
;; port-success-k: ;; port-success-k:
#f #f
;; port-success-choose: ;; port-success-choose:
(lambda (match-string leftovers) leftovers) (lambda (match-string leftovers ms acc) (cons leftovers acc))
;; port-failure-k: ;; port-failure-k:
(lambda (acc leftover) (if leftover (cons leftover acc) acc)) (lambda (acc leftover) (if leftover (cons leftover acc) acc))
#t #t
#f)) #f))
;; Like splitting, but insert a replacement between matches
(define -regexp-replace*
(let ([regexp-replace*
(lambda (pattern string orig-replacement [ipre #""])
(define buf (if (and (string? string) (or (byte-regexp? pattern)
(bytes? pattern)))
(string->bytes/utf-8 string (char->integer #\?))
string))
(define sub (if (bytes? buf) subbytes substring))
(define start 0)
(define end #f)
(define needs-string?
(and (or (string? pattern) (regexp? pattern))
(string? string)))
(define replacement
(if (and (not needs-string?) (string? orig-replacement))
(string->bytes/utf-8 orig-replacement)
orig-replacement))
(define (check proc arg)
(let ([v (proc arg)])
(unless (if needs-string?
(string? v)
(bytes? v))
(raise-mismatch-error '|regexp-replace* (calling given filter procedure)|
(if needs-string?
"expected a string result: "
"expected a byte string result: ")
v))
v))
(define rx:sub #rx#"^(?:[^&\\]*[\\][&\\])*[^&\\]*(?:&|[\\](?=[^&\\]|$))")
(define need-replac? (and (not (procedure? replacement))
(regexp-match? rx:sub replacement)))
(define (replac ms str)
(if need-replac?
((if (string? str) bytes->string/utf-8 values)
(apply
bytes-append
(let ([str (if (string? str) (string->bytes/utf-8 str) str)]
[get-match (lambda (n)
(if (n . < . (length ms))
(let* ([p (list-ref ms n)]
[s (if (pair? p)
(sub buf (car p) (cdr p))
p)])
(if (string? s)
(string->bytes/utf-8 s)
s))
#""))])
(let loop ([pos 0])
(let ([m (regexp-match-positions #rx#"[\\&]" str pos)])
(if m
(cons (subbytes str pos (caar m))
(cond
[(equal? (char->integer #\&) (bytes-ref str (caar m)))
(cons (get-match 0) (loop (cdar m)))]
[(= (cdar m) (bytes-length str))
;; \ with no following character
(list (get-match 0))]
[(let ([next (bytes-ref str (cdar m))])
(or (and (equal? (char->integer #\&) next)
#"&")
(and (equal? (char->integer #\\) next)
#"\\")))
=> (lambda (s)
(cons s (loop (add1 (cdar m)))))]
[else
(let ([n (regexp-match #rx#"^[0-9]+" str (cdar m))])
(if n
(cons (get-match (string->number (bytes->string/utf-8 (car n))))
(loop (+ (cdar m) (bytes-length (car n)))))
(cons (get-match 0)
(loop (cdar m)))))]))
(list (subbytes str pos))))))))
str))
(when (or (string? pattern) (bytes? pattern)
(regexp? pattern) (byte-regexp? pattern))
(unless (or (string? string)
(bytes? string))
(raise-type-error 'regexp-replace* "string or byte string" string))
(unless (or (string? replacement)
(bytes? replacement)
(and (procedure? replacement)
(procedure-arity-includes? replacement 1)))
(raise-type-error 'regexp-replace* "string, byte string, or procedure (arity 1)"
replacement))
(when (and needs-string? (bytes? replacement))
(raise-mismatch-error 'regexp-replace*
"cannot replace a string with a byte string: "
replacement)))
(apply
(if (bytes? buf) bytes-append string-append)
(regexp-loop
regexp-replace* loop start end pattern buf ipre
;; success-choose:
(lambda (start mstart mend ms acc) (list* (if (procedure? replacement)
(check
replacement
(sub buf mstart mend))
(replac ms replacement))
(sub buf start mstart)
acc))
;; failure-k:
(lambda (acc start end)
(cons (if end (sub buf start end) (sub buf start)) acc))
;; port-success-k:
#f
;; port-success-choose:
#f
;; port-failure-k:
#f
#t
#f)))])
regexp-replace*))
;; Returns all the matches for the pattern in the string. ;; Returns all the matches for the pattern in the string.
(define (regexp-match* pattern string [start 0] [end #f] [ipre #""]) (define (regexp-match* pattern string [start 0] [end #f] [ipre #""])
(define buf (if (and (string? string) (or (byte-regexp? pattern) (define buf (if (and (string? string) (or (byte-regexp? pattern)
@ -297,13 +420,13 @@
(define sub (if (bytes? buf) subbytes substring)) (define sub (if (bytes? buf) subbytes substring))
(regexp-loop regexp-match* loop start end pattern buf ipre (regexp-loop regexp-match* loop start end pattern buf ipre
;; success-choose: ;; success-choose:
(lambda (start mstart mend) (sub buf mstart mend)) (lambda (start mstart mend ms acc) (cons (sub buf mstart mend) acc))
;; failure-k: ;; failure-k:
(lambda (acc start end) acc) (lambda (acc start end) acc)
;; port-success-k: ;; port-success-k:
#f #f
;; port-success-choose: ;; port-success-choose:
(lambda (match-string leftovers) match-string) (lambda (match-string leftovers ms acc) (cons match-string acc))
;; port-failure-k: ;; port-failure-k:
(lambda (acc leftover) acc) (lambda (acc leftover) acc)
#f #f

View File

@ -336,14 +336,13 @@ results for parenthesized sub-patterns in @scheme[pattern] are not
returned.) returned.)
The @scheme[pattern] is used in order to find matches, where each The @scheme[pattern] is used in order to find matches, where each
match attempt starts at the end of the last match, and @litchar{$} is match attempt starts at the end of the last match, and @litchar{^} is
allowed to match the beginning of the input (if @scheme[input-prefix] allowed to match the beginning of the input (if @scheme[input-prefix]
is @scheme[#""]) only for the first match. Empty matches are handled is @scheme[#""]) only for the first match. Empty matches are handled
like other matches, returning a zero-length string or byte sequence like other matches, returning a zero-length string or byte sequence
(they are more useful in the complementing @scheme[regexp-split] (they are more useful in the complementing @scheme[regexp-split]
function), but @scheme[pattern] is restricted from matching an empty function), but @scheme[pattern] is restricted from matching an empty
string at the beginning (or right after a previous match) or at the sequence immediately after an empty match.
end.
If @scheme[input] contains no matches (in the range @scheme[start-pos] If @scheme[input] contains no matches (in the range @scheme[start-pos]
to @scheme[end-pos]), @scheme[null] is returned. Otherwise, each item to @scheme[end-pos]), @scheme[null] is returned. Otherwise, each item
@ -355,6 +354,7 @@ port).
@examples[ @examples[
(regexp-match* #rx"x." "12x4x6") (regexp-match* #rx"x." "12x4x6")
(regexp-match* #rx"x*" "12x4x6")
]} ]}
@ -639,8 +639,8 @@ strings (if @scheme[pattern] is a string or character regexp and
@scheme[input] is a string) or byte strings (otherwise) from in @scheme[input] is a string) or byte strings (otherwise) from in
@scheme[input] that are separated by matches to @scheme[input] that are separated by matches to
@scheme[pattern]. Adjacent matches are separated with @scheme[""] or @scheme[pattern]. Adjacent matches are separated with @scheme[""] or
@scheme[#""]. Zero-length matches are treated the same as in @scheme[#""]. Zero-length matches are treated the same as for
@scheme[regexp-match*], but are more useful in this case. @scheme[regexp-match*].
If @scheme[input] contains no matches (in the range @scheme[start-pos] If @scheme[input] contains no matches (in the range @scheme[start-pos]
to @scheme[end-pos]), the result is a list containing @scheme[input]'s to @scheme[end-pos]), the result is a list containing @scheme[input]'s

View File

@ -1062,13 +1062,15 @@
(test '(#"") regexp-match "$" (open-input-string "123") 3) (test '(#"") regexp-match "$" (open-input-string "123") 3)
(test '(#"") regexp-match-peek "" (open-input-string "123") 3) (test '(#"") regexp-match-peek "" (open-input-string "123") 3)
(test "1b2b3" regexp-replace* "" "123" "b") (test "b1b2b3b" regexp-replace* "" "123" "b")
(test "1b23" regexp-replace* "(?=2)" "123" "b") (test "1b23" regexp-replace* "(?=2)" "123" "b")
(test "ax\u03BB" regexp-replace* "" "a\u03BB" "x") (test "xax\u03BBx" regexp-replace* "" "a\u03BB" "x")
(test "ax\u03BBxb" regexp-replace* "" "a\u03BBb" "x") (test "xax\u03BBxbx" regexp-replace* "" "a\u03BBb" "x")
(test #"ax\316x\273xb" regexp-replace* #"" "a\u03BBb" #"x") (test #"xax\316x\273xbx" regexp-replace* #"" "a\u03BBb" #"x")
(test "1=2===3" regexp-replace* "2*" "123" (lambda (s) (string-append "=" s "="))) (test "==1=2===3==" regexp-replace* "2*" "123" (lambda (s) (string-append "=" s "=")))
(test "1=2===3==4" regexp-replace* "2*" "1234" (lambda (s) (string-append "=" s "="))) (test "==1=2===3==4==" regexp-replace* "2*" "1234" (lambda (s) (string-append "=" s "=")))
(test "x&b\\ab=cy&w\\aw=z" regexp-replace* #rx"a(.)" "xabcyawz" "\\&\\1\\\\&\\99=")
;; Test weird port offsets: ;; Test weird port offsets:
(define (test-weird-offset regexp-match regexp-match-positions) (define (test-weird-offset regexp-match regexp-match-positions)

View File

@ -912,6 +912,7 @@
(#"(?<=(foo)a)bar" #"fooabar" (#"bar" #"foo")) (#"(?<=(foo)a)bar" #"fooabar" (#"bar" #"foo"))
(#"(?<=(foo)a)bar" #"bar" #f) (#"(?<=(foo)a)bar" #"bar" #f)
(#"(?<=(foo)a)bar" #"foobbar" #f) (#"(?<=(foo)a)bar" #"foobbar" #f)
(#"a(?<=x|ab)b" #"ab" #f)
(#"(?>(\\.\\d\\d[1-9]?))\\d+" #"1.230003938" (#".230003938" #".23")) (#"(?>(\\.\\d\\d[1-9]?))\\d+" #"1.230003938" (#".230003938" #".23"))
(#"(?>(\\.\\d\\d[1-9]?))\\d+" #"1.875000282" (#".875000282" #".875")) (#"(?>(\\.\\d\\d[1-9]?))\\d+" #"1.875000282" (#".875000282" #".875"))
(#"(?>(\\.\\d\\d[1-9]?))\\d+" #"1.235 " #f) (#"(?>(\\.\\d\\d[1-9]?))\\d+" #"1.235 " #f)

View File

@ -180,27 +180,25 @@
;; test("a","1a2a3"); ;; test("a","1a2a3");
(t '("1" "2" "3") eof "a" "1a2a3") (t '("1" "2" "3") eof "a" "1a2a3")
;; test("","123"); ;; test("","123");
(t '("1" "2" "3") eof "" "123") (t '("" "1" "2" "3" "") eof "" "123")
;; test("","12 34"); ;; test("","12 34");
(t '("1" "2" " " "3" "4") eof "" "12 34") (t '("" "1" "2" " " "3" "4" "") eof "" "12 34")
;; test(" *","123"); ;; test(" *","123");
(t '("1" "2" "3") eof " *" "123") (t '("" "1" "2" "3" "") eof " *" "123")
;; test(" *","12 34"); ;; test(" *","12 34");
(t '("1" "2" "3" "4") eof " *" "12 34") (t '("" "1" "2" "" "3" "4" "") eof " *" "12 34")
;; test(" *"," 12 34 "); ;; test(" *"," 12 34 ");
;; (t '("" "1" "2" "3" "4") eof " *" " 12 34 ") (t '("" "" "1" "2" "" "3" "4" "" "") eof " *" " 12 34 ")
;; again, perl drops the last empty string but we don't
(t '("" "1" "2" "3" "4" "") eof " *" " 12 34 ")
;; test("2|", "1234"); ;; test("2|", "1234");
(t '("1" "3" "4") eof "2|" "1234") (t '("" "1" "" "3" "4" "") eof "2|" "1234")
;; test("1|", "1234"); ;; test("1|", "1234");
(t '("" "2" "3" "4") eof "1|" "1234") (t '("" "" "2" "3" "4" "") eof "1|" "1234")
;; test("4|", "1234"); ;; test("4|", "1234");
;; (t '("1" "2" "3") eof "4|" "1234") ;; (t '("1" "2" "3") eof "4|" "1234")
;; perl perl drops the last empty string again ;; perl perl drops the last empty string again
(t '("1" "2" "3" "") eof "4|" "1234") (t '("" "1" "2" "3" "" "") eof "4|" "1234")
;; test("|2", "1234"); ;; test("|2", "1234");
(t '("1" "" "3" "4") eof "|2" "1234") (t '("" "1" "" "" "3" "4" "") eof "|2" "1234")
;; test("2|3", "1234"); ;; test("2|3", "1234");
(t '("1" "" "4") eof "2|3" "1234") (t '("1" "" "4") eof "2|3" "1234")
;; test("2|3|4", "12345"); ;; test("2|3|4", "12345");
@ -216,55 +214,46 @@
;; ...and even three in this example ;; ...and even three in this example
(t '("1" "" "" "") eof "2|3|4" "1234") (t '("1" "" "" "") eof "2|3|4" "1234")
;; test('$',"123"); ;; test('$',"123");
(t '("123") eof "$" "123") (t '("123" "") eof "$" "123")
;; test('^',"123"); ;; test('^',"123");
;; (t '("123") eof "^" "123") (t '("" "123") eof "^" "123")
;; this is a technicality: perl matches "^" once, but mzscheme matches on
;; whatever `start' may be; perl is treating it as a beginning-of-line
;; instead of a beginning-of-string behind your back "since it isn't much
;; use otherwise" (http://perldoc.perl.org/functions/split.html); but we
;; don't allow empty matches at the beginning, so a `^' will never match,
;; and we get the same behavior anyway:
(t '("123") eof "^" "123")
;; test('^',"123\n456"); ;; test('^',"123\n456");
;; (t '("123\n" "456") eof "^" "123\n456") (t '("" "123\n" "456") eof "(?m:^)" "123\n456")
;; we can get the same behavior as perl's with "(m?:^)":
(t '("123\n" "456") eof "(?m:^)" "123\n456")
;; test("\\b", "123 456"); ;; test("\\b", "123 456");
(t '("123" " " "456") eof #px"\\b" "123 456") (t '("" "123" " " "456" "") eof #px"\\b" "123 456")
;; test("^|a", "abc"); ;; test("^|a", "abc");
(t '("" "bc") eof "^|a" "abc") (t '("" "" "bc") eof "^|a" "abc")
;; some tests with bounds (these have no perl equivalences) ;; some tests with bounds (these have no perl equivalences)
(t '("1" "2" " " "3" "4") eof "" "12 34" 0) (t '("" "1" "2" " " "3" "4" "") eof "" "12 34" 0)
(t '("1" "2" " " "3" "4") eof "" "12 34" 0 #f) (t '("" "1" "2" " " "3" "4" "") eof "" "12 34" 0 #f)
(t '("1" "2" " " "3" "4") eof "" "12 34" 0 5) (t '("" "1" "2" " " "3" "4" "") eof "" "12 34" 0 5)
(t '("" "1" "2" "3" "4" "") eof " *" " 12 34 " 0) (t '("" "" "1" "2" "" "3" "4" "" "") eof " *" " 12 34 " 0)
(t '("" "1" "2" "3" "4" "") eof " *" " 12 34 " 0 #f) (t '("" "" "1" "2" "" "3" "4" "" "") eof " *" " 12 34 " 0 #f)
(t '("" "1" "2" "3" "4" "") eof " *" " 12 34 " 0 7) (t '("" "" "1" "2" "" "3" "4" "" "") eof " *" " 12 34 " 0 7)
(t '("2" "3") eof " *" "123" 1) (t '("" "2" "3" "") eof " *" "123" 1)
(t '("2" "3" "4") eof " *" "12 34" 1) (t '("" "2" "" "3" "4" "") eof " *" "12 34" 1)
(t '("2" "3" "4" "") eof " *" " 12 34 " 2) (t '("" "2" "" "3" "4" "" "") eof " *" " 12 34 " 2)
(t '("1") "23" "" "123" 0 1) (t '("" "1" "") "23" "" "123" 0 1)
(t '("2") "3" "" "123" 1 2) (t '("" "2" "") "3" "" "123" 1 2)
(t '("3") eof "" "123" 2 3) (t '("" "3" "") eof "" "123" 2 3)
(t '("1" "2") "3" " *" "123" 0 2) (t '("" "1" "2" "") "3" " *" "123" 0 2)
(t '("2" "3") eof " *" "123" 1 3) (t '("" "2" "3" "") eof " *" "123" 1 3)
;; more tests with match* ;; more tests with match*
(t regexp-match*) (t regexp-match*)
(t '("" "") eof "" "123") (t '("" "" "" "") eof "" "123")
(t '("" " " "") eof " *" "12 34") (t '("" "" " " "" "" "") eof " *" "12 34")
(t '(" " "" " " "" " ") eof " *" " 12 34 ") (t '(" " "" "" " " "" "" " " "") eof " *" " 12 34 ")
(t '("" " " "") " " " *" " 12 34 " 1 6) (t '("" "" " " "" "" "") " " " *" " 12 34 " 1 6)
(t regexp-match-positions*) (t regexp-match-positions*)
(t '((1 . 1) (2 . 2)) eof "" "123") (t '((0 . 0) (1 . 1) (2 . 2) (3 . 3)) eof "" "123")
(t '((1 . 1) (2 . 3) (4 . 4)) eof " *" "12 34") (t '((0 . 0) (1 . 1) (2 . 3) (3 . 3) (4 . 4) (5 . 5)) eof " *" "12 34")
(t '((0 . 1) (2 . 2) (3 . 4) (5 . 5) (6 . 7)) eof " *" " 12 34 ") (t '((0 . 1) (1 . 1) (2 . 2) (3 . 4) (4 . 4) (5 . 5) (6 . 7) (7 . 7)) eof " *" " 12 34 ")
(t '((2 . 2) (3 . 4) (5 . 5)) " " " *" " 12 34 " 1 6) (t '((1 . 1) (2 . 2) (3 . 4) (4 . 4) (5 . 5) (6 . 6)) " " " *" " 12 34 " 1 6)
(t regexp-match-peek-positions*) (t regexp-match-peek-positions*)
(t '((1 . 1) (2 . 2)) "123" "" "123") (t '((0 . 0) (1 . 1) (2 . 2) (3 . 3)) "123" "" "123")
(t '((1 . 1) (2 . 3) (4 . 4)) "12 34" " *" "12 34") (t '((0 . 0) (1 . 1) (2 . 3) (3 . 3) (4 . 4) (5 . 5)) "12 34" " *" "12 34")
(t '((0 . 1) (2 . 2) (3 . 4) (5 . 5) (6 . 7)) " 12 34 " " *" " 12 34 ") (t '((0 . 1) (1 . 1) (2 . 2) (3 . 4) (4 . 4) (5 . 5) (6 . 7) (7 . 7)) " 12 34 " " *" " 12 34 ")
(t '((2 . 2) (3 . 4) (5 . 5)) " 12 34 " " *" " 12 34 " 1 6) (t '((1 . 1) (2 . 2) (3 . 4) (4 . 4) (5 . 5) (6 . 6)) " 12 34 " " *" " 12 34 " 1 6)
;; finally, some tests for the match* + split property ;; finally, some tests for the match* + split property
(t (lambda (rx str) (t (lambda (rx str)
(let ([s (regexp-split rx str)] (let ([s (regexp-split rx str)]

View File

@ -1,6 +1,7 @@
Version 4.2.5.9 Version 4.2.5.10
regexp-match* et al. now disable ^ matching on all but the first regexp-match* et al. now disable ^ matching on all but the first
match match, and empty matches are allowed in all positions except
immediately after an empty match
Version 4.2.5.3 Version 4.2.5.3
Added chaperones Added chaperones

View File

@ -3420,7 +3420,7 @@ regmatch(Regwork *rw, rxpos prog)
if (is - rw->input_min >= no) { if (is - rw->input_min >= no) {
rw->input = save - no; rw->input = save - no;
if (regmatch(rw, next)) { if (regmatch(rw, next)) {
if (is == save) { if (rw->input == save) {
/* Match */ /* Match */
if (!t) return 0; if (!t) return 0;
found = 1; found = 1;