Add #:match-select' and
#:gap-select' keyword arguments to
`regexp-match*', and the two `-positions' variants.
This commit is contained in:
parent
f538c2e076
commit
0eb5f09e23
|
@ -233,42 +233,91 @@
|
|||
(port-success-k
|
||||
(lambda (acc new-start new-end)
|
||||
(loop acc new-start new-end ipre 0-ok?))
|
||||
acc start end mstart mend)
|
||||
acc start end m)
|
||||
(loop (success-choose start m acc)
|
||||
mend end ipre 0-ok?)))))))))))
|
||||
|
||||
;; Returns all the positions at which the pattern matched.
|
||||
(define (regexp-match-positions* pattern string [start 0] [end #f] [ipre #""])
|
||||
(regexp-loop regexp-match-positions* loop start end pattern string ipre
|
||||
;; success-choose:
|
||||
(lambda (start ms acc) (cons (car ms) acc))
|
||||
;; failure-k:
|
||||
(lambda (acc start end) acc)
|
||||
;; port-success-k: need to shift index of rest as reading; cannot
|
||||
;; do a tail call without adding another state variable to the
|
||||
;; regexp loop, so this remains inefficient
|
||||
(lambda (loop acc start end mstart mend)
|
||||
(append (map (lambda (p)
|
||||
(cons (+ mend (car p)) (+ mend (cdr p))))
|
||||
(loop '() 0 (and end (- end mend))))
|
||||
(cons (cons mstart mend) acc)))
|
||||
;; other port functions: use string case
|
||||
#f #f
|
||||
;; flags
|
||||
#f #f))
|
||||
(define (regexp-match-positions* pattern string [start 0] [end #f] [ipre #""]
|
||||
#:match-select [match-select car])
|
||||
;; Note: no need for a #:gap-select, since it is easily inferred from the
|
||||
;; resulting matches
|
||||
(if (eq? match-select car)
|
||||
;; common case
|
||||
(regexp-loop regexp-match-positions* loop start end pattern string ipre
|
||||
;; success-choose:
|
||||
(lambda (start ms acc) (cons (car ms) acc))
|
||||
;; failure-k:
|
||||
(lambda (acc start end) acc)
|
||||
;; port-success-k: need to shift index of rest as reading; cannot
|
||||
;; do a tail call without adding another state variable to the
|
||||
;; regexp loop, so this remains inefficient
|
||||
(lambda (loop acc start end ms)
|
||||
(let ([mstart (caar ms)] [mend (cdar ms)])
|
||||
(append (map (lambda (p) (cons (+ mend (car p)) (+ mend (cdr p))))
|
||||
(loop '() 0 (and end (- end mend))))
|
||||
(cons (car ms) acc))))
|
||||
;; other port functions: use string case
|
||||
#f #f
|
||||
;; flags
|
||||
#f #f)
|
||||
;; using some selector
|
||||
(regexp-loop regexp-match-positions* loop start end pattern string ipre
|
||||
;; success-choose:
|
||||
(lambda (start ms acc) (cons (match-select ms) acc))
|
||||
;; failure-k:
|
||||
(lambda (acc start end) acc)
|
||||
;; port-success-k: need to shift index of rest as reading; cannot
|
||||
;; do a tail call without adding another state variable to the
|
||||
;; regexp loop, so this remains inefficient
|
||||
(lambda (loop acc start end ms)
|
||||
(let* ([mend (cdar ms)]
|
||||
[rest (loop '() 0 (and end (- end mend)))]
|
||||
[s (match-select ms)])
|
||||
;; assumes that it's a valid selector (and always works the same,
|
||||
;; ie, does not return a single pair sometimes a list of pairs at
|
||||
;; other times)
|
||||
(append
|
||||
(map (if (or (and (pair? s) (exact-integer? (car s))) (not s))
|
||||
;; assume that it returns a single match
|
||||
(lambda (p) (cons (+ mend (car p)) (+ mend (cdr p))))
|
||||
;; assume that it returns a list
|
||||
(lambda (ps)
|
||||
(map (lambda (p)
|
||||
(and p (cons (+ mend (car p)) (+ mend (cdr p)))))
|
||||
ps)))
|
||||
rest)
|
||||
(cons s acc))))
|
||||
;; other port functions: use string case
|
||||
#f #f
|
||||
;; flags
|
||||
#f #f)))
|
||||
|
||||
;; Returns all the positions at which the pattern matched.
|
||||
(define (regexp-match-peek-positions* pattern string [start 0] [end #f]
|
||||
[ipre #""])
|
||||
(regexp-loop regexp-match-peek-positions* loop start end pattern string ipre
|
||||
;; success-choose:
|
||||
(lambda (start ms acc) (cons (car ms) acc))
|
||||
;; failure-k:
|
||||
(lambda (acc start end) acc)
|
||||
;; port functions: use string case
|
||||
#f #f #f
|
||||
;; flags
|
||||
#f #t))
|
||||
[ipre #""]
|
||||
#:match-select [match-select car])
|
||||
(if (eq? match-select car)
|
||||
;; common case
|
||||
(regexp-loop regexp-match-peek-positions* loop start end pattern string ipre
|
||||
;; success-choose:
|
||||
(lambda (start ms acc) (cons (car ms) acc))
|
||||
;; failure-k:
|
||||
(lambda (acc start end) acc)
|
||||
;; port functions: use string case
|
||||
#f #f #f
|
||||
;; flags
|
||||
#f #t)
|
||||
;; using some selector
|
||||
(regexp-loop regexp-match-peek-positions* loop start end pattern string ipre
|
||||
;; success-choose:
|
||||
(lambda (start ms acc) (cons (match-select ms) acc))
|
||||
;; failure-k:
|
||||
(lambda (acc start end) acc)
|
||||
;; port functions: use string case
|
||||
#f #f #f
|
||||
;; flags
|
||||
#f #t)))
|
||||
|
||||
;; Small helper for the functions below
|
||||
(define (get-buf+sub string pattern)
|
||||
|
@ -413,22 +462,91 @@
|
|||
#t #f)))])
|
||||
regexp-replace*))
|
||||
|
||||
;; Returns all the matches for the pattern in the string.
|
||||
(define (regexp-match* pattern string [start 0] [end #f] [ipre #""])
|
||||
(define-values [buf sub] (get-buf+sub string pattern))
|
||||
(regexp-loop regexp-match* loop start end pattern buf ipre
|
||||
;; success-choose:
|
||||
(lambda (start ms acc) (cons (sub buf (caar ms) (cdar ms)) acc))
|
||||
;; failure-k:
|
||||
(lambda (acc start end) acc)
|
||||
;; port-success-k:
|
||||
#f
|
||||
;; port-success-choose:
|
||||
(lambda (leftovers ms acc) (cons (car ms) acc))
|
||||
;; port-failure-k:
|
||||
(lambda (acc leftover) acc)
|
||||
;; flags
|
||||
#f #f))
|
||||
;; Returns all the matches for the pattern in the string, optionally
|
||||
;; other submatches and/or gap strings too.
|
||||
(define (regexp-match* pattern string [start 0] [end #f] [ipre #""]
|
||||
#:match-select [match-select car]
|
||||
#:gap-select [gap-select #f])
|
||||
(cond
|
||||
;; nonsensical case => throw an error
|
||||
[(and (not match-select) (not gap-select))
|
||||
;; An alternative would be to throw '(), but that's non-uniform in that
|
||||
;; it wouldn't consume the contents of a port. Another alternative is
|
||||
;; to do the full code to consume what's needed, but that's spending
|
||||
;; cycles to always get a '() result.
|
||||
(raise
|
||||
(exn:fail:contract
|
||||
"regexp-match*: one of `match-select' or `gap-select' must be non-#f"
|
||||
(current-continuation-marks)))]
|
||||
;; no match-select => same as `regexp-split'
|
||||
[(not match-select) (regexp-split pattern string start end ipre)]
|
||||
;; uncommon case => full code
|
||||
[(not (eq? match-select car))
|
||||
(define-values [buf sub] (get-buf+sub string pattern))
|
||||
(regexp-loop regexp-explode loop start end pattern buf ipre
|
||||
;; success-choose:
|
||||
(lambda (start ms acc)
|
||||
(cons (let ([s (match-select ms)])
|
||||
;; assumes a valid selector
|
||||
(cond [(not (pair? s)) s] ; #f or '()
|
||||
[(integer? (car s)) (sub buf (car s) (cdr s))]
|
||||
[else (map (lambda (m)
|
||||
(and m (sub buf (car m) (cdr m))))
|
||||
s)]))
|
||||
(if gap-select (cons (sub buf start (caar ms)) acc) acc)))
|
||||
;; failure-k:
|
||||
(lambda (acc start end)
|
||||
(if gap-select
|
||||
(cons (if end (sub buf start end) (sub buf start)) acc)
|
||||
acc))
|
||||
;; port-success-k:
|
||||
#f
|
||||
;; port-success-choose:
|
||||
(lambda (leftovers ms acc)
|
||||
;; assumes a valid selector too: here it's used with a bytes list so
|
||||
;; it's simple
|
||||
(cons (match-select ms) (if gap-select (cons leftovers acc) acc)))
|
||||
;; port-failure-k:
|
||||
(lambda (acc leftover)
|
||||
(if (and gap-select leftover) (cons leftover acc) acc))
|
||||
;; flags
|
||||
gap-select #f)]
|
||||
;; default for matches, but also include gaps
|
||||
[gap-select
|
||||
(define-values [buf sub] (get-buf+sub string pattern))
|
||||
(regexp-loop regexp-explode loop start end pattern buf ipre
|
||||
;; success-choose:
|
||||
(lambda (start ms acc)
|
||||
(cons (sub buf (caar ms) (cdar ms))
|
||||
(cons (sub buf start (caar ms)) acc)))
|
||||
;; failure-k:
|
||||
(lambda (acc start end)
|
||||
(cons (if end (sub buf start end) (sub buf start)) acc))
|
||||
;; port-success-k:
|
||||
#f
|
||||
;; port-success-choose:
|
||||
(lambda (leftovers ms acc) (cons (car ms) (cons leftovers acc)))
|
||||
;; port-failure-k:
|
||||
(lambda (acc leftover) (if leftover (cons leftover acc) acc))
|
||||
;; flags
|
||||
gap-select #f)]
|
||||
;; default case => optimized with specific code (this is the previous
|
||||
;; functionality of `regexp-explode*' too)
|
||||
[else
|
||||
(define-values [buf sub] (get-buf+sub string pattern))
|
||||
(regexp-loop regexp-match* loop start end pattern buf ipre
|
||||
;; success-choose:
|
||||
(lambda (start ms acc) (cons (sub buf (caar ms) (cdar ms)) acc))
|
||||
;; failure-k:
|
||||
(lambda (acc start end) acc)
|
||||
;; port-success-k:
|
||||
#f
|
||||
;; port-success-choose:
|
||||
(lambda (leftovers ms acc) (cons (car ms) acc))
|
||||
;; port-failure-k:
|
||||
(lambda (acc leftover) acc)
|
||||
;; flags
|
||||
#f #f)]))
|
||||
|
||||
(define (regexp-match-exact? p s)
|
||||
(let ([m (regexp-match-positions p s)])
|
||||
|
|
|
@ -400,26 +400,29 @@ bytes. To avoid such interleaving, use @racket[regexp-match-peek]
|
|||
[input (or/c string? bytes? path? input-port?)]
|
||||
[start-pos exact-nonnegative-integer? 0]
|
||||
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||
[input-prefix bytes? #""])
|
||||
[input-prefix bytes? #""]
|
||||
[#:match-select match-select
|
||||
(or/c ((listof any?) . -> . (or/c any? (listof any?)))
|
||||
#f)
|
||||
car]
|
||||
[#:gap-select gap-select boolean? #f])
|
||||
(if (and (or (string? pattern) (regexp? pattern))
|
||||
(or (string? input) (path? input)))
|
||||
(listof string?)
|
||||
(listof bytes?))]{
|
||||
(listof (or/c string? (listof (or/c #f string?))))
|
||||
(listof (or/c bytes? (listof (or/c #f bytes?)))))]{
|
||||
|
||||
Like @racket[regexp-match], but the result is a list of strings or
|
||||
byte strings corresponding to a sequence of matches of
|
||||
@racket[pattern] in @racket[input]. (Unlike @racket[regexp-match],
|
||||
results for parenthesized sub-patterns in @racket[pattern] are not
|
||||
returned.)
|
||||
@racket[pattern] in @racket[input].
|
||||
|
||||
The @racket[pattern] is used in order to find matches, where each
|
||||
match attempt starts at the end of the last match, and @litchar{^} is
|
||||
allowed to match the beginning of the input (if @racket[input-prefix]
|
||||
is @racket[#""]) only for the first match. Empty matches are handled
|
||||
like other matches, returning a zero-length string or byte sequence
|
||||
(they are more useful in the complementing @racket[regexp-split]
|
||||
function), but @racket[pattern] is restricted from matching an empty
|
||||
sequence immediately after an empty match.
|
||||
(they are more useful in making this a complement of
|
||||
@racket[regexp-split]), but @racket[pattern] is restricted from
|
||||
matching an empty sequence immediately after an empty match.
|
||||
|
||||
If @racket[input] contains no matches (in the range @racket[start-pos]
|
||||
to @racket[end-pos]), @racket[null] is returned. Otherwise, each item
|
||||
|
@ -432,6 +435,35 @@ port).
|
|||
@examples[
|
||||
(regexp-match* #rx"x." "12x4x6")
|
||||
(regexp-match* #rx"x*" "12x4x6")
|
||||
]
|
||||
|
||||
@racket[match-select] specifies the collected results. The default of
|
||||
@racket[car] means that the result is the list of matches without
|
||||
returning parenthesized sub-patterns. It can be given as a `selector'
|
||||
function which chooses an item from a list, or it can choose a list of
|
||||
items. For example, you can use @racket[cdr] to get a list of lists
|
||||
of parenthesized sub-patterns matches, or @racket[values] (as an
|
||||
identity function) to get the full matches as well. (Note that the
|
||||
selector must choose an element of its input list or a list of
|
||||
elements, but it must not inspect its input as they can be either a
|
||||
list of strings or a list of position pairs. Furthermore, the
|
||||
selector must be consistent in its choice(s).)
|
||||
|
||||
@examples[
|
||||
(regexp-match* #rx"x(.)" "12x4x6" #:match-select cadr)
|
||||
(regexp-match* #rx"x(.)" "12x4x6" #:match-select values)
|
||||
]
|
||||
|
||||
In addition, specifying @racket[gap-select] as a non-@racket[#f] value
|
||||
will make the result an interleaved list of the matches as well as the
|
||||
separators between them matches, starting and ending with a separator.
|
||||
In this case, @racket[match-select] can be given as @racket[#f] to
|
||||
return @emph{only} the separators, making such uses equivalent to
|
||||
@racket[regexp-split].
|
||||
|
||||
@examples[
|
||||
(regexp-match* #rx"x(.)" "12x4x6" #:match-select cadr #:gap-select #t)
|
||||
(regexp-match* #rx"x(.)" "12x4x6" #:match-select #f #:gap-select #t)
|
||||
]}
|
||||
|
||||
|
||||
|
@ -494,16 +526,27 @@ positions indicate the number of bytes that were read, including
|
|||
[input (or/c string? bytes? path? input-port?)]
|
||||
[start-pos exact-nonnegative-integer? 0]
|
||||
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||
[input-prefix bytes? #""])
|
||||
(listof (cons/c exact-nonnegative-integer?
|
||||
exact-nonnegative-integer?))]{
|
||||
[input-prefix bytes? #""]
|
||||
[#:match-select match-select
|
||||
((listof any?) . -> . (or/c any? (listof any?)))
|
||||
car])
|
||||
(or/c (listof (cons/c exact-nonnegative-integer?
|
||||
exact-nonnegative-integer?))
|
||||
(listof (listof (or/c #f (cons/c exact-nonnegative-integer?
|
||||
exact-nonnegative-integer?)))))]{
|
||||
|
||||
Like @racket[regexp-match-positions], but returns multiple matches
|
||||
like @racket[regexp-match*].
|
||||
|
||||
@examples[
|
||||
(regexp-match-positions* #rx"x." "12x4x6")
|
||||
]}
|
||||
(regexp-match-positions* #rx"x(.)" "12x4x6" #:match-select cadr)
|
||||
]
|
||||
|
||||
Note that unlike @racket[regexp-match*], there is no
|
||||
@racket[#:gap-select] input keyword, as this information can be easily
|
||||
inferred from the resulting matches.
|
||||
}
|
||||
|
||||
|
||||
@defproc[(regexp-match? [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||
|
@ -623,11 +666,13 @@ used to match @racket[pattern].}
|
|||
[start-pos exact-nonnegative-integer? 0]
|
||||
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||
[input-prefix bytes? #""])
|
||||
(listof (cons/c exact-nonnegative-integer?
|
||||
exact-nonnegative-integer?))]{
|
||||
(or/c (listof (cons/c exact-nonnegative-integer?
|
||||
exact-nonnegative-integer?))
|
||||
(listof (listof (or/c #f (cons/c exact-nonnegative-integer?
|
||||
exact-nonnegative-integer?)))))]{
|
||||
|
||||
Like @racket[regexp-match-peek-positions], but returns multiple matches like
|
||||
@racket[regexp-match*].}
|
||||
@racket[regexp-match-positions*].}
|
||||
|
||||
@defproc[(regexp-match/end [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||
[input (or/c string? bytes? path? input-port?)]
|
||||
|
|
|
@ -41,23 +41,28 @@
|
|||
[(string? x) (string->bytes/utf-8 x)]
|
||||
[(pregexp? x) (byte-pregexp (->b (object-name x)))]
|
||||
[else x]))
|
||||
(define fun* #f)
|
||||
(define funs '())
|
||||
(define (test-funs ks vs res left rx str . args)
|
||||
(unless (memq regexp-match-peek-positions* funs)
|
||||
(for ([fun (in-list funs)])
|
||||
;; test with a string
|
||||
(keyword-apply test ks vs res fun rx str args)
|
||||
;; test with a byte-regexp and/or a byte string
|
||||
(keyword-apply test ks vs (->b res) fun (->b rx) str args)
|
||||
(keyword-apply test ks vs (->b res) fun rx (->b str) args)
|
||||
(keyword-apply test ks vs (->b res) fun (->b rx) (->b str) args)))
|
||||
(for ([fun (in-list funs)])
|
||||
;; test with a port, and test leftovers
|
||||
(when left
|
||||
(let ([p (open-input-string str)])
|
||||
(keyword-apply test ks vs (->b res) fun rx p args)
|
||||
(keyword-apply test '() '() left read-string 50 p '())))))
|
||||
(define t
|
||||
(case-lambda
|
||||
[(fun) (set! fun* fun)]
|
||||
[(res left rx str . args)
|
||||
(unless (eq? fun* regexp-match-peek-positions*)
|
||||
;; test with a string
|
||||
(apply test res fun* rx str args)
|
||||
;; test with a byte-regexp and/or a byte string
|
||||
(apply test (->b res) fun* (->b rx) str args)
|
||||
(apply test (->b res) fun* rx (->b str) args)
|
||||
(apply test (->b res) fun* (->b rx) (->b str) args))
|
||||
;; test with a port, and test leftovers
|
||||
(when left
|
||||
(let ([p (open-input-string str)])
|
||||
(apply test (->b res) fun* rx p args)
|
||||
(test left read-string 50 p)))]))
|
||||
(make-keyword-procedure
|
||||
test-funs
|
||||
(case-lambda [(fun*) (set! funs (if (list? fun*) fun* (list fun*)))]
|
||||
[(res left rx str . args)
|
||||
(apply test-funs '() '() res left rx str args)])))
|
||||
;; --------------------
|
||||
(t regexp-match*)
|
||||
(t '("a" "b" "c") eof "[abc]" " a b c ")
|
||||
|
@ -75,15 +80,65 @@
|
|||
(t '("a" "b" "c") eof "[abc]" "a b c" 0)
|
||||
(t '("a" "b" "c") eof "[abc]" "a b c" 0 #f)
|
||||
(t '("a" "b" "c") eof "[abc]" "a b c" 0 5)
|
||||
(t '("a") eof "^." "a b c" 0 5 #"")
|
||||
(t '() eof "^." "a b c" 0 5 #"x")
|
||||
(t '("a\n" "b\n" "c\n") eof "(?m:^.\n)" "a\nb\nc\n" 0 6)
|
||||
(t '("b\n" "c\n") eof "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
|
||||
(for-each (lambda (cvt)
|
||||
(test '(#"\x80" #"\x80") regexp-match* (cvt #"\x80") #"a\x80z\x80q"))
|
||||
(list values byte-regexp byte-pregexp))
|
||||
(t '("a") eof "^." "a b c" 0 5 #"")
|
||||
(t '() eof "^." "a b c" 0 5 #"x")
|
||||
(t '("a\n" "b\n" "c\n") eof "(?m:^.\n)" "a\nb\nc\n" 0 6)
|
||||
(t '("b\n" "c\n") eof "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
|
||||
(for ([cvt (in-list (list values byte-regexp byte-pregexp))])
|
||||
(test '(#"\x80" #"\x80") regexp-match* (cvt #"\x80") #"a\x80z\x80q"))
|
||||
;; --------------------
|
||||
(t regexp-match-positions*)
|
||||
(define (regexp-explode . xs) (apply regexp-match* #:gap-select #t xs))
|
||||
(t regexp-explode)
|
||||
(t '(" " "a" " " "b" " " "c" " ") eof "[abc]" " a b c ")
|
||||
(t '("" "a" "+" "b" " = " "c" " ") eof "[abc]" "a+b = c ")
|
||||
(t '(" " "b" " " "c" " ") eof "[abc]" " a b c " 2)
|
||||
(t '("" "b" " " "c" " ") eof "[abc]" " a b c " 3)
|
||||
(t '(" " "a" "") " b c " "[abc]" " a b c " 0 2)
|
||||
(t '(" " "a" " ") "b c " "[abc]" " a b c " 0 3)
|
||||
(t '(" " "a" " " "b" " " "c" " ") eof "[abc]" " a b c " 0 #f)
|
||||
(t '(" " "a" " " "b" " " "c" " ") eof "[abc]" " a b c " 0 7)
|
||||
(t '("" "a" " " "b" " " "c" "") eof "[abc]" "a b c")
|
||||
(t '(" " "b" " " "c" "") eof "[abc]" "a b c" 1)
|
||||
(t '("" "b" " " "c" "") eof "[abc]" "a b c" 2)
|
||||
(t '("" "a" "") " b c" "[abc]" "a b c" 0 1)
|
||||
(t '("" "a" " ") "b c" "[abc]" "a b c" 0 2)
|
||||
(t '("" "a" " " "b" " " "c" "") eof "[abc]" "a b c" 0)
|
||||
(t '("" "a" " " "b" " " "c" "") eof "[abc]" "a b c" 0 #f)
|
||||
(t '("" "a" " " "b" " " "c" "") eof "[abc]" "a b c" 0 5)
|
||||
(t '("" "a" " b c") eof "^." "a b c" 0 5 #"")
|
||||
(t '("a b c") eof "^." "a b c" 0 5 #"x")
|
||||
(t '("" "a\n" "" "b\n" "" "c\n" "") eof "(?m:^.\n)" "a\nb\nc\n" 0 6)
|
||||
(t '("a\n" "b\n" "" "c\n" "") eof "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
|
||||
(for ([cvt (in-list (list values byte-regexp byte-pregexp))])
|
||||
(test '(#"a" #"\x80" #"z" #"\x80" #"q")
|
||||
regexp-explode (cvt #"\x80") #"a\x80z\x80q"))
|
||||
;; --------------------
|
||||
(t regexp-match*) ; some tests with a match-select etc
|
||||
;; (tests with #f for #:match-select and #t for #:gap-select done below with
|
||||
;; `regexp-split')
|
||||
(err/rt-test (regexp-match* "[abc]" "a b c"
|
||||
#:match-select #f #:gap-select #f))
|
||||
(t '("a" "b" "c") eof
|
||||
"<([abc])>" "<a> <b> <c>" #:match-select cadr)
|
||||
(t '(("a") ("b") ("c")) eof
|
||||
"<([abc])>" "<a> <b> <c>" #:match-select cdr)
|
||||
(t '(("<a>" "a") ("<b>" "b") ("<c>" "c")) eof
|
||||
"<([abc])>" "<a> <b> <c>" #:match-select values)
|
||||
(t '("" "a" " + " "b" " = " "c" "") eof
|
||||
"<([abc])>" "<a> + <b> = <c>" #:match-select cadr #:gap-select #t)
|
||||
(t '("" ("<a>" "a") " + " ("<b>" "b") " = " ("<c>" "c") "") eof
|
||||
"<([abc])>" "<a> + <b> = <c>" #:match-select values #:gap-select #t)
|
||||
(t '("" ("<a" "a" #f) " + " ("<b" "b" #f) " = " ("<c" "c" #f) "") eof
|
||||
"<([abc])(>)?" "<a + <b = <c" #:match-select values #:gap-select #t)
|
||||
;; --------------------
|
||||
(t (list regexp-match-positions*
|
||||
;; also try the generic path
|
||||
(lambda xs (apply regexp-match-positions* xs
|
||||
#:match-select (lambda (x) (car x))))
|
||||
;; and try it with a list result too
|
||||
(lambda xs
|
||||
(map car (apply regexp-match-positions* xs
|
||||
#:match-select values)))))
|
||||
(t '((1 . 2) (3 . 4) (5 . 6)) eof "[abc]" " a b c ")
|
||||
(t '((3 . 4) (5 . 6)) eof "[abc]" " a b c " 2)
|
||||
(t '((3 . 4) (5 . 6)) eof "[abc]" " a b c " 3)
|
||||
|
@ -103,35 +158,20 @@
|
|||
(t '() eof "^." "a b c" 0 5 #"x")
|
||||
(t '((0 . 2) (2 . 4) (4 . 6)) eof "(?m:^.\n)" "a\nb\nc\n" 0 6)
|
||||
(t '((2 . 4) (4 . 6)) eof "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
|
||||
(for-each (lambda (cvt)
|
||||
(test '((1 . 2) (3 . 4)) regexp-match-positions* (cvt #"\x80") #"a\x80z\x80q"))
|
||||
(list values byte-regexp byte-pregexp))
|
||||
(t '((0 . 2) (5 . 7) (10 . 12)) eof "<([abc])(>)?" "<a + <b = <c")
|
||||
(for ([cvt (in-list (list values byte-regexp byte-pregexp))])
|
||||
(test '((1 . 2) (3 . 4))
|
||||
regexp-match-positions* (cvt #"\x80") #"a\x80z\x80q"))
|
||||
;; --------------------
|
||||
(t regexp-split)
|
||||
(t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4")
|
||||
(t '("2" "3" "4") eof "[abc]" "1a2b3c4" 2)
|
||||
(t '("" "3" "4") eof "[abc]" "1a2b3c4" 3)
|
||||
(t '("1" "") "2b3c4" "[abc]" "1a2b3c4" 0 2)
|
||||
(t '("1" "2") "b3c4" "[abc]" "1a2b3c4" 0 3)
|
||||
(t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4" 0 #f)
|
||||
(t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4" 0 7)
|
||||
(t '("" "1" "2" "") eof "[abc]" "a1b2c")
|
||||
(t '("1" "2" "") eof "[abc]" "a1b2c" 1)
|
||||
(t '("" "2" "") eof "[abc]" "a1b2c" 2)
|
||||
(t '("" "") "1b2c" "[abc]" "a1b2c" 0 1)
|
||||
(t '("" "1") "b2c" "[abc]" "a1b2c" 0 2)
|
||||
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0)
|
||||
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0 #f)
|
||||
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0 5)
|
||||
(t '("" " b c") eof "^." "a b c" 0 5 #"")
|
||||
(t '("a b c") eof "^." "a b c" 0 5 #"x")
|
||||
(t '("" "" "" "") eof "(?m:^.\n)" "a\nb\nc\n" 0 6)
|
||||
(t '("a\n" "" "") eof "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
|
||||
(for-each (lambda (cvt)
|
||||
(test '(#"" #"a" #"z" #"q" #"") regexp-split (cvt #"\x80") #"\x80a\x80z\x80q\x80"))
|
||||
(list values byte-regexp byte-pregexp))
|
||||
;; --------------------
|
||||
(t regexp-match-peek-positions*)
|
||||
(t (list regexp-match-peek-positions*
|
||||
;; also try the generic path
|
||||
(lambda xs (apply regexp-match-peek-positions* xs
|
||||
#:match-select (lambda (x) (car x))))
|
||||
;; and try it with a list result too
|
||||
(lambda xs
|
||||
(map (lambda (x) (and (= 1 (length x)) (car x)))
|
||||
(apply regexp-match-peek-positions* xs
|
||||
#:match-select values)))))
|
||||
(err/rt-test (regexp-match-peek-positions* "[abc]" "a b c"))
|
||||
(t '((1 . 2) (3 . 4) (5 . 6)) " a b c " "[abc]" " a b c ")
|
||||
(t '((3 . 4) (5 . 6)) " a b c " "[abc]" " a b c " 2)
|
||||
|
@ -151,6 +191,33 @@
|
|||
(t '() "a b c" "^." "a b c" 0 5 #"x")
|
||||
(t '((0 . 2) (2 . 4) (4 . 6)) "a\nb\nc\n" "(?m:^.\n)" "a\nb\nc\n" 0 6)
|
||||
(t '((2 . 4) (4 . 6)) "a\nb\nc\n" "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
|
||||
;; --------------------
|
||||
(t (list regexp-split
|
||||
;; also via an equivalent `regexp-match*' configuration
|
||||
(lambda xs
|
||||
(apply regexp-match* xs #:match-select #f #:gap-select #t))))
|
||||
(t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4")
|
||||
(t '("2" "3" "4") eof "[abc]" "1a2b3c4" 2)
|
||||
(t '("" "3" "4") eof "[abc]" "1a2b3c4" 3)
|
||||
(t '("1" "") "2b3c4" "[abc]" "1a2b3c4" 0 2)
|
||||
(t '("1" "2") "b3c4" "[abc]" "1a2b3c4" 0 3)
|
||||
(t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4" 0 #f)
|
||||
(t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4" 0 7)
|
||||
(t '("" "1" "2" "") eof "[abc]" "a1b2c")
|
||||
(t '("1" "2" "") eof "[abc]" "a1b2c" 1)
|
||||
(t '("" "2" "") eof "[abc]" "a1b2c" 2)
|
||||
(t '("" "") "1b2c" "[abc]" "a1b2c" 0 1)
|
||||
(t '("" "1") "b2c" "[abc]" "a1b2c" 0 2)
|
||||
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0)
|
||||
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0 #f)
|
||||
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0 5)
|
||||
(t '("" " b c") eof "^." "a b c" 0 5 #"")
|
||||
(t '("a b c") eof "^." "a b c" 0 5 #"x")
|
||||
(t '("" "" "" "") eof "(?m:^.\n)" "a\nb\nc\n" 0 6)
|
||||
(t '("a\n" "" "") eof "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
|
||||
(for ([cvt (in-list (list values byte-regexp byte-pregexp))])
|
||||
(test '(#"" #"a" #"z" #"q" #"")
|
||||
regexp-split (cvt #"\x80") #"\x80a\x80z\x80q\x80"))
|
||||
;; ---------- tests with zero-length matches ----------
|
||||
;; Many of these tests can be repeated with Perl. To try something in Perl,
|
||||
;; put this code in a file:
|
||||
|
|
Loading…
Reference in New Issue
Block a user