Add #:match-select' and
#:gap-select' keyword arguments to
`regexp-match*', and the two `-positions' variants.
This commit is contained in:
parent
f538c2e076
commit
0eb5f09e23
|
@ -233,12 +233,17 @@
|
||||||
(port-success-k
|
(port-success-k
|
||||||
(lambda (acc new-start new-end)
|
(lambda (acc new-start new-end)
|
||||||
(loop acc new-start new-end ipre 0-ok?))
|
(loop acc new-start new-end ipre 0-ok?))
|
||||||
acc start end mstart mend)
|
acc start end m)
|
||||||
(loop (success-choose start m acc)
|
(loop (success-choose start m acc)
|
||||||
mend end ipre 0-ok?)))))))))))
|
mend end ipre 0-ok?)))))))))))
|
||||||
|
|
||||||
;; Returns all the positions at which the pattern matched.
|
;; Returns all the positions at which the pattern matched.
|
||||||
(define (regexp-match-positions* pattern string [start 0] [end #f] [ipre #""])
|
(define (regexp-match-positions* pattern string [start 0] [end #f] [ipre #""]
|
||||||
|
#:match-select [match-select car])
|
||||||
|
;; Note: no need for a #:gap-select, since it is easily inferred from the
|
||||||
|
;; resulting matches
|
||||||
|
(if (eq? match-select car)
|
||||||
|
;; common case
|
||||||
(regexp-loop regexp-match-positions* loop start end pattern string ipre
|
(regexp-loop regexp-match-positions* loop start end pattern string ipre
|
||||||
;; success-choose:
|
;; success-choose:
|
||||||
(lambda (start ms acc) (cons (car ms) acc))
|
(lambda (start ms acc) (cons (car ms) acc))
|
||||||
|
@ -247,19 +252,53 @@
|
||||||
;; port-success-k: need to shift index of rest as reading; cannot
|
;; port-success-k: need to shift index of rest as reading; cannot
|
||||||
;; do a tail call without adding another state variable to the
|
;; do a tail call without adding another state variable to the
|
||||||
;; regexp loop, so this remains inefficient
|
;; regexp loop, so this remains inefficient
|
||||||
(lambda (loop acc start end mstart mend)
|
(lambda (loop acc start end ms)
|
||||||
(append (map (lambda (p)
|
(let ([mstart (caar ms)] [mend (cdar ms)])
|
||||||
(cons (+ mend (car p)) (+ mend (cdr p))))
|
(append (map (lambda (p) (cons (+ mend (car p)) (+ mend (cdr p))))
|
||||||
(loop '() 0 (and end (- end mend))))
|
(loop '() 0 (and end (- end mend))))
|
||||||
(cons (cons mstart mend) acc)))
|
(cons (car ms) acc))))
|
||||||
;; other port functions: use string case
|
;; other port functions: use string case
|
||||||
#f #f
|
#f #f
|
||||||
;; flags
|
;; flags
|
||||||
#f #f))
|
#f #f)
|
||||||
|
;; using some selector
|
||||||
|
(regexp-loop regexp-match-positions* loop start end pattern string ipre
|
||||||
|
;; success-choose:
|
||||||
|
(lambda (start ms acc) (cons (match-select ms) acc))
|
||||||
|
;; failure-k:
|
||||||
|
(lambda (acc start end) acc)
|
||||||
|
;; port-success-k: need to shift index of rest as reading; cannot
|
||||||
|
;; do a tail call without adding another state variable to the
|
||||||
|
;; regexp loop, so this remains inefficient
|
||||||
|
(lambda (loop acc start end ms)
|
||||||
|
(let* ([mend (cdar ms)]
|
||||||
|
[rest (loop '() 0 (and end (- end mend)))]
|
||||||
|
[s (match-select ms)])
|
||||||
|
;; assumes that it's a valid selector (and always works the same,
|
||||||
|
;; ie, does not return a single pair sometimes a list of pairs at
|
||||||
|
;; other times)
|
||||||
|
(append
|
||||||
|
(map (if (or (and (pair? s) (exact-integer? (car s))) (not s))
|
||||||
|
;; assume that it returns a single match
|
||||||
|
(lambda (p) (cons (+ mend (car p)) (+ mend (cdr p))))
|
||||||
|
;; assume that it returns a list
|
||||||
|
(lambda (ps)
|
||||||
|
(map (lambda (p)
|
||||||
|
(and p (cons (+ mend (car p)) (+ mend (cdr p)))))
|
||||||
|
ps)))
|
||||||
|
rest)
|
||||||
|
(cons s acc))))
|
||||||
|
;; other port functions: use string case
|
||||||
|
#f #f
|
||||||
|
;; flags
|
||||||
|
#f #f)))
|
||||||
|
|
||||||
;; Returns all the positions at which the pattern matched.
|
;; Returns all the positions at which the pattern matched.
|
||||||
(define (regexp-match-peek-positions* pattern string [start 0] [end #f]
|
(define (regexp-match-peek-positions* pattern string [start 0] [end #f]
|
||||||
[ipre #""])
|
[ipre #""]
|
||||||
|
#:match-select [match-select car])
|
||||||
|
(if (eq? match-select car)
|
||||||
|
;; common case
|
||||||
(regexp-loop regexp-match-peek-positions* loop start end pattern string ipre
|
(regexp-loop regexp-match-peek-positions* loop start end pattern string ipre
|
||||||
;; success-choose:
|
;; success-choose:
|
||||||
(lambda (start ms acc) (cons (car ms) acc))
|
(lambda (start ms acc) (cons (car ms) acc))
|
||||||
|
@ -268,7 +307,17 @@
|
||||||
;; port functions: use string case
|
;; port functions: use string case
|
||||||
#f #f #f
|
#f #f #f
|
||||||
;; flags
|
;; flags
|
||||||
#f #t))
|
#f #t)
|
||||||
|
;; using some selector
|
||||||
|
(regexp-loop regexp-match-peek-positions* loop start end pattern string ipre
|
||||||
|
;; success-choose:
|
||||||
|
(lambda (start ms acc) (cons (match-select ms) acc))
|
||||||
|
;; failure-k:
|
||||||
|
(lambda (acc start end) acc)
|
||||||
|
;; port functions: use string case
|
||||||
|
#f #f #f
|
||||||
|
;; flags
|
||||||
|
#f #t)))
|
||||||
|
|
||||||
;; Small helper for the functions below
|
;; Small helper for the functions below
|
||||||
(define (get-buf+sub string pattern)
|
(define (get-buf+sub string pattern)
|
||||||
|
@ -413,8 +462,77 @@
|
||||||
#t #f)))])
|
#t #f)))])
|
||||||
regexp-replace*))
|
regexp-replace*))
|
||||||
|
|
||||||
;; Returns all the matches for the pattern in the string.
|
;; Returns all the matches for the pattern in the string, optionally
|
||||||
(define (regexp-match* pattern string [start 0] [end #f] [ipre #""])
|
;; other submatches and/or gap strings too.
|
||||||
|
(define (regexp-match* pattern string [start 0] [end #f] [ipre #""]
|
||||||
|
#:match-select [match-select car]
|
||||||
|
#:gap-select [gap-select #f])
|
||||||
|
(cond
|
||||||
|
;; nonsensical case => throw an error
|
||||||
|
[(and (not match-select) (not gap-select))
|
||||||
|
;; An alternative would be to throw '(), but that's non-uniform in that
|
||||||
|
;; it wouldn't consume the contents of a port. Another alternative is
|
||||||
|
;; to do the full code to consume what's needed, but that's spending
|
||||||
|
;; cycles to always get a '() result.
|
||||||
|
(raise
|
||||||
|
(exn:fail:contract
|
||||||
|
"regexp-match*: one of `match-select' or `gap-select' must be non-#f"
|
||||||
|
(current-continuation-marks)))]
|
||||||
|
;; no match-select => same as `regexp-split'
|
||||||
|
[(not match-select) (regexp-split pattern string start end ipre)]
|
||||||
|
;; uncommon case => full code
|
||||||
|
[(not (eq? match-select car))
|
||||||
|
(define-values [buf sub] (get-buf+sub string pattern))
|
||||||
|
(regexp-loop regexp-explode loop start end pattern buf ipre
|
||||||
|
;; success-choose:
|
||||||
|
(lambda (start ms acc)
|
||||||
|
(cons (let ([s (match-select ms)])
|
||||||
|
;; assumes a valid selector
|
||||||
|
(cond [(not (pair? s)) s] ; #f or '()
|
||||||
|
[(integer? (car s)) (sub buf (car s) (cdr s))]
|
||||||
|
[else (map (lambda (m)
|
||||||
|
(and m (sub buf (car m) (cdr m))))
|
||||||
|
s)]))
|
||||||
|
(if gap-select (cons (sub buf start (caar ms)) acc) acc)))
|
||||||
|
;; failure-k:
|
||||||
|
(lambda (acc start end)
|
||||||
|
(if gap-select
|
||||||
|
(cons (if end (sub buf start end) (sub buf start)) acc)
|
||||||
|
acc))
|
||||||
|
;; port-success-k:
|
||||||
|
#f
|
||||||
|
;; port-success-choose:
|
||||||
|
(lambda (leftovers ms acc)
|
||||||
|
;; assumes a valid selector too: here it's used with a bytes list so
|
||||||
|
;; it's simple
|
||||||
|
(cons (match-select ms) (if gap-select (cons leftovers acc) acc)))
|
||||||
|
;; port-failure-k:
|
||||||
|
(lambda (acc leftover)
|
||||||
|
(if (and gap-select leftover) (cons leftover acc) acc))
|
||||||
|
;; flags
|
||||||
|
gap-select #f)]
|
||||||
|
;; default for matches, but also include gaps
|
||||||
|
[gap-select
|
||||||
|
(define-values [buf sub] (get-buf+sub string pattern))
|
||||||
|
(regexp-loop regexp-explode loop start end pattern buf ipre
|
||||||
|
;; success-choose:
|
||||||
|
(lambda (start ms acc)
|
||||||
|
(cons (sub buf (caar ms) (cdar ms))
|
||||||
|
(cons (sub buf start (caar ms)) acc)))
|
||||||
|
;; failure-k:
|
||||||
|
(lambda (acc start end)
|
||||||
|
(cons (if end (sub buf start end) (sub buf start)) acc))
|
||||||
|
;; port-success-k:
|
||||||
|
#f
|
||||||
|
;; port-success-choose:
|
||||||
|
(lambda (leftovers ms acc) (cons (car ms) (cons leftovers acc)))
|
||||||
|
;; port-failure-k:
|
||||||
|
(lambda (acc leftover) (if leftover (cons leftover acc) acc))
|
||||||
|
;; flags
|
||||||
|
gap-select #f)]
|
||||||
|
;; default case => optimized with specific code (this is the previous
|
||||||
|
;; functionality of `regexp-explode*' too)
|
||||||
|
[else
|
||||||
(define-values [buf sub] (get-buf+sub string pattern))
|
(define-values [buf sub] (get-buf+sub string pattern))
|
||||||
(regexp-loop regexp-match* loop start end pattern buf ipre
|
(regexp-loop regexp-match* loop start end pattern buf ipre
|
||||||
;; success-choose:
|
;; success-choose:
|
||||||
|
@ -428,7 +546,7 @@
|
||||||
;; port-failure-k:
|
;; port-failure-k:
|
||||||
(lambda (acc leftover) acc)
|
(lambda (acc leftover) acc)
|
||||||
;; flags
|
;; flags
|
||||||
#f #f))
|
#f #f)]))
|
||||||
|
|
||||||
(define (regexp-match-exact? p s)
|
(define (regexp-match-exact? p s)
|
||||||
(let ([m (regexp-match-positions p s)])
|
(let ([m (regexp-match-positions p s)])
|
||||||
|
|
|
@ -400,26 +400,29 @@ bytes. To avoid such interleaving, use @racket[regexp-match-peek]
|
||||||
[input (or/c string? bytes? path? input-port?)]
|
[input (or/c string? bytes? path? input-port?)]
|
||||||
[start-pos exact-nonnegative-integer? 0]
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
[input-prefix bytes? #""])
|
[input-prefix bytes? #""]
|
||||||
|
[#:match-select match-select
|
||||||
|
(or/c ((listof any?) . -> . (or/c any? (listof any?)))
|
||||||
|
#f)
|
||||||
|
car]
|
||||||
|
[#:gap-select gap-select boolean? #f])
|
||||||
(if (and (or (string? pattern) (regexp? pattern))
|
(if (and (or (string? pattern) (regexp? pattern))
|
||||||
(or (string? input) (path? input)))
|
(or (string? input) (path? input)))
|
||||||
(listof string?)
|
(listof (or/c string? (listof (or/c #f string?))))
|
||||||
(listof bytes?))]{
|
(listof (or/c bytes? (listof (or/c #f bytes?)))))]{
|
||||||
|
|
||||||
Like @racket[regexp-match], but the result is a list of strings or
|
Like @racket[regexp-match], but the result is a list of strings or
|
||||||
byte strings corresponding to a sequence of matches of
|
byte strings corresponding to a sequence of matches of
|
||||||
@racket[pattern] in @racket[input]. (Unlike @racket[regexp-match],
|
@racket[pattern] in @racket[input].
|
||||||
results for parenthesized sub-patterns in @racket[pattern] are not
|
|
||||||
returned.)
|
|
||||||
|
|
||||||
The @racket[pattern] is used in order to find matches, where each
|
The @racket[pattern] is used in order to find matches, where each
|
||||||
match attempt starts at the end of the last match, and @litchar{^} is
|
match attempt starts at the end of the last match, and @litchar{^} is
|
||||||
allowed to match the beginning of the input (if @racket[input-prefix]
|
allowed to match the beginning of the input (if @racket[input-prefix]
|
||||||
is @racket[#""]) only for the first match. Empty matches are handled
|
is @racket[#""]) only for the first match. Empty matches are handled
|
||||||
like other matches, returning a zero-length string or byte sequence
|
like other matches, returning a zero-length string or byte sequence
|
||||||
(they are more useful in the complementing @racket[regexp-split]
|
(they are more useful in making this a complement of
|
||||||
function), but @racket[pattern] is restricted from matching an empty
|
@racket[regexp-split]), but @racket[pattern] is restricted from
|
||||||
sequence immediately after an empty match.
|
matching an empty sequence immediately after an empty match.
|
||||||
|
|
||||||
If @racket[input] contains no matches (in the range @racket[start-pos]
|
If @racket[input] contains no matches (in the range @racket[start-pos]
|
||||||
to @racket[end-pos]), @racket[null] is returned. Otherwise, each item
|
to @racket[end-pos]), @racket[null] is returned. Otherwise, each item
|
||||||
|
@ -432,6 +435,35 @@ port).
|
||||||
@examples[
|
@examples[
|
||||||
(regexp-match* #rx"x." "12x4x6")
|
(regexp-match* #rx"x." "12x4x6")
|
||||||
(regexp-match* #rx"x*" "12x4x6")
|
(regexp-match* #rx"x*" "12x4x6")
|
||||||
|
]
|
||||||
|
|
||||||
|
@racket[match-select] specifies the collected results. The default of
|
||||||
|
@racket[car] means that the result is the list of matches without
|
||||||
|
returning parenthesized sub-patterns. It can be given as a `selector'
|
||||||
|
function which chooses an item from a list, or it can choose a list of
|
||||||
|
items. For example, you can use @racket[cdr] to get a list of lists
|
||||||
|
of parenthesized sub-patterns matches, or @racket[values] (as an
|
||||||
|
identity function) to get the full matches as well. (Note that the
|
||||||
|
selector must choose an element of its input list or a list of
|
||||||
|
elements, but it must not inspect its input as they can be either a
|
||||||
|
list of strings or a list of position pairs. Furthermore, the
|
||||||
|
selector must be consistent in its choice(s).)
|
||||||
|
|
||||||
|
@examples[
|
||||||
|
(regexp-match* #rx"x(.)" "12x4x6" #:match-select cadr)
|
||||||
|
(regexp-match* #rx"x(.)" "12x4x6" #:match-select values)
|
||||||
|
]
|
||||||
|
|
||||||
|
In addition, specifying @racket[gap-select] as a non-@racket[#f] value
|
||||||
|
will make the result an interleaved list of the matches as well as the
|
||||||
|
separators between them matches, starting and ending with a separator.
|
||||||
|
In this case, @racket[match-select] can be given as @racket[#f] to
|
||||||
|
return @emph{only} the separators, making such uses equivalent to
|
||||||
|
@racket[regexp-split].
|
||||||
|
|
||||||
|
@examples[
|
||||||
|
(regexp-match* #rx"x(.)" "12x4x6" #:match-select cadr #:gap-select #t)
|
||||||
|
(regexp-match* #rx"x(.)" "12x4x6" #:match-select #f #:gap-select #t)
|
||||||
]}
|
]}
|
||||||
|
|
||||||
|
|
||||||
|
@ -494,16 +526,27 @@ positions indicate the number of bytes that were read, including
|
||||||
[input (or/c string? bytes? path? input-port?)]
|
[input (or/c string? bytes? path? input-port?)]
|
||||||
[start-pos exact-nonnegative-integer? 0]
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
[input-prefix bytes? #""])
|
[input-prefix bytes? #""]
|
||||||
(listof (cons/c exact-nonnegative-integer?
|
[#:match-select match-select
|
||||||
exact-nonnegative-integer?))]{
|
((listof any?) . -> . (or/c any? (listof any?)))
|
||||||
|
car])
|
||||||
|
(or/c (listof (cons/c exact-nonnegative-integer?
|
||||||
|
exact-nonnegative-integer?))
|
||||||
|
(listof (listof (or/c #f (cons/c exact-nonnegative-integer?
|
||||||
|
exact-nonnegative-integer?)))))]{
|
||||||
|
|
||||||
Like @racket[regexp-match-positions], but returns multiple matches
|
Like @racket[regexp-match-positions], but returns multiple matches
|
||||||
like @racket[regexp-match*].
|
like @racket[regexp-match*].
|
||||||
|
|
||||||
@examples[
|
@examples[
|
||||||
(regexp-match-positions* #rx"x." "12x4x6")
|
(regexp-match-positions* #rx"x." "12x4x6")
|
||||||
]}
|
(regexp-match-positions* #rx"x(.)" "12x4x6" #:match-select cadr)
|
||||||
|
]
|
||||||
|
|
||||||
|
Note that unlike @racket[regexp-match*], there is no
|
||||||
|
@racket[#:gap-select] input keyword, as this information can be easily
|
||||||
|
inferred from the resulting matches.
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@defproc[(regexp-match? [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
@defproc[(regexp-match? [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||||
|
@ -623,11 +666,13 @@ used to match @racket[pattern].}
|
||||||
[start-pos exact-nonnegative-integer? 0]
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
[input-prefix bytes? #""])
|
[input-prefix bytes? #""])
|
||||||
(listof (cons/c exact-nonnegative-integer?
|
(or/c (listof (cons/c exact-nonnegative-integer?
|
||||||
exact-nonnegative-integer?))]{
|
exact-nonnegative-integer?))
|
||||||
|
(listof (listof (or/c #f (cons/c exact-nonnegative-integer?
|
||||||
|
exact-nonnegative-integer?)))))]{
|
||||||
|
|
||||||
Like @racket[regexp-match-peek-positions], but returns multiple matches like
|
Like @racket[regexp-match-peek-positions], but returns multiple matches like
|
||||||
@racket[regexp-match*].}
|
@racket[regexp-match-positions*].}
|
||||||
|
|
||||||
@defproc[(regexp-match/end [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
@defproc[(regexp-match/end [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||||
[input (or/c string? bytes? path? input-port?)]
|
[input (or/c string? bytes? path? input-port?)]
|
||||||
|
|
|
@ -41,23 +41,28 @@
|
||||||
[(string? x) (string->bytes/utf-8 x)]
|
[(string? x) (string->bytes/utf-8 x)]
|
||||||
[(pregexp? x) (byte-pregexp (->b (object-name x)))]
|
[(pregexp? x) (byte-pregexp (->b (object-name x)))]
|
||||||
[else x]))
|
[else x]))
|
||||||
(define fun* #f)
|
(define funs '())
|
||||||
(define t
|
(define (test-funs ks vs res left rx str . args)
|
||||||
(case-lambda
|
(unless (memq regexp-match-peek-positions* funs)
|
||||||
[(fun) (set! fun* fun)]
|
(for ([fun (in-list funs)])
|
||||||
[(res left rx str . args)
|
|
||||||
(unless (eq? fun* regexp-match-peek-positions*)
|
|
||||||
;; test with a string
|
;; test with a string
|
||||||
(apply test res fun* rx str args)
|
(keyword-apply test ks vs res fun rx str args)
|
||||||
;; test with a byte-regexp and/or a byte string
|
;; test with a byte-regexp and/or a byte string
|
||||||
(apply test (->b res) fun* (->b rx) str args)
|
(keyword-apply test ks vs (->b res) fun (->b rx) str args)
|
||||||
(apply test (->b res) fun* rx (->b str) args)
|
(keyword-apply test ks vs (->b res) fun rx (->b str) args)
|
||||||
(apply test (->b res) fun* (->b rx) (->b str) args))
|
(keyword-apply test ks vs (->b res) fun (->b rx) (->b str) args)))
|
||||||
|
(for ([fun (in-list funs)])
|
||||||
;; test with a port, and test leftovers
|
;; test with a port, and test leftovers
|
||||||
(when left
|
(when left
|
||||||
(let ([p (open-input-string str)])
|
(let ([p (open-input-string str)])
|
||||||
(apply test (->b res) fun* rx p args)
|
(keyword-apply test ks vs (->b res) fun rx p args)
|
||||||
(test left read-string 50 p)))]))
|
(keyword-apply test '() '() left read-string 50 p '())))))
|
||||||
|
(define t
|
||||||
|
(make-keyword-procedure
|
||||||
|
test-funs
|
||||||
|
(case-lambda [(fun*) (set! funs (if (list? fun*) fun* (list fun*)))]
|
||||||
|
[(res left rx str . args)
|
||||||
|
(apply test-funs '() '() res left rx str args)])))
|
||||||
;; --------------------
|
;; --------------------
|
||||||
(t regexp-match*)
|
(t regexp-match*)
|
||||||
(t '("a" "b" "c") eof "[abc]" " a b c ")
|
(t '("a" "b" "c") eof "[abc]" " a b c ")
|
||||||
|
@ -79,11 +84,61 @@
|
||||||
(t '() eof "^." "a b c" 0 5 #"x")
|
(t '() eof "^." "a b c" 0 5 #"x")
|
||||||
(t '("a\n" "b\n" "c\n") eof "(?m:^.\n)" "a\nb\nc\n" 0 6)
|
(t '("a\n" "b\n" "c\n") eof "(?m:^.\n)" "a\nb\nc\n" 0 6)
|
||||||
(t '("b\n" "c\n") eof "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
|
(t '("b\n" "c\n") eof "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
|
||||||
(for-each (lambda (cvt)
|
(for ([cvt (in-list (list values byte-regexp byte-pregexp))])
|
||||||
(test '(#"\x80" #"\x80") regexp-match* (cvt #"\x80") #"a\x80z\x80q"))
|
(test '(#"\x80" #"\x80") regexp-match* (cvt #"\x80") #"a\x80z\x80q"))
|
||||||
(list values byte-regexp byte-pregexp))
|
|
||||||
;; --------------------
|
;; --------------------
|
||||||
(t regexp-match-positions*)
|
(define (regexp-explode . xs) (apply regexp-match* #:gap-select #t xs))
|
||||||
|
(t regexp-explode)
|
||||||
|
(t '(" " "a" " " "b" " " "c" " ") eof "[abc]" " a b c ")
|
||||||
|
(t '("" "a" "+" "b" " = " "c" " ") eof "[abc]" "a+b = c ")
|
||||||
|
(t '(" " "b" " " "c" " ") eof "[abc]" " a b c " 2)
|
||||||
|
(t '("" "b" " " "c" " ") eof "[abc]" " a b c " 3)
|
||||||
|
(t '(" " "a" "") " b c " "[abc]" " a b c " 0 2)
|
||||||
|
(t '(" " "a" " ") "b c " "[abc]" " a b c " 0 3)
|
||||||
|
(t '(" " "a" " " "b" " " "c" " ") eof "[abc]" " a b c " 0 #f)
|
||||||
|
(t '(" " "a" " " "b" " " "c" " ") eof "[abc]" " a b c " 0 7)
|
||||||
|
(t '("" "a" " " "b" " " "c" "") eof "[abc]" "a b c")
|
||||||
|
(t '(" " "b" " " "c" "") eof "[abc]" "a b c" 1)
|
||||||
|
(t '("" "b" " " "c" "") eof "[abc]" "a b c" 2)
|
||||||
|
(t '("" "a" "") " b c" "[abc]" "a b c" 0 1)
|
||||||
|
(t '("" "a" " ") "b c" "[abc]" "a b c" 0 2)
|
||||||
|
(t '("" "a" " " "b" " " "c" "") eof "[abc]" "a b c" 0)
|
||||||
|
(t '("" "a" " " "b" " " "c" "") eof "[abc]" "a b c" 0 #f)
|
||||||
|
(t '("" "a" " " "b" " " "c" "") eof "[abc]" "a b c" 0 5)
|
||||||
|
(t '("" "a" " b c") eof "^." "a b c" 0 5 #"")
|
||||||
|
(t '("a b c") eof "^." "a b c" 0 5 #"x")
|
||||||
|
(t '("" "a\n" "" "b\n" "" "c\n" "") eof "(?m:^.\n)" "a\nb\nc\n" 0 6)
|
||||||
|
(t '("a\n" "b\n" "" "c\n" "") eof "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
|
||||||
|
(for ([cvt (in-list (list values byte-regexp byte-pregexp))])
|
||||||
|
(test '(#"a" #"\x80" #"z" #"\x80" #"q")
|
||||||
|
regexp-explode (cvt #"\x80") #"a\x80z\x80q"))
|
||||||
|
;; --------------------
|
||||||
|
(t regexp-match*) ; some tests with a match-select etc
|
||||||
|
;; (tests with #f for #:match-select and #t for #:gap-select done below with
|
||||||
|
;; `regexp-split')
|
||||||
|
(err/rt-test (regexp-match* "[abc]" "a b c"
|
||||||
|
#:match-select #f #:gap-select #f))
|
||||||
|
(t '("a" "b" "c") eof
|
||||||
|
"<([abc])>" "<a> <b> <c>" #:match-select cadr)
|
||||||
|
(t '(("a") ("b") ("c")) eof
|
||||||
|
"<([abc])>" "<a> <b> <c>" #:match-select cdr)
|
||||||
|
(t '(("<a>" "a") ("<b>" "b") ("<c>" "c")) eof
|
||||||
|
"<([abc])>" "<a> <b> <c>" #:match-select values)
|
||||||
|
(t '("" "a" " + " "b" " = " "c" "") eof
|
||||||
|
"<([abc])>" "<a> + <b> = <c>" #:match-select cadr #:gap-select #t)
|
||||||
|
(t '("" ("<a>" "a") " + " ("<b>" "b") " = " ("<c>" "c") "") eof
|
||||||
|
"<([abc])>" "<a> + <b> = <c>" #:match-select values #:gap-select #t)
|
||||||
|
(t '("" ("<a" "a" #f) " + " ("<b" "b" #f) " = " ("<c" "c" #f) "") eof
|
||||||
|
"<([abc])(>)?" "<a + <b = <c" #:match-select values #:gap-select #t)
|
||||||
|
;; --------------------
|
||||||
|
(t (list regexp-match-positions*
|
||||||
|
;; also try the generic path
|
||||||
|
(lambda xs (apply regexp-match-positions* xs
|
||||||
|
#:match-select (lambda (x) (car x))))
|
||||||
|
;; and try it with a list result too
|
||||||
|
(lambda xs
|
||||||
|
(map car (apply regexp-match-positions* xs
|
||||||
|
#:match-select values)))))
|
||||||
(t '((1 . 2) (3 . 4) (5 . 6)) eof "[abc]" " a b c ")
|
(t '((1 . 2) (3 . 4) (5 . 6)) eof "[abc]" " a b c ")
|
||||||
(t '((3 . 4) (5 . 6)) eof "[abc]" " a b c " 2)
|
(t '((3 . 4) (5 . 6)) eof "[abc]" " a b c " 2)
|
||||||
(t '((3 . 4) (5 . 6)) eof "[abc]" " a b c " 3)
|
(t '((3 . 4) (5 . 6)) eof "[abc]" " a b c " 3)
|
||||||
|
@ -103,35 +158,20 @@
|
||||||
(t '() eof "^." "a b c" 0 5 #"x")
|
(t '() eof "^." "a b c" 0 5 #"x")
|
||||||
(t '((0 . 2) (2 . 4) (4 . 6)) eof "(?m:^.\n)" "a\nb\nc\n" 0 6)
|
(t '((0 . 2) (2 . 4) (4 . 6)) eof "(?m:^.\n)" "a\nb\nc\n" 0 6)
|
||||||
(t '((2 . 4) (4 . 6)) eof "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
|
(t '((2 . 4) (4 . 6)) eof "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
|
||||||
(for-each (lambda (cvt)
|
(t '((0 . 2) (5 . 7) (10 . 12)) eof "<([abc])(>)?" "<a + <b = <c")
|
||||||
(test '((1 . 2) (3 . 4)) regexp-match-positions* (cvt #"\x80") #"a\x80z\x80q"))
|
(for ([cvt (in-list (list values byte-regexp byte-pregexp))])
|
||||||
(list values byte-regexp byte-pregexp))
|
(test '((1 . 2) (3 . 4))
|
||||||
|
regexp-match-positions* (cvt #"\x80") #"a\x80z\x80q"))
|
||||||
;; --------------------
|
;; --------------------
|
||||||
(t regexp-split)
|
(t (list regexp-match-peek-positions*
|
||||||
(t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4")
|
;; also try the generic path
|
||||||
(t '("2" "3" "4") eof "[abc]" "1a2b3c4" 2)
|
(lambda xs (apply regexp-match-peek-positions* xs
|
||||||
(t '("" "3" "4") eof "[abc]" "1a2b3c4" 3)
|
#:match-select (lambda (x) (car x))))
|
||||||
(t '("1" "") "2b3c4" "[abc]" "1a2b3c4" 0 2)
|
;; and try it with a list result too
|
||||||
(t '("1" "2") "b3c4" "[abc]" "1a2b3c4" 0 3)
|
(lambda xs
|
||||||
(t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4" 0 #f)
|
(map (lambda (x) (and (= 1 (length x)) (car x)))
|
||||||
(t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4" 0 7)
|
(apply regexp-match-peek-positions* xs
|
||||||
(t '("" "1" "2" "") eof "[abc]" "a1b2c")
|
#:match-select values)))))
|
||||||
(t '("1" "2" "") eof "[abc]" "a1b2c" 1)
|
|
||||||
(t '("" "2" "") eof "[abc]" "a1b2c" 2)
|
|
||||||
(t '("" "") "1b2c" "[abc]" "a1b2c" 0 1)
|
|
||||||
(t '("" "1") "b2c" "[abc]" "a1b2c" 0 2)
|
|
||||||
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0)
|
|
||||||
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0 #f)
|
|
||||||
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0 5)
|
|
||||||
(t '("" " b c") eof "^." "a b c" 0 5 #"")
|
|
||||||
(t '("a b c") eof "^." "a b c" 0 5 #"x")
|
|
||||||
(t '("" "" "" "") eof "(?m:^.\n)" "a\nb\nc\n" 0 6)
|
|
||||||
(t '("a\n" "" "") eof "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
|
|
||||||
(for-each (lambda (cvt)
|
|
||||||
(test '(#"" #"a" #"z" #"q" #"") regexp-split (cvt #"\x80") #"\x80a\x80z\x80q\x80"))
|
|
||||||
(list values byte-regexp byte-pregexp))
|
|
||||||
;; --------------------
|
|
||||||
(t regexp-match-peek-positions*)
|
|
||||||
(err/rt-test (regexp-match-peek-positions* "[abc]" "a b c"))
|
(err/rt-test (regexp-match-peek-positions* "[abc]" "a b c"))
|
||||||
(t '((1 . 2) (3 . 4) (5 . 6)) " a b c " "[abc]" " a b c ")
|
(t '((1 . 2) (3 . 4) (5 . 6)) " a b c " "[abc]" " a b c ")
|
||||||
(t '((3 . 4) (5 . 6)) " a b c " "[abc]" " a b c " 2)
|
(t '((3 . 4) (5 . 6)) " a b c " "[abc]" " a b c " 2)
|
||||||
|
@ -151,6 +191,33 @@
|
||||||
(t '() "a b c" "^." "a b c" 0 5 #"x")
|
(t '() "a b c" "^." "a b c" 0 5 #"x")
|
||||||
(t '((0 . 2) (2 . 4) (4 . 6)) "a\nb\nc\n" "(?m:^.\n)" "a\nb\nc\n" 0 6)
|
(t '((0 . 2) (2 . 4) (4 . 6)) "a\nb\nc\n" "(?m:^.\n)" "a\nb\nc\n" 0 6)
|
||||||
(t '((2 . 4) (4 . 6)) "a\nb\nc\n" "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
|
(t '((2 . 4) (4 . 6)) "a\nb\nc\n" "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
|
||||||
|
;; --------------------
|
||||||
|
(t (list regexp-split
|
||||||
|
;; also via an equivalent `regexp-match*' configuration
|
||||||
|
(lambda xs
|
||||||
|
(apply regexp-match* xs #:match-select #f #:gap-select #t))))
|
||||||
|
(t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4")
|
||||||
|
(t '("2" "3" "4") eof "[abc]" "1a2b3c4" 2)
|
||||||
|
(t '("" "3" "4") eof "[abc]" "1a2b3c4" 3)
|
||||||
|
(t '("1" "") "2b3c4" "[abc]" "1a2b3c4" 0 2)
|
||||||
|
(t '("1" "2") "b3c4" "[abc]" "1a2b3c4" 0 3)
|
||||||
|
(t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4" 0 #f)
|
||||||
|
(t '("1" "2" "3" "4") eof "[abc]" "1a2b3c4" 0 7)
|
||||||
|
(t '("" "1" "2" "") eof "[abc]" "a1b2c")
|
||||||
|
(t '("1" "2" "") eof "[abc]" "a1b2c" 1)
|
||||||
|
(t '("" "2" "") eof "[abc]" "a1b2c" 2)
|
||||||
|
(t '("" "") "1b2c" "[abc]" "a1b2c" 0 1)
|
||||||
|
(t '("" "1") "b2c" "[abc]" "a1b2c" 0 2)
|
||||||
|
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0)
|
||||||
|
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0 #f)
|
||||||
|
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0 5)
|
||||||
|
(t '("" " b c") eof "^." "a b c" 0 5 #"")
|
||||||
|
(t '("a b c") eof "^." "a b c" 0 5 #"x")
|
||||||
|
(t '("" "" "" "") eof "(?m:^.\n)" "a\nb\nc\n" 0 6)
|
||||||
|
(t '("a\n" "" "") eof "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
|
||||||
|
(for ([cvt (in-list (list values byte-regexp byte-pregexp))])
|
||||||
|
(test '(#"" #"a" #"z" #"q" #"")
|
||||||
|
regexp-split (cvt #"\x80") #"\x80a\x80z\x80q\x80"))
|
||||||
;; ---------- tests with zero-length matches ----------
|
;; ---------- tests with zero-length matches ----------
|
||||||
;; Many of these tests can be repeated with Perl. To try something in Perl,
|
;; Many of these tests can be repeated with Perl. To try something in Perl,
|
||||||
;; put this code in a file:
|
;; put this code in a file:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user