improve regexp support (PR 10855, halfway)
This commit is contained in:
parent
3fe4a22dbd
commit
1c34ccec44
|
@ -108,25 +108,29 @@
|
||||||
;; Helper macro for the regexp functions below, with some utilities.
|
;; Helper macro for the regexp functions below, with some utilities.
|
||||||
(define (bstring-length s)
|
(define (bstring-length s)
|
||||||
(if (bytes? s) (bytes-length s) (string-length s)))
|
(if (bytes? s) (bytes-length s) (string-length s)))
|
||||||
(define no-empty-edge-matches
|
(define (no-empty-edge-matches n)
|
||||||
(make-regexp-tweaker (lambda (rx)
|
(make-regexp-tweaker (lambda (rx)
|
||||||
(if (bytes? rx)
|
(if (bytes? rx)
|
||||||
(bytes-append #"(?=.)(?:" rx #")(?<=.)")
|
(bytes-append #"(?=.)(?:" rx #")(?<=" (make-bytes n (char->integer #\.)) #")")
|
||||||
(format "(?=.)(?:~a)(?<=.)" rx)))))
|
(format "(?=.)(?:~a)(?<=~a)" rx (make-bytes n (char->integer #\.)))))))
|
||||||
(define (bstring->no-edge-regexp name pattern)
|
|
||||||
(if (or (regexp? pattern) (byte-regexp? pattern)
|
|
||||||
(string? pattern) (bytes? pattern))
|
|
||||||
(no-empty-edge-matches pattern)
|
|
||||||
(raise-type-error
|
|
||||||
name "regexp, byte regexp, string, or byte string" pattern)))
|
|
||||||
(define-syntax-rule (regexp-loop
|
(define-syntax-rule (regexp-loop
|
||||||
name loop start end rx string
|
name loop start end pattern string
|
||||||
|
ipre
|
||||||
success-choose failure-k
|
success-choose failure-k
|
||||||
port-success-k port-success-choose port-failure-k
|
port-success-k port-success-choose port-failure-k
|
||||||
need-leftover? peek?)
|
need-leftover? peek?)
|
||||||
(let ([len (cond [(string? string) (string-length string)]
|
(let* ([len (cond [(string? string) (string-length string)]
|
||||||
[(bytes? string) (bytes-length string)]
|
[(bytes? string) (bytes-length string)]
|
||||||
[else #f])])
|
[else #f])]
|
||||||
|
[orig-rx (cond [(bytes? pattern) (byte-regexp pattern)]
|
||||||
|
[(string? pattern) (regexp pattern)]
|
||||||
|
[(regexp? pattern) pattern]
|
||||||
|
[(byte-regexp? pattern) pattern]
|
||||||
|
[else
|
||||||
|
(raise-type-error 'name
|
||||||
|
"regexp, byte regexp, string, or byte string"
|
||||||
|
pattern)])]
|
||||||
|
[max-lookbehind (regexp-max-lookbehind orig-rx)])
|
||||||
(if peek?
|
(if peek?
|
||||||
(unless (input-port? string)
|
(unless (input-port? string)
|
||||||
(raise-type-error 'name "input port" string))
|
(raise-type-error 'name "input port" string))
|
||||||
|
@ -140,6 +144,8 @@
|
||||||
(and (number? end) (exact? end) (integer? end)
|
(and (number? end) (exact? end) (integer? end)
|
||||||
(end . >= . 0)))
|
(end . >= . 0)))
|
||||||
(raise-type-error 'name "non-negative exact integer or false" end))
|
(raise-type-error 'name "non-negative exact integer or false" end))
|
||||||
|
(unless (bytes? ipre)
|
||||||
|
(raise-type-error 'name "byte string" ipre))
|
||||||
(unless (or (input-port? string) (and len (start . <= . len)))
|
(unless (or (input-port? string) (and len (start . <= . len)))
|
||||||
(raise-mismatch-error
|
(raise-mismatch-error
|
||||||
'name
|
'name
|
||||||
|
@ -153,66 +159,76 @@
|
||||||
(format "ending offset index out of range [~a,~a]: " start len)
|
(format "ending offset index out of range [~a,~a]: " start len)
|
||||||
end))
|
end))
|
||||||
(reverse
|
(reverse
|
||||||
(let loop ([acc '()] [start start] [end end])
|
(let loop ([acc '()] [start start] [end end] [ipre ipre] [rx #f] [rx-lb 0])
|
||||||
|
(let* ([new-rx-lb (add1 (bytes-length ipre))]
|
||||||
|
[rx (if (= rx-lb new-rx-lb)
|
||||||
|
rx
|
||||||
|
((no-empty-edge-matches new-rx-lb) orig-rx))])
|
||||||
|
(if (and port-success-choose (input-port? string))
|
||||||
|
|
||||||
(if (and port-success-choose (input-port? string))
|
;; Input port match, get string
|
||||||
|
(let* ([_ (when (positive? start)
|
||||||
;; Input port match, get string
|
;; Skip start chars:
|
||||||
(let* ([_ (when (positive? start)
|
(let ([s (make-bytes 4096)])
|
||||||
;; Skip start chars:
|
(let loop ([n 0])
|
||||||
(let ([s (make-bytes 4096)])
|
(unless (= n start)
|
||||||
(let loop ([n 0])
|
(let ([m (read-bytes-avail!
|
||||||
(unless (= n start)
|
s string 0 (min (- start n) 4096))])
|
||||||
(let ([m (read-bytes-avail!
|
(unless (eof-object? m) (loop (+ n m))))))))]
|
||||||
s string 0 (min (- start n) 4096))])
|
[discarded/leftovers (if need-leftover? #f 0)]
|
||||||
(unless (eof-object? m) (loop (+ n m))))))))]
|
[spitout (if need-leftover?
|
||||||
[discarded/leftovers (if need-leftover? #f 0)]
|
(open-output-bytes)
|
||||||
[spitout (if need-leftover?
|
(make-output-port
|
||||||
(open-output-bytes)
|
'counter always-evt
|
||||||
(make-output-port
|
(lambda (s start end flush? breakable?)
|
||||||
'counter always-evt
|
(let ([c (- end start)])
|
||||||
(lambda (s start end flush? breakable?)
|
(set! discarded/leftovers
|
||||||
(let ([c (- end start)])
|
(+ c discarded/leftovers))
|
||||||
(set! discarded/leftovers
|
c))
|
||||||
(+ c discarded/leftovers))
|
void))]
|
||||||
c))
|
[end (and end (- end start))])
|
||||||
void))]
|
(let-values ([(m ipre) (regexp-match/end rx string 0 end spitout ipre
|
||||||
[end (and end (- end start))]
|
max-lookbehind)])
|
||||||
[m (regexp-match rx string 0 end spitout)]
|
(let* ([m (and m (car m))]
|
||||||
[m (and m (car m))]
|
[discarded/leftovers (if need-leftover?
|
||||||
[discarded/leftovers (if need-leftover?
|
(get-output-bytes spitout)
|
||||||
(get-output-bytes spitout)
|
discarded/leftovers)]
|
||||||
discarded/leftovers)]
|
[end (and end m
|
||||||
[end (and end m
|
(- end (if need-leftover?
|
||||||
(- end (if need-leftover?
|
(bstring-length discarded/leftovers)
|
||||||
(bstring-length discarded/leftovers)
|
discarded/leftovers)
|
||||||
discarded/leftovers)
|
(bstring-length m)))])
|
||||||
(bstring-length m)))])
|
(if m
|
||||||
(if m
|
(loop (cons (port-success-choose m discarded/leftovers) acc)
|
||||||
(loop (cons (port-success-choose m discarded/leftovers) acc)
|
0 end ipre
|
||||||
0 end)
|
rx new-rx-lb)
|
||||||
(port-failure-k acc discarded/leftovers)))
|
(port-failure-k acc discarded/leftovers)))))
|
||||||
|
|
||||||
;; String/port match, get positions
|
;; String/port match, get positions
|
||||||
(let ([m (if peek?
|
(let-values ([(m ipre)
|
||||||
(regexp-match-peek-positions rx string start end)
|
(if peek?
|
||||||
(regexp-match-positions rx string start end))])
|
(regexp-match-peek-positions/end rx string start end #f ipre
|
||||||
(if (not m)
|
max-lookbehind)
|
||||||
(failure-k acc start end)
|
(regexp-match-positions/end rx string start end #f ipre
|
||||||
(let ([mstart (caar m)] [mend (cdar m)])
|
max-lookbehind))])
|
||||||
(if port-success-k
|
|
||||||
(port-success-k
|
(if (not m)
|
||||||
(lambda (acc new-start new-end)
|
(failure-k acc start end)
|
||||||
(loop acc new-start new-end))
|
(let ([mstart (caar m)] [mend (cdar m)])
|
||||||
acc start end mstart mend)
|
(if port-success-k
|
||||||
(loop (cons (success-choose start mstart mend) acc)
|
(port-success-k
|
||||||
mend end))))))))))
|
(lambda (acc new-start new-end)
|
||||||
|
(loop acc new-start new-end ipre rx new-rx-lb))
|
||||||
|
acc start end mstart mend)
|
||||||
|
(loop (cons (success-choose start mstart mend) acc)
|
||||||
|
mend end ipre rx new-rx-lb)))))))))))
|
||||||
|
|
||||||
;; Returns all the positions at which the pattern matched.
|
;; Returns all the positions at which the pattern matched.
|
||||||
(define (regexp-match-positions* pattern string [start 0] [end #f])
|
(define (regexp-match-positions* pattern string [start 0] [end #f] [ipre #""])
|
||||||
(regexp-loop
|
(regexp-loop
|
||||||
regexp-match-positions* loop start end
|
regexp-match-positions* loop start end
|
||||||
(bstring->no-edge-regexp 'regexp-match-positions* pattern) string
|
pattern string
|
||||||
|
ipre
|
||||||
;; success-choose:
|
;; success-choose:
|
||||||
(lambda (start mstart mend) (cons mstart mend))
|
(lambda (start mstart mend) (cons mstart mend))
|
||||||
;; failure-k:
|
;; failure-k:
|
||||||
|
@ -233,10 +249,11 @@
|
||||||
#f))
|
#f))
|
||||||
|
|
||||||
;; Returns all the positions at which the pattern matched.
|
;; Returns all the positions at which the pattern matched.
|
||||||
(define (regexp-match-peek-positions* pattern string [start 0] [end #f])
|
(define (regexp-match-peek-positions* pattern string [start 0] [end #f] [ipre #""])
|
||||||
(regexp-loop
|
(regexp-loop
|
||||||
regexp-match-peek-positions* loop start end
|
regexp-match-peek-positions* loop start end
|
||||||
(bstring->no-edge-regexp 'regexp-match-peek-positions* pattern) string
|
pattern string
|
||||||
|
ipre
|
||||||
;; success-choose:
|
;; success-choose:
|
||||||
(lambda (start mstart mend) (cons mstart mend))
|
(lambda (start mstart mend) (cons mstart mend))
|
||||||
;; failure-k:
|
;; failure-k:
|
||||||
|
@ -250,13 +267,13 @@
|
||||||
|
|
||||||
;; Splits a string into a list by removing any piece which matches
|
;; Splits a string into a list by removing any piece which matches
|
||||||
;; the pattern.
|
;; the pattern.
|
||||||
(define (regexp-split pattern string [start 0] [end #f])
|
(define (regexp-split pattern string [start 0] [end #f] [ipre #""])
|
||||||
(define rx (bstring->no-edge-regexp 'regexp-split pattern))
|
(define buf (if (and (string? string) (or (byte-regexp? pattern)
|
||||||
(define buf (if (and (string? string) (byte-regexp? rx))
|
(bytes? pattern)))
|
||||||
(string->bytes/utf-8 string (char->integer #\?))
|
(string->bytes/utf-8 string (char->integer #\?))
|
||||||
string))
|
string))
|
||||||
(define sub (if (bytes? buf) subbytes substring))
|
(define sub (if (bytes? buf) subbytes substring))
|
||||||
(regexp-loop regexp-split loop start end rx buf
|
(regexp-loop regexp-split loop start end pattern buf ipre
|
||||||
;; success-choose:
|
;; success-choose:
|
||||||
(lambda (start mstart mend) (sub buf start mstart))
|
(lambda (start mstart mend) (sub buf start mstart))
|
||||||
;; failure-k:
|
;; failure-k:
|
||||||
|
@ -272,13 +289,13 @@
|
||||||
#f))
|
#f))
|
||||||
|
|
||||||
;; Returns all the matches for the pattern in the string.
|
;; Returns all the matches for the pattern in the string.
|
||||||
(define (regexp-match* pattern string [start 0] [end #f])
|
(define (regexp-match* pattern string [start 0] [end #f] [ipre #""])
|
||||||
(define rx (bstring->no-edge-regexp 'regexp-match* pattern))
|
(define buf (if (and (string? string) (or (byte-regexp? pattern)
|
||||||
(define buf (if (and (string? string) (byte-regexp? rx))
|
(bytes? pattern)))
|
||||||
(string->bytes/utf-8 string (char->integer #\?))
|
(string->bytes/utf-8 string (char->integer #\?))
|
||||||
string))
|
string))
|
||||||
(define sub (if (bytes? buf) subbytes substring))
|
(define sub (if (bytes? buf) subbytes substring))
|
||||||
(regexp-loop regexp-match* loop start end rx buf
|
(regexp-loop regexp-match* loop start end pattern buf ipre
|
||||||
;; success-choose:
|
;; success-choose:
|
||||||
(lambda (start mstart mend) (sub buf mstart mend))
|
(lambda (start mstart mend) (sub buf mstart mend))
|
||||||
;; failure-k:
|
;; failure-k:
|
||||||
|
|
|
@ -195,6 +195,16 @@ case-sensitively.
|
||||||
(regexp-match (regexp-quote ".") "apple.scm")
|
(regexp-match (regexp-quote ".") "apple.scm")
|
||||||
]}
|
]}
|
||||||
|
|
||||||
|
@defproc[(regexp-max-lookbehind [pattern (or/c regexp? byte-regexp?)])
|
||||||
|
exact-nonnegative-integer?]{
|
||||||
|
|
||||||
|
Returns the maximum number of bytes that @scheme[pattern] may consult
|
||||||
|
before the starting position of a match to determine the match. For
|
||||||
|
example, the pattern @litchar{(?<=abc)d} consults three bytes
|
||||||
|
preceding a matching @litchar{d}, while @litchar{e(?<=a..)d} consults
|
||||||
|
two bytes before a matching @litchar{ed}. A @litchar{^} pattern may
|
||||||
|
consult a preceding byte to determine whether the current position is
|
||||||
|
the start of the input or of a line.}
|
||||||
|
|
||||||
@;------------------------------------------------------------------------
|
@;------------------------------------------------------------------------
|
||||||
@section{Regexp Matching}
|
@section{Regexp Matching}
|
||||||
|
@ -203,7 +213,8 @@ case-sensitively.
|
||||||
[input (or/c string? bytes? input-port?)]
|
[input (or/c string? bytes? input-port?)]
|
||||||
[start-pos exact-nonnegative-integer? 0]
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
[output-port (or/c output-port? #f) #f])
|
[output-port (or/c output-port? #f) #f]
|
||||||
|
[input-prefix bytes? #""])
|
||||||
(if (and (or (string? pattern) (regexp? pattern))
|
(if (and (or (string? pattern) (regexp? pattern))
|
||||||
(string? input))
|
(string? input))
|
||||||
(or/c #f (cons/c string? (listof (or/c string? #f))))
|
(or/c #f (cons/c string? (listof (or/c string? #f))))
|
||||||
|
@ -228,9 +239,18 @@ is an input port, and if the end-of-file is reached before
|
||||||
@scheme[start-pos] bytes are skipped, then the match fails.
|
@scheme[start-pos] bytes are skipped, then the match fails.
|
||||||
|
|
||||||
In @scheme[pattern], a start-of-string @litchar{^} refers to the first
|
In @scheme[pattern], a start-of-string @litchar{^} refers to the first
|
||||||
position of @scheme[input] after @scheme[start-pos], and the
|
position of @scheme[input] after @scheme[start-pos], assuming that
|
||||||
end-of-input @litchar{$} refers to the @scheme[end-pos]th position or
|
@scheme[input-prefix] is @scheme[#""]. The end-of-input @litchar{$}
|
||||||
(in the case of an input port) the end of file, whichever comes first.
|
refers to the @scheme[end-pos]th position or (in the case of an input
|
||||||
|
port) the end of file, whichever comes first, assuming that
|
||||||
|
@scheme[output-prefix] is @scheme[#f].
|
||||||
|
|
||||||
|
The @scheme[input-prefix] specifies bytes that effectively precede
|
||||||
|
@scheme[input] for the purposes of @litchar{^} and other look-behind
|
||||||
|
matching. For example, a @scheme[#""] prefix means that @litchar{^}
|
||||||
|
matches at the beginning of the stream, while a @scheme[#"\n"]
|
||||||
|
@scheme[input-prefix] means that a start-of-line @litchar{^} can match
|
||||||
|
the beginning of the input, while a start-of-file @litchar{^} cannot.
|
||||||
|
|
||||||
If the match fails, @scheme[#f] is returned. If the match succeeds, a
|
If the match fails, @scheme[#f] is returned. If the match succeeds, a
|
||||||
list containing strings or byte string, and possibly @scheme[#f], is
|
list containing strings or byte string, and possibly @scheme[#f], is
|
||||||
|
@ -302,7 +322,8 @@ bytes. To avoid such interleaving, use @scheme[regexp-match-peek]
|
||||||
@defproc[(regexp-match* [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
@defproc[(regexp-match* [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||||
[input (or/c string? bytes? input-port?)]
|
[input (or/c string? bytes? input-port?)]
|
||||||
[start-pos exact-nonnegative-integer? 0]
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
[end-pos (or/c exact-nonnegative-integer? #f) #f])
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
|
[input-prefix bytes? #""])
|
||||||
(if (and (or (string? pattern) (regexp? pattern))
|
(if (and (or (string? pattern) (regexp? pattern))
|
||||||
(string? input))
|
(string? input))
|
||||||
(listof string?)
|
(listof string?)
|
||||||
|
@ -315,12 +336,14 @@ results for parenthesized sub-patterns in @scheme[pattern] are not
|
||||||
returned.)
|
returned.)
|
||||||
|
|
||||||
The @scheme[pattern] is used in order to find matches, where each
|
The @scheme[pattern] is used in order to find matches, where each
|
||||||
match attempt starts at the end of the last match. Empty matches are
|
match attempt starts at the end of the last match, and @litchar{$} is
|
||||||
handled like any matches, returning a zero-length string or byte
|
allowed to match the beginning of the input (if @scheme[input-prefix]
|
||||||
sequence (they are more useful in the complementing
|
is @scheme[#""]) only for the first match. Empty matches are handled
|
||||||
@scheme[regexp-split] function). However, the @scheme[pattern] is
|
like other matches, returning a zero-length string or byte sequence
|
||||||
restricted from matching an empty string at the beginning (or right
|
(they are more useful in the complementing @scheme[regexp-split]
|
||||||
after a previous match) or at the end.
|
function), but @scheme[pattern] is restricted from matching an empty
|
||||||
|
string at the beginning (or right after a previous match) or at the
|
||||||
|
end.
|
||||||
|
|
||||||
If @scheme[input] contains no matches (in the range @scheme[start-pos]
|
If @scheme[input] contains no matches (in the range @scheme[start-pos]
|
||||||
to @scheme[end-pos]), @scheme[null] is returned. Otherwise, each item
|
to @scheme[end-pos]), @scheme[null] is returned. Otherwise, each item
|
||||||
|
@ -339,7 +362,8 @@ port).
|
||||||
[input input-port?]
|
[input input-port?]
|
||||||
[start-pos exact-nonnegative-integer? 0]
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
[output-port (or/c output-port? #f) #f])
|
[output-port (or/c output-port? #f) #f]
|
||||||
|
[input-prefix bytes? #""])
|
||||||
(if (and (or (string? pattern) (regexp? pattern))
|
(if (and (or (string? pattern) (regexp? pattern))
|
||||||
(string? input))
|
(string? input))
|
||||||
(or/c #f (cons/c string? (listof (or/c string? #f))))
|
(or/c #f (cons/c string? (listof (or/c string? #f))))
|
||||||
|
@ -360,7 +384,8 @@ fails.}
|
||||||
[input (or/c string? bytes? input-port?)]
|
[input (or/c string? bytes? input-port?)]
|
||||||
[start-pos exact-nonnegative-integer? 0]
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
[output-port (or/c output-port? #f) #f])
|
[output-port (or/c output-port? #f) #f]
|
||||||
|
[input-prefix bytes? #""])
|
||||||
(or/c (cons/c (cons/c exact-nonnegative-integer?
|
(or/c (cons/c (cons/c exact-nonnegative-integer?
|
||||||
exact-nonnegative-integer?)
|
exact-nonnegative-integer?)
|
||||||
(listof (or/c (cons/c exact-nonnegative-integer?
|
(listof (or/c (cons/c exact-nonnegative-integer?
|
||||||
|
@ -388,11 +413,11 @@ positions indicate the number of bytes that were read, including
|
||||||
(regexp-match-positions #rx"(-[0-9]*)+" "a-12--345b")
|
(regexp-match-positions #rx"(-[0-9]*)+" "a-12--345b")
|
||||||
]}
|
]}
|
||||||
|
|
||||||
|
|
||||||
@defproc[(regexp-match-positions* [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
@defproc[(regexp-match-positions* [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||||
[input (or/c string? bytes? input-port?)]
|
[input (or/c string? bytes? input-port?)]
|
||||||
[start-pos exact-nonnegative-integer? 0]
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
[end-pos (or/c exact-nonnegative-integer? #f) #f])
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
|
[input-prefix bytes? #""])
|
||||||
(listof (cons/c exact-nonnegative-integer?
|
(listof (cons/c exact-nonnegative-integer?
|
||||||
exact-nonnegative-integer?))]{
|
exact-nonnegative-integer?))]{
|
||||||
|
|
||||||
|
@ -408,7 +433,8 @@ like @scheme[regexp-match*].
|
||||||
[input (or/c string? bytes? input-port?)]
|
[input (or/c string? bytes? input-port?)]
|
||||||
[start-pos exact-nonnegative-integer? 0]
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
[output-port (or/c output-port? #f) #f])
|
[output-port (or/c output-port? #f) #f]
|
||||||
|
[input-prefix bytes? #""])
|
||||||
boolean?]{
|
boolean?]{
|
||||||
|
|
||||||
Like @scheme[regexp-match], but returns merely @scheme[#t] when the
|
Like @scheme[regexp-match], but returns merely @scheme[#t] when the
|
||||||
|
@ -437,19 +463,20 @@ entire content of @scheme[input] matches @scheme[pattern].
|
||||||
[input input-port?]
|
[input input-port?]
|
||||||
[start-pos exact-nonnegative-integer? 0]
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
[progress (or/c evt #f) #f])
|
[progress (or/c evt #f) #f]
|
||||||
|
[input-prefix bytes? #""])
|
||||||
(or/c (cons/c bytes? (listof (or/c bytes? #f)))
|
(or/c (cons/c bytes? (listof (or/c bytes? #f)))
|
||||||
#f)]{
|
#f)]{
|
||||||
|
|
||||||
Like @scheme[regexp-match] on input ports, but only peeks bytes from
|
Like @scheme[regexp-match] on input ports, but only peeks bytes from
|
||||||
@scheme[input-port] instead of reading them. Furthermore, instead of
|
@scheme[input] instead of reading them. Furthermore, instead of
|
||||||
an output port, the last optional argument is a progress event for
|
an output port, the last optional argument is a progress event for
|
||||||
@scheme[input-port] (see @scheme[port-progress-evt]). If @scheme[progress]
|
@scheme[input] (see @scheme[port-progress-evt]). If @scheme[progress]
|
||||||
becomes ready, then the match stops peeking from @scheme[input-port]
|
becomes ready, then the match stops peeking from @scheme[input]
|
||||||
and returns @scheme[#f]. The @scheme[progress] argument can be
|
and returns @scheme[#f]. The @scheme[progress] argument can be
|
||||||
@scheme[#f], in which case the peek may continue with inconsistent
|
@scheme[#f], in which case the peek may continue with inconsistent
|
||||||
information if another process meanwhile reads from
|
information if another process meanwhile reads from
|
||||||
@scheme[input-port].
|
@scheme[input].
|
||||||
|
|
||||||
@examples[
|
@examples[
|
||||||
(define p (open-input-string "a abcd"))
|
(define p (open-input-string "a abcd"))
|
||||||
|
@ -466,7 +493,8 @@ information if another process meanwhile reads from
|
||||||
[input input-port?]
|
[input input-port?]
|
||||||
[start-pos exact-nonnegative-integer? 0]
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
[progress (or/c evt #f) #f])
|
[progress (or/c evt #f) #f]
|
||||||
|
[input-prefix bytes? #""])
|
||||||
(or/c (cons/c (cons/c exact-nonnegative-integer?
|
(or/c (cons/c (cons/c exact-nonnegative-integer?
|
||||||
exact-nonnegative-integer?)
|
exact-nonnegative-integer?)
|
||||||
(listof (or/c (cons/c exact-nonnegative-integer?
|
(listof (or/c (cons/c exact-nonnegative-integer?
|
||||||
|
@ -475,7 +503,7 @@ information if another process meanwhile reads from
|
||||||
#f)]{
|
#f)]{
|
||||||
|
|
||||||
Like @scheme[regexp-match-positions] on input ports, but only peeks
|
Like @scheme[regexp-match-positions] on input ports, but only peeks
|
||||||
bytes from @scheme[input-port] instead of reading them, and with a
|
bytes from @scheme[input] instead of reading them, and with a
|
||||||
@scheme[progress] argument like @scheme[regexp-match-peek].}
|
@scheme[progress] argument like @scheme[regexp-match-peek].}
|
||||||
|
|
||||||
|
|
||||||
|
@ -483,12 +511,13 @@ bytes from @scheme[input-port] instead of reading them, and with a
|
||||||
[input input-port?]
|
[input input-port?]
|
||||||
[start-pos exact-nonnegative-integer? 0]
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
[progress (or/c evt #f) #f])
|
[progress (or/c evt #f) #f]
|
||||||
|
[input-prefix bytes? #""])
|
||||||
(or/c (cons/c bytes? (listof (or/c bytes? #f)))
|
(or/c (cons/c bytes? (listof (or/c bytes? #f)))
|
||||||
#f)]{
|
#f)]{
|
||||||
|
|
||||||
Like @scheme[regexp-match-peek], but it attempts to match only bytes
|
Like @scheme[regexp-match-peek], but it attempts to match only bytes
|
||||||
that are available from @scheme[input-port] without blocking. The
|
that are available from @scheme[input] without blocking. The
|
||||||
match fails if not-yet-available characters might be used to match
|
match fails if not-yet-available characters might be used to match
|
||||||
@scheme[pattern].}
|
@scheme[pattern].}
|
||||||
|
|
||||||
|
@ -497,7 +526,8 @@ match fails if not-yet-available characters might be used to match
|
||||||
[input input-port?]
|
[input input-port?]
|
||||||
[start-pos exact-nonnegative-integer? 0]
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
[progress (or/c evt #f) #f])
|
[progress (or/c evt #f) #f]
|
||||||
|
[input-prefix bytes? #""])
|
||||||
(or/c (cons/c (cons/c exact-nonnegative-integer?
|
(or/c (cons/c (cons/c exact-nonnegative-integer?
|
||||||
exact-nonnegative-integer?)
|
exact-nonnegative-integer?)
|
||||||
(listof (or/c (cons/c exact-nonnegative-integer?
|
(listof (or/c (cons/c exact-nonnegative-integer?
|
||||||
|
@ -506,7 +536,7 @@ match fails if not-yet-available characters might be used to match
|
||||||
#f)]{
|
#f)]{
|
||||||
|
|
||||||
Like @scheme[regexp-match-peek-positions], but it attempts to match
|
Like @scheme[regexp-match-peek-positions], but it attempts to match
|
||||||
only bytes that are available from @scheme[input-port] without
|
only bytes that are available from @scheme[input] without
|
||||||
blocking. The match fails if not-yet-available characters might be
|
blocking. The match fails if not-yet-available characters might be
|
||||||
used to match @scheme[pattern].}
|
used to match @scheme[pattern].}
|
||||||
|
|
||||||
|
@ -514,20 +544,91 @@ used to match @scheme[pattern].}
|
||||||
@defproc[(regexp-match-peek-positions* [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
@defproc[(regexp-match-peek-positions* [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||||
[input input-port?]
|
[input input-port?]
|
||||||
[start-pos exact-nonnegative-integer? 0]
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
[end-pos (or/c exact-nonnegative-integer? #f) #f])
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
|
[input-prefix bytes? #""])
|
||||||
(listof (cons/c exact-nonnegative-integer?
|
(listof (cons/c exact-nonnegative-integer?
|
||||||
exact-nonnegative-integer?))]{
|
exact-nonnegative-integer?))]{
|
||||||
|
|
||||||
Like @scheme[regexp-match-peek-positions], but returns multiple matches like
|
Like @scheme[regexp-match-peek-positions], but returns multiple matches like
|
||||||
@scheme[regexp-match*].}
|
@scheme[regexp-match*].}
|
||||||
|
|
||||||
|
@defproc[(regexp-match/end [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||||
|
[input (or/c string? bytes? input-port?)]
|
||||||
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
|
[output-port (or/c output-port? #f) #f]
|
||||||
|
[input-prefix bytes? #""]
|
||||||
|
[count nonnegative-exact-integer? 1])
|
||||||
|
(values
|
||||||
|
(if (and (or (string? pattern) (regexp? pattern))
|
||||||
|
(string? input))
|
||||||
|
(or/c #f (cons/c string? (listof (or/c string? #f))))
|
||||||
|
(or/c #f (cons/c bytes? (listof (or/c bytes? #f)))))
|
||||||
|
(or/c #f bytes?))]{
|
||||||
|
|
||||||
|
Like @scheme[regexp-match], but with a second result: a byte
|
||||||
|
string of up to @scheme[count] bytes that correspond to the input
|
||||||
|
(possibly including the @scheme[input-prefix]) leading to the end of
|
||||||
|
the match; the second result is @scheme[#f] if no match is found.
|
||||||
|
|
||||||
|
The second result can be useful as an @scheme[input-prefix] for
|
||||||
|
attempting a second match on @scheme[input] starting from the end of
|
||||||
|
the first match. In that case, use @scheme[regexp-max-lookbehind]
|
||||||
|
to determine an appropriate value for @scheme[count].}
|
||||||
|
|
||||||
|
@deftogether[(
|
||||||
|
@defproc[(regexp-match-positions/end [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||||
|
[input (or/c string? bytes? input-port?)]
|
||||||
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
|
[input-prefix bytes? #""]
|
||||||
|
[count exact-nonnegative-integer? 1])
|
||||||
|
(values (listof (cons/c exact-nonnegative-integer?
|
||||||
|
exact-nonnegative-integer?))
|
||||||
|
(or/c #f bytes?))]
|
||||||
|
@defproc[(regexp-match-peek-positions/end [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||||
|
[input input-port?]
|
||||||
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
|
[progress (or/c evt #f) #f]
|
||||||
|
[input-prefix bytes? #""]
|
||||||
|
[count exact-nonnegative-integer? 1])
|
||||||
|
(values
|
||||||
|
(or/c (cons/c (cons/c exact-nonnegative-integer?
|
||||||
|
exact-nonnegative-integer?)
|
||||||
|
(listof (or/c (cons/c exact-nonnegative-integer?
|
||||||
|
exact-nonnegative-integer?)
|
||||||
|
#f)))
|
||||||
|
#f)
|
||||||
|
(or/c #f bytes?))]
|
||||||
|
@defproc[(regexp-match-peek-positions-immediate/end [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||||
|
[input input-port?]
|
||||||
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
|
[progress (or/c evt #f) #f]
|
||||||
|
[input-prefix bytes? #""]
|
||||||
|
[count exact-nonnegative-integer? 1])
|
||||||
|
(values
|
||||||
|
(or/c (cons/c (cons/c exact-nonnegative-integer?
|
||||||
|
exact-nonnegative-integer?)
|
||||||
|
(listof (or/c (cons/c exact-nonnegative-integer?
|
||||||
|
exact-nonnegative-integer?)
|
||||||
|
#f)))
|
||||||
|
#f)
|
||||||
|
(or/c #f bytes?))]
|
||||||
|
)]{
|
||||||
|
|
||||||
|
Like @scheme[regexp-match-positions], etc., but with a second result
|
||||||
|
like @scheme[regexp-match/end].}
|
||||||
|
|
||||||
@;------------------------------------------------------------------------
|
@;------------------------------------------------------------------------
|
||||||
@section{Regexp Splitting}
|
@section{Regexp Splitting}
|
||||||
|
|
||||||
@defproc[(regexp-split [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
@defproc[(regexp-split [pattern (or/c string? bytes? regexp? byte-regexp?)]
|
||||||
[input (or/c string? bytes? input-port?)]
|
[input (or/c string? bytes? input-port?)]
|
||||||
[start-pos exact-nonnegative-integer? 0]
|
[start-pos exact-nonnegative-integer? 0]
|
||||||
[end-pos (or/c exact-nonnegative-integer? #f) #f])
|
[end-pos (or/c exact-nonnegative-integer? #f) #f]
|
||||||
|
[input-prefix bytes? #""])
|
||||||
(if (and (or (string? pattern) (regexp? pattern))
|
(if (and (or (string? pattern) (regexp? pattern))
|
||||||
(string? input))
|
(string? input))
|
||||||
(cons/c string? (listof string?))
|
(cons/c string? (listof string?))
|
||||||
|
@ -567,7 +668,8 @@ an end-of-file if @scheme[input] is an input port).
|
||||||
[input (or/c string? bytes?)]
|
[input (or/c string? bytes?)]
|
||||||
[insert (or/c string? bytes?
|
[insert (or/c string? bytes?
|
||||||
((string?) () #:rest (listof string?) . ->* . string?)
|
((string?) () #:rest (listof string?) . ->* . string?)
|
||||||
((bytes?) () #:rest (listof bytes?) . ->* . bytes?))])
|
((bytes?) () #:rest (listof bytes?) . ->* . bytes?))]
|
||||||
|
[input-prefix bytes? #""])
|
||||||
(if (and (or (string? pattern) (regexp? pattern))
|
(if (and (or (string? pattern) (regexp? pattern))
|
||||||
(string? input))
|
(string? input))
|
||||||
string?
|
string?
|
||||||
|
@ -634,7 +736,8 @@ before the @litchar{\}. For example, the Scheme constant
|
||||||
[input (or/c string? bytes?)]
|
[input (or/c string? bytes?)]
|
||||||
[insert (or/c string? bytes?
|
[insert (or/c string? bytes?
|
||||||
(string? . -> . string?)
|
(string? . -> . string?)
|
||||||
(bytes? . -> . bytes?))])
|
(bytes? . -> . bytes?))]
|
||||||
|
[input-prefix bytes? #""])
|
||||||
(or/c string? bytes?)]{
|
(or/c string? bytes?)]{
|
||||||
|
|
||||||
Like @scheme[regexp-replace], except that every instance of
|
Like @scheme[regexp-replace], except that every instance of
|
||||||
|
|
|
@ -1266,12 +1266,12 @@
|
||||||
|
|
||||||
(arity-test regexp 1 1)
|
(arity-test regexp 1 1)
|
||||||
(arity-test regexp? 1 1)
|
(arity-test regexp? 1 1)
|
||||||
(arity-test regexp-match 2 5)
|
(arity-test regexp-match 2 6)
|
||||||
(arity-test regexp-match-positions 2 5)
|
(arity-test regexp-match-positions 2 6)
|
||||||
(arity-test regexp-match-peek 2 5)
|
(arity-test regexp-match-peek 2 6)
|
||||||
(arity-test regexp-match-peek-positions 2 5)
|
(arity-test regexp-match-peek-positions 2 6)
|
||||||
(arity-test regexp-replace 3 3)
|
(arity-test regexp-replace 3 4)
|
||||||
(arity-test regexp-replace* 3 3)
|
(arity-test regexp-replace* 3 4)
|
||||||
|
|
||||||
(test #t procedure? car)
|
(test #t procedure? car)
|
||||||
(test #f procedure? 'car)
|
(test #f procedure? 'car)
|
||||||
|
|
|
@ -1435,7 +1435,7 @@
|
||||||
#"^(?:a?b?)*$"
|
#"^(?:a?b?)*$"
|
||||||
(#"(?=(a+?))(\\1ab)" #"aaab" (#"aab" #"a" #"aab"))
|
(#"(?=(a+?))(\\1ab)" #"aaab" (#"aab" #"a" #"aab"))
|
||||||
(#"(\\w+:)+" #"one:" (#"one:" #"one:"))
|
(#"(\\w+:)+" #"one:" (#"one:" #"one:"))
|
||||||
(#"$(?<=^(a))" #"a" #f)
|
(#"$(?<=^(a))" #"a" (#"" #"a"))
|
||||||
(#"(?=(a+?))(\\1ab)" #"aaab" (#"aab" #"a" #"aab"))
|
(#"(?=(a+?))(\\1ab)" #"aaab" (#"aab" #"a" #"aab"))
|
||||||
(#"^(?=(a+?))\\1ab" #"aaab" #f)
|
(#"^(?=(a+?))\\1ab" #"aaab" #f)
|
||||||
(#"^(?=(a+?))\\1ab" #"aaab" #f)
|
(#"^(?=(a+?))\\1ab" #"aaab" #f)
|
||||||
|
@ -1710,4 +1710,52 @@
|
||||||
|
|
||||||
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;; Check prefixes and suffixes that disallow matches at ^ at start or $ at end:
|
||||||
|
(let ([try
|
||||||
|
(lambda (regexp-match input output [output2 output])
|
||||||
|
(let ([try-succeed
|
||||||
|
(lambda (output i s e)
|
||||||
|
(test (output (list "a")) regexp-match #rx"^a" (i) s e #f #"")
|
||||||
|
(test (output (list "a")) regexp-match #rx"(?m:^a)" (i) s e #f #"\n")
|
||||||
|
(test (output (list "a")) regexp-match #rx"(?m:^a)" (i) s e #f #"x\n"))])
|
||||||
|
(try-succeed output (lambda () (input "a")) 0 #f)
|
||||||
|
(try-succeed output2 (lambda () (input "xay")) 1 2))
|
||||||
|
(let ([try-fail
|
||||||
|
(lambda (i s e)
|
||||||
|
(test #f regexp-match #rx"^a" (i) s e #f #"\n")
|
||||||
|
(test #f regexp-match #rx"^a" (i) s e #f #"x\n")
|
||||||
|
(let ([try-always-fail
|
||||||
|
(lambda (m)
|
||||||
|
(test #f regexp-match (m #rx"^a") (i) s e #f #"x")
|
||||||
|
(test #f regexp-match (m #rx"^a") (i) s e #f #"\nx"))])
|
||||||
|
(try-always-fail values)
|
||||||
|
(try-always-fail (lambda (rx) (regexp (format "(?m:~a)" (object-name rx)))))))])
|
||||||
|
(try-fail (lambda () (input "a")) 0 #f)
|
||||||
|
(try-fail (lambda () (input "xay")) 1 2)))])
|
||||||
|
(try regexp-match values values)
|
||||||
|
(try regexp-match? values (lambda (a) (and a #t)))
|
||||||
|
(try regexp-match string->bytes/utf-8 (lambda (l) (map string->bytes/utf-8 l)))
|
||||||
|
(try regexp-match open-input-string (lambda (l) (map string->bytes/utf-8 l)))
|
||||||
|
(try regexp-match-positions values
|
||||||
|
(lambda (s) (and s '((0 . 1))))
|
||||||
|
(lambda (s) (and s '((1 . 2))))))
|
||||||
|
|
||||||
|
;; regexp-replace* disallows start matching after the first match:
|
||||||
|
(test "baa" regexp-replace* #rx"^a" "aaa" "b")
|
||||||
|
|
||||||
|
;; regexp-replace* disallows start matching after the first match:
|
||||||
|
(test "bbb" regexp-replace* #rx"(?m:^a\n)" "a\na\na\n" "b")
|
||||||
|
(test "bba\n" regexp-replace* #rx"(?m:^a[z\n])" "a\naza\n" "b")
|
||||||
|
|
||||||
|
(test-values `(("abc") #"c")
|
||||||
|
(lambda () (regexp-match/end #rx".*" "abc")))
|
||||||
|
(test-values `(((0 . 3)) #"c")
|
||||||
|
(lambda () (regexp-match-positions/end #rx".*" "abc")))
|
||||||
|
(test-values `(((0 . 3)) #"c")
|
||||||
|
(lambda () (regexp-match-peek-positions/end #rx".*" (open-input-string "abc"))))
|
||||||
|
(test-values `(((0 . 3)) #"c")
|
||||||
|
(lambda () (regexp-match-peek-positions-immediate/end #rx".*" (open-input-string "abc"))))
|
||||||
|
|
||||||
|
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
(report-errs)
|
(report-errs)
|
||||||
|
|
|
@ -75,6 +75,10 @@
|
||||||
(t '("a" "b" "c") eof "[abc]" "a b c" 0)
|
(t '("a" "b" "c") eof "[abc]" "a b c" 0)
|
||||||
(t '("a" "b" "c") eof "[abc]" "a b c" 0 #f)
|
(t '("a" "b" "c") eof "[abc]" "a b c" 0 #f)
|
||||||
(t '("a" "b" "c") eof "[abc]" "a b c" 0 5)
|
(t '("a" "b" "c") eof "[abc]" "a b c" 0 5)
|
||||||
|
(t '("a") eof "^." "a b c" 0 5 #"")
|
||||||
|
(t '() eof "^." "a b c" 0 5 #"x")
|
||||||
|
(t '("a\n" "b\n" "c\n") eof "(?m:^.\n)" "a\nb\nc\n" 0 6)
|
||||||
|
(t '("b\n" "c\n") eof "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
|
||||||
(for-each (lambda (cvt)
|
(for-each (lambda (cvt)
|
||||||
(test '(#"\x80" #"\x80") regexp-match* (cvt #"\x80") #"a\x80z\x80q"))
|
(test '(#"\x80" #"\x80") regexp-match* (cvt #"\x80") #"a\x80z\x80q"))
|
||||||
(list values byte-regexp byte-pregexp))
|
(list values byte-regexp byte-pregexp))
|
||||||
|
@ -95,6 +99,10 @@
|
||||||
(t '((0 . 1) (2 . 3) (4 . 5)) eof "[abc]" "a b c" 0)
|
(t '((0 . 1) (2 . 3) (4 . 5)) eof "[abc]" "a b c" 0)
|
||||||
(t '((0 . 1) (2 . 3) (4 . 5)) eof "[abc]" "a b c" 0 #f)
|
(t '((0 . 1) (2 . 3) (4 . 5)) eof "[abc]" "a b c" 0 #f)
|
||||||
(t '((0 . 1) (2 . 3) (4 . 5)) eof "[abc]" "a b c" 0 5)
|
(t '((0 . 1) (2 . 3) (4 . 5)) eof "[abc]" "a b c" 0 5)
|
||||||
|
(t '((0 . 1)) eof "^." "a b c" 0 5 #"")
|
||||||
|
(t '() eof "^." "a b c" 0 5 #"x")
|
||||||
|
(t '((0 . 2) (2 . 4) (4 . 6)) eof "(?m:^.\n)" "a\nb\nc\n" 0 6)
|
||||||
|
(t '((2 . 4) (4 . 6)) eof "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
|
||||||
(for-each (lambda (cvt)
|
(for-each (lambda (cvt)
|
||||||
(test '((1 . 2) (3 . 4)) regexp-match-positions* (cvt #"\x80") #"a\x80z\x80q"))
|
(test '((1 . 2) (3 . 4)) regexp-match-positions* (cvt #"\x80") #"a\x80z\x80q"))
|
||||||
(list values byte-regexp byte-pregexp))
|
(list values byte-regexp byte-pregexp))
|
||||||
|
@ -115,6 +123,10 @@
|
||||||
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0)
|
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0)
|
||||||
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0 #f)
|
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0 #f)
|
||||||
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0 5)
|
(t '("" "1" "2" "") eof "[abc]" "a1b2c" 0 5)
|
||||||
|
(t '("" " b c") eof "^." "a b c" 0 5 #"")
|
||||||
|
(t '("a b c") eof "^." "a b c" 0 5 #"x")
|
||||||
|
(t '("" "" "" "") eof "(?m:^.\n)" "a\nb\nc\n" 0 6)
|
||||||
|
(t '("a\n" "" "") eof "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
|
||||||
(for-each (lambda (cvt)
|
(for-each (lambda (cvt)
|
||||||
(test '(#"" #"a" #"z" #"q" #"") regexp-split (cvt #"\x80") #"\x80a\x80z\x80q\x80"))
|
(test '(#"" #"a" #"z" #"q" #"") regexp-split (cvt #"\x80") #"\x80a\x80z\x80q\x80"))
|
||||||
(list values byte-regexp byte-pregexp))
|
(list values byte-regexp byte-pregexp))
|
||||||
|
@ -135,6 +147,10 @@
|
||||||
(t '((0 . 1)) "a b c" "[abc]" "a b c" 0 2)
|
(t '((0 . 1)) "a b c" "[abc]" "a b c" 0 2)
|
||||||
(t '((0 . 1) (2 . 3) (4 . 5)) "a b c" "[abc]" "a b c" 0 #f)
|
(t '((0 . 1) (2 . 3) (4 . 5)) "a b c" "[abc]" "a b c" 0 #f)
|
||||||
(t '((0 . 1) (2 . 3) (4 . 5)) "a b c" "[abc]" "a b c" 0 5)
|
(t '((0 . 1) (2 . 3) (4 . 5)) "a b c" "[abc]" "a b c" 0 5)
|
||||||
|
(t '((0 . 1)) "a b c" "^." "a b c" 0 5 #"")
|
||||||
|
(t '() "a b c" "^." "a b c" 0 5 #"x")
|
||||||
|
(t '((0 . 2) (2 . 4) (4 . 6)) "a\nb\nc\n" "(?m:^.\n)" "a\nb\nc\n" 0 6)
|
||||||
|
(t '((2 . 4) (4 . 6)) "a\nb\nc\n" "(?m:^.\n)" "a\nb\nc\n" 0 6 #"x")
|
||||||
;; ---------- tests with zero-length matches ----------
|
;; ---------- tests with zero-length matches ----------
|
||||||
;; Many of these tests can be repeated with Perl. To try something in Perl,
|
;; Many of these tests can be repeated with Perl. To try something in Perl,
|
||||||
;; put this code in a file:
|
;; put this code in a file:
|
||||||
|
|
|
@ -1,3 +1,7 @@
|
||||||
|
Version 4.2.5.9
|
||||||
|
regexp-match* et al. now disable ^ matching on all but the first
|
||||||
|
match
|
||||||
|
|
||||||
Version 4.2.5.3
|
Version 4.2.5.3
|
||||||
Added chaperones
|
Added chaperones
|
||||||
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -5130,6 +5130,7 @@ static int mark_regwork_MARK(void *p, struct NewGC *gc) {
|
||||||
gcMARK2(r->endp, gc);
|
gcMARK2(r->endp, gc);
|
||||||
gcMARK2(r->counters, gc);
|
gcMARK2(r->counters, gc);
|
||||||
gcMARK2(r->peekskip, gc);
|
gcMARK2(r->peekskip, gc);
|
||||||
|
gcMARK2(r->prefix, gc);
|
||||||
return
|
return
|
||||||
gcBYTES_TO_WORDS(sizeof(Regwork));
|
gcBYTES_TO_WORDS(sizeof(Regwork));
|
||||||
}
|
}
|
||||||
|
@ -5145,6 +5146,7 @@ static int mark_regwork_FIXUP(void *p, struct NewGC *gc) {
|
||||||
gcFIXUP2(r->endp, gc);
|
gcFIXUP2(r->endp, gc);
|
||||||
gcFIXUP2(r->counters, gc);
|
gcFIXUP2(r->counters, gc);
|
||||||
gcFIXUP2(r->peekskip, gc);
|
gcFIXUP2(r->peekskip, gc);
|
||||||
|
gcFIXUP2(r->prefix, gc);
|
||||||
return
|
return
|
||||||
gcBYTES_TO_WORDS(sizeof(Regwork));
|
gcBYTES_TO_WORDS(sizeof(Regwork));
|
||||||
}
|
}
|
||||||
|
|
|
@ -2101,6 +2101,7 @@ mark_regwork {
|
||||||
gcMARK2(r->endp, gc);
|
gcMARK2(r->endp, gc);
|
||||||
gcMARK2(r->counters, gc);
|
gcMARK2(r->counters, gc);
|
||||||
gcMARK2(r->peekskip, gc);
|
gcMARK2(r->peekskip, gc);
|
||||||
|
gcMARK2(r->prefix, gc);
|
||||||
size:
|
size:
|
||||||
gcBYTES_TO_WORDS(sizeof(Regwork));
|
gcBYTES_TO_WORDS(sizeof(Regwork));
|
||||||
}
|
}
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -13,7 +13,7 @@
|
||||||
|
|
||||||
#define USE_COMPILED_STARTUP 1
|
#define USE_COMPILED_STARTUP 1
|
||||||
|
|
||||||
#define EXPECTED_PRIM_COUNT 983
|
#define EXPECTED_PRIM_COUNT 988
|
||||||
#define EXPECTED_UNSAFE_COUNT 65
|
#define EXPECTED_UNSAFE_COUNT 65
|
||||||
#define EXPECTED_FLFXNUM_COUNT 53
|
#define EXPECTED_FLFXNUM_COUNT 53
|
||||||
|
|
||||||
|
|
|
@ -26,11 +26,12 @@ typedef struct regexp {
|
||||||
#endif
|
#endif
|
||||||
} regexp;
|
} regexp;
|
||||||
|
|
||||||
#define REGEXP_IS_UTF8 0x01
|
#define REGEXP_IS_UTF8 0x01
|
||||||
#define REGEXP_IS_PCRE 0x02
|
#define REGEXP_IS_PCRE 0x02
|
||||||
#define REGEXP_ANCH 0x04
|
#define REGEXP_ANCH 0x04
|
||||||
#define REGEXP_MUST_CI 0x08
|
#define REGEXP_MUST_CI 0x08
|
||||||
#define REGEXP_JIT 0x10
|
#define REGEXP_JIT 0x10
|
||||||
|
#define REGEXP_LOOKBEHIND 0x20
|
||||||
|
|
||||||
#ifdef INDIRECT_TO_PROGRAM
|
#ifdef INDIRECT_TO_PROGRAM
|
||||||
# define N_ITO_DELTA(prog, extra, re) extra
|
# define N_ITO_DELTA(prog, extra, re) extra
|
||||||
|
@ -217,14 +218,17 @@ typedef struct Regwork {
|
||||||
char *instr;
|
char *instr;
|
||||||
Scheme_Object *port;
|
Scheme_Object *port;
|
||||||
Scheme_Object *unless_evt;
|
Scheme_Object *unless_evt;
|
||||||
short nonblock, aborted;
|
char nonblock, aborted;
|
||||||
rxpos instr_size; /* For port reads */
|
rxpos instr_size; /* For port reads */
|
||||||
rxpos input_maxend; /* For port reads */
|
rxpos input_maxend; /* For port reads */
|
||||||
rxpos input, input_end, input_start; /* String-input pointer. */
|
rxpos input, input_end, input_start; /* String-input pointer. */
|
||||||
rxpos boi, bol; /* Beginning of input/line, for ^ check. */
|
rxpos input_min; /* input_start minus prefix_size */
|
||||||
|
rxpos boi; /* Beginning of input, for ^ check. */
|
||||||
rxpos *startp; /* Pointer to startp array. */
|
rxpos *startp; /* Pointer to startp array. */
|
||||||
rxpos *maybep; /* Pointer to tentative startp array. */
|
rxpos *maybep; /* Pointer to tentative startp array. */
|
||||||
rxpos *endp; /* Ditto for endp. */
|
rxpos *endp; /* Ditto for endp. */
|
||||||
int *counters; /* For {} counters */
|
int *counters; /* For {} counters */
|
||||||
Scheme_Object *peekskip;
|
Scheme_Object *peekskip;
|
||||||
|
char *prefix;
|
||||||
|
rxpos prefix_len, prefix_delta;
|
||||||
} Regwork;
|
} Regwork;
|
||||||
|
|
|
@ -13,12 +13,12 @@
|
||||||
consistently.)
|
consistently.)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#define MZSCHEME_VERSION "4.2.5.9"
|
#define MZSCHEME_VERSION "4.2.5.10"
|
||||||
|
|
||||||
#define MZSCHEME_VERSION_X 4
|
#define MZSCHEME_VERSION_X 4
|
||||||
#define MZSCHEME_VERSION_Y 2
|
#define MZSCHEME_VERSION_Y 2
|
||||||
#define MZSCHEME_VERSION_Z 5
|
#define MZSCHEME_VERSION_Z 5
|
||||||
#define MZSCHEME_VERSION_W 9
|
#define MZSCHEME_VERSION_W 10
|
||||||
|
|
||||||
#define MZSCHEME_VERSION_MAJOR ((MZSCHEME_VERSION_X * 100) + MZSCHEME_VERSION_Y)
|
#define MZSCHEME_VERSION_MAJOR ((MZSCHEME_VERSION_X * 100) + MZSCHEME_VERSION_Y)
|
||||||
#define MZSCHEME_VERSION_MINOR ((MZSCHEME_VERSION_Z * 1000) + MZSCHEME_VERSION_W)
|
#define MZSCHEME_VERSION_MINOR ((MZSCHEME_VERSION_Z * 1000) + MZSCHEME_VERSION_W)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user