cs & regexp: fix regexp-match/end
on large strings
When a string is large enough, its conversion to bytes is streamed internally, and in that case `regexp-match/end` did not return the correct bytes leading up to the end of the match. Closes #3684.
This commit is contained in:
parent
0541fe3b54
commit
b9ef307b30
|
@ -1884,4 +1884,88 @@
|
||||||
|
|
||||||
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;; Check that `regexp-match/end` produces the right suffix
|
||||||
|
;; when a string to convert is large enough that its
|
||||||
|
;; conversion is internally streamed
|
||||||
|
(for ([len (in-list '(100 1000 10000 100000))])
  ;; Every input is `len` copies of "a" followed by "!b". The lookahead
  ;; pattern `(?=b)` matches the empty span just before the final "b",
  ;; so the match starts and ends at position `len`+1 and the byte
  ;; immediately before the match end is "!".

  ;; Fresh inputs and regexps are built inside each thunk, as before.
  (define (as-bytes) (bytes-append (make-bytes len (char->integer #\a)) #"!b"))
  (define (as-string) (string-append (make-string len #\a) "!b"))
  (define (byte-rx) (byte-pregexp #"(?=b)"))
  (define (string-rx) (pregexp "(?=b)"))

  ;; Expected results shared by several checks.
  (define hit (add1 len))
  (define hit-positions (list (cons hit hit)))
  (define long-tail
    ;; The `len` bytes that precede the match end: `len`-1 "a"s, then "!"
    (bytes-append (make-bytes (sub1 len) (char->integer #\a)) #"!"))

  ;; Each combination: regexp maker, input maker, and the match list
  ;; that `regexp-match/end` is expected to produce for that pairing.
  (define combos
    (list (list byte-rx as-bytes '(#""))
          (list byte-rx as-string '(#""))
          (list string-rx as-string '(""))))

  ;; Runs one `test-values` check per combination, in order. `expected-of`
  ;; maps a combination to the expected value list; `run` receives the
  ;; freshly built regexp and input and performs the match.
  (define (check-each expected-of run)
    (for ([combo (in-list combos)])
      (define make-rx (car combo))
      (define make-input (cadr combo))
      (test-values (expected-of combo)
                   (lambda () (run (make-rx) (make-input))))))

  (define (matched+bang combo) (list (caddr combo) #"!"))
  (define (positions+bang combo) (list hit-positions #"!"))
  (define (positions+long-tail combo) (list hit-positions long-tail))

  ;; Default optional arguments
  (check-each matched+bang
              (lambda (rx in) (regexp-match/end rx in)))
  (check-each positions+bang
              (lambda (rx in) (regexp-match-positions/end rx in)))

  ;; Explicit start, no end, no output port, and an input prefix
  (check-each matched+bang
              (lambda (rx in) (regexp-match/end rx in 0 #f #f #"prefix")))
  (check-each positions+bang
              (lambda (rx in) (regexp-match-positions/end rx in 0 #f #f #"prefix")))

  ;; Same, but asking for `len` trailing bytes instead of the default
  (check-each positions+long-tail
              (lambda (rx in) (regexp-match-positions/end rx in 0 #f #f #"prefix" len))))
|
||||||
|
|
||||||
|
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
(report-errs)
|
(report-errs)
|
||||||
|
|
|
@ -8810,8 +8810,14 @@
|
||||||
start-offset_0
|
start-offset_0
|
||||||
0)))
|
0)))
|
||||||
(let ((max-lookbehind_0
|
(let ((max-lookbehind_0
|
||||||
|
(let ((app_0
|
||||||
(rx:regexp-max-lookbehind
|
(rx:regexp-max-lookbehind
|
||||||
rx_0)))
|
rx_0)))
|
||||||
|
(max
|
||||||
|
app_0
|
||||||
|
(if end-bytes-count9_0
|
||||||
|
end-bytes-count9_0
|
||||||
|
0)))))
|
||||||
(let ((max-peek_0
|
(let ((max-peek_0
|
||||||
(if (input-port? in_0)
|
(if (input-port? in_0)
|
||||||
(if (not
|
(if (not
|
||||||
|
@ -8997,7 +9003,10 @@
|
||||||
positions_0
|
positions_0
|
||||||
end-bytes-count9_0
|
end-bytes-count9_0
|
||||||
bstr_0
|
bstr_0
|
||||||
me-pos_0)))
|
(-
|
||||||
|
me-pos_0
|
||||||
|
(lazy-bytes-discarded-count
|
||||||
|
lb-in_0)))))
|
||||||
(if (eq?
|
(if (eq?
|
||||||
tmp_0
|
tmp_0
|
||||||
'strings)
|
'strings)
|
||||||
|
@ -9032,7 +9041,9 @@
|
||||||
bytes/strings_0
|
bytes/strings_0
|
||||||
end-bytes-count9_0
|
end-bytes-count9_0
|
||||||
bstr_0
|
bstr_0
|
||||||
me-pos_0))))
|
(-
|
||||||
|
me-pos_0
|
||||||
|
delta_0)))))
|
||||||
(void))))))
|
(void))))))
|
||||||
(write/consume-skipped_0))))
|
(write/consume-skipped_0))))
|
||||||
(args
|
(args
|
||||||
|
|
|
@ -281,7 +281,8 @@
|
||||||
;; Create a lazy string from the port:
|
;; Create a lazy string from the port:
|
||||||
(define lb-in (make-lazy-bytes port-in (if peek? start-offset 0) prefix
|
(define lb-in (make-lazy-bytes port-in (if peek? start-offset 0) prefix
|
||||||
peek? immediate-only? progress-evt
|
peek? immediate-only? progress-evt
|
||||||
out (rx:regexp-max-lookbehind rx)
|
out (max (rx:regexp-max-lookbehind rx)
|
||||||
|
(or end-bytes-count 0))
|
||||||
(and (input-port? in)
|
(and (input-port? in)
|
||||||
(not (eq? 'eof end-offset))
|
(not (eq? 'eof end-offset))
|
||||||
(- end-offset start-offset))))
|
(- end-offset start-offset))))
|
||||||
|
@ -355,7 +356,7 @@
|
||||||
#:start-index (- ms-pos delta)
|
#:start-index (- ms-pos delta)
|
||||||
#:delta delta
|
#:delta delta
|
||||||
#:result-offset (+ ms-str-pos start-offset))]))
|
#:result-offset (+ ms-str-pos start-offset))]))
|
||||||
(add-end-bytes positions end-bytes-count bstr me-pos)]
|
(add-end-bytes positions end-bytes-count bstr (- me-pos (lazy-bytes-discarded-count lb-in)))]
|
||||||
[(strings)
|
[(strings)
|
||||||
;; The byte string may be shifted by discarded bytes, if not
|
;; The byte string may be shifted by discarded bytes, if not
|
||||||
;; in `peek?` mode
|
;; in `peek?` mode
|
||||||
|
@ -368,7 +369,7 @@
|
||||||
(byte-positions->bytess bstr ms-pos me-pos state #:delta delta)]
|
(byte-positions->bytess bstr ms-pos me-pos state #:delta delta)]
|
||||||
[else
|
[else
|
||||||
(byte-positions->strings bstr ms-pos me-pos state #:delta delta)]))
|
(byte-positions->strings bstr ms-pos me-pos state #:delta delta)]))
|
||||||
(add-end-bytes bytes/strings end-bytes-count bstr me-pos)])
|
(add-end-bytes bytes/strings end-bytes-count bstr (- me-pos delta))])
|
||||||
|
|
||||||
;; Now, write and consume port content:
|
;; Now, write and consume port content:
|
||||||
(write/consume-skipped))]))
|
(write/consume-skipped))]))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user