cs & regexp: fix `regexp-match/end` on large strings
When a string is large enough, its conversion to bytes is internally streamed, and in that case `regexp-match/end` did not return the correct match-ending bytes. Closes #3684.
This commit is contained in:
parent
0541fe3b54
commit
b9ef307b30
|
@ -1884,4 +1884,88 @@
|
|||
|
||||
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Check that `regexp-match/end` produces the right suffix
|
||||
;; when a string to convert is large enough that its
|
||||
;; conversion is internally streamed
|
||||
(for ([N (in-list '(100 1000 10000 100000))])
|
||||
(test-values (list '(#"") #"!")
|
||||
(lambda ()
|
||||
(regexp-match/end (byte-pregexp #"(?=b)")
|
||||
(bytes-append (make-bytes N (char->integer #\a)) #"!b"))))
|
||||
(test-values (list '(#"") #"!")
|
||||
(lambda ()
|
||||
(regexp-match/end (byte-pregexp #"(?=b)")
|
||||
(string-append (make-string N #\a) "!b"))))
|
||||
(test-values (list '("") #"!")
|
||||
(lambda ()
|
||||
(regexp-match/end (pregexp "(?=b)")
|
||||
(string-append (make-string N #\a) "!b"))))
|
||||
|
||||
(test-values (list (list (cons (add1 N) (add1 N))) #"!")
|
||||
(lambda ()
|
||||
(regexp-match-positions/end (byte-pregexp #"(?=b)")
|
||||
(bytes-append (make-bytes N (char->integer #\a)) #"!b"))))
|
||||
|
||||
(test-values (list (list (cons (add1 N) (add1 N))) #"!")
|
||||
(lambda ()
|
||||
(regexp-match-positions/end (byte-pregexp #"(?=b)")
|
||||
(string-append (make-string N #\a) "!b"))))
|
||||
(test-values (list (list (cons (add1 N) (add1 N))) #"!")
|
||||
(lambda ()
|
||||
(regexp-match-positions/end (pregexp "(?=b)")
|
||||
(string-append (make-string N #\a) "!b"))))
|
||||
|
||||
(test-values (list '(#"") #"!")
|
||||
(lambda ()
|
||||
(regexp-match/end (byte-pregexp #"(?=b)")
|
||||
(bytes-append (make-bytes N (char->integer #\a)) #"!b")
|
||||
0 #f #f #"prefix")))
|
||||
(test-values (list '(#"") #"!")
|
||||
(lambda ()
|
||||
(regexp-match/end (byte-pregexp #"(?=b)")
|
||||
(string-append (make-string N #\a) "!b")
|
||||
0 #f #f #"prefix")))
|
||||
(test-values (list '("") #"!")
|
||||
(lambda ()
|
||||
(regexp-match/end (pregexp "(?=b)")
|
||||
(string-append (make-string N #\a) "!b")
|
||||
0 #f #f #"prefix")))
|
||||
|
||||
(test-values (list (list (cons (add1 N) (add1 N))) #"!")
|
||||
(lambda ()
|
||||
(regexp-match-positions/end (byte-pregexp #"(?=b)")
|
||||
(bytes-append (make-bytes N (char->integer #\a)) #"!b")
|
||||
0 #f #f #"prefix")))
|
||||
(test-values (list (list (cons (add1 N) (add1 N))) #"!")
|
||||
(lambda ()
|
||||
(regexp-match-positions/end (byte-pregexp #"(?=b)")
|
||||
(string-append (make-string N #\a) "!b")
|
||||
0 #f #f #"prefix")))
|
||||
(test-values (list (list (cons (add1 N) (add1 N))) #"!")
|
||||
(lambda ()
|
||||
(regexp-match-positions/end (pregexp "(?=b)")
|
||||
(string-append (make-string N #\a) "!b")
|
||||
0 #f #f #"prefix")))
|
||||
|
||||
(test-values (list (list (cons (add1 N) (add1 N)))
|
||||
(bytes-append (make-bytes (sub1 N) (char->integer #\a)) #"!"))
|
||||
(lambda ()
|
||||
(regexp-match-positions/end (byte-pregexp #"(?=b)")
|
||||
(bytes-append (make-bytes N (char->integer #\a)) #"!b")
|
||||
0 #f #f #"prefix" N)))
|
||||
(test-values (list (list (cons (add1 N) (add1 N)))
|
||||
(bytes-append (make-bytes (sub1 N) (char->integer #\a)) #"!"))
|
||||
(lambda ()
|
||||
(regexp-match-positions/end (byte-pregexp #"(?=b)")
|
||||
(string-append (make-string N #\a) "!b")
|
||||
0 #f #f #"prefix" N)))
|
||||
(test-values (list (list (cons (add1 N) (add1 N)))
|
||||
(bytes-append (make-bytes (sub1 N) (char->integer #\a)) #"!"))
|
||||
(lambda ()
|
||||
(regexp-match-positions/end (pregexp "(?=b)")
|
||||
(string-append (make-string N #\a) "!b")
|
||||
0 #f #f #"prefix" N))))
|
||||
|
||||
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(report-errs)
|
||||
|
|
|
@ -8810,8 +8810,14 @@
|
|||
start-offset_0
|
||||
0)))
|
||||
(let ((max-lookbehind_0
|
||||
(rx:regexp-max-lookbehind
|
||||
rx_0)))
|
||||
(let ((app_0
|
||||
(rx:regexp-max-lookbehind
|
||||
rx_0)))
|
||||
(max
|
||||
app_0
|
||||
(if end-bytes-count9_0
|
||||
end-bytes-count9_0
|
||||
0)))))
|
||||
(let ((max-peek_0
|
||||
(if (input-port? in_0)
|
||||
(if (not
|
||||
|
@ -8997,7 +9003,10 @@
|
|||
positions_0
|
||||
end-bytes-count9_0
|
||||
bstr_0
|
||||
me-pos_0)))
|
||||
(-
|
||||
me-pos_0
|
||||
(lazy-bytes-discarded-count
|
||||
lb-in_0)))))
|
||||
(if (eq?
|
||||
tmp_0
|
||||
'strings)
|
||||
|
@ -9032,7 +9041,9 @@
|
|||
bytes/strings_0
|
||||
end-bytes-count9_0
|
||||
bstr_0
|
||||
me-pos_0))))
|
||||
(-
|
||||
me-pos_0
|
||||
delta_0)))))
|
||||
(void))))))
|
||||
(write/consume-skipped_0))))
|
||||
(args
|
||||
|
|
|
@ -281,7 +281,8 @@
|
|||
;; Create a lazy string from the port:
|
||||
(define lb-in (make-lazy-bytes port-in (if peek? start-offset 0) prefix
|
||||
peek? immediate-only? progress-evt
|
||||
out (rx:regexp-max-lookbehind rx)
|
||||
out (max (rx:regexp-max-lookbehind rx)
|
||||
(or end-bytes-count 0))
|
||||
(and (input-port? in)
|
||||
(not (eq? 'eof end-offset))
|
||||
(- end-offset start-offset))))
|
||||
|
@ -355,7 +356,7 @@
|
|||
#:start-index (- ms-pos delta)
|
||||
#:delta delta
|
||||
#:result-offset (+ ms-str-pos start-offset))]))
|
||||
(add-end-bytes positions end-bytes-count bstr me-pos)]
|
||||
(add-end-bytes positions end-bytes-count bstr (- me-pos (lazy-bytes-discarded-count lb-in)))]
|
||||
[(strings)
|
||||
;; The byte string may be shifted by discarded bytes, if not
|
||||
;; in `peek?` mode
|
||||
|
@ -368,7 +369,7 @@
|
|||
(byte-positions->bytess bstr ms-pos me-pos state #:delta delta)]
|
||||
[else
|
||||
(byte-positions->strings bstr ms-pos me-pos state #:delta delta)]))
|
||||
(add-end-bytes bytes/strings end-bytes-count bstr me-pos)])
|
||||
(add-end-bytes bytes/strings end-bytes-count bstr (- me-pos delta))])
|
||||
|
||||
;; Now, write and consume port content:
|
||||
(write/consume-skipped))]))
|
||||
|
|
Loading…
Reference in New Issue
Block a user