regexp: fix consumption of bytes from a port

This commit is contained in:
Matthew Flatt 2019-01-16 19:01:35 -07:00
parent b19671f0f5
commit 6c8ba483a1
3 changed files with 52 additions and 22 deletions

View File

@ -40,6 +40,17 @@
(test (rx:regexp-match rx:.n in 0 #f #f #"\n") '(#"b\n"))
(test (rx:regexp-match rx:.n in 0 #f #f #"\n") '(#"c\n")))
(let ()
(define in (open-input-bytes #" a b c "))
(define discard (open-output-bytes))
(rx:regexp-match "[abc]" in 0 3 discard #"")
(test (get-output-bytes discard) #" ")
(define discard2 (open-output-bytes))
(rx:regexp-match "[abc]" in 0 1 discard2 #"")
(test (get-output-bytes discard2) #" "))
;; ----------------------------------------
(define (check rx in N [M (max 1 (quotient N 10))])

View File

@ -23,16 +23,19 @@
out ; output hold discarded bytes; implies `(not peek?)`
max-lookbehind ; bytes before current counter to preserve, if `out`
[failed? #:mutable] ; set to #t if `progress-evt` fires or read blocks
[discarded-count #:mutable])) ; bytes discarded, if not `peek?`
[discarded-count #:mutable] ; bytes discarded, if not `peek?`
max-peek)) ; maximum number of bytes to peek or #f
(define (make-lazy-bytes in skip-amt prefix
peek? immediate-only? progress-evt
out max-lookbehind)
out max-lookbehind
max-peek)
(define len (bytes-length prefix))
(lazy-bytes prefix len in skip-amt len
peek? immediate-only? progress-evt
out max-lookbehind
#f 0))
#f 0
max-peek))
(define (lazy-bytes-before-end? s pos end)
(and (or (not (exact-integer? end))
@ -54,9 +57,9 @@
;; then flush unneeded bytes...
;; The promise is that we won't ask for bytes before
;; `pos` minus the `max-lookbehind`
(define pos (min given-pos (lazy-bytes-end s)))
(when force?
(lazy-bytes-before-end? s pos 'eof))
(lazy-bytes-before-end? s given-pos 'eof))
(define pos (min given-pos (lazy-bytes-end s)))
(when (and (lazy-bytes? s)
(not (lazy-bytes-peek? s)))
(define discarded-count (lazy-bytes-discarded-count s))
@ -80,7 +83,9 @@
[else (min amt (- prefix-len discarded-count))])
;; To amount to discard:
amt))
(bytes-copy! bstr 0 bstr amt (- (lazy-bytes-end s) discarded-count))
(define copy-end (- (lazy-bytes-end s) discarded-count))
(unless (= amt copy-end)
(bytes-copy! bstr 0 bstr amt copy-end))
(set-lazy-bytes-discarded-count! s (+ amt discarded-count)))))
;; ----------------------------------------
@ -116,9 +121,20 @@
(set-lazy-bytes-end! s (+ n len discarded-count))
#t])]
[else
;; We're going to need a bigger byte string
(define bstr2 (make-bytes (max 32 (* 2 (bytes-length bstr)))))
(bytes-copy! bstr2 0 bstr 0 len)
(set-lazy-bytes-bstr! s bstr2)
(get-more-bytes! s)])]
(define max-peek (lazy-bytes-max-peek s))
(define prefix-len (and max-peek (lazy-bytes-prefix-len s)))
(cond
[(and max-peek
(len . >= . (- (+ max-peek prefix-len) discarded-count)))
;; Not allowed to read any more
#f]
[else
;; We're going to need a bigger byte string
(define bstr2 (make-bytes (let ([sz (max 32 (* 2 (bytes-length bstr)))])
(if max-peek
(min sz (- (+ prefix-len max-peek) discarded-count))
sz))))
(bytes-copy! bstr2 0 bstr 0 len)
(set-lazy-bytes-bstr! s bstr2)
(get-more-bytes! s)])])]
[else #f]))

View File

@ -281,7 +281,10 @@
;; Create a lazy string from the port:
(define lb-in (make-lazy-bytes port-in (if peek? start-offset 0) prefix
peek? immediate-only? progress-evt
out (rx:regexp-max-lookbehind rx)))
out (rx:regexp-max-lookbehind rx)
(and (input-port? in)
(not (eq? 'eof end-offset))
(- end-offset start-offset))))
(define end-pos (if (eq? 'eof end-offset)
'eof
(+ start-pos
@ -303,17 +306,22 @@
(when (not peek?)
(cond
[ms-pos
(when (or out (input-port? in))
(when out
;; Flush bytes before match:
(lazy-bytes-advance! lb-in ms-pos #t)
(lazy-bytes-advance! lb-in ms-pos #t))
(when (input-port? in)
;; Consume bytes that correspond to match:
(copy-port-bytes port-in #f (- me-pos prefix-len)))]
[(eq? end-pos 'eof)
;; copy all remaining bytes from input to output
;; Copy all remaining bytes from input to output
(copy-port-bytes port-in out #f)]
[else
(when (or out (input-port? in))
(lazy-bytes-advance! lb-in end-pos #t))])))
(when out
;; Copy all bytes to output
(lazy-bytes-advance! lb-in end-pos #t))
(when (input-port? in)
;; Consume all bytes
(copy-port-bytes port-in #f (- end-pos start-pos)))])))
(begin0
@ -322,11 +330,6 @@
(not (lazy-bytes-failed? lb-in))
mode)
[(#f)
(when (and (not peek?)
any-bytes-left?
(input-port? in))
;; Consume non-matching bytes
(copy-port-bytes port-in out (if (eq? 'eof end-offset) #f end-offset)))
(add-end-bytes #f end-bytes-count #f #f)]
[(?) #t]
[(positions)