regexp: fix consumption of bytes from a port
This commit is contained in:
parent
b19671f0f5
commit
6c8ba483a1
|
@ -40,6 +40,17 @@
|
|||
(test (rx:regexp-match rx:.n in 0 #f #f #"\n") '(#"b\n"))
|
||||
(test (rx:regexp-match rx:.n in 0 #f #f #"\n") '(#"c\n")))
|
||||
|
||||
(let ()
|
||||
(define in (open-input-bytes #" a b c "))
|
||||
|
||||
(define discard (open-output-bytes))
|
||||
(rx:regexp-match "[abc]" in 0 3 discard #"")
|
||||
(test (get-output-bytes discard) #" ")
|
||||
|
||||
(define discard2 (open-output-bytes))
|
||||
(rx:regexp-match "[abc]" in 0 1 discard2 #"")
|
||||
(test (get-output-bytes discard2) #" "))
|
||||
|
||||
;; ----------------------------------------
|
||||
|
||||
(define (check rx in N [M (max 1 (quotient N 10))])
|
||||
|
|
|
@ -23,16 +23,19 @@
|
|||
out ; output hold discarded bytes; implies `(not peek?)`
|
||||
max-lookbehind ; bytes before current counter to preserve, if `out`
|
||||
[failed? #:mutable] ; set to #t if `progress-evt` fires or read blocks
|
||||
[discarded-count #:mutable])) ; bytes discarded, if not `peek?`
|
||||
[discarded-count #:mutable] ; bytes discarded, if not `peek?`
|
||||
max-peek)) ; maximum number of bytes to peek or #f
|
||||
|
||||
(define (make-lazy-bytes in skip-amt prefix
|
||||
peek? immediate-only? progress-evt
|
||||
out max-lookbehind)
|
||||
out max-lookbehind
|
||||
max-peek)
|
||||
(define len (bytes-length prefix))
|
||||
(lazy-bytes prefix len in skip-amt len
|
||||
peek? immediate-only? progress-evt
|
||||
out max-lookbehind
|
||||
#f 0))
|
||||
#f 0
|
||||
max-peek))
|
||||
|
||||
(define (lazy-bytes-before-end? s pos end)
|
||||
(and (or (not (exact-integer? end))
|
||||
|
@ -54,9 +57,9 @@
|
|||
;; then flush unneeded bytes...
|
||||
;; The promise is that we won't ask for bytes before
|
||||
;; `pos` minus the `max-lookbehind`
|
||||
(define pos (min given-pos (lazy-bytes-end s)))
|
||||
(when force?
|
||||
(lazy-bytes-before-end? s pos 'eof))
|
||||
(lazy-bytes-before-end? s given-pos 'eof))
|
||||
(define pos (min given-pos (lazy-bytes-end s)))
|
||||
(when (and (lazy-bytes? s)
|
||||
(not (lazy-bytes-peek? s)))
|
||||
(define discarded-count (lazy-bytes-discarded-count s))
|
||||
|
@ -80,7 +83,9 @@
|
|||
[else (min amt (- prefix-len discarded-count))])
|
||||
;; To amount to discard:
|
||||
amt))
|
||||
(bytes-copy! bstr 0 bstr amt (- (lazy-bytes-end s) discarded-count))
|
||||
(define copy-end (- (lazy-bytes-end s) discarded-count))
|
||||
(unless (= amt copy-end)
|
||||
(bytes-copy! bstr 0 bstr amt copy-end))
|
||||
(set-lazy-bytes-discarded-count! s (+ amt discarded-count)))))
|
||||
|
||||
;; ----------------------------------------
|
||||
|
@ -116,9 +121,20 @@
|
|||
(set-lazy-bytes-end! s (+ n len discarded-count))
|
||||
#t])]
|
||||
[else
|
||||
;; We're going to need a bigger byte string
|
||||
(define bstr2 (make-bytes (max 32 (* 2 (bytes-length bstr)))))
|
||||
(bytes-copy! bstr2 0 bstr 0 len)
|
||||
(set-lazy-bytes-bstr! s bstr2)
|
||||
(get-more-bytes! s)])]
|
||||
(define max-peek (lazy-bytes-max-peek s))
|
||||
(define prefix-len (and max-peek (lazy-bytes-prefix-len s)))
|
||||
(cond
|
||||
[(and max-peek
|
||||
(len . >= . (- (+ max-peek prefix-len) discarded-count)))
|
||||
;; Not allowed to read any more
|
||||
#f]
|
||||
[else
|
||||
;; We're going to need a bigger byte string
|
||||
(define bstr2 (make-bytes (let ([sz (max 32 (* 2 (bytes-length bstr)))])
|
||||
(if max-peek
|
||||
(min sz (- (+ prefix-len max-peek) discarded-count))
|
||||
sz))))
|
||||
(bytes-copy! bstr2 0 bstr 0 len)
|
||||
(set-lazy-bytes-bstr! s bstr2)
|
||||
(get-more-bytes! s)])])]
|
||||
[else #f]))
|
||||
|
|
|
@ -281,7 +281,10 @@
|
|||
;; Create a lazy string from the port:
|
||||
(define lb-in (make-lazy-bytes port-in (if peek? start-offset 0) prefix
|
||||
peek? immediate-only? progress-evt
|
||||
out (rx:regexp-max-lookbehind rx)))
|
||||
out (rx:regexp-max-lookbehind rx)
|
||||
(and (input-port? in)
|
||||
(not (eq? 'eof end-offset))
|
||||
(- end-offset start-offset))))
|
||||
(define end-pos (if (eq? 'eof end-offset)
|
||||
'eof
|
||||
(+ start-pos
|
||||
|
@ -303,17 +306,22 @@
|
|||
(when (not peek?)
|
||||
(cond
|
||||
[ms-pos
|
||||
(when (or out (input-port? in))
|
||||
(when out
|
||||
;; Flush bytes before match:
|
||||
(lazy-bytes-advance! lb-in ms-pos #t)
|
||||
(lazy-bytes-advance! lb-in ms-pos #t))
|
||||
(when (input-port? in)
|
||||
;; Consume bytes that correspond to match:
|
||||
(copy-port-bytes port-in #f (- me-pos prefix-len)))]
|
||||
[(eq? end-pos 'eof)
|
||||
;; copy all remaining bytes from input to output
|
||||
;; Copy all remaining bytes from input to output
|
||||
(copy-port-bytes port-in out #f)]
|
||||
[else
|
||||
(when (or out (input-port? in))
|
||||
(lazy-bytes-advance! lb-in end-pos #t))])))
|
||||
(when out
|
||||
;; Copy all bytes to output
|
||||
(lazy-bytes-advance! lb-in end-pos #t))
|
||||
(when (input-port? in)
|
||||
;; Consume all bytes
|
||||
(copy-port-bytes port-in #f (- end-pos start-pos)))])))
|
||||
|
||||
(begin0
|
||||
|
||||
|
@ -322,11 +330,6 @@
|
|||
(not (lazy-bytes-failed? lb-in))
|
||||
mode)
|
||||
[(#f)
|
||||
(when (and (not peek?)
|
||||
any-bytes-left?
|
||||
(input-port? in))
|
||||
;; Consume non-matching bytes
|
||||
(copy-port-bytes port-in out (if (eq? 'eof end-offset) #f end-offset)))
|
||||
(add-end-bytes #f end-bytes-count #f #f)]
|
||||
[(?) #t]
|
||||
[(positions)
|
||||
|
|
Loading…
Reference in New Issue
Block a user