cs & regexp: fix incorrect request for extra bytes

When trying to match "\r\n" on an input port, for example, don't
insist on peeking a second byte if the first one already isn't "\r".

Closes #3132
This commit is contained in:
Matthew Flatt 2020-04-27 14:52:20 -06:00
parent 4662141ffc
commit a4b5fe5b78
3 changed files with 55 additions and 15 deletions

View File

@ -1850,6 +1850,25 @@
(or y "[y]")
(or z "[z]"))))
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Don't get stuck waiting for an unneeded byte
(let ()
(define-values (i o) (make-pipe))
(write-string "1\n" o)
(define rx (regexp "^(?:(.*?)(?:\r\n|\n))"))
(test '(#"1\n" #"1") regexp-match rx i))
(let ()
(define-values (i o) (make-pipe))
(write-string "abc" o)
(define rx (regexp "^(ab)*"))
(test '(#"ab" #"ab") regexp-match rx i))
(let ()
(define-values (i o) (make-pipe))
(write-string "123" o)
(define rx (pregexp "^(12)\\1|123"))
(test '(#"123" #f) regexp-match rx i))
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(report-errs)

View File

@ -77,6 +77,23 @@
(test (regexp-replace* "-" "zero-or-more?" "_")
"zero_or_more?")
;; Don't get stuck waiting for an unneeded byte:
(let ()
(define-values (i o) (make-pipe))
(write-string "1\n" o)
(define rx (rx:regexp "^(?:(.*?)(?:\r\n|\n))"))
(test (rx:regexp-match rx i) '(#"1\n" #"1")))
(let ()
(define-values (i o) (make-pipe))
(write-string "abc" o)
(define rx (rx:regexp "^(ab)*"))
(test (rx:regexp-match rx i) '(#"ab" #"ab")))
(let ()
(define-values (i o) (make-pipe))
(write-string "123" o)
(define rx (rx:pregexp "^(12)\\1|123"))
(test (rx:regexp-match rx i) '(#"123" #f)))
;; ----------------------------------------
(define (check rx in N [M (max 1 (quotient N 10))])

View File

@ -89,7 +89,8 @@
;; An iterator performs a single match as many times as possible, up
;; to a specified max number of times, and it returns the position
;; and the number of items; this mode is used only when each match
;; has a fixed size
;; has a fixed size, and `ls-tst` must incrementally check for available
;; lazy bytes if `size` is greater than 1.
(define-syntax-rule (define-iterate (op-matcher* arg ...)
outer-defn ...
(lambda (s pos2 start limit end state)
@ -116,7 +117,7 @@
(let loop ([pos2 pos] [n 0])
(cond
[(or (and limit ((+ pos2 size) . > . limit))
(not (lazy-bytes-before-end? s (+ pos2 (sub1 size)) limit))
(not (lazy-bytes-before-end? s pos2 limit)) ; only checks for 1 byte
(not ls-tst))
(values pos2 n size)]
[else
@ -172,11 +173,12 @@
(for/and ([c1 (in-bytes bstr 0 len)]
[c2 (in-bytes s pos (+ pos len))])
(= c1 c2)))
(and (lazy-bytes-before-end? s (sub1 (+ pos len)) limit)
(for/and ([c1 (in-bytes bstr 0 len)]
[i (in-naturals pos)])
(define c2 (lazy-bytes-ref s i))
(= c1 c2))))
(for/and ([c1 (in-bytes bstr 0 len)]
[i (in-naturals pos)])
(and (lazy-bytes-before-end? s i limit)
(let ()
(define c2 (lazy-bytes-ref s i))
(= c1 c2)))))
(+ pos len)))
(define-iterate (bytes-matcher* bstr)
@ -188,8 +190,9 @@
(= c1 c2))
#:ls-test (for/and ([c1 (in-bytes bstr 0 len)]
[i (in-naturals pos)])
(define c2 (lazy-bytes-ref s i))
(= c1 c2))))
(and (lazy-bytes-before-end? s i limit)
(let ([c2 (lazy-bytes-ref s i)])
(= c1 c2))))))
;; ----------------------------------------
;; An always-fail pattern
@ -456,12 +459,13 @@
(for/and ([c1 (in-bytes s (car p) (cdr p))]
[c2 (in-bytes s pos (+ pos len))])
(chyte=? c1 c2)))
(and (lazy-bytes-before-end? s (sub1 (+ pos len)) limit)
(for/and ([j (in-range (car p) (cdr p))]
[i (in-naturals pos)])
(define c1 (lazy-bytes-ref s j))
(define c2 (lazy-bytes-ref s i))
(chyte=? c1 c2)))))
(for/and ([j (in-range (car p) (cdr p))]
[i (in-naturals pos)])
(and (lazy-bytes-before-end? s i limit)
(let ()
(define c1 (lazy-bytes-ref s j))
(define c2 (lazy-bytes-ref s i))
(chyte=? c1 c2))))))
(and matches?
(next-m s (+ pos len) start limit end state stack))]))))