cs & regexp: fix incorrect request for extra bytes

When trying to match "\r\n" on an input port, for example, don't
insist on peeking a second byte if the first one already isn't "\r".

Closes #3132
This commit is contained in:
Matthew Flatt 2020-04-27 14:52:20 -06:00
parent 4662141ffc
commit a4b5fe5b78
3 changed files with 55 additions and 15 deletions

View File

@ -1850,6 +1850,25 @@
(or y "[y]") (or y "[y]")
(or z "[z]")))) (or z "[z]"))))
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Don't get stuck waiting for an unneeded byte
(let ()
(define-values (i o) (make-pipe))
(write-string "1\n" o)
(define rx (regexp "^(?:(.*?)(?:\r\n|\n))"))
(test '(#"1\n" #"1") regexp-match rx i))
(let ()
(define-values (i o) (make-pipe))
(write-string "abc" o)
(define rx (regexp "^(ab)*"))
(test '(#"ab" #"ab") regexp-match rx i))
(let ()
(define-values (i o) (make-pipe))
(write-string "123" o)
(define rx (pregexp "^(12)\\1|123"))
(test '(#"123" #f) regexp-match rx i))
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(report-errs) (report-errs)

View File

@ -77,6 +77,23 @@
(test (regexp-replace* "-" "zero-or-more?" "_") (test (regexp-replace* "-" "zero-or-more?" "_")
"zero_or_more?") "zero_or_more?")
;; Don't get stuck waiting for an unneeded byte:
(let ()
(define-values (i o) (make-pipe))
(write-string "1\n" o)
(define rx (rx:regexp "^(?:(.*?)(?:\r\n|\n))"))
(test (rx:regexp-match rx i) '(#"1\n" #"1")))
(let ()
(define-values (i o) (make-pipe))
(write-string "abc" o)
(define rx (rx:regexp "^(ab)*"))
(test (rx:regexp-match rx i) '(#"ab" #"ab")))
(let ()
(define-values (i o) (make-pipe))
(write-string "123" o)
(define rx (rx:pregexp "^(12)\\1|123"))
(test (rx:regexp-match rx i) '(#"123" #f)))
;; ---------------------------------------- ;; ----------------------------------------
(define (check rx in N [M (max 1 (quotient N 10))]) (define (check rx in N [M (max 1 (quotient N 10))])

View File

@ -89,7 +89,8 @@
;; An iterator performs a single match as many times as possible, up ;; An iterator performs a single match as many times as possible, up
;; to a specified max number of times, and it returns the position ;; to a specified max number of times, and it returns the position
;; and the number of items; this mode is used only when each match ;; and the number of items; this mode is used only when each match
;; has a fixed size ;; has a fixed size, and `ls-tst` must incrementally check for available
;; lazy bytes if `size` is greater than 1.
(define-syntax-rule (define-iterate (op-matcher* arg ...) (define-syntax-rule (define-iterate (op-matcher* arg ...)
outer-defn ... outer-defn ...
(lambda (s pos2 start limit end state) (lambda (s pos2 start limit end state)
@ -116,7 +117,7 @@
(let loop ([pos2 pos] [n 0]) (let loop ([pos2 pos] [n 0])
(cond (cond
[(or (and limit ((+ pos2 size) . > . limit)) [(or (and limit ((+ pos2 size) . > . limit))
(not (lazy-bytes-before-end? s (+ pos2 (sub1 size)) limit)) (not (lazy-bytes-before-end? s pos2 limit)) ; only checks for 1 byte
(not ls-tst)) (not ls-tst))
(values pos2 n size)] (values pos2 n size)]
[else [else
@ -172,11 +173,12 @@
(for/and ([c1 (in-bytes bstr 0 len)] (for/and ([c1 (in-bytes bstr 0 len)]
[c2 (in-bytes s pos (+ pos len))]) [c2 (in-bytes s pos (+ pos len))])
(= c1 c2))) (= c1 c2)))
(and (lazy-bytes-before-end? s (sub1 (+ pos len)) limit)
(for/and ([c1 (in-bytes bstr 0 len)] (for/and ([c1 (in-bytes bstr 0 len)]
[i (in-naturals pos)]) [i (in-naturals pos)])
(and (lazy-bytes-before-end? s i limit)
(let ()
(define c2 (lazy-bytes-ref s i)) (define c2 (lazy-bytes-ref s i))
(= c1 c2)))) (= c1 c2)))))
(+ pos len))) (+ pos len)))
(define-iterate (bytes-matcher* bstr) (define-iterate (bytes-matcher* bstr)
@ -188,8 +190,9 @@
(= c1 c2)) (= c1 c2))
#:ls-test (for/and ([c1 (in-bytes bstr 0 len)] #:ls-test (for/and ([c1 (in-bytes bstr 0 len)]
[i (in-naturals pos)]) [i (in-naturals pos)])
(define c2 (lazy-bytes-ref s i)) (and (lazy-bytes-before-end? s i limit)
(= c1 c2)))) (let ([c2 (lazy-bytes-ref s i)])
(= c1 c2))))))
;; ---------------------------------------- ;; ----------------------------------------
;; An always-fail pattern ;; An always-fail pattern
@ -456,12 +459,13 @@
(for/and ([c1 (in-bytes s (car p) (cdr p))] (for/and ([c1 (in-bytes s (car p) (cdr p))]
[c2 (in-bytes s pos (+ pos len))]) [c2 (in-bytes s pos (+ pos len))])
(chyte=? c1 c2))) (chyte=? c1 c2)))
(and (lazy-bytes-before-end? s (sub1 (+ pos len)) limit)
(for/and ([j (in-range (car p) (cdr p))] (for/and ([j (in-range (car p) (cdr p))]
[i (in-naturals pos)]) [i (in-naturals pos)])
(and (lazy-bytes-before-end? s i limit)
(let ()
(define c1 (lazy-bytes-ref s j)) (define c1 (lazy-bytes-ref s j))
(define c2 (lazy-bytes-ref s i)) (define c2 (lazy-bytes-ref s i))
(chyte=? c1 c2))))) (chyte=? c1 c2))))))
(and matches? (and matches?
(next-m s (+ pos len) start limit end state stack))])))) (next-m s (+ pos len) start limit end state stack))]))))