empty matches in regexp-X* makeover, part one

svn: r9594
This commit is contained in:
Eli Barzilay 2008-05-02 17:18:52 +00:00
parent 89a12d4f56
commit 26a180f458

View File

@ -128,7 +128,7 @@
(format "ending offset index out of range [~a,~a]: " start len)
end))
(reverse
(let loop ([acc '()] [start start] [end end] [skipped? #f])
(let loop ([acc '()] [start start] [end end])
(when (and need-leftover? (positive? start) (input-port? string))
;; Skip start chars:
(let ([s (make-bytes 4096)])
@ -139,71 +139,97 @@
(unless (eof-object? m) (loop (+ n m))))))))
(if (and port-success-choose (input-port? string))
;; Input port match, get string
(let ([discarded/leftovers 0])
(let* ([discarded/leftovers 0]
[spitout (if need-leftover?
(open-output-bytes)
(make-output-port
'counter always-evt
(lambda (s start end flush? breakable?)
(let ([c (- end start)])
(set! discarded/leftovers
(+ c discarded/leftovers))
c))
void))])
(define match
(if need-leftover?
(let ([leftover-port (open-output-bytes)])
(begin0 (regexp-match
rx string
(if skipped? 1 0)
(and end (if skipped? (- end start -1) (- end start)))
leftover-port)
(set! discarded/leftovers
(if (and (regexp? rx) (string? string))
(get-output-string leftover-port)
(get-output-bytes leftover-port)))))
(regexp-match
rx string start end
(make-output-port
'counter always-evt
(lambda (s start end flush? breakable?)
(let ([c (- end start)])
(set! discarded/leftovers (+ c discarded/leftovers))
c))
void))))
(if match
(let* ([mlen (bstring-length (car match))]
[skip? (zero? mlen)])
(let* ([end (and end (- end start))]
[m (regexp-match rx string 0 end spitout)]
;; re-match if we get a zero-length match at the
;; beginning
[m (if (and m ; we have a match
;; and it's an empty one
(zero? (bstring-length (car m)))
;; and it's at the beginning
(zero? (file-position spitout))
;; and we still have stuff to match
;;!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
(if end
(< 0 end)
(not (eof-object? (peek-byte string)))))
(regexp-match rx string 1 end spitout)
m)])
(set! discarded/leftovers (get-output-bytes spitout))
m)
(let ([m (regexp-match rx string start end spitout)])
;; re-match if we get a zero-length match at the
;; beginning
(if (and m
(zero? discarded/leftovers)
(zero? (bstring-length (car m))))
(regexp-match rx string (add1 start) (sub1 end) spitout)
m))))
(let ([end (and end match
(- end (if need-leftover?
(+ (bstring-length discarded/leftovers)
start)
discarded/leftovers)
(bstring-length (car match))))])
(if (and match
(or (< 0 (bstring-length (car match)))
(if end
(< 0 end)
(not (eof-object? (peek-byte string))))))
(loop (cons (port-success-choose (car match)
discarded/leftovers)
acc)
(if skip? 1 0)
(and end
(- end (if need-leftover?
(+ (bstring-length discarded/leftovers)
start
(if skipped? 1 0))
discarded/leftovers)
mlen))
skip?))
(port-failure-k acc discarded/leftovers)))
0 end)
(port-failure-k acc discarded/leftovers))))
;; String/port match, get positions
(let ([match ((if peek?
regexp-match-peek-positions
regexp-match-positions)
rx string start end)]
[start (if skipped? (sub1 start) start)])
(if match
rx string start end)])
(if (not match)
(failure-k acc start end)
(let* ([mstart (caar match)]
[mend (cdar match)]
[skip? (= mstart mend)])
;; The following two pieces are similar, but not
;; simple to combine and preserve efficiency
(define (cont acc end* new-start new-end)
(if skip?
(if (and end* (new-start . >= . end*))
(if failure-k (failure-k acc end* end) acc)
(loop acc (add1 new-start) new-end #t))
(loop acc new-start new-end #f)))
(if port-success-k
(port-success-k
(lambda (acc new-start new-end)
(cont acc (or new-end end len) new-start new-end))
acc start end mstart mend)
(cont (cons (success-choose start mstart mend) acc)
(or end len) mend end)))
(failure-k acc start end)))))))]))
;; re-match if we get a zero-length match at the
;; beginning
[match (if (and (= mstart mend start)
(< start (or end len +inf.0)))
((if peek?
regexp-match-peek-positions
regexp-match-positions)
rx string (add1 start) end)
match)])
;; fail if rematch failed
(if (not match)
(failure-k acc start end)
(let ([mstart (caar match)]
[mend (cdar match)])
;; or if we have a zero-length match at the end
(if (= mstart mend (or end len +inf.0))
(failure-k acc start end)
(if port-success-k
(port-success-k
(lambda (acc new-start new-end)
(loop acc new-start new-end))
acc start end mstart mend)
(loop (cons (success-choose start mstart mend) acc)
mend end))))))))))))]))
;; Returns all the positions at which the pattern matched.
(define (regexp-match-positions* pattern string [start 0] [end #f])
@ -262,7 +288,7 @@
;; port-success-choose:
(lambda (match-string leftovers) leftovers)
;; port-failure-k:
(lambda (acc leftover) (cons leftover acc))
(lambda (acc leftover) (if leftover (cons leftover acc) acc))
#t
#f))