empty matches in regexp-X* makeover, part one

svn: r9594
This commit is contained in:
Eli Barzilay 2008-05-02 17:18:52 +00:00
parent 89a12d4f56
commit 26a180f458

View File

@ -128,7 +128,7 @@
(format "ending offset index out of range [~a,~a]: " start len) (format "ending offset index out of range [~a,~a]: " start len)
end)) end))
(reverse (reverse
(let loop ([acc '()] [start start] [end end] [skipped? #f]) (let loop ([acc '()] [start start] [end end])
(when (and need-leftover? (positive? start) (input-port? string)) (when (and need-leftover? (positive? start) (input-port? string))
;; Skip start chars: ;; Skip start chars:
(let ([s (make-bytes 4096)]) (let ([s (make-bytes 4096)])
@ -139,71 +139,97 @@
(unless (eof-object? m) (loop (+ n m)))))))) (unless (eof-object? m) (loop (+ n m))))))))
(if (and port-success-choose (input-port? string)) (if (and port-success-choose (input-port? string))
;; Input port match, get string ;; Input port match, get string
(let ([discarded/leftovers 0]) (let* ([discarded/leftovers 0]
(define match [spitout (if need-leftover?
(if need-leftover? (open-output-bytes)
(let ([leftover-port (open-output-bytes)])
(begin0 (regexp-match
rx string
(if skipped? 1 0)
(and end (if skipped? (- end start -1) (- end start)))
leftover-port)
(set! discarded/leftovers
(if (and (regexp? rx) (string? string))
(get-output-string leftover-port)
(get-output-bytes leftover-port)))))
(regexp-match
rx string start end
(make-output-port (make-output-port
'counter always-evt 'counter always-evt
(lambda (s start end flush? breakable?) (lambda (s start end flush? breakable?)
(let ([c (- end start)]) (let ([c (- end start)])
(set! discarded/leftovers (+ c discarded/leftovers)) (set! discarded/leftovers
(+ c discarded/leftovers))
c)) c))
void)))) void))])
(if match (define match
(let* ([mlen (bstring-length (car match))] (if need-leftover?
[skip? (zero? mlen)]) (let* ([end (and end (- end start))]
[m (regexp-match rx string 0 end spitout)]
;; re-match if we get a zero-length match at the
;; beginning
[m (if (and m ; we have a match
;; and it's an empty one
(zero? (bstring-length (car m)))
;; and it's at the beginning
(zero? (file-position spitout))
;; and we still have stuff to match
;;!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
(if end
(< 0 end)
(not (eof-object? (peek-byte string)))))
(regexp-match rx string 1 end spitout)
m)])
(set! discarded/leftovers (get-output-bytes spitout))
m)
(let ([m (regexp-match rx string start end spitout)])
;; re-match if we get a zero-length match at the
;; beginning
(if (and m
(zero? discarded/leftovers)
(zero? (bstring-length (car m))))
(regexp-match rx string (add1 start) (sub1 end) spitout)
m))))
(let ([end (and end match
(- end (if need-leftover?
(+ (bstring-length discarded/leftovers)
start)
discarded/leftovers)
(bstring-length (car match))))])
(if (and match
(or (< 0 (bstring-length (car match)))
(if end
(< 0 end)
(not (eof-object? (peek-byte string))))))
(loop (cons (port-success-choose (car match) (loop (cons (port-success-choose (car match)
discarded/leftovers) discarded/leftovers)
acc) acc)
(if skip? 1 0) 0 end)
(and end (port-failure-k acc discarded/leftovers))))
(- end (if need-leftover?
(+ (bstring-length discarded/leftovers)
start
(if skipped? 1 0))
discarded/leftovers)
mlen))
skip?))
(port-failure-k acc discarded/leftovers)))
;; String/port match, get positions ;; String/port match, get positions
(let ([match ((if peek? (let ([match ((if peek?
regexp-match-peek-positions regexp-match-peek-positions
regexp-match-positions) regexp-match-positions)
rx string start end)] rx string start end)])
[start (if skipped? (sub1 start) start)]) (if (not match)
(if match (failure-k acc start end)
(let* ([mstart (caar match)] (let* ([mstart (caar match)]
[mend (cdar match)] [mend (cdar match)]
[skip? (= mstart mend)]) ;; re-match if we get a zero-length match at the
;; The following two pieces are similar, but not ;; beginning
;; simple to combine and preserve efficiency [match (if (and (= mstart mend start)
(define (cont acc end* new-start new-end) (< start (or end len +inf.0)))
(if skip? ((if peek?
(if (and end* (new-start . >= . end*)) regexp-match-peek-positions
(if failure-k (failure-k acc end* end) acc) regexp-match-positions)
(loop acc (add1 new-start) new-end #t)) rx string (add1 start) end)
(loop acc new-start new-end #f))) match)])
;; fail if rematch failed
(if (not match)
(failure-k acc start end)
(let ([mstart (caar match)]
[mend (cdar match)])
;; or if we have a zero-length match at the end
(if (= mstart mend (or end len +inf.0))
(failure-k acc start end)
(if port-success-k (if port-success-k
(port-success-k (port-success-k
(lambda (acc new-start new-end) (lambda (acc new-start new-end)
(cont acc (or new-end end len) new-start new-end)) (loop acc new-start new-end))
acc start end mstart mend) acc start end mstart mend)
(cont (cons (success-choose start mstart mend) acc) (loop (cons (success-choose start mstart mend) acc)
(or end len) mend end))) mend end))))))))))))]))
(failure-k acc start end)))))))]))
;; Returns all the positions at which the pattern matched. ;; Returns all the positions at which the pattern matched.
(define (regexp-match-positions* pattern string [start 0] [end #f]) (define (regexp-match-positions* pattern string [start 0] [end #f])
@ -262,7 +288,7 @@
;; port-success-choose: ;; port-success-choose:
(lambda (match-string leftovers) leftovers) (lambda (match-string leftovers) leftovers)
;; port-failure-k: ;; port-failure-k:
(lambda (acc leftover) (cons leftover acc)) (lambda (acc leftover) (if leftover (cons leftover acc) acc))
#t #t
#f)) #f))