empty matches in regexp-X* makeover, part one
svn: r9594
This commit is contained in:
parent
89a12d4f56
commit
26a180f458
|
@ -128,7 +128,7 @@
|
||||||
(format "ending offset index out of range [~a,~a]: " start len)
|
(format "ending offset index out of range [~a,~a]: " start len)
|
||||||
end))
|
end))
|
||||||
(reverse
|
(reverse
|
||||||
(let loop ([acc '()] [start start] [end end] [skipped? #f])
|
(let loop ([acc '()] [start start] [end end])
|
||||||
(when (and need-leftover? (positive? start) (input-port? string))
|
(when (and need-leftover? (positive? start) (input-port? string))
|
||||||
;; Skip start chars:
|
;; Skip start chars:
|
||||||
(let ([s (make-bytes 4096)])
|
(let ([s (make-bytes 4096)])
|
||||||
|
@ -139,71 +139,97 @@
|
||||||
(unless (eof-object? m) (loop (+ n m))))))))
|
(unless (eof-object? m) (loop (+ n m))))))))
|
||||||
|
|
||||||
(if (and port-success-choose (input-port? string))
|
(if (and port-success-choose (input-port? string))
|
||||||
|
|
||||||
;; Input port match, get string
|
;; Input port match, get string
|
||||||
(let ([discarded/leftovers 0])
|
(let* ([discarded/leftovers 0]
|
||||||
(define match
|
[spitout (if need-leftover?
|
||||||
(if need-leftover?
|
(open-output-bytes)
|
||||||
(let ([leftover-port (open-output-bytes)])
|
|
||||||
(begin0 (regexp-match
|
|
||||||
rx string
|
|
||||||
(if skipped? 1 0)
|
|
||||||
(and end (if skipped? (- end start -1) (- end start)))
|
|
||||||
leftover-port)
|
|
||||||
(set! discarded/leftovers
|
|
||||||
(if (and (regexp? rx) (string? string))
|
|
||||||
(get-output-string leftover-port)
|
|
||||||
(get-output-bytes leftover-port)))))
|
|
||||||
(regexp-match
|
|
||||||
rx string start end
|
|
||||||
(make-output-port
|
(make-output-port
|
||||||
'counter always-evt
|
'counter always-evt
|
||||||
(lambda (s start end flush? breakable?)
|
(lambda (s start end flush? breakable?)
|
||||||
(let ([c (- end start)])
|
(let ([c (- end start)])
|
||||||
(set! discarded/leftovers (+ c discarded/leftovers))
|
(set! discarded/leftovers
|
||||||
|
(+ c discarded/leftovers))
|
||||||
c))
|
c))
|
||||||
void))))
|
void))])
|
||||||
(if match
|
(define match
|
||||||
(let* ([mlen (bstring-length (car match))]
|
(if need-leftover?
|
||||||
[skip? (zero? mlen)])
|
(let* ([end (and end (- end start))]
|
||||||
|
[m (regexp-match rx string 0 end spitout)]
|
||||||
|
;; re-match if we get a zero-length match at the
|
||||||
|
;; beginning
|
||||||
|
[m (if (and m ; we have a match
|
||||||
|
;; and it's an empty one
|
||||||
|
(zero? (bstring-length (car m)))
|
||||||
|
;; and it's at the beginning
|
||||||
|
(zero? (file-position spitout))
|
||||||
|
;; and we still have stuff to match
|
||||||
|
;;!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||||
|
(if end
|
||||||
|
(< 0 end)
|
||||||
|
(not (eof-object? (peek-byte string)))))
|
||||||
|
(regexp-match rx string 1 end spitout)
|
||||||
|
m)])
|
||||||
|
(set! discarded/leftovers (get-output-bytes spitout))
|
||||||
|
m)
|
||||||
|
(let ([m (regexp-match rx string start end spitout)])
|
||||||
|
;; re-match if we get a zero-length match at the
|
||||||
|
;; beginning
|
||||||
|
(if (and m
|
||||||
|
(zero? discarded/leftovers)
|
||||||
|
(zero? (bstring-length (car m))))
|
||||||
|
(regexp-match rx string (add1 start) (sub1 end) spitout)
|
||||||
|
m))))
|
||||||
|
(let ([end (and end match
|
||||||
|
(- end (if need-leftover?
|
||||||
|
(+ (bstring-length discarded/leftovers)
|
||||||
|
start)
|
||||||
|
discarded/leftovers)
|
||||||
|
(bstring-length (car match))))])
|
||||||
|
(if (and match
|
||||||
|
(or (< 0 (bstring-length (car match)))
|
||||||
|
(if end
|
||||||
|
(< 0 end)
|
||||||
|
(not (eof-object? (peek-byte string))))))
|
||||||
(loop (cons (port-success-choose (car match)
|
(loop (cons (port-success-choose (car match)
|
||||||
discarded/leftovers)
|
discarded/leftovers)
|
||||||
acc)
|
acc)
|
||||||
(if skip? 1 0)
|
0 end)
|
||||||
(and end
|
(port-failure-k acc discarded/leftovers))))
|
||||||
(- end (if need-leftover?
|
|
||||||
(+ (bstring-length discarded/leftovers)
|
|
||||||
start
|
|
||||||
(if skipped? 1 0))
|
|
||||||
discarded/leftovers)
|
|
||||||
mlen))
|
|
||||||
skip?))
|
|
||||||
(port-failure-k acc discarded/leftovers)))
|
|
||||||
;; String/port match, get positions
|
;; String/port match, get positions
|
||||||
(let ([match ((if peek?
|
(let ([match ((if peek?
|
||||||
regexp-match-peek-positions
|
regexp-match-peek-positions
|
||||||
regexp-match-positions)
|
regexp-match-positions)
|
||||||
rx string start end)]
|
rx string start end)])
|
||||||
[start (if skipped? (sub1 start) start)])
|
(if (not match)
|
||||||
(if match
|
(failure-k acc start end)
|
||||||
(let* ([mstart (caar match)]
|
(let* ([mstart (caar match)]
|
||||||
[mend (cdar match)]
|
[mend (cdar match)]
|
||||||
[skip? (= mstart mend)])
|
;; re-match if we get a zero-length match at the
|
||||||
;; The following two pieces are similar, but not
|
;; beginning
|
||||||
;; simple to combine and preserve efficiency
|
[match (if (and (= mstart mend start)
|
||||||
(define (cont acc end* new-start new-end)
|
(< start (or end len +inf.0)))
|
||||||
(if skip?
|
((if peek?
|
||||||
(if (and end* (new-start . >= . end*))
|
regexp-match-peek-positions
|
||||||
(if failure-k (failure-k acc end* end) acc)
|
regexp-match-positions)
|
||||||
(loop acc (add1 new-start) new-end #t))
|
rx string (add1 start) end)
|
||||||
(loop acc new-start new-end #f)))
|
match)])
|
||||||
|
;; fail if rematch failed
|
||||||
|
(if (not match)
|
||||||
|
(failure-k acc start end)
|
||||||
|
(let ([mstart (caar match)]
|
||||||
|
[mend (cdar match)])
|
||||||
|
;; or if we have a zero-length match at the end
|
||||||
|
(if (= mstart mend (or end len +inf.0))
|
||||||
|
(failure-k acc start end)
|
||||||
(if port-success-k
|
(if port-success-k
|
||||||
(port-success-k
|
(port-success-k
|
||||||
(lambda (acc new-start new-end)
|
(lambda (acc new-start new-end)
|
||||||
(cont acc (or new-end end len) new-start new-end))
|
(loop acc new-start new-end))
|
||||||
acc start end mstart mend)
|
acc start end mstart mend)
|
||||||
(cont (cons (success-choose start mstart mend) acc)
|
(loop (cons (success-choose start mstart mend) acc)
|
||||||
(or end len) mend end)))
|
mend end))))))))))))]))
|
||||||
(failure-k acc start end)))))))]))
|
|
||||||
|
|
||||||
;; Returns all the positions at which the pattern matched.
|
;; Returns all the positions at which the pattern matched.
|
||||||
(define (regexp-match-positions* pattern string [start 0] [end #f])
|
(define (regexp-match-positions* pattern string [start 0] [end #f])
|
||||||
|
@ -262,7 +288,7 @@
|
||||||
;; port-success-choose:
|
;; port-success-choose:
|
||||||
(lambda (match-string leftovers) leftovers)
|
(lambda (match-string leftovers) leftovers)
|
||||||
;; port-failure-k:
|
;; port-failure-k:
|
||||||
(lambda (acc leftover) (cons leftover acc))
|
(lambda (acc leftover) (if leftover (cons leftover acc) acc))
|
||||||
#t
|
#t
|
||||||
#f))
|
#f))
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user