yet further improvements

svn: r8553
This commit is contained in:
Eli Barzilay 2008-02-06 10:14:24 +00:00
parent 15304f5870
commit ab64d26a21

View File

@ -84,144 +84,139 @@
(and p (subbytes s (- (car p) drop) (- (cdr p) drop)))) (and p (subbytes s (- (car p) drop) (- (cdr p) drop))))
(cdr m)))))))) (cdr m))))))))
;; Helper function for the regexp functions below. ;; Helper macro for the regexp functions below.
(define-syntax make-regexp-loop (define-syntax regexp-loop
(syntax-rules () (syntax-rules ()
[(make-regexp-loop [(regexp-loop name loop start end rx string
name rx string success-k port-success-k failure-k port-failure-k success-k port-success-k failure-k port-failure-k
need-leftover? peek?) need-leftover? peek?)
(let ([len (cond [(string? string) (string-length string)] (let ([len (cond [(string? string) (string-length string)]
[(bytes? string) (bytes-length string)] [(bytes? string) (bytes-length string)]
[else #f])]) [else #f])])
(if peek? (if peek?
(unless (input-port? string) (unless (input-port? string)
(raise-type-error name "input port" string)) (raise-type-error 'name "input port" string))
(unless (or len (input-port? string)) (unless (or len (input-port? string))
(raise-type-error name "string, byte string or input port" string))) (raise-type-error
(lambda (acc start end) 'name "string, byte string or input port" string)))
(unless (and (number? start) (exact? start) (integer? start) (unless (and (number? start) (exact? start) (integer? start)
(start . >= . 0)) (start . >= . 0))
(raise-type-error name "non-negative exact integer" start)) (raise-type-error 'name "non-negative exact integer" start))
(unless (or (not end) (unless (or (not end)
(and (number? end) (exact? end) (integer? end) (and (number? end) (exact? end) (integer? end)
(end . >= . 0))) (end . >= . 0)))
(raise-type-error name "non-negative exact integer or false" end)) (raise-type-error 'name "non-negative exact integer or false" end))
(unless (or (input-port? string) (and len (start . <= . len))) (unless (or (input-port? string) (and len (start . <= . len)))
(raise-mismatch-error (raise-mismatch-error
name 'name
(format "starting offset index out of range [0,~a]: " len) (format "starting offset index out of range [0,~a]: " len)
start)) start))
(unless (or (not end) (unless (or (not end)
(and (start . <= . end) (and (start . <= . end)
(or (input-port? string) (or (input-port? string)
(and len (end . <= . len))))) (and len (end . <= . len)))))
(raise-mismatch-error (raise-mismatch-error
name 'name
(format "ending offset index out of range [~a,~a]: " start len) (format "ending offset index out of range [~a,~a]: " start len)
end)) end))
(reverse
(let loop ([acc '()] [start start] [end end])
(when (and (positive? start) (input-port? string) need-leftover?)
;; Skip start chars:
(let ([s (make-bytes 4096)])
(let loop ([n 0])
(unless (= n start)
(let ([m (read-bytes-avail!
s string 0 (min (- start n) 4096))])
(unless (eof-object? m) (loop (+ n m))))))))
(when (and (positive? start) (input-port? string) need-leftover?) (if (and port-success-k (input-port? string))
;; Skip start chars: ;; Input port match, get string
(let ([s (make-bytes 4096)]) (let ([discarded 0]
(let loop ([n 0]) [leftover-port (and need-leftover? (open-output-bytes))])
(unless (= n start) (let ([match
(let ([m (read-bytes-avail! (regexp-match
s string 0 (min (- start n) 4096))]) rx string
(unless (eof-object? m) (loop (+ n m)))))))) (if need-leftover? 0 start)
(and end (if need-leftover? (- end start) end))
(if (and (input-port? string) port-success-k) (if need-leftover?
;; Input port match, get string leftover-port
(let ([discarded 0] (make-output-port
[leftover-port (and need-leftover? (open-output-bytes))]) 'counter
(let ([match always-evt
(regexp-match (lambda (s start end flush? breakable?)
rx string (let ([c (- end start)])
(if need-leftover? 0 start) (set! discarded (+ c discarded))
(and end (if need-leftover? (- end start) end)) c))
(if need-leftover? void)))]
leftover-port [leftovers
(make-output-port (and need-leftover?
'counter (if (and (regexp? rx) (string? string))
always-evt (get-output-string leftover-port)
(lambda (s start end flush? breakable?) (get-output-bytes leftover-port)))])
(let ([c (- end start)]) (if match
(set! discarded (+ c discarded)) (port-success-k
c)) acc
void)))] (car match)
[leftovers (and end (- end (if need-leftover?
(and need-leftover? (+ (bstring-length leftovers) start)
(if (and (regexp? rx) (string? string)) discarded)
(get-output-string leftover-port) (bstring-length (car match))))
(get-output-bytes leftover-port)))]) leftovers)
(if match (port-failure-k acc leftovers))))
(port-success-k ;; String/port match, get positions
acc (let ([match ((if peek?
(car match) regexp-match-peek-positions
(and end (- end (if need-leftover? regexp-match-positions)
(+ (bstring-length leftovers) start) rx string start end)])
discarded) (if match
(bstring-length (car match)))) (let ([match-start (caar match)]
leftovers) [match-end (cdar match)])
(port-failure-k acc leftovers)))) (if (= match-start match-end)
;; String/port match, get positions (error 'name
(let ([match ((if peek? "pattern matched a zero-length substring: ~e" rx)
regexp-match-peek-positions (success-k acc start end match-start match-end)))
regexp-match-positions) (failure-k acc start end)))))))]))
rx string start end)])
(if match
(let ([match-start (caar match)]
[match-end (cdar match)])
(if (= match-start match-end)
(error name
"pattern matched a zero-length substring: ~e" rx)
(success-k acc start end match-start match-end)))
(failure-k acc start end))))))]))
;; Returns all the positions at which the pattern matched. ;; Returns all the positions at which the pattern matched.
(define (regexp-match-positions* pattern string [start 0] [end #f]) (define (regexp-match-positions* pattern string [start 0] [end #f])
(define rx (bstring->regexp 'regexp-match-positions* pattern)) (define rx (bstring->regexp 'regexp-match-positions* pattern))
(define loop (regexp-loop regexp-match-positions* loop start end rx string
(make-regexp-loop ;; success-k:
'regexp-match-positions* rx string (lambda (acc start end match-start match-end)
;; success-k: (let ([acc (cons (cons match-start match-end) acc)])
(lambda (acc start end match-start match-end) (if (or (string? string) (bytes? string))
(let ([acc (cons (cons match-start match-end) acc)]) (loop acc match-end end)
(if (or (string? string) (bytes? string)) ;; Need to shift index of rest as reading, cannot do a
(loop acc match-end end) ;; tail call without adding another state variable to the loop:
;; Need to shift index of rest as reading, cannot do a (append (map (lambda (p)
;; tail call without adding another state variable to the loop: (cons (+ match-end (car p)) (+ match-end (cdr p))))
(append (map (lambda (p) (loop '() 0 (and end (- end match-end))))
(cons (+ match-end (car p)) (+ match-end (cdr p)))) acc))))
(loop '() 0 (and end (- end match-end)))) ;; port-success-k: use string case
acc)))) #f
;; port-success-k: use string case ;; failure-k:
#f (lambda (acc start end) acc)
;; failure-k: ;; port-fail-k: use string case
(lambda (acc start end) acc) #f
;; port-fail-k: use string case #f
#f #f))
#f
#f))
(reverse (loop '() start end)))
;; Returns all the positions at which the pattern matched. ;; Returns all the positions at which the pattern matched.
(define (regexp-match-peek-positions* pattern string [start 0] [end #f]) (define (regexp-match-peek-positions* pattern string [start 0] [end #f])
(define rx (bstring->regexp 'regexp-match-peek-positions* pattern)) (define rx (bstring->regexp 'regexp-match-peek-positions* pattern))
(define loop (regexp-loop regexp-match-peek-positions* loop start end rx string
(make-regexp-loop ;; success-k:
'regexp-match-peek-positions* rx string (lambda (acc start end match-start match-end)
;; success-k: (loop (cons (cons match-start match-end) acc) match-end end))
(lambda (acc start end match-start match-end) ;; port-success-k: use string case
(loop (cons (cons match-start match-end) acc) match-end end)) #f
;; port-success-k: use string case ;; failure-k:
#f (lambda (acc start end) acc)
;; failure-k: ;; port-fail-k: use string case
(lambda (acc start end) acc) #f
;; port-fail-k: use string case #f
#f #t))
#f
#t))
(reverse (loop '() start end)))
;; Splits a string into a list by removing any piece which matches ;; Splits a string into a list by removing any piece which matches
;; the pattern. ;; the pattern.
@ -231,23 +226,20 @@
(string->bytes/utf-8 string (char->integer #\?)) (string->bytes/utf-8 string (char->integer #\?))
string)) string))
(define sub (if (bytes? buf) subbytes substring)) (define sub (if (bytes? buf) subbytes substring))
(define loop (regexp-loop regexp-split loop start end rx buf
(make-regexp-loop ;; success-k:
'regexp-split rx buf (lambda (acc start end match-start match-end)
;; success-k: (loop (cons (sub buf start match-start) acc) match-end end))
(lambda (acc start end match-start match-end) ;; port-success-k:
(loop (cons (sub buf start match-start) acc) match-end end)) (lambda (acc match-string new-end leftovers)
;; port-success-k: (loop (cons leftovers acc) 0 new-end))
(lambda (acc match-string new-end leftovers) ;; failure-k:
(loop (cons leftovers acc) 0 new-end)) (lambda (acc start end)
;; failure-k: (cons (sub buf start (or end (bstring-length buf))) acc))
(lambda (acc start end) ;; port-fail-k
(cons (sub buf start (or end (bstring-length buf))) acc)) (lambda (acc leftover) (cons leftover acc))
;; port-fail-k #t
(lambda (acc leftover) (cons leftover acc)) #f))
#t
#f))
(reverse (loop '() start end)))
;; Returns all the matches for the pattern in the string. ;; Returns all the matches for the pattern in the string.
(define (regexp-match* pattern string [start 0] [end #f]) (define (regexp-match* pattern string [start 0] [end #f])
@ -256,22 +248,19 @@
(string->bytes/utf-8 string (char->integer #\?)) (string->bytes/utf-8 string (char->integer #\?))
string)) string))
(define sub (if (bytes? buf) subbytes substring)) (define sub (if (bytes? buf) subbytes substring))
(define loop (regexp-loop regexp-match* loop start end rx buf
(make-regexp-loop ;; success-k:
'regexp-match* rx buf (lambda (acc start end match-start match-end)
;; success-k: (loop (cons (sub buf match-start match-end) acc) match-end end))
(lambda (acc start end match-start match-end) ;; port-success-k:
(loop (cons (sub buf match-start match-end) acc) match-end end)) (lambda (acc match-string new-end leftovers)
;; port-success-k: (loop (cons match-string acc) 0 new-end))
(lambda (acc match-string new-end leftovers) ;; failure-k:
(loop (cons match-string acc) 0 new-end)) (lambda (acc start end) acc)
;; failure-k: ;; port-fail-k:
(lambda (acc start end) acc) (lambda (acc leftover) acc)
;; port-fail-k: #f
(lambda (acc leftover) acc) #f))
#f
#f))
(reverse (loop '() start end)))
(define (regexp-match-exact? p s) (define (regexp-match-exact? p s)
(let ([m (regexp-match-positions p s)]) (let ([m (regexp-match-positions p s)])