Major cleanup, switch regexp-fn to a macro to clarify things

svn: r8545
This commit is contained in:
Eli Barzilay 2008-02-05 22:02:34 +00:00
parent 13a3c31ad5
commit f3bbc44451

View File

@ -9,6 +9,7 @@
regexp-split regexp-split
regexp-match-exact? regexp-match-exact?
regexp-try-match) regexp-try-match)
(require (for-syntax "stxcase-scheme.ss"))
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -29,8 +30,13 @@
(define (bstring-length s) (define (bstring-length s)
(if (bytes? s) (bytes-length s) (string-length s))) (if (bytes? s) (bytes-length s) (string-length s)))
(define (subbstring s st e) (define (bstring->regexp name pattern)
(if (bytes? s) (subbytes s st e) (substring s st e))) (cond [(regexp? pattern) pattern]
[(byte-regexp? pattern) pattern]
[(string? pattern) (regexp pattern)]
[(bytes? pattern) (byte-regexp pattern)]
[else (raise-type-error
name "regexp, byte regexp, string, or byte string" pattern)]))
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Regexp helpers ;; Regexp helpers
@ -51,13 +57,10 @@
[else (string-append "(?i:" s ")")]))) [else (string-append "(?i:" s ")")])))
(define (regexp-replace-quote s) (define (regexp-replace-quote s)
(let ([b? (cond [(bytes? s) #t] (cond [(bytes? s) (regexp-replace* #rx#"[&\\]" s #"\\\\&")]
[(string? s) #f] [(string? s) (regexp-replace* #rx"[&\\]" s "\\\\&")]
[else (raise-type-error 'regexp-replace-quote [else (raise-type-error 'regexp-replace-quote
"string or byte string" s)])]) "string or byte string" s)]))
(if b?
(regexp-replace* #rx#"[&\\]" s #"\\\\&")
(regexp-replace* #rx"[&\\]" s "\\\\&"))))
(define (regexp-try-match pattern input-port [start-k 0] [end-k #f] [out #f]) (define (regexp-try-match pattern input-port [start-k 0] [end-k #f] [out #f])
(unless (input-port? input-port) (unless (input-port? input-port)
@ -82,209 +85,200 @@
(cdr m)))))))) (cdr m))))))))
;; Helper function for the regexp functions below. ;; Helper function for the regexp functions below.
(define (regexp-fn name success-k port-success-k failure-k port-failure-k (define-syntax make-regexp-loop
need-leftover? peek?) (syntax-rules ()
(lambda (pattern string start end) [(make-regexp-loop
(unless (or (string? pattern) (bytes? pattern) name rx string success-k port-success-k failure-k port-failure-k
(regexp? pattern) (byte-regexp? pattern)) need-leftover? peek?)
(raise-type-error name "regexp, byte regexp, string, or byte string" pattern)) (let ([len (cond [(string? string) (string-length string)]
(if peek? [(bytes? string) (bytes-length string)]
(unless (input-port? string) [else #f])])
(raise-type-error name "input port" string)) (if peek?
(unless (or (string? string) (unless (input-port? string)
(bytes? string) (raise-type-error name "input port" string))
(input-port? string)) (unless (or len (input-port? string))
(raise-type-error name "string, byte string or input port" string))) (raise-type-error name "string, byte string or input port" string)))
(unless (and (number? start) (exact? start) (integer? start) (start . >= . 0)) (lambda (start end)
(raise-type-error name "non-negative exact integer" start)) (unless (and (number? start) (exact? start) (integer? start)
(unless (or (not end) (start . >= . 0))
(and (number? end) (exact? end) (integer? end) (end . >= . 0))) (raise-type-error name "non-negative exact integer" start))
(raise-type-error name "non-negative exact integer or false" end)) (unless (or (not end)
(unless (or (input-port? string) (and (number? end) (exact? end) (integer? end)
(and (string? string) (end . >= . 0)))
(start . <= . (string-length string))) (raise-type-error name "non-negative exact integer or false" end))
(and (bytes? string) (unless (or (input-port? string) (and len (start . <= . len)))
(start . <= . (bytes-length string)))) (raise-mismatch-error
(raise-mismatch-error name
name (format "starting offset index out of range [0,~a]: " len)
(format "starting offset index out of range [0,~a]: " start))
(if (string? string) (unless (or (not end)
(string-length string) (and (start . <= . end)
(bytes-length string))) (or (input-port? string)
start)) (and len (end . <= . len)))))
(unless (or (not end) (raise-mismatch-error
(and (start . <= . end) name
(or (input-port? string) (format "ending offset index out of range [~a,~a]: " start len)
(and (string? string) end))
(end . <= . (string-length string)))
(and (bytes? string)
(end . <= . (bytes-length string))))))
(raise-mismatch-error
name
(format "ending offset index out of range [~a,~a]: "
end
(if (string? string)
(string-length string)
(bytes-length string)))
start))
(when (and (positive? start) (input-port? string) need-leftover?) (when (and (positive? start) (input-port? string) need-leftover?)
;; Skip start chars: ;; Skip start chars:
(let ([s (make-bytes 4096)]) (let ([s (make-bytes 4096)])
(let loop ([n 0]) (let loop ([n 0])
(unless (= n start) (unless (= n start)
(let ([m (read-bytes-avail! s string 0 (min (- start n) 4096))]) (let ([m (read-bytes-avail!
(unless (eof-object? m) s string 0 (min (- start n) 4096))])
(loop (+ n m)))))))) (unless (eof-object? m) (loop (+ n m))))))))
(let ([expr (cond [(string? pattern) (regexp pattern)] (if (and (input-port? string) port-success-k)
[(bytes? pattern) (byte-regexp pattern)] ;; Input port match, get string
[else pattern])]) (let ([discarded 0]
(if (and (input-port? string) port-success-k) [leftover-port (and need-leftover? (open-output-bytes))])
;; Input port match, get string (let ([match
(let ([discarded 0] (regexp-match
[leftover-port (and need-leftover? (open-output-bytes))]) rx string
(let ([match (if need-leftover? 0 start)
(regexp-match (and end (if need-leftover? (- end start) end))
expr string (if need-leftover?
(if need-leftover? 0 start) leftover-port
(and end (if need-leftover? (- end start) end)) (make-output-port
(if need-leftover? 'counter
leftover-port always-evt
(make-output-port 'counter (lambda (s start end flush? breakable?)
always-evt (let ([c (- end start)])
(lambda (s start end flush? breakable?) (set! discarded (+ c discarded))
(let ([c (- end start)]) c))
(set! discarded (+ c discarded)) void)))]
c)) [leftovers
void)))] (and need-leftover?
[leftovers (if (and (regexp? rx) (string? string))
(and need-leftover? (get-output-string leftover-port)
(if (and (regexp? pattern) (string? string)) (get-output-bytes leftover-port)))])
(get-output-string leftover-port) (if match
(get-output-bytes leftover-port)))]) (port-success-k
(if match (car match)
(port-success-k expr string (car match) (and end (- end (if need-leftover?
(and end (- end (+ (bstring-length leftovers) start)
(if need-leftover? discarded)
(+ (bstring-length leftovers) start) (bstring-length (car match))))
discarded) leftovers)
(bstring-length (car match)))) (port-failure-k leftovers))))
leftovers) ;; String/port match, get positions
(port-failure-k leftovers)))) (let ([match ((if peek?
;; String/port match, get positions regexp-match-peek-positions
(let ([match ((if peek? regexp-match-positions)
regexp-match-peek-positions rx string start end)])
regexp-match-positions) (if match
expr string start end)]) (let ([match-start (caar match)]
(if match [match-end (cdar match)])
(let ([match-start (caar match)] (if (= match-start match-end)
[match-end (cdar match)]) (error name
(when (= match-start match-end) "pattern matched a zero-length substring: ~e" rx)
(error name "pattern matched a zero-length substring")) (success-k start end match-start match-end)))
(success-k expr string start end match-start match-end)) (failure-k start end))))))]))
(failure-k expr string start end)))))))
;; Returns all the positions at which the pattern matched. ;; Returns all the positions at which the pattern matched.
(define -regexp-match-positions*
(regexp-fn 'regexp-match-positions*
;; success-k:
(lambda (expr string start end match-start match-end)
(cons (cons match-start match-end)
(if (or (string? string) (bytes? string))
(regexp-match-positions* expr string match-end end)
;; Need to shift index of rest as reading:
(map (lambda (p)
(cons (+ match-end (car p))
(+ match-end (cdr p))))
(regexp-match-positions* expr string 0 (and end (- end match-end)))))))
;; port-success-k --- use string case
#f
;; fail-k:
(lambda (expr string start end) null)
;; port-fail-k --- use string case
#f
#f
#f))
(define (regexp-match-positions* pattern string [start 0] [end #f]) (define (regexp-match-positions* pattern string [start 0] [end #f])
(-regexp-match-positions* pattern string start end)) (define rx (bstring->regexp 'regexp-match-positions* pattern))
(define loop
(make-regexp-loop
'regexp-match-positions* rx string
;; success-k:
(lambda (start end match-start match-end)
(cons (cons match-start match-end)
(if (or (string? string) (bytes? string))
(loop match-end end)
;; Need to shift index of rest as reading:
(map (lambda (p)
(cons (+ match-end (car p)) (+ match-end (cdr p))))
(loop 0 (and end (- end match-end)))))))
;; port-success-k: use string case
#f
;; failure-k:
(lambda (start end) null)
;; port-fail-k: use string case
#f
#f
#f))
(loop start end))
;; Returns all the positions at which the pattern matched. ;; Returns all the positions at which the pattern matched.
(define -regexp-match-peek-positions*
(regexp-fn 'regexp-match-peek-positions*
;; success-k:
(lambda (expr string start end match-start match-end)
(cons (cons match-start match-end)
(regexp-match-peek-positions* expr string match-end end)))
;; port-success-k --- use string case
#f
;; fail-k:
(lambda (expr string start end) null)
;; port-fail-k --- use string case
#f
#f
#t))
(define (regexp-match-peek-positions* pattern string [start 0] [end #f]) (define (regexp-match-peek-positions* pattern string [start 0] [end #f])
(-regexp-match-peek-positions* pattern string start end)) (define rx (bstring->regexp 'regexp-match-peek-positions* pattern))
(define loop
(make-regexp-loop
'regexp-match-peek-positions* rx string
;; success-k:
(lambda (start end match-start match-end)
(cons (cons match-start match-end)
(loop match-end end)))
;; port-success-k: use string case
#f
;; failure-k:
(lambda (start end) null)
;; port-fail-k: use string case
#f
#f
#t))
(loop start end))
;; Splits a string into a list by removing any piece which matches ;; Splits a string into a list by removing any piece which matches
;; the pattern. ;; the pattern.
(define -regexp-split
(regexp-fn 'regexp-split
;; success-k
(lambda (expr string start end match-start match-end)
(let ([string (if (and (string? string)
(or (bytes? expr) (byte-regexp? expr)))
(string->bytes/utf-8 string (char->integer #\?))
string)])
(cons (subbstring string start match-start)
(regexp-split expr string match-end end))))
;; port-success-k:
(lambda (expr string match-string new-end leftovers)
(cons leftovers (regexp-split expr string 0 new-end)))
;; failure-k:
(lambda (expr string start end)
(list (subbstring string start (or end (bstring-length string)))))
;; port-fail-k
(lambda (leftover) (list leftover))
#t
#f))
(define (regexp-split pattern string [start 0] [end #f]) (define (regexp-split pattern string [start 0] [end #f])
(-regexp-split pattern string start end)) (define rx (bstring->regexp 'regexp-split pattern))
(define buf (if (and (string? string) (byte-regexp? rx))
(string->bytes/utf-8 string (char->integer #\?))
string))
(define sub (if (bytes? buf) subbytes substring))
(define loop
(make-regexp-loop
'regexp-split rx buf
;; success-k:
(lambda (start end match-start match-end)
(cons (sub buf start match-start)
(loop match-end end)))
;; port-success-k:
(lambda (match-string new-end leftovers)
(cons leftovers (loop 0 new-end)))
;; failure-k:
(lambda (start end)
(list (sub buf start (or end (bstring-length buf)))))
;; port-fail-k
(lambda (leftover) (list leftover))
#t
#f))
(loop start end))
;; Returns all the matches for the pattern in the string. ;; Returns all the matches for the pattern in the string.
(define -regexp-match*
(regexp-fn 'regexp-match*
;; success-k:
(lambda (expr string start end match-start match-end)
(let ([string (if (and (string? string)
(or (bytes? expr) (byte-regexp? expr)))
(string->bytes/utf-8 string (char->integer #\?))
string)])
(cons (subbstring string match-start match-end)
(regexp-match* expr string match-end end))))
;; port-success-k:
(lambda (expr string match-string new-end leftovers)
(cons match-string (regexp-match* expr string 0 new-end)))
;; fail-k:
(lambda (expr string start end) null)
;; port-fail-k:
(lambda (leftover) null)
#f
#f))
(define (regexp-match* pattern string [start 0] [end #f]) (define (regexp-match* pattern string [start 0] [end #f])
(-regexp-match* pattern string start end)) (define rx (bstring->regexp 'regexp-match* pattern))
(define buf (if (and (string? string) (byte-regexp? rx))
(string->bytes/utf-8 string (char->integer #\?))
string))
(define sub (if (bytes? buf) subbytes substring))
(define loop
(make-regexp-loop
'regexp-match* rx buf
;; success-k:
(lambda (start end match-start match-end)
(cons (sub buf match-start match-end)
(loop match-end end)))
;; port-success-k:
(lambda (match-string new-end leftovers)
(cons match-string (loop 0 new-end)))
;; failure-k:
(lambda (start end) null)
;; port-fail-k:
(lambda (leftover) null)
#f
#f))
(loop start end))
(define (regexp-match-exact? p s) (define (regexp-match-exact? p s)
(let ([m (regexp-match-positions p s)]) (let ([m (regexp-match-positions p s)])
(and m (and m (zero? (caar m))
(zero? (caar m)) (= (cdar m)
(if (or (byte-regexp? p) (bytes? p) (bytes? s)) (cond [(bytes? s) (bytes-length s)]
(= (cdar m) (if (bytes? s) (bytes-length s) (string-utf-8-length s))) [(or (byte-regexp? p) (bytes? p)) (string-utf-8-length s)]
(= (cdar m) (string-length s)))))) [else (string-length s)])))))
) )