Major cleanup, switch regexp-fn to a macro to clarify things
svn: r8545
This commit is contained in:
parent
13a3c31ad5
commit
f3bbc44451
|
@ -9,6 +9,7 @@
|
||||||
regexp-split
|
regexp-split
|
||||||
regexp-match-exact?
|
regexp-match-exact?
|
||||||
regexp-try-match)
|
regexp-try-match)
|
||||||
|
(require (for-syntax "stxcase-scheme.ss"))
|
||||||
|
|
||||||
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
@ -29,8 +30,13 @@
|
||||||
(define (bstring-length s)
|
(define (bstring-length s)
|
||||||
(if (bytes? s) (bytes-length s) (string-length s)))
|
(if (bytes? s) (bytes-length s) (string-length s)))
|
||||||
|
|
||||||
(define (subbstring s st e)
|
(define (bstring->regexp name pattern)
|
||||||
(if (bytes? s) (subbytes s st e) (substring s st e)))
|
(cond [(regexp? pattern) pattern]
|
||||||
|
[(byte-regexp? pattern) pattern]
|
||||||
|
[(string? pattern) (regexp pattern)]
|
||||||
|
[(bytes? pattern) (byte-regexp pattern)]
|
||||||
|
[else (raise-type-error
|
||||||
|
name "regexp, byte regexp, string, or byte string" pattern)]))
|
||||||
|
|
||||||
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; Regexp helpers
|
;; Regexp helpers
|
||||||
|
@ -51,13 +57,10 @@
|
||||||
[else (string-append "(?i:" s ")")])))
|
[else (string-append "(?i:" s ")")])))
|
||||||
|
|
||||||
(define (regexp-replace-quote s)
|
(define (regexp-replace-quote s)
|
||||||
(let ([b? (cond [(bytes? s) #t]
|
(cond [(bytes? s) (regexp-replace* #rx#"[&\\]" s #"\\\\&")]
|
||||||
[(string? s) #f]
|
[(string? s) (regexp-replace* #rx"[&\\]" s "\\\\&")]
|
||||||
[else (raise-type-error 'regexp-replace-quote
|
[else (raise-type-error 'regexp-replace-quote
|
||||||
"string or byte string" s)])])
|
"string or byte string" s)]))
|
||||||
(if b?
|
|
||||||
(regexp-replace* #rx#"[&\\]" s #"\\\\&")
|
|
||||||
(regexp-replace* #rx"[&\\]" s "\\\\&"))))
|
|
||||||
|
|
||||||
(define (regexp-try-match pattern input-port [start-k 0] [end-k #f] [out #f])
|
(define (regexp-try-match pattern input-port [start-k 0] [end-k #f] [out #f])
|
||||||
(unless (input-port? input-port)
|
(unless (input-port? input-port)
|
||||||
|
@ -82,76 +85,63 @@
|
||||||
(cdr m))))))))
|
(cdr m))))))))
|
||||||
|
|
||||||
;; Helper function for the regexp functions below.
|
;; Helper function for the regexp functions below.
|
||||||
(define (regexp-fn name success-k port-success-k failure-k port-failure-k
|
(define-syntax make-regexp-loop
|
||||||
|
(syntax-rules ()
|
||||||
|
[(make-regexp-loop
|
||||||
|
name rx string success-k port-success-k failure-k port-failure-k
|
||||||
need-leftover? peek?)
|
need-leftover? peek?)
|
||||||
(lambda (pattern string start end)
|
(let ([len (cond [(string? string) (string-length string)]
|
||||||
(unless (or (string? pattern) (bytes? pattern)
|
[(bytes? string) (bytes-length string)]
|
||||||
(regexp? pattern) (byte-regexp? pattern))
|
[else #f])])
|
||||||
(raise-type-error name "regexp, byte regexp, string, or byte string" pattern))
|
|
||||||
(if peek?
|
(if peek?
|
||||||
(unless (input-port? string)
|
(unless (input-port? string)
|
||||||
(raise-type-error name "input port" string))
|
(raise-type-error name "input port" string))
|
||||||
(unless (or (string? string)
|
(unless (or len (input-port? string))
|
||||||
(bytes? string)
|
|
||||||
(input-port? string))
|
|
||||||
(raise-type-error name "string, byte string or input port" string)))
|
(raise-type-error name "string, byte string or input port" string)))
|
||||||
(unless (and (number? start) (exact? start) (integer? start) (start . >= . 0))
|
(lambda (start end)
|
||||||
|
(unless (and (number? start) (exact? start) (integer? start)
|
||||||
|
(start . >= . 0))
|
||||||
(raise-type-error name "non-negative exact integer" start))
|
(raise-type-error name "non-negative exact integer" start))
|
||||||
(unless (or (not end)
|
(unless (or (not end)
|
||||||
(and (number? end) (exact? end) (integer? end) (end . >= . 0)))
|
(and (number? end) (exact? end) (integer? end)
|
||||||
|
(end . >= . 0)))
|
||||||
(raise-type-error name "non-negative exact integer or false" end))
|
(raise-type-error name "non-negative exact integer or false" end))
|
||||||
(unless (or (input-port? string)
|
(unless (or (input-port? string) (and len (start . <= . len)))
|
||||||
(and (string? string)
|
|
||||||
(start . <= . (string-length string)))
|
|
||||||
(and (bytes? string)
|
|
||||||
(start . <= . (bytes-length string))))
|
|
||||||
(raise-mismatch-error
|
(raise-mismatch-error
|
||||||
name
|
name
|
||||||
(format "starting offset index out of range [0,~a]: "
|
(format "starting offset index out of range [0,~a]: " len)
|
||||||
(if (string? string)
|
|
||||||
(string-length string)
|
|
||||||
(bytes-length string)))
|
|
||||||
start))
|
start))
|
||||||
(unless (or (not end)
|
(unless (or (not end)
|
||||||
(and (start . <= . end)
|
(and (start . <= . end)
|
||||||
(or (input-port? string)
|
(or (input-port? string)
|
||||||
(and (string? string)
|
(and len (end . <= . len)))))
|
||||||
(end . <= . (string-length string)))
|
|
||||||
(and (bytes? string)
|
|
||||||
(end . <= . (bytes-length string))))))
|
|
||||||
(raise-mismatch-error
|
(raise-mismatch-error
|
||||||
name
|
name
|
||||||
(format "ending offset index out of range [~a,~a]: "
|
(format "ending offset index out of range [~a,~a]: " start len)
|
||||||
end
|
end))
|
||||||
(if (string? string)
|
|
||||||
(string-length string)
|
|
||||||
(bytes-length string)))
|
|
||||||
start))
|
|
||||||
|
|
||||||
(when (and (positive? start) (input-port? string) need-leftover?)
|
(when (and (positive? start) (input-port? string) need-leftover?)
|
||||||
;; Skip start chars:
|
;; Skip start chars:
|
||||||
(let ([s (make-bytes 4096)])
|
(let ([s (make-bytes 4096)])
|
||||||
(let loop ([n 0])
|
(let loop ([n 0])
|
||||||
(unless (= n start)
|
(unless (= n start)
|
||||||
(let ([m (read-bytes-avail! s string 0 (min (- start n) 4096))])
|
(let ([m (read-bytes-avail!
|
||||||
(unless (eof-object? m)
|
s string 0 (min (- start n) 4096))])
|
||||||
(loop (+ n m))))))))
|
(unless (eof-object? m) (loop (+ n m))))))))
|
||||||
|
|
||||||
(let ([expr (cond [(string? pattern) (regexp pattern)]
|
|
||||||
[(bytes? pattern) (byte-regexp pattern)]
|
|
||||||
[else pattern])])
|
|
||||||
(if (and (input-port? string) port-success-k)
|
(if (and (input-port? string) port-success-k)
|
||||||
;; Input port match, get string
|
;; Input port match, get string
|
||||||
(let ([discarded 0]
|
(let ([discarded 0]
|
||||||
[leftover-port (and need-leftover? (open-output-bytes))])
|
[leftover-port (and need-leftover? (open-output-bytes))])
|
||||||
(let ([match
|
(let ([match
|
||||||
(regexp-match
|
(regexp-match
|
||||||
expr string
|
rx string
|
||||||
(if need-leftover? 0 start)
|
(if need-leftover? 0 start)
|
||||||
(and end (if need-leftover? (- end start) end))
|
(and end (if need-leftover? (- end start) end))
|
||||||
(if need-leftover?
|
(if need-leftover?
|
||||||
leftover-port
|
leftover-port
|
||||||
(make-output-port 'counter
|
(make-output-port
|
||||||
|
'counter
|
||||||
always-evt
|
always-evt
|
||||||
(lambda (s start end flush? breakable?)
|
(lambda (s start end flush? breakable?)
|
||||||
(let ([c (- end start)])
|
(let ([c (- end start)])
|
||||||
|
@ -160,13 +150,13 @@
|
||||||
void)))]
|
void)))]
|
||||||
[leftovers
|
[leftovers
|
||||||
(and need-leftover?
|
(and need-leftover?
|
||||||
(if (and (regexp? pattern) (string? string))
|
(if (and (regexp? rx) (string? string))
|
||||||
(get-output-string leftover-port)
|
(get-output-string leftover-port)
|
||||||
(get-output-bytes leftover-port)))])
|
(get-output-bytes leftover-port)))])
|
||||||
(if match
|
(if match
|
||||||
(port-success-k expr string (car match)
|
(port-success-k
|
||||||
(and end (- end
|
(car match)
|
||||||
(if need-leftover?
|
(and end (- end (if need-leftover?
|
||||||
(+ (bstring-length leftovers) start)
|
(+ (bstring-length leftovers) start)
|
||||||
discarded)
|
discarded)
|
||||||
(bstring-length (car match))))
|
(bstring-length (car match))))
|
||||||
|
@ -176,115 +166,119 @@
|
||||||
(let ([match ((if peek?
|
(let ([match ((if peek?
|
||||||
regexp-match-peek-positions
|
regexp-match-peek-positions
|
||||||
regexp-match-positions)
|
regexp-match-positions)
|
||||||
expr string start end)])
|
rx string start end)])
|
||||||
(if match
|
(if match
|
||||||
(let ([match-start (caar match)]
|
(let ([match-start (caar match)]
|
||||||
[match-end (cdar match)])
|
[match-end (cdar match)])
|
||||||
(when (= match-start match-end)
|
(if (= match-start match-end)
|
||||||
(error name "pattern matched a zero-length substring"))
|
(error name
|
||||||
(success-k expr string start end match-start match-end))
|
"pattern matched a zero-length substring: ~e" rx)
|
||||||
(failure-k expr string start end)))))))
|
(success-k start end match-start match-end)))
|
||||||
|
(failure-k start end))))))]))
|
||||||
|
|
||||||
;; Returns all the positions at which the pattern matched.
|
;; Returns all the positions at which the pattern matched.
|
||||||
(define -regexp-match-positions*
|
(define (regexp-match-positions* pattern string [start 0] [end #f])
|
||||||
(regexp-fn 'regexp-match-positions*
|
(define rx (bstring->regexp 'regexp-match-positions* pattern))
|
||||||
|
(define loop
|
||||||
|
(make-regexp-loop
|
||||||
|
'regexp-match-positions* rx string
|
||||||
;; success-k:
|
;; success-k:
|
||||||
(lambda (expr string start end match-start match-end)
|
(lambda (start end match-start match-end)
|
||||||
(cons (cons match-start match-end)
|
(cons (cons match-start match-end)
|
||||||
(if (or (string? string) (bytes? string))
|
(if (or (string? string) (bytes? string))
|
||||||
(regexp-match-positions* expr string match-end end)
|
(loop match-end end)
|
||||||
;; Need to shift index of rest as reading:
|
;; Need to shift index of rest as reading:
|
||||||
(map (lambda (p)
|
(map (lambda (p)
|
||||||
(cons (+ match-end (car p))
|
(cons (+ match-end (car p)) (+ match-end (cdr p))))
|
||||||
(+ match-end (cdr p))))
|
(loop 0 (and end (- end match-end)))))))
|
||||||
(regexp-match-positions* expr string 0 (and end (- end match-end)))))))
|
;; port-success-k: use string case
|
||||||
;; port-success-k --- use string case
|
|
||||||
#f
|
#f
|
||||||
;; fail-k:
|
;; failure-k:
|
||||||
(lambda (expr string start end) null)
|
(lambda (start end) null)
|
||||||
;; port-fail-k --- use string case
|
;; port-fail-k: use string case
|
||||||
#f
|
#f
|
||||||
#f
|
#f
|
||||||
#f))
|
#f))
|
||||||
|
(loop start end))
|
||||||
(define (regexp-match-positions* pattern string [start 0] [end #f])
|
|
||||||
(-regexp-match-positions* pattern string start end))
|
|
||||||
|
|
||||||
;; Returns all the positions at which the pattern matched.
|
;; Returns all the positions at which the pattern matched.
|
||||||
(define -regexp-match-peek-positions*
|
(define (regexp-match-peek-positions* pattern string [start 0] [end #f])
|
||||||
(regexp-fn 'regexp-match-peek-positions*
|
(define rx (bstring->regexp 'regexp-match-peek-positions* pattern))
|
||||||
|
(define loop
|
||||||
|
(make-regexp-loop
|
||||||
|
'regexp-match-peek-positions* rx string
|
||||||
;; success-k:
|
;; success-k:
|
||||||
(lambda (expr string start end match-start match-end)
|
(lambda (start end match-start match-end)
|
||||||
(cons (cons match-start match-end)
|
(cons (cons match-start match-end)
|
||||||
(regexp-match-peek-positions* expr string match-end end)))
|
(loop match-end end)))
|
||||||
;; port-success-k --- use string case
|
;; port-success-k: use string case
|
||||||
#f
|
#f
|
||||||
;; fail-k:
|
;; failure-k:
|
||||||
(lambda (expr string start end) null)
|
(lambda (start end) null)
|
||||||
;; port-fail-k --- use string case
|
;; port-fail-k: use string case
|
||||||
#f
|
#f
|
||||||
#f
|
#f
|
||||||
#t))
|
#t))
|
||||||
|
(loop start end))
|
||||||
(define (regexp-match-peek-positions* pattern string [start 0] [end #f])
|
|
||||||
(-regexp-match-peek-positions* pattern string start end))
|
|
||||||
|
|
||||||
;; Splits a string into a list by removing any piece which matches
|
;; Splits a string into a list by removing any piece which matches
|
||||||
;; the pattern.
|
;; the pattern.
|
||||||
(define -regexp-split
|
(define (regexp-split pattern string [start 0] [end #f])
|
||||||
(regexp-fn 'regexp-split
|
(define rx (bstring->regexp 'regexp-split pattern))
|
||||||
;; success-k
|
(define buf (if (and (string? string) (byte-regexp? rx))
|
||||||
(lambda (expr string start end match-start match-end)
|
|
||||||
(let ([string (if (and (string? string)
|
|
||||||
(or (bytes? expr) (byte-regexp? expr)))
|
|
||||||
(string->bytes/utf-8 string (char->integer #\?))
|
(string->bytes/utf-8 string (char->integer #\?))
|
||||||
string)])
|
string))
|
||||||
(cons (subbstring string start match-start)
|
(define sub (if (bytes? buf) subbytes substring))
|
||||||
(regexp-split expr string match-end end))))
|
(define loop
|
||||||
|
(make-regexp-loop
|
||||||
|
'regexp-split rx buf
|
||||||
|
;; success-k:
|
||||||
|
(lambda (start end match-start match-end)
|
||||||
|
(cons (sub buf start match-start)
|
||||||
|
(loop match-end end)))
|
||||||
;; port-success-k:
|
;; port-success-k:
|
||||||
(lambda (expr string match-string new-end leftovers)
|
(lambda (match-string new-end leftovers)
|
||||||
(cons leftovers (regexp-split expr string 0 new-end)))
|
(cons leftovers (loop 0 new-end)))
|
||||||
;; failure-k:
|
;; failure-k:
|
||||||
(lambda (expr string start end)
|
(lambda (start end)
|
||||||
(list (subbstring string start (or end (bstring-length string)))))
|
(list (sub buf start (or end (bstring-length buf)))))
|
||||||
;; port-fail-k
|
;; port-fail-k
|
||||||
(lambda (leftover) (list leftover))
|
(lambda (leftover) (list leftover))
|
||||||
#t
|
#t
|
||||||
#f))
|
#f))
|
||||||
|
(loop start end))
|
||||||
(define (regexp-split pattern string [start 0] [end #f])
|
|
||||||
(-regexp-split pattern string start end))
|
|
||||||
|
|
||||||
;; Returns all the matches for the pattern in the string.
|
;; Returns all the matches for the pattern in the string.
|
||||||
(define -regexp-match*
|
(define (regexp-match* pattern string [start 0] [end #f])
|
||||||
(regexp-fn 'regexp-match*
|
(define rx (bstring->regexp 'regexp-match* pattern))
|
||||||
;; success-k:
|
(define buf (if (and (string? string) (byte-regexp? rx))
|
||||||
(lambda (expr string start end match-start match-end)
|
|
||||||
(let ([string (if (and (string? string)
|
|
||||||
(or (bytes? expr) (byte-regexp? expr)))
|
|
||||||
(string->bytes/utf-8 string (char->integer #\?))
|
(string->bytes/utf-8 string (char->integer #\?))
|
||||||
string)])
|
string))
|
||||||
(cons (subbstring string match-start match-end)
|
(define sub (if (bytes? buf) subbytes substring))
|
||||||
(regexp-match* expr string match-end end))))
|
(define loop
|
||||||
|
(make-regexp-loop
|
||||||
|
'regexp-match* rx buf
|
||||||
|
;; success-k:
|
||||||
|
(lambda (start end match-start match-end)
|
||||||
|
(cons (sub buf match-start match-end)
|
||||||
|
(loop match-end end)))
|
||||||
;; port-success-k:
|
;; port-success-k:
|
||||||
(lambda (expr string match-string new-end leftovers)
|
(lambda (match-string new-end leftovers)
|
||||||
(cons match-string (regexp-match* expr string 0 new-end)))
|
(cons match-string (loop 0 new-end)))
|
||||||
;; fail-k:
|
;; failure-k:
|
||||||
(lambda (expr string start end) null)
|
(lambda (start end) null)
|
||||||
;; port-fail-k:
|
;; port-fail-k:
|
||||||
(lambda (leftover) null)
|
(lambda (leftover) null)
|
||||||
#f
|
#f
|
||||||
#f))
|
#f))
|
||||||
|
(loop start end))
|
||||||
(define (regexp-match* pattern string [start 0] [end #f])
|
|
||||||
(-regexp-match* pattern string start end))
|
|
||||||
|
|
||||||
(define (regexp-match-exact? p s)
|
(define (regexp-match-exact? p s)
|
||||||
(let ([m (regexp-match-positions p s)])
|
(let ([m (regexp-match-positions p s)])
|
||||||
(and m
|
(and m (zero? (caar m))
|
||||||
(zero? (caar m))
|
(= (cdar m)
|
||||||
(if (or (byte-regexp? p) (bytes? p) (bytes? s))
|
(cond [(bytes? s) (bytes-length s)]
|
||||||
(= (cdar m) (if (bytes? s) (bytes-length s) (string-utf-8-length s)))
|
[(or (byte-regexp? p) (bytes? p)) (string-utf-8-length s)]
|
||||||
(= (cdar m) (string-length s))))))
|
[else (string-length s)])))))
|
||||||
|
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user