original commit: 6d21ce768796964a5743d06e8a2686ed7f897cc1
This commit is contained in:
Matthew Flatt 2003-01-24 13:25:59 +00:00
parent 42c6cb30b1
commit 75444d4af8

View File

@ -10,6 +10,7 @@
regexp-replace-quote
regexp-match*
regexp-match-positions*
regexp-match-peek-positions*
regexp-split
regexp-match-exact?
regexp-match/fail-without-reading)
@ -127,17 +128,13 @@
(define (regexp-replace-quote s)
(unless (string? s)
(raise-type-error 'regexp-replace-quote "string" s))
(regexp-replace* "\\\\" s "\\\\\\\\"))
(regexp-replace* "&" (regexp-replace* "\\\\" s "\\\\\\\\") "\\\\&"))
(define regexp-match/fail-without-reading
(opt-lambda (pattern input-port [start-k 0] [end-k #f] [output-port #f])
(opt-lambda (pattern input-port [start-k 0] [end-k #f])
(unless (input-port? input-port)
(raise-type-error 'regexp-match/fail-without-reading "input port" input-port))
(let ([m (apply regexp-match-peek-positions
pattern input-port start-k end-k
(if output-port
(list output-port)
null))])
(let ([m (regexp-match-peek-positions pattern input-port start-k end-k)])
(and m
;; What happens if someone swipes our chars before we can get them?
(begin
@ -150,76 +147,193 @@
(cdr m)))))))))
;; Helper function for the regexp functions below.
(define (regexp-fn name success failure)
(opt-lambda (pattern string [start 0] [end (and (string? string)
(string-length string))])
(define (regexp-fn name success-k port-success-k failure-k port-failure-k
need-leftover? peek?)
(lambda (pattern string start end)
(unless (or (string? pattern) (regexp? pattern))
(raise-type-error name "regexp or string" pattern))
(unless (string? string)
(raise-type-error name "string" string))
(if peek?
(unless (input-port? string)
(raise-type-error name "input-port" string))
(unless (or (string? string) (input-port? string))
(raise-type-error name "string or input-port" string)))
(unless (and (number? start) (exact? start) (integer? start) (start . >= . 0))
(raise-type-error name "non-negative exact integer" start))
(unless (and (number? end) (exact? end) (integer? end) (end . >= . 0))
(raise-type-error name "non-negative exact integer" end))
(unless (start . <= . (string-length string))
(unless (or (not end)
(and (number? end) (exact? end) (integer? end) (end . >= . 0)))
(raise-type-error name "non-negative exact integer or false" end))
(unless (or (input-port? string)
(start . <= . (string-length string)))
(raise-mismatch-error
name
(format "starting offset index ~a out of range [0,~a] for string: "
start
(format "starting offset index out of range [0,~a]: "
(string-length string))
string))
(unless (<= start end (string-length string))
start))
(unless (or (not end)
(and (start . <= . end)
(or (input-port? string)
(end . <= . (string-length string)))))
(raise-mismatch-error
name
(format "ending offset index ~a out of range [~a,~a] for string: "
(format "ending offset index out of range [~a,~a]: "
end
start
(string-length string))
string))
start))
(let* ((expr (if (regexp? pattern)
pattern
(regexp pattern)))
(match (regexp-match-positions expr string start end)))
(if match
(let ((match-start (caar match))
(match-end (cdar match)))
(when (= match-start match-end)
(error name "pattern matched a zero-length substring"))
(success expr string start end match-start match-end))
(failure expr string start end)))))
(when (and (positive? start)
(input-port? string)
need-leftover?)
;; Skip start chars:
(let ([s (make-string 4096)])
(let loop ([n 0])
(unless (= n start)
(let ([m (read-string-avail! s string 0 (min (- start n) 4096))])
(unless (eof-object? m)
(loop (+ n m))))))))
(let ((expr (if (regexp? pattern)
pattern
(regexp pattern))))
(if (and (input-port? string)
port-success-k)
;; Input port match, get string
(let ([discarded 0]
[leftover-port (and need-leftover?
(open-output-string))])
(let ([match (regexp-match expr string
(if need-leftover? 0 start)
(and end (if need-leftover? (- end start) end))
(if need-leftover?
leftover-port
(make-custom-output-port
#f
(lambda (s start end flush?)
(let ([c (- end start)])
(set! discarded (+ c discarded))
c))
void
void)))]
[leftovers (and need-leftover?
(get-output-string leftover-port))])
(if match
(port-success-k expr string (car match)
(and end (- end
(if need-leftover?
(+ (string-length leftovers) start)
discarded)
(string-length (car match))))
leftovers)
(port-failure-k leftovers))))
;; String/port match, get positions
(let ([match ((if peek?
regexp-match-peek-positions
regexp-match-positions)
expr string start end)])
(if match
(let ((match-start (caar match))
(match-end (cdar match)))
(when (= match-start match-end)
(error name "pattern matched a zero-length substring"))
(success-k expr string start end match-start match-end))
(failure-k expr string start end)))))))
(define-syntax wrap
(syntax-rules ()
[(_ out orig)
(define out
(opt-lambda (pattern string [start 0] [end #f])
(orig pattern string start end)))]))
;; Returns all the positions at which the pattern matched.
(define regexp-match-positions*
(define -regexp-match-positions*
(regexp-fn 'regexp-match-positions*
;; success-k:
(lambda (expr string start end match-start match-end)
(cons (cons match-start match-end)
(regexp-match-positions* expr string match-end end)))
(if (string? string)
(regexp-match-positions* expr string match-end end)
;; Need to shift index of rest as reading:
(map (lambda (p)
(cons (+ match-end (car p))
(+ match-end (cdr p))))
(regexp-match-positions* expr string 0 (and end (- end match-end)))))))
;; port-success-k --- use string case
#f
;; fail-k:
(lambda (expr string start end)
null)))
null)
;; port-fail-k --- use string case
#f
#f
#f))
(wrap regexp-match-positions* -regexp-match-positions*)
;; Returns all the positions at which the pattern matched.
(define -regexp-match-peek-positions*
(regexp-fn 'regexp-match-peek-positions*
;; success-k:
(lambda (expr string start end match-start match-end)
(cons (cons match-start match-end)
(regexp-match-peek-positions* expr string match-end end)))
;; port-success-k --- use string case
#f
;; fail-k:
(lambda (expr string start end)
null)
;; port-fail-k --- use string case
#f
#f
#t))
(wrap regexp-match-peek-positions* -regexp-match-peek-positions*)
;; Splits a string into a list by removing any piece which matches
;; the pattern.
(define regexp-split
(define -regexp-split
(regexp-fn 'regexp-split
;; success-k
(lambda (expr string start end match-start match-end)
(cons
(substring string start match-start)
(regexp-split expr string match-end end)))
;; port-success-k:
(lambda (expr string match-string new-end leftovers)
(cons
leftovers
(regexp-split expr string 0 new-end)))
;; failure-k:
(lambda (expr string start end)
(list
(substring string start end)))))
(substring string start (or end (string-length string)))))
;; port-fail-k
(lambda (leftover)
(list leftover))
#t
#f))
(wrap regexp-split -regexp-split)
;; Returns all the matches for the pattern in the string.
(define regexp-match*
(define -regexp-match*
(regexp-fn 'regexp-match*
;; success-k:
(lambda (expr string start end match-start match-end)
(cons
(substring string match-start match-end)
(regexp-match* expr string match-end end)))
(lambda args
(list))))
;; port-success-k:
(lambda (expr string match-string new-end leftovers)
(cons
match-string
(regexp-match* expr string 0 new-end)))
;; fail-k:
(lambda (expr string start end)
null)
;; port-fail-k:
(lambda (leftover)
null)
#f
#f))
(wrap regexp-match* -regexp-match*)
(define regexp-match-exact?
(lambda (p s)