diff --git a/collects/mzlib/string.ss b/collects/mzlib/string.ss index 8c9378d..a83a7be 100644 --- a/collects/mzlib/string.ss +++ b/collects/mzlib/string.ss @@ -8,6 +8,9 @@ expr->string regexp-quote regexp-replace-quote + regexp-match* + regexp-match-positions* + regexp-split regexp-match-exact?) (require (lib "etc.ss")) @@ -129,6 +132,79 @@ (raise-type-error 'regexp-replace-quote "string" s)) (regexp-replace* "\\\\" s "\\\\\\\\")) + + ;; Helper function for the regexp functions below. + (define (regexp-fn name success failure) + (opt-lambda (pattern string [start 0] [end (and (string? string) + (string-length string))]) + + (unless (or (string? pattern) (regexp? pattern)) + (raise-type-error name "regexp or string" pattern)) + (unless (string? string) + (raise-type-error name "string" string)) + (unless (and (number? start) (exact? start) (integer? start) (start . >= . 0)) + (raise-type-error name "non-negative exact integer" start)) + (unless (and (number? end) (exact? end) (integer? end) (end . >= . 0)) + (raise-type-error name "non-negative exact integer" end)) + (unless (start . <= . (string-length string)) + (raise-mismatch-error + name + (format "starting offset index ~a out of range [0,~a] for string: " + start + (string-length string)) + string)) + (unless (<= start end (string-length string)) + (raise-mismatch-error + name + (format "ending offset index ~a out of range [~a,~a] for string: " + end + start + (string-length string)) + string)) + + (let* ((expr (if (regexp? pattern) + pattern + (regexp pattern))) + (match (regexp-match-positions expr string start end))) + (if match + (let ((match-start (caar match)) + (match-end (cdar match))) + (when (= match-start match-end) + (error name "pattern matched a zero-length substring")) + (success expr string start end match-start match-end)) + (failure expr string start end))))) + + ;; Returns all the positions at which the pattern matched. + (define regexp-match-positions* + (regexp-fn 'regexp-match-positions* + (lambda (expr string start end match-start match-end) + (cons (cons match-start match-end) + (regexp-match-positions* expr string match-end end))) + (lambda (expr string start end) + null))) + + ;; Splits a string into a list by removing any piece which matches + ;; the pattern. + (define regexp-split + (regexp-fn 'regexp-split + (lambda (expr string start end match-start match-end) + (cons + (substring string start match-start) + (regexp-split expr string match-end end))) + (lambda (expr string start end) + (list + (substring string start end))))) + + ;; Returns all the matches for the pattern in the string. + (define regexp-match* + (regexp-fn 'regexp-match* + (lambda (expr string start end match-start match-end) + (cons + (substring string match-start match-end) + (regexp-match* expr string match-end end))) + (lambda args + (list)))) + (define regexp-match-exact? (lambda (p s) (let ([m (regexp-match-positions p s)])