added make-regexp-tweaker (not used yet), and some code reorganization

svn: r12573
This commit is contained in:
Eli Barzilay 2008-11-23 00:17:39 +00:00
parent 62a8873198
commit ecb39eedee

View File

@ -32,21 +32,7 @@
s))))))) s)))))))
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Regexp helpers ;; Regexp utilities
(define (bstring-length s)
(if (bytes? s) (bytes-length s) (string-length s)))
(define (bstring->regexp name pattern)
(cond [(regexp? pattern) pattern]
[(byte-regexp? pattern) pattern]
[(string? pattern) (regexp pattern)]
[(bytes? pattern) (byte-regexp pattern)]
[else (raise-type-error
name "regexp, byte regexp, string, or byte string" pattern)]))
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Regexp helpers
(define regexp-quote-chars:s #rx"[][.*?+|(){}\\$^]") (define regexp-quote-chars:s #rx"[][.*?+|(){}\\$^]")
(define regexp-quote-chars:b #rx#"[][.*?+|(){}\\$^]") (define regexp-quote-chars:b #rx#"[][.*?+|(){}\\$^]")
@ -69,6 +55,33 @@
[else (raise-type-error 'regexp-replace-quote [else (raise-type-error 'regexp-replace-quote
"string or byte string" s)])) "string or byte string" s)]))
(define (make-regexp-tweaker tweaker)
(let ([t (make-weak-hasheq)])
(lambda (rx)
(define-syntax-rule (->str x) (if (bytes? x) (bytes->string/utf-8 x) x))
(define-syntax-rule (->bts x) (if (bytes? x) x (string->bytes/utf-8 x)))
(define-syntax-rule (tweak unwrap wrap convert)
(let ([tweaked (tweaker (unwrap rx))])
;; the tweaker is allowed to return a regexp
(if (or (regexp? tweaked) (byte-regexp? tweaked))
tweaked
(wrap (convert tweaked)))))
(define (run-tweak)
(cond [(pregexp? rx) (tweak object-name pregexp ->str)]
[(regexp? rx) (tweak object-name regexp ->str)]
[(byte-pregexp? rx) (tweak object-name byte-pregexp ->bts)]
[(byte-regexp? rx) (tweak object-name byte-regexp ->bts)]
;; allow getting a string, so if someone needs to go
;; from a string to a regexp, there's no penalty
;; because of the intermediate regexp being recreated
[(string? rx) (tweak (lambda (x) x) regexp ->str)]
[(bytes? rx) (tweak (lambda (x) x) byte-regexp ->bts)]
[else (raise-type-error 'regexp-tweaker
"regexp of any kind, string, or bytes"
rx)]))
(or (hash-ref t rx #f)
(let ([rx* (run-tweak)]) (hash-set! t rx rx*) rx*)))))
(define (regexp-try-match pattern input-port [start-k 0] [end-k #f] [out #f]) (define (regexp-try-match pattern input-port [start-k 0] [end-k #f] [out #f])
(unless (input-port? input-port) (unless (input-port? input-port)
(raise-type-error 'regexp-try-match (raise-type-error 'regexp-try-match
@ -91,10 +104,18 @@
(and p (subbytes s (- (car p) drop) (- (cdr p) drop)))) (and p (subbytes s (- (car p) drop) (- (cdr p) drop))))
(cdr m)))))))) (cdr m))))))))
;; Helper macro for the regexp functions below. ;; Helper macro for the regexp functions below, with some utilities.
(define-syntax regexp-loop (define (bstring-length s)
(syntax-rules () (if (bytes? s) (bytes-length s) (string-length s)))
[(regexp-loop name loop start end rx string (define (bstring->regexp name pattern)
(cond [(regexp? pattern) pattern]
[(byte-regexp? pattern) pattern]
[(string? pattern) (regexp pattern)]
[(bytes? pattern) (byte-regexp pattern)]
[else (raise-type-error
name "regexp, byte regexp, string, or byte string" pattern)]))
(define-syntax-rule (regexp-loop
name loop start end rx string
success-choose failure-k success-choose failure-k
port-success-k port-success-choose port-failure-k port-success-k port-success-choose port-failure-k
need-leftover? peek?) need-leftover? peek?)
@ -121,8 +142,7 @@
start)) start))
(unless (or (not end) (unless (or (not end)
(and (start . <= . end) (and (start . <= . end)
(or (input-port? string) (or (input-port? string) (and len (end . <= . len)))))
(and len (end . <= . len)))))
(raise-mismatch-error (raise-mismatch-error
'name 'name
(format "ending offset index out of range [~a,~a]: " start len) (format "ending offset index out of range [~a,~a]: " start len)
@ -235,12 +255,13 @@
(loop acc new-start new-end)) (loop acc new-start new-end))
acc start end mstart mend) acc start end mstart mend)
(loop (cons (success-choose start mstart mend) acc) (loop (cons (success-choose start mstart mend) acc)
mend end))))))))))))])) mend end)))))))))))))
;; Returns all the positions at which the pattern matched. ;; Returns all the positions at which the pattern matched.
(define (regexp-match-positions* pattern string [start 0] [end #f]) (define (regexp-match-positions* pattern string [start 0] [end #f])
(define rx (bstring->regexp 'regexp-match-positions* pattern)) (regexp-loop
(regexp-loop regexp-match-positions* loop start end rx string regexp-match-positions* loop start end
(bstring->regexp 'regexp-match-positions* pattern) string
;; success-choose: ;; success-choose:
(lambda (start mstart mend) (cons mstart mend)) (lambda (start mstart mend) (cons mstart mend))
;; failure-k: ;; failure-k:
@ -262,8 +283,9 @@
;; Returns all the positions at which the pattern matched. ;; Returns all the positions at which the pattern matched.
(define (regexp-match-peek-positions* pattern string [start 0] [end #f]) (define (regexp-match-peek-positions* pattern string [start 0] [end #f])
(define rx (bstring->regexp 'regexp-match-peek-positions* pattern)) (regexp-loop
(regexp-loop regexp-match-peek-positions* loop start end rx string regexp-match-peek-positions* loop start end
(bstring->regexp 'regexp-match-peek-positions* pattern) string
;; success-choose: ;; success-choose:
(lambda (start mstart mend) (cons mstart mend)) (lambda (start mstart mend) (cons mstart mend))
;; failure-k: ;; failure-k: