added make-regexp-tweaker (not used yet), and some code reorganization
svn: r12573
This commit is contained in:
parent
62a8873198
commit
ecb39eedee
|
@ -32,21 +32,7 @@
|
||||||
s)))))))
|
s)))))))
|
||||||
|
|
||||||
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; Regexp helpers
|
;; Regexp utilities
|
||||||
|
|
||||||
(define (bstring-length s)
|
|
||||||
(if (bytes? s) (bytes-length s) (string-length s)))
|
|
||||||
|
|
||||||
(define (bstring->regexp name pattern)
|
|
||||||
(cond [(regexp? pattern) pattern]
|
|
||||||
[(byte-regexp? pattern) pattern]
|
|
||||||
[(string? pattern) (regexp pattern)]
|
|
||||||
[(bytes? pattern) (byte-regexp pattern)]
|
|
||||||
[else (raise-type-error
|
|
||||||
name "regexp, byte regexp, string, or byte string" pattern)]))
|
|
||||||
|
|
||||||
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
|
||||||
;; Regexp helpers
|
|
||||||
|
|
||||||
(define regexp-quote-chars:s #rx"[][.*?+|(){}\\$^]")
|
(define regexp-quote-chars:s #rx"[][.*?+|(){}\\$^]")
|
||||||
(define regexp-quote-chars:b #rx#"[][.*?+|(){}\\$^]")
|
(define regexp-quote-chars:b #rx#"[][.*?+|(){}\\$^]")
|
||||||
|
@ -69,6 +55,33 @@
|
||||||
[else (raise-type-error 'regexp-replace-quote
|
[else (raise-type-error 'regexp-replace-quote
|
||||||
"string or byte string" s)]))
|
"string or byte string" s)]))
|
||||||
|
|
||||||
|
(define (make-regexp-tweaker tweaker)
|
||||||
|
(let ([t (make-weak-hasheq)])
|
||||||
|
(lambda (rx)
|
||||||
|
(define-syntax-rule (->str x) (if (bytes? x) (bytes->string/utf-8 x) x))
|
||||||
|
(define-syntax-rule (->bts x) (if (bytes? x) x (string->bytes/utf-8 x)))
|
||||||
|
(define-syntax-rule (tweak unwrap wrap convert)
|
||||||
|
(let ([tweaked (tweaker (unwrap rx))])
|
||||||
|
;; the tweaker is allowed to return a regexp
|
||||||
|
(if (or (regexp? tweaked) (byte-regexp? tweaked))
|
||||||
|
tweaked
|
||||||
|
(wrap (convert tweaked)))))
|
||||||
|
(define (run-tweak)
|
||||||
|
(cond [(pregexp? rx) (tweak object-name pregexp ->str)]
|
||||||
|
[(regexp? rx) (tweak object-name regexp ->str)]
|
||||||
|
[(byte-pregexp? rx) (tweak object-name byte-pregexp ->bts)]
|
||||||
|
[(byte-regexp? rx) (tweak object-name byte-regexp ->bts)]
|
||||||
|
;; allow getting a string, so if someone needs to go
|
||||||
|
;; from a string to a regexp, there's no penalty
|
||||||
|
;; because of the intermediate regexp being recreated
|
||||||
|
[(string? rx) (tweak (lambda (x) x) regexp ->str)]
|
||||||
|
[(bytes? rx) (tweak (lambda (x) x) byte-regexp ->bts)]
|
||||||
|
[else (raise-type-error 'regexp-tweaker
|
||||||
|
"regexp of any kind, string, or bytes"
|
||||||
|
rx)]))
|
||||||
|
(or (hash-ref t rx #f)
|
||||||
|
(let ([rx* (run-tweak)]) (hash-set! t rx rx*) rx*)))))
|
||||||
|
|
||||||
(define (regexp-try-match pattern input-port [start-k 0] [end-k #f] [out #f])
|
(define (regexp-try-match pattern input-port [start-k 0] [end-k #f] [out #f])
|
||||||
(unless (input-port? input-port)
|
(unless (input-port? input-port)
|
||||||
(raise-type-error 'regexp-try-match
|
(raise-type-error 'regexp-try-match
|
||||||
|
@ -91,10 +104,18 @@
|
||||||
(and p (subbytes s (- (car p) drop) (- (cdr p) drop))))
|
(and p (subbytes s (- (car p) drop) (- (cdr p) drop))))
|
||||||
(cdr m))))))))
|
(cdr m))))))))
|
||||||
|
|
||||||
;; Helper macro for the regexp functions below.
|
;; Helper macro for the regexp functions below, with some utilities.
|
||||||
(define-syntax regexp-loop
|
(define (bstring-length s)
|
||||||
(syntax-rules ()
|
(if (bytes? s) (bytes-length s) (string-length s)))
|
||||||
[(regexp-loop name loop start end rx string
|
(define (bstring->regexp name pattern)
|
||||||
|
(cond [(regexp? pattern) pattern]
|
||||||
|
[(byte-regexp? pattern) pattern]
|
||||||
|
[(string? pattern) (regexp pattern)]
|
||||||
|
[(bytes? pattern) (byte-regexp pattern)]
|
||||||
|
[else (raise-type-error
|
||||||
|
name "regexp, byte regexp, string, or byte string" pattern)]))
|
||||||
|
(define-syntax-rule (regexp-loop
|
||||||
|
name loop start end rx string
|
||||||
success-choose failure-k
|
success-choose failure-k
|
||||||
port-success-k port-success-choose port-failure-k
|
port-success-k port-success-choose port-failure-k
|
||||||
need-leftover? peek?)
|
need-leftover? peek?)
|
||||||
|
@ -121,8 +142,7 @@
|
||||||
start))
|
start))
|
||||||
(unless (or (not end)
|
(unless (or (not end)
|
||||||
(and (start . <= . end)
|
(and (start . <= . end)
|
||||||
(or (input-port? string)
|
(or (input-port? string) (and len (end . <= . len)))))
|
||||||
(and len (end . <= . len)))))
|
|
||||||
(raise-mismatch-error
|
(raise-mismatch-error
|
||||||
'name
|
'name
|
||||||
(format "ending offset index out of range [~a,~a]: " start len)
|
(format "ending offset index out of range [~a,~a]: " start len)
|
||||||
|
@ -235,12 +255,13 @@
|
||||||
(loop acc new-start new-end))
|
(loop acc new-start new-end))
|
||||||
acc start end mstart mend)
|
acc start end mstart mend)
|
||||||
(loop (cons (success-choose start mstart mend) acc)
|
(loop (cons (success-choose start mstart mend) acc)
|
||||||
mend end))))))))))))]))
|
mend end)))))))))))))
|
||||||
|
|
||||||
;; Returns all the positions at which the pattern matched.
|
;; Returns all the positions at which the pattern matched.
|
||||||
(define (regexp-match-positions* pattern string [start 0] [end #f])
|
(define (regexp-match-positions* pattern string [start 0] [end #f])
|
||||||
(define rx (bstring->regexp 'regexp-match-positions* pattern))
|
(regexp-loop
|
||||||
(regexp-loop regexp-match-positions* loop start end rx string
|
regexp-match-positions* loop start end
|
||||||
|
(bstring->regexp 'regexp-match-positions* pattern) string
|
||||||
;; success-choose:
|
;; success-choose:
|
||||||
(lambda (start mstart mend) (cons mstart mend))
|
(lambda (start mstart mend) (cons mstart mend))
|
||||||
;; failure-k:
|
;; failure-k:
|
||||||
|
@ -262,8 +283,9 @@
|
||||||
|
|
||||||
;; Returns all the positions at which the pattern matched.
|
;; Returns all the positions at which the pattern matched.
|
||||||
(define (regexp-match-peek-positions* pattern string [start 0] [end #f])
|
(define (regexp-match-peek-positions* pattern string [start 0] [end #f])
|
||||||
(define rx (bstring->regexp 'regexp-match-peek-positions* pattern))
|
(regexp-loop
|
||||||
(regexp-loop regexp-match-peek-positions* loop start end rx string
|
regexp-match-peek-positions* loop start end
|
||||||
|
(bstring->regexp 'regexp-match-peek-positions* pattern) string
|
||||||
;; success-choose:
|
;; success-choose:
|
||||||
(lambda (start mstart mend) (cons mstart mend))
|
(lambda (start mstart mend) (cons mstart mend))
|
||||||
;; failure-k:
|
;; failure-k:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user