Use regexp-match*' instead of regexp-split'.

Also, explicitly make it throw an error when it's getting an empty glob.
It used to just return the root in that case, but it's unclear if this
would be the right thing, or maybe return everything, or nothing.
This commit is contained in:
Eli Barzilay 2010-05-16 22:59:36 -04:00
parent 400bd5eaad
commit 2ecdc0f33a
2 changed files with 45 additions and 39 deletions

View File

@ -97,6 +97,8 @@
=> same-as-last-datums => same-as-last-datums
(e/filter #"*") (e/filter #"*")
=> same-as-last-datums => same-as-last-datums
(e/filter #"")
=error> "bad glob"
(e/filter #"A2/") (e/filter #"A2/")
=> '(-/ -/A2/ -/A2/5) => '(-/ -/A2/ -/A2/5)
(e/filter #"A1/B/") (e/filter #"A1/B/")

View File

@ -109,12 +109,14 @@
(if (null? ps) glob (loop 0 ps '())))])))) (if (null? ps) glob (loop 0 ps '())))]))))
(define (glob->pred glob) (define (glob->pred glob)
(let loop ([xs (let* (;; ignore "//"s (otherwise they'd match nothing) (let loop (;; split the glob to its parts, ignoring "//"s and a prefix "/"
[glob (regexp-replace* #rx#"//+" glob #"/")] ;; (filter never uses the root path)
;; ignore "/" prefix (filter never uses the root path) [xs (let ([xs (regexp-match* #rx#"[^/]+(?:/|$)" glob)])
[glob (regexp-replace #rx#"^/" glob #"")]) ;; it's not clear what should the meaning of an empty glob
(regexp-split #rx#"(?<=/)(?=.)" glob))]) ;; be: return everything? just the root? nothing? -- throw
;; - xs is never null (`regexp-split' never returns null) ;; an error for now, maybe change it later
(if (null? xs) (error 'glob->pred "bad glob: ~e" glob)
xs))])
;; - an element without a trailing slash must be the last one ;; - an element without a trailing slash must be the last one
;; - an element with a trailing slash matches non-leaf nodes only, so need ;; - an element with a trailing slash matches non-leaf nodes only, so need
;; to test subs for `*/' and `**/' ;; to test subs for `*/' and `**/'
@ -123,39 +125,41 @@
;; predicate result will never be used and it will mess up (eg, a ;; predicate result will never be used and it will mess up (eg, a
;; predicate result for a leaf is considered true, but (not: (lambda (t) ;; predicate result for a leaf is considered true, but (not: (lambda (t)
;; #t)) is also a predicate) => use #t for `r' in this case ;; #t)) is also a predicate) => use #t for `r' in this case
(let* ([x (car xs)] (or
[x* (glob->regexp-or-literal x)] (null? xs)
[xs (cdr xs)] (let* ([x (car xs)]
[r (or (null? xs) (loop xs))]) [x* (glob->regexp-or-literal x)]
(cond [xs (cdr xs)]
[(eq? '* x*) (lambda (t) #t)] ; it's the last one [r (loop xs)])
[(eq? '*/ x*) (lambda (t) (and (tree-subs t) r))] (cond
[(eq? '** x*) (lambda (t) #t)] [(eq? '* x*) (lambda (t) #t)] ; it's the last one
[(eq? '**/ x*) (letrec ([R (or: r (lambda (t) (and (tree-subs t) R)))]) [(eq? '*/ x*) (lambda (t) (and (tree-subs t) r))]
R)] [(eq? '** x*) (lambda (t) #t)]
;; if it's the last one and it has no "/" suffix then it will match [(eq? '**/ x*)
;; only leaves => in this case, allow matches on non-leaf nodes by (letrec ([R (or: r (lambda (t) (and (tree-subs t) R)))]) R)]
;; adding the "/" (if this is not done then it's very easy to make ;; if it's the last one and it has no "/" suffix then it will match
;; mistakes) ;; only leaves => in this case, allow matches on non-leaf nodes by
[else ;; adding the "/" (if this is not done then it's very easy to make
(let ([x*/ (cond [(or (pair? xs) (regexp-match? #rx#"/$" x)) #f] ;; mistakes)
[(bytes? x*) (bytes-append x* #"/")] [else
[(byte-regexp? x*) (let ([x*/ (cond [(or (pair? xs) (regexp-match? #rx#"/$" x)) #f]
(glob->regexp-or-literal (bytes-append x #"/"))] [(bytes? x*) (bytes-append x* #"/")]
[else (error 'glob->pred "bad glob part: ~e" x)])]) [(byte-regexp? x*)
(cond (glob->regexp-or-literal (bytes-append x #"/"))]
[(bytes? x*/) [else (error 'glob->pred "bad glob part: ~e" x)])])
(lambda (t) (cond
(let ([x (if (tree-subs t) x*/ x*)]) [(bytes? x*/)
(and (equal? x (tree-name t)) r)))] (lambda (t)
[(byte-regexp? x*/) (let ([x (if (tree-subs t) x*/ x*)])
(lambda (t) (and (equal? x (tree-name t)) r)))]
(let ([x (if (tree-subs t) x*/ x*)]) [(byte-regexp? x*/)
(and (regexp-match? x (tree-name t)) r)))] (lambda (t)
[(bytes? x*) (let ([x (if (tree-subs t) x*/ x*)])
(lambda (t) (and (tree-subs t) (equal? x* (tree-name t)) r))] (and (regexp-match? x (tree-name t)) r)))]
[(byte-regexp? x*) [(bytes? x*)
(lambda (t) (and (regexp-match? x* (tree-name t)) r))]))])))) (lambda (t) (and (tree-subs t) (equal? x* (tree-name t)) r))]
[(byte-regexp? x*)
(lambda (t) (and (regexp-match? x* (tree-name t)) r))]))])))))
(define (pred/glob->pred pred/glob) (define (pred/glob->pred pred/glob)
(cond [(string? pred/glob) (glob->pred (string->bytes/utf-8 pred/glob))] (cond [(string? pred/glob) (glob->pred (string->bytes/utf-8 pred/glob))]