238 lines
11 KiB
Racket
238 lines
11 KiB
Racket
#lang scheme/base
|
|
|
|
(provide (struct-out tree) leaf? tree-foldl tree-foldr tree-for-each tree->list
|
|
and: or: not: tree-filter)
|
|
|
|
(require scheme/list)
|
|
|
|
;; ----------------------------------------------------------------------------
|
|
;; Type definitions
|
|
|
|
;; This is a generic tree representation, subs is a list of subtrees, or #f for
|
|
;; a leaf.
|
|
;; - `name' is a name for this tree as a byte string, with a "/" suffix for
|
|
;; non-leaf nodes (the filtering code relies on this assumption)
|
|
;; - `subs' is a list of subtrees, or #f to mark a leaf
|
|
;; - `path' is the full path for to this tree (eg, FS path or a subvesion url),
|
|
;; this code has no assumptions on what's in there
|
|
;; - `data' is a placeholder for additional data
|
|
|
|
(define-struct tree (name subs path [data #:auto #:mutable]))
|
|
(define-syntax-rule (leaf? tree) (not (tree-subs tree)))
|
|
|
|
;; ----------------------------------------------------------------------------
|
|
;; Tree utilities
|
|
|
|
(define (tree-foldl f init tree)
|
|
(let loop ([tree tree] [acc init])
|
|
(let ([subs (tree-subs tree)])
|
|
(if subs
|
|
(let dloop ([subs subs] [acc (f tree acc)])
|
|
(if (null? subs)
|
|
acc
|
|
(dloop (cdr subs) (loop (car subs) acc))))
|
|
(f tree acc)))))
|
|
|
|
(define (tree-foldr f init tree)
|
|
(let loop ([tree tree] [acc init])
|
|
(let ([subs (tree-subs tree)])
|
|
(f tree (if subs
|
|
(let dloop ([subs subs])
|
|
(if (null? subs)
|
|
acc
|
|
(loop (car subs) (dloop (cdr subs)))))
|
|
acc)))))
|
|
|
|
(define (tree-for-each f tree)
|
|
(let loop ([tree tree])
|
|
(f tree)
|
|
(let ([subs (tree-subs tree)])
|
|
(when subs (for-each loop subs)))))
|
|
|
|
(define (tree->list tree) (tree-foldr cons '() tree))
|
|
|
|
;; ----------------------------------------------------------------------------
|
|
;; Tree filtering
|
|
|
|
;; A tree-filtering predicate is a function that receives a tree, and returns
|
|
;; either #t/#f to include or exclude it, or it can return a function to be
|
|
;; applied on its sub-trees. This setup makes it possible to minimize the
|
|
;; filtering work that is needed (compared to the old code that would compare
|
|
;; full paths). `tree-filter' takes such a predicate and returns a tree with
|
|
;; filtered subtrees, so the smallest result is the empty root.
|
|
|
|
;; Turns a byte string with globbing into a regexp string. "*" turns to ".*",
|
|
;; "?" turns to ".", "[...]" ranges are used as is, "{...|...}" turns to
|
|
;; "(?:...|...)", backslash escapes as usual. If the glob is "*", "*/", "**",
|
|
;; or "**/", a corresponding symbol is returned; and if the glob is all
|
|
;; literal, a byte string is returned. No special treatment of "/"s, since
|
|
;; these are used against path elements. Note that this is applied on each
|
|
;; part of a glob string, so "{...|...}" should not have "/"s in them.
|
|
(define glob->regexp-or-literal
|
|
(let ([glob-item
|
|
((compose byte-regexp bytes-append)
|
|
#"(?:"
|
|
#"\\\\." ; escaped item
|
|
#"|"
|
|
#"[*?{|}]" ; wildcards and options -- the only 1-character matches
|
|
#"|\\[(?:\\^?\\]|\\^?[^]^])[^]]*\\]" ; [...] ranges
|
|
#")"
|
|
)]
|
|
[substs (map cons
|
|
(bytes->list #"*?{|}")
|
|
(regexp-split #rx" " #".* . (?: | )"))])
|
|
(define (subq bstr . xs) (regexp-quote (apply subbytes bstr xs)))
|
|
(lambda (glob)
|
|
(define (loop i ps r)
|
|
(if (null? ps)
|
|
(let ([r (apply bytes-append (reverse (cons (subq glob i) r)))])
|
|
(byte-regexp (bytes-append #"^" r #"$")))
|
|
(loop
|
|
(cdar ps) (cdr ps)
|
|
;; length=1 is only for `*' or `?'
|
|
(cons (if (= 1 (- (cdar ps) (caar ps)))
|
|
(cdr (or (assq (bytes-ref glob (caar ps)) substs)
|
|
(error "internal error")))
|
|
;; everything else passes through as is, including all
|
|
;; backslashed escapes (not always needed, but harmless)
|
|
(subbytes glob (caar ps) (cdar ps)))
|
|
;; and stuff between these things is getting quoted
|
|
(if (= i (caar ps))
|
|
r (cons (subq glob i (caar ps)) r))))))
|
|
(cond [(equal? #"*" glob) '*]
|
|
[(equal? #"*/" glob) '*/]
|
|
[(equal? #"**" glob) '**]
|
|
[(equal? #"**/" glob) '**/]
|
|
[(regexp-match #rx#"^[*]+/?$" glob)
|
|
(error 'glob->regexp-or-literal "bad glob: ~e" glob)]
|
|
[else (let ([ps (regexp-match-positions* glob-item glob)])
|
|
(if (null? ps) glob (loop 0 ps '())))]))))
|
|
|
|
(define (glob->pred glob)
|
|
(let loop ([xs (let* (;; ignore "//"s (otherwise they'd match nothing)
|
|
[glob (regexp-replace* #rx#"//+" glob #"/")]
|
|
;; ignore "/" prefix (filter never uses the root path)
|
|
[glob (regexp-replace #rx#"^/" glob #"")])
|
|
(regexp-split #rx#"(?<=/)(?=.)" glob))])
|
|
;; - xs is never null (`regexp-split' never returns null)
|
|
;; - an element without a trailing slash must be the last one
|
|
;; - an element with a trailing slash matches non-leaf nodes only, so need
|
|
;; to test subs for `*/' and `**/'
|
|
;; - things usually work out fine, but if it's the last element, then we
|
|
;; better return #t or #f rather than a continuation predicate, since a
|
|
;; predicate result will never be used and it will mess up (eg, a
|
|
;; predicate result for a leaf is considered true, but (not: (lambda (t)
|
|
;; #t)) is also a predicate) => use #t for `r' in this case
|
|
(let* ([x (car xs)]
|
|
[x* (glob->regexp-or-literal x)]
|
|
[xs (cdr xs)]
|
|
[r (or (null? xs) (loop xs))])
|
|
(cond
|
|
[(eq? '* x*) (lambda (t) #t)] ; it's the last one
|
|
[(eq? '*/ x*) (lambda (t) (and (tree-subs t) r))]
|
|
[(eq? '** x*) (lambda (t) #t)]
|
|
[(eq? '**/ x*) (letrec ([R (or: r (lambda (t) (and (tree-subs t) R)))])
|
|
R)]
|
|
;; if it's the last one and it has no "/" suffix then it will match
|
|
;; only leaves => in this case, allow matches on non-leaf nodes by
|
|
;; adding the "/" (if this is not done then it's very easy to make
|
|
;; mistakes)
|
|
[else
|
|
(let ([x*/ (cond [(or (pair? xs) (regexp-match? #rx#"/$" x)) #f]
|
|
[(bytes? x*) (bytes-append x* #"/")]
|
|
[(byte-regexp? x*)
|
|
(glob->regexp-or-literal (bytes-append x #"/"))]
|
|
[else (error 'glob->pred "bad glob part: ~e" x)])])
|
|
(cond
|
|
[(bytes? x*/)
|
|
(lambda (t)
|
|
(let ([x (if (tree-subs t) x*/ x*)])
|
|
(and (equal? x (tree-name t)) r)))]
|
|
[(byte-regexp? x*/)
|
|
(lambda (t)
|
|
(let ([x (if (tree-subs t) x*/ x*)])
|
|
(and (regexp-match? x (tree-name t)) r)))]
|
|
[(bytes? x*)
|
|
(lambda (t) (and (tree-subs t) (equal? x* (tree-name t)) r))]
|
|
[(byte-regexp? x*)
|
|
(lambda (t) (and (regexp-match? x* (tree-name t)) r))]))]))))
|
|
|
|
(define (pred/glob->pred pred/glob)
|
|
(cond [(string? pred/glob) (glob->pred (string->bytes/utf-8 pred/glob))]
|
|
[(bytes? pred/glob) (glob->pred pred/glob)]
|
|
[(procedure? pred/glob) pred/glob]
|
|
[else (error 'pred/glob->pred "bad predicate or glob: ~e" pred/glob)]))
|
|
|
|
;; Combine tree-filter predicates efficiently: stop when the result is #f or #t
|
|
;; for `and:' or `or:' resp., drop predicates that returned #t or #f for them.
|
|
(define-syntax-rule (define-combiner name: raw-name: pos neg)
|
|
(begin
|
|
(define raw-name:
|
|
(case-lambda [() (lambda (tree) pos)]
|
|
[(p1) p1]
|
|
[(p1 p2) (lambda (tree)
|
|
(let ([r1 (p1 tree)] [r2 (p2 tree)])
|
|
(cond [(eq? neg r1) neg]
|
|
[(eq? neg r2) neg]
|
|
[(eq? pos r1) r2]
|
|
[(eq? pos r2) r1]
|
|
[else (raw-name: r1 r2)])))]
|
|
[ps (lambda (tree)
|
|
(let loop ([ps ps] [rs '()])
|
|
(if (null? ps)
|
|
(apply raw-name: (reverse rs))
|
|
(let ([r ((car ps) tree)] [ps (cdr ps)])
|
|
(cond [(eq? neg r) neg]
|
|
[(eq? pos r) (loop ps rs)]
|
|
[else (loop ps (cons r rs))])))))]))
|
|
(define (name: . preds/globs)
|
|
(apply raw-name: (map pred/glob->pred preds/globs)))))
|
|
(define-combiner and: raw-and: #t #f)
|
|
(define-combiner or: raw-or: #f #t)
|
|
|
|
;; Negating predicates is a little tricky, for example (not: "*/*") would
|
|
;; filter out everything in all subtrees, and since empty non-leaf nodes are
|
|
;; usually dropped by `tree-filter', this means that the containing trees will
|
|
;; be dropped too, leaving only immediate leaves. The way to make this behave
|
|
;; more intuitively is to mark negated predicates, and when filtering with a
|
|
;; negated predicate the default is to keep empty non-leaf nodes rather than
|
|
;; drop them. (As an aside, this can also be used to make (not: (not: f))
|
|
;; return `f'.)
|
|
(define-struct negated (pred orig) #:property prop:procedure 0)
|
|
(define (raw-not: p)
|
|
(if (negated? p)
|
|
(negated-orig p)
|
|
(make-negated (lambda (tree)
|
|
(let ([r (p tree)])
|
|
(cond [(eq? #t r) #f]
|
|
[(eq? #f r) #t]
|
|
[else (raw-not: r)])))
|
|
p)))
|
|
(define (not: pred/glob)
|
|
(raw-not: (pred/glob->pred pred/glob)))
|
|
|
|
;; filter a whole tree
|
|
(define (tree-filter pred/glob tree)
|
|
(define pred (pred/glob->pred pred/glob))
|
|
(define (subs-filter pred tree)
|
|
(let* ([same? #t]
|
|
[subs (tree-subs tree)]
|
|
[new-subs (filter-map (lambda (sub)
|
|
(let ([r (loop sub pred)])
|
|
(unless (eq? r sub) (set! same? #f))
|
|
r))
|
|
subs)])
|
|
(cond [(and (null? new-subs) (not (negated? pred))) #f]
|
|
[same? tree]
|
|
[else (make-tree (tree-name tree) new-subs (tree-path tree))])))
|
|
(define (loop tree pred)
|
|
(let ([r (pred tree)])
|
|
(cond [(eq? #t r) tree]
|
|
[(eq? #f r) #f]
|
|
[(procedure? r) (and (tree-subs tree) (subs-filter r tree))]
|
|
[else (error 'tree-filter "bad result from predicate: ~e" r)])))
|
|
(if (leaf? tree)
|
|
(error 'tree-filter "expecting a non-leaf, got ~e" tree)
|
|
(or (subs-filter pred tree)
|
|
(make-tree (tree-name tree) '() (tree-path tree)))))
|