* Including full path for each node in the tree -- this means that any

subtree is complete with no need to rehack its root.  Simplifies a
  bunch of code, and makes some utilities redundant.
* Using a single struct for all nodes, with subs=#f to mark files
* Names of struct is generic, no relation to actual files and dirs
* Added more tests

svn: r17527
This commit is contained in:
Eli Barzilay 2010-01-07 04:05:21 +00:00
parent 220801c80c
commit 5fe14f70b6
3 changed files with 195 additions and 172 deletions

View File

@ -1,6 +1,6 @@
#lang scheme/base
(require setup/dirs "tree.ss")
(require "tree.ss" setup/dirs)
(define (get-plt-tree)
(when absolute-installation?
@ -8,9 +8,11 @@
(get-tree (build-path (find-collects-dir) 'up)))
#| good for benchmarking changes
(printf "getting tree\n")
(define t (get-plt-tree))
(printf "filtering\n")
(printf "getting tree ")
(define t (time (get-plt-tree)))
;;!!! (printf "adding deps ")
;;!!! (time (add-deps! t))
(printf "filtering x 1000 ")
(time
(for ([i (in-range 1000)]) ; print-tree
(tree-filter

View File

@ -46,52 +46,86 @@
(define (tree-tests)
(define a-dir (collection-path "scribble"))
(define a-list (map (lambda (p)
(let ([r (path->bytes p)])
(if (directory-exists? p) (bytes-append r #"/") r)))
(find-files void a-dir)))
(define a-list (find-files void a-dir))
(define a-tree #f)
(define (->bytes x) (string->bytes/utf-8 (format "~a" x)))
(define same-as-last-datums #f)
(define datums-result #f)
(define (->datums xs)
(set! same-as-last-datums datums-result)
(set! datums-result (map (lambda (x) (read (open-input-bytes x))) xs))
(set! datums-result
(map (lambda (x)
(read (open-input-bytes (if (path? x) (path->bytes x) x))))
xs))
datums-result)
(define (mk-tree t)
(e (let loop ([t t])
(if (pair? t)
`(make-dir ,(regexp-replace #rx#"/?$" (->bytes (car t)) #"/")
(list ,@(map loop (cdr t))))
`(make-file ,(->bytes t))))))
(test (set! a-tree (mk-tree '(- 0 (A1 1 2 3 (B 4) C) (A2 5))))
(->datums (e `(map tree-name (tree->list ,a-tree))))
=> '(-/ 0 A1/ 1 2 3 B/ 4 C A2/ 5)
(->datums (e `(tree->path-list ,a-tree)))
=> '(-/ -/0 -/A1/ -/A1/1 -/A1/2 -/A1/3 -/A1/B/ -/A1/B/4 -/A1/C
-/A2/ -/A2/5)
(->datums (e `(tree->path-list (tree-filter #"*" ,a-tree))))
=> same-as-last-datums
(->datums (e `(tree->path-list (tree-filter #"A2/" ,a-tree))))
=> '(-/ -/A2/ -/A2/5)
(->datums (e `(tree->path-list (tree-filter #"A1/B/" ,a-tree))))
=> '(-/ -/A1/ -/A1/B/ -/A1/B/4)
;; works with string patterns too
(->datums (e `(tree->path-list (tree-filter "A1/B/" ,a-tree))))
=> same-as-last-datums
;; last "/" is optional here ...
(->datums (e `(tree->path-list (tree-filter "A1/B" ,a-tree))))
=> same-as-last-datums
;; ... but in general it forces matching only directories
(->datums (e `(tree->path-list (tree-filter "A1/?/" ,a-tree))))
=> same-as-last-datums
(->datums (e `(tree->path-list (tree-filter "A1/?" ,a-tree))))
=> '(-/ -/A1/ -/A1/1 -/A1/2 -/A1/3 -/A1/B/ -/A1/B/4 -/A1/C)
(set! a-tree (e `(get-tree ,a-dir)))
(e `(tree->path-list ,a-tree))
=> a-list
(e `(tree->path-list (tree-filter #"*" ,a-tree)))
=> a-list)
(define (mk-tree t [convert values])
(e (let loop ([t t] [path #""])
(let* ([subs? (pair? t)]
[name (->bytes (if subs? (car t) t))]
[name (if subs? (regexp-replace #rx#"/?$" name #"/") name)]
[path (bytes-append path name)])
(if subs?
`(make-tree ,name
(list ,@(map (lambda (s) (loop s path)) (cdr t)))
,(convert path))
`(make-tree ,name #f ,(convert path)))))))
(define (e/filter filter)
(->datums (e `(map tree-path (tree->list (tree-filter ,filter ,a-tree))))))
(test
;; works with paths...
(set! a-tree (mk-tree '(- 0 (A1 1 2 3 (B 4) C) (A2 5)) bytes->path))
(->datums (e `(map tree-name (tree->list ,a-tree))))
=> '(-/ 0 A1/ 1 2 3 B/ 4 C A2/ 5)
;; ...as well as bytes
(set! a-tree (mk-tree '(- 0 (A1 1 2 3 (B 4) C) (A2 5))))
(->datums (e `(map tree-name (tree->list ,a-tree))))
=> same-as-last-datums
(->datums (e `(map tree-path (tree->list ,a-tree))))
=> '(-/ -/0 -/A1/ -/A1/1 -/A1/2 -/A1/3 -/A1/B/ -/A1/B/4 -/A1/C -/A2/ -/A2/5)
(->datums (e `(map tree-path (tree-foldr cons '() ,a-tree))))
=> same-as-last-datums
(->datums (e `(map tree-path (reverse (tree-foldl cons '() ,a-tree)))))
=> same-as-last-datums
(->datums (e `(let ([l '()])
(tree-for-each (lambda (t) (set! l (cons (tree-path t) l)))
,a-tree)
(reverse l))))
=> same-as-last-datums
(e/filter #"*")
=> same-as-last-datums
(e/filter #"A2/")
=> '(-/ -/A2/ -/A2/5)
(e/filter #"A1/B/")
=> '(-/ -/A1/ -/A1/B/ -/A1/B/4)
;; works with string patterns too
(e/filter "A1/B/")
=> same-as-last-datums
;; last "/" is optional here ...
(e/filter "A1/B")
=> same-as-last-datums
;; ... but in general it forces matching only directories
(e/filter "A1/?/")
=> same-as-last-datums
(e/filter "A1/?")
=> '(-/ -/A1/ -/A1/1 -/A1/2 -/A1/3 -/A1/B/ -/A1/B/4 -/A1/C)
(e/filter "*/2")
=> '(-/ -/A1/ -/A1/2)
(e/filter "*/[25]")
=> '(-/ -/A1/ -/A1/2 -/A2/ -/A2/5)
(e/filter "*/{2|5}")
=> same-as-last-datums
(e/filter '(not: "*/2"))
=> '(-/ -/0 -/A1/ -/A1/1 -/A1/3 -/A1/B/ -/A1/B/4 -/A1/C -/A2/ -/A2/5)
(e/filter '(not: "*/[25]"))
=> '(-/ -/0 -/A1/ -/A1/1 -/A1/3 -/A1/B/ -/A1/B/4 -/A1/C -/A2/)
(e/filter '(not: "*/{2|5}"))
=> '(-/ -/0 -/A1/ -/A1/1 -/A1/3 -/A1/B/ -/A1/B/4 -/A1/C -/A2/)
(set! a-tree (e `(get-tree ,a-dir)))
(e `(map tree-path (tree->list ,a-tree)))
=> a-list
(e/filter #"*")
=> (->datums a-list)
)
(set! a-tree
(mk-tree '(-
(.svn
@ -746,7 +780,7 @@
xref.ss)))
(test
;; the whole tree
(->datums (e `(tree->path-list (tree-filter "*" ,a-tree))))
(e/filter "*")
=>
'(
-/
@ -1402,7 +1436,7 @@
-/xref.ss
)
;; no immediate files
(->datums (e `(tree->path-list (tree-filter "*/*" ,a-tree))))
(e/filter "*/*")
=>
'(-/
-/.svn/
@ -2009,10 +2043,10 @@
-/tools/private/compiled/mk-drs-bitmaps_ss.zo
-/tools/private/mk-drs-bitmaps.ss
)
(->datums (e `(tree->path-list (tree-filter "*/" ,a-tree))))
(e/filter "*/")
=> same-as-last-datums
;; only 2-levels and deeper
(->datums (e `(tree->path-list (tree-filter "*/*/*" ,a-tree))))
(e/filter "*/*/*")
=>
'(-/
-/.svn/
@ -2506,7 +2540,7 @@
-/tools/private/mk-drs-bitmaps.ss
)
;; only 3-levels and deeper
(->datums (e `(tree->path-list (tree-filter "*/*/*/*" ,a-tree))))
(e/filter "*/*/*/*")
=>
'(-/
-/base/
@ -2779,7 +2813,7 @@
-/tools/private/compiled/mk-drs-bitmaps_ss.zo
)
;; only 4-levels and deeper
(->datums (e `(tree->path-list (tree-filter "*/*/*/*/*" ,a-tree))))
(e/filter "*/*/*/*/*")
=>
'(-/
-/base/
@ -2863,7 +2897,7 @@
-/tools/private/.svn/tmp/text-base/
)
;; only 4-levels and deeper of directories, including empty ones
(->datums (e `(tree->path-list (tree-filter "*/*/*/*/" ,a-tree))))
(e/filter "*/*/*/*/")
=>
'(-/
-/base/
@ -2995,13 +3029,13 @@
-/tools/private/.svn/tmp/text-base/
)
;; only 5-levels and deeper => nothing
(->datums (e `(tree->path-list (tree-filter "*/*/*/*/*/*" ,a-tree))))
(e/filter "*/*/*/*/*/*")
=> '(-/)
;; only 6-levels and deeper => nothing
(->datums (e `(tree->path-list (tree-filter "*/*/*/*/*/*/*" ,a-tree))))
(e/filter "*/*/*/*/*/*/*")
=> '(-/)
;; only immediate files
(->datums (e `(tree->path-list (tree-filter (not: "*/") ,a-tree))))
(e/filter '(not: "*/"))
=>
'(-/
-/base-render.ss
@ -3056,7 +3090,7 @@
;; dropped -- but for negated predicates the default is to keep empty
;; directories, so the result is the same as the above but also includes
;; directories
(->datums (e `(tree->path-list (tree-filter (not: "*/*") ,a-tree))))
(e/filter '(not: "*/*"))
=>
'(
-/
@ -3119,13 +3153,11 @@
-/xref.ss
)
;; (not: (not: pred)) returns `pred'
(->datums (e `(tree->path-list (tree-filter (not: (not: (not: "*/*")))
,a-tree))))
(e/filter '(not: (not: (not: "*/*"))))
=> same-as-last-datums
;; the special treatment of negated predicates makes it possible to select
;; only toplevel directories too
(->datums (e `(tree->path-list (tree-filter (and: "*/" (not: "*/*"))
,a-tree))))
(e/filter '(and: "*/" (not: "*/*")))
=>
'(
-/
@ -3141,11 +3173,10 @@
-/tools/
)
;; demorgan works with this negation
(->datums (e `(tree->path-list (tree-filter (not: (or: (not: "*/") "*/*"))
,a-tree))))
(e/filter '(not: (or: (not: "*/") "*/*")))
=> same-as-last-datums
;; only compiled directories
(->datums (e `(tree->path-list (tree-filter "**/compiled/" ,a-tree))))
(e/filter "**/compiled/")
=>
'(-/
-/base/
@ -3330,9 +3361,7 @@
-/tools/private/compiled/mk-drs-bitmaps_ss.zo
)
;; only compiled directories but not their content
(->datums (e `(tree->path-list (tree-filter (and: "**/compiled/"
(not: "**/compiled/*"))
,a-tree))))
(e/filter '(and: "**/compiled/" (not: "**/compiled/*")))
=>
'(-/
-/base/
@ -3367,7 +3396,7 @@
-/tools/private/compiled/
)
;; only .dep files in compiled directories
(->datums (e `(tree->path-list (tree-filter "**/compiled/*.dep" ,a-tree))))
(e/filter "**/compiled/*.dep")
=>
'(-/
-/base/
@ -3477,12 +3506,10 @@
-/tools/private/compiled/mk-drs-bitmaps_ss.dep
)
;; only .dep files in compiled directories, by dropping .zo files
(->datums (e `(tree->path-list (tree-filter (and: "**/compiled/"
(not: "**/*.zo"))
,a-tree))))
(e/filter '(and: "**/compiled/" (not: "**/*.zo")))
=> same-as-last-datums
;; no .svn directories
(->datums (e `(tree->path-list (tree-filter (not: "**/.svn/") ,a-tree))))
(e/filter '(not: "**/.svn/"))
=>
'(-/
-/base/
@ -3760,8 +3787,7 @@
-/xref.ss
)
;; no .svn or compiled directories using "{|}"
(->datums (e `(tree->path-list (tree-filter (not: "**/{.svn|compiled}/")
,a-tree))))
(e/filter '(not: "**/{.svn|compiled}/"))
=>
'(-/
-/base/
@ -3874,14 +3900,10 @@
-/xref.ss
)
;; no .svn or compiled directories using `or:'
(->datums (e `(tree->path-list
(tree-filter (not: (or: "**/.svn/" "**/compiled/"))
,a-tree))))
(e/filter '(not: (or: "**/.svn/" "**/compiled/")))
=> same-as-last-datums
;; no .svn or compiled directories using `and:'
(->datums (e `(tree->path-list
(tree-filter (and: (not: "**/.svn/") (not: "**/compiled/"))
,a-tree))))
(e/filter '(and: (not: "**/.svn/") (not: "**/compiled/")))
=> same-as-last-datums))
(test do (glob-tests)

View File

@ -1,72 +1,65 @@
#lang scheme/base
(provide tree-foldl tree-foldr tree-for-each print-tree
tree->list tree->path-list and: or: not: tree-filter get-tree
(struct-out tree) (struct-out file) (struct-out dir))
(provide (struct-out tree) leaf? tree-foldl tree-foldr tree-for-each tree->list
and: or: not: tree-filter get-tree)
(require scheme/list)
;; ----------------------------------------------------------------------------
;; Type definitions
(define-struct tree (name [data #:auto #:mutable]))
(define-struct (file tree) ())
(define-struct (dir tree) (subs))
;; This is a generic tree representation, subs is a list of subtrees, or #f for
;; a leaf.
;; - `name' is a name for this tree as a byte string, with a "/" suffix for
;; non-leaf nodes (the filtering code relies on this assumption)
;; - `subs' is a list of subtrees, or #f to mark a leaf
;; - `path' is the full path for to this tree (eg, FS path or a subvesion url),
;; this code has no assumptions on what's in there
;; - `data' is a placeholder for additional data
(define-struct tree (name subs path [data #:auto #:mutable]))
(define-syntax-rule (leaf? tree) (not (tree-subs tree)))
;; ----------------------------------------------------------------------------
;; Tree utilities
(define (tree-foldl f init tree)
(let loop ([tree tree] [base #""] [acc init])
(if (file? tree)
(f tree base acc)
(let ([base* (bytes-append base (tree-name tree))])
(let dloop ([trees (dir-subs tree)] [acc (f tree base acc)])
(if (null? trees)
(let loop ([tree tree] [acc init])
(let ([subs (tree-subs tree)])
(if subs
(let dloop ([subs subs] [acc (f tree acc)])
(if (null? subs)
acc
(dloop (cdr trees) (loop (car trees) base* acc))))))))
(dloop (cdr subs) (loop (car subs) acc))))
(f tree acc)))))
(define (tree-foldr f init tree)
(let loop ([tree tree] [base #""] [acc init])
(f tree base
(if (file? tree)
acc
(let ([base* (bytes-append base (tree-name tree))])
(let dloop ([trees (dir-subs tree)])
(if (null? trees)
acc
(loop (car trees) base* (dloop (cdr trees))))))))))
(let loop ([tree tree] [acc init])
(let ([subs (tree-subs tree)])
(f tree (if subs
(let dloop ([subs subs])
(if (null? subs)
acc
(loop (car subs) (dloop (cdr subs)))))
acc)))))
(define (tree-for-each f tree)
(let loop ([tree tree] [base #""])
(f tree base)
(when (dir? tree)
(let ([base* (bytes-append base (tree-name tree))])
(for ([tree (in-list (dir-subs tree))]) (loop tree base*))))))
(let loop ([tree tree])
(f tree)
(let ([subs (tree-subs tree)])
(when subs (for-each loop subs)))))
(define (print-tree tree)
(tree-for-each
(lambda (tree base)
(write-bytes base) (write-bytes (tree-name tree)) (newline))
tree))
(define (tree->list tree)
(tree-foldr (lambda (tree base acc) (cons tree acc)) '() tree))
(define (tree->path-list tree)
(tree-foldr (lambda (tree base acc)
(cons (bytes-append base (tree-name tree)) acc))
'() tree))
(define (tree->list tree) (tree-foldr cons '() tree))
;; ----------------------------------------------------------------------------
;; Tree filtering
;; A tree-filtering predicate is a function that receives a tree, and returns
;; either #t/#f to include or exclude it, or it can return a function to be
;; applied on the sub-trees of a directory. This setup makes it possible to
;; minimize the filtering work that is needed (compared to the old code that
;; would compare full paths). `tree-filter' takes such a predicate and returns
;; a tree with filtered subtrees, so the smallest result is the empty root.
;; applied on its sub-trees. This setup makes it possible to minimize the
;; filtering work that is needed (compared to the old code that would compare
;; full paths). `tree-filter' takes such a predicate and returns a tree with
;; filtered subtrees, so the smallest result is the empty root.
;; Turns a byte string with globbing into a regexp string. "*" turns to ".*",
;; "?" turns to ".", "[...]" ranges are used as is, "{...|...}" turns to
@ -123,41 +116,44 @@
(regexp-split #rx#"(?<=/)" glob))])
;; - xs is never null (`regexp-split' never returns null)
;; - an element without a trailing slash must be the last one
;; - an element with a trailing slash matches directories only, need to use
;; `dir?' for `*/' and `**/'
;; - an element with a trailing slash matches non-leaf nodes only, so need
;; to test subs for `*/' and `**/'
;; - things usually work out fine, but if it's the last element, then we
;; better return #t or #f rather a continuation predicate, since a
;; predicate result will never be used and it will mess up (eg, a
;; predicate result for a file is considered true, but (not: (lambda (t)
;; predicate result for a leaf is considered true, but (not: (lambda (t)
;; #t)) is also a predicate) => use #t for `r' in this case
(let* ([x (car xs)]
[x* (glob->regexp-or-literal x)]
[xs (cdr xs)]
[r (or (null? xs) (loop xs))])
(cond
[(eq? '* x*) (lambda (t) #t)]
[(eq? '*/ x*) (lambda (t) (and (dir? t) r))]
[(eq? '* x*) (lambda (t) #t)] ; it's the last one
[(eq? '*/ x*) (lambda (t) (and (tree-subs t) r))]
[(eq? '** x*) (lambda (t) #t)]
[(eq? '**/ x*) (letrec ([R (or: r (lambda (t) (and (dir? t) R)))]) R)]
[(eq? '**/ x*) (letrec ([R (or: r (lambda (t) (and (tree-subs t) R)))])
R)]
;; if it's the last one and it has no "/" suffix then it will match
;; only files => in this case, allow matches on directories by adding
;; the "/" (if this is not done then directories must always be
;; specified with a trailing slash, which is easy to forget)
;; only leaves => in this case, allow matches on non-leaf nodes by
;; adding the "/" (if this is not done then it's very easy to make
;; mistakes)
[else
(let ([x*/
(cond [(or (pair? xs) (regexp-match? #rx#"/$" x)) #f]
[(bytes? x*) (bytes-append x* #"/")]
[(byte-regexp? x*)
(glob->regexp-or-literal (bytes-append x #"/"))]
[else (error 'glob->pred "bad glob element: ~e" x)])])
(let ([x*/ (cond [(or (pair? xs) (regexp-match? #rx#"/$" x)) #f]
[(bytes? x*) (bytes-append x* #"/")]
[(byte-regexp? x*)
(glob->regexp-or-literal (bytes-append x #"/"))]
[else (error 'glob->pred "bad glob part: ~e" x)])])
(cond
[(bytes? x*/)
(lambda (t) (and (equal? (if (dir? t) x*/ x*) (tree-name t)) r))]
(lambda (t)
(let ([x (if (tree-subs t) x*/ x*)])
(and (equal? x (tree-name t)) r)))]
[(byte-regexp? x*/)
(lambda (t)
(and (regexp-match? (if (dir? t) x*/ x*) (tree-name t)) r))]
(let ([x (if (tree-subs t) x*/ x*)])
(and (regexp-match? x (tree-name t)) r)))]
[(bytes? x*)
(lambda (t) (and (dir? t) (equal? x* (tree-name t)) r))]
(lambda (t) (and (tree-subs t) (equal? x* (tree-name t)) r))]
[(byte-regexp? x*)
(lambda (t) (and (regexp-match? x* (tree-name t)) r))]))]))))
@ -195,12 +191,13 @@
(define-combiner or: raw-or: #f #t)
;; Negating predicates is a little tricky, for example (not: "*/*") would
;; filter out everything in all subdirectories, and since empty directories are
;; usually dropped by `tree-filter', this means that the directories will be
;; dropped too, leaving only immediate files. The way to make this behave more
;; intuitively is to mark negated predicates, and when filtering with a negated
;; predicate the default is to keep empty directories rather than drop them.
;; (As an aside, this can also be used to make (not: (not: f)) return `f'.)
;; filter out everything in all subtrees, and since empty non-leaf nodes are
;; usually dropped by `tree-filter', this means that the containing trees will
;; be dropped too, leaving only immediate leaves. The way to make this behave
;; more intuitively is to mark negated predicates, and when filtering with a
;; negated predicate the default is to keep empty non-leaf nodes rather than
;; drop them. (As an aside, this can also be used to make (not: (not: f))
;; return `f'.)
(define-struct negated (pred orig) #:property prop:procedure 0)
(define (raw-not: p)
(if (negated? p)
@ -214,48 +211,50 @@
(define (not: pred/glob)
(raw-not: (pred/glob->pred pred/glob)))
;; filter a whole tree
(define (tree-filter pred/glob tree)
(define pred (pred/glob->pred pred/glob))
(define-syntax-rule (dir-filter pred dir)
(define (subs-filter pred tree)
(let* ([same? #t]
[subs (dir-subs dir)]
[subs (tree-subs tree)]
[new-subs (filter-map (lambda (sub)
(let ([r (loop sub pred)])
(unless (eq? r sub) (set! same? #f))
r))
subs)])
(cond [(and (null? new-subs) (not (negated? pred))) #f]
[same? dir]
[else (make-dir (tree-name dir) new-subs)])))
[same? tree]
[else (make-tree (tree-name tree) new-subs (tree-path tree))])))
(define (loop tree pred)
(let ([r (pred tree)])
(cond [(eq? #t r) tree]
[(eq? #f r) #f]
[(procedure? r) (and (dir? tree) (dir-filter r tree))]
[(procedure? r) (and (tree-subs tree) (subs-filter r tree))]
[else (error 'tree-filter "bad result from predicate: ~e" r)])))
(if (file? tree)
(error 'tree-filter "expecting a `dir', got ~e" tree)
(or (dir-filter pred tree) (make-dir (tree-name tree) '()))))
(if (leaf? tree)
(error 'tree-filter "expecting a non-leaf, got ~e" tree)
(or (subs-filter pred tree)
(make-tree (tree-name tree) '() (tree-path tree)))))
;; ----------------------------------------------------------------------------
;; Reading a tree from a directory
(define (get-tree dir)
(define (subs dir)
(parameterize ([current-directory dir])
(map cdr
(sort
(for/list ([path (directory-list)])
(let ([name (path-element->bytes path)])
(cons name
(cond
[(directory-exists? path)
(make-dir (bytes-append name #"/") (subs path))]
[(file-exists? path) (make-file name)]
[else (error 'get-tree "bad path encountered: ~a/~a"
(current-directory) path)]))))
bytes<?
#:key car))))
(define root (path->bytes (simplify-path dir)))
(make-dir (if (regexp-match? #rx#"/$" root) root (bytes-append root #"/"))
(subs dir)))
(define (get-tree path)
(define path* (simplify-path path))
(let loop ([path path*]
[name (regexp-replace #rx#"/$" (path->bytes path*) #"")])
(cond [(directory-exists? path)
(make-tree
(bytes-append name #"/")
(parameterize ([current-directory path])
(let* ([subs (map (lambda (sub)
(cons (path-element->bytes sub) sub))
(directory-list))]
[subs (sort subs bytes<? #:key car)])
(map (lambda (sub)
(loop (build-path path (cdr sub)) (car sub)))
subs)))
path)]
[(file-exists? path) (make-tree name #f path)]
[else (error 'get-tree "bad path encountered: ~a/~a"
(current-directory) path)])))