From 97c65102b3ca4aaacf0674fe691e3c0783224bc0 Mon Sep 17 00:00:00 2001 From: Ben Greenman Date: Sun, 18 Sep 2016 03:43:03 -0400 Subject: [PATCH] add file/glob implements globbing for path strings - glob : globs -> listof path in-glob : globs -> sequenceof path glob-match? : globs path-string -> boolean - wildcards are: * ? [...] - braces {} get expanded to multiple globs - if pattern ends with /, only match directories - wildcards don't capture dotfiles by default (keyword arg overrides) --- pkgs/racket-doc/file/scribblings/file.scrbl | 1 + pkgs/racket-doc/file/scribblings/glob.scrbl | 125 +++ .../tests/file/glob-test-dir/.secret1.rkt | 0 .../tests/file/glob-test-dir/.secret2.rkt | 0 .../tests/file/glob-test-dir/A.txt | 0 .../tests/file/glob-test-dir/B.txt | 0 .../tests/file/glob-test-dir/C.txt | 0 .../tests/file/glob-test-dir/README.md | 4 + .../glob-test-subdir/.secret3.rkt | 0 .../glob-test-subdir/.secret4.rkt | 0 .../glob-test-dir/glob-test-subdir/A1.txt | 0 .../glob-test-dir/glob-test-subdir/A2.txt | 0 .../glob-test-dir/glob-test-subdir/A3.txt | 0 .../glob-test-sub-sub-dir/.deep.secret | 0 .../glob-test-sub-sub-dir/A4.txt | 0 .../glob-test-sub-sub-dir/deep.c | 0 pkgs/racket-test/tests/file/glob.rkt | 246 +++++ racket/collects/file/glob.rkt | 14 + racket/collects/file/private/glob.rkt | 853 ++++++++++++++++++ 19 files changed, 1243 insertions(+) create mode 100644 pkgs/racket-doc/file/scribblings/glob.scrbl create mode 100644 pkgs/racket-test/tests/file/glob-test-dir/.secret1.rkt create mode 100644 pkgs/racket-test/tests/file/glob-test-dir/.secret2.rkt create mode 100644 pkgs/racket-test/tests/file/glob-test-dir/A.txt create mode 100644 pkgs/racket-test/tests/file/glob-test-dir/B.txt create mode 100644 pkgs/racket-test/tests/file/glob-test-dir/C.txt create mode 100644 pkgs/racket-test/tests/file/glob-test-dir/README.md create mode 100644 pkgs/racket-test/tests/file/glob-test-dir/glob-test-subdir/.secret3.rkt create mode 100644 pkgs/racket-test/tests/file/glob-test-dir/glob-test-subdir/.secret4.rkt create mode 100644 pkgs/racket-test/tests/file/glob-test-dir/glob-test-subdir/A1.txt create mode 100644 pkgs/racket-test/tests/file/glob-test-dir/glob-test-subdir/A2.txt create mode 100644 pkgs/racket-test/tests/file/glob-test-dir/glob-test-subdir/A3.txt create mode 100644 pkgs/racket-test/tests/file/glob-test-dir/glob-test-subdir/glob-test-sub-sub-dir/.deep.secret create mode 100644 pkgs/racket-test/tests/file/glob-test-dir/glob-test-subdir/glob-test-sub-sub-dir/A4.txt create mode 100644 pkgs/racket-test/tests/file/glob-test-dir/glob-test-subdir/glob-test-sub-sub-dir/deep.c create mode 100644 pkgs/racket-test/tests/file/glob.rkt create mode 100644 racket/collects/file/glob.rkt create mode 100644 racket/collects/file/private/glob.rkt diff --git a/pkgs/racket-doc/file/scribblings/file.scrbl b/pkgs/racket-doc/file/scribblings/file.scrbl index 47df5822cc..5adc16ff59 100644 --- a/pkgs/racket-doc/file/scribblings/file.scrbl +++ b/pkgs/racket-doc/file/scribblings/file.scrbl @@ -19,6 +19,7 @@ @include-section["ico.scrbl"] @include-section["resource.scrbl"] @include-section["cache.scrbl"] +@include-section["glob.scrbl"] @(bibliography (bib-entry #:key "Gervautz1990" diff --git a/pkgs/racket-doc/file/scribblings/glob.scrbl b/pkgs/racket-doc/file/scribblings/glob.scrbl new file mode 100644 index 0000000000..6385c01444 --- /dev/null +++ b/pkgs/racket-doc/file/scribblings/glob.scrbl @@ -0,0 +1,125 @@ +#lang scribble/doc +@require["common.rkt" + scribble/examples + (for-label racket/sequence)] + +@title[#:tag "glob"]{Globbing} + +@defmodule[file/glob]{The @racketmodname[file/glob] library implements +@hyperlink["https://en.wikipedia.org/wiki/Glob_(programming)"]{globbing} +for @racket[path-string?] values. A @emph{glob} is a path string that matches +a set of path strings using the following @emph{wildcards}: +@itemlist[ +@item{ + A sextile (@tt{*}) matches any sequence of characters in a file or directory + name. +} +@item{ + Two sextiles (@tt{**}) match any sequence of characters and any number of + path separators. +} +@item{ + A question mark (@tt{?}) matches any single character in a file or directory + name. +} +@item{ + Square bracket-delimited character groups, e.g. @tt{[abc]}, match any + character within the group. The square brackets have the same meaning in globs + as in regular expressions, see + @secref["regexp-syntax" #:doc '(lib "scribblings/reference/reference.scrbl")]. +} +@item{ + If the glob ends with a path separator (@tt{/} on any @racket[(system-type)], + additionally @tt{\} on @racket['windows]) + then it only matches directories. +} +] + +By default, wildcards will not match files or directories whose name begins +with a period (aka "dotfiles"). To override, set the parameter +@racket[glob-capture-dotfiles?] to a non-@racket[#f] value or supply a similar +value using the @racket[#:capture-dotfiles?] keyword. +} + +@defthing[glob/c (or/c path-string? (sequence/c path-string?))]{ +A @tech[#:doc '(lib "scribblings/reference/reference.scrbl")]{flat contract} +that accepts a glob or a sequence of globs. +} + +All @racketmodname[file/glob] functions accept @racket[glob/c] values. +These functions also recognize braces (@tt|{{}}|) as a @emph{meta-wildcard} for +describing multiple globs. +@margin-note{Braces are interpreted @emph{before} any other wildcards.} + +@itemlist[ +@item{ +Brace-delimited, comma-separated character groups, e.g. @tt|{{foo,bar}}|, +expand to multiple globs before the @racketmodname[file/glob] module begins +matching. For example, the @racket[glob/c] value @racket|{{foo,bar}.rkt}| +has the same meaning as @racket['("foo.rkt" "bar.rkt")]. +} +] + +@defproc[(glob [pattern glob/c] [#:capture-dotfiles? capture-dotfiles? boolean? (glob-capture-dotfiles?)]) (listof path-string?)]{ + Builds a list of all paths on the current filesystem that match any glob + in @racket[pattern]. The order of paths in the result is unspecified. + + If @racket[pattern] contains the wildcard @tt{**}, then @racket[glob] + recursively searches the filesystem to find matches. + For example, the glob @racket{/**.rkt} will search the @emph{entire filesystem} + for files or directories with a @racket{.rkt} suffix (aka, Racket files). + +Examples: +@codeblock{ +> (glob "*.rkt") +;; Lists all Racket files in current directory + +> (glob "*/*.rkt") +;; Lists all Racket files in all sub-directories of the current directory. +;; (Does not search sub-sub-directories, etc.) + +> (glob (build-path (find-system-path 'home-dir) "**" "*.rkt")) +;; Recursively searches home directory for Racket files, lists all matches. + +> (glob "??.rkt") +;; Lists all Racket files in current directory with 2-character names. + +> (glob "[a-z0-9].rkt") +;; Lists all Racket files in current directory with single-character, +;; alphanumeric names. + +> (glob '("foo-bar.rkt" "foo-baz.rkt" "qux-bar.rkt" "qux-baz.rkt")) +;; Filters the list to contain only files or directories that exist. + +> (glob "{foo,qux}-{bar,baz}.rkt") +;; Same as above, returns at most 4 files. +} +} + +@defproc[(in-glob [pattern glob/c] [#:capture-dotfiles? capture-dotfiles? boolean? (glob-capture-dotfiles?)]) (sequence/c path-string?)]{ + Returns a stream of all paths matching the glob @racket[pattern], + instead of eagerly building a list. +} + +@defproc[(glob-match? [pattern glob/c] + [path path-string?] + [#:capture-dotfiles? capture-dotfiles? boolean? (glob-capture-dotfiles?)]) boolean?]{ + Analogous to @racket[regexp-match?]; returns @racket[#true] if @racket[path] + matches any glob in @racket[pattern]. + + @racket[(glob-match? pattern path)] is @emph{not} the same as: + @racketblock[ + (member path (glob pattern)) + ] + because @racket[glob] only returns files/directories that exist, whereas + @racket[glob-match?] does not check that @racket[path] exists. + + This operation accesses the filesystem. +} + +@defparam[glob-capture-dotfiles? capture-dotfiles? boolean? #:value #f]{ + Determines whether wildcards match names that begin with a @racket[#\.] + character. If @racket[#t], the wildcards will match dotfiles. If + @racket[#f], use a glob such as @racket{.*} to match dotfiles explicitly. +} + diff --git a/pkgs/racket-test/tests/file/glob-test-dir/.secret1.rkt b/pkgs/racket-test/tests/file/glob-test-dir/.secret1.rkt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pkgs/racket-test/tests/file/glob-test-dir/.secret2.rkt b/pkgs/racket-test/tests/file/glob-test-dir/.secret2.rkt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pkgs/racket-test/tests/file/glob-test-dir/A.txt b/pkgs/racket-test/tests/file/glob-test-dir/A.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pkgs/racket-test/tests/file/glob-test-dir/B.txt b/pkgs/racket-test/tests/file/glob-test-dir/B.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pkgs/racket-test/tests/file/glob-test-dir/C.txt b/pkgs/racket-test/tests/file/glob-test-dir/C.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pkgs/racket-test/tests/file/glob-test-dir/README.md b/pkgs/racket-test/tests/file/glob-test-dir/README.md new file mode 100644 index 0000000000..af41a0a949 --- /dev/null +++ b/pkgs/racket-test/tests/file/glob-test-dir/README.md @@ -0,0 +1,4 @@ +glob-test-dir +=== + +Sample directory to run `file/glob` unit tests on. diff --git a/pkgs/racket-test/tests/file/glob-test-dir/glob-test-subdir/.secret3.rkt b/pkgs/racket-test/tests/file/glob-test-dir/glob-test-subdir/.secret3.rkt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pkgs/racket-test/tests/file/glob-test-dir/glob-test-subdir/.secret4.rkt b/pkgs/racket-test/tests/file/glob-test-dir/glob-test-subdir/.secret4.rkt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pkgs/racket-test/tests/file/glob-test-dir/glob-test-subdir/A1.txt b/pkgs/racket-test/tests/file/glob-test-dir/glob-test-subdir/A1.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pkgs/racket-test/tests/file/glob-test-dir/glob-test-subdir/A2.txt b/pkgs/racket-test/tests/file/glob-test-dir/glob-test-subdir/A2.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pkgs/racket-test/tests/file/glob-test-dir/glob-test-subdir/A3.txt b/pkgs/racket-test/tests/file/glob-test-dir/glob-test-subdir/A3.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pkgs/racket-test/tests/file/glob-test-dir/glob-test-subdir/glob-test-sub-sub-dir/.deep.secret b/pkgs/racket-test/tests/file/glob-test-dir/glob-test-subdir/glob-test-sub-sub-dir/.deep.secret new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pkgs/racket-test/tests/file/glob-test-dir/glob-test-subdir/glob-test-sub-sub-dir/A4.txt b/pkgs/racket-test/tests/file/glob-test-dir/glob-test-subdir/glob-test-sub-sub-dir/A4.txt new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pkgs/racket-test/tests/file/glob-test-dir/glob-test-subdir/glob-test-sub-sub-dir/deep.c b/pkgs/racket-test/tests/file/glob-test-dir/glob-test-subdir/glob-test-sub-sub-dir/deep.c new file mode 100644 index 0000000000..e69de29bb2 diff --git a/pkgs/racket-test/tests/file/glob.rkt b/pkgs/racket-test/tests/file/glob.rkt new file mode 100644 index 0000000000..c46bf9c17d --- /dev/null +++ b/pkgs/racket-test/tests/file/glob.rkt @@ -0,0 +1,246 @@ +#lang racket/base + +(require file/glob + racket/runtime-path + racket/path + rackunit + (for-syntax racket/base)) + + +(define-runtime-path glob-test-dir "./glob-test-dir") +(define glob-test-subdir (build-path glob-test-dir "glob-test-subdir")) +(define glob-test-sub-sub-dir (build-path glob-test-subdir "glob-test-sub-sub-dir")) + +(define (check-test-dir) + (unless (directory-exists? glob-test-dir) + (raise-user-error 'file/glob + "could not locate directory '~a', cannot run tests" + glob-test-dir))) + +(module+ test + (check-test-dir) + (current-directory glob-test-dir) + + (define unsupplied-arg (gensym)) + (define (glob/cwd g #:capture-dotfiles? [cd? unsupplied-arg]) + (define cwd (list (current-directory))) + (define p* (if (eq? cd? unsupplied-arg) (glob g) (glob g #:capture-dotfiles? cd?))) + (for/list ([p (in-list p*)]) + (path->string (shrink-path-wrt p cwd)))) + + (define (add-cwd* p*) + (define cwd (current-directory)) + (for/list ([p (in-list p*)]) + (build-path cwd p))) + + (test-case "glob:current-dir" + + (check-equal? + (sort (glob/cwd "*") stringstring p) 0)))) + (path->string p)) + stringstring glob-test-subdir)]) + (check-equal? + (glob (format "~a/A2.txt" str)) + (list (simplify-path (build-path glob-test-subdir "A2.txt")))) + (check-equal? (glob/cwd (format "~a//A2.txt" str)) '("A2.txt")) + (check-equal? (glob/cwd (build-path str ".." "A*.txt")) '("A.txt")))) + + (test-case "glob:**" + (let ([subdir-dotfiles '(".secret3.rkt" ".secret4.rkt")] + [sub-subdir-dotfiles '(".deep.secret")] + [subdir-txtfiles '("A1.txt" "A2.txt" "A3.txt")] + [sub-subdir-txtfiles '("A4.txt")]) + (check-equal? + (glob/cwd "**/.*") + (append subdir-dotfiles sub-subdir-dotfiles)) + (check-equal? (glob/cwd "*/**/.*") sub-subdir-dotfiles) + (check-equal? + (glob/cwd (build-path "**" "*txt")) + (append subdir-txtfiles sub-subdir-txtfiles)) + (check-equal? (glob/cwd "*/**/*txt") sub-subdir-txtfiles))) + + (test-case "glob:ends-with-path-sep" + (check-equal? (glob/cwd "*/") '("glob-test-subdir")) + (check-equal? (glob/cwd "*/*/") '("glob-test-sub-sub-dir"))) + + (test-case "glob:multi" + (check-equal? + (glob/cwd '("*.txt" "*.md")) + '("A.txt" "B.txt" "C.txt" "README.md")) + (check-equal? + (glob/cwd '("*.{txt,md}")) + '("A.txt" "B.txt" "C.txt" "README.md")) + (check-equal? + (glob/cwd "*.{txt,md}") + '("A.txt" "B.txt" "C.txt" "README.md"))) + + (test-case "in-glob" + (let ([seq (in-glob "*/.*")]) + (check-false (list? seq)) + (check-equal? + (for/list ([x seq]) x) + (for/list ([name (in-list '(".secret3.rkt" ".secret4.rkt"))]) + (simplify-path (build-path glob-test-subdir name))))) + (check-equal? + (for/list ([x (in-glob (build-path glob-test-subdir "{.,?}*"))]) x) + (map (λ (p) (simplify-path (build-path glob-test-subdir p))) + '(".secret3.rkt" ".secret4.rkt" "A1.txt" "A2.txt" "A3.txt" + "glob-test-sub-sub-dir"))) + (check-equal? + (for/list ([x (in-glob (map (λ (p) (build-path glob-test-sub-sub-dir p)) + '(".*" "*.c")))]) + x) + (map (λ (p) (simplify-path (build-path glob-test-sub-sub-dir p))) + '(".deep.secret" "deep.c")))) + + (test-case "glob-match?" + (check-true (glob-match? "A*" "A.txt")) + (check-true (glob-match? "A*" (format "~a/A.txt" glob-test-dir))) + (check-true (glob-match? (build-path "A*") + (build-path glob-test-dir "A.txt"))) + + (define (check-glob-match?-theorem g p) + (if (member (path->complete-path (simplify-path p)) (glob g)) + (check-true + (glob-match? g p) + #;(format "glob '~a' should match '~a'" g p)) + (check-false + (glob-match? g p) + #;(format "glob '~a' should not match '~a'" g p)))) + + (check-glob-match?-theorem + (build-path glob-test-dir "README.md") + "README.md") + (check-glob-match?-theorem + (build-path glob-test-subdir ".secret*") + (format "~a/.secret3.rkt" glob-test-subdir)) + (check-glob-match?-theorem + (build-path glob-test-subdir ".secret*") + (format "~a/.mismatch" glob-test-subdir))) + + (test-case "glob-match?:dotfiles" + (check-true (glob-match? ".secret*" ".secret1")) + (check-false (glob-match? "*" ".secret.1")) + (check-true (glob-match? "*" ".secret.1" #:capture-dotfiles? #t)) + (parameterize ([glob-capture-dotfiles? #t]) + (check-true (glob-match? "*" ".secret.1"))) + + (check-true + (glob-match? "*/.se?ret[43]*" (build-path glob-test-subdir ".secret3"))) + (check-true + (glob-match? "*/.se?ret[43]*" (build-path glob-test-subdir ".secret4"))) + ) + + (test-case "glob-match?-pun" + ;; -- Check that `glob-match?` fails if the unambiguous parts don't match + ;; (but the ambiguous parts do) + (check-false (glob-match? (format "~a/*" glob-test-subdir) "README.md")) + (check-true (glob-match? (format "~a/*" glob-test-dir) "README.md")) + (check-false (glob-match? (format "~a/*" glob-test-subdir) "A.txt")) + (check-true + (glob-match? (build-path glob-test-subdir "*") + (format "~a/A1.txt" glob-test-subdir)))) + + (test-case "glob-match?:unamb" + (check-true (glob-match? "foo.rkt" "foo.rkt")) + (check-true (glob-match? '("foo.rkt") "foo.rkt")) + (check-true (glob-match? '("foo.rkt" "foo.rkt") "foo.rkt")) + (check-true (glob-match? "." ".")) + (check-true (glob-match? "longerfilename" "longerfilename")) + + (check-false (glob-match? "A" "x")) + (check-false (glob-match? '("1" "2" "3") "4")) + ) + + (test-case "glob-match?:recursive" + (check-true (glob-match? "**.rkt" "foo.rkt")) + (check-true (glob-match? "**" "yolo")) + (check-true (glob-match? "**" ".dotfile" #:capture-dotfiles? #t)) + (check-true (glob-match? "/**.rkt" "/a/b/c//d/./../foo.rkt")) + (check-true (glob-match? "a/b/**/c" "a/b/c/d/e/f/c")) + + (check-false (glob-match? "**.rkt" "foo")) + (check-false (glob-match? "**" "yo/.lo")) + (check-false (glob-match? "**" ".yolo")) + (check-false (glob-match? "**" ".yo/lo")) + (check-false (glob-match? "a/b/**/c" "a/b/c/d/e"))) + + (test-case "glob-match?:split" + (check-true (glob-match? "a/*.rkt" "a/foo.rkt")) + (check-true (glob-match? "????" "yolo")) + (check-true (glob-match? "*f[iou]?e" ".dotfile" #:capture-dotfiles? #t)) + (check-true (glob-match? "/*/*/*/*.rkt" "/a/b/c//d/./../foo.rkt")) + (check-true (glob-match? "a/b/*/c" "a/b/d/c")) + + (check-false (glob-match? "*.rkt" "foo")) + (check-false (glob-match? "*/*" "yo/.lo")) + (check-false (glob-match? "*" ".yolo")) + (check-false (glob-match? "*/*" ".yo/lo")) + (check-false (glob-match? "a/b/*/c" "a/b/c/d/"))) + + (test-case "glob-match?:ends-with-path-sep" + (check-true (glob-match? "*/" glob-test-subdir)) + (check-true (glob-match? "glob*/glob*/" glob-test-sub-sub-dir)) + (check-true (glob-match? "foo/" "foo/")) + (check-true (glob-match? "*/" "foo/")) + + (check-false (glob-match? "foo/" "foo")) + (check-false (glob-match? "*/" "foo"))) + +) diff --git a/racket/collects/file/glob.rkt b/racket/collects/file/glob.rkt new file mode 100644 index 0000000000..7a67fcc0ef --- /dev/null +++ b/racket/collects/file/glob.rkt @@ -0,0 +1,14 @@ +#lang racket/base + +(require + file/private/glob + racket/contract + (only-in racket/sequence sequence/c)) + +(provide + glob/c + (contract-out + [glob (->* [glob/c] [#:capture-dotfiles? boolean?] (listof path-string?))] + [in-glob (->* [glob/c] [#:capture-dotfiles? boolean?] (sequence/c path-string?))] + [glob-match? (->* [glob/c path-string?] [#:capture-dotfiles? boolean?] boolean?)] + [glob-capture-dotfiles? (parameter/c boolean?)])) diff --git a/racket/collects/file/private/glob.rkt b/racket/collects/file/private/glob.rkt new file mode 100644 index 0000000000..e7612223d9 --- /dev/null +++ b/racket/collects/file/private/glob.rkt @@ -0,0 +1,853 @@ +#lang racket/base + +;; Filepath globbing. + +;; A glob is a path-string that describes a set of path-strings via "wildcards": +;; - * matches any sequence of characters in a file or directory name +;; - ** matches any sequence of characters or directories in a path +;; - ? matches any single character +;; - [] matches any character between the brackets + +;; Terms / Conventions in this file: +;; - variable names ending with a star (*) represent lists, +;; e.g., `path*` is "a list of paths". +;; - "amb" = "ambiguous" +;; - "unamb" = "unambiguous" + +(provide + glob/c + glob + in-glob + glob-match? + glob-capture-dotfiles?) + +(require + racket/contract/base + racket/generator + (only-in racket/list + append* + splitf-at) + (only-in racket/path + shrink-path-wrt) + (only-in racket/sequence + sequence/c + sequence-append + sequence-map + sequence-filter + sequence->list) + (only-in racket/string + string-contains? + string-join) + (for-syntax racket/base)) + +(module+ test ;; faking RackUnit + + (define current-test-case (make-parameter #f)) + (define num-tests (box 0)) + + (define-syntax-rule (test-case str e* ...) + (parameterize ([current-test-case str]) + e* ...)) + + (define-syntax (check-equal? stx) + (syntax-case stx () + [(_ actual expect) + #`(let ([a actual] + [e expect]) + (set-box! num-tests (+ (unbox num-tests) 1)) + (unless (equal? a e) + (raise-user-error 'check-equal? + "~a [~a:~a:~a] actual value ~a differs from expected ~a" + (current-test-case) + #,(syntax-line stx) + #,(syntax-column stx) + #,(syntax-span stx) + a + e)))])) + + (define-syntax-rule (check-true e) + (check-equal? e #t)) + + (define-syntax-rule (check-false e) + (check-equal? e #f)) + + (define-syntax-rule (check-pred f e) + (check-equal? (and (f e) #t) #t)) + + (define-syntax-rule (check-exn pred thunk) + (check-equal? (with-handlers ([pred (λ (e) #t)]) (and (thunk) #f)) #t))) + +;; ============================================================================= + +(define glob/c + (or/c path-string? (sequence/c path-string?))) + +(define glob-capture-dotfiles? (make-parameter #f)) + +(define (glob pattern #:capture-dotfiles? [cd? (glob-capture-dotfiles?)]) + (sequence->list (in-glob pattern #:capture-dotfiles? cd?))) + +(define (in-glob pattern #:capture-dotfiles? [cd? (glob-capture-dotfiles?)]) + ;; 2016-10-01: assuming `pattern` doesn't specify many globs + (apply sequence-append + (for/list ([p (in-list (flatten-glob pattern))]) + (define r* (in-glob/single p #:capture-dotfiles? cd?)) + (if (ends-with-path-sep? p) + ;; then: filter non-directory results + (sequence-filter directory-exists? r*) + r*)))) + +(define (glob-match? pattern ps-raw #:capture-dotfiles? [cd? (glob-capture-dotfiles?)]) + (define ps (normalize-path ps-raw)) + (define swd (starts-with-dot? (path-string->string ps-raw))) + (for/or ([p (in-list (flatten-glob pattern))]) + (and (glob-match?/single p ps swd #:capture-dotfiles? cd?) + (or (not (ends-with-path-sep? p)) + ;; then: pattern must be a syntactic or real directory + (or (ends-with-path-sep? ps) + (directory-exists? ps)))))) + +;; ----------------------------------------------------------------------------- +;; -- parsing + +;; (define parsed-glob/c +;; (or/c glob-unambiguous? +;; glob-recursive? +;; glob-split?)) + +(define glob-unambiguous? + path-string?) + +(define glob-recursive? + (cons/c path-string? regexp?)) + +(define glob-split? + ;; "split" globs have 1 unambiguous element + ;; and at least 1 ambiguous elements (with wildcards) + (cons/c path-string? (cons/c path-string? (listof path-string?)))) + +;; : glob-recursive? boolean? #:capture-dotfiles? boolean? -> (path-string? -> boolean?) +(define (glob->regexp-matcher g pattern-starts-with-dot? #:capture-dotfiles? capture-dotfiles?) + (λ (ps) + (and + (if (starts-with-dot? ps) + (or capture-dotfiles? + pattern-starts-with-dot?) + #t) + (regexp-match? g (normalize-path ps))))) + +;; glob->unamb+amb* : glob-split? -> (values path? (listof path?)) +(define (glob->unamb+amb* g) + (values (car g) (cdr g))) + +;; path-string->glob : path-string? boolean? -> parsed-glob/c +;; +;; Parse a path-string into either: +;; - a complete path, if the string has no wildcards +;; - a regular expression, if the string has the ** wildcard +;; - a pair of: +;; - a wildcard-free path-string +;; - a non-empty list of path-strings, some elements will use wildcards + +(module+ test + (define HOME (find-system-path 'home-dir)) + + (test-case "path-string->glob" + ;; -- no wildcards => unambiguous + (check-pred glob-unambiguous? (path-string->glob "foo" #f)) + (check-pred glob-unambiguous? + (path-string->glob (build-path (current-directory) "bar" "baz.c") #f)) + + ;; -- anything with ** is recursive + (check-pred glob-recursive? (path-string->glob "**" #f)) + (check-pred glob-recursive? + (path-string->glob (build-path HOME "x" "**" "y") #f)) + + ;; -- everything else is a "split" glob + (check-pred glob-split? (path-string->glob "foo.rkt?" #f)) + (check-pred glob-split? (path-string->glob (build-path "." "yo" "[lo]") #f)) + (check-pred glob-split? (path-string->glob "*" #f)))) + +(define (path-string->glob pattern capture-dotfiles?) + (define path (normalize-path (path-string->path pattern))) + (define elem* (explode-path path)) + (define-values (unamb amb*) + (let loop ([unamb* '()] [elem* elem*]) + (cond + [(or (null? elem*) (has-**? (car elem*))) + (values (normalize-path (path*->path (reverse unamb*))) + (if (not (null? elem*)) '** #f))] + [(has-glob-pattern? (car elem*)) + (values (normalize-path (path*->path (reverse unamb*))) + (if (ormap has-**? (cdr elem*)) '** elem*))] + [else + (loop (cons (car elem*) unamb*) (cdr elem*))]))) + (case amb* + [(#f) + unamb] + [(**) + (cons (path->directory-path unamb) + (regexp + (format "^~a$" (glob-element->regexp path capture-dotfiles?))))] + [else + (cons (path->directory-path unamb) amb*)])) + +;; has-glob-pattern? : path-element? -> boolean? +;; #t if argument contains a globbing wildcard (* ** ? []) +(define (has-glob-pattern? p) + (define str (path-or-symbol->string p)) + (define in-brace? (box #f)) + (for/or ([c (in-string str)] + [i (in-naturals)] + #:when (not (escaped? str i))) + (when (and (not (unbox in-brace?)) (eq? #\[ c)) + (set-box! in-brace? #t)) + (or (eq? #\* c) + (eq? #\? c) + (and (unbox in-brace?) (eq? #\] c))))) + +;; ----------------------------------------------------------------------------- +;; -- matching + +;; in-glob/single : path-string #:capture-dotfiles? boolean? -> (sequence/c path-string) +(define (in-glob/single pattern #:capture-dotfiles? cd?) + (define g (path-string->glob pattern cd?)) + (cond + [(glob-unambiguous? g) + ;; -- return at most 1 path + (if (or (file-exists? g) (directory-exists? g)) + (list g) + '())] + [(glob-recursive? g) + ;; -- recursively search filesystem for all (regexp) matches + ;; (start `in-directory` with the longest possible prefix) + (define start (car g)) + (define matcher (glob->regexp-matcher (cdr g) + (starts-with-dot? pattern) + #:capture-dotfiles? cd?)) + (sequence-map normalize-path (sequence-filter matcher (in-directory start)))] + [else + ;; -- follow the glob through the filesystem, return all matches + (define-values (unamb amb*) (glob->unamb+amb* g)) + (if (directory-exists? unamb) + (in-producer (glob-generator amb* unamb #:capture-dotfiles? cd?) (void)) + '())])) + +;; glob-match?/single : path-string? path-string? boolean? #:capture-dotfiles? boolean? -> boolean? +(define (glob-match?/single pattern ps ps-starts-with-dot? #:capture-dotfiles? cd?) + ;; 2016-10-01 : need `ps-starts-with-dot?` because the `ps` here is normalized, + ;; instead of what the user submitted + (define g (path-string->glob pattern cd?)) + (cond + [(glob-unambiguous? g) + (or (equal? g ps) + (equal? (path->directory-path g) ps))] + [(glob-recursive? g) + (and (if ps-starts-with-dot? + (or cd? (starts-with-dot? (path-string->string pattern))) + #t) + (regexp-match? (cdr g) ps))] + [else + (define-values (unamb amb*) (glob->unamb+amb* g)) + ;; -- pop the `unamb` prefix from `ps`, similar to `shrink-path-wrt` + (define ps-rest + (let loop ([unamb* (explode-path unamb)] + [ps* (explode-path ps)]) + (cond + [(null? unamb*) + ps*] + [(null? ps*) + #f] + [else + (and (equal? (car unamb*) (car ps*)) + (loop (cdr unamb*) (cdr ps*)))]))) + ;; -- match the `amb*` + (and ps-rest + (let loop ([amb* amb*] + [ps* ps-rest]) + (cond + [(and (null? amb*) (null? ps*)) + #t] + [(or (null? amb*) (null? ps*)) + #f] + [else + (and (not (null? (glob-filter (car amb*) + (list (car ps*)) + #:capture-dotfiles? cd?))) + (loop (cdr amb*) (cdr ps*)))])))])) + +;; glob-generator : (listof path-element?) path? #:capture-dotfiles? boolean? -> (sequence/c path?) +;; `((glob-generator p* p #:capture-dotfiles? cd?))` filters the contents +;; of `p` to those matching the pattern `(car p*)`. In particular: +;; - `p*` is sequence of glob-patterns to match against the filesystem +;; - `p` is a file/directory to start the search from +;; Starts a recursive search for each match in the current directory. +(define (glob-generator to-explore* current-path #:capture-dotfiles? cd?) (generator () + (define in-current-path* (directory-list current-path)) + (cond + [(null? to-explore*) + ;; -- finished searching, yield all matches + (for ([f (in-list in-current-path*)]) + (yield (build-path current-path f))) + (yield (void))] + [(null? in-current-path*) + (yield (void))] + [else + (define fst (car to-explore*)) + (define rst (cdr to-explore*)) + (define nothing-left? (null? rst)) + (define e* (glob-filter fst in-current-path* #:capture-dotfiles? cd?)) + (for ([elem (in-list e*)]) + (define new-current (build-path current-path elem)) + (cond + [nothing-left? + (yield new-current)] + [(directory-exists? new-current) + (for ([r (in-producer (glob-generator rst new-current #:capture-dotfiles? cd?) (void))]) + (yield r))])) + (void)]))) + +;; glob-filter : path-element? (listof path?) #:capture-dotfiles? boolean? -> (listof path?) +;; `(glob-filter g p*) filters the list `p*`, removing elements that do not +;; match pattern `g` (interpreted as a glob). +;; Assumes the `p*` do not contain any '/' characters +(define (glob-filter pattern path* #:capture-dotfiles? capture-dotfiles?) + (define rx + (regexp + (string-append "^" (glob-element->regexp pattern capture-dotfiles?) "$"))) + (define pattern-starts-with-dot? (starts-with-dot? pattern)) + (if (not rx) + '() + (for/list ([path (in-list path*)] + #:when (let ([str (path->string path)]) + (and + ;; -- If `path` is a dotfile and * should not + ;; capture dotfiles, then ignore `path`. + (if (starts-with-dot? str) + (or capture-dotfiles? + pattern-starts-with-dot?) + #t) + ;; -- Ignore `path` if it doesn't match `pattern` + (regexp-match? rx str)))) + path))) + +;; ----------------------------------------------------------------------------- +;; -- compiling + +;; glob-element->regexp : path? boolean? -> string? +;; Convert a glob to a regular expression string +;; - interpret wildcards as regular expressions +;; - escape other regexp syntax +(define glob-element->regexp + (let ([REGEXP-CHARS '(#\. #\( #\) #\| #\+ #\$ #\^ #\[ #\] #\{ #\})]) + ;; Need to quote these characters before using string as a regexp + (λ (path capture-dotfiles?) + (define str (path->string path)) + (define prev-brace-idx ;; (U #f Index), index of most-recent '[' character + (box #f)) + (define in-** ;; if #t, prev char. was '*' and current char. is '*' too + (box #f)) + (define len (string-length str)) + (define str* + (for/list ([c (in-string str)] + [i (in-naturals)]) + (cond + [(unbox in-**) + (set-box! in-** #f) + ""] + [(escaped? str i) + (string c)] + [(unbox prev-brace-idx) + ;; inside a [...] + ;; - don't escape anything + ;; - exit at first ']' + ;; - unless we have '[]]', then exit at 2nd ']' + (if (eq? c #\]) + (if (= (unbox prev-brace-idx) (- i 1)) + "]" + (begin (set-box! prev-brace-idx #f) "]")) + (string c))] + [(eq? c #\*) + ;; check for '**' + ;; - if so, match anything even '/' + ;; - else, match anything except '/' + (if (and (< (+ i 1) len) (eq? (string-ref str (+ i 1)) #\*)) + (begin (set-box! in-** #t) + (if capture-dotfiles? ".*" "((?!/\\.).)*")) + "[^/]*")] + [(eq? c #\?) + "[^/]"] + [(and (eq? c #\[) (has-matching-bracket? str (+ i 1))) + (set-box! prev-brace-idx i) + "["] + [(memq c REGEXP-CHARS) + ;; escape characters that the regexp might interpret + (string #\\ c)] + [else + ;; keep everything else + (string c)]))) + (string-join str* "")))) + +;; flatten-glob : glob/c -> (listof path-string?) +(define (flatten-glob pattern) + (if (path-string? pattern) + (expand-braces pattern) + (append* (map expand-braces pattern)))) + +;; expand-braces : path-string boolean? -> (listof path-string?) +(define (expand-braces p [in-brace? #f]) + (define str (path-string->string p)) + (define len (string-length str)) + (define (alt**->string* alt**) + (for/list ([alt* (in-list alt**)]) + (list->string (reverse alt*)))) + ;; loop : (listof (listof char)) natural-number/c -> (listof path-string?) + ;; Walk through `str`, collect characters into a list of paths, + ;; returns 1 string for each combination of brace-separated alternatives. + (let loop ([alt** '()] + [i 0]) + (if (= i len) + (for/list ([alt* (in-list alt**)]) + (list->string (reverse alt*))) + (let ([c (string-ref str i)] + [esc (escaped? str i)]) + (cond + [(and (eq? #\{ c) + (not esc)) + ;; On {, collect all alternatives between matching } + ;; and accumulate a _new_ string for each + (define j (or (find-matching-brace str i) + (malformed-glob-error "unmatched brace at character ~a in '~a'" i str))) + (define str* (expand-braces (substring str (+ i 1) j) #t)) + (define alt**+ + (cond + [(null? alt**) + (for/list ([str (in-list str*)]) + (reverse (string->list str)))] + [(null? str*) + alt**] + [else + (append* + (for/list ([str (in-list str*)]) + (define char* (reverse (string->list str))) + (for/list ([alt* (in-list alt**)]) + (append char* alt*))))])) + (loop alt**+ (+ j 1))] + [(and (not esc) in-brace? (eq? #\, c)) + ;; -- start a new alternative + (append (alt**->string* alt**) + (loop '() (+ i 1)))] + [(and (not esc) (eq? #\} c)) + (malformed-glob-error "unmatched } in glob '~a'" str)] + [else + (loop (if (null? alt**) + (list (list c)) + (for/list ([alt* (in-list alt**)]) + (cons c alt*))) + (+ i 1))]))))) + +;; ----------------------------------------------------------------------------- +;; -- other + +(define-syntax-rule (malformed-glob-error msg arg* ...) + (raise-user-error 'glob (string-append "malformed glob: " msg) arg* ...)) + +;; has-**? : path-element? -> boolean? +;; #t if `p` contains '**' +(define (has-**? p) + (string-contains? (path-or-symbol->string p) "**")) + +;; normalize-path : path? -> path? +(define (normalize-path p) + (path->complete-path (simplify-path (expand-user-path p)))) + +;; path*->path : (listof path?) -> path? +(define (path*->path p*) + (if (null? p*) + (current-directory) + (apply build-path p*))) + +;; path-string->path : path-string? -> path? +(define (path-string->path p) + (if (path? p) p (string->path p))) + +;; `(has-matching-bracket? str i)` returns #t if `str` contains an unescaped #\] +;; character past position `i` + +(module+ test + (check-false (has-matching-bracket? "" 0)) + (check-false (has-matching-bracket? "foo" 0)) + (check-false (has-matching-bracket? "yo]lo" 2)) + (check-false (has-matching-bracket? "foo]" 5)) + (check-true (and (has-matching-bracket? "foo]" 0) #t)) + (check-equal? (has-matching-bracket? "yo]lo" 1) 2)) + +(define (has-matching-bracket? str left-idx) + (for/or ([i (in-range (+ left-idx 1) (string-length str))]) + (and (eq? #\] (string-ref str i)) + (not (escaped? str i)) + i))) + +;; find-matching-brace : string? integer? -> (or/c #f integer?) +;; Returns the index of the } that matches the one at index `i` +(define (find-matching-brace str i) + (define len (string-length str)) + (and (<= 0 i (- len 1)) + (eq? #\{ (string-ref str i)) + ;; loop : natural-number/c natural-number/c -> natural-number/c + (let loop ([brace-depth 0] ; "how many { are we currently inside" + [i (+ i 1)]) ; index into `str` + (and (not (= i len)) + (let ([c (string-ref str i)] + [esc (escaped? str i)]) + (cond + [(and (not esc) (eq? c #\{)) + ;; -- add 1 level of nesting + (loop (+ 1 brace-depth) (+ i 1))] + [(and (not esc) (eq? c #\})) + ;; -- maybe a match, maybe just lose 1 level of nesting + (if (zero? brace-depth) + i + (loop (- brace-depth 1) (+ i 1)))] + [else + (loop brace-depth (+ i 1))])))))) + +;; `(escaped? str i)` returns #t if position `i` in the string `str` +;; is escaped by a #\\ character + +(module+ test + (check-equal? (escaped? "be\\n" 0) #f) + (check-equal? (escaped? "be\\\\n" 4) #f) + (check-equal? (escaped? "be\\n" 3) #t) + (check-equal? (escaped? "\\neb" 1) #t)) + +(define (escaped? str i) + (case i + [(0) + #f] + [(1) + (eq? #\\ (string-ref str 0))] + [else + (and (eq? #\\ (string-ref str (- i 1))) + (not (eq? #\\ (string-ref str (- i 2)))))])) + +;; `(starts-with-dot? str)` returns #t if the first character in `str` is #\. + +(module+ test + (check-equal? (starts-with-dot? "foo") #f) + (check-equal? (starts-with-dot? ".") #f) + (check-equal? (starts-with-dot? "./yo") #f) + (check-equal? (starts-with-dot? "") #f) + (check-equal? (starts-with-dot? ".foo") #t) + (check-equal? (starts-with-dot? ".barry") #t)) + +(define (starts-with-dot? ps) + (define str (path-string->string ps)) + (and (< 1 (string-length str)) + (eq? #\. (string-ref str 0)) + (not (eq? #\/ (string-ref str 1))))) + +(define (path-string->string ps) + (if (string? ps) ps (path->string ps))) + +(define (path-or-symbol->string p) + (case p + [(up) ".."] + [(same) "."] + [else + (if (absolute-path? p) + (path->string p) + (path-element->string p))])) + +(define (ends-with-path-sep? ps) + (define str (path-string->string ps)) + (define len (string-length str)) + (and (< 0 len) + (case (string-ref str (- (string-length str) 1)) + [(#\/) #t] + [(#\\) (eq? 'windows (system-type 'os))] + [else #f]))) + +;; ============================================================================= + +(module+ test + + (parameterize ([current-directory (symbol->string (gensym "glob-test"))]) + (test-case "path-string->glob:unambiguous" + (define (check-path-string->glob/unamb input expect) + (define r (path-string->glob input #f)) + (check-pred glob-unambiguous? r) + (check-equal? r expect)) + + (check-path-string->glob/unamb "/a/b/c" (build-path "/" "a" "b" "c")) + (check-path-string->glob/unamb "/////a" (build-path "/" "a")) + (check-path-string->glob/unamb "~/foo.txt" (build-path HOME "foo.txt")) + (check-path-string->glob/unamb "~/foo/bar/baz.md" + (build-path HOME "foo" "bar" "baz.md")) + (check-path-string->glob/unamb "/a/b/c?/../e" + (build-path "/" "a" "b" "e"))) + + (test-case "path-string->glob:recursive" + (define (check-path-string->glob/recur input expect) + (define r (path-string->glob input #f)) + (check-pred glob-recursive? r) + (check-equal? r expect)) + + (check-path-string->glob/recur + "/**/" + (cons (string->path "/") #rx"^/((?!/\\.).)*/$")) + (check-path-string->glob/recur + "a.a/[b?]/**/c?" + (cons (path->directory-path (build-path (current-directory) "a.a")) + (regexp + (format "^~a$" + (path->string + (build-path (current-directory) "a\\.a/[b?]/((?!/\\.).)*/c[^/]"))))))) + + (test-case "path-string->glob:split" + (define (check-path-string->glob/split input expect) + (define r (path-string->glob input #f)) + (check-pred glob-split? r) + (check-equal? r expect)) + + (check-path-string->glob/split + "*" + (list (current-directory) (build-path "*"))) + (check-path-string->glob/split + "/a/b/c?" + (list (build-path "/" "a" "b/") (build-path "c?"))) + (check-path-string->glob/split + "/a/b/c?/d/e" + (cons (build-path "/" "a" "b/") (map string->path '("c?" "d" "e")))) + (check-path-string->glob/split + "~/foo/bar?/baz.md" + (cons (build-path HOME "foo/") (map string->path '("bar?" "baz.md")))) + (check-path-string->glob/split + "~/foo/*/baz/.." + (list (build-path HOME "foo/") (string->path "*"))) + (check-path-string->glob/split + "~/foo/bar*/baz/.." + (list (build-path HOME "foo/") (string->path "bar*"))) + (check-path-string->glob/split + "/a[bc]/d/e/f/../g" + (cons (find-system-path 'sys-dir) + (map string->path '("a[bc]" "d" "e" "g"))))) + + (test-case "has-glob-pattern?" + (define (has-glob-pattern?* x) + (has-glob-pattern? (string->path x))) + + (check-equal? (has-glob-pattern?* "foo") #f) + (check-equal? (has-glob-pattern?* "foo") #f) + (check-equal? (has-glob-pattern?* "]") #f) + (check-equal? (has-glob-pattern?* "[") #f) + (check-equal? (has-glob-pattern?* "lit\\*star") #f) + (check-equal? (has-glob-pattern?* "\\?\\?") #f) + (check-equal? (has-glob-pattern?* "\\[\\]x") #f) + (check-equal? (has-glob-pattern?* "][") #f) + (check-equal? (has-glob-pattern?* "*") #t) + (check-equal? (has-glob-pattern?* "?") #t) + (check-equal? (has-glob-pattern?* "*") #t) + (check-equal? (has-glob-pattern?* "***") #t) + (check-equal? (has-glob-pattern?* "[]") #t) + (check-equal? (has-glob-pattern?* "*[ab*d]*") #t) + (check-equal? (has-glob-pattern?* "[[[][") #t) + (check-equal? (has-glob-pattern?* "foo?bar*") #t) + (check-equal? (has-glob-pattern?* "???*") #t) + (check-equal? (has-glob-pattern?* "ar[gh]r*") #t) + (check-equal? (has-glob-pattern?* (string #\\ #\\ #\*)) #t)) + + (define (glob-element->regexp* s) + (glob-element->regexp (string->path s) #f)) + + (test-case "glob-element->regexp" + (check-equal? (glob-element->regexp* "foobar") "foobar") + (check-equal? (glob-element->regexp* ".") "\\.") + (check-equal? (glob-element->regexp* "*") "[^/]*") + (check-equal? (glob-element->regexp* "foo*.txt") "foo[^/]*\\.txt") + (check-equal? (glob-element->regexp* "(hello world)") "\\(hello world\\)") + (check-equal? (glob-element->regexp* "^foo|bar$") "\\^foo\\|bar\\$") + (check-equal? (glob-element->regexp* "things?") "things[^/]") + (check-equal? (glob-element->regexp* "\tescaped\\things\n?") + "\tescaped\\things\n[^/]") + (check-equal? (glob-element->regexp* "outside[in]") "outside[in]") + (check-equal? (glob-element->regexp* ".?.?.?") "\\.[^/]\\.[^/]\\.[^/]") + (check-equal? (glob-element->regexp* "[") "\\[") + (check-equal? (glob-element->regexp* "][") "\\]\\[") + (check-equal? (glob-element->regexp* "[]]") "[]]") + (check-equal? (glob-element->regexp* "[a*?]") "[a*?]") + (check-equal? (glob-element->regexp* "h[el]lo[") "h[el]lo\\[")) + + (test-case "glob-element->regexp:tree.ss" ;; from tree.ss in the PLT SVN + (check-equal? (glob-element->regexp* "glob") "glob") + (check-equal? (glob-element->regexp* "gl?ob") "gl[^/]ob") + (check-equal? (glob-element->regexp* "gl*ob") "gl[^/]*ob") + (check-equal? (glob-element->regexp* "gl*?ob") "gl[^/]*[^/]ob") + (check-equal? (glob-element->regexp* "gl?*ob") "gl[^/][^/]*ob") + (check-equal? (glob-element->regexp* "gl.ob") "gl\\.ob") + (check-equal? (glob-element->regexp* "gl?.ob") "gl[^/]\\.ob") + (check-equal? (glob-element->regexp* "gl^ob") "gl\\^ob") + (check-equal? (glob-element->regexp* "gl^?ob") "gl\\^[^/]ob") + (check-equal? (glob-element->regexp* "gl\\.ob") "gl\\.ob") + (check-equal? (glob-element->regexp* "gl\\ob") "gl\\ob") + (check-equal? (glob-element->regexp* "gl\\*ob") "gl\\*ob") + (check-equal? (glob-element->regexp* "gl\\?ob") "gl\\?ob") + (check-equal? (glob-element->regexp* "gl\\|ob") "gl\\|ob") + (check-equal? (glob-element->regexp* "gl\\{ob") "gl\\{ob") + (check-equal? (glob-element->regexp* "gl[?]ob") "gl[?]ob") + (check-equal? (glob-element->regexp* "gl[*?]ob") "gl[*?]ob") + (check-equal? (glob-element->regexp* "gl[?*]ob") "gl[?*]ob") + (check-equal? (glob-element->regexp* "gl[]*]ob") "gl[]*]ob") + (check-equal? (glob-element->regexp* "gl[^]*]ob") "gl[^][^/]*\\]ob") + (check-equal? (glob-element->regexp* "gl[^]*]*ob") "gl[^][^/]*\\][^/]*ob")) + + (test-case "expand-braces:simple" + (check-equal? (expand-braces "") '()) + (check-equal? (expand-braces "a") '("a")) + (check-equal? (expand-braces "anything\\,") '("anything\\,")) + (check-equal? (expand-braces "a,b,c") '("a,b,c")) + (check-equal? (expand-braces "{a,b,c}") '("a" "b" "c")) + (check-equal? (expand-braces "foo,bar") '("foo,bar")) + (check-equal? (expand-braces "{foo,bar}") '("foo" "bar")) + (check-equal? + (sort (expand-braces "{foo,bar}-{baz,qux}.rkt") stringregexp:invalid" + ;; these are all invalid regexps + (check-equal? (glob-element->regexp* "[]") "\\[\\]") + (check-equal? (glob-element->regexp* "[---]") "[---]")) + + (test-case "glob-filter" + (define (glob-filter* p p* #:capture-dotfiles? [cd? (glob-capture-dotfiles?)]) + (map path->string + (glob-filter (string->path p) (map string->path p*) #:capture-dotfiles? cd?))) + + (check-equal? + (glob-filter* "foo" '() #:capture-dotfiles? #f) + '()) + (check-equal? + (glob-filter* "foo" '("foo") #:capture-dotfiles? #f) + '("foo")) + (parameterize ([glob-capture-dotfiles? #f]) + (check-equal? (glob-filter* "foo" '()) '()) + (check-equal? (glob-filter* "foo" '("foo")) '("foo")) + (check-equal? (glob-filter* "*" '("foo" ".foo")) '("foo")) + (check-equal? (glob-filter* "foo" '("qux" "foo" "bar")) '("foo")) + (check-equal? + (glob-filter* "*" '("cat" "dog" "goa")) + '("cat" "dog" "goa")) + (check-equal? (glob-filter* "ca?" '("ca" "car" "cat")) '("car" "cat")) + (check-equal? + (glob-filter* "ca?at" '("ca" "car" "catat" "caat")) + '("catat")) + (check-equal? (glob-filter* ".?.?.?" '("a" "ab" "abc" "abcd")) '()) + (check-equal? (glob-filter* ".?.?" '("." ".." "..." "....")) '("....")) + (check-equal? + (glob-filter* "*.txt" '("file.txt" "sheet.txt" "work.jar" "play.tab")) + '("file.txt" "sheet.txt"))) + + (check-equal? + (glob-filter* "*" '("foo" ".foo") #:capture-dotfiles? #t) + '("foo" ".foo")) + (check-equal? + (glob-filter* ".?.?" '("." ".." "..." "....") #:capture-dotfiles? #t) + '("....")) + (parameterize ([glob-capture-dotfiles? #t]) + (check-equal? (glob-filter* "*" '("foo" ".foo")) '("foo" ".foo")) + (check-equal? (glob-filter* ".?.?" '("." ".." "..." "....")) '("....")) + (void))) + + (test-case "find-matching-brace:simple" + (check-equal? (find-matching-brace "yo{}lo" 2) 3) + (check-equal? (find-matching-brace "we{p}a" 2) 4) + (check-equal? (find-matching-brace "{abc}" 0) 4) + (check-equal? (find-matching-brace "hel{lo}world" 3) 6) + ) + + (test-case "find-matching-brace:nested" + (check-equal? (find-matching-brace "a{b{}c}d" 1) 6) + (check-equal? (find-matching-brace "a{b{}c}d" 3) 4) + (check-equal? (find-matching-brace "a{b{c{d}e}f}g" 3) 9)) + + (test-case "find-matching-brace:unmatched" + (check-equal? (find-matching-brace "{" 0) #f) + (check-equal? (find-matching-brace "abc{d" 3) #f) + (check-equal? (find-matching-brace "foo{bar{baz}" 3) #f) + (check-equal? (find-matching-brace "a{b{c{d}e" 3) #f)) + + (test-case "find-matching-brace:arg-error" + (check-equal? (find-matching-brace "{" -1) #f) + (check-equal? (find-matching-brace "{}" 2) #f) + (check-equal? (find-matching-brace "" 0) #f) + (check-equal? (find-matching-brace "asdf" 0) #f) + (check-equal? (find-matching-brace "a{b}c" 0) #f)) + (test-case "path-or-symbol->string" + (check-equal? + (path-or-symbol->string 'up) + "..") + (check-equal? + (path-or-symbol->string 'same) + ".") + (check-equal? + (path-or-symbol->string (string->path "~")) + "~") + (check-equal? + (path-or-symbol->string (string->path "hello/")) + "hello")) + + (test-case "ends-with-path-sep?" + (check-true (ends-with-path-sep? "foo/")) + (check-true (ends-with-path-sep? "foo/bar/")) + (check-true (ends-with-path-sep? "foo/bar/baz/")) + (check-true (ends-with-path-sep? "foo//")) + (check-true (ends-with-path-sep? (string->path "/"))) + (check-true (ends-with-path-sep? "*/")) + (check-true (ends-with-path-sep? "/*/*/*/")) + (check-true (ends-with-path-sep? (build-path "?" "?/"))) + + (check-false (ends-with-path-sep? "")) + (check-false (ends-with-path-sep? "a")) + (check-false (ends-with-path-sep? "/a")) + (check-false (ends-with-path-sep? (build-path "foo" "bar"))) + (check-false (ends-with-path-sep? "?/?")) + (check-false (ends-with-path-sep? "[abc/]")) + ) + + (printf "~a tests passed\n" (unbox num-tests)) +))