racket/racket/collects/file/private/glob.rkt
Ben Greenman b38ce36c92
glob: add 'glob-quote' (#2397)
add a function to escape any glob wildcards in a path or string

also add a private `glob-element->filename` function so that, e.g., the pattern
`a\*` matches the file named `a*` (previously, the match would fail and
I think it was impossible to match for only `a*`)
2018-12-04 00:21:14 -05:00

919 lines
35 KiB
Racket

#lang racket/base
;; Filepath globbing.
;; A glob is a path-string that describes a set of path-strings via "wildcards":
;; - * matches any sequence of characters in a file or directory name
;; - ** matches any sequence of characters or directories in a path
;; - ? matches any single character
;; - [] matches any character between the brackets
;; Terms / Conventions in this file:
;; - variable names ending with a star (*) represent lists,
;; e.g., `path*` is "a list of paths".
;; - "amb" = "ambiguous"
;; - "unamb" = "unambiguous"
(provide
glob/c
glob
in-glob
glob-match?
glob-quote
glob-capture-dotfiles?)
(require
racket/contract/base
racket/generator
(only-in racket/list
append*
splitf-at)
(only-in racket/path
shrink-path-wrt)
(only-in racket/sequence
sequence/c
sequence-append
sequence-map
sequence-filter
sequence->list)
(only-in racket/string
string-contains?
string-join)
(for-syntax racket/base))
(module+ test ;; faking RackUnit
(define current-test-case (make-parameter #f))
(define num-tests (box 0))
(define-syntax-rule (test-case str e* ...)
(parameterize ([current-test-case str])
e* ...))
(define-syntax (check-equal? stx)
(syntax-case stx ()
[(_ actual expect)
#`(let ([a actual]
[e expect])
(set-box! num-tests (+ (unbox num-tests) 1))
(unless (equal? a e)
(raise-user-error 'check-equal?
"~a [~a:~a:~a] actual value ~a differs from expected ~a"
(current-test-case)
#,(syntax-line stx)
#,(syntax-column stx)
#,(syntax-span stx)
a
e)))]))
(define-syntax-rule (check-true e)
(check-equal? e #t))
(define-syntax-rule (check-false e)
(check-equal? e #f))
(define-syntax-rule (check-pred f e)
(check-equal? (and (f e) #t) #t))
(define-syntax-rule (check-exn pred thunk)
(check-equal? (with-handlers ([pred (λ (e) #t)]) (and (thunk) #f)) #t)))
;; =============================================================================
(define glob/c
(or/c path-string? (sequence/c path-string?)))
(define glob-capture-dotfiles? (make-parameter #f))
(define (glob pattern #:capture-dotfiles? [cd? (glob-capture-dotfiles?)])
(sequence->list (in-glob pattern #:capture-dotfiles? cd?)))
(define (in-glob pattern #:capture-dotfiles? [cd? (glob-capture-dotfiles?)])
;; 2016-10-01: assuming `pattern` doesn't specify many globs
(apply sequence-append
(for/list ([p (in-list (flatten-glob pattern))])
(define r* (in-glob/single p #:capture-dotfiles? cd?))
(if (ends-with-path-sep? p)
;; then: filter non-directory results
(sequence-filter directory-exists? r*)
r*))))
(define (glob-match? pattern ps-raw #:capture-dotfiles? [cd? (glob-capture-dotfiles?)])
(define ps (normalize-path ps-raw))
(define swd (starts-with-dot? (path-string->string ps-raw)))
(for/or ([p (in-list (flatten-glob pattern))])
(and (glob-match?/single p ps swd #:capture-dotfiles? cd?)
(or (not (ends-with-path-sep? p))
;; then: pattern must be a syntactic or real directory
(or (ends-with-path-sep? ps)
(directory-exists? ps))))))
(define (glob-quote ps)
(if (path? ps)
(string->path (glob-quote/string (path->string ps)))
(glob-quote/string ps)))
;; -----------------------------------------------------------------------------
;; -- parsing
;; (define parsed-glob/c
;; (or/c glob-unambiguous?
;; glob-recursive?
;; glob-split?))
(define glob-unambiguous?
path-string?)
(define glob-recursive?
(cons/c path-string? regexp?))
(define glob-split?
;; "split" globs have 1 unambiguous element
;; and at least 1 ambiguous elements (with wildcards)
(cons/c path-string? (cons/c path-string? (listof path-string?))))
;; : glob-recursive? boolean? #:capture-dotfiles? boolean? -> (path-string? -> boolean?)
(define (glob->regexp-matcher g pattern-starts-with-dot? #:capture-dotfiles? capture-dotfiles?)
(λ (ps)
(and
(if (starts-with-dot? ps)
(or capture-dotfiles?
pattern-starts-with-dot?)
#t)
(regexp-match? g (normalize-path ps)))))
;; glob->unamb+amb* : glob-split? -> (values path? (listof path?))
(define (glob->unamb+amb* g)
(values (car g) (cdr g)))
;; path-string->glob : path-string? boolean? -> parsed-glob/c
;;
;; Parse a path-string into either:
;; - a complete path, if the string has no wildcards
;; - a regular expression, if the string has the ** wildcard
;; - a pair of:
;; - a wildcard-free path-string
;; - a non-empty list of path-strings, some elements will use wildcards
(module+ test
(define HOME (find-system-path 'home-dir))
(test-case "path-string->glob"
;; -- no wildcards => unambiguous
(check-pred glob-unambiguous? (path-string->glob "foo" #f))
(check-pred glob-unambiguous?
(path-string->glob (build-path (current-directory) "bar" "baz.c") #f))
;; -- anything with ** is recursive
(check-pred glob-recursive? (path-string->glob "**" #f))
(check-pred glob-recursive?
(path-string->glob (build-path HOME "x" "**" "y") #f))
;; -- everything else is a "split" glob
(check-pred glob-split? (path-string->glob "foo.rkt?" #f))
(check-pred glob-split? (path-string->glob (build-path "." "yo" "[lo]") #f))
(check-pred glob-split? (path-string->glob "*" #f))))
(define (path-string->glob pattern capture-dotfiles?)
(define path (normalize-path (path-string->path pattern)))
(define elem* (explode-path path))
(define-values (unamb amb*)
(let loop ([unamb* '()] [elem* elem*])
(cond
[(or (null? elem*) (has-**? (car elem*)))
(values (normalize-path (path*->path (map glob-element->filename (reverse unamb*))))
(if (not (null? elem*)) '** #f))]
[(has-glob-pattern? (car elem*))
(values (normalize-path (path*->path (map glob-element->filename (reverse unamb*))))
(if (ormap has-**? (cdr elem*)) '** elem*))]
[else
(loop (cons (car elem*) unamb*) (cdr elem*))])))
(case amb*
[(#f)
unamb]
[(**)
(cons (path->directory-path unamb)
(regexp
(format "^~a$" (glob-element->regexp path capture-dotfiles?))))]
[else
(cons (path->directory-path unamb) amb*)]))
;; has-glob-pattern? : path-element? -> boolean?
;; #t if argument contains a globbing wildcard (* ** ? [])
(define (has-glob-pattern? p)
(define str (path-or-symbol->string p))
(define in-brace? (box #f))
(for/or ([c (in-string str)]
[i (in-naturals)]
#:when (not (escaped? str i)))
(when (and (not (unbox in-brace?)) (eq? #\[ c))
(set-box! in-brace? #t))
(or (eq? #\* c)
(eq? #\? c)
(and (unbox in-brace?) (eq? #\] c)))))
;; -----------------------------------------------------------------------------
;; -- matching
;; in-glob/single : path-string #:capture-dotfiles? boolean? -> (sequence/c path-string)
(define (in-glob/single pattern #:capture-dotfiles? cd?)
(define g (path-string->glob pattern cd?))
(cond
[(glob-unambiguous? g)
;; -- return at most 1 path
(if (or (file-exists? g) (directory-exists? g))
(list g)
'())]
[(glob-recursive? g)
;; -- recursively search filesystem for all (regexp) matches
;; (start `in-directory` with the longest possible prefix)
(define start (car g))
(define matcher (glob->regexp-matcher (cdr g)
(starts-with-dot? pattern)
#:capture-dotfiles? cd?))
(sequence-map normalize-path (sequence-filter matcher (in-directory start)))]
[else
;; -- follow the glob through the filesystem, return all matches
(define-values (unamb amb*) (glob->unamb+amb* g))
(if (directory-exists? unamb)
(in-producer (glob-generator amb* unamb #:capture-dotfiles? cd?) (void))
'())]))
;; glob-match?/single : path-string? path-string? boolean? #:capture-dotfiles? boolean? -> boolean?
(define (glob-match?/single pattern ps ps-starts-with-dot? #:capture-dotfiles? cd?)
;; 2016-10-01 : need `ps-starts-with-dot?` because the `ps` here is normalized,
;; instead of what the user submitted
(define g (path-string->glob pattern cd?))
(cond
[(glob-unambiguous? g)
(or (equal? g ps)
(equal? (path->directory-path g) ps))]
[(glob-recursive? g)
(and (if ps-starts-with-dot?
(or cd? (starts-with-dot? (path-string->string pattern)))
#t)
(regexp-match? (cdr g) ps))]
[else
(define-values (unamb amb*) (glob->unamb+amb* g))
;; -- pop the `unamb` prefix from `ps`, similar to `shrink-path-wrt`
(define ps-rest
(let loop ([unamb* (explode-path unamb)]
[ps* (explode-path ps)])
(cond
[(null? unamb*)
ps*]
[(null? ps*)
#f]
[else
(and (equal? (car unamb*) (car ps*))
(loop (cdr unamb*) (cdr ps*)))])))
;; -- match the `amb*`
(and ps-rest
(let loop ([amb* amb*]
[ps* ps-rest])
(cond
[(and (null? amb*) (null? ps*))
#t]
[(or (null? amb*) (null? ps*))
#f]
[else
(and (not (null? (glob-filter (car amb*)
(list (car ps*))
#:capture-dotfiles? cd?)))
(loop (cdr amb*) (cdr ps*)))])))]))
;; glob-generator : (listof path-element?) path? #:capture-dotfiles? boolean? -> (sequence/c path?)
;; `((glob-generator p* p #:capture-dotfiles? cd?))` filters the contents
;; of `p` to those matching the pattern `(car p*)`. In particular:
;; - `p*` is sequence of glob-patterns to match against the filesystem
;; - `p` is a file/directory to start the search from
;; Starts a recursive search for each match in the current directory.
(define (glob-generator to-explore* current-path #:capture-dotfiles? cd?) (generator ()
(define in-current-path* (directory-list current-path))
(cond
[(null? to-explore*)
;; -- finished searching, yield all matches
(for ([f (in-list in-current-path*)])
(yield (build-path current-path f)))
(yield (void))]
[(null? in-current-path*)
(yield (void))]
[else
(define fst (car to-explore*))
(define rst (cdr to-explore*))
(define nothing-left? (null? rst))
(define e* (glob-filter fst in-current-path* #:capture-dotfiles? cd?))
(for ([elem (in-list e*)])
(define new-current (build-path current-path elem))
(cond
[nothing-left?
(yield new-current)]
[(directory-exists? new-current)
(for ([r (in-producer (glob-generator rst new-current #:capture-dotfiles? cd?) (void))])
(yield r))]))
(void)])))
;; glob-filter : path-element? (listof path?) #:capture-dotfiles? boolean? -> (listof path?)
;; `(glob-filter g p*) filters the list `p*`, removing elements that do not
;; match pattern `g` (interpreted as a glob).
;; Assumes the `p*` do not contain any '/' characters
(define (glob-filter pattern path* #:capture-dotfiles? capture-dotfiles?)
(define rx
(regexp
(string-append "^" (glob-element->regexp pattern capture-dotfiles?) "$")))
(define pattern-starts-with-dot? (starts-with-dot? pattern))
(if (not rx)
'()
(for/list ([path (in-list path*)]
#:when (let ([str (path->string path)])
(and
;; -- If `path` is a dotfile and * should not
;; capture dotfiles, then ignore `path`.
(if (starts-with-dot? str)
(or capture-dotfiles?
pattern-starts-with-dot?)
#t)
;; -- Ignore `path` if it doesn't match `pattern`
(regexp-match? rx str))))
path)))
;; -----------------------------------------------------------------------------
;; -- compiling
;; glob-element->regexp : path? boolean? -> string?
;; Convert a glob to a regular expression string
;; - interpret wildcards as regular expressions
;; - escape other regexp syntax
(define glob-element->regexp
(let ([REGEXP-CHARS '(#\. #\( #\) #\| #\+ #\$ #\^ #\[ #\] #\{ #\})])
;; Need to quote these characters before using string as a regexp
(λ (path capture-dotfiles?)
(define str (path->string path))
(define prev-brace-idx ;; (U #f Index), index of most-recent '[' character
(box #f))
(define in-** ;; if #t, prev char. was '*' and current char. is '*' too
(box #f))
(define len (string-length str))
(define str*
(for/list ([c (in-string str)]
[i (in-naturals)])
(cond
[(unbox in-**)
(set-box! in-** #f)
""]
[(escaped? str i)
(string c)]
[(unbox prev-brace-idx)
;; inside a [...]
;; - don't escape anything
;; - exit at first ']'
;; - unless we have '[]]', then exit at 2nd ']'
(if (eq? c #\])
(if (= (unbox prev-brace-idx) (- i 1))
"]"
(begin (set-box! prev-brace-idx #f) "]"))
(string c))]
[(eq? c #\*)
;; check for '**'
;; - if so, match anything even '/'
;; - else, match anything except '/'
(if (and (< (+ i 1) len) (eq? (string-ref str (+ i 1)) #\*))
(begin (set-box! in-** #t)
(if capture-dotfiles? ".*" "((?!/\\.).)*"))
"[^/]*")]
[(eq? c #\?)
"[^/]"]
[(and (eq? c #\[) (has-matching-bracket? str (+ i 1)))
(set-box! prev-brace-idx i)
"["]
[(memq c REGEXP-CHARS)
;; escape characters that the regexp might interpret
(string #\\ c)]
[else
;; keep everything else
(string c)])))
(string-join str* ""))))
;; glob-element->filename : (and/c path-string?
;; (not/c has-**?)
;; (not/c has-glob-pattern?)) -> path-string?
;; Convert a pattern with no (unescaped) glob wildcards into a value suitable
;; for use in `file-exists?` queries --- by removing the escape characters
;; from the wildcards.
;;
;; (If a pattern has wildcards, then it is converted to a regexp and compared
;; to filenames via `directory-list` and `regexp-match?`.
;; If not, the pattern goes through this conversion function and is compared
;; to filenames using `file-exists?` and `directory-exists?`.)
(define (glob-element->filename ps)
(if (path? ps)
(string->path (glob-element->filename/string (path->string ps)))
(glob-element->filename/string ps)))
(define GLOB-WILDCARD-CHAR* '(#\* #\? #\[ #\] #\{ #\} #\,))
(define (glob-element->filename/string str)
(define str*
;; examine `str` in reverse, remove #\\ from escaped wildcards
(let loop ([c* (reverse (string->list str))]
[i (- (string-length str) 1)])
(cond
[(null? c*)
'()]
[(and (memq (car c*) GLOB-WILDCARD-CHAR*)
(escaped? str i))
(cons (string (car c*)) (loop (cddr c*) (- i 2)))]
[else
(cons (string (car c*)) (loop (cdr c*) (- i 1)))])))
(apply string-append (reverse str*)))
(module+ test
(test-case "glob-element->filename/string"
(check-equal? (glob-element->filename/string "a") "a")
(check-equal? (glob-element->filename/string "foo\\*rkt") "foo*rkt")
(check-equal? (glob-element->filename/string "?\\?\\]\\[\\*") "??][*")
(check-equal? (glob-element->filename/string "\\}a\\,") "}a,")
(check-equal? (glob-element->filename/string "\\normal") "\\normal")))
(define (glob-quote/string str)
(define str*
;; add #\\ before all wildcards
(for/list ([c (in-string str)])
(if (memq c GLOB-WILDCARD-CHAR*)
(string #\\ c)
(string c))))
(apply string-append str*))
(module+ test
(test-case "glob-quote/string"
(check-equal? (glob-quote/string "a") "a")
(check-equal? (glob-quote/string "a*") "a\\*")
(check-equal? (glob-quote/string "*][?") "\\*\\]\\[\\?")
(check-equal? (glob-quote/string "racket/**/base") "racket/\\*\\*/base")
(check-equal? (glob-quote/string "},{foo,bar}") "\\}\\,\\{foo\\,bar\\}")
(check-equal? (glob-quote/string "\\") "\\")))
;; flatten-glob : glob/c -> (listof path-string?)
(define (flatten-glob pattern)
(if (path-string? pattern)
(expand-braces pattern)
(append* (map expand-braces pattern))))
;; expand-braces : path-string boolean? -> (listof path-string?)
(define (expand-braces p [in-brace? #f])
(define str (path-string->string p))
(define len (string-length str))
(define (alt**->string* alt**)
(for/list ([alt* (in-list alt**)])
(list->string (reverse alt*))))
;; loop : (listof (listof char)) natural-number/c -> (listof path-string?)
;; Walk through `str`, collect characters into a list of paths,
;; returns 1 string for each combination of brace-separated alternatives.
(let loop ([alt** '()]
[i 0])
(if (= i len)
(for/list ([alt* (in-list alt**)])
(list->string (reverse alt*)))
(let ([c (string-ref str i)]
[esc (escaped? str i)])
(cond
[(and (eq? #\{ c)
(not esc))
;; On {, collect all alternatives between matching }
;; and accumulate a _new_ string for each
(define j (or (find-matching-brace str i)
(malformed-glob-error "unmatched brace at character ~a in '~a'" i str)))
(define str* (expand-braces (substring str (+ i 1) j) #t))
(define alt**+
(cond
[(null? alt**)
(for/list ([str (in-list str*)])
(reverse (string->list str)))]
[(null? str*)
alt**]
[else
(append*
(for/list ([str (in-list str*)])
(define char* (reverse (string->list str)))
(for/list ([alt* (in-list alt**)])
(append char* alt*))))]))
(loop alt**+ (+ j 1))]
[(and (not esc) in-brace? (eq? #\, c))
;; -- start a new alternative
(append (alt**->string* alt**)
(loop '() (+ i 1)))]
[(and (not esc) (eq? #\} c))
(malformed-glob-error "unmatched } in glob '~a'" str)]
[else
(loop (if (null? alt**)
(list (list c))
(for/list ([alt* (in-list alt**)])
(cons c alt*)))
(+ i 1))])))))
;; -----------------------------------------------------------------------------
;; -- other
(define-syntax-rule (malformed-glob-error msg arg* ...)
(raise-user-error 'glob (string-append "malformed glob: " msg) arg* ...))
;; has-**? : path-element? -> boolean?
;; #t if `p` contains '**'
(define (has-**? p)
(string-contains? (path-or-symbol->string p) "**"))
;; normalize-path : path? -> path?
(define (normalize-path p)
(path->complete-path (simplify-path (expand-user-path p))))
;; path*->path : (listof path?) -> path?
(define (path*->path p*)
(if (null? p*)
(current-directory)
(apply build-path p*)))
;; path-string->path : path-string? -> path?
(define (path-string->path p)
(if (path? p) p (string->path p)))
;; `(has-matching-bracket? str i)` returns #t if `str` contains an unescaped #\]
;; character past position `i`
(module+ test
(check-false (has-matching-bracket? "" 0))
(check-false (has-matching-bracket? "foo" 0))
(check-false (has-matching-bracket? "yo]lo" 2))
(check-false (has-matching-bracket? "foo]" 5))
(check-true (and (has-matching-bracket? "foo]" 0) #t))
(check-equal? (has-matching-bracket? "yo]lo" 1) 2))
(define (has-matching-bracket? str left-idx)
(for/or ([i (in-range (+ left-idx 1) (string-length str))])
(and (eq? #\] (string-ref str i))
(not (escaped? str i))
i)))
;; find-matching-brace : string? integer? -> (or/c #f integer?)
;; Returns the index of the } that matches the one at index `i`
(define (find-matching-brace str i)
(define len (string-length str))
(and (<= 0 i (- len 1))
(eq? #\{ (string-ref str i))
;; loop : natural-number/c natural-number/c -> natural-number/c
(let loop ([brace-depth 0] ; "how many { are we currently inside"
[i (+ i 1)]) ; index into `str`
(and (not (= i len))
(let ([c (string-ref str i)]
[esc (escaped? str i)])
(cond
[(and (not esc) (eq? c #\{))
;; -- add 1 level of nesting
(loop (+ 1 brace-depth) (+ i 1))]
[(and (not esc) (eq? c #\}))
;; -- maybe a match, maybe just lose 1 level of nesting
(if (zero? brace-depth)
i
(loop (- brace-depth 1) (+ i 1)))]
[else
(loop brace-depth (+ i 1))]))))))
;; `(escaped? str i)` returns #t if position `i` in the string `str`
;; is escaped by a #\\ character
(module+ test
(check-equal? (escaped? "be\\n" 0) #f)
(check-equal? (escaped? "be\\\\n" 4) #f)
(check-equal? (escaped? "be\\n" 3) #t)
(check-equal? (escaped? "\\neb" 1) #t))
(define (escaped? str i)
(case i
[(0)
#f]
[(1)
(eq? #\\ (string-ref str 0))]
[else
(and (eq? #\\ (string-ref str (- i 1)))
(not (eq? #\\ (string-ref str (- i 2)))))]))
;; `(starts-with-dot? str)` returns #t if the first character in `str` is #\.
(module+ test
(check-equal? (starts-with-dot? "foo") #f)
(check-equal? (starts-with-dot? ".") #f)
(check-equal? (starts-with-dot? "./yo") #f)
(check-equal? (starts-with-dot? "") #f)
(check-equal? (starts-with-dot? ".foo") #t)
(check-equal? (starts-with-dot? ".barry") #t))
(define (starts-with-dot? ps)
(define str (path-string->string ps))
(and (< 1 (string-length str))
(eq? #\. (string-ref str 0))
(not (eq? #\/ (string-ref str 1)))))
(define (path-string->string ps)
(if (string? ps) ps (path->string ps)))
(define (path-or-symbol->string p)
(case p
[(up) ".."]
[(same) "."]
[else
(if (absolute-path? p)
(path->string p)
(path-element->string p))]))
(define (ends-with-path-sep? ps)
(define str (path-string->string ps))
(define len (string-length str))
(and (< 0 len)
(case (string-ref str (- (string-length str) 1))
[(#\/) #t]
[(#\\) (eq? 'windows (system-type 'os))]
[else #f])))
;; =============================================================================
(module+ test
(parameterize ([current-directory (symbol->string (gensym "glob-test"))])
(test-case "path-string->glob:unambiguous"
(define (check-path-string->glob/unamb input expect)
(define r (path-string->glob input #f))
(check-pred glob-unambiguous? r)
(check-equal? r expect))
(check-path-string->glob/unamb "/a/b/c" (build-path "/" "a" "b" "c"))
(check-path-string->glob/unamb "/////a" (build-path "/" "a"))
(check-path-string->glob/unamb "~/foo.txt" (build-path HOME "foo.txt"))
(check-path-string->glob/unamb "~/foo/bar/baz.md"
(build-path HOME "foo" "bar" "baz.md"))
(check-path-string->glob/unamb "/a/b/c?/../e"
(build-path "/" "a" "b" "e")))
(test-case "path-string->glob:recursive"
(define (check-path-string->glob/recur input expect)
(define r (path-string->glob input #f))
(check-pred glob-recursive? r)
(check-equal? r expect))
(check-path-string->glob/recur
"/**/"
(cons (string->path "/") #rx"^/((?!/\\.).)*/$"))
(check-path-string->glob/recur
"a.a/[b?]/**/c?"
(cons (path->directory-path (build-path (current-directory) "a.a"))
(regexp
(format "^~a$"
(path->string
(build-path (current-directory) "a\\.a/[b?]/((?!/\\.).)*/c[^/]")))))))
(test-case "path-string->glob:split"
(define (check-path-string->glob/split input expect)
(define r (path-string->glob input #f))
(check-pred glob-split? r)
(check-equal? r expect))
(check-path-string->glob/split
"*"
(list (current-directory) (build-path "*")))
(check-path-string->glob/split
"/a/b/c?"
(list (build-path "/" "a" "b/") (build-path "c?")))
(check-path-string->glob/split
"/a/b/c?/d/e"
(cons (build-path "/" "a" "b/") (map string->path '("c?" "d" "e"))))
(check-path-string->glob/split
"~/foo/bar?/baz.md"
(cons (build-path HOME "foo/") (map string->path '("bar?" "baz.md"))))
(check-path-string->glob/split
"~/foo/*/baz/.."
(list (build-path HOME "foo/") (string->path "*")))
(check-path-string->glob/split
"~/foo/bar*/baz/.."
(list (build-path HOME "foo/") (string->path "bar*")))
(check-path-string->glob/split
"/a[bc]/d/e/f/../g"
(cons (find-system-path 'sys-dir)
(map string->path '("a[bc]" "d" "e" "g")))))
(test-case "has-glob-pattern?"
(define (has-glob-pattern?* x)
(has-glob-pattern? (string->path x)))
(check-equal? (has-glob-pattern?* "foo") #f)
(check-equal? (has-glob-pattern?* "foo") #f)
(check-equal? (has-glob-pattern?* "]") #f)
(check-equal? (has-glob-pattern?* "[") #f)
(check-equal? (has-glob-pattern?* "lit\\*star") #f)
(check-equal? (has-glob-pattern?* "\\?\\?") #f)
(check-equal? (has-glob-pattern?* "\\[\\]x") #f)
(check-equal? (has-glob-pattern?* "][") #f)
(check-equal? (has-glob-pattern?* "*") #t)
(check-equal? (has-glob-pattern?* "?") #t)
(check-equal? (has-glob-pattern?* "*") #t)
(check-equal? (has-glob-pattern?* "***") #t)
(check-equal? (has-glob-pattern?* "[]") #t)
(check-equal? (has-glob-pattern?* "*[ab*d]*") #t)
(check-equal? (has-glob-pattern?* "[[[][") #t)
(check-equal? (has-glob-pattern?* "foo?bar*") #t)
(check-equal? (has-glob-pattern?* "???*") #t)
(check-equal? (has-glob-pattern?* "ar[gh]r*") #t)
(check-equal? (has-glob-pattern?* (string #\\ #\\ #\*)) #t))
(define (glob-element->regexp* s)
(glob-element->regexp (string->path s) #f))
(test-case "glob-element->regexp"
(check-equal? (glob-element->regexp* "foobar") "foobar")
(check-equal? (glob-element->regexp* ".") "\\.")
(check-equal? (glob-element->regexp* "*") "[^/]*")
(check-equal? (glob-element->regexp* "foo*.txt") "foo[^/]*\\.txt")
(check-equal? (glob-element->regexp* "(hello world)") "\\(hello world\\)")
(check-equal? (glob-element->regexp* "^foo|bar$") "\\^foo\\|bar\\$")
(check-equal? (glob-element->regexp* "things?") "things[^/]")
(check-equal? (glob-element->regexp* "\tescaped\\things\n?")
"\tescaped\\things\n[^/]")
(check-equal? (glob-element->regexp* "outside[in]") "outside[in]")
(check-equal? (glob-element->regexp* ".?.?.?") "\\.[^/]\\.[^/]\\.[^/]")
(check-equal? (glob-element->regexp* "[") "\\[")
(check-equal? (glob-element->regexp* "][") "\\]\\[")
(check-equal? (glob-element->regexp* "[]]") "[]]")
(check-equal? (glob-element->regexp* "[a*?]") "[a*?]")
(check-equal? (glob-element->regexp* "h[el]lo[") "h[el]lo\\["))
(test-case "glob-element->regexp:tree.ss" ;; from tree.ss in the PLT SVN
(check-equal? (glob-element->regexp* "glob") "glob")
(check-equal? (glob-element->regexp* "gl?ob") "gl[^/]ob")
(check-equal? (glob-element->regexp* "gl*ob") "gl[^/]*ob")
(check-equal? (glob-element->regexp* "gl*?ob") "gl[^/]*[^/]ob")
(check-equal? (glob-element->regexp* "gl?*ob") "gl[^/][^/]*ob")
(check-equal? (glob-element->regexp* "gl.ob") "gl\\.ob")
(check-equal? (glob-element->regexp* "gl?.ob") "gl[^/]\\.ob")
(check-equal? (glob-element->regexp* "gl^ob") "gl\\^ob")
(check-equal? (glob-element->regexp* "gl^?ob") "gl\\^[^/]ob")
(check-equal? (glob-element->regexp* "gl\\.ob") "gl\\.ob")
(check-equal? (glob-element->regexp* "gl\\ob") "gl\\ob")
(check-equal? (glob-element->regexp* "gl\\*ob") "gl\\*ob")
(check-equal? (glob-element->regexp* "gl\\?ob") "gl\\?ob")
(check-equal? (glob-element->regexp* "gl\\|ob") "gl\\|ob")
(check-equal? (glob-element->regexp* "gl\\{ob") "gl\\{ob")
(check-equal? (glob-element->regexp* "gl[?]ob") "gl[?]ob")
(check-equal? (glob-element->regexp* "gl[*?]ob") "gl[*?]ob")
(check-equal? (glob-element->regexp* "gl[?*]ob") "gl[?*]ob")
(check-equal? (glob-element->regexp* "gl[]*]ob") "gl[]*]ob")
(check-equal? (glob-element->regexp* "gl[^]*]ob") "gl[^][^/]*\\]ob")
(check-equal? (glob-element->regexp* "gl[^]*]*ob") "gl[^][^/]*\\][^/]*ob"))
(test-case "expand-braces:simple"
(check-equal? (expand-braces "") '())
(check-equal? (expand-braces "a") '("a"))
(check-equal? (expand-braces "anything\\,") '("anything\\,"))
(check-equal? (expand-braces "a,b,c") '("a,b,c"))
(check-equal? (expand-braces "{a,b,c}") '("a" "b" "c"))
(check-equal? (expand-braces "foo,bar") '("foo,bar"))
(check-equal? (expand-braces "{foo,bar}") '("foo" "bar"))
(check-equal?
(sort (expand-braces "{foo,bar}-{baz,qux}.rkt") string<?)
'("bar-baz.rkt" "bar-qux.rkt" "foo-baz.rkt" "foo-qux.rkt"))
(check-equal? (expand-braces "{a,,b}") '("a" "b")))
(test-case "expand-braces:nested"
(check-equal? (expand-braces "foo{bar},baz")
'("foobar,baz"))
(check-equal? (expand-braces "{foo{bar},baz}")
'("foobar" "baz"))
(check-equal? (expand-braces "{{{}{}}}")
'())
(check-equal? (expand-braces "yo{{}}")
'("yo"))
(check-equal? (expand-braces "foo{bar,baz},qux")
'("foobar,qux" "foobaz,qux"))
(check-equal? (expand-braces "f{oo{bar,baz},qux}")
'("foobar" "foobaz" "fqux"))
(check-equal?
(sort (expand-braces "a{b,{c,d}},e,f{g,h},{i,j,k}l") string<?)
(sort '("ab,e,fg,il" "ab,e,fg,jl" "ab,e,fg,kl"
"ab,e,fh,il" "ab,e,fh,jl" "ab,e,fh,kl"
"ac,e,fg,il" "ac,e,fg,jl" "ac,e,fg,kl"
"ac,e,fh,il" "ac,e,fh,jl" "ac,e,fh,kl"
"ad,e,fg,il" "ad,e,fg,jl" "ad,e,fg,kl"
"ad,e,fh,il" "ad,e,fh,jl" "ad,e,fh,kl") string<?))
(check-equal? (expand-braces "{a{b,{c,d}},e,f{g,h},{i,j,k}l}")
'("ab" "ac" "ad" "e" "fg" "fh" "il" "jl" "kl")))
(test-case "expand-braces:malformed"
(check-exn exn:fail:user?
(lambda () (expand-braces "}")))
(check-exn exn:fail:user?
(lambda () (expand-braces "{")))
(check-exn exn:fail:user?
(lambda () (expand-braces "abc{d")))
(check-exn exn:fail:user?
(lambda () (expand-braces "a{b}c}"))))
(test-case "glob-element->regexp:invalid"
;; these are all invalid regexps
(check-equal? (glob-element->regexp* "[]") "\\[\\]")
(check-equal? (glob-element->regexp* "[---]") "[---]"))
(test-case "glob-filter"
(define (glob-filter* p p* #:capture-dotfiles? [cd? (glob-capture-dotfiles?)])
(map path->string
(glob-filter (string->path p) (map string->path p*) #:capture-dotfiles? cd?)))
(check-equal?
(glob-filter* "foo" '() #:capture-dotfiles? #f)
'())
(check-equal?
(glob-filter* "foo" '("foo") #:capture-dotfiles? #f)
'("foo"))
(parameterize ([glob-capture-dotfiles? #f])
(check-equal? (glob-filter* "foo" '()) '())
(check-equal? (glob-filter* "foo" '("foo")) '("foo"))
(check-equal? (glob-filter* "*" '("foo" ".foo")) '("foo"))
(check-equal? (glob-filter* "foo" '("qux" "foo" "bar")) '("foo"))
(check-equal?
(glob-filter* "*" '("cat" "dog" "goa"))
'("cat" "dog" "goa"))
(check-equal? (glob-filter* "ca?" '("ca" "car" "cat")) '("car" "cat"))
(check-equal?
(glob-filter* "ca?at" '("ca" "car" "catat" "caat"))
'("catat"))
(check-equal? (glob-filter* ".?.?.?" '("a" "ab" "abc" "abcd")) '())
(check-equal? (glob-filter* ".?.?" '("." ".." "..." "....")) '("...."))
(check-equal?
(glob-filter* "*.txt" '("file.txt" "sheet.txt" "work.jar" "play.tab"))
'("file.txt" "sheet.txt")))
(check-equal?
(glob-filter* "*" '("foo" ".foo") #:capture-dotfiles? #t)
'("foo" ".foo"))
(check-equal?
(glob-filter* ".?.?" '("." ".." "..." "....") #:capture-dotfiles? #t)
'("...."))
(parameterize ([glob-capture-dotfiles? #t])
(check-equal? (glob-filter* "*" '("foo" ".foo")) '("foo" ".foo"))
(check-equal? (glob-filter* ".?.?" '("." ".." "..." "....")) '("...."))
(void)))
(test-case "find-matching-brace:simple"
(check-equal? (find-matching-brace "yo{}lo" 2) 3)
(check-equal? (find-matching-brace "we{p}a" 2) 4)
(check-equal? (find-matching-brace "{abc}" 0) 4)
(check-equal? (find-matching-brace "hel{lo}world" 3) 6)
)
(test-case "find-matching-brace:nested"
(check-equal? (find-matching-brace "a{b{}c}d" 1) 6)
(check-equal? (find-matching-brace "a{b{}c}d" 3) 4)
(check-equal? (find-matching-brace "a{b{c{d}e}f}g" 3) 9))
(test-case "find-matching-brace:unmatched"
(check-equal? (find-matching-brace "{" 0) #f)
(check-equal? (find-matching-brace "abc{d" 3) #f)
(check-equal? (find-matching-brace "foo{bar{baz}" 3) #f)
(check-equal? (find-matching-brace "a{b{c{d}e" 3) #f))
(test-case "find-matching-brace:arg-error"
(check-equal? (find-matching-brace "{" -1) #f)
(check-equal? (find-matching-brace "{}" 2) #f)
(check-equal? (find-matching-brace "" 0) #f)
(check-equal? (find-matching-brace "asdf" 0) #f)
(check-equal? (find-matching-brace "a{b}c" 0) #f))
(test-case "path-or-symbol->string"
(check-equal?
(path-or-symbol->string 'up)
"..")
(check-equal?
(path-or-symbol->string 'same)
".")
(check-equal?
(path-or-symbol->string (string->path "~"))
"~")
(check-equal?
(path-or-symbol->string (string->path "hello/"))
"hello"))
(test-case "ends-with-path-sep?"
(check-true (ends-with-path-sep? "foo/"))
(check-true (ends-with-path-sep? "foo/bar/"))
(check-true (ends-with-path-sep? "foo/bar/baz/"))
(check-true (ends-with-path-sep? "foo//"))
(check-true (ends-with-path-sep? (string->path "/")))
(check-true (ends-with-path-sep? "*/"))
(check-true (ends-with-path-sep? "/*/*/*/"))
(check-true (ends-with-path-sep? (build-path "?" "?/")))
(check-false (ends-with-path-sep? ""))
(check-false (ends-with-path-sep? "a"))
(check-false (ends-with-path-sep? "/a"))
(check-false (ends-with-path-sep? (build-path "foo" "bar")))
(check-false (ends-with-path-sep? "?/?"))
(check-false (ends-with-path-sep? "[abc/]"))
)
(printf "~a tests passed\n" (unbox num-tests))
))