glob: add 'glob-quote' (#2397)

add a function to escape any glob wildcards in a path or string

also add a private `glob-element->filename` function so that, e.g., the pattern
`a\*` matches the file named `a*` (previously, the match would fail and
I think it was impossible to match for only `a*`)
This commit is contained in:
Ben Greenman 2018-12-04 00:21:14 -05:00 committed by GitHub
parent e729d35915
commit b38ce36c92
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 85 additions and 4 deletions

View File

@ -3,12 +3,14 @@
scribble/examples
(for-label file/glob racket/sequence)]
@(define glob-eval (make-base-eval '(require file/glob)))
@title[#:tag "glob"]{Globbing}
@defmodule[file/glob]{The @racketmodname[file/glob] library implements
@hyperlink["https://en.wikipedia.org/wiki/Glob_(programming)"]{globbing}
for @racket[path-string?] values. A @emph{glob} is a path string that matches
a set of path strings using the following @emph{wildcards}:
a set of path strings using the following @deftech[#:key "glob-wildcard"]{wildcards}:
@itemlist[
@item{
A sextile (@tt{*}) matches any sequence of characters in a file or directory
@ -47,7 +49,7 @@ that accepts a glob or a sequence of globs.
}
All @racketmodname[file/glob] functions accept @racket[glob/c] values.
These functions also recognize braces (@tt|{{}}|) as a @emph{meta-wildcard} for
These functions also recognize braces (@tt|{{}}|) as a @deftech[#:key "glob-meta-wildcard"]{meta-wildcard} for
describing multiple globs.
@margin-note{Braces are interpreted @emph{before} any other wildcards.}
@ -117,6 +119,19 @@ Examples:
This operation accesses the filesystem.
}
@defproc*[([(glob-quote [str string?]) string?]
[(glob-quote [path path?]) path?]
)]{
Escapes all @tech{glob wildcards} and @tech{glob meta-wildcards} in the given
string or path string.
@examples[#:eval glob-eval
(glob-quote "*.rkt")
(glob-quote "[Ff]ile?{zip,tar.gz}")
(glob-quote "]")
]
}
@defparam[glob-capture-dotfiles? capture-dotfiles? boolean? #:value #f]{
Determines whether wildcards match names that begin with a @racket[#\.]
character. If @racket[#t], the wildcards will match dotfiles. If

View File

@ -11,4 +11,5 @@
[glob (->* [glob/c] [#:capture-dotfiles? boolean?] (listof path-string?))]
[in-glob (->* [glob/c] [#:capture-dotfiles? boolean?] (sequence/c path-string?))]
[glob-match? (->* [glob/c path-string?] [#:capture-dotfiles? boolean?] boolean?)]
[glob-quote (->i ([ps path-string?]) [r (ps) (if (path? ps) path? string?)])]
[glob-capture-dotfiles? (parameter/c boolean?)]))

View File

@ -19,6 +19,7 @@
glob
in-glob
glob-match?
glob-quote
glob-capture-dotfiles?)
(require
@ -107,6 +108,11 @@
(or (ends-with-path-sep? ps)
(directory-exists? ps))))))
(define (glob-quote ps)
(if (path? ps)
(string->path (glob-quote/string (path->string ps)))
(glob-quote/string ps)))
;; -----------------------------------------------------------------------------
;; -- parsing
@ -175,10 +181,10 @@
(let loop ([unamb* '()] [elem* elem*])
(cond
[(or (null? elem*) (has-**? (car elem*)))
(values (normalize-path (path*->path (reverse unamb*)))
(values (normalize-path (path*->path (map glob-element->filename (reverse unamb*))))
(if (not (null? elem*)) '** #f))]
[(has-glob-pattern? (car elem*))
(values (normalize-path (path*->path (reverse unamb*)))
(values (normalize-path (path*->path (map glob-element->filename (reverse unamb*))))
(if (ormap has-**? (cdr elem*)) '** elem*))]
[else
(loop (cons (car elem*) unamb*) (cdr elem*))])))
@ -388,6 +394,65 @@
(string c)])))
(string-join str* ""))))
;; glob-element->filename : (and/c path-string?
;; (not/c has-**?)
;; (not/c has-glob-pattern?)) -> path-string?
;; Convert a pattern with no (unescaped) glob wildcards into a value suitable
;; for use in `file-exists?` queries --- by removing the escape characters
;; from the wildcards.
;;
;; (If a pattern has wildcards, then it is converted to a regexp and compared
;; to filenames via `directory-list` and `regexp-match?`.
;; If not, the pattern goes through this conversion function and is compared
;; to filenames using `file-exists?` and `directory-exists?`.)
(define (glob-element->filename ps)
(if (path? ps)
(string->path (glob-element->filename/string (path->string ps)))
(glob-element->filename/string ps)))
(define GLOB-WILDCARD-CHAR* '(#\* #\? #\[ #\] #\{ #\} #\,))
(define (glob-element->filename/string str)
(define str*
;; examine `str` in reverse, remove #\\ from escaped wildcards
(let loop ([c* (reverse (string->list str))]
[i (- (string-length str) 1)])
(cond
[(null? c*)
'()]
[(and (memq (car c*) GLOB-WILDCARD-CHAR*)
(escaped? str i))
(cons (string (car c*)) (loop (cddr c*) (- i 2)))]
[else
(cons (string (car c*)) (loop (cdr c*) (- i 1)))])))
(apply string-append (reverse str*)))
(module+ test
(test-case "glob-element->filename/string"
(check-equal? (glob-element->filename/string "a") "a")
(check-equal? (glob-element->filename/string "foo\\*rkt") "foo*rkt")
(check-equal? (glob-element->filename/string "?\\?\\]\\[\\*") "??][*")
(check-equal? (glob-element->filename/string "\\}a\\,") "}a,")
(check-equal? (glob-element->filename/string "\\normal") "\\normal")))
(define (glob-quote/string str)
(define str*
;; add #\\ before all wildcards
(for/list ([c (in-string str)])
(if (memq c GLOB-WILDCARD-CHAR*)
(string #\\ c)
(string c))))
(apply string-append str*))
(module+ test
(test-case "glob-quote/string"
(check-equal? (glob-quote/string "a") "a")
(check-equal? (glob-quote/string "a*") "a\\*")
(check-equal? (glob-quote/string "*][?") "\\*\\]\\[\\?")
(check-equal? (glob-quote/string "racket/**/base") "racket/\\*\\*/base")
(check-equal? (glob-quote/string "},{foo,bar}") "\\}\\,\\{foo\\,bar\\}")
(check-equal? (glob-quote/string "\\") "\\")))
;; flatten-glob : glob/c -> (listof path-string?)
(define (flatten-glob pattern)
(if (path-string? pattern)