From b38ce36c92e6cfc41f743f640fb1d3cefcfd3395 Mon Sep 17 00:00:00 2001 From: Ben Greenman Date: Tue, 4 Dec 2018 00:21:14 -0500 Subject: [PATCH] glob: add 'glob-quote' (#2397) add a function to escape any glob wildcards in a path or string also add a private `glob-element->filename` function so that, e.g., the pattern `a\*` matches the file named `a*` (previously, the match would fail and I think it was impossible to match for only `a*`) --- pkgs/racket-doc/file/scribblings/glob.scrbl | 19 +++++- racket/collects/file/glob.rkt | 1 + racket/collects/file/private/glob.rkt | 69 ++++++++++++++++++++- 3 files changed, 85 insertions(+), 4 deletions(-) diff --git a/pkgs/racket-doc/file/scribblings/glob.scrbl b/pkgs/racket-doc/file/scribblings/glob.scrbl index c694389225..97ae464ef9 100644 --- a/pkgs/racket-doc/file/scribblings/glob.scrbl +++ b/pkgs/racket-doc/file/scribblings/glob.scrbl @@ -3,12 +3,14 @@ scribble/examples (for-label file/glob racket/sequence)] +@(define glob-eval (make-base-eval '(require file/glob))) + @title[#:tag "glob"]{Globbing} @defmodule[file/glob]{The @racketmodname[file/glob] library implements @hyperlink["https://en.wikipedia.org/wiki/Glob_(programming)"]{globbing} for @racket[path-string?] values. A @emph{glob} is a path string that matches -a set of path strings using the following @emph{wildcards}: +a set of path strings using the following @deftech[#:key "glob-wildcard"]{wildcards}: @itemlist[ @item{ A sextile (@tt{*}) matches any sequence of characters in a file or directory @@ -47,7 +49,7 @@ that accepts a glob or a sequence of globs. } All @racketmodname[file/glob] functions accept @racket[glob/c] values. -These functions also recognize braces (@tt|{{}}|) as a @emph{meta-wildcard} for +These functions also recognize braces (@tt|{{}}|) as a @deftech[#:key "glob-meta-wildcard"]{meta-wildcard} for describing multiple globs. @margin-note{Braces are interpreted @emph{before} any other wildcards.} @@ -117,6 +119,19 @@ Examples: This operation accesses the filesystem. } +@defproc*[([(glob-quote [str string?]) string?] + [(glob-quote [path path?]) path?] +)]{ + Escapes all @tech{glob wildcards} and @tech{glob meta-wildcards} in the given + string or path string. + + @examples[#:eval glob-eval + (glob-quote "*.rkt") + (glob-quote "[Ff]ile?{zip,tar.gz}") + (glob-quote "]") + ] +} + @defparam[glob-capture-dotfiles? capture-dotfiles? boolean? #:value #f]{ Determines whether wildcards match names that begin with a @racket[#\.] character. If @racket[#t], the wildcards will match dotfiles. If diff --git a/racket/collects/file/glob.rkt b/racket/collects/file/glob.rkt index 7a67fcc0ef..60e126fbcb 100644 --- a/racket/collects/file/glob.rkt +++ b/racket/collects/file/glob.rkt @@ -11,4 +11,5 @@ [glob (->* [glob/c] [#:capture-dotfiles? boolean?] (listof path-string?))] [in-glob (->* [glob/c] [#:capture-dotfiles? boolean?] (sequence/c path-string?))] [glob-match? (->* [glob/c path-string?] [#:capture-dotfiles? boolean?] boolean?)] + [glob-quote (->i ([ps path-string?]) [r (ps) (if (path? ps) path? string?)])] [glob-capture-dotfiles? (parameter/c boolean?)])) diff --git a/racket/collects/file/private/glob.rkt b/racket/collects/file/private/glob.rkt index e7612223d9..6da9e3ee84 100644 --- a/racket/collects/file/private/glob.rkt +++ b/racket/collects/file/private/glob.rkt @@ -19,6 +19,7 @@ glob in-glob glob-match? + glob-quote glob-capture-dotfiles?) (require @@ -107,6 +108,11 @@ (or (ends-with-path-sep? ps) (directory-exists? ps)))))) +(define (glob-quote ps) + (if (path? ps) + (string->path (glob-quote/string (path->string ps))) + (glob-quote/string ps))) + ;; ----------------------------------------------------------------------------- ;; -- parsing @@ -175,10 +181,10 @@ (let loop ([unamb* '()] [elem* elem*]) (cond [(or (null? elem*) (has-**? (car elem*))) - (values (normalize-path (path*->path (reverse unamb*))) + (values (normalize-path (path*->path (map glob-element->filename (reverse unamb*)))) (if (not (null? elem*)) '** #f))] [(has-glob-pattern? (car elem*)) - (values (normalize-path (path*->path (reverse unamb*))) + (values (normalize-path (path*->path (map glob-element->filename (reverse unamb*)))) (if (ormap has-**? (cdr elem*)) '** elem*))] [else (loop (cons (car elem*) unamb*) (cdr elem*))]))) @@ -388,6 +394,65 @@ (string c)]))) (string-join str* "")))) +;; glob-element->filename : (and/c path-string? +;; (not/c has-**?) +;; (not/c has-glob-pattern?)) -> path-string? +;; Convert a pattern with no (unescaped) glob wildcards into a value suitable +;; for use in `file-exists?` queries --- by removing the escape characters +;; from the wildcards. +;; +;; (If a pattern has wildcards, then it is converted to a regexp and compared +;; to filenames via `directory-list` and `regexp-match?`. +;; If not, the pattern goes through this conversion function and is compared +;; to filenames using `file-exists?` and `directory-exists?`.) +(define (glob-element->filename ps) + (if (path? ps) + (string->path (glob-element->filename/string (path->string ps))) + (glob-element->filename/string ps))) + +(define GLOB-WILDCARD-CHAR* '(#\* #\? #\[ #\] #\{ #\} #\,)) + +(define (glob-element->filename/string str) + (define str* + ;; examine `str` in reverse, remove #\\ from escaped wildcards + (let loop ([c* (reverse (string->list str))] + [i (- (string-length str) 1)]) + (cond + [(null? c*) + '()] + [(and (memq (car c*) GLOB-WILDCARD-CHAR*) + (escaped? str i)) + (cons (string (car c*)) (loop (cddr c*) (- i 2)))] + [else + (cons (string (car c*)) (loop (cdr c*) (- i 1)))]))) + (apply string-append (reverse str*))) + +(module+ test + (test-case "glob-element->filename/string" + (check-equal? (glob-element->filename/string "a") "a") + (check-equal? (glob-element->filename/string "foo\\*rkt") "foo*rkt") + (check-equal? (glob-element->filename/string "?\\?\\]\\[\\*") "??][*") + (check-equal? (glob-element->filename/string "\\}a\\,") "}a,") + (check-equal? (glob-element->filename/string "\\normal") "\\normal"))) + +(define (glob-quote/string str) + (define str* + ;; add #\\ before all wildcards + (for/list ([c (in-string str)]) + (if (memq c GLOB-WILDCARD-CHAR*) + (string #\\ c) + (string c)))) + (apply string-append str*)) + +(module+ test + (test-case "glob-quote/string" + (check-equal? (glob-quote/string "a") "a") + (check-equal? (glob-quote/string "a*") "a\\*") + (check-equal? (glob-quote/string "*][?") "\\*\\]\\[\\?") + (check-equal? (glob-quote/string "racket/**/base") "racket/\\*\\*/base") + (check-equal? (glob-quote/string "},{foo,bar}") "\\}\\,\\{foo\\,bar\\}") + (check-equal? (glob-quote/string "\\") "\\"))) + ;; flatten-glob : glob/c -> (listof path-string?) (define (flatten-glob pattern) (if (path-string? pattern)