From e7264d2d98155dc8a1576ba962eee60dbe9c4e68 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Thu, 4 Dec 2014 07:56:11 -0700 Subject: [PATCH] package manager: adjust parsing of file:// URLs For some reason, I previously made the package manager parse file URLs by stripping away "file://" and treating the rest directly as a path. Maybe it was to support relative paths, or maybe it was to implicitly disallow query and fragment parts of the URL, but it seems like a terrible idea; I've switched to `string->url` plus `url->path`. As a result, parsing now implicitly allows and ignores query and fragment parts of a "file://" URL. That's intended as a feature, allowing extra information to be attached to a path in, for example, a catalog. --- .../racket-doc/pkg/scribblings/pkg.scrbl | 13 +++++++++++-- .../racket-test/tests/pkg/tests-name.rkt | 6 ++++-- racket/collects/pkg/name.rkt | 6 ++++-- racket/collects/pkg/private/stage.rkt | 4 ++-- 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/pkgs/racket-pkgs/racket-doc/pkg/scribblings/pkg.scrbl b/pkgs/racket-pkgs/racket-doc/pkg/scribblings/pkg.scrbl index d4a9ba5a42..a1145fcee5 100644 --- a/pkgs/racket-pkgs/racket-doc/pkg/scribblings/pkg.scrbl +++ b/pkgs/racket-pkgs/racket-doc/pkg/scribblings/pkg.scrbl @@ -127,6 +127,7 @@ The valid archive formats are (currently) @filepath{.zip}, @filepath{.tar}, @filepath{.tgz}, @filepath{.tar.gz}, and @filepath{.plt}. +Any query or fragment parts of a @litchar{file://} URL are ignored. For example, @filepath{~/tic-tac-toe.zip} is an archive package source, and its @tech{checksum} would be inside @@ -147,12 +148,16 @@ package name is the filename without its suffix. 
@history[#:changed "6.0.1.12" @elem{Changed treatment of an archive that contains all - content within a top-level directory.}]} + content within a top-level directory.} + #:changed "6.1.1.5" + @elem{Changed @litchar{file://} parsing to accept a general + URL and ignore any query or fragment.}]} @; ---------------------------------------- @item{a local directory (as a plain path or @litchar{file://} URL) --- The name of the package is the name of the directory. The @tech{checksum} is not present. +Any query or fragment parts of a @litchar{file://} URL are ignored. For example, @filepath{~/tic-tac-toe/} is a directory package source. @@ -162,7 +167,11 @@ to a directory only when it does not have a file-archive suffix, does not match the grammar of a package name, and either starts with starts with @litchar{file://} or does not start with alphabetic characters followed by @litchar{://}. The inferred -package name is the directory name.} +package name is the directory name. + +@history[#:changed "6.1.1.5" + @elem{Changed @litchar{file://} parsing to accept a general + URL and ignore any query or fragment.}]} @item{a remote URL naming an archive --- This type follows the same rules as a local file path, but the archive and @tech{checksum} files are diff --git a/pkgs/racket-pkgs/racket-test/tests/pkg/tests-name.rkt b/pkgs/racket-pkgs/racket-test/tests/pkg/tests-name.rkt index c82de07ec4..36f46e5c44 100644 --- a/pkgs/racket-pkgs/racket-test/tests/pkg/tests-name.rkt +++ b/pkgs/racket-pkgs/racket-test/tests/pkg/tests-name.rkt @@ -147,10 +147,12 @@ (check-equal-values? (parse "racket/fish/x" 'github #rx"two") (values #f 'github #f)) (check-equal-values? (parse "fish" 'github #rx"two") (values #f 'github #f)) - (check-equal-values? (parse "file://fish.plt" #f) (values "fish" 'file #t)) + (check-equal-values? (parse "file://fish.plt" #f) (values #f 'dir #t)) ; missing root means an empty path (check-equal-values? 
(parse "file:///root/fish.plt" #f) (values "fish" 'file #t)) - (check-equal-values? (parse "file://fish" #f) (values "fish" 'dir #t)) + (check-equal-values? (parse "file://fish" #f) (values #f 'dir #t)) ; missing root means an empty path (check-equal-values? (parse "file:///root/fish" #f) (values "fish" 'dir #t)) + (check-equal-values? (parse "file:///root/fish.zip?ignored=yes#alsoIgnored" #f) (values "fish" 'file #t)) + (check-equal-values? (parse "file:///root/fish?ignored=yes#alsoIgnored" #f) (values "fish" 'dir #t)) (check-equal-values? (parse "random://racket-lang.org/fish.plt" #f #rx"scheme") (values #f #f #f)) diff --git a/racket/collects/pkg/name.rkt b/racket/collects/pkg/name.rkt index f0ab9a427f..e4b5389a67 100644 --- a/racket/collects/pkg/name.rkt +++ b/racket/collects/pkg/name.rkt @@ -256,8 +256,10 @@ (values (validate-name name complain-name #f) (or type (and name-type)))] [(and (not type) - (regexp-match #rx"^file://(.*)$" s)) - => (lambda (m) (parse-path (cadr m)))] + (regexp-match #rx"^file://" s)) + => (lambda (m) + ;; Note that we're ignoring a query & fragment, if any: + (parse-path (url->path (string->url s))))] [(and (not type) (regexp-match? #rx"^[a-zA-Z]*://" s)) (complain "unrecognized URL scheme") diff --git a/racket/collects/pkg/private/stage.rkt b/racket/collects/pkg/private/stage.rkt index cefe21a26f..a5aa474fbf 100644 --- a/racket/collects/pkg/private/stage.rkt +++ b/racket/collects/pkg/private/stage.rkt @@ -535,8 +535,8 @@ #f ; no git-dir #f ; no clean? given-checksum ; if a checksum is provided, just use it - (directory->module-paths pkg pkg-name metadata-ns) - (directory->additional-installs pkg pkg-name metadata-ns))] + (directory->module-paths pkg-path pkg-name metadata-ns) + (directory->additional-installs pkg-path pkg-name metadata-ns))] [else (define pkg-dir (if in-place?