%-encoding extension in module paths; new r6rs library name encoding

svn: r10697
This commit is contained in:
Matthew Flatt 2008-07-09 15:41:38 +00:00
parent 2f22ed7c41
commit a4cc0f7890
7 changed files with 172 additions and 36 deletions

View File

@ -0,0 +1,24 @@
#lang scheme/base
(provide encode-name)
(define (encode-name s)
(let ([s (if (symbol? s)
(symbol->string s)
s)])
(cond
[(regexp-match #rx"(.*?)([^a-zA-Z0-9_+-]+)(.*)" s)
=> (lambda (m)
(string-append
(cadr m)
(apply
string-append
(map (lambda (c)
(let ([s (format "0~x" c)])
(string-append
"%"
(substring s (- (string-length s) 2)))))
(bytes->list (string->bytes/utf-8 (caddr m)))))
(encode-name (cadddr m))))]
[else s])))

View File

@ -1,6 +1,7 @@
#lang scheme/base #lang scheme/base
(require "find-version.ss" (require "find-version.ss"
"encode-name.ss"
(for-template scheme/base)) (for-template scheme/base))
(provide parse-import) (provide parse-import)
@ -37,10 +38,19 @@
(andmap identifier? (syntax->list #'(id2 ...))) (andmap identifier? (syntax->list #'(id2 ...)))
(is-version-reference? #'(vers ...))) (is-version-reference? #'(vers ...)))
(let-values ([(coll file) (let-values ([(coll file)
(let ([strs (map (lambda (id) (let* ([strs (map (lambda (id)
(symbol->string (syntax-e id))) (symbol->string (syntax-e id)))
(syntax->list #'(id1 id2 ...)))]) (syntax->list #'(id1 id2 ...)))]
(if (= 1 (length strs)) [len (length strs)]
[strs (map
encode-name
(if (and (= 2 len) (regexp-match? #rx"^main_*$" (cadr strs)))
;; rename (X main_*) => (X main__*)
(list (car strs)
(string-append (cadr strs) "_"))
;; no rename
strs))])
(if (= 1 len)
(values (list (car strs)) "main") (values (list (car strs)) "main")
(values (reverse (cdr (reverse strs))) (values (reverse (cdr (reverse strs)))
(car (reverse strs)))))]) (car (reverse strs)))))])

View File

@ -6,7 +6,8 @@
setup/dirs setup/dirs
scheme/port scheme/port
scheme/file scheme/file
"private/readtable.ss") "private/readtable.ss"
"private/encode-name.ss")
(define install-mode (make-parameter #f)) (define install-mode (make-parameter #f))
(define compile-mode (make-parameter #f)) (define compile-mode (make-parameter #f))
@ -123,11 +124,23 @@
(loop (cdr name)))])))) (loop (cdr name)))]))))
(define (name->path name) (define (name->path name)
(let* ([name (if (or (= (length name) 1) (let* ([name (let ([len (length name)])
(and (= (length name) 2) (cond
[(or (= len 1)
(and (= len 2)
(not (symbol? (cadr name))))) (not (symbol? (cadr name)))))
(list* (car name) 'main (cdr name)) ;; Add implicit "main":
name)]) (list* (car name) 'main (cdr name))]
[(and (or (= len 2)
(and (= len 3)
(not (symbol? (caddr name)))))
(regexp-match #rx"^main_*$" (symbol->string (cadr name))))
;; Rename (X main_*) => (X main__*)
(list* (car name)
(string->symbol
(string-append (symbol->string (cadr name)) "_"))
(cddr name))]
[else name]))])
(apply build-path (apply build-path
(if (install-all-users) (if (install-all-users)
(find-collects-dir) (find-collects-dir)
@ -140,7 +153,7 @@
;; versioned: ;; versioned:
(list (list
(format "~a~a.ss" (format "~a~a.ss"
(car name) (encode-name (car name))
(apply (apply
string-append string-append
(map (lambda (v) (map (lambda (v)
@ -148,9 +161,9 @@
(cadr name)))))] (cadr name)))))]
[(null? (cdr name)) [(null? (cdr name))
;; unversioned: ;; unversioned:
(list (format "~a.ss" (car name)))] (list (format "~a.ss" (encode-name (car name))))]
[else [else
(cons (symbol->string (car name)) (cons (encode-name (car name))
(loop (cdr name)))]))))) (loop (cdr name)))])))))
;; ---------------------------------------- ;; ----------------------------------------

View File

@ -175,8 +175,9 @@ a name is converted to a PLT Scheme module pathname (see @secref[#:doc
guide-src "module-paths"]) by concatenating the symbols with a guide-src "module-paths"]) by concatenating the symbols with a
@litchar{/} separator, and then appending the version integers each @litchar{/} separator, and then appending the version integers each
with a preceeding @litchar{-}. As a special case, when an @|r6rs| path with a preceeding @litchar{-}. As a special case, when an @|r6rs| path
contains a single symbol followed by a version, a @schemeidfont{main} contains a single symbol (optionally followed by a version), a
symbol is effectively inserted after the initial symbol. @schemeidfont{main} symbol is effectively inserted after the initial
symbol. See below for further encoding considerations.
When an @|r6rs| library or top-level program refers to another When an @|r6rs| library or top-level program refers to another
library, it can supply version constraints rather than naming a library, it can supply version constraints rather than naming a
@ -190,13 +191,41 @@ search order for file extensions is @filepath{.mzscheme.ss},
resolving version constraints, these extensions are all tried when resolving version constraints, these extensions are all tried when
looking for matches. looking for matches.
To ensure that all @|r6rs| library names can be converted to a unique
and distinct library module path, the following conversions are
applied to each symbol before concatenating them:
@itemize{
@item{The symbol is encoded using UTF-8, and the resulting bytes are
treated as Latin-1 encoded characters. ASCII letters, digits,
@litchar{+}, @litchar{-}, and @litchar{_} are left as-is; other
characters are replaced by @litchar{%} followed by two lowercase
hexadecimal digits. Note that UTF-8 encodes ASCII letters, digits,
@|etc| as themselves, so typical library names correspond to readable
module paths.}
@item{If the @|r6rs| library reference has two symbol elements and
the second one is @schemeidfont{main} followed by any number of
underscores, then an extra underscore is added to that symbol. This
conversion avoids a collision between an explicit @schemeidfont{main}
and the implicit @schemeidfont{main} when a library path has a single
symbol element.}
}
Examples (assuming a typical PLT Scheme installation): Examples (assuming a typical PLT Scheme installation):
@schemeblock[ @schemeblock[
(rnrs io simple (6)) #, @elem{means} (lib "rnrs/io/simple-6.ss") (rnrs io simple (6)) #, @elem{means} (lib "rnrs/io/simple-6.ss")
(rnrs) #, @elem{means} (lib "rnrs/main-6.ss") (rnrs) #, @elem{means} (lib "rnrs/main-6.ss")
(rnrs main) #, @elem{means} (lib "rnrs/main_.ss")
(rnrs (6)) #, @elem{means} (lib "rnrs/main-6.ss") (rnrs (6)) #, @elem{means} (lib "rnrs/main-6.ss")
(scheme base) #, @elem{means} (lib "scheme/base.ss") (scheme base) #, @elem{means} (lib "scheme/base.ss")
(achtung!) #, @elem{means} (lib "achtung%21/main.ss")
(funco new-λ) #, @elem{means} (lib "funco/new-%ce%bb.ss")
] ]

View File

@ -325,10 +325,20 @@ corresponds to the default @tech{module name resolver}.
@litchar{/} is the path delimiter (multiple adjacent @litchar{/}s are @litchar{/} is the path delimiter (multiple adjacent @litchar{/}s are
treated as a single delimiter), @litchar{..} accesses the parent treated as a single delimiter), @litchar{..} accesses the parent
directory, and @litchar{.} accesses the current directory. The path directory, and @litchar{.} accesses the current directory. The path
cannot be empty or contain a leading or trailing slash, path cannot be empty or contain a leading or trailing slash, path elements
elements before than the last one cannot include a file suffix, before than the last one cannot include a file suffix (i.e., a
@litchar{.} in an element other than @litchar{.} or @litchar{..}),
and the only allowed characters are ASCII letters, ASCII digits, and the only allowed characters are ASCII letters, ASCII digits,
@litchar{-}, @litchar{+}, @litchar{_}, @litchar{.}, and @litchar{/}.} @litchar{-}, @litchar{+}, @litchar{_}, @litchar{.}, @litchar{/}, and
@litchar{%}. Furthermore, a @litchar{%} is allowed only when followed
by two lowercase hexadecimal digits, and the digits must form a
number that is not the ASCII value of a letter, digit, @litchar{-},
@litchar{+}, or @litchar{_}.
@margin-note{The @litchar{%} provision is intended to support a
one-to-one encoding of arbitrary strings as path elements (after
UTF-8 encoding). Such encodings are not decoded to arrive at a
filename, but instead preserved in the file access.}}
@defsubform[(lib rel-string ...+)]{A path to a module installed into @defsubform[(lib rel-string ...+)]{A path to a module installed into
a @tech{collection} (see @secref["collects"]). The @scheme[rel-string]s in a @tech{collection} (see @secref["collects"]). The @scheme[rel-string]s in
@ -406,17 +416,19 @@ corresponds to the default @tech{module name resolver}.
] ]
and where an @nonterm{elem} is a non-empty sequence of characters and where an @nonterm{elem} is a non-empty sequence of characters
that are ASCII letters, ASCII digits, @litchar{-}, @litchar{+}, or that are ASCII letters, ASCII digits, @litchar{-}, @litchar{+},
@litchar{_}, and an @nonterm{int} is a non-empty sequence of ASCII @litchar{_}, or @litchar{%} followed by lowercase hexadecimal digits
digits. As this shorthand is expended, a @filepath{.plt} extension is (that do not encode one of the other allowed characters), and an
added to @nonterm{pkg}, and a @filepath{.ss} extension is added to @nonterm{int} is a non-empty sequence of ASCII digits. As this
shorthand is expended, a @filepath{.plt} extension is added to
@nonterm{pkg}, and a @filepath{.ss} extension is added to
@scheme{path}; if no @nonterm{path} is included, @filepath{main.ss} @scheme{path}; if no @nonterm{path} is included, @filepath{main.ss}
is used in the expansion. is used in the expansion.
A @scheme[(planet string)] form is like a @scheme[(planet id)] form A @scheme[(planet string)] form is like a @scheme[(planet id)] form
with the identifier converted to a string, except that the with the identifier converted to a string, except that the
@scheme[string] can optionally end with a file extension for a @scheme[string] can optionally end with a file extension (i.e., a
@nonterm{path}. @litchar{.}) for a @nonterm{path}.
In the more general last form of a @scheme[planet] module path, the In the more general last form of a @scheme[planet] module path, the
@scheme[rel-string]s are similar to the @scheme[lib] form, except @scheme[rel-string]s are similar to the @scheme[lib] form, except

View File

@ -307,14 +307,24 @@
(test #t module-path? "hello") (test #t module-path? "hello")
(test #t module-path? "hello.ss") (test #t module-path? "hello.ss")
(test #f module-path? "hello*ss") (test #f module-path? "hello*ss")
(test #t module-path? "hello%2ess")
(test #t module-path? "hello%00ss")
(test #f module-path? "hello%2Ess")
(test #f module-path? "hello%41ss")
(test #f module-path? "hello%4")
(test #f module-path? "hello%")
(test #f module-path? "hello%q0")
(test #f module-path? "hello%0q")
(test #f module-path? "foo.ss/hello") (test #f module-path? "foo.ss/hello")
(test #f module-path? "foo/") (test #f module-path? "foo/")
(test #f module-path? "a/foo/") (test #f module-path? "a/foo/")
(test #f module-path? "/foo.ss") (test #f module-path? "/foo.ss")
(test #f module-path? "/a/foo.ss") (test #f module-path? "/a/foo.ss")
(test #f module-path? "a/foo.ss/b") (test #f module-path? "a/foo.ss/b")
(test #t module-path? "a/foo%2ess/b")
(test #t module-path? "a/_/b") (test #t module-path? "a/_/b")
(test #t module-path? "a/0123456789+-_/b.---") (test #t module-path? "a/0123456789+-_/b.---")
(test #t module-path? "a/0123456789+-_/b.-%2e")
(test #t module-path? "../foo.ss") (test #t module-path? "../foo.ss")
(test #t module-path? "x/../foo.ss") (test #t module-path? "x/../foo.ss")
(test #t module-path? "x/./foo.ss") (test #t module-path? "x/./foo.ss")
@ -324,6 +334,8 @@
(test #t module-path? 'hello) (test #t module-path? 'hello)
(test #f module-path? 'hello/) (test #f module-path? 'hello/)
(test #f module-path? 'hello.ss) (test #f module-path? 'hello.ss)
(test #t module-path? 'hello%2ess)
(test #f module-path? 'hello%2Ess)
(test #f module-path? 'hello/a.ss) (test #f module-path? 'hello/a.ss)
(test #f module-path? '/hello/a.ss) (test #f module-path? '/hello/a.ss)
(test #f module-path? '/hello) (test #f module-path? '/hello)
@ -356,6 +368,9 @@
(test #f module-path? '(planet)) (test #f module-path? '(planet))
(test #f module-path? '(planet robby)) (test #f module-path? '(planet robby))
(test #t module-path? '(planet robby/redex)) (test #t module-path? '(planet robby/redex))
(test #t module-path? '(planet robby%2e/%2eredex))
(test #f module-path? '(planet robby%2/redex))
(test #f module-path? '(planet robby/redex%2))
(test #f module-path? '(planet robby/redex/)) (test #f module-path? '(planet robby/redex/))
(test #f module-path? '(planet robby/redex/foo/)) (test #f module-path? '(planet robby/redex/foo/))
(test #f module-path? '(planet /robby/redex/foo)) (test #f module-path? '(planet /robby/redex/foo))
@ -373,6 +388,7 @@
(test #t module-path? '(planet robby/redex:7:8-9/foo)) (test #t module-path? '(planet robby/redex:7:8-9/foo))
(test #t module-path? '(planet robby/redex:7:8-9)) (test #t module-path? '(planet robby/redex:7:8-9))
(test #t module-path? '(planet robby/redex:700:800-00900/foo)) (test #t module-path? '(planet robby/redex:700:800-00900/foo))
(test #t module-path? '(planet robby/redex:700:800-00900/foo%2e))
(test #f module-path? '(planet robby/redex:=7/foo)) (test #f module-path? '(planet robby/redex:=7/foo))
(test #f module-path? '(planet robby/redex::8/foo)) (test #f module-path? '(planet robby/redex::8/foo))
(test #f module-path? '(planet robby/redex:7:/foo)) (test #f module-path? '(planet robby/redex:7:/foo))
@ -389,6 +405,7 @@
(test #t module-path? '(planet "foo.ss" ("robby" "redex.plt" 7 8))) (test #t module-path? '(planet "foo.ss" ("robby" "redex.plt" 7 8)))
(test #t module-path? '(planet "foo.ss" ("robby" "redex.plt" 7 (= 8)))) (test #t module-path? '(planet "foo.ss" ("robby" "redex.plt" 7 (= 8))))
(test #t module-path? '(planet "foo.ss" ("robby" "redex.plt") "sub" "deeper")) (test #t module-path? '(planet "foo.ss" ("robby" "redex.plt") "sub" "deeper"))
(test #t module-path? '(planet "foo%2e.ss" ("robby%2e" "redex%2e.plt") "sub%2e" "%2edeeper"))
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View File

@ -1818,6 +1818,39 @@ static Scheme_Object *namespace_unprotect_module(int argc, Scheme_Object *argv[]
return scheme_void; return scheme_void;
} }
static int plain_char(int c)
{
return (((c >= 'a') && (c <= 'z'))
|| ((c >= 'A') && (c <= 'Z'))
|| ((c >= '0') && (c <= '9'))
|| (c == '-')
|| (c == '_')
|| (c == '+'));
}
static int ok_hex(int c)
{
return (((c >= 'a') && (c <= 'f'))
|| ((c >= '0') && (c <= '9')));
}
static int ok_escape(int c1, int c2)
{
c1 = (((c1 >= 'a') && (c1 <= 'f'))
? (c1 - 'a' + 10)
: (c1 - '0'));
c2 = (((c2 >= 'a') && (c2 <= 'f'))
? (c2 - 'a' + 10)
: (c2 - '0'));
c1 = (c1 << 4) + c2;
if (plain_char(c1))
return 0;
else
return 1;
}
static int ok_path_string(Scheme_Object *obj, int dir_ok, int just_file_ok, int file_end_ok, int for_planet) static int ok_path_string(Scheme_Object *obj, int dir_ok, int just_file_ok, int file_end_ok, int for_planet)
{ {
mzchar *s = SCHEME_CHAR_STR_VAL(obj); mzchar *s = SCHEME_CHAR_STR_VAL(obj);
@ -1929,12 +1962,11 @@ static int ok_path_string(Scheme_Object *obj, int dir_ok, int just_file_ok, int
} }
prev_was_slash = 0; prev_was_slash = 0;
} else { } else {
if (((c >= 'a') && (c <= 'z')) if (plain_char(c)
|| ((c >= 'A') && (c <= 'Z')) || ((c == '%')
|| ((c >= '0') && (c <= '9')) && ok_hex(s[i+1])
|| (c == '-') && ok_hex(s[i+2])
|| (c == '_') && ok_escape(s[i+1], s[i+2]))) {
|| (c == '+')) {
prev_was_slash = 0; prev_was_slash = 0;
} else if ((i < start_package_pos) || (i >= end_package_pos)) } else if ((i < start_package_pos) || (i >= end_package_pos))
return 0; return 0;
@ -1998,13 +2030,12 @@ static int ok_planet_string(Scheme_Object *obj)
while (i--) { while (i--) {
c = s[i]; c = s[i];
if (((c >= 'a') && (c <= 'z')) if ((c == '%')
|| ((c >= 'A') && (c <= 'Z')) && ok_hex(s[i+1])
|| ((c >= '0') && (c <= '9')) && ok_hex(s[i+2])
|| (c == '.') && ok_escape(s[i+1], s[i+2])) {
|| (c == '-') /* ok */
|| (c == '_') } else if (plain_char(c) || (c == '.')) {
|| (c == '+')) {
/* ok */ /* ok */
} else } else
return 0; return 0;