diff --git a/collects/r6rs/private/encode-name.ss b/collects/r6rs/private/encode-name.ss new file mode 100644 index 0000000000..71f36d210e --- /dev/null +++ b/collects/r6rs/private/encode-name.ss @@ -0,0 +1,24 @@ +#lang scheme/base + +(provide encode-name) + +(define (encode-name s) + (let ([s (if (symbol? s) + (symbol->string s) + s)]) + (cond + [(regexp-match #rx"(.*?)([^a-zA-Z0-9_+-]+)(.*)" s) + => (lambda (m) + (string-append + (cadr m) + (apply + string-append + (map (lambda (c) + (let ([s (format "0~x" c)]) + (string-append + "%" + (substring s (- (string-length s) 2))))) + (bytes->list (string->bytes/utf-8 (caddr m))))) + (encode-name (cadddr m))))] + [else s]))) + diff --git a/collects/r6rs/private/parse-ref.ss b/collects/r6rs/private/parse-ref.ss index 271881e244..47841d4a86 100644 --- a/collects/r6rs/private/parse-ref.ss +++ b/collects/r6rs/private/parse-ref.ss @@ -1,6 +1,7 @@ #lang scheme/base (require "find-version.ss" + "encode-name.ss" (for-template scheme/base)) (provide parse-import) @@ -37,10 +38,19 @@ (andmap identifier? (syntax->list #'(id2 ...))) (is-version-reference? #'(vers ...))) (let-values ([(coll file) - (let ([strs (map (lambda (id) - (symbol->string (syntax-e id))) - (syntax->list #'(id1 id2 ...)))]) - (if (= 1 (length strs)) + (let* ([strs (map (lambda (id) + (symbol->string (syntax-e id))) + (syntax->list #'(id1 id2 ...)))] + [len (length strs)] + [strs (map + encode-name + (if (and (= 2 len) (regexp-match? #rx"^main_*$" (cadr strs))) + ;; rename (X main_*) => (X main__*) + (list (car strs) + (string-append (cadr strs) "_")) + ;; no rename + strs))]) + (if (= 1 len) (values (list (car strs)) "main") (values (reverse (cdr (reverse strs))) (car (reverse strs)))))]) diff --git a/collects/r6rs/run.ss b/collects/r6rs/run.ss index 2c912ad9fa..6db517a710 100644 --- a/collects/r6rs/run.ss +++ b/collects/r6rs/run.ss @@ -6,7 +6,8 @@ setup/dirs scheme/port scheme/file - "private/readtable.ss") + "private/readtable.ss" + "private/encode-name.ss") (define install-mode (make-parameter #f)) (define compile-mode (make-parameter #f)) @@ -123,11 +124,23 @@ (loop (cdr name)))])))) (define (name->path name) - (let* ([name (if (or (= (length name) 1) - (and (= (length name) 2) + (let* ([name (let ([len (length name)]) + (cond + [(or (= len 1) + (and (= len 2) (not (symbol? (cadr name))))) - (list* (car name) 'main (cdr name)) - name)]) + ;; Add implicit "main": + (list* (car name) 'main (cdr name))] + [(and (or (= len 2) + (and (= len 3) + (not (symbol? (caddr name))))) + (regexp-match #rx"^main_*$" (symbol->string (cadr name)))) + ;; Rename (X main_*) => (X main__*) + (list* (car name) + (string->symbol + (string-append (symbol->string (cadr name)) "_")) + (cddr name))] + [else name]))]) (apply build-path (if (install-all-users) (find-collects-dir) @@ -140,7 +153,7 @@ ;; versioned: (list (format "~a~a.ss" - (car name) + (encode-name (car name)) (apply string-append (map (lambda (v) @@ -148,9 +161,9 @@ (cadr name)))))] [(null? (cdr name)) ;; unversioned: - (list (format "~a.ss" (car name)))] + (list (format "~a.ss" (encode-name (car name))))] [else - (cons (symbol->string (car name)) + (cons (encode-name (car name)) (loop (cdr name)))]))))) ;; ---------------------------------------- diff --git a/collects/r6rs/scribblings/r6rs.scrbl b/collects/r6rs/scribblings/r6rs.scrbl index 695103be31..0ee8b4e2af 100644 --- a/collects/r6rs/scribblings/r6rs.scrbl +++ b/collects/r6rs/scribblings/r6rs.scrbl @@ -175,8 +175,9 @@ a name is converted to a PLT Scheme module pathname (see @secref[#:doc guide-src "module-paths"]) by concatenating the symbols with a @litchar{/} separator, and then appending the version integers each with a preceeding @litchar{-}. As a special case, when an @|r6rs| path -contains a single symbol followed by a version, a @schemeidfont{main} -symbol is effectively inserted after the initial symbol. +contains a single symbol (optionally followed by a version), a +@schemeidfont{main} symbol is effectively inserted after the initial +symbol. See below for further encoding considerations. When an @|r6rs| library or top-level program refers to another library, it can supply version constraints rather than naming a @@ -190,13 +191,41 @@ search order for file extensions is @filepath{.mzscheme.ss}, resolving version constraints, these extensions are all tried when looking for matches. + + +To ensure that all @|r6rs| library names can be converted to a unique +and distinct library module path, the following conversions are +applied to each symbol before concatenating them: + +@itemize{ + + @item{The symbol is encoded using UTF-8, and the resulting bytes are + treated as Latin-1 encoded characters. ASCII letters, digits, + @litchar{+}, @litchar{-}, and @litchar{_} are left as-is; other + characters are replaced by @litchar{%} followed by two lowercase + hexadecimal digits. Note that UTF-8 encodes ASCII letters, digits, + @|etc| as themselves, so typical library names correspond to readable + module paths.} + + @item{If the @|r6rs| library reference has two symbol elements and + the second one is @schemeidfont{main} followed by any number of + underscores, then an extra underscore is added to that symbol. This + conversion avoids a collision between an explicit @schemeidfont{main} + and the implicit @schemeidfont{main} when a library path has a single + symbol element.} + +} + Examples (assuming a typical PLT Scheme installation): @schemeblock[ (rnrs io simple (6)) #, @elem{means} (lib "rnrs/io/simple-6.ss") (rnrs) #, @elem{means} (lib "rnrs/main-6.ss") +(rnrs main) #, @elem{means} (lib "rnrs/main_.ss") (rnrs (6)) #, @elem{means} (lib "rnrs/main-6.ss") (scheme base) #, @elem{means} (lib "scheme/base.ss") +(achtung!) #, @elem{means} (lib "achtung%21/main.ss") +(funco new-λ) #, @elem{means} (lib "funco/new-%ce%bb.ss") ] diff --git a/collects/scribblings/reference/syntax.scrbl b/collects/scribblings/reference/syntax.scrbl index d4de9a45eb..f5849efa9e 100644 --- a/collects/scribblings/reference/syntax.scrbl +++ b/collects/scribblings/reference/syntax.scrbl @@ -325,10 +325,20 @@ corresponds to the default @tech{module name resolver}. @litchar{/} is the path delimiter (multiple adjacent @litchar{/}s are treated as a single delimiter), @litchar{..} accesses the parent directory, and @litchar{.} accesses the current directory. The path - cannot be empty or contain a leading or trailing slash, path - elements before than the last one cannot include a file suffix, + cannot be empty or contain a leading or trailing slash, path elements + before than the last one cannot include a file suffix (i.e., a + @litchar{.} in an element other than @litchar{.} or @litchar{..}), and the only allowed characters are ASCII letters, ASCII digits, - @litchar{-}, @litchar{+}, @litchar{_}, @litchar{.}, and @litchar{/}.} + @litchar{-}, @litchar{+}, @litchar{_}, @litchar{.}, @litchar{/}, and + @litchar{%}. Furthermore, a @litchar{%} is allowed only when followed + by two lowercase hexadecimal digits, and the digits must form a + number that is not the ASCII value of a letter, digit, @litchar{-}, + @litchar{+}, or @litchar{_}. + + @margin-note{The @litchar{%} provision is intended to support a + one-to-one encoding of arbitrary strings as path elements (after + UTF-8 encoding). Such encodings are not decoded to arrive at a + filename, but instead preserved in the file access.}} @defsubform[(lib rel-string ...+)]{A path to a module installed into a @tech{collection} (see @secref["collects"]). The @scheme[rel-string]s in @@ -406,17 +416,19 @@ corresponds to the default @tech{module name resolver}. ] and where an @nonterm{elem} is a non-empty sequence of characters - that are ASCII letters, ASCII digits, @litchar{-}, @litchar{+}, or - @litchar{_}, and an @nonterm{int} is a non-empty sequence of ASCII - digits. As this shorthand is expended, a @filepath{.plt} extension is - added to @nonterm{pkg}, and a @filepath{.ss} extension is added to + that are ASCII letters, ASCII digits, @litchar{-}, @litchar{+}, + @litchar{_}, or @litchar{%} followed by lowercase hexadecimal digits + (that do not encode one of the other allowed characters), and an + @nonterm{int} is a non-empty sequence of ASCII digits. As this + shorthand is expended, a @filepath{.plt} extension is added to + @nonterm{pkg}, and a @filepath{.ss} extension is added to @scheme{path}; if no @nonterm{path} is included, @filepath{main.ss} is used in the expansion. A @scheme[(planet string)] form is like a @scheme[(planet id)] form with the identifier converted to a string, except that the - @scheme[string] can optionally end with a file extension for a - @nonterm{path}. + @scheme[string] can optionally end with a file extension (i.e., a + @litchar{.}) for a @nonterm{path}. In the more general last form of a @scheme[planet] module path, the @scheme[rel-string]s are similar to the @scheme[lib] form, except diff --git a/collects/tests/mzscheme/module.ss b/collects/tests/mzscheme/module.ss index 968061ad56..5223abf88d 100644 --- a/collects/tests/mzscheme/module.ss +++ b/collects/tests/mzscheme/module.ss @@ -307,14 +307,24 @@ (test #t module-path? "hello") (test #t module-path? "hello.ss") (test #f module-path? "hello*ss") +(test #t module-path? "hello%2ess") +(test #t module-path? "hello%00ss") +(test #f module-path? "hello%2Ess") +(test #f module-path? "hello%41ss") +(test #f module-path? "hello%4") +(test #f module-path? "hello%") +(test #f module-path? "hello%q0") +(test #f module-path? "hello%0q") (test #f module-path? "foo.ss/hello") (test #f module-path? "foo/") (test #f module-path? "a/foo/") (test #f module-path? "/foo.ss") (test #f module-path? "/a/foo.ss") (test #f module-path? "a/foo.ss/b") +(test #t module-path? "a/foo%2ess/b") (test #t module-path? "a/_/b") (test #t module-path? "a/0123456789+-_/b.---") +(test #t module-path? "a/0123456789+-_/b.-%2e") (test #t module-path? "../foo.ss") (test #t module-path? "x/../foo.ss") (test #t module-path? "x/./foo.ss") @@ -324,6 +334,8 @@ (test #t module-path? 'hello) (test #f module-path? 'hello/) (test #f module-path? 'hello.ss) +(test #t module-path? 'hello%2ess) +(test #f module-path? 'hello%2Ess) (test #f module-path? 'hello/a.ss) (test #f module-path? '/hello/a.ss) (test #f module-path? '/hello) @@ -356,6 +368,9 @@ (test #f module-path? '(planet)) (test #f module-path? '(planet robby)) (test #t module-path? '(planet robby/redex)) +(test #t module-path? '(planet robby%2e/%2eredex)) +(test #f module-path? '(planet robby%2/redex)) +(test #f module-path? '(planet robby/redex%2)) (test #f module-path? '(planet robby/redex/)) (test #f module-path? '(planet robby/redex/foo/)) (test #f module-path? '(planet /robby/redex/foo)) @@ -373,6 +388,7 @@ (test #t module-path? '(planet robby/redex:7:8-9/foo)) (test #t module-path? '(planet robby/redex:7:8-9)) (test #t module-path? '(planet robby/redex:700:800-00900/foo)) +(test #t module-path? '(planet robby/redex:700:800-00900/foo%2e)) (test #f module-path? '(planet robby/redex:=7/foo)) (test #f module-path? '(planet robby/redex::8/foo)) (test #f module-path? '(planet robby/redex:7:/foo)) @@ -389,6 +405,7 @@ (test #t module-path? '(planet "foo.ss" ("robby" "redex.plt" 7 8))) (test #t module-path? '(planet "foo.ss" ("robby" "redex.plt" 7 (= 8)))) (test #t module-path? '(planet "foo.ss" ("robby" "redex.plt") "sub" "deeper")) +(test #t module-path? '(planet "foo%2e.ss" ("robby%2e" "redex%2e.plt") "sub%2e" "%2edeeper")) ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; diff --git a/src/mzscheme/src/module.c b/src/mzscheme/src/module.c index fda8a8f2e2..ac04da65b9 100644 --- a/src/mzscheme/src/module.c +++ b/src/mzscheme/src/module.c @@ -1818,6 +1818,39 @@ static Scheme_Object *namespace_unprotect_module(int argc, Scheme_Object *argv[] return scheme_void; } +static int plain_char(int c) +{ + return (((c >= 'a') && (c <= 'z')) + || ((c >= 'A') && (c <= 'Z')) + || ((c >= '0') && (c <= '9')) + || (c == '-') + || (c == '_') + || (c == '+')); +} + +static int ok_hex(int c) +{ + return (((c >= 'a') && (c <= 'f')) + || ((c >= '0') && (c <= '9'))); +} + +static int ok_escape(int c1, int c2) +{ + c1 = (((c1 >= 'a') && (c1 <= 'f')) + ? (c1 - 'a' + 10) + : (c1 - '0')); + c2 = (((c2 >= 'a') && (c2 <= 'f')) + ? (c2 - 'a' + 10) + : (c2 - '0')); + + c1 = (c1 << 4) + c2; + + if (plain_char(c1)) + return 0; + else + return 1; +} + static int ok_path_string(Scheme_Object *obj, int dir_ok, int just_file_ok, int file_end_ok, int for_planet) { mzchar *s = SCHEME_CHAR_STR_VAL(obj); @@ -1929,12 +1962,11 @@ static int ok_path_string(Scheme_Object *obj, int dir_ok, int just_file_ok, int } prev_was_slash = 0; } else { - if (((c >= 'a') && (c <= 'z')) - || ((c >= 'A') && (c <= 'Z')) - || ((c >= '0') && (c <= '9')) - || (c == '-') - || (c == '_') - || (c == '+')) { + if (plain_char(c) + || ((c == '%') + && ok_hex(s[i+1]) + && ok_hex(s[i+2]) + && ok_escape(s[i+1], s[i+2]))) { prev_was_slash = 0; } else if ((i < start_package_pos) || (i >= end_package_pos)) return 0; @@ -1998,13 +2030,12 @@ static int ok_planet_string(Scheme_Object *obj) while (i--) { c = s[i]; - if (((c >= 'a') && (c <= 'z')) - || ((c >= 'A') && (c <= 'Z')) - || ((c >= '0') && (c <= '9')) - || (c == '.') - || (c == '-') - || (c == '_') - || (c == '+')) { + if ((c == '%') + && ok_hex(s[i+1]) + && ok_hex(s[i+2]) + && ok_escape(s[i+1], s[i+2])) { + /* ok */ + } else if (plain_char(c) || (c == '.')) { /* ok */ } else return 0;