adjust path encoding in bytecode and syntax-object sources

When a path is made relative for marshaling to bytecode, record
a list of byte strings in stead of a platform-specific relative
path.

For syntax-object source locations, convert any non-relative path to a
string that shows just the last couple of path elements preceded by
".../". This conversion avoids embedding absolute paths in bytecode,
but at the cost of some information. A more complete and consistent
solution would invove using a module-path index instead of a path, but
that would be a big change at several layers.
This commit is contained in:
Matthew Flatt 2015-09-02 09:13:43 -06:00
parent a934bdf444
commit fb8e08a2ac
13 changed files with 220 additions and 50 deletions

View File

@ -885,7 +885,6 @@
[t2 (get-output-bytes s2)])
(or (bytes=? t1 t2)
(begin
#;
(printf "~s\n~s\n"
(zo-parse (open-input-bytes t1))
(zo-parse (open-input-bytes t2)))
@ -896,7 +895,11 @@
(case-lambda
[(expr1 expr2) (test-comp expr1 expr2 #t)]
[(expr1 expr2 same?)
(test same? `(compile ,same? ,expr2) (comp=? (compile expr1) (compile expr2)))]))
(define (->stx s)
;; Give `s` a minimal location, so that other macro locations
;; don't bleed through:
(datum->syntax #f s (vector 'here #f #f #f #f)))
(test same? `(compile ,same? ,expr2) (comp=? (compile (->stx expr1)) (compile (->stx expr2))))]))
(let ([x (compile '(lambda (x) x))])
(test #t 'fixpt (eq? x (compile x))))
@ -1669,7 +1672,9 @@
'(letrec ([x (cons 1 1)][y x]) (cons x x)))
(test-comp '(let ([f (lambda (x) x)]) f)
(syntax-property (datum->syntax #f '(lambda (x) x)) 'inferred-name 'f))
(syntax-property (datum->syntax #f '(lambda (x) x) (vector 'here #f #f #f #f))
'inferred-name
'f))
(test-comp '(letrec ([f (lambda (x) x)])
(f 10)

View File

@ -19,7 +19,10 @@
(let ([s (let ([s (format
"~a"
(cond
[(path? s) (path->string s)]
[(path? s)
;; Make the result consistent across platforms by
;; converting backslashes to forward slashes:
(regexp-replace* #rx"\\\\" (path->string s) "/")]
[else s]))])
(if ((string-length s) . > . 20)
(string-append "..." (substring s (- (string-length s) 20)))

View File

@ -457,22 +457,43 @@ static void lambda_check_args(Scheme_Object *args, Scheme_Object *form, Scheme_C
}
Scheme_Object *scheme_source_to_name(Scheme_Object *code)
/* Makes up a procedure name when there's not a good one in the source: */
/* Makes up a procedure name when there's not a good one in the source */
{
Scheme_Stx *cstx = (Scheme_Stx *)code;
if ((cstx->srcloc->col >= 0) || (cstx->srcloc->pos >= 0)) {
char buf[50], src[20];
Scheme_Object *name;
Scheme_Object *name, *bstr;
int convert_backslash = 0;
if (cstx->srcloc->src && SCHEME_PATHP(cstx->srcloc->src)) {
if (SCHEME_BYTE_STRLEN_VAL(cstx->srcloc->src) < 20)
memcpy(src, SCHEME_BYTE_STR_VAL(cstx->srcloc->src), SCHEME_BYTE_STRLEN_VAL(cstx->srcloc->src) + 1);
if (cstx->srcloc->src) {
if (SCHEME_PATHP(cstx->srcloc->src)) {
bstr = cstx->srcloc->src;
/* for generating consistent names on machines with different platform
conventions, convert "\" to "/" */
convert_backslash = 1;
} else if (SCHEME_CHAR_STRINGP(cstx->srcloc->src))
bstr = scheme_char_string_to_byte_string(cstx->srcloc->src);
else
bstr = NULL;
} else
bstr = NULL;
if (bstr) {
if (SCHEME_BYTE_STRLEN_VAL(bstr) < 20)
memcpy(src, SCHEME_BYTE_STR_VAL(bstr), SCHEME_BYTE_STRLEN_VAL(bstr) + 1);
else {
memcpy(src, SCHEME_BYTE_STR_VAL(cstx->srcloc->src) + SCHEME_BYTE_STRLEN_VAL(cstx->srcloc->src) - 19, 20);
memcpy(src, SCHEME_BYTE_STR_VAL(bstr) + SCHEME_BYTE_STRLEN_VAL(bstr) - 19, 20);
src[0] = '.';
src[1] = '.';
src[2] = '.';
}
if (convert_backslash) {
int i;
for (i = 0; src[i]; i++) {
if (src[i] == '\\')
src[i] = '/';
}
}
} else {
return NULL;
}

View File

@ -5612,9 +5612,26 @@ static Scheme_Object *do_explode_path(Scheme_Object *p)
return explode_path(1, &p);
}
Scheme_Object *scheme_extract_relative_to(Scheme_Object *obj, Scheme_Object *dir)
static Scheme_Object *to_bytes(Scheme_Object *p)
{
Scheme_Object *de, *be, *oe;
if (SCHEME_PATHP(p))
return scheme_make_sized_byte_string(SCHEME_PATH_VAL(p),
SCHEME_PATH_LEN(p),
1);
else
return p;
}
Scheme_Object *scheme_extract_relative_to(Scheme_Object *obj, Scheme_Object *dir, Scheme_Hash_Table *cache)
/* When cache is non-NULL, generate a path or a list of byte strings (suitable for bytecode) */
{
Scheme_Object *de, *be, *oe, *orig_obj = obj;
if (cache) {
de = scheme_hash_get(cache, obj);
if (de)
return de;
}
if (SCHEME_PAIRP(dir)) {
be = do_explode_path(SCHEME_CAR(dir));
@ -5644,32 +5661,78 @@ Scheme_Object *scheme_extract_relative_to(Scheme_Object *obj, Scheme_Object *dir
be = SCHEME_CDR(be);
}
/* If cache is non-NULL, generate a list of byte strings */
if (SCHEME_NULLP(oe)) {
a[0] = same_symbol;
obj = scheme_build_path(1, a);
if (cache) {
obj = scheme_null;
} else {
a[0] = same_symbol;
obj = scheme_build_path(1, a);
}
} else {
obj = SCHEME_CAR(oe);
if (cache)
obj = scheme_make_pair(to_bytes(obj), scheme_null);
oe = SCHEME_CDR(oe);
}
while (SCHEME_PAIRP(oe)) {
a[0] = obj;
a[1] = SCHEME_CAR(oe);
obj = scheme_build_path(2, a);
if (cache) {
obj = scheme_make_pair(to_bytes(SCHEME_CAR(oe)), scheme_null);
} else {
a[0] = obj;
a[1] = SCHEME_CAR(oe);
obj = scheme_build_path(2, a);
}
oe = SCHEME_CDR(oe);
}
if (cache)
obj = scheme_reverse(obj);
while (!SCHEME_NULLP(be)) {
a[0] = up_symbol;
a[1] = obj;
obj = scheme_build_path(2, a);
if (cache) {
obj = scheme_make_pair(up_symbol, scheme_null);
} else {
a[0] = up_symbol;
a[1] = obj;
obj = scheme_build_path(2, a);
}
be = SCHEME_CDR(be);
}
}
if (cache)
scheme_hash_set(cache, orig_obj, obj);
return obj;
}
Scheme_Object *scheme_maybe_build_path(Scheme_Object *base, Scheme_Object *elem)
{
Scheme_Object *a[2];
if (!base)
base = scheme_get_param(scheme_current_config(), MZCONFIG_CURRENT_DIRECTORY);
if (SAME_OBJ(elem, same_symbol)
|| SAME_OBJ(elem, up_symbol)) {
/* ok */
} else if (SCHEME_BYTE_STRINGP(elem)) {
a[0] = elem;
elem = bytes_to_path_element(1, a);
} else
elem = NULL;
if (elem) {
a[0] = base;
a[1] = elem;
return scheme_build_path(2, a);
} else
return base;
}
static Scheme_Object *filesystem_root_list(int argc, Scheme_Object *argv[])
{
Scheme_Object *first = scheme_null;

View File

@ -717,15 +717,15 @@ static Scheme_Object *read_quote_syntax(Scheme_Object *obj)
#define BOOL(x) (x ? scheme_true : scheme_false)
static int not_relative_path(Scheme_Object *p)
static int not_relative_path(Scheme_Object *p, Scheme_Hash_Table *cache)
{
Scheme_Object *dir, *rel_p;
dir = scheme_get_param(scheme_current_config(),
MZCONFIG_WRITE_DIRECTORY);
if (SCHEME_TRUEP(dir)) {
rel_p = scheme_extract_relative_to(p, dir);
if (SAME_OBJ(rel_p, p))
rel_p = scheme_extract_relative_to(p, dir, cache);
if (SCHEME_PATHP(rel_p))
return 1;
}
@ -752,7 +752,7 @@ static Scheme_Object *write_compiled_closure(Scheme_Object *obj)
/* If MZCONFIG_WRITE_DIRECTORY, drop any non-relative path
(which might happen due to function inlining, for example)
to avoid embedding absolute paths in bytecode files: */
|| not_relative_path(src))
|| not_relative_path(src, scheme_current_thread->current_mt->path_cache))
&& !SCHEME_CHAR_STRINGP(src)
&& !SCHEME_SYMBOLP(src)) {
/* Just keep the name */

View File

@ -53,6 +53,7 @@ static int mark_marshal_tables_MARK(void *p, struct NewGC *gc) {
gcMARK2(mt->cdata_map, gc);
gcMARK2(mt->rn_saved, gc);
gcMARK2(mt->shared_offsets, gc);
gcMARK2(mt->path_cache, gc);
gcMARK2(mt->sorted_keys, gc);
return
gcBYTES_TO_WORDS(sizeof(Scheme_Marshal_Tables));
@ -75,6 +76,7 @@ static int mark_marshal_tables_FIXUP(void *p, struct NewGC *gc) {
gcFIXUP2(mt->cdata_map, gc);
gcFIXUP2(mt->rn_saved, gc);
gcFIXUP2(mt->shared_offsets, gc);
gcFIXUP2(mt->path_cache, gc);
gcFIXUP2(mt->sorted_keys, gc);
return
gcBYTES_TO_WORDS(sizeof(Scheme_Marshal_Tables));

View File

@ -1830,6 +1830,7 @@ mark_marshal_tables {
gcMARK2(mt->cdata_map, gc);
gcMARK2(mt->rn_saved, gc);
gcMARK2(mt->shared_offsets, gc);
gcMARK2(mt->path_cache, gc);
gcMARK2(mt->sorted_keys, gc);
size:
gcBYTES_TO_WORDS(sizeof(Scheme_Marshal_Tables));

View File

@ -4991,7 +4991,7 @@ static Scheme_Object *wr_abs_directory_p(int argc, Scheme_Object **argv)
Scheme_Object *a, *d, *r;
a = abs_directory_p("current-write-relative-directory", SCHEME_CAR(argv[0]));
d = abs_directory_p("current-write-relative-directory", SCHEME_CDR(argv[0]));
r = scheme_extract_relative_to(a, d);
r = scheme_extract_relative_to(a, d, NULL);
if (SAME_OBJ(a, r)) {
scheme_contract_error("current-write-relative-directory",
"first path does not extend second path",

View File

@ -2619,13 +2619,17 @@ print(Scheme_Object *obj, int notdisplay, int compact, Scheme_Hash_Table *ht,
dir = scheme_get_param(scheme_current_config(),
MZCONFIG_WRITE_DIRECTORY);
if (SCHEME_TRUEP(dir))
obj = scheme_extract_relative_to(obj, dir);
obj = scheme_extract_relative_to(obj, dir, mt->path_cache);
print_compact(pp, CPT_PATH);
l = SCHEME_PATH_LEN(obj);
print_compact_number(pp, l);
print_this_string(pp, SCHEME_PATH_VAL(obj), 0, l);
if (SCHEME_PATHP(obj)) {
l = SCHEME_PATH_LEN(obj);
print_compact_number(pp, l);
print_this_string(pp, SCHEME_PATH_VAL(obj), 0, l);
} else {
print_compact_number(pp, 0);
print(obj, notdisplay, compact, ht, mt, pp);
}
symtab_set(pp, mt, orig_obj);
}
@ -2638,7 +2642,7 @@ print(Scheme_Object *obj, int notdisplay, int compact, Scheme_Hash_Table *ht,
dir = scheme_get_param(scheme_current_config(),
MZCONFIG_WRITE_DIRECTORY);
if (SCHEME_TRUEP(dir))
obj = scheme_extract_relative_to(obj, dir);
obj = scheme_extract_relative_to(obj, dir, mt->path_cache);
print_utf8_string(pp, "#^", 0, 2);
obj = scheme_make_sized_byte_string(SCHEME_PATH_VAL(obj),
@ -3467,7 +3471,7 @@ print(Scheme_Object *obj, int notdisplay, int compact, Scheme_Hash_Table *ht,
if (compact)
closed = print(v, notdisplay, 1, NULL, mt, pp);
else {
Scheme_Hash_Table *st_refs, *symtab, *reachable_scopes, *intern_map;
Scheme_Hash_Table *st_refs, *symtab, *reachable_scopes, *intern_map, *path_cache;
intptr_t *shared_offsets;
intptr_t st_len, j, shared_offset, start_offset;
@ -3490,6 +3494,8 @@ print(Scheme_Object *obj, int notdisplay, int compact, Scheme_Hash_Table *ht,
mt->reachable_scope_stack = scheme_null;
symtab = make_hash_table_symtab();
mt->symtab = symtab;
path_cache = scheme_make_hash_table_equal();
mt->path_cache = path_cache;
print_substring(v, notdisplay, 1, NULL, mt, pp, NULL, &slen, 0, NULL);
scheme_iterate_reachable_scopes(mt);
@ -3500,6 +3506,7 @@ print(Scheme_Object *obj, int notdisplay, int compact, Scheme_Hash_Table *ht,
scheme_current_thread->current_mt = mt;
mt->reachable_scopes = reachable_scopes;
mt->intern_map = intern_map;
mt->path_cache = path_cache;
/* Track which shared values are referenced: */
st_refs = make_hash_table_symtab();

View File

@ -5025,18 +5025,22 @@ static Scheme_Object *read_compact(CPort *port, int use_stack)
{
l = read_compact_number(port);
RANGE_CHECK_GETS(l);
s = read_compact_chars(port, buffer, BLK_BUF_SIZE, l);
v = scheme_make_sized_path(s, l, l < BLK_BUF_SIZE);
if (scheme_is_relative_path(SCHEME_PATH_VAL(v), SCHEME_PATH_LEN(v), SCHEME_PLATFORM_PATH_KIND)) {
/* Resolve relative path using the current load-relative directory: */
if (SCHEME_PATHP(port->relto)) {
Scheme_Object *a[2];
a[0] = port->relto;
a[1] = v;
v = scheme_build_path(2, a);
}
}
if (l) {
s = read_compact_chars(port, buffer, BLK_BUF_SIZE, l);
v = scheme_make_sized_path(s, l, l < BLK_BUF_SIZE);
} else {
Scheme_Object *elems;
elems = read_compact(port, 0);
if (SCHEME_PATHP(port->relto)) {
/* Resolve relative path using the current load-relative directory: */
v = port->relto;
} else
v = scheme_maybe_build_path(NULL, scheme_false);
while (SCHEME_PAIRP(elems)) {
v = scheme_maybe_build_path(v, SCHEME_CAR(elems));
elems = SCHEME_CDR(elems);
}
}
}
break;
case CPT_CLOSURE:

View File

@ -3348,6 +3348,7 @@ typedef struct Scheme_Marshal_Tables {
Scheme_Object **cdata_map; /* for delay-load wrappers */
int cdata_counter; /* used with cdata_map */
intptr_t *shared_offsets; /* set in second pass */
Scheme_Hash_Table *path_cache; /* cache for path-to-relative resolution */
intptr_t sorted_keys_count;
intptr_t inspector_counter; /* for deterministic symbol allocation */
Scheme_Object **sorted_keys;
@ -3920,7 +3921,7 @@ Scheme_Object *scheme_get_run_cmd(void);
Scheme_Object *scheme_get_fd_identity(Scheme_Object *port, intptr_t fd, char *path, int noerr);
Scheme_Object *scheme_extract_relative_to(Scheme_Object *obj, Scheme_Object *dir);
Scheme_Object *scheme_extract_relative_to(Scheme_Object *obj, Scheme_Object *dir, Scheme_Hash_Table *cache);
Scheme_Object *scheme_find_links_path(int argc, Scheme_Object *argv[]);
@ -4200,6 +4201,8 @@ Scheme_Object *scheme_intern_exact_parallel_symbol(const char *name, uintptr_t l
Scheme_Object *scheme_symbol_append(Scheme_Object *s1, Scheme_Object *s2);
Scheme_Object *scheme_copy_list(Scheme_Object *l);
Scheme_Object *scheme_append_strings(Scheme_Object *s1, Scheme_Object *s2);
void scheme_reset_hash_table(Scheme_Hash_Table *ht, int *history);
XFORM_NONGCING Scheme_Object *scheme_regexp_source(Scheme_Object *re);
@ -4214,6 +4217,9 @@ int scheme_regexp_match_p(Scheme_Object *regexp, Scheme_Object *target);
Scheme_Object *scheme_gensym(Scheme_Object *base);
Scheme_Object *scheme_symbol_to_string(Scheme_Object *sym);
Scheme_Object *scheme_maybe_build_path(Scheme_Object *base, Scheme_Object *elem);
#ifdef SCHEME_BIG_ENDIAN
# define MZ_UCS4_NAME "UCS-4BE"
#else

View File

@ -1044,6 +1044,14 @@ scheme_make_locale_string(const char *chars)
return scheme_byte_string_to_char_string_locale(scheme_make_byte_string((char *)chars));
}
Scheme_Object *scheme_append_strings(Scheme_Object *s1, Scheme_Object *s2)
{
Scheme_Object *a[2];
a[0] = s1;
a[1] = s2;
return string_append(2, a);
}
/**********************************************************************/
/* index helpers */
/**********************************************************************/

View File

@ -6167,12 +6167,30 @@ static void lift_common_wraps(Scheme_Object *l, int cnt, int tail)
}
}
static Scheme_Object *srcloc_path_to_string(Scheme_Object *p)
{
Scheme_Object *base, *name, *dir_name;
int isdir;
name = scheme_split_path(SCHEME_PATH_VAL(p), SCHEME_PATH_LEN(p), &base, &isdir, SCHEME_PLATFORM_PATH_KIND);
if (SCHEME_PATHP(name) && SCHEME_PATHP(base)) {
dir_name = scheme_split_path(SCHEME_PATH_VAL(base), SCHEME_PATH_LEN(base), &base, &isdir, SCHEME_PLATFORM_PATH_KIND);
if (SCHEME_PATHP(dir_name))
name = scheme_append_strings(scheme_path_to_char_string(dir_name),
scheme_append_strings(scheme_make_utf8_string("/"),
scheme_path_to_char_string(name)));
else
name = scheme_path_to_char_string(name);
return scheme_append_strings(scheme_make_utf8_string(".../"), name);
} else if (SCHEME_PATHP(name))
return scheme_path_to_char_string(name);
else
return scheme_false;
}
static Scheme_Object *convert_srcloc(Scheme_Stx_Srcloc *srcloc, Scheme_Hash_Tree *props, Scheme_Marshal_Tables *mt)
{
Scheme_Object *vec, *paren;
if (!srcloc)
return scheme_false;
Scheme_Object *vec, *paren, *src, *dir;
if (props) {
paren = scheme_hash_tree_get(props, scheme_paren_shape_symbol);
@ -6181,8 +6199,40 @@ static Scheme_Object *convert_srcloc(Scheme_Stx_Srcloc *srcloc, Scheme_Hash_Tree
} else
paren = NULL;
if ((!srcloc || (SCHEME_FALSEP(srcloc->src)
&& (srcloc->line < 0)
&& (srcloc->col < 0)
&& (srcloc->pos < 0)))
&& !paren)
return scheme_false;
if (!srcloc)
srcloc = empty_srcloc;
src = srcloc->src;
if (SCHEME_PATHP(src)) {
/* To make paths portable and to avoid full paths, check whether the
path can be made relative (in which case it is turned into a list
of byte strings). If not, convert to a string using only the
last couple of path elements. */
dir = scheme_get_param(scheme_current_config(),
MZCONFIG_WRITE_DIRECTORY);
if (SCHEME_TRUEP(dir))
src = scheme_extract_relative_to(src, dir, mt->path_cache);
if (SCHEME_PATHP(src)) {
src = scheme_hash_get(mt->path_cache, scheme_box(srcloc->src));
if (!src) {
src = srcloc_path_to_string(srcloc->src);
scheme_hash_set(mt->path_cache, scheme_box(srcloc->src), src);
}
} else {
/* use the path directly and let the printer make it relative */
src = srcloc->src;
}
}
vec = scheme_make_vector((paren ? 6 : 5), NULL);
SCHEME_VEC_ELS(vec)[0] = srcloc->src;
SCHEME_VEC_ELS(vec)[0] = src;
SCHEME_VEC_ELS(vec)[1] = scheme_make_integer(srcloc->line);
SCHEME_VEC_ELS(vec)[2] = scheme_make_integer(srcloc->col);
SCHEME_VEC_ELS(vec)[3] = scheme_make_integer(srcloc->pos);