fix bytecode-writing inconsistencies related to syntax objects and paths

and improve organization of the docs
This commit is contained in:
Matthew Flatt 2010-08-17 17:18:24 -06:00
parent db43d25559
commit 0d9f5016ba
14 changed files with 233 additions and 58 deletions

View File

@ -8,7 +8,9 @@
racket/local
racket/list
racket/dict
racket/function)
racket/function
racket/pretty
racket/path)
(provide/contract
[zo-marshal (compilation-top? . -> . bytes?)]
@ -901,6 +903,7 @@
CPT_BYTE_STRING
#f
out)]
#;
[(path? expr)
(out-as-bytes expr
path->bytes
@ -1024,7 +1027,20 @@
(if (quoted? expr)
(out-data (quoted-v expr) out)
(let ([s (open-output-bytes)])
(write expr s)
;; print `expr' to a string, but print paths
;; in a special way
(parameterize ([pretty-print-size-hook
(lambda (v mode port)
(and (path? v)
(let ([v (make-relative v)])
(+ 2 (let ([p (open-output-bytes)])
(write (path->bytes v) p)
(bytes-length (get-output-bytes p)))))))]
[pretty-print-print-hook
(lambda (v mode port)
(display "#^" port)
(write (path->bytes (make-relative v)) port))])
(pretty-write expr s))
(out-byte CPT_ESCAPE out)
(let ([bstr (get-output-bytes s)])
(out-number (bytes-length bstr) out)
@ -1041,5 +1057,11 @@
(define-struct svector (vec))
(define (make-relative v)
(let ([r (current-write-relative-directory)])
(if r
(find-relative-path r v)
v)))
;; ----------------------------------------

View File

@ -732,7 +732,23 @@
[read-decimal-as-inexact #t]
[read-accept-dot #t]
[read-accept-infix-dot #t]
[read-accept-quasiquote #t])
[read-accept-quasiquote #t]
;; Use a readtable for special path support in escaped:
[current-readtable
(make-readtable
#f
#\^
'dispatch-macro
(lambda (char port src line col pos)
(let ([b (read port)])
(unless (bytes? b)
(error 'read-escaped-path
"expected a byte string after #^"))
(let ([p (bytes->path b)])
(if (and (relative-path? p)
(current-load-relative-directory))
(build-path (current-load-relative-directory) p)
p)))))])
(read/recursive (open-input-bytes s))))]
[(reference)
(make-primval (read-compact-number cp))]

View File

@ -3,4 +3,14 @@
@title[#:tag "debugging"]{Debugging}
Racket's built-in debugging support is limited to context (i.e.,
``stack trace'') information that is printed with an exception. In
some cases, disabling the JIT compiler can affect context
information. The @racketmodname[errortrace] library supports more
consistent (independent of the JIT compiler) and precise context
information. The @racketmodname[racket/trace] library provides simple
tracing support. Finally, the @seclink[#:doc '(lib
"scribblings/drracket/drracket.scrbl") "top"]{DrRacket} programming environment
provides much more debugging support.
@include-section["trace.scrbl"]

View File

@ -369,41 +369,8 @@ form may be saved for later use; the default compilation handler is
optimized for the special case of immediate evaluation.
When a compiled form is written to an output port, the written form
starts with @litchar{#~}. These forms are essentially assembly code
for Racket, and reading such an form produces a compiled form (as
long as the @racket[read-accept-compiled] parameter is set to
@racket[#t]).
When a compiled form contains syntax object constants, the
@litchar{#~}-marshaled form drops source-location information and
properties (@secref["stxprops"]) for the @tech{syntax objects}.
Compiled code parsed from @litchar{#~} may contain references to
unexported or protected bindings from a module. At read time, such
references are associated with the current code inspector (see
@racket[current-code-inspector]), and the code will only execute if
that inspector controls the relevant module invocation (see
@secref["modprotect"]).
A compiled-form object may contain @tech{uninterned} symbols (see
@secref["symbols"]) that were created by @racket[gensym] or
@racket[string->uninterned-symbol]. When the compiled object is read
via @litchar{#~}, each uninterned symbol in the original form is
mapped to a new uninterned symbol, where multiple instances of a
single symbol are consistently mapped to the same new symbol. The
original and new symbols have the same printed
representation. @tech{Unreadable symbols}, which are typically
generated indirectly during expansion and compilation, are saved and
restored consistently through @litchar{#~}.
Due to the restrictions on @tech{uninterned} symbols in @litchar{#~},
do not use @racket[gensym] or @racket[string->uninterned-symbol] to
construct an identifier for a top-level or module binding. Instead,
generate distinct identifiers either with
@racket[generate-temporaries] or by applying the result of
@racket[make-syntax-introducer] to an existing identifier; those
functions will lead to top-level and module bindings with
@tech{unreadable symbol}ic names.}
starts with @litchar{#~}. See @secref["print-compiled"] for more
information.}
@defproc[(compile [top-level-form any/c]) compiled-expression?]{

View File

@ -453,16 +453,34 @@ For the purposes of printing enclosing datatypes, a keyword is
@section{Printing Regular Expressions}
Regexp values in all modes (@scheme[write], @scheme[display], and
@scheme[print]) starting with @litchar{#px} (for
@scheme[pregexp]-based regexps) or @litchar{#rx} (for
@scheme[regexp]-based regexps) followed by the @scheme[write] form of
the regexp's source string or byte string.
Regexp values @scheme[write], @scheme[display], and @scheme[print]
starting with @litchar{#px} (for @scheme[pregexp]-based regexps) or
@litchar{#rx} (for @scheme[regexp]-based regexps) followed by the
@scheme[write] form of the regexp's source string or byte string.
For the purposes of printing enclosing datatypes, a regexp value is
@tech{quotable}.
@section[#:tag "print-path"]{Printing Paths}
Paths @scheme[write] and @scheme[print] as @litchar{#<path:....>}. A
path @racket[display]s the same as the string produced by
@racket[path->string]. For the purposes of printing enclosing
datatypes, a path counts as @tech{quotable}.
Although a path can be converted to a string with
@racket[path->string] or to a byte string with @racket[path->bytes],
neither is clearly the right choice for printing a path and reading it
back. If the path value is meant to be moved among platforms, then a
string is probably the right choice, despite the potential for losing
information when converting a path to a string. For a path that is
intended to be re-read on the same platform, a byte string is probably
the right choice, since it preserves information in an unportable
way. Paths do not print in a readable way so that programmers are not
mislead into thinking that either choice is always appropriate.
@section[#:tag "print-unreadable"]{Printing Unreadable Values}
For any value with no other printing specification, assuming that the
@ -475,3 +493,52 @@ to the value itself. If @racket[print-unreadable] is set to
For the purposes of printing enclosing datatypes, a value that prints
unreadably nevertheless counts as @tech{quotable}.
@section[#:tag "print-compiled"]{Printing Compiled Code}
Compiled code as produced by @racket[compile] prints using
@litchar{#~}. Compiled code printed with @litchar{#~} is essentially
assembly code for Racket, and reading such an form produces a compiled
form when the @racket[read-accept-compiled] parameter is set to
@racket[#t].
When a compiled form contains syntax object constants, the
@litchar{#~}-marshaled form drops source-location information and
properties (@secref["stxprops"]) for the @tech{syntax objects}.
Compiled code parsed from @litchar{#~} may contain references to
unexported or protected bindings from a module. At read time, such
references are associated with the current code inspector (see
@racket[current-code-inspector]), and the code will only execute if
that inspector controls the relevant module invocation (see
@secref["modprotect"]).
A compiled-form object may contain @tech{uninterned} symbols (see
@secref["symbols"]) that were created by @racket[gensym] or
@racket[string->uninterned-symbol]. When the compiled object is read
via @litchar{#~}, each uninterned symbol in the original form is
mapped to a new uninterned symbol, where multiple instances of a
single symbol are consistently mapped to the same new symbol. The
original and new symbols have the same printed
representation. @tech{Unreadable symbols}, which are typically
generated indirectly during expansion and compilation, are saved and
restored consistently through @litchar{#~}.
Due to the restrictions on @tech{uninterned} symbols in @litchar{#~},
do not use @racket[gensym] or @racket[string->uninterned-symbol] to
construct an identifier for a top-level or module binding. Instead,
generate distinct identifiers either with
@racket[generate-temporaries] or by applying the result of
@racket[make-syntax-introducer] to an existing identifier; those
functions will lead to top-level and module bindings with
@tech{unreadable symbol}ic names.
Finally, a compiled form may contain path literals. Although paths are
not normally printed in a way that can be read back in, path literals
can be written and read as part of compiled code. The
@racket[current-write-relative-directory] parameter is used to convert
the path to a relative path as is it written, and then
@racket[current-load-relative-directory] parameter is used to convert
any relative path back as it is read. The relative-path conversion
applies on reading whether the path was originally relative or not.

View File

@ -122,7 +122,7 @@ on the next character or characters in the input stream as follows:
@dispatch[@litchar{#!}]{may start a reader extension; see @secref["parse-reader"]}
@dispatch[@litchar{#`}]{starts a syntax quasiquote; see @secref["parse-quote"]}
@dispatch[@litchar{#,}]{starts an syntax [splicing] unquote; see @secref["parse-quote"]}
@dispatch[@litchar{#~}]{starts compiled code; see @racket[current-compile]}
@dispatch[@litchar{#~}]{starts compiled code; see @secref["print-compiled"]}
@dispatch[@cilitchar{#i}]{starts a number; see @secref["parse-number"]}
@dispatch[@cilitchar{#e}]{starts a number; see @secref["parse-number"]}

View File

@ -75,7 +75,6 @@ The @racketmodname[racket] library combines
@include-section["os.scrbl"]
@include-section["memory.scrbl"]
@include-section["unsafe.scrbl"]
@include-section["debugging.scrbl"]
@include-section["running.scrbl"]
@;------------------------------------------------------------------------

View File

@ -9,3 +9,4 @@
@include-section["collects.scrbl"]
@include-section["help.scrbl"]
@include-section["enter.scrbl"]
@include-section["debugging.scrbl"]

View File

@ -251,7 +251,7 @@ object within @litchar{#<syntax}...@litchar{>} (after the
(or/c (and/c path-string? complete-path?) #f)
(or/c (and/c path? complete-path?) #f)]{
A parameter that is used when writing compiled code that contains
A parameter that is used when writing compiled code (see @secref["print-compiled"]) that contains
pathname literals, including source-location pathnames for procedure
names. When not @racket[#f], paths that syntactically extend the
parameter's value are converted to relative paths; when the resulting

View File

@ -1435,6 +1435,27 @@
(test #t list? @simp@tst)
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Check marshaling of compiled code to disallow
;; unreadable values in a hash-table literal
;; cyclic hash table as a bad "constant":
(err/rt-test (let ([s (open-output-bytes)])
(write (compile `(quote ,(let ([ht (make-hasheq)])
(hash-set! ht #'bad ht)
ht)))
s)
(get-output-bytes s))
exn:fail?)
;; non-cyclic variant:
(err/rt-test (let ([s (open-output-bytes)])
(write (compile `(quote ,(let ([ht (make-hasheq)])
(hash-set! ht #'bad 10)
ht)))
s)
(get-output-bytes s))
exn:fail?)
;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(report-errs)

View File

@ -5078,6 +5078,7 @@ static int mark_read_params_MARK(void *p, struct NewGC *gc) {
gcMARK2(rp->magic_sym, gc);
gcMARK2(rp->magic_val, gc);
gcMARK2(rp->delay_load_info, gc);
gcMARK2(rp->read_relative_path, gc);
return
gcBYTES_TO_WORDS(sizeof(ReadParams));
}
@ -5088,6 +5089,7 @@ static int mark_read_params_FIXUP(void *p, struct NewGC *gc) {
gcFIXUP2(rp->magic_sym, gc);
gcFIXUP2(rp->magic_val, gc);
gcFIXUP2(rp->delay_load_info, gc);
gcFIXUP2(rp->read_relative_path, gc);
return
gcBYTES_TO_WORDS(sizeof(ReadParams));
}

View File

@ -2076,6 +2076,7 @@ mark_read_params {
gcMARK2(rp->magic_sym, gc);
gcMARK2(rp->magic_val, gc);
gcMARK2(rp->delay_load_info, gc);
gcMARK2(rp->read_relative_path, gc);
size:
gcBYTES_TO_WORDS(sizeof(ReadParams));
}

View File

@ -85,6 +85,8 @@ typedef struct Scheme_Print_Params {
char honu_mode;
Scheme_Object *inspector;
char printing_quoted;
/* Used during `display' and `write': */
char *print_buffer;
long print_position;
@ -971,6 +973,7 @@ print_to_string(Scheme_Object *obj,
params.print_port = port;
params.print_syntax = 0;
params.depth_delta = NULL;
params.printing_quoted = 0;
/* Getting print params can take a while, and they're irrelevant
for simple things like displaying numbers. So try a shortcut: */
@ -1680,8 +1683,8 @@ static void cannot_print(PrintParams *pp, int notdisplay,
int compact)
{
scheme_raise_exn(MZEXN_FAIL,
(compact
? "%s: cannot marshal constant that is embedded in compiled code: %V"
((compact || pp->printing_quoted)
? "%s: cannot marshal value that is embedded in compiled code: %V"
: "%s: printing disabled for unreadable value: %V"),
notdisplay ? "write" : "display",
obj);
@ -2343,7 +2346,7 @@ print(Scheme_Object *obj, int notdisplay, int compact, Scheme_Hash_Table *ht,
Scheme_Object *idx;
int l;
idx = get_symtab_idx(mt, obj);
idx = get_symtab_idx(mt, obj);
if (idx) {
print_symtab_ref(pp, idx);
} else {
@ -2351,9 +2354,8 @@ print(Scheme_Object *obj, int notdisplay, int compact, Scheme_Hash_Table *ht,
dir = scheme_get_param(scheme_current_config(),
MZCONFIG_WRITE_DIRECTORY);
if (SCHEME_PATHP(dir)) {
if (SCHEME_PATHP(dir))
obj = scheme_extract_relative_to(obj, dir);
}
print_compact(pp, CPT_PATH);
@ -2363,6 +2365,24 @@ print(Scheme_Object *obj, int notdisplay, int compact, Scheme_Hash_Table *ht,
symtab_set(pp, mt, orig_obj);
}
} else if (!compact && pp->printing_quoted) {
/* An unlikely case: we're in escaped mode for printing a constant;
use a special escape, which is recognized only when reading
an escaped S-expression, to write a path: */
Scheme_Object *dir;
dir = scheme_get_param(scheme_current_config(),
MZCONFIG_WRITE_DIRECTORY);
if (SCHEME_PATHP(dir))
obj = scheme_extract_relative_to(obj, dir);
print_utf8_string(pp, "#^", 0, 2);
obj = scheme_make_sized_byte_string(SCHEME_PATH_VAL(obj),
SCHEME_PATH_LEN(obj),
1);
print(obj, notdisplay, compact, ht, mt, pp);
closed = 1;
} else if (!pp->print_unreadable) {
cannot_print(pp, notdisplay, obj, ht, compact);
} else {
@ -2653,7 +2673,7 @@ print(Scheme_Object *obj, int notdisplay, int compact, Scheme_Hash_Table *ht,
}
else if (SCHEME_STXP(obj))
{
if (compact) {
if (compact && !pp->printing_quoted) {
print_compact(pp, CPT_STX);
/* "2" in scheme_syntax_to_datum() call preserves wraps. */
@ -2838,7 +2858,7 @@ print(Scheme_Object *obj, int notdisplay, int compact, Scheme_Hash_Table *ht,
{
Scheme_Hash_Table *q_ht;
Scheme_Object *v;
int counter = 1, qpht, qpb;
int counter = 1, qpht, qpb, qpu;
v = SCHEME_PTR_VAL(obj);
@ -2865,8 +2885,16 @@ print(Scheme_Object *obj, int notdisplay, int compact, Scheme_Hash_Table *ht,
#endif
}
/* Avoid all unprintable values, whether or not we stay in compact mode. */
qpu = pp->print_unreadable;
pp->print_unreadable = 0;
pp->printing_quoted = 1;
compact = print(v, notdisplay, 1, q_ht, mt, pp);
pp->printing_quoted = 0;
pp->print_unreadable = qpu;
pp->print_hash_table = qpht;
pp->print_box = qpb;
}

View File

@ -181,6 +181,7 @@ typedef struct ReadParams {
Readtable *table;
Scheme_Object *magic_sym, *magic_val;
Scheme_Object *delay_load_info;
Scheme_Object *read_relative_path;
} ReadParams;
#define THREAD_FOR_LOCALS scheme_current_thread
@ -1392,6 +1393,43 @@ read_inner_inner(Scheme_Object *port, Scheme_Object *stxsrc, Scheme_Hash_Table *
}
}
break;
case '^':
if (params->read_relative_path) {
ch = scheme_getc_special_ok(port);
if (ch == '#') {
ch = scheme_getc_special_ok(port);
if (ch == '"') {
Scheme_Object *str;
long sline = 0, scol = 0, spos = 0;
scheme_tell_all(port, &sline, &scol, &spos);
str = read_string(1, 0, port, stxsrc, sline, scol, spos, ht, indentation, params, 1);
str->type = SCHEME_PLATFORM_PATH_KIND;
if (scheme_is_relative_path(SCHEME_PATH_VAL(str), SCHEME_PATH_LEN(str), SCHEME_PLATFORM_PATH_KIND)) {
if (SCHEME_PATHP(params->read_relative_path)) {
Scheme_Object *a[2];
a[0] = params->read_relative_path;
a[1] = str;
str = scheme_build_path(2, a);
}
}
return str;
}
} else {
scheme_read_err(port, stxsrc, line, col, pos, 2, ch, indentation,
"read: bad syntax `#^#%c'",
ch);
}
} else {
scheme_read_err(port, stxsrc, line, col, pos, 2, ch, indentation,
"read: bad syntax `#^%c'",
ch);
}
break;
case '|':
if (!params->honu_mode) {
/* FIXME: integer overflow possible */
@ -2336,6 +2374,7 @@ _internal_read(Scheme_Object *port, Scheme_Object *stxsrc, int crc, int cant_fai
params.can_read_dot = SCHEME_TRUEP(v);
v = scheme_get_param(config, MZCONFIG_CAN_READ_INFIX_DOT);
params.can_read_infix_dot = SCHEME_TRUEP(v);
params.read_relative_path = NULL;
if (!delay_load_info)
delay_load_info = scheme_get_param(config, MZCONFIG_DELAY_LOAD_INFO);
if (SCHEME_TRUEP(delay_load_info))
@ -4536,10 +4575,11 @@ static Scheme_Object *read_compact_escape(CPort *port)
params.skip_zo_vers_check = 0;
params.table = NULL;
params.read_relative_path = port->relto;
return read_inner(ep, NULL, port->ht, scheme_null, &params, 0);
}
static Scheme_Object *read_compact(CPort *port, int use_stack);
static Scheme_Object *read_compact_k(void)
@ -4595,8 +4635,7 @@ static Scheme_Object *read_compact(CPort *port, int use_stack)
v = port->symtab[l];
if (!v) {
long save_pos = port->pos;
/* avoid cycles if marshaled form is broken: */
port->symtab[l] = scheme_false;
port->symtab[l] = scheme_false; /* avoid cycles if marshaled form is broken: */
port->pos = port->shared_offsets[l - 1];
v = read_compact(port, 0);
port->pos = save_pos;
@ -4958,6 +4997,7 @@ static Scheme_Object *read_compact(CPort *port, int use_stack)
(Scheme_Object *)port->delay_info);
} else {
long save_pos = port->pos;
port->symtab[l] = scheme_false; /* avoid cycles if marshaled form is broken: */
port->pos = port->shared_offsets[l - 1];
v = read_compact(port, 0);
port->pos = save_pos;
@ -5413,6 +5453,7 @@ static Scheme_Object *read_compiled(Scheme_Object *port,
return result;
}
THREAD_LOCAL_DECL(static Scheme_Load_Delay *clear_bytes_chain);
void scheme_clear_delayed_load_cache()