diff --git a/collects/compiler/zo-marshal.rkt b/collects/compiler/zo-marshal.rkt index f10a095f3b..da13079be1 100644 --- a/collects/compiler/zo-marshal.rkt +++ b/collects/compiler/zo-marshal.rkt @@ -8,7 +8,9 @@ racket/local racket/list racket/dict - racket/function) + racket/function + racket/pretty + racket/path) (provide/contract [zo-marshal (compilation-top? . -> . bytes?)] @@ -901,6 +903,7 @@ CPT_BYTE_STRING #f out)] + #; [(path? expr) (out-as-bytes expr path->bytes @@ -1024,7 +1027,20 @@ (if (quoted? expr) (out-data (quoted-v expr) out) (let ([s (open-output-bytes)]) - (write expr s) + ;; print `expr' to a string, but print paths + ;; in a special way + (parameterize ([pretty-print-size-hook + (lambda (v mode port) + (and (path? v) + (let ([v (make-relative v)]) + (+ 2 (let ([p (open-output-bytes)]) + (write (path->bytes v) p) + (bytes-length (get-output-bytes p)))))))] + [pretty-print-print-hook + (lambda (v mode port) + (display "#^" port) + (write (path->bytes (make-relative v)) port))]) + (pretty-write expr s)) (out-byte CPT_ESCAPE out) (let ([bstr (get-output-bytes s)]) (out-number (bytes-length bstr) out) @@ -1041,5 +1057,11 @@ (define-struct svector (vec)) +(define (make-relative v) + (let ([r (current-write-relative-directory)]) + (if r + (find-relative-path r v) + v))) + ;; ---------------------------------------- diff --git a/collects/compiler/zo-parse.rkt b/collects/compiler/zo-parse.rkt index 3afd74e4d3..ed2541fdaf 100644 --- a/collects/compiler/zo-parse.rkt +++ b/collects/compiler/zo-parse.rkt @@ -732,7 +732,23 @@ [read-decimal-as-inexact #t] [read-accept-dot #t] [read-accept-infix-dot #t] - [read-accept-quasiquote #t]) + [read-accept-quasiquote #t] + ;; Use a readtable for special path support in escaped: + [current-readtable + (make-readtable + #f + #\^ + 'dispatch-macro + (lambda (char port src line col pos) + (let ([b (read port)]) + (unless (bytes? b) + (error 'read-escaped-path + "expected a byte string after #^")) + (let ([p (bytes->path b)]) + (if (and (relative-path? p) + (current-load-relative-directory)) + (build-path (current-load-relative-directory) p) + p)))))]) (read/recursive (open-input-bytes s))))] [(reference) (make-primval (read-compact-number cp))] diff --git a/collects/scribblings/reference/debugging.scrbl b/collects/scribblings/reference/debugging.scrbl index 4b49841681..da17e1defb 100644 --- a/collects/scribblings/reference/debugging.scrbl +++ b/collects/scribblings/reference/debugging.scrbl @@ -3,4 +3,14 @@ @title[#:tag "debugging"]{Debugging} -@include-section["trace.scrbl"] \ No newline at end of file +Racket's built-in debugging support is limited to context (i.e., +``stack trace'') information that is printed with an exception. In +some cases, disabling the JIT compiler can affect context +information. The @racketmodname[errortrace] library supports more +consistent (independent of the JIT compiler) and precise context +information. The @racketmodname[racket/trace] library provides simple +tracing support. Finally, the @seclink[#:doc '(lib +"scribblings/drracket/drracket.scrbl") "top"]{DrRacket} programming environment +provides much more debugging support. + +@include-section["trace.scrbl"] diff --git a/collects/scribblings/reference/eval.scrbl b/collects/scribblings/reference/eval.scrbl index 3178d46f5a..d46c023b57 100644 --- a/collects/scribblings/reference/eval.scrbl +++ b/collects/scribblings/reference/eval.scrbl @@ -369,41 +369,8 @@ form may be saved for later use; the default compilation handler is optimized for the special case of immediate evaluation. When a compiled form is written to an output port, the written form -starts with @litchar{#~}. These forms are essentially assembly code -for Racket, and reading such an form produces a compiled form (as -long as the @racket[read-accept-compiled] parameter is set to -@racket[#t]). - -When a compiled form contains syntax object constants, the -@litchar{#~}-marshaled form drops source-location information and -properties (@secref["stxprops"]) for the @tech{syntax objects}. - -Compiled code parsed from @litchar{#~} may contain references to -unexported or protected bindings from a module. At read time, such -references are associated with the current code inspector (see -@racket[current-code-inspector]), and the code will only execute if -that inspector controls the relevant module invocation (see -@secref["modprotect"]). - -A compiled-form object may contain @tech{uninterned} symbols (see -@secref["symbols"]) that were created by @racket[gensym] or -@racket[string->uninterned-symbol]. When the compiled object is read -via @litchar{#~}, each uninterned symbol in the original form is -mapped to a new uninterned symbol, where multiple instances of a -single symbol are consistently mapped to the same new symbol. The -original and new symbols have the same printed -representation. @tech{Unreadable symbols}, which are typically -generated indirectly during expansion and compilation, are saved and -restored consistently through @litchar{#~}. - -Due to the restrictions on @tech{uninterned} symbols in @litchar{#~}, -do not use @racket[gensym] or @racket[string->uninterned-symbol] to -construct an identifier for a top-level or module binding. Instead, -generate distinct identifiers either with -@racket[generate-temporaries] or by applying the result of -@racket[make-syntax-introducer] to an existing identifier; those -functions will lead to top-level and module bindings with -@tech{unreadable symbol}ic names.} +starts with @litchar{#~}. See @secref["print-compiled"] for more +information.} @defproc[(compile [top-level-form any/c]) compiled-expression?]{ diff --git a/collects/scribblings/reference/printer.scrbl b/collects/scribblings/reference/printer.scrbl index a51ccbf30a..f588ec6e1d 100644 --- a/collects/scribblings/reference/printer.scrbl +++ b/collects/scribblings/reference/printer.scrbl @@ -453,16 +453,34 @@ For the purposes of printing enclosing datatypes, a keyword is @section{Printing Regular Expressions} -Regexp values in all modes (@scheme[write], @scheme[display], and -@scheme[print]) starting with @litchar{#px} (for -@scheme[pregexp]-based regexps) or @litchar{#rx} (for -@scheme[regexp]-based regexps) followed by the @scheme[write] form of -the regexp's source string or byte string. +Regexp values @scheme[write], @scheme[display], and @scheme[print] +starting with @litchar{#px} (for @scheme[pregexp]-based regexps) or +@litchar{#rx} (for @scheme[regexp]-based regexps) followed by the +@scheme[write] form of the regexp's source string or byte string. For the purposes of printing enclosing datatypes, a regexp value is @tech{quotable}. +@section[#:tag "print-path"]{Printing Paths} + +Paths @scheme[write] and @scheme[print] as @litchar{#}. A +path @racket[display]s the same as the string produced by +@racket[path->string]. For the purposes of printing enclosing +datatypes, a path counts as @tech{quotable}. + +Although a path can be converted to a string with +@racket[path->string] or to a byte string with @racket[path->bytes], +neither is clearly the right choice for printing a path and reading it +back. If the path value is meant to be moved among platforms, then a +string is probably the right choice, despite the potential for losing +information when converting a path to a string. For a path that is +intended to be re-read on the same platform, a byte string is probably +the right choice, since it preserves information in an unportable +way. Paths do not print in a readable way so that programmers are not +mislead into thinking that either choice is always appropriate. + + @section[#:tag "print-unreadable"]{Printing Unreadable Values} For any value with no other printing specification, assuming that the @@ -475,3 +493,52 @@ to the value itself. If @racket[print-unreadable] is set to For the purposes of printing enclosing datatypes, a value that prints unreadably nevertheless counts as @tech{quotable}. + + +@section[#:tag "print-compiled"]{Printing Compiled Code} + +Compiled code as produced by @racket[compile] prints using +@litchar{#~}. Compiled code printed with @litchar{#~} is essentially +assembly code for Racket, and reading such an form produces a compiled +form when the @racket[read-accept-compiled] parameter is set to +@racket[#t]. + +When a compiled form contains syntax object constants, the +@litchar{#~}-marshaled form drops source-location information and +properties (@secref["stxprops"]) for the @tech{syntax objects}. + +Compiled code parsed from @litchar{#~} may contain references to +unexported or protected bindings from a module. At read time, such +references are associated with the current code inspector (see +@racket[current-code-inspector]), and the code will only execute if +that inspector controls the relevant module invocation (see +@secref["modprotect"]). + +A compiled-form object may contain @tech{uninterned} symbols (see +@secref["symbols"]) that were created by @racket[gensym] or +@racket[string->uninterned-symbol]. When the compiled object is read +via @litchar{#~}, each uninterned symbol in the original form is +mapped to a new uninterned symbol, where multiple instances of a +single symbol are consistently mapped to the same new symbol. The +original and new symbols have the same printed +representation. @tech{Unreadable symbols}, which are typically +generated indirectly during expansion and compilation, are saved and +restored consistently through @litchar{#~}. + +Due to the restrictions on @tech{uninterned} symbols in @litchar{#~}, +do not use @racket[gensym] or @racket[string->uninterned-symbol] to +construct an identifier for a top-level or module binding. Instead, +generate distinct identifiers either with +@racket[generate-temporaries] or by applying the result of +@racket[make-syntax-introducer] to an existing identifier; those +functions will lead to top-level and module bindings with +@tech{unreadable symbol}ic names. + +Finally, a compiled form may contain path literals. Although paths are +not normally printed in a way that can be read back in, path literals +can be written and read as part of compiled code. The +@racket[current-write-relative-directory] parameter is used to convert +the path to a relative path as is it written, and then +@racket[current-load-relative-directory] parameter is used to convert +any relative path back as it is read. The relative-path conversion +applies on reading whether the path was originally relative or not. diff --git a/collects/scribblings/reference/reader.scrbl b/collects/scribblings/reference/reader.scrbl index 3b5f8f79a2..0b59d6223b 100644 --- a/collects/scribblings/reference/reader.scrbl +++ b/collects/scribblings/reference/reader.scrbl @@ -122,7 +122,7 @@ on the next character or characters in the input stream as follows: @dispatch[@litchar{#!}]{may start a reader extension; see @secref["parse-reader"]} @dispatch[@litchar{#`}]{starts a syntax quasiquote; see @secref["parse-quote"]} @dispatch[@litchar{#,}]{starts an syntax [splicing] unquote; see @secref["parse-quote"]} - @dispatch[@litchar{#~}]{starts compiled code; see @racket[current-compile]} + @dispatch[@litchar{#~}]{starts compiled code; see @secref["print-compiled"]} @dispatch[@cilitchar{#i}]{starts a number; see @secref["parse-number"]} @dispatch[@cilitchar{#e}]{starts a number; see @secref["parse-number"]} diff --git a/collects/scribblings/reference/reference.scrbl b/collects/scribblings/reference/reference.scrbl index acbac13397..cd4310e9f7 100644 --- a/collects/scribblings/reference/reference.scrbl +++ b/collects/scribblings/reference/reference.scrbl @@ -75,7 +75,6 @@ The @racketmodname[racket] library combines @include-section["os.scrbl"] @include-section["memory.scrbl"] @include-section["unsafe.scrbl"] -@include-section["debugging.scrbl"] @include-section["running.scrbl"] @;------------------------------------------------------------------------ diff --git a/collects/scribblings/reference/running.scrbl b/collects/scribblings/reference/running.scrbl index b799c913f0..7ec84aee67 100644 --- a/collects/scribblings/reference/running.scrbl +++ b/collects/scribblings/reference/running.scrbl @@ -9,3 +9,4 @@ @include-section["collects.scrbl"] @include-section["help.scrbl"] @include-section["enter.scrbl"] +@include-section["debugging.scrbl"] diff --git a/collects/scribblings/reference/write.scrbl b/collects/scribblings/reference/write.scrbl index 6d0d1c0944..b381b07316 100644 --- a/collects/scribblings/reference/write.scrbl +++ b/collects/scribblings/reference/write.scrbl @@ -251,7 +251,7 @@ object within @litchar{#} (after the (or/c (and/c path-string? complete-path?) #f) (or/c (and/c path? complete-path?) #f)]{ -A parameter that is used when writing compiled code that contains +A parameter that is used when writing compiled code (see @secref["print-compiled"]) that contains pathname literals, including source-location pathnames for procedure names. When not @racket[#f], paths that syntactically extend the parameter's value are converted to relative paths; when the resulting diff --git a/collects/tests/racket/stx.rktl b/collects/tests/racket/stx.rktl index 9ba9b94001..0690025e88 100644 --- a/collects/tests/racket/stx.rktl +++ b/collects/tests/racket/stx.rktl @@ -1435,6 +1435,27 @@ (test #t list? @simp@tst) +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Check marshaling of compiled code to disallow +;; unreadable values in a hash-table literal + +;; cyclic hash table as a bad "constant": +(err/rt-test (let ([s (open-output-bytes)]) + (write (compile `(quote ,(let ([ht (make-hasheq)]) + (hash-set! ht #'bad ht) + ht))) + s) + (get-output-bytes s)) + exn:fail?) +;; non-cyclic variant: +(err/rt-test (let ([s (open-output-bytes)]) + (write (compile `(quote ,(let ([ht (make-hasheq)]) + (hash-set! ht #'bad 10) + ht))) + s) + (get-output-bytes s)) + exn:fail?) + ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (report-errs) diff --git a/src/racket/src/mzmark.c b/src/racket/src/mzmark.c index ac768c7a90..5d3ee50e06 100644 --- a/src/racket/src/mzmark.c +++ b/src/racket/src/mzmark.c @@ -5078,6 +5078,7 @@ static int mark_read_params_MARK(void *p, struct NewGC *gc) { gcMARK2(rp->magic_sym, gc); gcMARK2(rp->magic_val, gc); gcMARK2(rp->delay_load_info, gc); + gcMARK2(rp->read_relative_path, gc); return gcBYTES_TO_WORDS(sizeof(ReadParams)); } @@ -5088,6 +5089,7 @@ static int mark_read_params_FIXUP(void *p, struct NewGC *gc) { gcFIXUP2(rp->magic_sym, gc); gcFIXUP2(rp->magic_val, gc); gcFIXUP2(rp->delay_load_info, gc); + gcFIXUP2(rp->read_relative_path, gc); return gcBYTES_TO_WORDS(sizeof(ReadParams)); } diff --git a/src/racket/src/mzmarksrc.c b/src/racket/src/mzmarksrc.c index 404e7dbab5..9193429171 100644 --- a/src/racket/src/mzmarksrc.c +++ b/src/racket/src/mzmarksrc.c @@ -2076,6 +2076,7 @@ mark_read_params { gcMARK2(rp->magic_sym, gc); gcMARK2(rp->magic_val, gc); gcMARK2(rp->delay_load_info, gc); + gcMARK2(rp->read_relative_path, gc); size: gcBYTES_TO_WORDS(sizeof(ReadParams)); } diff --git a/src/racket/src/print.c b/src/racket/src/print.c index ab327bf727..e0847cb972 100644 --- a/src/racket/src/print.c +++ b/src/racket/src/print.c @@ -85,6 +85,8 @@ typedef struct Scheme_Print_Params { char honu_mode; Scheme_Object *inspector; + char printing_quoted; + /* Used during `display' and `write': */ char *print_buffer; long print_position; @@ -971,6 +973,7 @@ print_to_string(Scheme_Object *obj, params.print_port = port; params.print_syntax = 0; params.depth_delta = NULL; + params.printing_quoted = 0; /* Getting print params can take a while, and they're irrelevant for simple things like displaying numbers. So try a shortcut: */ @@ -1680,8 +1683,8 @@ static void cannot_print(PrintParams *pp, int notdisplay, int compact) { scheme_raise_exn(MZEXN_FAIL, - (compact - ? "%s: cannot marshal constant that is embedded in compiled code: %V" + ((compact || pp->printing_quoted) + ? "%s: cannot marshal value that is embedded in compiled code: %V" : "%s: printing disabled for unreadable value: %V"), notdisplay ? "write" : "display", obj); @@ -2343,7 +2346,7 @@ print(Scheme_Object *obj, int notdisplay, int compact, Scheme_Hash_Table *ht, Scheme_Object *idx; int l; - idx = get_symtab_idx(mt, obj); + idx = get_symtab_idx(mt, obj); if (idx) { print_symtab_ref(pp, idx); } else { @@ -2351,10 +2354,9 @@ print(Scheme_Object *obj, int notdisplay, int compact, Scheme_Hash_Table *ht, dir = scheme_get_param(scheme_current_config(), MZCONFIG_WRITE_DIRECTORY); - if (SCHEME_PATHP(dir)) { + if (SCHEME_PATHP(dir)) obj = scheme_extract_relative_to(obj, dir); - } - + print_compact(pp, CPT_PATH); l = SCHEME_PATH_LEN(obj); @@ -2363,6 +2365,24 @@ print(Scheme_Object *obj, int notdisplay, int compact, Scheme_Hash_Table *ht, symtab_set(pp, mt, orig_obj); } + } else if (!compact && pp->printing_quoted) { + /* An unlikely case: we're in escaped mode for printing a constant; + use a special escape, which is recognized only when reading + an escaped S-expression, to write a path: */ + Scheme_Object *dir; + + dir = scheme_get_param(scheme_current_config(), + MZCONFIG_WRITE_DIRECTORY); + if (SCHEME_PATHP(dir)) + obj = scheme_extract_relative_to(obj, dir); + + print_utf8_string(pp, "#^", 0, 2); + obj = scheme_make_sized_byte_string(SCHEME_PATH_VAL(obj), + SCHEME_PATH_LEN(obj), + 1); + print(obj, notdisplay, compact, ht, mt, pp); + + closed = 1; } else if (!pp->print_unreadable) { cannot_print(pp, notdisplay, obj, ht, compact); } else { @@ -2653,7 +2673,7 @@ print(Scheme_Object *obj, int notdisplay, int compact, Scheme_Hash_Table *ht, } else if (SCHEME_STXP(obj)) { - if (compact) { + if (compact && !pp->printing_quoted) { print_compact(pp, CPT_STX); /* "2" in scheme_syntax_to_datum() call preserves wraps. */ @@ -2838,7 +2858,7 @@ print(Scheme_Object *obj, int notdisplay, int compact, Scheme_Hash_Table *ht, { Scheme_Hash_Table *q_ht; Scheme_Object *v; - int counter = 1, qpht, qpb; + int counter = 1, qpht, qpb, qpu; v = SCHEME_PTR_VAL(obj); @@ -2865,8 +2885,16 @@ print(Scheme_Object *obj, int notdisplay, int compact, Scheme_Hash_Table *ht, #endif } + /* Avoid all unprintable values, whether or not we stay in compact mode. */ + qpu = pp->print_unreadable; + pp->print_unreadable = 0; + pp->printing_quoted = 1; + compact = print(v, notdisplay, 1, q_ht, mt, pp); + pp->printing_quoted = 0; + pp->print_unreadable = qpu; + pp->print_hash_table = qpht; pp->print_box = qpb; } diff --git a/src/racket/src/read.c b/src/racket/src/read.c index 9610c23e37..f29fba5267 100644 --- a/src/racket/src/read.c +++ b/src/racket/src/read.c @@ -181,6 +181,7 @@ typedef struct ReadParams { Readtable *table; Scheme_Object *magic_sym, *magic_val; Scheme_Object *delay_load_info; + Scheme_Object *read_relative_path; } ReadParams; #define THREAD_FOR_LOCALS scheme_current_thread @@ -1392,6 +1393,43 @@ read_inner_inner(Scheme_Object *port, Scheme_Object *stxsrc, Scheme_Hash_Table * } } break; + case '^': + if (params->read_relative_path) { + ch = scheme_getc_special_ok(port); + if (ch == '#') { + ch = scheme_getc_special_ok(port); + if (ch == '"') { + Scheme_Object *str; + long sline = 0, scol = 0, spos = 0; + + scheme_tell_all(port, &sline, &scol, &spos); + + str = read_string(1, 0, port, stxsrc, sline, scol, spos, ht, indentation, params, 1); + + str->type = SCHEME_PLATFORM_PATH_KIND; + + if (scheme_is_relative_path(SCHEME_PATH_VAL(str), SCHEME_PATH_LEN(str), SCHEME_PLATFORM_PATH_KIND)) { + if (SCHEME_PATHP(params->read_relative_path)) { + Scheme_Object *a[2]; + a[0] = params->read_relative_path; + a[1] = str; + str = scheme_build_path(2, a); + } + } + + return str; + } + } else { + scheme_read_err(port, stxsrc, line, col, pos, 2, ch, indentation, + "read: bad syntax `#^#%c'", + ch); + } + } else { + scheme_read_err(port, stxsrc, line, col, pos, 2, ch, indentation, + "read: bad syntax `#^%c'", + ch); + } + break; case '|': if (!params->honu_mode) { /* FIXME: integer overflow possible */ @@ -2336,6 +2374,7 @@ _internal_read(Scheme_Object *port, Scheme_Object *stxsrc, int crc, int cant_fai params.can_read_dot = SCHEME_TRUEP(v); v = scheme_get_param(config, MZCONFIG_CAN_READ_INFIX_DOT); params.can_read_infix_dot = SCHEME_TRUEP(v); + params.read_relative_path = NULL; if (!delay_load_info) delay_load_info = scheme_get_param(config, MZCONFIG_DELAY_LOAD_INFO); if (SCHEME_TRUEP(delay_load_info)) @@ -4536,10 +4575,11 @@ static Scheme_Object *read_compact_escape(CPort *port) params.skip_zo_vers_check = 0; params.table = NULL; + params.read_relative_path = port->relto; + return read_inner(ep, NULL, port->ht, scheme_null, ¶ms, 0); } - static Scheme_Object *read_compact(CPort *port, int use_stack); static Scheme_Object *read_compact_k(void) @@ -4595,8 +4635,7 @@ static Scheme_Object *read_compact(CPort *port, int use_stack) v = port->symtab[l]; if (!v) { long save_pos = port->pos; - /* avoid cycles if marshaled form is broken: */ - port->symtab[l] = scheme_false; + port->symtab[l] = scheme_false; /* avoid cycles if marshaled form is broken: */ port->pos = port->shared_offsets[l - 1]; v = read_compact(port, 0); port->pos = save_pos; @@ -4958,6 +4997,7 @@ static Scheme_Object *read_compact(CPort *port, int use_stack) (Scheme_Object *)port->delay_info); } else { long save_pos = port->pos; + port->symtab[l] = scheme_false; /* avoid cycles if marshaled form is broken: */ port->pos = port->shared_offsets[l - 1]; v = read_compact(port, 0); port->pos = save_pos; @@ -5413,6 +5453,7 @@ static Scheme_Object *read_compiled(Scheme_Object *port, return result; } + THREAD_LOCAL_DECL(static Scheme_Load_Delay *clear_bytes_chain); void scheme_clear_delayed_load_cache()