From 1f92e985904054923cd27b109f008dd7f9ab2d1a Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Wed, 4 Dec 2019 20:45:37 -0700 Subject: [PATCH] cs: fixes for uninterned symbols in compiled code Use newly added support for uninterned symbols (as opposed to gensyms) in the Chez Scheme layer. Using uninterned symbols reduces non-determinsitsm in the build. The change to use Chez Scheme uninterned symbols exposed problems with the way that Racket-level uninterned symbols (formerly implemented with gensym) are handled in ".zo" files. The problem is that some uninterned symbols are marshaled with `racket/fasl`, which is not consostent with those that are marshaled by Chez Scheme's `fasl`. This patch fixes those problems by ensuring that uninterned symbols are always lifted to the level of a Chez Scheme `fasl` for a complete linklet bundle. --- pkgs/base/info.rkt | 2 +- .../scribblings/reference/fasl.scrbl | 18 ++++++-- pkgs/racket-test-core/tests/racket/fasl.rktl | 12 +++++ .../racket-test-core/tests/racket/module.rktl | 24 ++++++++++ .../tests/racket/deterministic-zo.rkt | 2 +- racket/collects/racket/fasl.rkt | 27 +++++++++-- racket/src/cs/bootstrap/gensym.rkt | 9 +++- racket/src/cs/bootstrap/scheme-lang.rkt | 1 + racket/src/cs/compile-file.ss | 2 +- racket/src/cs/rumble/control.ss | 2 +- racket/src/cs/rumble/struct.ss | 2 +- racket/src/cs/rumble/symbol.ss | 37 +++++++++------ racket/src/racket/src/schvers.h | 2 +- racket/src/schemify/path.rkt | 46 ++++++++++++++++--- racket/src/schemify/quoted.rkt | 7 ++- racket/src/schemify/serialize.rkt | 13 ++++-- racket/src/schemify/to-fasl.rkt | 5 +- 17 files changed, 171 insertions(+), 40 deletions(-) diff --git a/pkgs/base/info.rkt b/pkgs/base/info.rkt index 4fcd293bac..49bbda135a 100644 --- a/pkgs/base/info.rkt +++ b/pkgs/base/info.rkt @@ -12,7 +12,7 @@ (define collection 'multi) -(define version "7.5.0.8") +(define version "7.5.0.9") (define deps `("racket-lib" ["racket" #:version ,version])) diff --git a/pkgs/racket-doc/scribblings/reference/fasl.scrbl b/pkgs/racket-doc/scribblings/reference/fasl.scrbl index a27d09f078..ac8cde2134 100644 --- a/pkgs/racket-doc/scribblings/reference/fasl.scrbl +++ b/pkgs/racket-doc/scribblings/reference/fasl.scrbl @@ -12,10 +12,12 @@ @defproc[(s-exp->fasl [v any/c] [out (or/c output-port? #f) #f] [#:keep-mutable? keep-mutable? any/c #f] - [#:handle-fail handle-fail (or/c #f (any/c . -> . any/c)) #f]) + [#:handle-fail handle-fail (or/c #f (any/c . -> . any/c)) #f] + [#:external-lift? external-lift? (or/c #f (any/c . -> . any/c)) #f] (or/c (void) bytes?)] @defproc[(fasl->s-exp [in (or/c input-port? bytes?)] - [#:datum-intern? datum-intern? any/c #t]) + [#:datum-intern? datum-intern? any/c #t] + [#:external-lifts external-lifts vector? '#()]) any/c] )]{ @@ -43,6 +45,15 @@ returning a replacement value. If @racket[handle-fail] is @racket[#f], then the @exnraise[exn:fail:contract] when an invalid value is encountered. +If @racket[external-lift?] is not @racket[#f], then it receives each +value @racket[_v-sub] encountered in @racket[v] by +@racket[s-exp->fasl]. If the result of @racket[external-lift?] on +@racket[_v-sub] is true, then @racket[_v-sub] is not encoded in the +result, and it instead treated as @deftech{externally lifted}. A +deserializing @racket[fasl->s-exp] receives a @racket[external-lifts] +vector that has one value for each externally lifted value, in the +same order as passed to @racket[external-lift?] on serialization. + Like @racket[(compile `(quote ,v))], @racket[s-exp->fasl] does not preserve graph structure, support cycles, or handle non-@tech{prefab} structures. Compose @racket[s-exp->fasl] with @racket[serialize] to @@ -80,7 +91,8 @@ fasl and added the @racket[#:keep-mutable?] and @racket[#:datum-intern?] arguments.} #:changed "7.3.0.7" @elem{Added support for @tech{correlated objects}.} - #:changed "7.5.0.3" @elem{Added the @racket[#:handle-fail] argument.}]} + #:changed "7.5.0.3" @elem{Added the @racket[#:handle-fail] argument.} + #:changed "7.5.0.9" @elem{Added the @racket[#:external-lift?] and @racket[#:external-lifts] arguments.}]} @; ---------------------------------------------------------------------- diff --git a/pkgs/racket-test-core/tests/racket/fasl.rktl b/pkgs/racket-test-core/tests/racket/fasl.rktl index 03c0454a2e..32ac9ef14c 100644 --- a/pkgs/racket-test-core/tests/racket/fasl.rktl +++ b/pkgs/racket-test-core/tests/racket/fasl.rktl @@ -212,4 +212,16 @@ (test (list (dynamic-require 'racket/unsafe/undefined 'unsafe-undefined)) fasl->s-exp (s-exp->fasl (list (dynamic-require 'racket/unsafe/undefined 'unsafe-undefined)))) +;; Check external-lift support: +(let ([lifts '()] + [data '(a #(b) #s(posn c d a a) #&c)]) + (let ([bstr (s-exp->fasl data + #:external-lift? (lambda (v) + (cond + [(memq v '(a b c d)) + (set! lifts (cons v lifts)) + #t] + [else #f])))]) + (test data fasl->s-exp bstr #:external-lifts (list->vector (reverse lifts))))) + (report-errs) diff --git a/pkgs/racket-test-core/tests/racket/module.rktl b/pkgs/racket-test-core/tests/racket/module.rktl index e1ce790faf..eb3753eb35 100644 --- a/pkgs/racket-test-core/tests/racket/module.rktl +++ b/pkgs/racket-test-core/tests/racket/module.rktl @@ -3196,6 +3196,30 @@ case of module-leve bindings; it doesn't cover local bindings. exn:fail:contract:variable? #rx"^f:") +;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Check on gensyms that span phases in marshaled code + +(let ([e '(module has-a-gensym-that-spans-phases racket/base + (require (for-syntax racket/base)) + + (define-syntax (def stx) + (syntax-case stx () + [(_ id s-id) + (let ([sym (gensym)]) + #`(begin + (provide id s-id) + (define id '#,sym) + (define-syntax (s-id stx) + #'(quote #,sym))))])) + + (def the-gensym expand-to-the-gensym))]) + (define o (open-output-bytes)) + (write (compile e) o) + (eval (parameterize ([read-accept-compiled #t]) + (read-syntax 'expr (open-input-bytes (get-output-bytes o)))))) +(require 'has-a-gensym-that-spans-phases) +(test #t eq? the-gensym (expand-to-the-gensym)) + ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (report-errs) diff --git a/pkgs/racket-test/tests/racket/deterministic-zo.rkt b/pkgs/racket-test/tests/racket/deterministic-zo.rkt index b0544100ad..29e7e8e413 100644 --- a/pkgs/racket-test/tests/racket/deterministic-zo.rkt +++ b/pkgs/racket-test/tests/racket/deterministic-zo.rkt @@ -35,7 +35,7 @@ (call-with-output-file* "/tmp/c1" #:exists 'truncate (lambda (o) (write-bytes c1 o))) (call-with-output-file* "/tmp/c2" #:exists 'truncate (lambda (o) (write-bytes c2 o))) (error "failed")) - (define zo (get-compilation-bytecode-file (build-path dir f) #:modes '("compiled"))) + (define zo (get-compilation-bytecode-file (build-path dir f) #:modes (use-compiled-file-paths))) (when (file-exists? zo) (define c3 (file->bytes zo)) (unless (equal? c3 c1) diff --git a/racket/collects/racket/fasl.rkt b/racket/collects/racket/fasl.rkt index 037d4aff7c..faa387a61b 100644 --- a/racket/collects/racket/fasl.rkt +++ b/racket/collects/racket/fasl.rkt @@ -120,20 +120,33 @@ (define (s-exp->fasl v [orig-o #f] #:keep-mutable? [keep-mutable? #f] - #:handle-fail [handle-fail #f]) + #:handle-fail [handle-fail #f] + #:external-lift? [external-lift? #f]) (when orig-o (unless (output-port? orig-o) (raise-argument-error 'fasl->s-exp "(or/c output-port? #f)" orig-o))) (when handle-fail (unless (and (procedure? handle-fail) (procedure-arity-includes? handle-fail 1)) (raise-argument-error 'fasl->s-exp "(or/c (procedure-arity-includes/c 1) #f)" handle-fail))) + (when external-lift? + (unless (and (procedure? external-lift?) (procedure-arity-includes? external-lift? 1)) + (raise-argument-error 'fasl->s-exp "(or/c (procedure-arity-includes/c 1) #f)" external-lift?))) (define o (or orig-o (open-output-bytes))) (define shared (make-hasheq)) + (define external-lift (and external-lift? (make-hasheq))) (define shared-counter 0) ;; Find shared symbols and similar for compactness. We don't try to ;; save general graph structure, leaving that to `serialize`. (let loop ([v v]) (cond + [(and external-lift + (hash-ref external-lift v #f)) + (void)] + [(and external-lift? + (external-lift? v)) + (hash-set! external-lift v #t) + (set! shared-counter (add1 shared-counter)) + (hash-set! shared v (- shared-counter))] [(or (symbol? v) (keyword? v) (string? v) @@ -380,7 +393,8 @@ ;; mutable pair containing a byte string and position (define (fasl->s-exp orig-i - #:datum-intern? [intern? #t]) + #:datum-intern? [intern? #t] + #:external-lifts [external-lifts '#()]) (define init-i (cond [(bytes? orig-i) (mcons orig-i 0)] [(input-port? orig-i) orig-i] @@ -389,8 +403,15 @@ (read-error "unrecognized prefix")) (define shared-count (read-fasl-integer init-i)) (define shared (make-vector shared-count)) - (define len (read-fasl-integer init-i)) + (unless (and (vector? external-lifts) + ((vector-length external-lifts) . <= . shared-count)) + (error 'fasl->s-exp "external-lift vector does not match expected size")) + (for ([v (in-vector external-lifts)] + [pos (in-naturals)]) + (vector-set! shared pos (vector-ref external-lifts pos))) + + (define len (read-fasl-integer init-i)) (define i (if (mpair? init-i) init-i ;; Faster to work with a byte string: diff --git a/racket/src/cs/bootstrap/gensym.rkt b/racket/src/cs/bootstrap/gensym.rkt index 39274538ad..e7310ff20d 100644 --- a/racket/src/cs/bootstrap/gensym.rkt +++ b/racket/src/cs/bootstrap/gensym.rkt @@ -11,7 +11,8 @@ gensym? gensym->unique-string gensym->pretty-string - hash-curly) + hash-curly + uninterned-symbol?) (define print-gensym (make-parameter #t)) @@ -55,3 +56,9 @@ (when (regexp-match? #rx"[|]" (symbol->string sym)) (error "here")) sym) + +(define (uninterned-symbol? v) + (and (symbol? v) + (not (or (symbol-interned? v) + (symbol-unreadable? v))))) + diff --git a/racket/src/cs/bootstrap/scheme-lang.rkt b/racket/src/cs/bootstrap/scheme-lang.rkt index eb155291a1..eb2b22b791 100644 --- a/racket/src/cs/bootstrap/scheme-lang.rkt +++ b/racket/src/cs/bootstrap/scheme-lang.rkt @@ -99,6 +99,7 @@ generate-interrupt-trap $track-dynamic-closure-counts $suppress-primitive-inlining + uninterned-symbol? string->uninterned-symbol debug-level scheme-version-number scheme-fork-version-number diff --git a/racket/src/cs/compile-file.ss b/racket/src/cs/compile-file.ss index 0aee64e7ba..53781d6784 100644 --- a/racket/src/cs/compile-file.ss +++ b/racket/src/cs/compile-file.ss @@ -2,7 +2,7 @@ ;; Check to make we're using a build of Chez Scheme ;; that has all the features we need. (define-values (need-maj need-min need-sub need-dev) - (values 9 5 3 5)) + (values 9 5 3 6)) (unless (guard (x [else #f]) (eval 'scheme-fork-version-number)) (error 'compile-file diff --git a/racket/src/cs/rumble/control.ss b/racket/src/cs/rumble/control.ss index 652c79ef4e..1880a9b41b 100644 --- a/racket/src/cs/rumble/control.ss +++ b/racket/src/cs/rumble/control.ss @@ -1551,7 +1551,7 @@ (define make-continuation-mark-key (case-lambda - [() (make-continuation-mark-key (gensym))] + [() (make-continuation-mark-key (#%gensym))] [(name) (create-continuation-mark-key name)])) (define (continuation-mark-key? v) diff --git a/racket/src/cs/rumble/struct.ss b/racket/src/cs/rumble/struct.ss index 7e928ac625..3eee810a87 100644 --- a/racket/src/cs/rumble/struct.ss +++ b/racket/src/cs/rumble/struct.ss @@ -1031,7 +1031,7 @@ " (procedure-arity-includes/c 2)\n" " (procedure-arity-includes/c 2))") val) - (cons (gensym) val)))) + (cons (#%gensym) val)))) (define-values (prop:authentic authentic? authentic-ref) (make-struct-type-property 'authentic (lambda (val info) #t))) diff --git a/racket/src/cs/rumble/symbol.ss b/racket/src/cs/rumble/symbol.ss index 21c3e43578..009cd9cba0 100644 --- a/racket/src/cs/rumble/symbol.ss +++ b/racket/src/cs/rumble/symbol.ss @@ -1,10 +1,10 @@ (define gensym (case-lambda - [() (#%gensym)] + [() (gensym "g")] [(s) (cond - [(string? s) (#%gensym (append-gensym-counter s))] - [(symbol? s) (#%gensym (append-gensym-counter (chez:symbol->string s)))] + [(string? s) (string->uninterned-symbol (append-gensym-counter s))] + [(symbol? s) (string->uninterned-symbol (append-gensym-counter (#%symbol->string s)))] [else (raise-argument-error 'gensym "(or/c symbol? string?)" @@ -21,28 +21,39 @@ (define/who (symbol-interned? s) (check who symbol? s) - (not (gensym? s))) + (not (or (gensym? s) + (uninterned-symbol? s)))) (define unreadable-unique-name "gr8mwsuasnvzbl9jjo6e9b-") +(define unreadable-unique-name-length (string-length unreadable-unique-name)) (define/who (symbol-unreadable? s) (check who symbol? s) (and (gensym? s) - (equal? (gensym->unique-string s) - (string-append unreadable-unique-name (symbol->string s))))) + (let ([u (gensym->unique-string s)] + [str (symbol->string s)]) + (let ([len (string-length str)]) + (and (fx= (string-length u) + (fx+ unreadable-unique-name-length len)) + (let loop ([i 0]) + (or (fx= i unreadable-unique-name-length) + (and (char=? (string-ref unreadable-unique-name i) + (string-ref u i)) + (loop (fx+ i 1))))) + (let loop ([i 0]) + (or (fx= i len) + (and (char=? (string-ref str i) + (string-ref u (fx+ unreadable-unique-name-length i))) + (loop (fx+ i 1)))))))))) (define/who (symbol->string s) (check who symbol? s) - (string-copy (chez:symbol->string s))) - -(define/who (string->uninterned-symbol str) - (check who string? str) - (chez:gensym (string->immutable-string str))) + (string-copy (#%symbol->string s))) (define/who (string->unreadable-symbol str) (check who string? str) - (chez:gensym (string->immutable-string str) - (string-append unreadable-unique-name str))) + (#%gensym (string->immutable-string str) + (string-append unreadable-unique-name str))) (define/who symbolfasl (force-unfasl orig-p) #:handle-fail cannot-fasl))] + (define v (force-unfasl orig-p)) + (cond + [(symbol? v) + ;; Shortcut for just an uninterned symbol: + (box v)] + [else + (define lifts '()) + (define bstr (s-exp->fasl v + #:handle-fail cannot-fasl + ;; We have to keep uninterned symbols exposed, so they're + ;; fasled with the encloding linklet directory + #:external-lift? (lambda (v) + (and (symbol? v) + (not (symbol-interned? v)) + (not (symbol-unreadable? v)) + (begin + (set! lifts (cons v lifts)) + #t))))) + (if (null? lifts) + (box bstr) + (box (cons bstr (list->vector (reverse lifts)))))])] + [(symbol? orig-p) + ;; Must be an uninterned symbol: + orig-p] [else (define p (if (path-for-srcloc? orig-p) (path-for-srcloc-path orig-p) @@ -84,9 +109,16 @@ (define (compiled-path->path e) (cond - [(box? e) (to-fasl (box (unbox e)) - (or (current-load-relative-directory) - (current-directory)))] + [(box? e) + (define c (unbox e)) + (to-fasl (box (if (pair? c) (car c) c)) + (if (pair? c) (cdr c) '#()) + (and (not (symbol? c)) + (or (current-load-relative-directory) + (current-directory))))] + [(symbol? e) + ;; Must be an uninterned symbol: + e] [(bytes? e) (bytes->path e)] [(string? e) e] ; was `path-for-srcloc` on write [else (relative-path-elements->path e)])) @@ -97,7 +129,9 @@ (cond [(bytes? v) (define v2 (parameterize ([current-load-relative-directory (to-fasl-wrt tf)]) - (fasl->s-exp v #:datum-intern? #t))) + (fasl->s-exp v + #:datum-intern? #t + #:external-lifts (to-fasl-lifts tf)))) (box-cas! vb v v2) (set-to-fasl-wrt! tf #f) (unbox vb)] diff --git a/racket/src/schemify/quoted.rkt b/racket/src/schemify/quoted.rkt index fa3e1c4398..335891ae70 100644 --- a/racket/src/schemify/quoted.rkt +++ b/racket/src/schemify/quoted.rkt @@ -13,7 +13,7 @@ [for-cify? (not (or (and (exact-integer? q) ;; always a fixnum: - (<= (- (expt 2 29)) q (expt 2 29))) + (<= (- (expt 2 29)) q (sub1 (expt 2 29)))) (boolean? q) (null? q) (void? q)))] @@ -37,7 +37,10 @@ (number? q) (char? q) (boolean? q) - (symbol? q) + (and (symbol? q) + ;; lift out gensym for sharing across phases + (or (symbol-interned? q) + (symbol-unreadable? q))) (eof-object? q) (void? q) (eq? q unsafe-undefined)) diff --git a/racket/src/schemify/serialize.rkt b/racket/src/schemify/serialize.rkt index b819fca8e2..84dea41e69 100644 --- a/racket/src/schemify/serialize.rkt +++ b/racket/src/schemify/serialize.rkt @@ -161,7 +161,7 @@ (large-quoted? q)) ;; a `to-fasl` struct is recognized like ;; paths and `path-for-srcloc`: - (define id (add-lifted (to-fasl (box q) #f))) + (define id (add-lifted (to-fasl (box q) '#() #f))) `(force-unfasl ,id)] [else (let make-construct ([q q]) @@ -189,7 +189,7 @@ ,(let ([src (srcloc-source q)]) (if (and (not for-cify?) ;; Need to handle paths, need to reject (later) anything other - ;; than a few type slike strings and byte strings + ;; than a few types like strings and byte strings (not (or (string? src) (bytes? src) (symbol? src) (not src)))) ;; Like paths, `path-for-srcloc` must be recognized later (make-construct (path-for-srcloc src)) @@ -255,16 +255,21 @@ (number? q) (char? q) (boolean? q) - (symbol? q) + (and (symbol? q) + (or (symbol-interned? q) + (symbol-unreadable? q))) (eof-object? q) (void? q) (eq? q unsafe-undefined)) ;; Serializable in-place: `(quote ,q)] + [(symbol? q) + ;; Must be an uninterned symbol + `(force-unfasl ,(add-lifted (to-fasl (box q) '#() #f)))] [else ;; Lift out anything non-serializable, so we can deal with those ;; values like we deal with paths: - (define id (add-lifted (to-fasl (box q) #f))) + (define id (add-lifted (to-fasl (box q) '#() #f))) `(force-unfasl ,id)])) (cond [(and (quote? rhs) diff --git a/racket/src/schemify/to-fasl.rkt b/racket/src/schemify/to-fasl.rkt index 254d390355..ace05398f9 100644 --- a/racket/src/schemify/to-fasl.rkt +++ b/racket/src/schemify/to-fasl.rkt @@ -2,6 +2,7 @@ (provide (struct-out to-fasl)) -(struct to-fasl (vb ; box containing byte string as marshaled or other as unmarshaled - wrt) ; directory for unmarshaling +(struct to-fasl (vb ; box containing byte string as marshaled or other as unmarshaled + lifts ; vector of external lifts + wrt) ; directory for unmarshaling #:mutable)