expander extract: add --local-rename to minimize startup diffs

The expander's output normally uses a distinct symbolic name for every
distinct binding within a linklet. That property is useful for
consumers like schemify, but it's counterproductive for minimizing the
diff in changes to "startup.inc", since the traditional Racket
compiler doesn't need that guarantee. Use `--local-rename` to generate
"startup.inc", which should make future diffs smaller and more
composable after changes to the expander.
This commit is contained in:
Matthew Flatt 2018-07-27 11:58:12 -06:00
parent f4f22404b3
commit 031564b28c
7 changed files with 45855 additions and 45277 deletions

View File

@ -31,7 +31,7 @@ DIRECT = ++direct linklet ++direct kernel
expander: expander:
$(RACO) make bootstrap-run.rkt $(RACO) make bootstrap-run.rkt
$(RACKET) bootstrap-run.rkt -c compiled/cache-src $(KNOT) $(DIRECT) -O $(TREE) $(RACKET) bootstrap-run.rkt -c compiled/cache-src $(KNOT) $(DIRECT) --local-rename -O $(TREE)
expander-src: expander-src:
$(RACO) make bootstrap-run.rkt $(RACO) make bootstrap-run.rkt

View File

@ -75,6 +75,13 @@ Running:
Expands and extracts <file-path> as a single linklet to Expands and extracts <file-path> as a single linklet to
<outfile-path>. <outfile-path>.
Unless the `--local-rename` flag is also provided to
"bootstrap-run.rkt", an extracted linklet preserves a naming
property of the expander's compilation to linklets, which is that
it uses a distinct symbol for every binding. The symbol--binding
correspondence is useful for some further compiler passes, but
`--local-rename` is useful to minimize syntactic diffs.
% racket bootstrap-run.rkt -c <dir> -sx -D -t <file-path> -o <outfile-path> % racket bootstrap-run.rkt -c <dir> -sx -D -t <file-path> -o <outfile-path>
Expands and extracts <file-path> as a single linklet, compiles and Expands and extracts <file-path> as a single linklet, compiles and

View File

@ -8,9 +8,9 @@
;; Compilation of expanded code produces an S-expression (but enriched ;; Compilation of expanded code produces an S-expression (but enriched
;; with source locations and properties) where run-time primitive are ;; with source locations and properties) where run-time primitive are
;; accessed directly, and all linklet imports and local variables are ;; accessed directly, and all linklet imports and local variables are
;; renamed to avoid collisions with the primitive names and to avoid ;; renamed to avoid collisions with the primitive names and to make
;; all shadowing (but the same variable might be used in ;; every binding have a unique symbolic name (within the linklet) that
;; non-overlapping local contexts). A `compile-linklet` function ;; does not shadow a primitive. A `compile-linklet` function
;; (currently provided by the runtime system) then compiles the ;; (currently provided by the runtime system) then compiles the
;; enriched S-expression to bytecode. ;; enriched S-expression to bytecode.

View File

@ -29,6 +29,7 @@
#:as-c? as-c? #:as-c? as-c?
#:as-decompiled? as-decompiled? #:as-decompiled? as-decompiled?
#:as-bytecode? as-bytecode? #:as-bytecode? as-bytecode?
#:local-rename? local-rename?
;; Table of symbol -> (listof knot-spec), ;; Table of symbol -> (listof knot-spec),
;; to redirect a remaining import back to ;; to redirect a remaining import back to
;; an implementation that is defined in the ;; an implementation that is defined in the
@ -148,9 +149,14 @@
(exit 1)) (exit 1))
;; Avoid gratuitous differences due to names generated during ;; Avoid gratuitous differences due to names generated during
;; expansion ;; expansion...
(define re-renamed-linklet-expr (define re-renamed-linklet-expr
(simplify-underscore-numbers gced-linklet-expr)) (if local-rename?
;; ... and allow the same name to be used in different non-shadowing
;; local contextx
(collapse-underscore-numbers gced-linklet-expr)
;; ... but use a distinct symbol for every binder's name
(simplify-underscore-numbers gced-linklet-expr)))
;; Prune any explicit function names (using a `quote` pattern in ;; Prune any explicit function names (using a `quote` pattern in
;; the body) when they still match a name that would be inferred ;; the body) when they still match a name that would be inferred

View File

@ -1,18 +1,38 @@
#lang racket/base #lang racket/base
(require racket/match)
(provide simplify-underscore-numbers) (provide simplify-underscore-numbers
collapse-underscore-numbers)
;; Small changes to the input code can trigger lots of renumberings ;; Small changes to the input code can trigger lots of renumberings
;; for local variables, where the expander adds "_<num>" suffixes to ;; for local variables, where the expander adds "_<num>" suffixes to
;; generate local-variable names, and the "<num>"s count up across all ;; generate local-variable names, and the "<num>"s count up across all
;; symbols. Renumber with symbol-specific counting to reduce ;; symbols. Renumber with symbol-specific counting to reduce
;; unneccessary changes to generated code. A simple strategy works ;; unneccessary changes to generated code. A relatively simple
;; because no primitive or exported name has a "_<num>" suffix. ;; strategy works because no primitive or exported name has a "_<num>"
;; suffix.
(define (simplify-underscore-numbers s) (define (select-new-name s base-counts)
(define str (symbol->string s))
(define m (regexp-match-positions #rx"_[0-9]+$" str))
(cond
[(not m) s]
[else
(define base (substring str 0 (caar m)))
(define base-s (string->symbol base))
(define n (base-counts base-s))
(base-counts base-s (add1 n))
(string->symbol (format "~a_~a" base n))]))
;; ----------------------------------------
;; First variant: preserve the property that every binder is
;; repersented by a unique symbol.
(define (simplify-underscore-numbers linklet-expr)
(define replacements (make-hasheq)) (define replacements (make-hasheq))
(define base-counts (make-hasheq)) (define base-counts (make-hasheq))
(let loop ([s s]) (let loop ([s linklet-expr])
(cond (cond
[(symbol? s) [(symbol? s)
(cond (cond
@ -26,11 +46,9 @@
(hash-set! replacements s s) (hash-set! replacements s s)
s] s]
[else [else
(define base (substring str 0 (caar m))) (define r (select-new-name s (case-lambda
(define base-s (string->symbol base)) [(base-s) (hash-ref base-counts base-s 0)]
(define n (hash-ref base-counts base-s 0)) [(base-s n) (hash-set! base-counts base-s n)])))
(hash-set! base-counts base-s (add1 n))
(define r (string->symbol (format "~a_~a" base n)))
(hash-set! replacements s r) (hash-set! replacements s r)
r])])] r])])]
[(pair? s) [(pair? s)
@ -38,3 +56,71 @@
s s
(cons (loop (car s)) (loop (cdr s))))] (cons (loop (car s)) (loop (cdr s))))]
[else s]))) [else s])))
;; ----------------------------------------
;; Second variant: preserve the property that every binder is
;; represented by a unique symbol. A relatively simple strategy works
;; because no primitive or exported name has a "_<num>" suffix.
(define (collapse-underscore-numbers linklet-expr)
(define (collapse e replacements base-counts)
(match e
[`(define-values ,ids ,rhs)
`(define-values ,ids ,(collapse rhs replacements base-counts))]
[`(lambda ,formals ,body ...)
(define-values (new-formals new-replacements new-base-counts)
(rename-vars formals replacements base-counts))
`(lambda ,new-formals . ,(collapse-in-body body new-replacements new-base-counts))]
[`(case-lambda [,formalss ,bodys ...] ...)
`(case-lambda
. ,(for/list ([formals (in-list formalss)] [body (in-list bodys)])
(define-values (new-formals new-replacements new-base-counts)
(rename-vars formals replacements base-counts))
`[,new-formals . ,(collapse-in-body body new-replacements new-base-counts)]))]
[`(quote . ,_) e]
[`(let-values . _) (collapse-in-let e #f replacements base-counts)]
[`(letrec-values . _) (collapse-in-let e #t replacements base-counts)]
[`(,pseudo-es ...) ; catch-all for remaining syntactic forms
(collapse-in-body pseudo-es replacements base-counts)]
[_ (if (symbol? e)
(hash-ref replacements e e)
e)]))
(define (collapse-in-body es replacements base-counts)
(for/list ([e (in-list es)])
(collapse e replacements base-counts)))
(define (collapse-in-let e rec? replacements base-counts)
(match e
[`(,let-form ([,idss ,rhss] ...) ,body ...)
(define-values (new-idss body-replacements body-base-counts)
(rename-vars idss replacements body-base-counts))
(define-values (rhs-replacements rhs-base-counts)
(if rec?
(values body-replacements body-base-counts)
(values replacements base-counts)))
`(,let-form ,(for/list ([ids (in-list new-idss)]
[rhs (in-list rhss)])
`[,ids ,(collapse rhs rhs-replacements rhs-base-counts)])
. ,(collapse-in-body body body-replacements body-base-counts))]))
(define (rename-vars p replacements base-counts)
(define new-p
(let loop ([p p])
(cond
[(null? p) null]
[(symbol? p)
(define r (select-new-name p (case-lambda
[(s) (hash-ref base-counts s 0)]
[(s n) (set! base-counts (hash-set base-counts s n))])))
(set! replacements (hash-set replacements p r))
r]
[(pair? p) (cons (loop (car p)) (loop (cdr p)))])))
(values new-p replacements base-counts))
(match linklet-expr
[`(linklet ,imports ,exports ,forms ...)
`(linklet ,imports ,exports ,@(for/list ([form (in-list forms)])
(collapse form #hasheq() #hasheq())))]))

View File

@ -46,6 +46,7 @@
(define extract-to-c? #f) (define extract-to-c? #f)
(define extract-to-decompiled? #f) (define extract-to-decompiled? #f)
(define extract-to-bytecode? #f) (define extract-to-bytecode? #f)
(define local-rename? #f)
(define instance-knot-ties (make-hasheq)) (define instance-knot-ties (make-hasheq))
(define primitive-table-directs (make-hasheq)) (define primitive-table-directs (make-hasheq))
(define side-effect-free-modules (make-hash)) (define side-effect-free-modules (make-hash))
@ -100,6 +101,9 @@
(hash-set! dependencies (simplify-path (path->complete-path file)) #t)] (hash-set! dependencies (simplify-path (path->complete-path file)) #t)]
[("++depend-module") mod-file "Add <mod-file> and transitive as dependencies" [("++depend-module") mod-file "Add <mod-file> and transitive as dependencies"
(set! extra-module-dependencies (cons mod-file extra-module-dependencies))] (set! extra-module-dependencies (cons mod-file extra-module-dependencies))]
#:once-each
[("--local-rename") "Use simpler names in extracted, instead of a unique name for each binding"
(set! local-rename? #t)]
#:once-any #:once-any
[("-C") "Print extracted bootstrap as a C encoding" [("-C") "Print extracted bootstrap as a C encoding"
(set! extract-to-c? #t)] (set! extract-to-c? #t)]
@ -313,6 +317,7 @@
#:as-c? extract-to-c? #:as-c? extract-to-c?
#:as-decompiled? extract-to-decompiled? #:as-decompiled? extract-to-decompiled?
#:as-bytecode? extract-to-bytecode? #:as-bytecode? extract-to-bytecode?
#:local-rename? local-rename?
#:instance-knot-ties instance-knot-ties #:instance-knot-ties instance-knot-ties
#:primitive-table-directs primitive-table-directs #:primitive-table-directs primitive-table-directs
#:side-effect-free-modules side-effect-free-modules)) #:side-effect-free-modules side-effect-free-modules))

File diff suppressed because it is too large Load Diff