diff --git a/collects/compiler/demodularizer/alpha.rkt b/collects/compiler/demodularizer/alpha.rkt new file mode 100644 index 0000000000..7ca1b83e1a --- /dev/null +++ b/collects/compiler/demodularizer/alpha.rkt @@ -0,0 +1,19 @@ +#lang racket +(require compiler/zo-parse) + +(define (alpha-vary-ctop top) + (match top + [(struct compilation-top (max-let-depth prefix form)) + (make-compilation-top max-let-depth (alpha-vary-prefix prefix) form)])) +(define (alpha-vary-prefix p) + (struct-copy prefix p + [toplevels + (map (match-lambda + [(and sym (? symbol?)) + (gensym sym)] + [other + other]) + (prefix-toplevels p))])) + +(provide/contract + [alpha-vary-ctop (compilation-top? . -> . compilation-top?)]) \ No newline at end of file diff --git a/collects/compiler/demodularizer/batch.rkt b/collects/compiler/demodularizer/batch.rkt new file mode 100644 index 0000000000..b8e70bb143 --- /dev/null +++ b/collects/compiler/demodularizer/batch.rkt @@ -0,0 +1,127 @@ +#lang racket +#| +Here's the idea: + +- Take a module's bytecode +- Recursively get all the bytecode for modules that the target requires +- After reading it, prune everything that isn't at phase 0 (the runtime phase) + +- Now that we have all the modules, the next step is to merge them into a single + module +-- Although actually we collapse them into the top-level, not a module +- To do that, we iterate through all the modules doing two things as we go: +-- Incrementing all the global variable references by all the references in all + the modules +--- So if A has 5, then B's start at index 5 and so on +-- Replacing module variable references with the actual global variables + corresponding to those variables +--- So if A's variable 'x' is in global slot 4, then if B refers to it, it + directly uses slot 4, rather than a module-variable slot + +- At that point we have all the module code in a single top-level, but many + toplevels won't be used because a library function isn't really used +- So, we do a "garbage collection" on elements of the prefix +- First, we create a dependency graph of all toplevels and the initial scope +- Then, we do a DFS on the initial scope and keep all those toplevels, throwing + away the construction of everything else + [XXX: This may be broken because of side-effects.] + +- Now we have a small amount code, but because we want to go back to source, + we need to fix it up a bit; because different modules may've used the same + names +- So, we do alpha-renaming, but it's easy because names are only used in the + compilation-top prefix structure + +[TODO] + +- Next, we decompile +- Then, it will pay to do dead code elimination and inlining, etc. +|# + +(require racket/pretty + racket/system + "util.rkt" + "nodep.rkt" + "merge.rkt" + "gc-toplevels.rkt" + "alpha.rkt" + "module.rkt" + compiler/decompile + compiler/zo-marshal + racket/set) + +(define excluded-modules (make-parameter (set))) +(define file-to-batch + (command-line #:program "batch" + #:multi + [("-e" "--exclude-modules") mod + "Exclude a module from being batched" + (excluded-modules (set-add (excluded-modules) mod))] + #:args (filename) filename)) + +(define-values (base name dir?) (split-path file-to-batch)) +(when (or (eq? base #f) dir?) + (error 'batch "Cannot run on directory")) + + +;; Compile +#;(eprintf "Removing existing zo file~n") +#;(define compiled-zo-path (build-compiled-path base (path-add-suffix name #".zo"))) + +#;(when (file-exists? compiled-zo-path) + (delete-file compiled-zo-path)) + +(eprintf "Compiling module~n") +(void (system* (find-executable-path "raco") "make" file-to-batch)) + + +(define merged-source-path (path-add-suffix file-to-batch #".merged.rkt")) +(define-values (merged-source-base merged-source-name _1) (split-path merged-source-path)) +(define merged-zo-path (build-compiled-path merged-source-base (path-add-suffix merged-source-name #".zo"))) + +;; Transformations +(eprintf "Removing dependencies~n") +(define-values (batch-nodep top-lang-info top-self-modidx) + (nodep-file file-to-batch (excluded-modules))) + +(eprintf "Merging modules~n") +(define batch-merge + (merge-compilation-top batch-nodep)) + +(eprintf "GC-ing top-levels~n") +(define batch-gcd + (gc-toplevels batch-merge)) + +(eprintf "Alpha-varying top-levels~n") +(define batch-alpha + (alpha-vary-ctop batch-gcd)) + +(define batch-modname + (string->symbol (regexp-replace #rx"\\.rkt$" (path->string merged-source-name) ""))) +(eprintf "Modularizing into ~a~n" batch-modname) +(define batch-mod + (wrap-in-kernel-module batch-modname batch-modname top-lang-info top-self-modidx batch-alpha)) + +;; Output +(define batch-final batch-mod) + +(eprintf "Writing merged source~n") +(with-output-to-file + merged-source-path + (lambda () + (pretty-print (decompile batch-final))) + #:exists 'replace) + +(eprintf "Writing merged zo~n") +(void + (with-output-to-file + merged-zo-path + (lambda () + (write-bytes (zo-marshal batch-final))) + #:exists 'replace)) + +(eprintf "Running merged source~n") +(void (system* (find-executable-path "racket") (path->string merged-source-path))) + + + diff --git a/collects/compiler/demodularizer/gc-toplevels.rkt b/collects/compiler/demodularizer/gc-toplevels.rkt new file mode 100644 index 0000000000..df1d027969 --- /dev/null +++ b/collects/compiler/demodularizer/gc-toplevels.rkt @@ -0,0 +1,275 @@ +#lang racket +(require compiler/zo-parse + "util.rkt") + +; XXX Use efficient set structure +(define (gc-toplevels top) + (match top + [(struct compilation-top (max-let-depth top-prefix form)) + (define lift-start + (prefix-lift-start top-prefix)) + (define max-depgraph-index + (+ (prefix-num-lifts top-prefix) + lift-start)) + (define top-node max-depgraph-index) + (define DEP-GRAPH (make-vector (add1 top-node) (make-refs empty empty))) + (define build-graph! (make-build-graph! DEP-GRAPH)) + (define _void (build-graph! (list top-node) form)) + (define-values (used-tls stxs) (graph-dfs DEP-GRAPH top-node)) + (define ordered-used-tls (sort (rest used-tls) <=)) ; This rest drops off the top-node + (define ordered-stxs (sort stxs <=)) + (define (lift? i) (lift-start . <= . i)) + (define-values (lifts normal-tls) (partition lift? ordered-used-tls)) + (define new-prefix + (make-prefix + (length lifts) + (for/list ([i normal-tls]) + (list-ref (prefix-toplevels top-prefix) i)) + (for/list ([i ordered-stxs]) + (list-ref (prefix-stxs top-prefix) i)))) + (define new-lift-start + (prefix-lift-start new-prefix)) + ; XXX This probably breaks max-let-depth + (define new-form + ((gc-toplevels-form + (lambda (pos) (index<=? pos ordered-used-tls)) + (lambda (pos) + (if (lift? pos) + (+ new-lift-start (index<=? pos lifts)) + (index<=? pos normal-tls))) + (lambda (stx-pos) + (index<=? stx-pos ordered-stxs)) + (prefix-syntax-start new-prefix)) + form)) + (eprintf "Total TLS: ~S~n" (length normal-tls)) + (eprintf "Used TLS: ~S~n" normal-tls) + (eprintf "Total lifts: ~S~n" (length lifts)) + (eprintf "Used lifts: ~S~n" lifts) + (eprintf "Total stxs: ~S~n" (length stxs)) + (eprintf "Used stxs: ~S~n" ordered-stxs) + (make-compilation-top + max-let-depth + new-prefix + new-form)])) + +(define-struct refs (tl stx) #:transparent) + +(define (make-build-graph! DEP-GRAPH) + (define (build-graph!* form lhs) + (match form + [(struct def-values (ids rhs)) + (define new-lhs (map toplevel-pos ids)) + ; If we require one, we should require all, so make them reference each other + (for-each (lambda (tl) (build-graph! new-lhs tl)) ids) + (build-graph! new-lhs rhs)] + [(? def-syntaxes?) + (error 'build-graph "Doesn't handle syntax")] + [(? def-for-syntax?) + (error 'build-graph "Doesn't handle syntax")] + [(struct req (reqs dummy)) + (build-graph! lhs dummy)] + [(? mod?) + (error 'build-graph "Doesn't handle modules")] + [(struct seq (forms)) + (for-each (lambda (f) (build-graph! lhs f)) forms)] + [(struct splice (forms)) + (for-each (lambda (f) (build-graph! lhs f)) forms)] + [(and l (struct lam (name flags num-params param-types rest? closure-map closure-types max-let-depth body))) + (build-graph! lhs body)] + [(and c (struct closure (code gen-id))) + (build-graph! lhs code)] + [(and cl (struct case-lam (name clauses))) + (for-each (lambda (l) (build-graph! lhs l)) + clauses)] + [(struct let-one (rhs body flonum? unused?)) + (build-graph! lhs rhs) + (build-graph! lhs body)] + [(and f (struct let-void (count boxes? body))) + (build-graph! lhs body)] + [(and f (struct install-value (_ _ _ rhs body))) + (build-graph! lhs rhs) + (build-graph! lhs body)] + [(struct let-rec (procs body)) + (for-each (lambda (l) (build-graph! lhs l)) procs) + (build-graph! lhs body)] + [(and f (struct boxenv (_ body))) + (build-graph! lhs body)] + [(and f (struct toplevel (_ pos _ _))) + (for-each (lambda (lhs) + (dict-update! DEP-GRAPH lhs + (match-lambda + [(struct refs (tls stxs)) + (make-refs (list* pos tls) stxs)]))) + lhs)] + [(and f (struct topsyntax (_ pos _))) + (for-each (lambda (lhs) + (dict-update! DEP-GRAPH lhs + (match-lambda + [(struct refs (tls stxs)) + (make-refs tls (list* pos stxs))]))) + lhs)] + [(struct application (rator rands)) + (for-each (lambda (f) (build-graph! lhs f)) + (list* rator rands))] + [(struct branch (test then else)) + (for-each (lambda (f) (build-graph! lhs f)) + (list test then else))] + [(struct with-cont-mark (key val body)) + (for-each (lambda (f) (build-graph! lhs f)) + (list key val body))] + [(struct beg0 (seq)) + (for-each (lambda (f) (build-graph! lhs f)) + seq)] + [(struct varref (tl)) + (build-graph! lhs tl)] + [(and f (struct assign (id rhs undef-ok?))) + (build-graph! lhs id) + (build-graph! lhs rhs)] + [(struct apply-values (proc args-expr)) + (build-graph! lhs proc) + (build-graph! lhs args-expr)] + [(and f (struct primval (id))) + (void)] + [(and f (struct localref (unbox? pos clear? other-clears? flonum?))) + (void)] + [(and v (not (? form?))) + (void)])) + (define build-graph!** (build-form-memo build-graph!* #:void? #t)) + (define (build-graph! lhs form) (build-graph!** form lhs)) + build-graph!) + +(define (graph-dfs g start-node) + (define visited? (make-hasheq)) + (define (visit-tl n tls stxs) + (if (hash-has-key? visited? n) + (values tls stxs) + (match (dict-ref g n) + [(struct refs (n-tls n-stxs)) + (hash-set! visited? n #t) + (local + [(define-values (new-tls1 new-stxs1) + (for/fold ([new-tls tls] + [new-stxs stxs]) + ([tl (in-list n-tls)]) + (visit-tl tl new-tls new-stxs))) + (define new-stxs2 + (for/fold ([new-stxs new-stxs1]) + ([stx (in-list n-stxs)]) + (define this-stx (visit-stx stx)) + (if this-stx + (list* this-stx new-stxs) + new-stxs)))] + (values (list* n new-tls1) + new-stxs2))]))) + (define stx-visited? (make-hasheq)) + (define (visit-stx n) + (if (hash-has-key? stx-visited? n) + #f + (begin (hash-set! stx-visited? n #t) + n))) + (visit-tl start-node empty empty)) + +; index<=? : number? (listof number?) -> (or/c number? false/c) +; returns the index of n in l and assumes that l is sorted by <= +(define (index<=? n l) + (match l + [(list) #f] + [(list-rest f l) + (cond + [(= n f) + 0] + [(< n f) + #f] + [else + (let ([rec (index<=? n l)]) + (if rec (add1 rec) rec))])])) + +(define (identity x) x) +(define (gc-toplevels-form keep? update-tl update-ts new-ts-midpt) + (define (inner-update form) + (match form + [(struct def-values (ids rhs)) + (if (ormap (compose keep? toplevel-pos) ids) + (make-def-values (map update ids) + (update rhs)) + #f)] + [(? def-syntaxes?) + (error 'gc-tls "Doesn't handle syntax")] + [(? def-for-syntax?) + (error 'gc-tls "Doesn't handle syntax")] + [(struct req (reqs dummy)) + (make-req reqs (update dummy))] + [(? mod?) + (error 'gc-tls "Doesn't handle modules")] + [(struct seq (forms)) + (make-seq (filter identity (map update forms)))] + [(struct splice (forms)) + (make-splice (filter identity (map update forms)))] + [(and l (struct lam (name flags num-params param-types rest? closure-map closure-types max-let-depth body))) + (struct-copy lam l + [body (update body)])] + [(and c (struct closure (code gen-id))) + (struct-copy closure c + [code (update code)])] + [(and cl (struct case-lam (name clauses))) + (struct-copy case-lam cl + [clauses (map update clauses)])] + [(struct let-one (rhs body flonum? unused?)) + (make-let-one (update rhs) (update body) flonum? unused?)] ; Q: is flonum? okay here? + [(and f (struct let-void (count boxes? body))) + (struct-copy let-void f + [body (update body)])] + [(and f (struct install-value (_ _ _ rhs body))) + (struct-copy install-value f + [rhs (update rhs)] + [body (update body)])] + [(struct let-rec (procs body)) + (make-let-rec (map update procs) (update body))] + [(and f (struct boxenv (_ body))) + (struct-copy boxenv f [body (update body)])] + [(and f (struct toplevel (_ pos _ _))) + (struct-copy toplevel f + [pos (update-tl pos)])] + [(and f (struct topsyntax (_ pos _))) + (struct-copy topsyntax f + [pos (update-ts pos)] + [midpt new-ts-midpt])] + [(struct application (rator rands)) + (make-application + (update rator) + (map update rands))] + [(struct branch (test then else)) + (make-branch + (update test) + (update then) + (update else))] + [(struct with-cont-mark (key val body)) + (make-with-cont-mark + (update key) + (update val) + (update body))] + [(struct beg0 (seq)) + (make-beg0 (map update seq))] + [(struct varref (tl)) + (make-varref (update tl))] + [(and f (struct assign (id rhs undef-ok?))) + (struct-copy assign f + [id (update id)] + [rhs (update rhs)])] + [(struct apply-values (proc args-expr)) + (make-apply-values + (update proc) + (update args-expr))] + [(and f (struct primval (id))) + f] + [(and f (struct localref (unbox? pos clear? other-clears? flonum?))) + f] + [(and v (not (? form?))) + v] + )) + (define update + (build-form-memo inner-update)) + update) + +(provide/contract + [gc-toplevels (compilation-top? . -> . compilation-top?)]) \ No newline at end of file diff --git a/collects/compiler/demodularizer/merge.rkt b/collects/compiler/demodularizer/merge.rkt new file mode 100644 index 0000000000..33187add17 --- /dev/null +++ b/collects/compiler/demodularizer/merge.rkt @@ -0,0 +1,165 @@ +#lang racket +(require compiler/zo-parse + "util.rkt" + "mpi.rkt" + "nodep.rkt" + "update-toplevels.rkt") + +(define MODULE-TOPLEVEL-OFFSETS (make-hash)) + +(define (merge-compilation-top top) + (match top + [(struct compilation-top (max-let-depth prefix form)) + (define-values (new-max-let-depth new-prefix gen-new-forms) + (merge-form max-let-depth prefix form)) + (define total-tls (length (prefix-toplevels new-prefix))) + (define total-stxs (length (prefix-stxs new-prefix))) + (define total-lifts (prefix-num-lifts new-prefix)) + (eprintf "max-let-depth ~S to ~S~n" max-let-depth new-max-let-depth) + (eprintf "total toplevels ~S~n" total-tls) + (eprintf "total stxs ~S~n" total-stxs) + (eprintf "num-lifts ~S~n" total-lifts) + (make-compilation-top + new-max-let-depth new-prefix + (make-splice (gen-new-forms new-prefix)))] + [else (error 'merge "unrecognized: ~e" top)])) + +(define (merge-forms max-let-depth prefix forms) + (if (empty? forms) + (values max-let-depth prefix (lambda _ empty)) + (let*-values ([(fmax-let-depth fprefix gen-fform) (merge-form max-let-depth prefix (first forms))] + [(rmax-let-depth rprefix gen-rforms) (merge-forms fmax-let-depth fprefix (rest forms))]) + (values rmax-let-depth + rprefix + (lambda args + (append (apply gen-fform args) + (apply gen-rforms args))))))) + +(define (merge-form max-let-depth prefix form) + (match form + [(? mod?) + (merge-module max-let-depth prefix form)] + [(struct seq (forms)) + (merge-forms max-let-depth prefix forms)] + [(struct splice (forms)) + (merge-forms max-let-depth prefix forms)] + [else + (values max-let-depth prefix (lambda _ (list form)))])) + +(define (merge-prefix root-prefix mod-prefix) + (match root-prefix + [(struct prefix (root-num-lifts root-toplevels root-stxs)) + (match mod-prefix + [(struct prefix (mod-num-lifts mod-toplevels mod-stxs)) + (make-prefix (+ root-num-lifts mod-num-lifts) + (append root-toplevels mod-toplevels) + (append root-stxs mod-stxs))])])) + +(define (compute-new-modvar mv rw) + (match mv + [(struct module-variable (modidx sym pos phase)) + (match rw + [(struct modvar-rewrite (self-modidx provide->toplevel)) + (eprintf "Rewriting ~a of ~S~n" pos (mpi->path* modidx)) + (+ (hash-ref MODULE-TOPLEVEL-OFFSETS self-modidx + (lambda () + (error 'compute-new-modvar "toplevel offset not yet computed: ~S" self-modidx))) + (provide->toplevel sym pos))])])) + +(define (filter-rewritable-module-variable? toplevel-offset mod-toplevels) + (define-values + (i new-toplevels remap) + (for/fold ([i 0] + [new-toplevels empty] + [remap empty]) + ([tl (in-list mod-toplevels)]) + (match tl + [(and mv (struct module-variable (modidx sym pos phase))) + (define rw (get-modvar-rewrite modidx)) + (unless (or (not phase) (zero? phase)) + (error 'eliminate-module-variables "Non-zero phases not supported: ~S" mv)) + (cond + ; Primitive module like #%paramz + [(symbol? rw) + (eprintf "~S from ~S~n" sym rw) + (values (add1 i) + (list* tl new-toplevels) + (list* (+ i toplevel-offset) remap))] + [(module-path-index? rw) + (values (add1 i) + (list* tl new-toplevels) + (list* (+ i toplevel-offset) remap))] + [(modvar-rewrite? rw) + (values i + new-toplevels + (list* (compute-new-modvar mv rw) remap))] + [else + (error 'filter-rewritable-module-variable? "Unsupported module-rewrite: ~S" rw)])] + [tl + (values (add1 i) + (list* tl new-toplevels) + (list* (+ i toplevel-offset) remap))]))) + (values (reverse new-toplevels) + (reverse remap))) + +(define (merge-module max-let-depth top-prefix mod-form) + (match mod-form + [(struct mod (name srcname self-modidx mod-prefix provides requires body syntax-body unexported mod-max-let-depth dummy lang-info internal-context)) + (define toplevel-offset (length (prefix-toplevels top-prefix))) + (define topsyntax-offset (length (prefix-stxs top-prefix))) + (define lift-offset (prefix-num-lifts top-prefix)) + (define mod-toplevels (prefix-toplevels mod-prefix)) + (define-values (new-mod-toplevels toplevel-remap) (filter-rewritable-module-variable? toplevel-offset mod-toplevels)) + (define num-mod-toplevels + (length toplevel-remap)) + (define mod-stxs + (length (prefix-stxs mod-prefix))) + (define mod-num-lifts + (prefix-num-lifts mod-prefix)) + (define new-mod-prefix + (struct-copy prefix mod-prefix + [toplevels new-mod-toplevels])) + (hash-set! MODULE-TOPLEVEL-OFFSETS self-modidx toplevel-offset) + (unless (= (length toplevel-remap) + (length mod-toplevels)) + (error 'merge-module "Not remapping everything: ~S ~S~n" + mod-toplevels toplevel-remap)) + (eprintf "[~S] Incrementing toplevels by ~a~n" + name + toplevel-offset) + (eprintf "[~S] Incrementing lifts by ~a~n" + name + lift-offset) + (eprintf "[~S] Filtered mod-vars from ~a to ~a~n" + name + (length mod-toplevels) + (length new-mod-toplevels)) + (values (max max-let-depth mod-max-let-depth) + (merge-prefix top-prefix new-mod-prefix) + (lambda (top-prefix) + (eprintf "[~S] Updating top-levels\n" name) + (define top-lift-start (prefix-lift-start top-prefix)) + (define mod-lift-start (prefix-lift-start mod-prefix)) + (define total-lifts (prefix-num-lifts top-prefix)) + (define max-toplevel (+ top-lift-start total-lifts)) + (define update + (update-toplevels + (lambda (n) + (cond + [(mod-lift-start . <= . n) + ; This is a lift + (local [(define which-lift (- n mod-lift-start)) + (define lift-tl (+ top-lift-start lift-offset which-lift))] + (when (lift-tl . >= . max-toplevel) + (error 'merge-module "[~S] lift error: orig(~a) which(~a) max(~a) lifts(~a) now(~a)" + name n which-lift num-mod-toplevels mod-num-lifts lift-tl)) + lift-tl)] + [else + (list-ref toplevel-remap n)])) + (lambda (n) + (+ n topsyntax-offset)) + (prefix-syntax-start top-prefix))) + (map update body)))])) + +(provide/contract + [merge-compilation-top (compilation-top? . -> . compilation-top?)]) \ No newline at end of file diff --git a/collects/compiler/demodularizer/module.rkt b/collects/compiler/demodularizer/module.rkt new file mode 100644 index 0000000000..74d7ccd77b --- /dev/null +++ b/collects/compiler/demodularizer/module.rkt @@ -0,0 +1,35 @@ +#lang racket +(require compiler/zo-parse + "util.rkt") + +(define (->module-path-index s) + (if (module-path-index? s) + s + (module-path-index-join `(quote ,s) #f))) + + +(define (wrap-in-kernel-module name srcname lang-info self-modidx top) + (match top + [(struct compilation-top (max-let-depth prefix form)) + (define-values (reqs new-forms) + (partition req? (splice-forms form))) + (define requires + (map (compose ->module-path-index wrapped-datum stx-encoded req-reqs) reqs)) + (make-compilation-top + 0 + (make-prefix 0 (list #f) empty) + (make-mod name srcname + self-modidx + prefix + empty ; provides + (list (cons 0 requires)) + new-forms + empty ; syntax-body + (list empty empty empty) ; unexported + max-let-depth + (make-toplevel 0 0 #f #f) ; dummy + lang-info + #t))])) + +(provide/contract + [wrap-in-kernel-module (symbol? symbol? lang-info/c module-path-index? compilation-top? . -> . compilation-top?)]) \ No newline at end of file diff --git a/collects/compiler/demodularizer/mpi.rkt b/collects/compiler/demodularizer/mpi.rkt new file mode 100644 index 0000000000..ae86a43832 --- /dev/null +++ b/collects/compiler/demodularizer/mpi.rkt @@ -0,0 +1,30 @@ +#lang scheme +(require syntax/modresolve) + +(define current-module-path (make-parameter #f)) + +(define (mpi->string modidx) + (cond + [(symbol? modidx) modidx] + [else + (mpi->path! modidx)])) + +(define MODULE-PATHS (make-hash)) +(define (mpi->path! mpi) + (hash-ref! + MODULE-PATHS mpi + (lambda () + (define _pth + (resolve-module-path-index mpi (current-module-path))) + (if (path? _pth) + (simplify-path _pth #t) + _pth)))) +(define (mpi->path* mpi) + (hash-ref MODULE-PATHS mpi + (lambda () + (error 'mpi->path* "Cannot locate cache of path for ~S~n" mpi)))) + +(provide/contract + [current-module-path (parameter/c path-string?)] + [mpi->path! (module-path-index? . -> . (or/c symbol? path?))] + [mpi->path* (module-path-index? . -> . (or/c symbol? path?))]) \ No newline at end of file diff --git a/collects/compiler/demodularizer/nodep.rkt b/collects/compiler/demodularizer/nodep.rkt new file mode 100644 index 0000000000..f6878c2c0d --- /dev/null +++ b/collects/compiler/demodularizer/nodep.rkt @@ -0,0 +1,178 @@ +#lang racket +(require compiler/zo-parse + "util.rkt" + "mpi.rkt" + racket/set) + +(define excluded-modules (make-parameter null)) + +(define (nodep-file file-to-batch excluded) + (excluded-modules excluded) + (match (get-nodep-module-code/path file-to-batch 0) + [(struct @phase (_ (struct module-code (modvar-rewrite lang-info ctop)))) + (values ctop lang-info (modvar-rewrite-modidx modvar-rewrite))])) + +(define (path->comp-top pth) + (call-with-input-file pth zo-parse)) + +(define (excluded? pth) + (set-member? (excluded-modules) (path->string pth))) + +(define MODULE-IDX-MAP (make-hash)) +(define (get-nodep-module-code/index mpi phase) + (define pth (mpi->path! mpi)) + (cond + [(symbol? pth) + (hash-set! MODULE-IDX-MAP pth pth) + pth] + [(excluded? pth) + (hash-set! MODULE-IDX-MAP pth mpi) + mpi] + [else + (get-nodep-module-code/path pth phase)])) +(define (get-modvar-rewrite modidx) + (define pth (mpi->path* modidx)) + (hash-ref MODULE-IDX-MAP pth + (lambda () + (error 'get-modvar-rewrite "Cannot locate modvar rewrite for ~S" pth)))) + +(define-struct @phase (phase code)) +(define-struct modvar-rewrite (modidx provide->toplevel)) +(define-struct module-code (modvar-rewrite lang-info ctop)) +(define @phase-ctop (compose module-code-ctop @phase-code)) + +(define PHASE*MODULE-CACHE (make-hash)) +(define (get-nodep-module-code/path pth phase) + (define MODULE-CACHE + (hash-ref! PHASE*MODULE-CACHE phase make-hash)) + (if (hash-ref MODULE-CACHE pth #f) + #f + (hash-ref! + MODULE-CACHE pth + (lambda () + (define-values (base file dir?) (split-path pth)) + (define base-directory + (if (path? base) + (path->complete-path base (current-directory)) + (current-directory))) + (define-values (modvar-rewrite lang-info ctop) + (begin + (fprintf (current-error-port) "Load ~S @ ~S~n" pth phase) + (nodep/dir + (parameterize ([current-load-relative-directory base-directory]) + (path->comp-top + (build-compiled-path + base + (path-add-suffix file #".zo")))) + pth + phase))) + (when (and phase (zero? phase)) + (hash-set! MODULE-IDX-MAP pth modvar-rewrite)) + (make-@phase + phase + (make-module-code modvar-rewrite lang-info ctop)))))) + +(define (nodep/dir top pth phase) + (parameterize ([current-module-path pth]) + (nodep top phase))) + +(define (nodep top phase) + (match top + [(struct compilation-top (max-let-depth prefix form)) + (define-values (modvar-rewrite lang-info new-form) (nodep-form form phase)) + (values modvar-rewrite lang-info (make-compilation-top max-let-depth prefix new-form))] + [else (error 'nodep "unrecognized: ~e" top)])) + +(define (nodep-form form phase) + (if (mod? form) + (local [(define-values (modvar-rewrite lang-info mods) (nodep-module form phase))] + (values modvar-rewrite lang-info (make-splice mods))) + (error 'nodep-form "Doesn't support non mod forms"))) + +; XXX interning is hack to fix test/add04.ss and provide/contract renaming +(define (intern s) (string->symbol (symbol->string s))) +(define (construct-provide->toplevel prefix provides) + (define provide-ht (make-hasheq)) + (for ([tl (prefix-toplevels prefix)] + [i (in-naturals)]) + (when (symbol? tl) + (hash-set! provide-ht (intern tl) i))) + (lambda (sym pos) + (eprintf "Looking up ~S@~a~n" sym pos) + (hash-ref provide-ht (intern sym) + (lambda () + (error 'provide->toplevel "Cannot find ~S in ~S" sym prefix))))) + +(define (nodep-module mod-form phase) + (match mod-form + [(struct mod (name srcname self-modidx prefix provides requires body syntax-body unexported max-let-depth dummy lang-info internal-context)) + (define new-prefix prefix) + ; Cache all the mpi paths + (for-each (match-lambda + [(and mv (struct module-variable (modidx sym pos phase))) + (mpi->path! modidx)] + [tl + (void)]) + (prefix-toplevels new-prefix)) + (eprintf "[~S] module-variables: ~S~n" name (length (filter module-variable? (prefix-toplevels new-prefix)))) + (values (make-modvar-rewrite self-modidx (construct-provide->toplevel new-prefix provides)) + lang-info + (append (requires->modlist requires phase) + (if (and phase (zero? phase)) + (begin (eprintf "[~S] lang-info : ~S~n" name lang-info) ; XXX Seems to always be #f now + (list (make-mod name srcname self-modidx new-prefix provides requires body empty + unexported max-let-depth dummy lang-info internal-context))) + (begin (eprintf "[~S] Dropping module @ ~S~n" name phase) + empty))))] + [else (error 'nodep-module "huh?: ~e" mod-form)])) + +(define (+* l r) + (if (and l r) (+ l r) #f)) + +(define (requires->modlist requires current-phase) + (apply append + (map + (match-lambda + [(list-rest req-phase mpis) + (define phase (+* current-phase req-phase)) + (apply append + (map (compose extract-modules (lambda (mpi) (get-nodep-module-code/index mpi phase))) mpis))]) + requires))) + +(define (all-but-last l) + (reverse (rest (reverse l)))) + +(define REQUIRED (make-hasheq)) +(define (extract-modules ct) + (cond + [(compilation-top? ct) + (match (compilation-top-code ct) + [(and m (? mod?)) + (list m)] + [(struct splice (mods)) + mods])] + [(symbol? ct) + (if (hash-has-key? REQUIRED ct) + empty + (begin + (hash-set! REQUIRED ct #t) + (list (make-req (make-stx (make-wrapped ct empty #f)) (make-toplevel 0 0 #f #f)))))] + [(module-path-index? ct) + (if (hash-has-key? REQUIRED ct) + empty + (begin + (hash-set! REQUIRED ct #t) + (list (make-req (make-stx (make-wrapped ct empty #f)) (make-toplevel 0 0 #f #f)))))] + [(not ct) + empty] + [(@phase? ct) + (extract-modules (@phase-ctop ct))] + [else + (error 'extract-modules "Unknown extraction: ~S~n" ct)])) + +(provide/contract + [struct modvar-rewrite + ([modidx module-path-index?] + [provide->toplevel (symbol? exact-nonnegative-integer? . -> . exact-nonnegative-integer?)])] + [get-modvar-rewrite (module-path-index? . -> . (or/c symbol? modvar-rewrite? module-path-index?))] + [nodep-file (path-string? set? . -> . (values compilation-top? lang-info/c module-path-index?))]) \ No newline at end of file diff --git a/collects/compiler/demodularizer/update-toplevels.rkt b/collects/compiler/demodularizer/update-toplevels.rkt new file mode 100644 index 0000000000..701b4475d8 --- /dev/null +++ b/collects/compiler/demodularizer/update-toplevels.rkt @@ -0,0 +1,97 @@ +#lang racket +(require compiler/zo-parse + "util.rkt") + +(define (update-toplevels toplevel-updater topsyntax-updater topsyntax-new-midpt) + (define (inner-update form) + (match form + [(struct def-values (ids rhs)) + (make-def-values (map update ids) + (update rhs))] + [(? def-syntaxes?) + (error 'increment "Doesn't handle syntax")] + [(? def-for-syntax?) + (error 'increment "Doesn't handle syntax")] + [(struct req (reqs dummy)) + (make-req reqs (update dummy))] + [(? mod?) + (error 'increment "Doesn't handle modules")] + [(struct seq (forms)) + (make-seq (map update forms))] + [(struct splice (forms)) + (make-splice (map update forms))] + [(and l (struct lam (name flags num-params param-types rest? closure-map closure-types max-let-depth body))) + (struct-copy lam l + [body (update body)])] + [(and c (struct closure (code gen-id))) + (struct-copy closure c + [code (update code)])] + [(and cl (struct case-lam (name clauses))) + (define new-clauses + (map update clauses)) + (struct-copy case-lam cl + [clauses new-clauses])] + [(struct let-one (rhs body flonum? unused?)) + (make-let-one (update rhs) (update body) flonum? unused?)] ; Q: is it okay to just pass in the old value for flonum? + [(and f (struct let-void (count boxes? body))) + (struct-copy let-void f + [body (update body)])] + [(and f (struct install-value (_ _ _ rhs body))) + (struct-copy install-value f + [rhs (update rhs)] + [body (update body)])] + [(struct let-rec (procs body)) + (make-let-rec (map update procs) (update body))] + [(and f (struct boxenv (_ body))) + (struct-copy boxenv f [body (update body)])] + [(and f (struct toplevel (_ pos _ _))) + (struct-copy toplevel f + [pos (toplevel-updater pos)])] + [(and f (struct topsyntax (_ pos _))) + (struct-copy topsyntax f + [pos (topsyntax-updater pos)] + [midpt topsyntax-new-midpt])] + [(struct application (rator rands)) + (make-application + (update rator) + (map update rands))] + [(struct branch (test then else)) + (make-branch + (update test) + (update then) + (update else))] + [(struct with-cont-mark (key val body)) + (make-with-cont-mark + (update key) + (update val) + (update body))] + [(struct beg0 (seq)) + (make-beg0 (map update seq))] + [(struct varref (tl)) + (make-varref (update tl))] + [(and f (struct assign (id rhs undef-ok?))) + (struct-copy assign f + [id (update id)] + [rhs (update rhs)])] + [(struct apply-values (proc args-expr)) + (make-apply-values + (update proc) + (update args-expr))] + [(and f (struct primval (id))) + f] + [(and f (struct localref (unbox? pos clear? other-clears? flonum?))) + f] + [(and f (not (? form?))) + f] + )) + (define update + (build-form-memo inner-update)) + update) + +(provide/contract + [update-toplevels + ((exact-nonnegative-integer? . -> . exact-nonnegative-integer?) + (exact-nonnegative-integer? . -> . exact-nonnegative-integer?) + exact-nonnegative-integer? + . -> . + (form? . -> . form?))]) diff --git a/collects/compiler/demodularizer/util.rkt b/collects/compiler/demodularizer/util.rkt new file mode 100644 index 0000000000..7f8c653049 --- /dev/null +++ b/collects/compiler/demodularizer/util.rkt @@ -0,0 +1,56 @@ +#lang racket +(require compiler/zo-parse) + +(define (prefix-syntax-start pre) + (length (prefix-toplevels pre))) + +(define (prefix-lift-start pre) + (define syntax-start (prefix-syntax-start pre)) + (define total-stxs (length (prefix-stxs pre))) + (+ syntax-start total-stxs (if (zero? total-stxs) 0 1))) + +(define (eprintf . args) + (apply fprintf (current-error-port) args)) + +(define (build-form-memo inner-update #:void? [void? #f]) + (define memo (make-hasheq)) + (define (update form . args) + (cond + [(hash-ref memo form #f) + => (λ (x) x)] + [else + (let () + (define ph (make-placeholder #f)) + (hash-set! memo form ph) + (define nv (apply inner-update form args)) + (placeholder-set! ph nv) + nv)])) + (define (first-update form . args) + (define final (apply update form args)) + (make-reader-graph final)) + first-update) + +(define lang-info/c + (or/c #f (vector/c module-path? symbol? any/c))) + + +(define (build-compiled-path base name) + (build-path + (cond [(path? base) base] + [(eq? base 'relative) 'same] + [(eq? base #f) (error 'batch "Impossible")]) + "compiled" + name)) + + +(provide/contract + [prefix-syntax-start (prefix? . -> . exact-nonnegative-integer?)] + [prefix-lift-start (prefix? . -> . exact-nonnegative-integer?)] + [eprintf ((string?) () #:rest (listof any/c) . ->* . void)] + [build-form-memo + (((unconstrained-domain-> any/c)) + (#:void? boolean?) + . ->* . + (unconstrained-domain-> any/c))] + [lang-info/c contract?] + [build-compiled-path ((or/c path-string? (symbols 'relative) false/c) path-string? . -> . (or/c path-string? (symbols 'same 'up)))]) \ No newline at end of file