better traversal for more sharing in zo-marshal and some refactoring.

original commit: f4abd35f5c
This commit is contained in:
Blake Johnson 2010-08-04 16:24:47 -06:00 committed by Jay McCarthy
commit f907cbf361

View File

@ -1,24 +1,19 @@
#lang scheme/base #lang racket/base
(require compiler/zo-structs (require compiler/zo-structs
scheme/port unstable/struct
racket/port
racket/vector racket/vector
scheme/match racket/match
scheme/contract racket/contract
scheme/local racket/local
scheme/list racket/list
scheme/dict) racket/dict
racket/function)
(provide/contract (provide/contract
[zo-marshal (compilation-top? . -> . bytes?)] [zo-marshal (compilation-top? . -> . bytes?)]
[zo-marshal-to (compilation-top? output-port? . -> . void?)]) [zo-marshal-to (compilation-top? output-port? . -> . void?)])
#| Unresolved Issues
Less sharing occurs than in the C implementation, creating much larger files
|#
(define current-wrapped-ht (make-parameter #f))
(define (zo-marshal top) (define (zo-marshal top)
(define bs (open-output-bytes)) (define bs (open-output-bytes))
(zo-marshal-to top bs) (zo-marshal-to top bs)
@ -27,232 +22,131 @@
(define (zo-marshal-to top outp) (define (zo-marshal-to top outp)
(match top (match top
[(struct compilation-top (max-let-depth prefix form)) [(struct compilation-top (max-let-depth prefix form))
(define encountered (make-hasheq))
(define shared (make-hasheq)) (define shared (make-hasheq))
(define wrapped (make-hasheq)) (define wrapped (make-hasheq))
(define (visit v) (define (shared-obj-pos v)
(if (hash-ref shared v #f) (hash-ref shared v #f))
#f (define (share! v)
(if (hash-ref encountered v #f) (hash-set! shared v (add1 (hash-count shared))))
(define ct
(list* max-let-depth prefix (protect-quote form)))
; Compute what objects are in ct multiple times (by equal?)
(local [(define encountered (make-hasheq))
(define (encountered? v)
(hash-ref encountered v #f))
(define (encounter! v)
(hash-set! encountered v #t))
(define (visit! v)
(cond
[(shared-obj-pos v)
#f]
[(encountered? v)
(share! v)
#f]
[else
(encounter! v)
; All closures MUST be in the symbol table
(when (closure? v)
(share! v))
#t]))]
(traverse wrapped visit! ct))
; Hash tables aren't sorted, so we need to order them
(define in-order-shareds
(sort (hash-map shared (lambda (k v) (cons v k)))
<
#:key car))
(define (write-all outp)
; As we are writing the symbol table entry for v,
; the writing code will attempt to see if v is shared and
; insert a symtable reference, which would be wrong.
; So, the first time it is encountered while writing,
; we should pretend it ISN'T shared, so it is actually written.
; However, subsequent times (or for other shared values)
; we defer to the normal 'shared-obj-pos'
(define (shared-obj-pos/modulo-v v)
(define skip? #t)
(lambda (v2)
(if (and skip? (eq? v v2))
(begin (begin
(hash-set! shared v (add1 (hash-count shared))) (set! skip? #f)
#f) #f)
(begin (shared-obj-pos v2))))
(hash-set! encountered v #t) ; Write the symbol table, computing offsets as we go
(when (closure? v) (define offsets
(hash-set! shared v (add1 (hash-count shared)))) (for/list ([k*v (in-list in-order-shareds)])
#t)))) (define v (cdr k*v))
(define (v-skipping v) (begin0
(define skip? #t) (file-position outp)
(lambda (v2) (out-anything v (make-out outp (shared-obj-pos/modulo-v v) wrapped)))))
(if (and skip? (eq? v v2)) ; Compute where we ended
(begin (define post-shared (file-position outp))
(set! skip? #f) ; Write the entire ctop
#f) (out-data ct
(hash-ref shared v2 #f)))) (make-out outp shared-obj-pos wrapped))
(parameterize ([current-wrapped-ht wrapped]) (values offsets post-shared (file-position outp)))
(traverse-prefix prefix visit)
(traverse-form form visit))
(local [(define in-order-shareds
(sort (hash-map shared (lambda (k v) (cons v k)))
<
#:key car))
(define (write-all outp)
(define offsets
(for/list ([k*v (in-list in-order-shareds)])
(define v (cdr k*v))
(begin0
(file-position outp)
(out-anything v (make-out outp (v-skipping v) wrapped)))))
(define post-shared (file-position outp))
(out-data (list* max-let-depth prefix (protect-quote form))
(make-out outp (lambda (v) (hash-ref shared v #f)) wrapped))
(values offsets post-shared (file-position outp)))
(define counting-p (open-output-nowhere))
(define-values (offsets post-shared all-forms-length)
(write-all counting-p))
(define all-short? (post-shared . < . #xFFFF))
(define version-bs (string->bytes/latin-1 (version)))]
(write-bytes #"#~" outp)
(write-bytes (bytes (bytes-length version-bs)) outp)
(write-bytes version-bs outp)
(define symtabsize (add1 (hash-count shared)))
(write-bytes (int->bytes symtabsize) outp)
(write-bytes (bytes (if all-short? 1 0)) outp)
(for ([o (in-list offsets)])
(write-bytes (integer->integer-bytes o (if all-short? 2 4) #f #f) outp))
(write-bytes (int->bytes post-shared) outp)
(write-bytes (int->bytes all-forms-length) outp)
(write-all outp)
(void))]))
;; ---------------------------------------- ; Compute where the symbol table ends
(define counting-p (open-output-nowhere))
(define-values (offsets post-shared all-forms-length)
(write-all counting-p))
(define (traverse-prefix a-prefix visit) ; Write the compiled form header
(match a-prefix (write-bytes #"#~" outp)
[(struct prefix (num-lifts toplevels stxs))
(for-each (lambda (stx) (traverse-toplevel stx visit)) toplevels)
(for-each (lambda (stx) (traverse-stx stx visit)) stxs)]))
(define (traverse-module mod-form visit) ; Write the version (notice that it isn't the same as out-string)
(match mod-form (define version-bs (string->bytes/latin-1 (version)))
[(struct mod (name srcname self-modidx prefix provides requires body syntax-body unexported (write-bytes (bytes (bytes-length version-bs)) outp)
max-let-depth dummy lang-info internal-context)) (write-bytes version-bs outp)
(traverse-data name visit)
(traverse-data srcname visit)
(traverse-data self-modidx visit)
(traverse-prefix prefix visit)
(for-each (lambda (f) (map (lambda (v) (traverse-data v visit)) (cdr f))) requires)
(for-each (lambda (f) (traverse-form f visit)) body)
(for-each (lambda (f) (traverse-form f visit)) syntax-body)
(traverse-data lang-info visit)
(traverse-data internal-context visit)]))
(define (traverse-toplevel tl visit) ; Write the symbol table information (size, offsets)
(match tl (define symtabsize (add1 (hash-count shared)))
[#f (void)] (write-bytes (int->bytes symtabsize) outp)
[(? symbol?) (traverse-data tl visit)] (define all-short? (post-shared . < . #xFFFF))
[(struct global-bucket (name)) (write-bytes (bytes (if all-short? 1 0)) outp)
(void)] (for ([o (in-list offsets)])
[(struct module-variable (modidx sym pos phase)) (write-bytes (integer->integer-bytes o (if all-short? 2 4) #f #f) outp))
(visit tl)
(let-values ([(p b) (module-path-index-split modidx)])
(if (symbol? p)
(traverse-data p visit)
(traverse-data modidx visit)))
(traverse-data sym visit)]))
(define (traverse-wrapped w visit) ; Post-shared is where the ctop actually starts
(define ew (hash-ref! (current-wrapped-ht) w (lambda () (encode-wrapped w)))) (write-bytes (int->bytes post-shared) outp)
(traverse-data ew visit)) ; This is where the file should end
(write-bytes (int->bytes all-forms-length) outp)
(define (traverse-stx s visit) ; Write the symbol table then the ctop
(when s (write-all outp)
(traverse-wrapped (stx-encoded s) visit)))
(define (traverse-form form visit)
(match form
[(? mod?)
(traverse-module form visit)]
[(struct def-values (ids rhs))
(traverse-expr rhs visit)]
[(struct def-syntaxes (ids rhs prefix max-let-depth))
(traverse-prefix prefix visit)
(traverse-expr rhs visit)]
[(struct def-for-syntax (ids rhs prefix max-let-depth))
(traverse-prefix prefix visit)
(traverse-expr rhs visit)]
[(struct seq (forms))
(for-each (lambda (f) (traverse-form f visit)) forms)]
[(struct splice (forms))
(for-each (lambda (f) (traverse-form f visit)) forms)]
[else
(traverse-expr form visit)]))
(define (traverse-expr expr visit)
(match expr
[(struct toplevel (depth pos const? ready?))
(void)]
[(struct topsyntax (depth pos midpt))
(void)]
[(struct primval (id))
(void)]
[(struct assign (id rhs undef-ok?))
(traverse-expr rhs visit)]
[(struct localref (unbox? offset clear? other-clears? flonum?))
(void)]
[(? lam?)
(traverse-lam expr visit)]
[(struct case-lam (name lams))
(traverse-data name visit)
(for-each (lambda (lam) (traverse-lam lam visit)) lams)]
[(struct let-one (rhs body flonum? unused?))
(traverse-expr rhs visit)
(traverse-expr body visit)]
[(struct let-void (count boxes? body))
(traverse-expr body visit)]
[(struct let-rec (procs body))
(for-each (lambda (lam) (traverse-lam lam visit)) procs)
(traverse-expr body visit)]
[(struct install-value (count pos boxes? rhs body))
(traverse-expr rhs visit)
(traverse-expr body visit)]
[(struct boxenv (pos body))
(traverse-expr body visit)]
[(struct branch (test then else))
(traverse-expr test visit)
(traverse-expr then visit)
(traverse-expr else visit)]
[(struct application (rator rands))
(traverse-expr rator visit)
(for-each (lambda (rand) (traverse-expr rand visit)) rands)]
[(struct apply-values (proc args-expr))
(traverse-expr proc visit)
(traverse-expr args-expr visit)]
[(struct seq (exprs))
(for-each (lambda (expr) (traverse-form expr visit)) exprs)]
[(struct beg0 (exprs))
(for-each (lambda (expr) (traverse-expr expr visit)) exprs)]
[(struct with-cont-mark (key val body))
(traverse-expr key visit)
(traverse-expr val visit)
(traverse-expr body visit)]
[(struct closure (lam gen-id))
(traverse-lam expr visit)]
[(struct indirect (val))
(traverse-expr val visit)]
[else (traverse-data expr visit)]))
(define (traverse-data expr visit)
(cond
[(or (symbol? expr)
(keyword? expr)
(string? expr)
(bytes? expr)
(path? expr))
(visit expr)]
[(module-path-index? expr)
(visit expr)
(let-values ([(name base) (module-path-index-split expr)])
(traverse-data name visit)
(traverse-data base visit))]
[(pair? expr)
(traverse-data (car expr) visit)
(traverse-data (cdr expr) visit)]
[(vector? expr)
(for ([e (in-vector expr)])
(traverse-data e visit))]
[(box? expr)
(traverse-data (unbox expr) visit)]
[(stx? expr)
(traverse-stx expr visit)]
[(wrapped? expr)
(traverse-wrapped expr visit)]
[(hash? expr)
(when (visit expr)
(for ([(k v) (in-hash expr)])
(traverse-data k visit)
(traverse-data v visit)))]
[(prefab-struct-key expr)
(when (visit expr)
(let ([v (struct->vector expr)])
(for ([i (in-range 1 (vector-length v))])
(traverse-data (vector-ref v i) visit))))]
[(protected-symref? expr)
(visit (protected-symref-val expr))]
[else
(void)])) (void)]))
(define (traverse-lam expr visit) (define (traverse wrapped-ht visit! expr)
(match expr (when (visit! expr)
[(struct indirect (val)) (traverse-lam val visit)] (match expr
[(struct closure (lam gen-id)) [(? wrapped? w)
(when (visit expr) (define encoded-w
(traverse-lam lam visit))] (hash-ref! wrapped-ht w (lambda () (encode-wrapped w))))
[(struct lam (name flags num-params param-types rest? closure-map closure-types max-let-depth body)) (traverse wrapped-ht visit! encoded-w)]
(traverse-data name visit) [(? prefab-struct-key)
(traverse-expr body visit)])) (map (curry traverse wrapped-ht visit!) (struct->list expr))]
[(cons l r)
(traverse wrapped-ht visit! l)
(traverse wrapped-ht visit! r)]
[(? vector?)
(for ([v (in-vector expr)])
(traverse wrapped-ht visit! v))]
[(? hash?)
(for ([(k v) (in-hash expr)])
(traverse wrapped-ht visit! k)
(traverse wrapped-ht visit! v))]
[(? module-path-index?)
(define-values (name base) (module-path-index-split expr))
(traverse wrapped-ht visit! name)
(traverse wrapped-ht visit! base)]
[(box v)
(traverse wrapped-ht visit! v)]
[(protected-symref v)
(traverse wrapped-ht visit! v)]
[(quoted v)
(traverse wrapped-ht visit! v)]
[else (void)])))
;; ---------------------------------------- ;; ----------------------------------------
@ -1139,9 +1033,7 @@
(define-struct quoted (v)) (define-struct quoted (v))
; protect-quote caused some things to be sent to write. But there are some things (like paths) that can be read and passed to protect-quote that cannot be 'read' in after 'write', so we turned it off
(define (protect-quote v) (define (protect-quote v)
#;v
(if (or (pair? v) (vector? v) (prefab-struct-key v) (box? v) (hash? v) (svector? v)) (if (or (pair? v) (vector? v) (prefab-struct-key v) (box? v) (hash? v) (svector? v))
(make-quoted v) (make-quoted v)
v)) v))