diff --git a/collects/compiler/zo-marshal.ss b/collects/compiler/zo-marshal.ss new file mode 100644 index 0000000000..8091e93e1b --- /dev/null +++ b/collects/compiler/zo-marshal.ss @@ -0,0 +1,683 @@ +#lang scheme/base +(require compiler/zo-parse + scheme/match) + +(provide zo-marshal) + +;; Doesn't write as compactly as MzScheme, since list and pair sequences +;; are not compated, and symbols are not written in short form + +(define (zo-marshal top) + (match top + [(struct compilation-top (max-let-depth prefix form)) + (let ([encountered (make-hasheq)] + [shared (make-hasheq)]) + (let ([visit (lambda (v) + (if (hash-ref shared v #f) + #f + (if (hash-ref encountered v #f) + (begin + (hash-set! shared v (add1 (hash-count shared))) + #f) + (begin + (hash-set! encountered v #t) + #t))))]) + (traverse-prefix prefix visit) + (traverse-form form visit)) + (let* ([s (open-output-bytes)] + [out (make-out s (lambda (v) (hash-ref shared v #f)))] + [offsets + (map (lambda (v) + (let ([v (cdr v)]) + (begin0 + (file-position s) + (out-anything v (make-out + s + (let ([skip? #t]) + (lambda (v2) + (if (and skip? (eq? v v2)) + (begin + (set! skip? #f) + #f) + (hash-ref shared v2 #f))))))))) + (sort (hash-map shared (lambda (k v) (cons v k))) + < + #:key car))] + [post-shared (file-position s)] + [all-short? (post-shared . < . #xFFFF)]) + (out-data (list* max-let-depth prefix (protect-quote form)) out) + (let ([res (get-output-bytes s)]) + (bytes-append #"#~" + (bytes (string-length (version))) + (string->bytes/latin-1 (version)) + (int->bytes (add1 (hash-count shared))) + (bytes (if all-short? + 1 + 0)) + (apply + bytes-append + (map (lambda (o) + (integer->integer-bytes o + (if all-short? 2 4) + #f + #f)) + offsets)) + (int->bytes post-shared) + (int->bytes (bytes-length res)) + res))))])) + +;; ---------------------------------------- + +(define (traverse-prefix a-prefix visit) + (match a-prefix + [(struct prefix (num-lifts toplevels stxs)) + (for-each (lambda (stx) (traverse-toplevel stx visit)) stxs) + (for-each (lambda (stx) (traverse-stx stx visit)) stxs)])) + +(define (traverse-module mod-form visit) + (match mod-form + [(struct mod (name self-modidx prefix provides requires body syntax-body max-let-depth)) + (error "cannot handle modules, yet") + (traverse-data name visit) + (traverse-data self-modidx visit) + (traverse-prefix prefix visit) + (for-each (lambda (f) (traverse-form f prefix)) body) + (for-each (lambda (f) (traverse-form f prefix)) syntax-body)])) + +(define (traverse-toplevel tl visit) + (match tl + [#f (void)] + [(? symbol?) (visit tl)] + [(struct global-bucket (name)) + (void)] + [(struct module-variable (modidx sym pos phase)) + (visit tl) + (let-values ([(p b) (module-path-index-split modidx)]) + (if (symbol? p) + (traverse-data p visit) + (traverse-data modidx visit))) + (traverse-data sym visit)])) + +(define (traverse-stx tl visit) + (error "cannot handle syntax objects, yet")) + +(define (traverse-form form visit) + (match form + [(? mod?) + (traverse-module form visit)] + [(struct def-values (ids rhs)) + (traverse-expr rhs visit)] + [(struct def-syntaxes (ids rhs prefix max-let-depth)) + (traverse-prefix prefix visit) + (traverse-expr rhs visit)] + [(struct def-for-syntax (ids rhs prefix max-let-depth)) + (traverse-prefix prefix visit) + (traverse-expr rhs visit)] + [(struct seq (forms)) + (for-each (lambda (f) (traverse-form f visit)) forms)] + [(struct splice (forms)) + (for-each (lambda (f) (traverse-form f visit)) forms)] + [else + (traverse-expr form visit)])) + +(define (traverse-expr expr visit) + (match expr + [(struct toplevel (depth pos const? ready?)) + (void)] + [(struct topsyntax (depth pos midpt)) + (void)] + [(struct primval (id)) + (void)] + [(struct assign (id rhs undef-ok?)) + (traverse-expr rhs)] + [(struct localref (unbox? offset clear? other-clears?)) + (void)] + [(? lam?) + (traverse-lam expr visit)] + [(struct case-lam (name lams)) + (traverse-data name visit) + (for-each (lambda (lam) (traverse-lam expr visit)) lams)] + [(struct let-one (rhs body)) + (traverse-expr rhs visit) + (traverse-expr body visit)] + [(struct let-void (count boxes? body)) + (traverse-expr body visit)] + [(struct let-rec (procs body)) + (for-each (lambda (lam) (traverse-lam lam visit)) procs) + (traverse-expr body visit)] + [(struct install-value (count pos boxes? rhs body)) + (traverse-expr rhs visit) + (traverse-expr body visit)] + [(struct boxenv (pos body)) + (traverse-expr body visit)] + [(struct branch (test then else)) + (traverse-expr test visit) + (traverse-expr then visit) + (traverse-expr else visit)] + [(struct application (rator rands)) + (traverse-expr rator visit) + (for-each (lambda (rand) (traverse-expr rand visit)) rands)] + [(struct apply-values (proc args-expr)) + (traverse-expr proc visit) + (traverse-expr args-expr visit)] + [(struct seq (exprs)) + (for-each (lambda (expr) (traverse-expr expr visit)) exprs)] + [(struct beg0 (exprs)) + (for-each (lambda (expr) (traverse-expr expr visit)) exprs)] + [(struct with-cont-mark (key val body)) + (traverse-expr key visit) + (traverse-expr val visit) + (traverse-expr body visit)] + [(struct closure (lam gen-id)) + (traverse-lam expr visit)] + [(struct indirect (val)) + (traverse-expr val visit)] + [else (traverse-data expr visit)])) + +(define (traverse-data expr visit) + (cond + [(or (symbol? expr) + (keyword? expr) + (string? expr) + (bytes? expr) + (path? expr) + (module-path-index? expr)) + (visit expr)] + [(pair? expr) + (traverse-data (car expr) visit) + (traverse-data (cdr expr) visit)] + [else (void)])) + +(define (traverse-lam expr visit) + (match expr + [(struct indirect (val)) (traverse-lam expr visit)] + [(struct closure (lam gen-id)) + (when (visit expr) + (traverse-lam expr visit))] + [(struct lam (name flags num-params param-types rest? closure-map max-let-depth body)) + (traverse-data name visit) + (traverse-expr body visit)])) + +;; ---------------------------------------- + +(define toplevel-type-num 0) +(define syntax-type-num 3) +(define sequence-type-num 7) +(define unclosed-procedure-type-num 9) +(define let-value-type-num 10) +(define let-void-type-num 11) +(define letrec-type-num 12) +(define wcm-type-num 14) +(define quote-syntax-type-num 15) +(define variable-type-num 24) +(define top-type-num 87) +(define case-lambda-sequence-type-num 96) +(define prefix-type-num 103) + +(define-syntax define-enum + (syntax-rules () + [(_ n) (begin)] + [(_ n id . rest) + (begin + (define id n) + (define-enum (add1 n) . rest))])) + +(define-enum + 0 + CPT_ESCAPE + CPT_SYMBOL + CPT_SYMREF + CPT_WEIRD_SYMBOL + CPT_KEYWORD + CPT_BYTE_STRING + CPT_CHAR_STRING + CPT_CHAR + CPT_INT + CPT_NULL + CPT_TRUE + CPT_FALSE + CPT_VOID + CPT_BOX + CPT_PAIR + CPT_LIST + CPT_VECTOR + CPT_HASH_TABLE + CPT_STX + CPT_GSTX + CPT_MARSHALLED + CPT_QUOTE + CPT_REFERENCE + CPT_LOCAL + CPT_LOCAL_UNBOX + CPT_SVECTOR + CPT_APPLICATION + CPT_LET_ONE + CPT_BRANCH + CPT_MODULE_INDEX + CPT_MODULE_VAR + CPT_PATH + CPT_CLOSURE + CPT_DELAY_REF + CPT_PREFAB) + +(define-enum + 0 + DEFINE_VALUES_EXPD + DEFINE_SYNTAX_EXPD + SET_EXPD + CASE_LAMBDA_EXPD + BEGIN0_EXPD + BOXENV_EXPD + MODULE_EXPD + REQUIRE_EXPD + DEFINE_FOR_SYNTAX_EXPD + REF_EXPD + APPVALS_EXPD + SPLICE_EXPD) + +(define CPT_SMALL_LOCAL_START 192) +(define CPT_SMALL_LOCAL_END 207) +(define CPT_SMALL_LOCAL_UNBOX_START 207) +(define CPT_SMALL_LOCAL_UNBOX_END 222) + +(define CPT_SMALL_APPLICATION_START 247) +(define CPT_SMALL_APPLICATION_END 255) + +(define CLOS_HAS_REST 1) +(define CLOS_HAS_REF_ARGS 2) +(define CLOS_PRESERVES_MARKS 4) +(define CLOS_IS_METHOD 16) +(define CLOS_SINGLE_RESULT 32) + +(define BITS_PER_MZSHORT 32) + +(define *dummy* #f) + +(define (int->bytes x) + (integer->integer-bytes x + 4 + #f + #f)) + +(define-struct case-seq (name lams)) + +(define-struct out (s shared-index)) + +(define (out-shared v out k) + (let ([v ((out-shared-index out) v)]) + (if v + (begin + (out-byte CPT_SYMREF out) + (out-number v out)) + (k)))) + +(define (out-byte v out) + (write-byte v (out-s out))) + +(define (out-bytes b out) + (write-bytes b (out-s out))) + +(define (out-number n out) + (cond + [(n . < . 0) + (if (n . > . -32) + (out-byte (bitwise-ior #xC0 (- n)) out) + (begin + (out-byte #xE0 out) + (out-bytes (int->bytes (- n)) out)))] + [(n . < . 128) + (out-byte n out)] + [(n . < . #x4000) + (out-byte (bitwise-ior #x80 (bitwise-and n #x3F)) out) + (out-byte (bitwise-and #xFF (arithmetic-shift n -6)) out)] + [else + (out-bytes #xF0 out) + (out-bytes (int->bytes n) out)])) + +(define (out-syntax key val out) + (out-marshaled syntax-type-num (list* key val) out)) + +(define (out-marshaled type-num val out) + (out-byte CPT_MARSHALLED out) + (out-number type-num out) + (out-data val out)) + +(define (out-anything v out) + (cond + [(module-variable? v) + (out-toplevel v out)] + [(closure? v) + (out-expr v out)] + [else + (out-data v out)])) + +(define (out-prefix a-prefix out) + (match a-prefix + [(struct prefix (num-lifts toplevels stxs)) + (out-marshaled + prefix-type-num + (cons num-lifts + (cons (list->vector toplevels) + (list->vector stxs))) + out)])) + +(define (out-module mod-form out) + (match mod-form + [(struct mod (name self-modidx prefix provides requires body syntax-body max-let-depth)) + (error "cannot write modules, yet")])) + +(define (out-toplevel tl out) + (match tl + [#f (out-data tl out)] + [(? symbol?) (out-data tl out)] + [(struct global-bucket (name)) + (out-marshaled variable-type-num name out)] + [(struct module-variable (modidx sym pos phase)) + (out-shared + tl + out + (lambda () + (out-byte CPT_MODULE_VAR out) + (let-values ([(p b) (module-path-index-split modidx)]) + (if (symbol? p) + (out-data p out) + (out-data modidx out))) + (out-data sym out) + (unless (zero? phase) + (out-number -2 out)) + (out-number pos out)))])) + +(define (out-stx tl out) + (error "cannot handle syntax objects, yet")) + +(define (out-form form out) + (match form + [(? mod?) + (out-module form out)] + [(struct def-values (ids rhs)) + (out-syntax DEFINE_VALUES_EXPD + (list->vector (cons rhs ids)) + out)] + [(struct def-syntaxes (ids rhs prefix max-let-depth)) + (out-syntax DEFINE_SYNTAX_EXPD + (list->vector (list* rhs + prefix + max-let-depth + *dummy* + ids)) + out)] + [(struct def-for-syntax (ids rhs prefix max-let-depth)) + (out-syntax DEFINE_FOR_SYNTAX_EXPD + (list->vector (list* rhs + prefix + max-let-depth + *dummy* + ids)) + out)] + [(struct seq (forms)) + (out-marshaled sequence-type-num (map protect-quote forms) out)] + [(struct splice (forms)) + (out-syntax SPLICE_EXPD (make-seq forms) out)] + [else + (out-expr form out)])) + +(define (out-expr expr out) + (match expr + [(struct toplevel (depth pos const? ready?)) + (out-marshaled toplevel-type-num + (if (or const? ready?) + (cons pos + (bitwise-ior + (if const? #x1 0) + (if ready? #x2 0))) + pos) + out)] + [(struct topsyntax (depth pos midpt)) + (out-marshaled quote-syntax-type-num + (cons depth + (cons pos midpt)) + out)] + [(struct primval (id)) + (out-byte CPT_REFERENCE out) + (out-number id out)] + [(struct assign (id rhs undef-ok?)) + (out-syntax SET_EXPD + (cons undef-ok? (cons id rhs)) + out)] + [(struct localref (unbox? offset clear? other-clears?)) + (if (and (not clear?) (not other-clears?) + (offset . < . (- CPT_SMALL_LOCAL_END CPT_SMALL_LOCAL_START))) + (out-byte (+ (if unbox? + CPT_SMALL_LOCAL_UNBOX_START + CPT_SMALL_LOCAL_START) + offset) + out) + (begin + (out-byte (if unbox? CPT_LOCAL_UNBOX CPT_LOCAL) out) + (if (not (or clear? other-clears?)) + (out-number offset out) + (begin + (out-number (- (add1 offset)) out) + (out-number (+ (if clear? #x1 0) + (if other-clears? #x2 0)) + out)))))] + [(? lam?) + (out-lam expr out)] + [(struct case-lam (name lams)) + (out-syntax CASE_LAMBDA_EXPD + (make-case-seq name lams))] + [(struct case-seq (name lams)) + (out-marshaled case-lambda-sequence-type-num + (cons (or name null) + lams) + out)] + [(struct let-one (rhs body)) + (out-byte CPT_LET_ONE out) + (out-expr (protect-quote rhs) out) + (out-expr (protect-quote body) out)] + [(struct let-void (count boxes? body)) + (out-marshaled let-void-type-num + (list* + count + boxes? + (protect-quote body)) + out)] + [(struct let-rec (procs body)) + (out-marshaled letrec-type-num + (list* + (length procs) + (protect-quote body) + procs) + out)] + [(struct install-value (count pos boxes? rhs body)) + (out-marshaled let-value-type-num + (list* + count + pos + boxes? + (protect-quote rhs) + (protect-quote body)) + out)] + [(struct boxenv (pos body)) + (out-syntax BOXENV_EXPD + (cons + pos + (protect-quote body)) + out)] + [(struct branch (test then else)) + (out-byte CPT_BRANCH out) + (out-expr (protect-quote test) out) + (out-expr (protect-quote then) out) + (out-expr (protect-quote else) out)] + [(struct application (rator rands)) + (if ((length rands) . < . (- CPT_SMALL_APPLICATION_END CPT_SMALL_APPLICATION_START)) + (out-byte (+ CPT_SMALL_APPLICATION_START (length rands)) out) + (begin + (out-byte CPT_APPLICATION out) + (out-number (length rands) out))) + (for-each (lambda (e) (out-expr (protect-quote e) out)) + (cons rator rands))] + [(struct apply-values (proc args-expr)) + (out-syntax APPVALS_EXPD + (cons (protect-quote proc) + (protect-quote args-expr)) + out)] + [(struct seq (exprs)) + (out-form expr out)] + [(struct beg0 (exprs)) + (out-syntax BEGIN0_EXPD + (make-seq exprs) + out)] + [(struct with-cont-mark (key val body)) + (out-marshaled wcm-type-num + (list* + (protect-quote key) + (protect-quote val) + (protect-quote body)) + out)] + [(struct closure (lam gen-id)) + (out-lam expr out)] + [(struct indirect (val)) + (out-expr val out)] + [else (out-value expr out)])) + +(define (out-lam expr out) + (match expr + [(struct indirect (val)) (out-lam expr out)] + [(struct closure (lam gen-id)) + (out-shared + expr + out + (lambda () + (out-lam expr out)))] + [(struct lam (name flags num-params param-types rest? closure-map max-let-depth body)) + (let* ([l (protect-quote body)] + [any-refs? (ormap (lambda (t) (eq? t 'ref)) param-types)] + [l (cons (make-svector (if any-refs? + (list->vector + (append + (vector->list closure-map) + (let ([v (make-vector (ceiling (/ num-params BITS_PER_MZSHORT)))]) + (for ([t (in-list param-types)] + [i (in-naturals)]) + (when (eq? t 'ref) + (let ([pos (quotient i BITS_PER_MZSHORT)]) + (vector-set! v pos + (bitwise-ior (vector-ref v pos) + (arithmetic-shift 1 (modulo i BITS_PER_MZSHORT))))))) + (vector->list v)))) + closure-map)) + l)] + [l (if any-refs? + (cons (vector-length closure-map) l) + l)]) + (out-marshaled unclosed-procedure-type-num + (list* + (+ (if rest? CLOS_HAS_REST 0) + (if any-refs? CLOS_HAS_REF_ARGS 0) + (if (memq 'preserves-marks flags) CLOS_PRESERVES_MARKS 0) + (if (memq 'is-method flags) CLOS_IS_METHOD 0) + (if (memq 'single-result flags) CLOS_SINGLE_RESULT 0)) + ((if rest? add1 0) num-params) + max-let-depth + name + l) + out))])) + +(define (out-as-bytes expr ->bytes CPT out) + (out-shared expr out (lambda () + (let ([s (->bytes expr)]) + (out-byte CPT out) + (out-number (bytes-length s) out) + (out-bytes s out))))) + +(define (out-data expr out) + (cond + [(prefix? expr) (out-prefix expr out)] + [else (out-form expr out)])) + +(define (out-value expr out) + (cond + [(symbol? expr) + (out-as-bytes expr + (compose string->bytes/utf-8 symbol->string) + CPT_SYMBOL + out)] + [(keyword? expr) + (out-as-bytes expr + (compose string->bytes/utf-8 keyword->string) + CPT_KEYWORD + out)] + [(string? expr) + (out-as-bytes expr + string->bytes/utf-8 + CPT_CHAR_STRING + out)] + [(bytes? expr) + (out-as-bytes expr + values + CPT_BYTE_STRING + out)] + [(path? expr) + (out-as-bytes expr + path->bytes + CPT_PATH + out)] + [(char? expr) + (out-byte CPT_CHAR out) + (out-number (char->integer expr) out)] + [(and (exact-integer? expr) + (and (expr . >= . -1073741824) (expr . <= . 1073741823))) + (out-byte CPT_INT out) + (out-number expr out)] + [(null? expr) + (out-byte CPT_NULL out)] + [(eq? expr #t) + (out-byte CPT_TRUE out)] + [(eq? expr #f) + (out-byte CPT_FALSE out)] + [(void? expr) + (out-byte CPT_VOID out)] + [(box? expr) + (out-byte CPT_BOX out) + (out-data (unbox expr) out)] + [(pair? expr) + (out-byte CPT_LIST out) + (out-number 1 out) + (out-data (car expr) out) + (out-data (cdr expr) out)] + [(vector? expr) + (out-byte CPT_VECTOR out) + (out-number (vector-length expr) out) + (for ([v (in-vector expr)]) + (out-data v out))] + [(hash? expr) + (out-byte CPT_HASH_TABLE out) + (out-number (cond + [(hash-eqv? expr) 2] + [(hash-eq? expr) 0] + [else 1])) + (for ([(k v) (in-hash expr)]) + (out-data k out) + (out-data v out))] + [(svector? expr) + (out-byte CPT_SVECTOR out) + (out-number (vector-length (svector-vec expr)) out) + (for ([n (in-vector (svector-vec expr))]) + (out-number n out))] + [else + (out-byte CPT_QUOTE out) + (let ([s (open-output-bytes)]) + (write (if (quoted? expr) (quoted-v expr) expr) s) + (out-bytes (get-output-bytes s) out))])) + +(define-struct quoted (v)) +(define (protect-quote v) + (if (or (list? v) (vector? v) (box? v) (hash? v)) + (make-quoted v) + v)) + +(define-struct svector (vec)) + +;; ---------------------------------------- + diff --git a/collects/compiler/zo-parse.ss b/collects/compiler/zo-parse.ss index 73d9d2de60..f03d22ee85 100644 --- a/collects/compiler/zo-parse.ss +++ b/collects/compiler/zo-parse.ss @@ -127,20 +127,21 @@ (if (zero? (bitwise-and flags CLOS_HAS_REF_ARGS)) (values (vector-length v) v rest) (values v (car rest) (cdr rest)))] - [(arg-types) (if (zero? (bitwise-and flags CLOS_HAS_REF_ARGS)) - (for/list ([i (in-range num-params)]) 'val) - (for/list ([i (in-range num-params)]) - (if (bitwise-bit-set? - (vector-ref closed-over - (+ closure-size (quotient i BITS_PER_MZSHORT))) - (remainder i BITS_PER_MZSHORT)) - 'ref - 'val)))]) + [(arg-types) (let ([num-params ((if rest? sub1 values) num-params)]) + (if (zero? (bitwise-and flags CLOS_HAS_REF_ARGS)) + (for/list ([i (in-range num-params)]) 'val) + (for/list ([i (in-range num-params)]) + (if (bitwise-bit-set? + (vector-ref closed-over + (+ closure-size (quotient i BITS_PER_MZSHORT))) + (remainder i BITS_PER_MZSHORT)) + 'ref + 'val))))]) (make-lam name (append - (if (bitwise-and flags flags CLOS_PRESERVES_MARKS) '(preserves-marks) null) - (if (bitwise-and flags flags CLOS_IS_METHOD) '(is-method) null) - (if (bitwise-and flags flags CLOS_SINGLE_RESULT) '(single-result) null)) + (if (zero? (bitwise-and flags flags CLOS_PRESERVES_MARKS)) null '(preserves-marks)) + (if (zero? (bitwise-and flags flags CLOS_IS_METHOD)) null '(is-method)) + (if (zero? (bitwise-and flags flags CLOS_SINGLE_RESULT)) null '(single-result))) ((if rest? sub1 values) num-params) arg-types rest?