#lang scheme (require mzlib/etc scheme/list) (provide zo-parse) ;; ---------------------------------------- ;; Structures to represent bytecode (define-syntax-rule (define-form-struct id (field-id ...)) (begin (define-struct id (field-id ...) #:transparent) (provide (struct-out id)))) (define-form-struct compilation-top (max-let-depth prefix code)) ; compiled code always wrapped with this (define-form-struct prefix (num-lifts toplevels stxs)) ; sets up top-level and syntax-object array ;; In toplevels of resove prefix: (define-form-struct global-bucket (name)) ; top-level binding (define-form-struct module-variable (modidx sym pos phase)) ; direct access to exported id ;; In stxs of prefix: (define-form-struct stx (encoded)) ; todo: decode syntax objects (define-form-struct mod (name self-modidx prefix provides requires body syntax-body)) (define-form-struct lam (name flags num-params rest? closure-map max-let-depth body)) ; `lambda' (define-form-struct closure (code gen-id)) ; a static closure (nothing to close over) (define-form-struct case-lam (name clauses)) ; each clause is an lam (define-form-struct let-one (rhs body)) ; pushes one value onto stack (define-form-struct let-void (count boxes? body)) ; create new stack slots (define-form-struct install-value (count pos boxes? rhs body)) ; set existing stack slot(s) (define-form-struct let-rec (procs body)) ; put `letrec'-bound closures into existing stack slots (define-form-struct boxenv (pos body)) ; box existing stack element (define-form-struct localref (unbox? offset clear?)) ; access local via stack (define-form-struct toplevel (depth pos const? mutated?)) ; access binding via prefix array (which is on stack) (define-form-struct topsyntax (depth pos midpt)) ; access syntax object via prefix array (which is on stack) (define-form-struct application (rator rands)) ; function call (define-form-struct branch (test then else)) ; `if' (define-form-struct with-cont-mark (key val body)) ; `with-continuation-mark' (define-form-struct beg0 (seq)) ; `begin0' (define-form-struct sequence (forms)) ; `begin' (define-form-struct splice (forms)) ; top-level `begin' (define-form-struct varref (toplevel)) ; `#%variable-reference' (define-form-struct assign (id rhs undef-ok?)) ; top-level or module-level set! (define-form-struct apply-values (proc args-expr)) ; `(call-with-values (lambda () ,args-expr) ,proc) (define-form-struct primitive (id)) ; direct preference to a kernel primitive ;; Definitions (top level or within module): (define-form-struct def-values (ids rhs)) (define-form-struct def-syntaxes (ids rhs prefix max-let-depth)) (define-form-struct def-for-syntax (ids rhs prefix max-let-depth)) ;; Top-level `require' (define-form-struct req (reqs dummy)) ;; A static closure can refer directly to itself, creating a cycle (define-struct indirect ([v #:mutable]) #:prefab) (provide (struct-out indirect)) ;; ---------------------------------------- ;; Bytecode unmarshalers for various forms (define (read-toplevel v) (define SCHEME_TOPLEVEL_CONST #x01) (define SCHEME_TOPLEVEL_MUTATED #x02) (match v [(cons depth (cons pos flags)) (make-toplevel depth pos (positive? (bitwise-and flags SCHEME_TOPLEVEL_CONST)) (positive? (bitwise-and flags SCHEME_TOPLEVEL_MUTATED)))] [(cons depth pos) (make-toplevel depth pos #f #f)])) (define (read-topsyntax v) (match v [`(,depth ,pos . ,midpt) (make-topsyntax depth pos midpt)])) (define (read-variable v) (if (symbol? v) (make-global-bucket v) (let-values ([(phase modname varname) (match v [(list* phase modname varname) (values phase modname varname)] [(list* modname varname) (values 0 modname varname)])]) (if (and (zero? phase) (eq? modname '#%kernel)) (error 'bucket "var ~a" varname) (make-module-variable modname varname -1 phase))))) (define (read-compilation-top v) (match v [`(,ld ,prefix . ,code) (unless (prefix? prefix) (error 'bad "not prefix ~a" prefix)) (make-compilation-top ld prefix code)])) (define (read-resolve-prefix v) (match v [`(,i ,tv . ,sv) (make-prefix i (vector->list tv) (vector->list sv))])) (define (read-unclosed-procedure v) (define CLOS_HAS_REST 1) (define CLOS_HAS_REF_ARGS 2) (match v [`(,flags ,num-params ,max-let-depth ,name ,v . ,rest) (let ([rest? (positive? (bitwise-and flags CLOS_HAS_REST))]) (let-values ([(closure-size closed-over body) (if (zero? (bitwise-and flags CLOS_HAS_REF_ARGS)) (values (vector-length v) v rest) (values v (car rest) (cdr rest)))]) (make-lam name flags ((if rest? sub1 values) num-params) rest? (if (= closure-size (vector-length closed-over)) closed-over (let ([v2 (make-vector closure-size)]) (vector-copy! v2 0 closed-over 0 closure-size) v2)) max-let-depth body)))])) (define (read-let-value v) (match v [`(,count ,pos ,boxes? ,rhs . ,body) (make-install-value count pos boxes? rhs body)])) (define (read-let-void v) (match v [`(,count ,boxes? . ,body) (make-let-void count boxes? body)])) (define (read-letrec v) (match v [`(,count ,body . ,procs) (make-let-rec procs body)])) (define (read-with-cont-mark v) (match v [`(,key ,val . ,body) (make-with-cont-mark key val body)])) (define (read-sequence v) (make-sequence v)) (define (read-define-values v) (make-def-values (cdr (vector->list v)) (vector-ref v 0))) (define (read-define-syntaxes mk v) (mk (list-tail (vector->list v) 4) (vector-ref v 0) (vector-ref v 1) (vector-ref v 2) #;(vector-ref v 3))) (define (read-define-syntax v) (read-define-syntaxes make-def-syntaxes v)) (define (read-define-for-syntax v) (read-define-syntaxes make-def-for-syntax v)) (define (read-set! v) (make-assign (cadr v) (cddr v) (car v))) (define (read-case-lambda v) (make-case-lam (car v) (cdr v))) (define (read-begin0 v) (match v [(struct sequence (exprs)) (make-beg0 exprs)])) (define (read-boxenv v) (make-boxenv (car v) (cdr v))) (define (read-require v) (make-req (cdr v) (car v))) (define (read-#%variable-ref v) (make-varref v)) (define (read-apply-values v) (make-apply-values (car v) (cdr v))) (define (read-splice v) (make-splice v)) (define (read-module v) (match v [`(,name ,self-modidx ,functional? ,et-functional? ,rename ,max-let-depth ,dummy ,prefix ,kernel-exclusion ,reprovide-kernel? ,indirect-provides ,num-indirect-provides ,protects ,provide-phase-count . ,rest) (let ([phase-data (take rest (* 8 provide-phase-count))]) (match (list-tail rest (* 8 provide-phase-count)) [`(,syntax-body ,body ,requires ,syntax-requires ,template-requires ,label-requires ,more-requires-count . ,more-requires) (make-mod name self-modidx prefix phase-data (list* (cons 0 requires) (cons 1 syntax-requires) (cons -1 template-requires) (cons #f label-requires) more-requires) (vector->list body) (map (lambda (sb) (match sb [`#(,ids ,expr ,max-let-depth ,prefix ,for-stx?) ((if for-stx? make-def-for-syntax make-def-syntaxes) ids expr prefix max-let-depth)])) (vector->list syntax-body)))]))])) (define (read-module-wrap v) v) ;; ---------------------------------------- ;; Unmarshal dispatch for various types (define (read-more-syntax v) (let ([id (car v)] [v (cdr v)]) ;; This is the ..._EXPD mapping from "schpriv.h": (case id [(0) (read-define-values v)] [(1) (read-define-syntax v)] [(2) (read-set! v)] [(3) v] ; a case-lam already [(4) (read-begin0 v)] [(5) (read-boxenv v)] [(6) (read-module-wrap v)] [(7) (read-require v)] [(8) (read-define-for-syntax v)] [(9) (read-#%variable-ref v)] [(10) (read-apply-values v)] [(11) (read-splice v)] [else (error 'read-mode-unsyntax "unknown id: ~e" id)]))) ;; Type mappings from "stypes.h": (define (int->type i) (case i [(0) 'toplevel-type] [(3) 'syntax-type] [(7) 'sequence-type] [(9) 'unclosed-procedure-type] [(10) 'let-value-type] [(11) 'let-void-type] [(12) 'letrec-type] [(14) 'with-cont-mark-type] [(15) 'quote-syntax-type] [(24) 'variable-type] [(25) 'module-variable-type] [(96) 'case-lambda-sequence-type] [(97) 'begin0-sequence-type] [(100) 'module-type] [(103) 'resolve-prefix-type] [else (error 'int->type "unknown type: ~e" i)])) (define type-readers (make-immutable-hash (list (cons 'toplevel-type read-toplevel) (cons 'syntax-type read-more-syntax) (cons 'sequence-type read-sequence) (cons 'unclosed-procedure-type read-unclosed-procedure) (cons 'let-value-type read-let-value) (cons 'let-void-type read-let-void) (cons 'letrec-type read-letrec) (cons 'with-cont-mark-type read-with-cont-mark) (cons 'quote-syntax-type read-topsyntax) (cons 'variable-type read-variable) (cons 'module-variable-type read-variable) (cons 'compilation-top-type read-compilation-top) (cons 'case-lambda-sequence-type read-case-lambda) (cons 'begin0-sequence-type read-sequence) (cons 'module-type read-module) (cons 'resolve-prefix-type read-resolve-prefix)))) (define (get-reader type) (or (hash-ref type-readers type #f) (lambda (v) (error 'read-marshalled "reader for ~a not implemented" type)))) ;; ---------------------------------------- ;; Lowest layer of bytecode parsing (define (split-so all-short so) (define n (if (zero? all-short) 4 2)) (let loop ([so so]) (if (zero? (bytes-length so)) null (cons (integer-bytes->integer (subbytes so 0 n) #f) (loop (subbytes so n)))))) (define (read-simple-number p) ;; not sure if it's really unsigned (integer-bytes->integer (read-bytes 4 p) #f #f)) (define-struct cport ([pos #:mutable] orig-port size bytes symtab shared-offsets)) (define (cp-getc cp) (begin-with-definitions (when ((cport-pos cp) . >= . (cport-size cp)) (error "off the end")) (define r (bytes-ref (cport-bytes cp) (cport-pos cp))) (set-cport-pos! cp (add1 (cport-pos cp))) r)) (define small-list-max 65) (define cpt-table ;; The "schcpt.h" mapping `([0 escape] [1 symbol] [2 symref] [3 weird-symbol] [4 keyword] [5 byte-string] [6 string] [7 char] [8 int] [9 null] [10 true] [11 false] [12 void] [13 box] [14 pair] [15 list] [16 vector] [17 hash-table] [18 stx] [19 gstx] ; unused [20 marshalled] [21 quote] [22 reference] [23 local] [24 local-unbox] [25 svector] [26 application] [27 let-one] [28 branch] [29 module-index] [30 module-var] [31 path] [32 closure] [33 delayed] [34 prefab] [35 60 small-number] [60 80 small-symbol] [80 92 small-marshalled] [92 ,(+ 92 small-list-max) small-proper-list] [,(+ 92 small-list-max) 192 small-list] [192 207 small-local] [207 222 small-local-unbox] [222 247 small-svector] [248 small-application2] [249 small-application3] [247 255 small-application])) (define (cpt-table-lookup i) (for/or ([ent cpt-table]) (match ent [(list k sym) (and (= k i) (cons k sym))] [(list k k* sym) (and (<= k i) (< i k*) (cons k sym))]))) (define (read-compact-bytes port c) (begin0 (subbytes (cport-bytes port) (cport-pos port) (+ (cport-pos port) c)) (set-cport-pos! port (+ c (cport-pos port))))) (define (read-compact-chars port c) (bytes->string/utf-8 (read-compact-bytes port c))) (define (read-compact-list c proper port) (cond [(= 0 c) (if proper null (read-compact port))] [else (cons (read-compact port) (read-compact-list (sub1 c) proper port))])) (define (read-compact-number port) (define flag (cp-getc port)) (cond [(< flag 128) flag] [(zero? (bitwise-and flag #x40)) (let ([a (cp-getc port)]) (+ (a . << . 6) (bitwise-and flag 63)))] [(zero? (bitwise-and flag #x20)) (- (bitwise-and flag #x1F))] [else (let ([a (cp-getc port)] [b (cp-getc port)] [c (cp-getc port)] [d (cp-getc port)]) (let ([n (integer-bytes->integer (bytes a b c d) #f #f)]) (if (zero? (bitwise-and flag #x10)) (- n) n)))])) (define (read-compact-svector port n) (list->vector (reverse (for/list ([i (in-range n)]) (read-compact-number port))))) (define (read-marshalled type port) (let* ([type (if (number? type) (int->type type) type)] [l (read-compact port)] [reader (get-reader type)]) (reader l))) (define (make-local unbox? pos flags) (define SCHEME_LOCAL_CLEAR_ON_READ #x01) (make-localref unbox? pos (positive? (bitwise-and flags SCHEME_LOCAL_CLEAR_ON_READ)))) (define (a . << . b) (arithmetic-shift a b)) (define-struct not-ready ()) ;; ---------------------------------------- ;; Main parsing loop (define (read-compact cp) (let loop ([need-car 0] [proper #f] [last #f] [first #f]) (begin-with-definitions (define ch (cp-getc cp)) (define-values (cpt-start cpt-tag) (let ([x (cpt-table-lookup ch)]) (unless x (error 'read-compact "unknown code : ~a" ch)) (values (car x) (cdr x)))) (define v (case cpt-tag [(delayed) (let ([pos (read-compact-number cp)]) (let ([v (vector-ref (cport-symtab cp) pos)]) (if (not-ready? v) (let ([save-pos (cport-pos cp)]) (set-cport-pos! cp (vector-ref (cport-shared-offsets cp) (sub1 pos))) (let ([v (read-compact cp)]) (vector-set! (cport-symtab cp) pos v) (set-cport-pos! cp save-pos) v)) v)))] [(escape) (let* ([len (read-compact-number cp)] [s (subbytes (cport-bytes cp) (cport-pos cp) (+ (cport-pos cp) len))]) (set-cport-pos! cp (+ (cport-pos cp) len)) (parameterize ([read-accept-compiled #t] [read-accept-bar-quote #t] [read-accept-box #t] [read-accept-graph #t] [read-case-sensitive #t] [read-square-bracket-as-paren #t] [read-curly-brace-as-paren #t] [read-decimal-as-inexact #t] [read-accept-dot #t] [read-accept-infix-dot #t] [read-accept-quasiquote #t]) (read (open-input-bytes s))))] [(reference) (make-primitive (read-compact-number cp))] [(small-list small-proper-list) (let* ([l (- ch cpt-start)] [ppr (eq? cpt-tag 'small-proper-list)]) (if (positive? need-car) (if (= l 1) (cons (read-compact cp) (if ppr null (read-compact cp))) (read-compact-list l ppr cp)) (loop l ppr last first)))] [(let-one) (make-let-one (read-compact cp) (read-compact cp))] [(branch) (make-branch (read-compact cp) (read-compact cp) (read-compact cp))] [(module-index) (module-path-index-join (read-compact cp) (read-compact cp))] [(module-var) (let ([mod (read-compact cp)] [var (read-compact cp)] [pos (read-compact-number cp)]) (make-module-variable mod var pos 0))] [(local-unbox) (let* ([p* (read-compact-number cp)] [p (if (< p* 0) (- (add1 p*)) p*)] [flags (if (< p* 0) (read-compact-number cp) 0)]) (make-local #t p flags))] [(path) (let* ([p (bytes->path (read-compact-bytes cp (read-compact-number cp)))]) (if (relative-path? p) (path->complete-path p (or (current-load-relative-directory) (current-directory))) p))] [(small-number) (let ([l (- ch cpt-start)]) l)] [(int) (read-compact-number cp)] [(false) #f] [(true) #t] [(null) null] [(void) (void)] [(vector) (let* ([n (read-compact-number cp)] [lst (for/list ([i (in-range n)]) (read-compact cp))]) (vector->immutable-vector (list->vector lst)))] [(list) (let* ([n (read-compact-number cp)]) (for/list ([i (in-range n)]) (read-compact cp)))] [(prefab) (let ([v (read-compact cp)]) (apply make-prefab-struct (vector-ref v 0) (cdr (vector->list v))))] [(hash-table) (let ([eq (read-compact-number cp)] [len (read-compact-number cp)]) ((if (zero? eq) make-hash-placeholder make-hasheq-placeholder) (for/list ([i (in-range len)]) (cons (read-compact cp) (read-compact cp)))))] [(marshalled) (read-marshalled (read-compact-number cp) cp)] [(stx) (let ([v (make-reader-graph (read-compact cp))]) (make-stx v))] [(local local-unbox) (let ([c (read-compact-number cp)] [unbox? (eq? cpt-tag 'local-unbox)]) (if (negative? c) (make-local unbox? (- (add1 c)) (read-compact-number cp)) (make-local unbox? c 0)))] [(small-local) (make-local #f (- ch cpt-start) 0)] [(small-local-unbox) (make-local #t (- ch cpt-start) 0)] [(small-symbol) (let ([l (- ch cpt-start)]) (string->symbol (read-compact-chars cp l)))] [(symbol) (let ([l (read-compact-number cp)]) (string->symbol (read-compact-chars cp l)))] [(keyword) (let ([l (read-compact-number cp)]) (string->keyword (read-compact-chars cp l)))] [(byte-string) (let ([l (read-compact-number cp)]) (read-compact-bytes cp l))] [(string) (let ([l (read-compact-number cp)] [cl (read-compact-number cp)]) (read-compact-chars cp l))] [(char) (integer->char (read-compact-number cp))] [(box) (box (read-compact cp))] [(quote) (make-reader-graph (read-compact cp))] [(symref) (let* ([l (read-compact-number cp)] [v (vector-ref (cport-symtab cp) l)]) (if (not-ready? v) (let ([pos (cport-pos cp)]) (set-cport-pos! cp (vector-ref (cport-shared-offsets cp) (sub1 l))) (let ([v (read-compact cp)]) (set-cport-pos! cp pos) (vector-set! (cport-symtab cp) l v) v)) v))] [(weird-symbol) (let ([u (read-compact-number cp)] [str (read-compact-chars cp (read-compact-number cp))]) ;; FIXME: no way to construct quasi-interned symbols: (string->uninterned-symbol str))] [(small-marshalled) (read-marshalled (- ch cpt-start) cp)] [(small-application2) (make-application (read-compact cp) (list (read-compact cp)))] [(small-application3) (make-application (read-compact cp) (list (read-compact cp) (read-compact cp)))] [(small-application) (let ([c (add1 (- ch cpt-start))]) (make-application (read-compact cp) (for/list ([i (in-range (sub1 c))]) (read-compact cp))))] [(application) (let ([c (read-compact-number cp)]) (make-application (read-compact cp) (for/list ([i (in-range c)]) (read-compact cp))))] [(closure) (let* ([l (read-compact-number cp)] [ind (make-indirect #f)]) (vector-set! (cport-symtab cp) l ind) (let* ([v (read-compact cp)] [cl (make-closure v (gensym (let ([s (lam-name v)]) (cond [(symbol? s) s] [(vector? s) (vector-ref s 0)] [else 'closure]))))]) (vector-set! (cport-symtab cp) l cl) (set-indirect-v! ind cl) cl))] [(svector) (read-compact-svector cp (read-compact-number cp))] [(small-svector) (read-compact-svector cp (- ch cpt-start))] [else (error 'read-compact "unknown tag ~a" cpt-tag)])) (cond [(zero? need-car) v] [(and proper (= need-car 1)) (cons v null)] [else (cons v (loop (sub1 need-car) proper last first))])))) ;; path -> bytes ;; implementes read.c:read_compiled (define (zo-parse port) (begin-with-definitions ;; skip the "#~" (unless (equal? #"#~" (read-bytes 2 port)) (error 'zo-parse "not a bytecode stream")) (define version (read-bytes (min 63 (read-byte port)) port)) (define symtabsize (read-simple-number port)) (define all-short (read-byte port)) (define cnt (* (if (not (zero? all-short)) 2 4) (sub1 symtabsize))) (define so (read-bytes cnt port)) (define so* (list->vector (split-so all-short so))) (define shared-size (read-simple-number port)) (define size* (read-simple-number port)) (when (shared-size . >= . size*) (error 'bad-read)) (define rst (read-bytes size* port)) (unless (eof-object? (read port)) (error 'not-end)) (unless (= size* (bytes-length rst)) (error "wrong number of bytes")) (define symtab (make-vector symtabsize (make-not-ready))) (define cp (make-cport 0 port size* rst symtab so*)) (for/list ([i (in-range 1 symtabsize)]) (when (not-ready? (vector-ref symtab i)) (set-cport-pos! cp (vector-ref so* (sub1 i))) (let ([v (read-compact cp)]) (vector-set! symtab i v)))) (set-cport-pos! cp shared-size) (read-marshalled 'compilation-top-type cp))) ;; ---------------------------------------- #; (begin (define (compile/write sexp) (define s (open-output-bytes)) (write (parameterize ([current-namespace (make-base-namespace)]) (eval '(require (for-syntax scheme/base))) (compile sexp)) s) (get-output-bytes s)) (define (compile/parse sexp) (let* ([bs (compile/write sexp)] [p (open-input-bytes bs)]) (zo-parse p))) #;(compile/parse #s(foo 10 13)) (zo-parse (open-input-file "/home/mflatt/proj/plt/collects/scheme/private/compiled/more-scheme_ss.zo")) )