whalesong/parse-bytecode-5.1.1.rkt

570 lines
16 KiB
Racket

#lang racket/base
(require "expression-structs.rkt"
"lexical-structs.rkt"
"typed-module-path.rkt"
syntax/modresolve)
;; Parsing Racket 5.1.1 bytecode structures into our own.
(require compiler/zo-parse
racket/match
racket/list)
(provide parse-bytecode
current-module-path-index-resolver
current-module-path
reset-lam-label-counter!/unit-testing)
(define current-module-path (make-parameter #f))
;; current-module-path-index-resolver: (module-path-index (U Path #f) -> (U Symbol Path)) -> void
;; The module path index resolver figures out how to translate module path indices to module names.
(define current-module-path-index-resolver
(make-parameter
(lambda (mpi relative-to)
(cond
[(self-module-path-index? mpi)
'self]
[else
(resolve-module-path-index mpi relative-to)]))))
(define (self-module-path-index? mpi)
(let-values ([(x y) (module-path-index-split mpi)])
(and (eq? x #f)
(eq? y #f))))
(define (explode-module-path-index mpi)
(let-values ([(x y) (module-path-index-split mpi)])
(cond
[(module-path-index? y)
(cons x (explode-module-path-index y))]
[else
(list x y)])))
;; seen-closures: (hashof symbol -> symbol)
;; As we're parsing, we watch for closure cycles. On any subsequent time where
;; we see a closure cycle, we break the cycle by generating an EmptyClosureReference.
;; The map is from the gen-id to the entry-point label of the lambda.
(define seen-closures (make-parameter (make-hasheq)))
;; Code is copied-and-pasted from compiler/decompile. Maps the primval ids to their respective
;; symbolic names.
(define primitive-table
;; Figure out number-to-id mapping for kernel functions in `primitive'
(let ([bindings
(let ([ns (make-base-empty-namespace)])
(parameterize ([current-namespace ns])
(namespace-require ''#%kernel)
(namespace-require ''#%unsafe)
(namespace-require ''#%flfxnum)
(for/list ([l (namespace-mapped-symbols)])
(cons l (with-handlers ([exn:fail? (lambda (x)
#f)])
(compile l))))))]
[table (make-hash)])
(for ([b (in-list bindings)])
(let ([v (and (cdr b)
(zo-parse (let ([out (open-output-bytes)])
(write (cdr b) out)
(close-output-port out)
(open-input-bytes (get-output-bytes out)))))])
(let ([n (match v
[(struct compilation-top (_ prefix (struct primval (n)))) n]
[else #f])])
(hash-set! table n (car b)))))
table))
;; parse-bytecode: Input-Port -> Expression
(define (parse-bytecode in)
(parameterize ([seen-closures (make-hasheq)])
(let ([compilation-top (zo-parse in)])
(parse-top compilation-top))))
(define (parse-top a-top)
(match a-top
[(struct compilation-top (max-let-depth prefix code))
(make-Top (parse-prefix prefix)
(parse-top-code code))]))
(define (parse-prefix a-prefix)
(match a-prefix
[(struct prefix (num-lifts toplevels stxs))
(make-Prefix
(append (map parse-prefix-toplevel toplevels)
(if (empty? stxs)
empty
(list #f))
(build-list num-lifts
(lambda (i)
#f))))]))
;; parse-top-code: (U form Any -> Expression)
(define (parse-top-code code)
(cond
[(form? code)
(parse-form code)]
[else
(make-Constant code)]))
;; parse-prefix-toplevel: (U #f symbol global-bucket module-variable) -> (U False Symbol GlobalBucket ModuleVariable)
(define (parse-prefix-toplevel a-toplevel)
(cond
[(eq? a-toplevel #f)
#f]
[(symbol? a-toplevel)
a-toplevel]
[(global-bucket? a-toplevel)
(make-GlobalBucket (global-bucket-name a-toplevel))]
[(module-variable? a-toplevel)
(let ([resolver (current-module-path-index-resolver)])
(make-ModuleVariable (module-variable-sym a-toplevel)
(let ([resolved-path-name
(resolver (module-variable-modidx a-toplevel) (current-module-path))])
(wrap-module-name resolved-path-name))))]))
(define (wrap-module-name resolved-path-name)
(cond
[(symbol? resolved-path-name)
(make-ModuleName resolved-path-name)]
[(path? resolved-path-name)
(make-ModuleName
(string->symbol
(path->string resolved-path-name)))]))
;; parse-form: form -> (U Expression)
(define (parse-form a-form)
(cond
[(def-values? a-form)
(parse-def-values a-form)]
[(def-syntaxes? a-form)
(parse-def-syntaxes a-form)]
[(req? a-form)
(parse-req a-form)]
[(seq? a-form)
(parse-seq a-form)]
[(splice? a-form)
(parse-splice a-form)]
[(mod? a-form)
(parse-mod a-form)]
[(expr? a-form)
(parse-expr a-form)]
[else
(error 'parse-form "~s" a-form)]))
;; parse-def-values: def-values -> Expression
(define (parse-def-values form)
(match form
[(struct def-values (ids rhs))
(make-DefValues (map parse-toplevel ids)
(parse-expr-seq-constant rhs))]))
(define (parse-def-syntaxes form)
;; Currently, treat def-syntaxes as a no-op. The compiler will not produce
;; syntax transformers.
(make-Constant (void)))
(define (parse-req form)
(error 'fixme-req))
;; parse-seq: seq -> Expression
(define (parse-seq form)
(match form
[(struct seq (forms))
(make-Seq (map parse-form-item forms))]))
;; parse-form-item: (U form Any) -> Expression
(define (parse-form-item item)
(cond
[(form? item)
(parse-form item)]
[else
(make-Constant item)]))
;; parse-splice: splice -> Expression
(define (parse-splice form)
(match form
[(struct splice (forms))
(make-Splice (map parse-splice-item forms))]))
;; parse-splice-item: (U form Any) -> Expression
(define (parse-splice-item item)
(cond
[(form? item)
(parse-form item)]
[else
(make-Constant item)]))
;; parse-mod: mod -> Expression
(define (parse-mod form)
(match form
[(struct mod (name srcname self-modidx prefix provides requires
body syntax-body unexported max-let-depth dummy lang-info
internal-context))
(make-Module (make-ModuleName name)
(parse-prefix prefix)
(parse-mod-requires self-modidx requires)
(parse-mod-provides provides)
(parse-mod-body body))]))
;; parse-mod-requires: module-path-index (listof (pair (U Integer #f) (listof module-path-index))) -> (listof ModuleName)
(define (parse-mod-requires enclosing-module-path-index requires)
;; We only care about phase 0 --- the runtime.
(let ([resolver (current-module-path-index-resolver)])
(let loop ([requires requires])
(cond
[(empty? requires)
empty]
[(= (car (first requires))
0)
(map (lambda (m)
(let ([enclosing-path (resolver enclosing-module-path-index (current-module-path))])
(cond
[(symbol? enclosing-path)
(wrap-module-name (resolver m (current-module-path)))]
[(path? enclosing-path)
(wrap-module-name (resolver m enclosing-path))])))
(cdr (first requires)))]
[else
(loop (rest requires))]))))
(define (parse-mod-provides provides)
(let* ([resolver (current-module-path-index-resolver)]
[parse-provided (lambda (a-provided)
(match a-provided
[(struct provided (name src src-name nom-mod src-phase protected? insp))
;; fixme: we're not considering all of the fields here...
(make-Provided name src-name)]))])
(let loop ([provides provides])
(cond
[(empty? provides)
empty]
[(= (first (first provides)) 0)
(map parse-provided (second (first provides)))]
[else
(loop (rest provides))]))))
;; parse-mod-body: (listof (or/c form? any/c)) -> Expression
(define (parse-mod-body body)
(let ([parse-item (lambda (item)
(cond
[(form? item)
(parse-form item)]
[else
(make-Constant item)]))])
(make-Splice (map parse-item body))))
(define-values (make-lam-label reset-lam-label-counter!/unit-testing)
(let ([n 0])
(values
(lambda ()
(set! n (add1 n))
(string->symbol (format "lamEntry~a" n)))
(lambda ()
(set! n 0)))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define (parse-expr expr)
(cond
[(lam? expr)
(parse-lam expr (make-lam-label))]
[(closure? expr)
(parse-closure expr)]
[(case-lam? expr)
(parse-case-lam expr)]
[(let-one? expr)
(parse-let-one expr)]
[(let-void? expr)
(parse-let-void expr)]
[(install-value? expr)
(parse-install-value expr)]
[(let-rec? expr)
(parse-let-rec expr)]
[(boxenv? expr)
(parse-boxenv expr)]
[(localref? expr)
(parse-localref expr)]
[(toplevel? expr)
(parse-toplevel expr)]
[(topsyntax? expr)
(parse-topsyntax expr)]
[(application? expr)
(parse-application expr)]
[(branch? expr)
(parse-branch expr)]
[(with-cont-mark? expr)
(parse-with-cont-mark expr)]
[(beg0? expr)
(parse-beg0 expr)]
[(varref? expr)
(parse-varref expr)]
[(assign? expr)
(parse-assign expr)]
[(apply-values? expr)
(parse-apply-values expr)]
[(primval? expr)
(parse-primval expr)]))
(define (parse-lam expr entry-point-label)
(match expr
[(struct lam (name flags num-params param-types rest? closure-map closure-types max-let-depth body))
(let ([lam-name (extract-lam-name name)])
(make-Lam lam-name
num-params
rest?
(parse-expr-seq-constant body)
(vector->list closure-map)
entry-point-label))]))
;; parse-closure: closure -> (U Lam EmptyClosureReference)
;; Either parses as a regular lambda, or if we come across the same closure twice,
;; breaks the cycle by creating an EmptyClosureReference with the pre-existing lambda
;; entry point.
(define (parse-closure expr)
(match expr
[(struct closure (code gen-id))
(let ([seen (seen-closures)])
(cond
[(hash-has-key? seen gen-id)
(match code
[(struct lam (name flags num-params param-types rest? closure-map closure-types max-let-depth body))
(let ([lam-name (extract-lam-name name)])
(make-EmptyClosureReference lam-name
num-params
rest?
(hash-ref seen gen-id)))])]
[else
(let ([fresh-entry-point (make-lam-label)])
(hash-set! seen gen-id fresh-entry-point)
(parse-lam code fresh-entry-point))]))]))
;; extract-lam-name: (U Symbol Vector) -> (U Symbol LamPositionalName)
(define (extract-lam-name name)
(cond
[(symbol? name)
name]
[(vector? name)
(match name
[(vector (and (? symbol?) sym)
(and (? path?) path)
(and (? number?) line)
(and (? number?) column)
(and (? number?) offset)
(and (? number?) span)
_)
(make-LamPositionalName sym
(path->string path)
line
column
offset
span)]
[else
(string->symbol (format "~s" name))])]
[else
'unknown
;; The documentation says that the name must be a symbol or vector, but I'm seeing cases
;; where it returns the empty list when there's no information available.
]))
(define (parse-case-lam exp)
(match exp
[(struct case-lam (name clauses))
(let ([case-lam-label (make-lam-label)])
(make-CaseLam (extract-lam-name name)
(map (lambda (l)
(cond
[(closure? l)
(parse-closure l)]
[else
(parse-lam l (make-lam-label))]))
clauses)
case-lam-label))]))
(define (parse-let-one expr)
(match expr
[(struct let-one (rhs body flonum? unused?))
;; fixme: use flonum? and unused? to generate better code.
(make-Let1 (parse-expr-seq-constant rhs)
(parse-expr-seq-constant body))]))
;; parse-expr-seq-constant: (U expr seq Any) -> Expression
(define (parse-expr-seq-constant x)
(cond
[(expr? x) (parse-expr x)]
[(seq? x) (parse-seq x)]
[else (make-Constant x)]))
(define (parse-let-void expr)
(match expr
[(struct let-void (count boxes? body))
(make-LetVoid count (parse-expr-seq-constant body) boxes?)]))
(define (parse-install-value expr)
(match expr
[(struct install-value (count pos boxes? rhs body))
(make-Seq (list (make-InstallValue count pos (parse-expr-seq-constant rhs) boxes?)
(parse-expr-seq-constant body)))]))
(define (parse-let-rec expr)
(match expr
[(struct let-rec (procs body))
(make-LetRec (map (lambda (p) (parse-lam p (make-lam-label)))
procs)
(parse-expr-seq-constant body))]))
(define (parse-boxenv expr)
(match expr
[(struct boxenv (pos body))
(make-BoxEnv pos (parse-expr-seq-constant body))]))
(define (parse-localref expr)
(match expr
[(struct localref (unbox? pos clear? other-clears? flonum?))
;; FIXME: we should use clear? at the very least: as I understand it,
;; this is here to maintain safe-for-space behavior.
;; We should also make use of flonum information to generate better code.
(make-LocalRef pos unbox?)]))
(define (parse-toplevel expr)
(match expr
;; FIXME: we should also keep track of const? and ready? to produce better code, and to
;; do the required runtime checks when necessary (const?=#f, ready?=#f)
[(struct toplevel (depth pos const? ready?))
(make-ToplevelRef depth pos)]))
(define (parse-topsyntax expr)
(error 'fixme-topsyntax))
(define (parse-application expr)
(match expr
[(struct application (rator rands))
(make-App (parse-application-rator rator)
(map parse-application-rand rands))]))
(define (parse-application-rator rator)
(cond
[(expr? rator)
(parse-expr rator)]
[(seq? rator)
(parse-seq rator)]
[else
(make-Constant rator)]))
(define (parse-application-rand rand)
(cond
[(expr? rand)
(parse-expr rand)]
[(seq? rand)
(parse-seq rand)]
[else
(make-Constant rand)]))
(define (parse-branch expr)
(match expr
[(struct branch (test then else))
(make-Branch (parse-expr-seq-constant test)
(parse-expr-seq-constant then)
(parse-expr-seq-constant else))]))
(define (parse-with-cont-mark expr)
(match expr
[(struct with-cont-mark (key val body))
(make-WithContMark (parse-expr-seq-constant key)
(parse-expr-seq-constant val)
(parse-expr-seq-constant body))]))
(define (parse-beg0 expr)
(match expr
[(struct beg0 (seq))
(make-Begin0 (map parse-expr-seq-constant seq))]))
(define (parse-varref expr)
(match expr
[(struct varref (toplevel))
(make-VariableReference (parse-toplevel toplevel))]))
(define (parse-assign expr)
(match expr
[(struct assign ((struct toplevel (depth pos const? ready?)) rhs undef-ok?))
(make-ToplevelSet depth pos (parse-expr-seq-constant rhs))]))
(define (parse-apply-values expr)
(match expr
[(struct apply-values (proc args-expr))
(make-ApplyValues (parse-expr-seq-constant proc)
(parse-expr-seq-constant args-expr))]))
(define (parse-primval expr)
(match expr
[(struct primval (id))
(let ([name (hash-ref primitive-table id)])
(make-PrimitiveKernelValue name))]))