scheme-to-c/c.ss
2019-09-03 16:42:42 +02:00

3576 lines
138 KiB
Scheme
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

;;; Copyright (c) 2013 Andrew W. Keep
;;; See the accompanying file Copyright for details
;;;
;;; A nanopass compiler developed to use as a demo during Clojure Conj 2013.
;;; The source language for the compiler is:
;;;
;;; Expr --> <Primitive>
;;; | <Var>
;;; | <Const>
;;; | (quote <Datum>)
;;; | (if <Expr> <Expr>)
;;; | (if <Expr> <Expr> <Expr>)
;;; | (or <Expr> ...)
;;; | (and <Expr> ...)
;;; | (not <Expr>)
;;; | (begin <Expr> ... <Expr>)
;;; | (lambda (<Var> ...) <Expr> ... <Expr>)
;;; | (let ([<Var> <Expr>] ...) <Expr> ... <Expr>)
;;; | (letrec ([<Var> <Expr>] ...) <Expr> ... <Expr>)
;;; | (set! <Var> <Expr>)
;;; | (<Expr> <Expr> ...)
;;;
;;; Primitive --> car | cdr | cons | pair? | null? | boolean? | make-vector
;;; | vector-ref | vector-set! | vector? | vector-length | box
;;; | unbox | set-box! | box? | + | - | * | / | = | < | <= | >
;;; | >= | eq?
;;; Var --> symbol
;;; Const --> #t | #f | '() | integer between -2^60 and 2^60 - 1
;;; Datum --> <Const> | (<Datum> . <Datum>) | #(<Datum> ...)
;;;
;;; or in nanopass parlance:
;;; (define-language Lsrc
;;; (terminals
;;; (symbol (x))
;;; (primitive (pr))
;;; (constant (c))
;;; (datum (d)))
;;; (Expr (e body)
;;; pr
;;; x
;;; c
;;; (quote d)
;;; (if e0 e1)
;;; (if e0 e1 e2)
;;; (or e* ...)
;;; (and e* ...)
;;; (not e)
;;; (begin e* ... e)
;;; (lambda (x* ...) body* ... body)
;;; (let ([x* e*] ...) body* ... body)
;;; (letrec ([x* e*] ...) body* ... body)
;;; (set! x e)
;;; (e e* ...)))
;;;
;;; The following exports are defined for this library:
;;;
;;; (my-tiny-compile <exp>)
;;; my-tiny-compile is the main interface the compiler, where <exp> is
;;; a quoted expression for the compiler to evaluate. This procedure will
;;; run the nanopass parts of the compiler, produce a C output file in t.c,
;;; compile it using gcc to a program t, run the program t, directing its
;;; output to t.out, and finally use the Scheme reader to read t.out and
;;; return the value to the host Scheme system. For example, if we wanted
;;; to run a program that calculates the factorial of 5, we could do the
;;; following:
;;; (my-tiny-compile '(letrec ([f (lambda (n)
;;; (if (= n 0)
;;; 1
;;; (* n (f (- n 1)))))])
;;; (f 10)))
;;;
;;; (trace-passes)
;;; (trace-passes <pass-spec>)
;;; trace-passes is a parameter used by my-tiny-compile to decide what
;;; passees should have their output printed. When it is called without
;;; any arguments, it returns the list of passes to be traced. When it
;;; is called with an argument, the argument should be one of the
;;; following:
;;; '<pass-name> - sets this pass to be traced
;;; '(<pass-name 0> <pass-name 1> ...) - set the list of passes to trace
;;; #t - traces all passes
;;; #f - turns off trace passing
;;;
;;; all-passes
;;; lists all passes in the compiler.
;;;
;;; (use-boehm?)
;;; (use-boehm? <boolean>)
;;; use-boehm? is a parameter that indicates if the generated C code should
;;; attempt to use the boehm garbage collector. This feature is, as of
;;; yet, untested.
;;;
;;; Internals that are exported to make them available for programmers
;;; experimenting with the compiler.
;;;
;;; TBD
;;;
;;;
(library (c)
(export
Lsrc unparse-Lsrc
L1 unparse-L1
L2 unparse-L2
L3 unparse-L3
L4 unparse-L4
L5 unparse-L5
L6 unparse-L6
L7 unparse-L7
L8 unparse-L8
L9 unparse-L9
L10 unparse-L10
L11 unparse-L11
L12 unparse-L12
L13 unparse-L13
L14 unparse-L14
L15 unparse-L15
L16 unparse-L16
L17 unparse-L17
L18 unparse-L18
L19 unparse-L19
; L20 unparse-L20
L21 unparse-L21
L22 unparse-L22
unique-var
user-alloc-value-prims
user-non-alloc-value-prims
user-pred-prims
user-effect-prims
user-prims
void+user-non-alloc-value-prims
void+user-prims
closure+user-alloc-value-prims
closure+void+user-non-alloc-value-prims
closure+user-effect-prims
internal+closure+user-effect-prims
closure+void+user-prims
primitive?
void+primitive?
closure+void+primitive?
effect-free-prim?
predicate-primitive?
effect-primitive?
value-primitive?
non-alloc-value-primitive?
effect+internal-primitive?
target-fixnum?
constant?
datum?
integer-64?
set-cons
union
difference
intersect
parse-and-rename
remove-one-armed-if
remove-and-or-not
make-begin-explicit
inverse-eta-raw-primitives
quote-constants
remove-complex-constants
identify-assigned-variables
purify-letrec
optimize-direct-call
find-let-bound-lambdas
remove-anonymous-lambda
convert-assignments
uncover-free
convert-closures
optimize-known-call
expose-closure-prims
lift-lambdas
remove-complex-opera*
recognize-context
expose-allocation-primitives
return-of-set!
flatten-set!
; push-if
specify-constant-representation
expand-primitives
generate-c
use-boehm?
my-tiny-compile
trace-passes
all-passes)
(import (nanopass) (rnrs)
(only (implementation-helpers) printf format system pretty-print))
;;; As of yet untested feature for using the boehm GC
;;; in the compiled output of our compiler.
(define use-boehm?
(let ([use? #f])
(case-lambda
[() use?]
[(u?) (set! use? u?)])))
;;; Representation of our data types.
;;; We use tagged pointers, because all of our pointers are 8-byte aligned,
;;; leaving te bottom 3 bits always being 0. Using these 3 bits for tags
;;; lets us store things like fixnums as pointers, and differentiate them
;;; from pointers like closures and vectors. It also saves us using a word
;;; for a tag when in our representation of vectors, closures, etc.
(define fixnum-tag #b000)
(define fixnum-mask #b111)
(define pair-tag #b001)
(define pair-mask #b111)
(define box-tag #b010)
(define box-mask #b111)
(define vector-tag #b011)
(define vector-mask #b111)
(define closure-tag #b100)
(define closure-mask #b111)
;;; NOTE: #b101 is used for constants
(define boolean-tag #b1101)
(define boolean-mask #b1111)
(define true-rep #b111101)
(define false-rep #b101101)
(define null-rep #b100101)
(define void-rep #b110101)
(define fixnum-shift 3)
(define word-size 8)
;;; Helper function for representing unique variables as symbols by adding a
;;; number to the variables (so if we start with f we get f.n where n might
;;; be 1, 2, 3, etc, and is unique).
(define unique-var
(let ()
(define count 0)
(lambda (name)
(let ([c count])
(set! count (+ count 1))
(string->symbol
(string-append (symbol->string name) "." (number->string c)))))))
;; strip the numberic bit back off the unique-var
(define base-var
(lambda (x)
(define s0
(lambda (rls)
(if (null? rls)
(error 'base-var "not a unique-var created variable" x)
(let ([c (car rls)])
(cond
[(char-numeric? c) (s1 (cdr rls))]
[else (error 'base-var
"not a unique-var created variable" x)])))))
(define s1
(lambda (rls)
(if (null? rls)
(error 'base-var "not a unique-var created variable" x)
(let ([c (car rls)])
(cond
[(char-numeric? c) (s1 (cdr rls))]
[(char=? c #\.) (cdr rls)]
[else (error 'base-var
"not a unique-var created variable" x)])))))
(string->symbol
(list->string
(reverse
(s0 (reverse (string->list (symbol->string x)))))))))
;;; Convenience procedure for building temporaries in the compiler.
(define make-tmp (lambda () (unique-var 't)))
;;; Helpers for the various sets of primitives we have over the course of the
;;; compiler
;;; All primitives:
;;;
;;; | | | Langauge | Language |
;;; primitive | arity | context | introduced | removed |
;;; --------------------+-------+---------+------------+----------+
;;; cons | 2 | value | Lsrc | L17 |
;;; make-vector | 1 | value | Lsrc | L17 |
;;; box | 1 | value | Lsrc | L17 |
;;; car | 1 | value | Lsrc | L22 |
;;; cdr | 1 | value | Lsrc | L22 |
;;; vector-ref | 2 | value | Lsrc | L22 |
;;; vector-length | 1 | value | Lsrc | L22 |
;;; unbox | 1 | value | Lsrc | L22 |
;;; + | 2 | value | Lsrc | L22 |
;;; - | 2 | value | Lsrc | L22 |
;;; * | 2 | value | Lsrc | L22 |
;;; / | 2 | value | Lsrc | L22 |
;;; pair? | 1 | pred | Lsrc | L22 |
;;; null? | 1 | pred | Lsrc | L22 |
;;; boolean? | 1 | pred | Lsrc | L22 |
;;; vector? | 1 | pred | Lsrc | L22 |
;;; box? | 1 | pred | Lsrc | L22 |
;;; = | 2 | pred | Lsrc | L22 |
;;; < | 2 | pred | Lsrc | L22 |
;;; <= | 2 | pred | Lsrc | L22 |
;;; > | 2 | pred | Lsrc | L22 |
;;; >= | 2 | pred | Lsrc | L22 |
;;; eq? | 2 | pred | Lsrc | L22 |
;;; vector-set! | 3 | effect | Lsrc | L22 |
;;; set-box! | 2 | effect | Lsrc | L22 |
;;; --------------------+-------+---------+------------+----------+
;;; void | 0 | value | L1 | L22 |
;;; --------------------+-------+---------+------------+----------+
;;; make-closure | 1 | value | L13 | L17 |
;;; closure-code | 2 | value | L13 | L22 |
;;; closure-ref | 2 | value | L13 | L22 |
;;; closure-code-set! | 2 | effect | L13 | L22 |
;;; closure-data-set! | 3 | effect | L13 | L22 |
;;; --------------------+-------+---------+------------+----------+
;;; $vector-length-set! | 2 | effect | L17 | L22 |
;;; $set-car! | 2 | effect | L17 | L22 |
;;; $set-cdr! | 2 | effect | L17 | L22 |
;;;
;;; This is a slightly cleaned up version, but this might still be better
;;; cleaned up by adding a define-primitives form, perhaps even one that can
;;; be used in the later parts of the compiler.
;;; user value primitives that perform allocation
(define user-alloc-value-prims
'((cons . 2) (make-vector . 1) (box . 1)))
;;; user value primitives that do not perform allocation
(define user-non-alloc-value-prims
'((car . 1) (cdr . 1) (vector-ref . 2) (vector-length . 1) (unbox . 1)
(+ . 2) (- . 2) (* . 2) (/ . 2)))
;;; user predicate primitives
;;; TODO: add procedure?
(define user-pred-prims
'((pair? . 1) (null? . 1) (boolean? . 1) (vector? . 1) (box? . 1) (= . 2)
(< . 2) (<= . 2) (> . 2) (>= . 2) (eq? . 2)))
;;; user effect primitives
(define user-effect-prims
'((vector-set! . 3) (set-box! . 2)))
;;; an association list with the user primitives
(define user-prims
(append user-alloc-value-prims user-non-alloc-value-prims user-pred-prims
user-effect-prims))
;;; void primitive + non-allocation user value primitives
(define void+user-non-alloc-value-prims
(cons '(void . 0) user-non-alloc-value-prims))
;;; an association list with void and all the user primitives
(define void+user-prims
(append user-alloc-value-prims void+user-non-alloc-value-prims
user-pred-prims user-effect-prims))
;;; all the allocation value primitives, including make-closure primitive
(define closure+user-alloc-value-prims
(cons '(make-closure . 1) user-alloc-value-prims))
;;; all the non-allocation value primitives, include the closure primitives
(define closure+void+user-non-alloc-value-prims
(cons* '(closure-code . 2) '(closure-ref . 2)
void+user-non-alloc-value-prims))
;; all the user effect primitives with the closure primitives
(define closure+user-effect-prims
(cons* '(closure-code-set! . 2) '(closure-data-set! . 3)
user-effect-prims))
;; all the user effect primitives, closure primitives, and internal primitives
(define internal+closure+user-effect-prims
(cons* '($vector-length-set! . 2) '($set-car! . 2) '($set-cdr! . 2)
closure+user-effect-prims))
;; association list including all prims except the three final internal
;; primitives
(define closure+void+user-prims
(append closure+user-alloc-value-prims
closure+void+user-non-alloc-value-prims user-pred-prims
closure+user-effect-prims))
;;; various predicates for determining if a primitve is a valid prim.
(define primitive?
(lambda (x)
(assq x user-prims)))
(define void+primitive?
(lambda (x)
(assq x void+user-prims)))
(define closure+void+primitive?
(lambda (x)
(assq x closure+void+user-prims)))
(define effect-free-prim?
(lambda (x)
(assq x (append void+user-non-alloc-value-prims user-alloc-value-prims
user-pred-prims))))
(define predicate-primitive?
(lambda (x)
(assq x user-pred-prims)))
(define effect-primitive?
(lambda (x)
(assq x closure+user-effect-prims)))
(define value-primitive?
(lambda (x)
(assq x (append closure+user-alloc-value-prims
closure+void+user-non-alloc-value-prims))))
(define non-alloc-value-primitive?
(lambda (x)
(assq x closure+void+user-non-alloc-value-prims)))
(define effect+internal-primitive?
(lambda (x)
(assq x internal+closure+user-effect-prims)))
;;;;;;;;;;
;;; Helper functions for identifying terminals in the nanopass languages.
;;; determine if we have a 61-bit signed integer
(define target-fixnum?
(lambda (x)
(and (and (integer? x) (exact? x))
(<= (- (expt 2 60)) x (- (expt 2 60) 1)))))
;;; determine if we have a constant: #t, #f, '(), or 61-bit signed integer.
(define constant?
(lambda (x)
(or (target-fixnum? x) (boolean? x) (null? x))))
;;; determine if we have a valid datum (a constant, a pair of datum, or a
;;; vector of datum)
(define datum?
(lambda (x)
(or (constant? x)
(and (pair? x) (datum? (car x)) (datum? (cdr x)))
(and (vector? x)
(let loop ([i (vector-length x)])
(or (fx=? i 0)
(let ([i (fx- i 1)])
(and (datum? (vector-ref x i))
(loop i)))))))))
;;; determine if we have a 64-bit signed integer (used later in the compiler
;;; to hold the ptr representation).
(define integer-64?
(lambda (x)
(and (and (integer? x) (exact? x))
(<= (- (expt 2 63)) x (- (expt 2 63) 1)))))
;;; Random helper available on most Scheme systems, but irritatingly not in
;;; the R6RS standard.
(define make-list
(case-lambda
[(n) (make-list n (if #f #f))]
[(n v) (let loop ([n n] [ls '()])
(if (zero? n)
ls
(loop (fx- n 1) (cons v ls))))]))
;;;;;;;;
;;; The standard (not very efficient) Scheme representation of sets as lists
;;; add an item to a set
(define set-cons
(lambda (x set)
(if (memq x set)
set
(cons x set))))
;;; construct the intersection of 0 to n sets
(define intersect
(lambda set*
(if (null? set*)
'()
(fold-left (lambda (seta setb)
(let loop ([seta seta] [fset '()])
(if (null? seta)
fset
(let ([a (car seta)])
(if (memq a setb)
(loop (cdr seta) (cons a fset))
(loop (cdr seta) fset))))))
(car set*) (cdr set*)))))
;;; construct the union of 0 to n sets
(define union
(lambda set*
(if (null? set*)
'()
(fold-left (lambda (seta setb)
(let loop ([setb setb] [seta seta])
(if (null? setb)
seta
(loop (cdr setb) (set-cons (car setb) seta)))))
(car set*) (cdr set*)))))
;;; construct the difference of 0 to n sets
(define difference
(lambda set*
(if (null? set*)
'()
(fold-right (lambda (setb seta)
(let loop ([seta seta] [final '()])
(if (null? seta)
final
(let ([a (car seta)])
(if (memq a setb)
(loop (cdr seta) final)
(loop (cdr seta) (cons a final)))))))
(car set*) (cdr set*)))))
;;; Language definitions for Lsrc and L1 to L22
;;; Both the language extension and the fully specified language is included
;;; (though the fully specified language may be out of date). This can be
;;; regenerated by doing:
;;; > (import (c))
;;; > (import (nanopass))
;;; > (language->s-expression L10) => generates L10 definition
(define-language Lsrc
(terminals
(symbol (x))
(primitive (pr))
(constant (c))
(datum (d)))
(Expr (e body)
pr
x
c
(quote d)
(if e0 e1)
(if e0 e1 e2)
(or e* ...)
(and e* ...)
(not e)
(begin e* ... e)
(lambda (x* ...) body* ... body)
(let ([x* e*] ...) body* ... body)
(letrec ([x* e*] ...) body* ... body)
(set! x e)
(e e* ...)))
;;; Language 1: removes one-armed if and adds the void primitive
;
; (define-language L1
; (terminals (void+primitive (pr))
; (symbol (x))
; (constant (c))
; (datum (d)))
; (Expr (e body)
; pr
; x
; c
; (quote d)
; (if e0 e1 e2)
; (or e* ...)
; (and e* ...)
; (not e)
; (begin e* ... e)
; (lambda (x* ...) body* ... body)
; (let ([x* e*] ...) body* ... body)
; (letrec ([x* e*] ...) body* ... body)
; (set! x e)
; (e e* ...)))
;
(define-language L1
(extends Lsrc)
(terminals
(- (primitive (pr)))
(+ (void+primitive (pr))))
(Expr (e body)
(- (if e0 e1))))
;;; Language 2: removes or, and, and not forms
;
; (define-language L2
; (terminals (void+primitive (pr))
; (symbol (x))
; (constant (c))
; (datum (d)))
; (Expr (e body)
; pr
; x
; c
; (quote d)
; (if e0 e1 e2)
; (begin e* ... e)
; (lambda (x* ...) body* ... body)
; (let ([x* e*] ...) body* ... body)
; (letrec ([x* e*] ...) body* ... body)
; (set! x e)
; (e e* ...)))
;
(define-language L2
(extends L1)
(Expr (e body)
(- (or e* ...)
(and e* ...)
(not e))))
;;; Language 3: removes multiple expressions from the body of lambda, let,
;;; and letrec (to be replaced with a single begin expression that contains
;;; the expressions from the body).
;
; (define-language L3
; (terminals (void+primitive (pr))
; (symbol (x))
; (constant (c))
; (datum (d)))
; (Expr (e body)
; (letrec ([x* e*] ...) body)
; (let ([x* e*] ...) body)
; (lambda (x* ...) body)
; pr
; x
; c
; (quote d)
; (if e0 e1 e2)
; (begin e* ... e)
; (set! x e)
; (e e* ...)))
;
(define-language L3
(extends L2)
(Expr (e body)
(- (lambda (x* ...) body* ... body)
(let ([x* e*] ...) body* ... body)
(letrec ([x* e*] ...) body* ... body))
(+ (lambda (x* ...) body)
(let ([x* e*] ...) body)
(letrec ([x* e*] ...) body))))
;;; Language 4: removes raw primitives (to be replaced with a lambda and a
;;; primitive call).
;
; (define-language L4
; (terminals (void+primitive (pr))
; (symbol (x))
; (constant (c))
; (datum (d)))
; (Expr (e body)
; (primcall pr e* ...)
; (letrec ([x* e*] ...) body)
; (let ([x* e*] ...) body)
; (lambda (x* ...) body)
; x
; c
; (quote d)
; (if e0 e1 e2)
; (begin e* ... e)
; (set! x e)
; (e e* ...)))
;
(define-language L4
(extends L3)
(Expr (e body)
(- pr)
(+ (primcall pr e* ...) => (pr e* ...))))
;;; Language 5: removes raw constants (to be replaced with quoted constant).
;
; (define-language L5
; (terminals
; (void+primitive (pr))
; (symbol (x))
; (datum (d)))
; (Expr (e body)
; (primcall pr e* ...)
; (letrec ([x* e*] ...) body)
; (let ([x* e*] ...) body)
; (lambda (x* ...) body)
; x
; (quote d)
; (if e0 e1 e2)
; (begin e* ... e)
; (set! x e)
; (e e* ...)))
;
(define-language L5
(extends L4)
(terminals
(- (constant (c))))
(Expr (e body)
(- c)))
;;; Language 6: removes quoted datum (to be replaced with explicit calls to
;;; cons and make-vector+vector-set!).
;
; (define-language L6
; (terminals
; (constant (c))
; (void+primitive (pr))
; (symbol (x)))
; (Expr (e body)
; (quote c)
; (primcall pr e* ...)
; (letrec ([x* e*] ...) body)
; (let ([x* e*] ...) body)
; (lambda (x* ...) body)
; x
; (if e0 e1 e2)
; (begin e* ... e)
; (set! x e)
; (e e* ...)))
;
(define-language L6
(extends L5)
(terminals
(- (datum (d)))
(+ (constant (c))))
(Expr (e body)
(- (quote d))
(+ (quote c))))
;;; Language 7: adds a listing of assigned variables to the body of the
;;; binding forms: let, letrec, and lambda.
; (define-language L7
; (terminals
; (symbol (x a))
; (constant (c))
; (void+primitive (pr)))
; (Expr (e body)
; (letrec ([x* e*] ...) abody)
; (let ([x* e*] ...) abody)
; (lambda (x* ...) abody)
; (quote c)
; (primcall pr e* ...)
; x
; (if e0 e1 e2)
; (begin e* ... e)
; (set! x e)
; (e e* ...))
; (AssignedBody (abody)
; (assigned (a* ...) body)))
;
(define-language L7
(extends L6)
(terminals
(- (symbol (x)))
(+ (symbol (x a))))
(Expr (e body)
(- (lambda (x* ...) body)
(let ([x* e*] ...) body)
(letrec ([x* e*] ...) body))
(+ (lambda (x* ...) abody)
(let ([x* e*] ...) abody)
(letrec ([x* e*] ...) abody)))
(AssignedBody (abody)
(+ (assigned (a* ...) body))))
;;; Language 8: letrec binding is changed to only bind variables to lambdas.
;
; (define-language L8
; (terminals (symbol (x a))
; (constant (c))
; (void+primitive (pr)))
; (Expr (e body)
; (letrec ([x* le*] ...) body)
; le
; (let ([x* e*] ...) abody)
; (quote c)
; (primcall pr e* ...)
; x
; (if e0 e1 e2)
; (begin e* ... e)
; (set! x e)
; (e e* ...))
; (AssignedBody (abody)
; (assigned (a* ...) body))
; (LambdaExpr (le)
; (lambda (x* ...) abody)))
;
(define-language L8
(extends L7)
(Expr (e body)
(- (lambda (x* ...) abody)
(letrec ([x* e*] ...) abody))
(+ le
(letrec ([x* le*] ...) body)))
(LambdaExpr (le)
(+ (lambda (x* ...) abody))))
;;; Language 9: removes lambda expressions from expression context,
;;; effectively meaning we can only have lambdas bound in the right-hand-side
;;; of letrec expressions.
;
; (define-language L9
; (terminals
; (symbol (x a))
; (constant (c))
; (void+primitive (pr)))
; (Expr (e body)
; (letrec ([x* le*] ...) body)
; (let ([x* e*] ...) abody)
; (quote c)
; (primcall pr e* ...)
; x
; (if e0 e1 e2)
; (begin e* ... e)
; (set! x e)
; (e e* ...))
; (AssignedBody (abody)
; (assigned (a* ...) body))
; (LambdaExpr (le)
; (lambda (x* ...) abody)))
;
(define-language L9
(extends L8)
(Expr (e body)
(- le)))
;;; Language 10: removes set! and assigned bodies (to be replaced by set-box!
;;; primcall for set!, and unbox primcall for references of assigned variables).
;
; (define-language L10
; (terminals
; (symbol (x))
; (constant (c))
; (void+primitive (pr)))
; (Expr (e body)
; (let ([x* e*] ...) body)
; (letrec ([x* le*] ...) body)
; (quote c)
; (primcall pr e* ...)
; x
; (if e0 e1 e2)
; (begin e* ... e)
; (e e* ...))
; (LambdaExpr (le)
; (lambda (x* ...) body)))
;
(define-language L10
(extends L9)
(terminals
(- (symbol (x a)))
(+ (symbol (x))))
(Expr (e body)
(- (let ([x* e*] ...) abody)
(set! x e))
(+ (let ([x* e*] ...) body)))
(LambdaExpr (le)
(- (lambda (x* ...) abody))
(+ (lambda (x* ...) body)))
(AssignedBody (abody)
(- (assigned (a* ...) body))))
;;; Language 11: add a list of free variables to the body of lambda
;;; expressions (starting closure conversion code).
;
; (define-language L11
; (terminals
; (symbol (x f))
; (constant (c))
; (void+primitive (pr)))
; (Expr (e body)
; (let ([x* e*] ...) body)
; (letrec ([x* le*] ...) body)
; (quote c)
; (primcall pr e* ...)
; x
; (if e0 e1 e2)
; (begin e* ... e)
; (e e* ...))
; (LambdaExpr (le)
; (lambda (x* ...) fbody))
; (FreeBody (fbody)
; (free (f* ...) body)))
;
(define-language L11
(extends L10)
(terminals
(- (symbol (x)))
(+ (symbol (x f))))
(LambdaExpr (le)
(- (lambda (x* ...) body))
(+ (lambda (x* ...) fbody)))
(FreeBody (fbody)
(+ (free (f* ...) body))))
;;; Language L12: removes the letrec form and adds closure and labels forms
;;; to replace it. The closure form binds a variable to a label (code
;;; pointer) and its set of free variables, and the labels form binds labels
;;; (code pointer) to lambda expressions.
;
; (define-language L12
; (terminals
; (symbol (x f l))
; (constant (c))
; (void+primitive (pr)))
; (Expr (e body)
; (label l)
; (closures ((x* l* f** ...) ...) lbody)
; (let ([x* e*] ...) body)
; (quote c)
; (primcall pr e* ...)
; x
; (if e0 e1 e2)
; (begin e* ... e)
; (e e* ...))
; (LambdaExpr (le)
; (lambda (x* ...) fbody))
; (FreeBody (fbody)
; (free (f* ...) body))
; (LabelsBody (lbody)
; (labels ([l* le*] ...) body)))
;
(define-language L12
(extends L11)
(terminals
(- (symbol (x f)))
(+ (symbol (x f l))))
(Expr (e body)
(- (letrec ([x* le*] ...) body))
(+ (closures ([x* l* f** ...] ...) lbody)
(label l)))
(LabelsBody (lbody)
(+ (labels ([l* le*] ...) body))))
;;; Language 13: finishes closure conversion, removes the closures form,
;;; replacing it with primitive calls to deal with closure objects, and
;;; raises the labels from into the Expr non-terminal.
;
; (define-language L13
; (terminals
; (closure+void+primitive (pr))
; (symbol (x f l))
; (constant (c)))
; (Expr (e body)
; (labels ([l* le*] ...) body)
; (label l)
; (let ([x* e*] ...) body)
; (quote c)
; (primcall pr e* ...)
; x
; (if e0 e1 e2)
; (begin e* ... e)
; (e e* ...))
; (LambdaExpr (le)
; (lambda (x* ...) body)))
;
(define-language L13
(extends L12)
(terminals
(- (void+primitive (pr)))
(+ (closure+void+primitive (pr))))
(Expr (e body)
(- (closures ([x* l* f** ...] ...) lbody))
(+ (labels ([l* le*] ...) body)))
(LabelsBody (lbody)
(- (labels ([l* le*] ...) body)))
(LambdaExpr (le)
(- (lambda (x* ...) fbody))
(+ (lambda (x* ...) body)))
(FreeBody (fbody)
(- (free (f* ...) body))))
;;; Language 14: removes labels form from the Expr nonterminal and puts a
;;; single labels form at the top. Essentially this raises all of our
;;; closure converted functions to the top.
;
; (define-language L14
; (entry Program)
; (terminals
; (closure+void+primitive (pr))
; (symbol (x f l))
; (constant (c)))
; (Expr (e body)
; (label l)
; (let ([x* e*] ...) body)
; (quote c)
; (primcall pr e* ...)
; x
; (if e0 e1 e2)
; (begin e* ... e)
; (e e* ...))
; (LambdaExpr (le)
; (lambda (x* ...) body))
; (Program (p)
; (labels ([l* le*] ...) l)))
;
(define-language L14
(extends L13)
(entry Program)
(Program (p)
(+ (labels ([l* le*] ...) l)))
(Expr (e body)
(- (labels ([l* le*] ...) body))))
;;; Language 15: moves simple expressions (constants and variable references)
;;; out of the Expr nonterminal, and replaces expressions referred to in
;;; calls and primcalls with simple expressions. This effectively removes
;;; complex operands to calls and primcalls.
;
; (define-language L15
; (entry Program)
; (terminals
; (closure+void+primitive (pr))
; (symbol (x f l))
; (constant (c)))
; (Expr (e body)
; (se se* ...)
; (primcall pr se* ...)
; se
; (label l)
; (let ([x* e*] ...) body)
; (if e0 e1 e2)
; (begin e* ... e))
; (LambdaExpr (le)
; (lambda (x* ...) body))
; (Program (p)
; (labels ([l* le*] ...) l))
; (SimpleExpr (se)
; x
; (quote c)))
;
(define-language L15
(extends L14)
(Expr (e body)
(- x
(quote c)
(label l)
(primcall pr e* ...)
(e e* ...))
(+ se
(primcall pr se* ...) => (pr se* ...)
(se se* ...)))
(SimpleExpr (se)
(+ x
(label l)
(quote c))))
;;; Language 16: separates the Expr nonterminal into the Value, Effect, and
;;; Predicate nonterminals. This is needed to translate from our expression
;;; language into a language like C that has statements (effects) and
;;; expressions (values) and predicates that need to be simply values.
(define-language L16
(terminals
(symbol (x l))
(value-primitive (vpr))
(effect-primitive (epr))
(predicate-primitive (ppr))
(constant (c)))
(Program (prog)
(labels ([l* le*] ...) l))
(LambdaExpr (le)
(lambda (x* ...) body))
(SimpleExpr (se)
x
(label l)
(quote c))
(Value (v body)
se
(if p0 v1 v2)
(begin e* ... v)
(let ([x* v*] ...) body)
(primcall vpr se* ...) => (vpr se* ...)
(se se* ...))
(Effect (e)
(nop)
(if p0 e1 e2)
(begin e* ... e)
(let ([x* v*] ...) e)
(primcall epr se* ...) => (epr se* ...)
(se se* ...))
(Predicate (p)
(true)
(false)
(if p0 p1 p2)
(begin e* ... p)
(let ([x* v*] ...) p)
(primcall ppr se* ...) => (ppr se* ...)))
;;; Language 17: removes the allocation primitives: cons, box, make-vector,
;;; and make-closure and adds a generic alloc form for specifying allocation. It
;;; also adds raw integers for specifying type tags in the alloc form.
;
; (define-language L17
; (entry Program)
; (terminals
; (integer-64 (i))
; (effect+internal-primitive (epr))
; (non-alloc-value-primitive (vpr))
; (symbol (x l))
; (predicate-primitive (ppr))
; (constant (c)))
; (Program (prog)
; (labels ([l* le*] ...) l))
; (LambdaExpr (le)
; (lambda (x* ...) body))
; (SimpleExpr (se)
; x
; (label l)
; (quote c))
; (Value (v body)
; (alloc i se)
; se
; (if p0 v1 v2)
; (begin e* ... v)
; (let ([x* v*] ...) body)
; (primcall vpr se* ...)
; (se se* ...))
; (Effect (e)
; (nop)
; (if p0 e1 e2)
; (begin e* ... e)
; (let ([x* v*] ...) e)
; (primcall epr se* ...)
; (se se* ...))
; (Predicate (p)
; (true)
; (false)
; (if p0 p1 p2)
; (begin e* ... p)
; (let ([x* v*] ...) p)
; (primcall ppr se* ...)))
;
(define-language L17
(extends L16)
(terminals
(- (value-primitive (vpr))
(effect-primitive (epr)))
(+ (non-alloc-value-primitive (vpr))
(effect+internal-primitive (epr))
(integer-64 (i))))
(Value (v body)
(+ (alloc i se))))
;;; Language L18: removes let forms and replaces them with a top-level locals
;;; form that indicates which variables are bound in the function (so they
;;; can be listed at the top of our C function) and set! that do simple
;;; assignments.
;
; (define-language L18
; (entry Program)
; (terminals
; (integer-64 (i))
; (effect+internal-primitive (epr))
; (non-alloc-value-primitive (vpr))
; (symbol (x l))
; (predicate-primitive (ppr))
; (constant (c)))
; (Program (prog)
; (labels ([l* le*] ...) l))
; (SimpleExpr (se)
; x
; (label l)
; (quote c))
; (Value (v body)
; (alloc i se)
; se
; (if p0 v1 v2)
; (begin e* ... v)
; (primcall vpr se* ...)
; (se se* ...))
; (Effect (e)
; (set! x v)
; (nop)
; (if p0 e1 e2)
; (begin e* ... e)
; (primcall epr se* ...)
; (se se* ...))
; (Predicate (p)
; (true)
; (false)
; (if p0 p1 p2)
; (begin e* ... p)
; (primcall ppr se* ...))
; (LocalsBody (lbody)
; (locals (x* ...) body))
; (LambdaExpr (le)
; (lambda (x* ...) lbody)))
;
(define-language L18
(extends L17)
(Value (v body)
(- (let ([x* v*] ...) body)))
(Effect (e)
(- (let ([x* v*] ...) e))
(+ (set! x v)))
(Predicate (p)
(- (let ([x* v*] ...) p)))
(LambdaExpr (le)
(- (lambda (x* ...) body))
(+ (lambda (x* ...) lbody)))
(LocalsBody (lbody)
(+ (locals (x* ...) body))))
;;; Language 19: simplify the right-hand-side of a set! so that it can
;;; contain, simple expression, allocations, primcalls, and function calls,
;;; but not ifs, or begins.
;
; (define-language L19
; (terminals
; (integer-64 (i))
; (effect+internal-primitive (epr))
; (non-alloc-value-primitive (vpr))
; (symbol (x l))
; (predicate-primitive (ppr))
; (constant (c)))
; (Program (prog)
; (labels ([l* le*] ...) l))
; (SimpleExpr (se)
; x
; (label l)
; (quote c))
; (Value (v body)
; rhs
; (if p0 v1 v2)
; (begin e* ... v))
; (Effect (e)
; (set! x rhs)
; (nop)
; (if p0 e1 e2)
; (begin e* ... e)
; (primcall epr se* ...)
; (se se* ...))
; (Predicate (p)
; (true)
; (false)
; (if p0 p1 p2)
; (begin e* ... p)
; (primcall ppr se* ...))
; (LocalsBody (lbody)
; (locals (x* ...) body))
; (LambdaExpr (le)
; (lambda (x* ...) lbody))
; (Rhs (rhs)
; se
; (alloc i se)
; (primcall vpr se* ...)
; (se se* ...)))
;
(define-language L19
(extends L18)
(Value (v body)
(- se
(alloc i se)
(primcall vpr se* ...)
(se se* ...))
(+ rhs))
(Rhs (rhs)
(+ se
(alloc i se)
(primcall vpr se* ...) => (vpr se* ...)
(se se* ...)))
(Effect (e)
(- (set! x v))
(+ (set! x rhs))))
;;; Language L20: remove begin from the predicate production (effectively
;;; forcing the if to only have if, true, false, and predicate primitive
;;; calls).
;;; TODO: removed this language because our push-if pass was buggy, and
;;; fixing it requires us to flatten code into something like
;;; basic blocks, and we can avoid doing this since our target
;;; is C. We could revisit it for other backend targets.
;
; (define-language L20
; (terminals
; (integer-64 (i))
; (effect+internal-primitive (epr))
; (non-alloc-value-primitive (vpr))
; (symbol (x l))
; (predicate-primitive (ppr))
; (constant (c)))
; (Program (prog)
; (labels ([l* le*] ...) l))
; (SimpleExpr (se)
; x
; (label l)
; (quote c))
; (Value (v body)
; rhs
; (if p0 v1 v2)
; (begin e* ... v))
; (Effect (e)
; (set! x rhs)
; (nop)
; (if p0 e1 e2)
; (begin e* ... e)
; (primcall epr se* ...)
; (se se* ...))
; (Predicate (p)
; (true)
; (false)
; (if p0 p1 p2)
; (primcall ppr se* ...))
; (LocalsBody (lbody)
; (locals (x* ...) body))
; (LambdaExpr (le)
; (lambda (x* ...) lbody))
; (Rhs (rhs)
; se
; (alloc i se)
; (primcall vpr se* ...)
; (se se* ...)))
;
; (define-language L20
; (extends L19)
; (Predicate (p)
; (- (begin e* ... p))))
;;; Language 21: remove quoted constants and replace it with our raw ptr
;;; representation (i.e. 64-bit integers)
;
; (define-language L21
; (terminals
; (integer-64 (i))
; (effect+internal-primitive (epr))
; (non-alloc-value-primitive (vpr))
; (symbol (x l))
; (predicate-primitive (ppr)))
; (Program (prog)
; (labels ([l* le*] ...) l))
; (SimpleExpr (se)
; i
; x
; (label l))
; (Value (v body)
; rhs
; (if p0 v1 v2)
; (begin e* ... v))
; (Effect (e)
; (set! x rhs)
; (nop)
; (if p0 e1 e2)
; (begin e* ... e)
; (primcall epr se* ...)
; (se se* ...))
; (Predicate (p)
; (true)
; (false)
; (if p0 p1 p2)
; (primcall ppr se* ...))
; (LocalsBody (lbody)
; (locals (x* ...) body))
; (LambdaExpr (le)
; (lambda (x* ...) lbody))
; (Rhs (rhs)
; se
; (alloc i se)
; (primcall vpr se* ...)
; (se se* ...)))
;
(define-language L21
(extends L19)
(terminals
(- (constant (c))))
(SimpleExpr (se)
(- (quote c))
(+ i)))
;;; Language 22: remove the primcalls and replace them with mref (memory
;;; references), add, subtract, multiply, divide, shift-right, shift-left,
;;; logand, mset! (memory set), =, <, and <=.
;;;
;;; TODO: we should probably replace this with "machine" instructions
;;; instead, so that we can more easily extend the language and generate C
;;; code from it.
;
; (define-language L22
; (terminals
; (integer-64 (i))
; (effect+internal-primitive (epr))
; (non-alloc-value-primitive (vpr))
; (symbol (x l))
; (predicate-primitive (ppr)))
; (Program (prog)
; (labels ([l* le*] ...) l))
; (SimpleExpr (se)
; (logand se0 se1)
; (shift-left se0 se1)
; (shift-right se0 se1)
; (divide se0 se1)
; (multiply se0 se1)
; (subtract se0 se1)
; (add se0 se1)
; (mref se0 (maybe se1?) i)
; i
; x
; (label l))
; (Value (v body)
; rhs
; (if p0 v1 v2)
; (begin e* ... v))
; (Effect (e)
; (mset! se0 (maybe se1?) i se2)
; (set! x rhs)
; (nop)
; (if p0 e1 e2)
; (begin e* ... e)
; (se se* ...))
; (Predicate (p)
; (<= se0 se1)
; (< se0 se1)
; (= se0 se1)
; (true)
; (false)
; (if p0 p1 p2))
; (LocalsBody (lbody)
; (locals (x* ...) body))
; (LambdaExpr (le)
; (lambda (x* ...) lbody))
; (Rhs (rhs)
; se
; (alloc i se)
; (se se* ...)))
;
(define-language L22
(extends L21)
(Rhs (rhs)
(- (primcall vpr se* ...)))
(SimpleExpr (se)
(+ (mref se0 (maybe se1?) i)
(add se0 se1)
(subtract se0 se1)
(multiply se0 se1)
(divide se0 se1)
(shift-right se0 se1)
(shift-left se0 se1)
(logand se0 se1)))
(Effect (e)
(- (primcall epr se* ...))
(+ (mset! se0 (maybe se1?) i se2)))
(Predicate (p)
(- (primcall ppr se* ...))
(+ (= se0 se1)
(< se0 se1)
(<= se0 se1))))
;;;;;;;;;
;;; beginning of our pass listings
;;; pass: parse-and-rename : S-expression -> Lsrc (or error)
;;;
;;; parses an S-expression, and, if it conforms to the input language,
;;; renames the local variables to be represented with a unique variable.
;;; This helps us to separate keywords from varialbes and recognize one
;;; variable binding as different from another. This step is also called
;;; alpha-renaming or alpha-conversion. The output will be in the Lsrc
;;; language forms, represented as records.
;;;
;;; Some design decisions here: We could have decided to have this pass
;;; remove one-armed ifs, remove and, or, and not, setup begins in the body
;;; of our letrec, let, and lambda, and potentially quoted constants and
;;; eta-expanded raw primitives, rather than doing each of these as separate
;;; passes. I have not done this here, primarily for educational reasons,
;;; since these simple passes are a gentle introduction to how the passes are
;;; written.
;;;
(define-pass parse-and-rename : * (e) -> Lsrc ()
;;; Helper functions for this pass.
(definitions
;;; process-body - used to process the body of begin, let, letrec, and
;;; lambda expressions. since all four of these have the same pattern in
;;; the body.
(define process-body
(lambda (who env body* f)
(when (null? body*) (error who "invalid empty body"))
(let loop ([body (car body*)] [body* (cdr body*)] [rbody* '()])
(if (null? body*)
(f (reverse rbody*) (Expr body env))
(loop (car body*) (cdr body*)
(cons (Expr body env) rbody*))))))
;;; vars-unique? - processes the list of bindings to make sure all of the
;;; variable names are different (i.e. we don't want to allow
;;; (lambda (x x) x), since we would not know which x is which).
(define vars-unique?
(lambda (fmls)
(let loop ([fmls fmls])
(or (null? fmls)
(and (not (memq (car fmls) (cdr fmls)))
(loop (cdr fmls)))))))
;;; unique-vars - builds a list of unique variables based on a set of
;;; formals and extends the environment. it takes a function as an
;;; argument (effectively a continuation), and passes it the updated
;;; environment and a list of unique variables.
(define unique-vars
(lambda (env fmls f)
(unless (vars-unique? fmls)
(error 'unique-vars "invalid formals" fmls))
(let loop ([fmls fmls] [env env] [rufmls '()])
(if (null? fmls)
(f env (reverse rufmls))
(let* ([fml (car fmls)] [ufml (unique-var fml)])
(loop (cdr fmls) (cons (cons fml ufml) env)
(cons ufml rufmls)))))))
;;; process-bindings - processes the bindings of a let or letrec and
;;; produces bindings for unique variables for each of the original
;;; variables. it also processes the right-hand sides of the variable
;;; bindings and selects either the original environment (for let) or the
;;; updated environment (for letrec).
(define process-bindings
(lambda (rec? env bindings f)
(let loop ([bindings bindings] [rfml* '()] [re* '()])
(if (null? bindings)
(unique-vars env rfml*
(lambda (new-env rufml*)
(let ([env (if rec? new-env env)])
(let loop ([rufml* rufml*]
[re* re*]
[ufml* '()]
[e* '()])
(if (null? rufml*)
(f new-env ufml* e*)
(loop (cdr rufml*) (cdr re*)
(cons (car rufml*) ufml*)
(cons (Expr (car re*) env) e*)))))))
(let ([binding (car bindings)])
(loop (cdr bindings) (cons (car binding) rfml*)
(cons (cadr binding) re*)))))))
;;; Expr* - helper to process a list of expressions.
(define Expr*
(lambda (e* env)
(map (lambda (e) (Expr e env)) e*)))
;;; with-output-language rebinds quasiquote so that it will build
;;; language records.
(with-output-language (Lsrc Expr)
;;; build-primitive - this is a helper function to build entries in the
;;; initial environment for our user primitives. the initial
;;; enviornment contains a mapping of keywords and primitives to
;;; processing functions that check their arity (in the case of
;;; primitives) or their forms (in the case of keywords). These are
;;; put into an environment, because keywords and primitives can be
;;; rebound. (i.e. (lambda (lambda) (lambda lambda)) is a perfectly
;;; valid function in Scheme that takes a function as an argument and
;;; applies the argument to itself).
(define build-primitive
(lambda (as)
(let ([name (car as)] [argc (cdr as)])
(cons name
(if (< argc 0)
(error who
"primitives with arbitrary counts are not currently supported"
name)
;;; we'd love to support arbitrary argument lists, but we'd
;;; need to either:
;;; 1. get rid of raw primitives, or
;;; 2. add function versions of our raw primitives with
;;; arbitrary arguments, or (possibly and)
;;; 3. add general handling for functions with arbitrary
;;; arguments. (i.e. support for (lambda args <body>)
;;; or (lambda (x y . args) <body>), which we don't
;;; currently support.
#;(let ([argc (bitwise-not argc)])
(lambda (env . e*)
(if (>= (length e*) argc)
`(,name ,(Expr* e* env) ...)
(error name "invalid argument count"
(cons name e*)))))
(lambda (env . e*)
(if (= (length e*) argc)
`(,name ,(Expr* e* env) ...)
(error name "invalid argument count"
(cons name e*)))))))))
;;; initial-env - this is our initial environment, expressed as an
;;; association list of keywords and primitives (represented as
;;; symbols) to procedure handlers (represented as procedures). As the
;;; program is processed through this pass, it will be extended with
;;; local bidings from variables (represented as symbols) to unique
;;; variables (represented as symbols with a format of symbol.number).
(define initial-env
(cons*
(cons 'quote (lambda (env d)
(unless (datum? d)
(error 'quote "invalid datum" d))
`(quote ,d)))
(cons 'if (case-lambda
[(env e0 e1) `(if ,(Expr e0 env) ,(Expr e1 env))]
[(env e0 e1 e2)
`(if ,(Expr e0 env) ,(Expr e1 env) ,(Expr e2 env))]
[x (error 'if (if (< (length x) 3)
"too few arguments"
"too many arguments")
x)]))
(cons 'or (lambda (env . e*) `(or ,(Expr* e* env) ...)))
(cons 'and (lambda (env . e*) `(and ,(Expr* e* env) ...)))
(cons 'not (lambda (env e) `(not ,(Expr e env))))
(cons 'begin (lambda (env . e*)
(process-body 'begin env e*
(lambda (e* e)
`(begin ,e* ... ,e)))))
(cons 'lambda (lambda (env fmls . body*)
(unique-vars env fmls
(lambda (env fmls)
(process-body 'lambda env body*
(lambda (body* body)
`(lambda (,fmls ...)
,body* ... ,body)))))))
(cons 'let (lambda (env bindings . body*)
(process-bindings #f env bindings
(lambda (env x* e*)
(process-body 'let env body*
(lambda (body* body)
`(let ([,x* ,e*] ...) ,body* ... ,body)))))))
(cons 'letrec (lambda (env bindings . body*)
(process-bindings #t env bindings
(lambda (env x* e*)
(process-body 'letrec env body*
(lambda (body* body)
`(letrec ([,x* ,e*] ...)
,body* ... ,body)))))))
(cons 'set! (lambda (env x e)
(cond
[(assq x env) =>
(lambda (as)
(let ([v (cdr as)])
(if (symbol? v)
`(set! ,v ,(Expr e env))
(error 'set! "invalid syntax"
(list 'set! x e)))))]
[else (error 'set! "set to unbound variable"
(list 'set! x e))])))
(map build-primitive user-prims)))
;;; App - helper for handling applications.
(define App
(lambda (e env)
(let ([e (car e)] [e* (cdr e)])
`(,(Expr e env) ,(Expr* e* env) ...))))))
;;; transformer: Expr: S-expression -> LSrc:Expr (or error)
;;;
;;; parses an S-expression, looking for a pair (which indicates, a
;;; keyword use, a primitive call, or a normal function call), a symbol
;;; (which indicates a variable reference or a primitive reference), or one of
;;; our constants (which indicates a raw constant).
(Expr : * (e env) -> Expr ()
(cond
[(pair? e)
(cond
[(assq (car e) env) =>
(lambda (as)
(let ([v (cdr as)])
(if (procedure? v)
(apply v env (cdr e))
(App e env))))]
[else (App e env)])]
[(symbol? e)
(cond
[(assq e env) =>
(lambda (as)
(let ([v (cdr as)])
(cond
[(symbol? v) v]
[(primitive? e) e]
[else (error who "invalid syntax" e)])))]
[else (error who "unbound variable" e)])]
[(constant? e) e]
[else (error who "invalid expression" e)]))
;;; kick off processing the S-expression by handing Expr our initial
;;; S-expression and the initial environment.
(Expr e initial-env))
;;; pass: remove-one-armed-if : Lsrc -> L1
;;;
;;; this pass replaces the (if e0 e1) form with an if that will explicitly
;;; produce a void value when the predicate expression returns false. In
;;; other words:
;;; (if e0 e1) => (if e0 e1 (void))
;;;
;;; Design descision: kept seperate from parse-and-rename to make it easier
;;; to understand how the nanopass framework can be used.
;;;
(define-pass remove-one-armed-if : Lsrc (e) -> L1 ()
(Expr : Expr (e) -> Expr ()
[(if ,[e0] ,[e1]) `(if ,e0 ,e1 (void))]))
;;; pass: remove-and-or-not : L1 -> L2
;;;
;;; this pass looks for references to and, or, and not and replaces it with
;;; the appropriate if expressions. this pass follows the standard
;;; expansions and has one small optimization:
;;;
;;; (if (not e0) e1 e2) => (if e0 e2 e1) [optimization]
;;; (and) => #t [from Scheme standard]
;;; (or) => #f [from Scheme standard]
;;; (and e e* ...) => (if e (and e* ...) #f) [standard expansion]
;;; (or e e* ...) => (let ([t e]) [standard expansion -
;;; (if t t (or e* ...))) avoids computing e twice]
;;;
;;; Design decision: again kept separate from parse-and-rename to simplify
;;; the discussion of this pass (adding it to parse-and-rename doesn't really
;;; make parse-and-rename much more complicated, and if we had a macro
;;; system, which would likely be implemented in parse-and-rename, or before
;;; it, we would probably want and, or, and not defined as macros, rather
;;; than forms in the language, in which case this pass would be
;;; unnecessary).
;;;
(define-pass remove-and-or-not : L1 (e) -> L2 ()
(Expr : Expr (e) -> Expr ()
[(if (not ,[e0]) ,[e1] ,[e2]) `(if ,e0 ,e2 ,e1)]
[(not ,[e0]) `(if ,e0 #f #t)]
[(and) #t]
[(and ,[e] ,[e*] ...)
;; tiny inline loop (not tail recursive, so called f instead of loop)
(let f ([e e] [e* e*])
(if (null? e*)
e
`(if ,e ,(f (car e*) (cdr e*)) #f)))]
[(or) #f]
[(or ,[e] ,[e*] ...)
;; tiny inline loop (not tail recursive, so called f instead of loop)
(let f ([e e] [e* e*])
(if (null? e*)
e
(let ([t (make-tmp)])
`(let ([,t ,e]) (if ,t ,t ,(f (car e*) (cdr e*)))))))]))
;;; pass: make-being-explicit : L2 -> L3
;;;
;;; this pass takes the L2 let, letrec, and lambda expressions (which have
;;; bodies that can contain more than one expression), and converts them into
;;; bodies with a single expression, wrapped in a begin if necessary. To
;;; avoid polluting the output with extra begins that contain only one
;;; expression the build-begin helper checks to see if there is more then one
;;; expression and if there is builds a begin.
;;;
;;; Effectively this does the following:
;;; (let ([x* e*] ...) body0 body* ... body1) =>
;;; (let ([x* e*] ...) (begin body0 body* ... body1))
;;; (letrec ([x* e*] ...) body0 body* ... body1) =>
;;; (letrec ([x* e*] ...) (begin body0 body* ... body1))
;;; (lambda (x* ...) body0 body* ... body1) =>
;;; (lambda (x* ...) (begin body0 body* ... body1))
;;;
;;; Design Decision: This could have been included with rename-and-parse,
;;; without making it significantly more compilicated, but was separated out
;;; to continue with simpler nanopass passes to help make it more obvious
;;; what is going on here.
;;;
(define-pass make-begin-explicit : L2 (e) -> L3 ()
(Expr : Expr (e) -> Expr ()
;;; Note: the defitions are within the body of the Expr transformer
;;; instead of being within the body of the pass. This means the
;;; quasiquote is bound to the Expr form, and we don't need to use
;;; with-output-language.
(definitions
;;; build-begin - helper function to build a begin only when the body
;;; contains more then one expression. (this version of the helper
;;; is a little over-kill, but it makes our traces look a little
;;; cleaner. there should be a simpler way of doing this.)
(define build-begin
(lambda (e* e)
(nanopass-case (L3 Expr) e
[(begin ,e1* ... ,e)
(build-begin (append e* e1*) e)]
[else
(if (null? e*)
e
(let loop ([e* e*] [re* '()])
(if (null? e*)
`(begin ,(reverse re*) ... ,e)
(let ([e (car e*)])
(nanopass-case (L3 Expr) e
[(begin ,e0* ... ,e0)
(loop (append e0* (cons e0 (cdr e*))) re*)]
[else (loop (cdr e*) (cons (car e*) re*))])))))]))))
[(let ([,x* ,[e*]] ...) ,[body*] ... ,[body])
`(let ([,x* ,e*] ...) ,(build-begin body* body))]
[(letrec ([,x* ,[e*]] ...) ,[body*] ... ,[body])
`(letrec ([,x* ,e*] ...) ,(build-begin body* body))]
[(lambda (,x* ...) ,[body*] ... ,[body])
`(lambda (,x* ...) ,(build-begin body* body))]))
;;; pass : inverse-eta-raw-primitives : L3 -> L4
;;;
;;; Eta reduction recognizes a function that takes a set of arguments and
;;; passes those arguments directly to another function, and unwraps the
;;; function. For instance, the function:
;;; (lambda (x y) (f x y))
;;; can be eta reduced to:
;;; f
;;; Eta reduction is not always a semantics preserving transformation because
;;; it can change the termination properties of the program, for instance a
;;; program that terminates, could turn into one that does not because a
;;; function is applied directly, rather than a function that might never be
;;; applied.
;;;
;;; In this pass, we are applying the inverse operation and adding a lambda
;;; wrapper when we see a primitive. We do this so that primitives, which we
;;; are going to open code into a C-code equivalent, can still be treated as
;;; though it was a Scheme procedure. This allows us to map over primitives,
;;; which would otherwise not be possible with our code generation. Our
;;; transformation looks for primitives in call position, marking them as
;;; primitive calls, and primitives not in call position are eta-expanded to move
;;; them into call position.
;;;
;;; (pr e* ...) => (primcall pr e* ...)
;;; pr => (lambda (x* ...) (primcall pr x* ...))
;;;
;;; Design decision: Another way to handle this would be to create a single
;;; function for each primitive, and lift these definitions to the top-level
;;; of the program, including just those primitives that are used. This
;;; would avoid the potential to re-creating the same procedure over and over
;;; again, as we are now.
;;;
(define-pass inverse-eta-raw-primitives : L3 (e) -> L4 ()
(Expr : Expr (e) -> Expr ()
[(,pr ,[e*] ...) `(primcall ,pr ,e* ...)]
[,pr (cond
[(assq pr void+user-prims) =>
(lambda (as)
(do ((i (cdr as) (fx- i 1))
(x* '() (cons (make-tmp) x*)))
((fx=? i 0) `(lambda (,x* ...) (primcall ,pr ,x* ...)))))]
[else (error who "unexpected primitive" pr)])]))
;;; pass: quote-constants : L4 -> L5
;;;
;;; A simple pass to find raw constants and wrap them in a quote.
;;; c => (quote c)
;;;
;;; Design decision: This could simply be included in the next pass.
;;;
(define-pass quote-constants : L4 (e) -> L5 ()
(Expr : Expr (e) -> Expr ()
[,c `(quote ,c)]))
;;; pass: remove-complex-constants : L5 -> L6
;;;
;;; Lifts creation of constants composed of vectors or pairs outside the body
;;; of the program and makes their creation explicit. In place of the
;;; constants, a temporary variable reference is created. The total
;;; transform looks something like the following:
;;;
;;; (letrec ([add-pair-parts (lambda (p) (+ (car p) (cdr p)))])
;;; (+ (add-pair-parts '(4 . 5)) (add-pair-parts '(6 .7)))) =>
;;; (let ([t0 (cons 4 5)] [t1 (cons 6 7)])
;;; (letrec ([add-pair-parts (lambda (p) (+ (car p) (cdr p)))])
;;; (+ (add-pair-parts t0) (add-pair-parts t1))))
;;;
;;; Design decision: Another possibility is to simply convert the constants
;;; into their memory-layed out variations, rather than treating it in pieces
;;; like this. We could extend our C run-time support to know about these
;;; pre-layed out items so that we do not need to construct them when the
;;; program starts running.
;;;
(define-pass remove-complex-constants : L5 (e) -> L6 ()
(definitions
;;; t* and e* used to gather up our final constant bindings (via set!)
(define t* '())
(define e* '()))
(Expr : Expr (e) -> Expr ()
(definitions
;;; datum->expr - a helper function for recurring through the parts of
;;; a vector or pair datum to construct its parts, until it reaches the
;;; constants in the leaves of the datum. We put this definition
;;; within the Expr transformer so that quasiquote will be bound to the
;;; L6:Expr nonterminal creation code.
(define datum->expr
(lambda (x)
(cond
[(pair? x) ;; if we have a pair, cons its recurred parts.
`(primcall cons ,(datum->expr (car x)) ,(datum->expr (cdr x)))]
[(vector? x) ;; if we have a vector ...
(let ([l (vector-length x)] [t (make-tmp)])
;; 1. create a vector of the proper size
`(let ([,t (primcall make-vector (quote ,l))])
(begin
;; 2. set each elemenet in the vector with its recurred
;; parts.
,(let loop ([l l] [e* '()])
(if (fx=? l 0)
e*
(let ([l (fx- l 1)])
(loop l
(cons
`(primcall vector-set! ,t
(quote ,l)
,(datum->expr (vector-ref x l)))
e*)))))
...
;; and return the vector as the final expression
,t)))]
;; if it is a constant, simply quote it.
[(constant? x) `(quote ,x)]))))
[(quote ,d) ;; look for quoted constants
(if (constant? d) ;; if they are already simple
`(quote ,d) ;; quote them
(let ([t (make-tmp)]) ;; otherwise create a binding for them
(set! t* (cons t t*))
(set! e* (cons (datum->expr d) e*))
t))])
;; in the body, call the Expr transformer, and if t* is null (indicating we
;; did not find any complex constants) don't bother creating the empty let
;; around it.
(let ([e (Expr e)])
(if (null? t*)
e
`(let ([,t* ,e*] ...) ,e))))
;;; pass: identify-assigned-variables : L6 -> L7
;;;
;;; This pass identifies which variables are assigned using set!. This is the
;;; first step in a process known as assignment conversion. We separate
;;; assigned varaibles from unassigned variables, and assigned variables are
;;; converted into reference cells that can be manipulated through
;;; primitives. In this compiler, we use the existing box type to create the
;;; cells (using the box primitive), reference the cells (using the unbox
;;; primitive), and mutating the cells (using the set-box! primitive). One
;;; of the reasons we perform assignment conversion is it allows multiple
;;; closures to capture the same mutable variable and all of the closures
;;; will see the same, up-to-date, value for that variable since they all
;;; simply contain a pointer to the reference cell. If we didn't do this
;;; conversion, we would need to figure out a different way to handle set! so
;;; that the updates are propagated to all the closures that close over the
;;; variable. The eventual effect of assignemnt conversion is the following:
;;; (let ([x 5])
;;; (set! x (+ x 5))
;;; (+ x x)) =>
;;; (let ([t0 5])
;;; (let ([x (box t0)])
;;; (primcall set-box! x (+ (unbox x) 5)
;;; (+ (unbox x) (unbox x))))
;;; (of course in this example, we could have simply shadowed x)
;;;
;;; This pass, however, is simply an analysis pass. It gathers up the set of
;;; assigned variables and deposits them in an AssignedBody just inside their
;;; binding points. The transformation in this pass is:
;;;
;;; (let ([x 5] [y 7] [z 10])
;;; (set! x (+ x y))
;;; (+ x z)) =>
;;; (let ([x 5] [y 7] [z 10])
;;; (assigned (x)
;;; (set! x (+ x y))
;;; (+ x z)))
;;;
;;; The key operations we depend on are:
;;; set-cons - to extend a set with a newly found assigned variable.
;;; intersect - to determine which assigned variables are bound by a lambda,
;;; let, or letrec.
;;; difference - to remove assigned variables from a set once we locate their
;;; binding form.
;;; union - to gather assigned variables from sub-expressions into a
;;; single set.
;;;
;;; Note: we are using a relatively inefficient representation of sets here,
;;; simply representing them as lists and using our set-cons, intersect,
;;; difference, and union procedures to maintain their set-ness. We could
;;; choose a more efficient set representation, perhaps leveraging insertion
;;; sort or something similar, or we could choose to represent our variables
;;; using a mutable record, with a field to indicate if it is assigned.
;;; Either approach will improve the worst case performance of this pass,
;;; though the mutable record version will get us down to a linear cost,
;;; which is the best case for any pass in the current version of the
;;; nanopass framework.
;;;
(define-pass identify-assigned-variables : L6 (e) -> L7 ()
(Expr : Expr (e) -> Expr ('())
;; identify an assigned variable
[(set! ,x ,[e assigned*]) (values `(set! ,x ,e) (set-cons x assigned*))]
;; deposit assigned variables at lambda, let, and letrec binding sites
[(lambda (,x* ...) ,[body assigned*])
(values
`(lambda (,x* ...) (assigned (,(intersect x* assigned*) ...) ,body))
(difference assigned* x*))]
[(let ([,x* ,[e* assigned**]] ...) ,[body assigned*])
(values
`(let ([,x* ,e*] ...)
(assigned (,(intersect x* assigned*) ...) ,body))
(apply union (difference assigned* x*) assigned**))]
[(letrec ([,x* ,[e* assigned**]] ...) ,[body assigned*])
(let ([assigned* (apply union assigned* assigned**)])
(values
`(letrec ([,x* ,e*] ...)
(assigned (,(intersect x* assigned*) ...) ,body))
(difference assigned* x*)))]
;; traverse forms with nested expressions to gather up the assignments
;; from each sub-expression. this could be simplified if the nanopass
;; framework had a way to automatically combine these.
[(primcall ,pr ,[e* assigned**] ...)
(values `(primcall ,pr ,e* ...) (apply union assigned**))]
[(if ,[e0 assigned0*] ,[e1 assigned1*] ,[e2 assigned2*])
(values `(if ,e0 ,e1 ,e2) (union assigned0* assigned1* assigned2*))]
[(begin ,[e* assigned**] ... ,[e assigned*])
(values `(begin ,e* ... ,e) (apply union assigned* assigned**))]
[(,[e assigned*] ,[e* assigned**] ...)
(values `(,e ,e* ...) (apply union assigned* assigned**))])
;; in the body, call
(let-values ([(e assigned*) (Expr e)])
(unless (null? assigned*)
(error who "found one or more unbound variables" assigned*))
e))
;;; pass: purify-letrec : L7 -> L8
;;;
;;; this pass looks for places where letrec is used to bind something other
;;; than a lambda expression, or where a letrec bound variable is assigned
;;; and moves these bindings into let bindings. when the pass is done all of
;;; the letrecs in our program will be immutable and bind only lambda
;;; expressions. For instance, the following example:
;;;
;;; (letrec ([f (lambda (g x) (g x))]
;;; [a 5]
;;; [b (+ 5 7)]
;;; [g (lambda (h) (f h 5))]
;;; [c (let ([x 10]) ((letrec ([zero? (lambda (n) (= n 0))]
;;; [f (lambda (n)
;;; (if (zero? n)
;;; 1
;;; (* n (f (- n 1)))))])
;;; f)
;;; x))]
;;; [m 10]
;;; [z (lambda (x) x)])
;;; (set! z (lambda (x) (+ x x)))
;;; (set! m (+ m m))
;;; (+ (+ (+ (f z a) (f z b)) (f z c)) (g z))))
;;; =>
;;; (let ([z (quote #f)] [m '#f] [c '#f])
;;; (let ([b (+ '5 '7)] [a '5])
;;; (letrec ([g (lambda (h) (f h '5))]
;;; [f (lambda (g x) (g x))])
;;; (begin
;;; (set! z (lambda (x) x))
;;; (set! m '10)
;;; (set! c
;;; (let ([x '10])
;;; ((letrec ([f (lambda (n)
;;; (if (zero? n)
;;; '1
;;; (* n (f (- n '1)))))]
;;; [zero? (lambda (n) (= n '0))])
;;; f)
;;; x)))
;;; (begin
;;; (set! z (lambda (x) (+ x x)))
;;; (set! m (+ m m))
;;; (+ (+ (+ (f z a) (f z b)) (f z c)) (g z)))))))
;;;
;;; The algorithm for doing this is fairly simple. We attempt to separate
;;; the bindings into simple bindings, lambda bindings, and complex bindings.
;;; Simple bindings bind a constant, a variable reference not bound in this
;;; letrec, the call to an effect free primitive, a begin that contains only
;;; simple expressions, or an if that contains only simple expressions to an
;;; immutable variable. The simple? predicate is used for determining when an
;;; expression is simple. A lambda expression is simply a lambda, and a
;;; complex expression is any other expression.
;;;
;;; Design decision: There are many other approaches that we could use,
;;; including those described in the "Fixing Letrec: A Faithful Yet Efficient
;;; Implementation of Schemes Recursive Binding Construct" by Waddell, et.
;;; al. and "Fixing Letrec (reloaded)" by Ghuloum and Dybvig. Earlier
;;; versions of Chez Scheme used the earlier paper, which documented how to
;;; properly handle R5RS letrecs, and newer versions use the latter paper
;;; which described how to properly handle R6RS letrec and letrec*.
;;;
(define-pass purify-letrec : L7 (e) -> L8 ()
(definitions
;; lambda? - use nanopass case to determine if an L8:Expr is a lambda
;; expression.
(define lambda?
(lambda (e)
(nanopass-case (L8 Expr) e
[(lambda (,x* ...) ,abody) #t]
[else #f])))
;; simple? - use nanopass case to deteremin if an L8:Expr is a "simple",
;; effect free expression.
(define simple?
(lambda (x bound* assigned*)
(let f ([x x])
(nanopass-case (L8 Expr) x
[(quote ,c) #t]
[,x (not (or (memq x bound*) (memq x assigned*)))]
[(primcall ,pr ,e* ...)
(and (effect-free-prim? pr) (for-all f e*))]
[(begin ,e* ... ,e) (and (for-all f e*) (f e))]
[(if ,e0 ,e1 ,e2) (and (f e0) (f e1) (f e2))]
[else #f])))))
(Expr : Expr (e) -> Expr ()
(definitions
;; build a let, when there are bindings, otherwise, just return the
;; body.
(define build-let
(lambda (x* e* a* body)
(if (null? x*)
body
`(let ([,x* ,e*] ...) (assigned (,a* ...) ,body)))))
;; build a letrec, when there are bindings, otherwise, just return the
;; body
(define build-letrec
(lambda (x* e* body)
(if (null? x*)
body
`(letrec ([,x* ,e*] ...) ,body))))
;; build a begin when we have more then one expression, otherwise just
;; return the one expression.
(define build-begin
(lambda (e* e)
(nanopass-case (L8 Expr) e
[(begin ,e1* ... ,e)
(build-begin (append e* e1*) e)]
[else
(if (null? e*)
e
(let loop ([e* e*] [re* '()])
(if (null? e*)
`(begin ,(reverse re*) ... ,e)
(let ([e (car e*)])
(nanopass-case (L8 Expr) e
[(begin ,e0* ... ,e0)
(loop (append e0* (cons e0 (cdr e*))) re*)]
[else (loop (cdr e*) (cons (car e*) re*))])))))]))))
[(letrec ([,x* ,[e*]] ...) (assigned (,a* ...) ,[body]))
;; loop through our bindings, separating them into simple, lambda, and
;; complex.
(let loop ([xb* x*] [e* e*]
[xs* '()] [es* '()]
[xl* '()] [el* '()]
[xc* '()] [ec* '()])
(if (null? xb*)
;; when we're done bind the complex bindings to #f, they are now
;; all assigned, then bind the simple bindings, then create a
;; letrec binding for the lambda expressions (eliminate the
;; assigned body, since we know none of them are assigned), and
;; finally use set! to set the values of our complex bindings.
(build-let xc* (make-list (length xc*) `(quote #f)) xc*
(build-let xs* es* '()
(build-letrec xl* el*
(build-begin
(map (lambda (xc ec) `(set! ,xc ,ec)) xc* ec*)
body))))
(let ([x (car xb*)] [e (car e*)])
(cond
[(and (not (memq x a*)) (lambda? e))
(loop (cdr xb*) (cdr e*) xs* es*
(cons x xl*) (cons e el*) xc* ec*)]
[(and (not (memq x a*)) (simple? e x* a*))
(loop (cdr xb*) (cdr e*) (cons x xs*) (cons e es*)
xl* el* xc* ec*)]
[else (loop (cdr xb*) (cdr e*) xs* es* xl* el*
(cons x xc*) (cons e ec*))]))))]))
;;; pass: optimize-direct-call : L8 -> L8
;;;
;;; one of our simplest optimizations, we convert a directly applied lambdas
;;; into a let. this allows us to avoid the creation of a closure for the
;;; let, and allows us instead to create a local binding within a function.
;;; the transform is simple:
;;;
;;; ((lambda (x* ...) body) e* ...) => (let ([x* e*] ...) body)
;;; where (length x*) == (length e*)
;;;
(define-pass optimize-direct-call : L8 (e) -> L8 ()
(Expr : Expr (e) -> Expr ()
[((lambda (,x* ...) ,[abody]) ,[e* -> e*] ...)
(guard (fx=? (length x*) (length e*)))
`(let ([,x* ,e*] ...) ,abody)]))
;;; pass: find-let-bound-lambdas : L8 -> L8
;;;
;;; this pass looks for places where let is used to bind a lambda expression
;;; to an immutable variable and converts this binding into a letrec binding.
;;; Part of the reason we can do this here, is because we have uniquely named
;;; each of our variables and none of these variables can be referenced in
;;; the right-hand side of the let bindings. If it was still possible for
;;; variables to have same name, this would not be a legal transformation,
;;; since it might cause a lambda that did not capture a variable bound in
;;; this let to bind the variable in the resulting letrec. The
;;; transformation looks like:
;;;
;;; (let ([x 5] [f (lambda (n) (+ n n))] [g (lambda (x) x)] [m 10])
;;; (assigned (g m)
;;; body)) =>
;;; (let ([x 5] [g (lambda (x) x)] [m 10])
;;; (assigned (g m)
;;; (letrec ([f (lambda (n) (+ n n))])
;;; body)))
;;;
;;; Design decisions: Handling of let can be incorporated into the handling
;;; of letrec, either through one of the algorithms described in the design
;;; decisions of the purify-letrec pass, or in the existing letrec pass. It
;;; is kept separate here, largely to make the letrec pass more straight
;;; forward to understand.
;;;
(define-pass find-let-bound-lambdas : L8 (e) -> L8 ()
(Expr : Expr (e) -> Expr ()
(definitions
;; build-let - constructs a let if any variables are bound by the let,
;; or simply returns the body if there are no bindings.
(define build-let
(lambda (x* e* a* body)
(if (null? x*)
body
`(let ([,x* ,e*] ...) (assigned (,a* ...) ,body)))))
;; build-letrec - constructs a letrec if any variables are bound by the
;; letrec, or simple returns the body if there are no bindings.
(define build-letrec
(lambda (x* le* body)
(if (null? x*)
body
`(letrec ([,x* ,le*] ...) ,body)))))
[(let ([,x* ,[e*]] ...) (assigned (,a* ...) ,[body]))
;; executes a similar algorithm to the purify-letrec pass, though it
;; does not separate simple from complex bindings, since we currently
;; handle both in the let.
(let loop ([xb* x*] [e* e*] [xl* '()] [el* '()] [xo* '()] [eo* '()])
(if (null? xb*)
(build-let xo* eo* a* (build-letrec xl* el* body))
(let ([x (car xb*)] [e (car e*)])
(nanopass-case (L8 Expr) e
[(lambda (,x* ...) ,abody)
(guard (not (memq x a*)))
(loop (cdr xb*) (cdr e*) (cons x xl*) (cons e el*) xo* eo*)]
[else (loop (cdr xb*) (cdr e*) xl* el*
(cons x xo*) (cons e eo*))]))))]))
;;; pass: remove-anonymous-lambda : L8 -> L9
;;;
;;; since we are generating a C function for each Scheme lambda, we need to
;;; have a name for each of these lambdas. In addition we need a name to use
;;; as the code pointer label, so that we can lift the lambdas to the top
;;; level of the program. The transformation is fairly simple. If we find a
;;; lambda in expression position (i.e. not in the right-hand side of a
;;; letrec binding) then we wrap a letrec around it that gives it a new name.
;;;
;;; (letrec ([l* (lambda (x** ...) body*)] ...) body) => (no change)
;;; (letrec ([l* (lambda (x** ...) body*)] ...) body)
;;;
;;; (lambda (x* ...) body) => (letrec ([t0 (lambda (x* ...) body)]) t0)
;;;
(define-pass remove-anonymous-lambda : L8 (e) -> L9 ()
(Expr : Expr (e) -> Expr ()
[(lambda (,x* ...) ,[abody])
(let ([t (unique-var 'anon)])
`(letrec ([,t (lambda (,x* ...) ,abody)]) ,t))]))
;;; pass: convert-assignments : L9 -> L10
;;;
;;; this pass completes the assignment conversion process that we started in
;;; identify-assigned-variables. We use the assigned variable list through
;;; our previous passes to make decisions about how bindings were separated.
;;; Now, we are ready to change these explicitly to the box, unbox, and
;;; set-box! primitive calls described in the identified-assigned-variable
;;; pass. We also introduce new temporaries to contain the value that will
;;; be put in the box. this is largely because we don't want our
;;; representation of assigned variables to be observable from inside the
;;; program, and if we were to introduce an operator like call/cc to our
;;; implementation, then the order our variables were setup could potentially
;;; be identified by seeing that the allocation and computation of the values
;;; are intermixed. Instead, we want all the computation to happen, then the
;;; allocation, and then the allocated locations are updated with the values.
;;;
;;; Our transform thus looks like the following:
;;;
;;; (let ([x0 e0] [x1 e1] ... [xa0 ea0] [xa1 xa0] ...)
;;; (assigned (xa0 xa1 ...)
;;; body))
;;; =>
;;; (let ([x0 e0] [x1 e1] ... [t0 ea0] [t1 ea1] ...)
;;; (let ([xa0 (primcall box t0)] [xa1 (primcall box t1)] ...)
;;; body^))
;;;
;;; (lambda (x0 x1 ... xa0 xa1 ...) (assigned (xa0 xa1 ...) body))
;;; =>
;;; (lambda (x0 x1 ... t0 t1 ...)
;;; (let ([xa0 (primcall box t0)] [xa1 (primcall box t1)] ...)
;;; body^))
;;;
;;; where
;;; (set! xa0 e) => (primcall set-box! xa0 e^)
;;; and
;;; xa0 => (primcall unbox xa0)
;;; in body^ and e^
;;;
;;; We could choose another data structure, or even create a new data
;;; structure to perform the conversion with, however, we've choosen the box
;;; because it contains exactly one cell, and takes up just one word in
;;; memory, where as our pair and vector take at least two words in memory.
;;; This decision might be different if we had other constraints on how we
;;; lay out memory.
;;;
(define-pass convert-assignments : L9 (e) -> L10 ()
(definitions
;; lookup - looks for assigned variables in the environment and maps them
;; to their temporaries.
(define lookup
(lambda (env)
(lambda (x)
(cond
[(assq x env) => cdr]
[else x]))))
;; build-env - generates temporaries, extends the environment, and
;; returns the final list of unassigned binding variables, the list of
;; emporaries, and the updated environment, through the call to f
(define build-env
(lambda (x* a* env f)
(let ([t* (map (lambda (x) (make-tmp)) a*)])
(let ([env (append (map cons a* t*) env)])
(f (map (lookup env) x*) t* env)))))
(with-output-language (L10 Expr)
;; make-boxes - build the calls to box to create the storage locations
;; for our assigned variables.
(define make-boxes
(lambda (t*)
(map (lambda (t) `(primcall box ,t)) t*)))
;; build-let - builds a let if there are any bindings, or returns the
;; body if there are none.
(define build-let
(lambda (x* e* body)
(if (null? x*)
body
`(let ([,x* ,e*] ...) ,body))))))
(Expr : Expr (e [env '()]) -> Expr ()
[(let ([,x* ,[e*]] ...) (assigned (,a* ...) ,body))
(build-env x* a* env
(lambda (x* t* env)
(build-let x* e*
(build-let a* (make-boxes t*)
(Expr body env)))))]
[,x (if (assq x env) `(primcall unbox ,x) x)]
[(set! ,x ,[e]) `(primcall set-box! ,x ,e)])
(LambdaExpr : LambdaExpr (le env) -> LambdaExpr ()
[(lambda (,x* ...) (assigned (,a* ...) ,body))
(build-env x* a* env
(lambda (x* t* env)
`(lambda (,x* ...)
,(build-let a* (make-boxes t*) (Expr body env)))))]))
;;; pass: uncover-free : L10 -> L11
;;;
;;; this pass performs the first step in closure conversion, determining the
;;; set of free-variables for each lambda expression. this list of free
;;; variables is an approximation of the values that need to be available to
;;; a procedure as its captured environment when a procedure is executed.
;;; there are numerous ways to shrink, or even eliminate this list, but in
;;; this compiler we are currently skipping any of these steps, and simply
;;; taking this set of free variables as the set we need to capture. (For
;;; one possible closure optimization technique see "Optimizing Flat
;;; Closures" by Keep et. al. or Chapter 5. of "A Nanopass Compiler for
;;; Commercial Compiler Development" by Keep). This is an analysis pass,
;;; so we are just gathering up the free variables. This will look somewhat
;;; similar to the identify-assigned-variables, except we care about all
;;; variable references, but only the free variables at lambdas.
;;;
(define-pass uncover-free : L10 (e) -> L11 ()
(Expr : Expr (e) -> Expr (free*)
;; quoted constants have no variable references
[(quote ,c) (values `(quote ,c) '())]
;; gather up variable references
[,x (values x (list x))]
;; if we find a let or a letrec remove the bound variables from the list
;; of references.
[(let ([,x* ,[e* free**]] ...) ,[e free*])
(values `(let ([,x* ,e*] ...) ,e)
(apply union (difference free* x*) free**))]
[(letrec ([,x* ,[le* free**]] ...) ,[body free*])
(values `(letrec ([,x* ,le*] ...) ,body)
(difference (apply union free* free**) x*))]
;; in all the other cases, we simply want to gather up the
;; variable references from each sub expression
[(if ,[e0 free0*] ,[e1 free1*] ,[e2 free2*])
(values `(if ,e0 ,e1 ,e2) (union free0* free1* free2*))]
[(begin ,[e* free**] ... ,[e free*])
(values `(begin ,e* ... ,e) (apply union free* free**))]
[(primcall ,pr ,[e* free**]...)
(values `(primcall ,pr ,e* ...) (apply union free**))]
[(,[e free*] ,[e* free**] ...)
(values `(,e ,e* ...) (apply union free* free**))])
(LambdaExpr : LambdaExpr (le) -> LambdaExpr (free*)
;; at the lambda expression, remove our bound variables, everything else
;; is free. we continue to return the free variables until we find their
;; binding forms.
[(lambda (,x* ...) ,[body free*])
(let ([free* (difference free* x*)])
(values `(lambda (,x* ...) (free (,free* ...) ,body)) free*))])
;; in the body, we kick off with the Expr call, and make sure that we have
;; an empty free list when we reach the top, because we expect our programs
;; to be self-contained with no free-references.
(let-values ([(e free*) (Expr e)])
(unless (null? free*) (error who "found unbound variables" free*))
e))
;;; pass: convert-closures : L11 -> L12
;;;
;;; this pass begins closure conversion, using the free variable lists
;;; gathered in the previous pass to begin creating our closure data
;;; structures. This pass splits letrec bindings into a 'closures' binding
;;; form, which lists the bound variable, a label that will refer to the code
;;; of the function (and will become the function name), and the list of free
;;; variables that will be included in the final closure datastructure. The
;;; second binding form is the labels form, which binds the label for a
;;; procedure to the procedures code. We also add an explicit closure
;;; pointer argument to each procedure. If we were compiling to assembly
;;; code, we might avoid this and just specify a register to hold the closure
;;; pointer. We can also eliminate the need for the closure pointer if we
;;; use the correct optimizations. Finally, we add the explicit closure
;;; argument to each procedure call site.
;;;
;;; These transformations look as follows:
;;;
;;; (letrec ([x* (lambda (x** ...) (free (f** ...) body*))] ...) body) =>
;;; (closures ([x* l* f** ...] ...)
;;; (labels ([l* (lambda (cp* x** ...) (free (f** ...) body*))] ...) body))
;;; where l* is a list of labels for each lambda expression and cp* is a
;;; list of variables representing an explicit closure argument
;;;
;;; (x e* ...) => (x x e* ...) ; a small optimization
;;; (e e* ...) => (let ([t e]) (t t e* ...))
;;;
;;; Design decision: We separate the steps of closure creation and explicit
;;; allocation and setting of closure values, partially so that we can
;;; implement closure optimization passes that can help reduce the number of
;;; free variables, or even eliminate closures entirely, when we do not have
;;; any free variables.
;;;
(define-pass convert-closures : L11 (e) -> L12 ()
(definitions
(define make-cp (lambda (x) (unique-var 'cp)))
(define make-label
(lambda (x)
(unique-var
(string->symbol
(string-append "l:"
(symbol->string (base-var x))))))))
(Expr : Expr (e) -> Expr ()
[(letrec ([,x* (lambda (,x** ...) (free (,f** ...) ,[body*]))] ...)
,[body])
(let ([l* (map make-label x*)] [cp* (map make-cp x*)])
`(closures ([,x* ,l* ,f** ...] ...)
(labels ([,l* (lambda (,cp* ,x** ...)
(free (,f** ...) ,body*))] ...)
,body)))]
[(,x ,[e*] ...) `(,x ,x ,e* ...)]
[(,[e] ,[e*] ...)
(let ([t (make-tmp)])
`(let ([,t ,e]) (,t ,t ,e* ...)))]))
;;; pass: optimize-known-call : L12 -> L12
;;;
;;; a tiny "optimization" pass that recognizes when we know what procedure
;;; is being called, and refers to the procedure directly, rather than
;;; requiring that the procedure pointer be accessed through a dereference
;;; of the closure pointer. This allows the procedure to be called as:
;;;
;;; func_name_10(...)
;;;
;;; instead of:
;;;
;;; ((ptr (*)(ptr, ...))*(func_closure_10 + closure-code-offset - closure-tag)(...)
;;;
;;; in addition to looking simpler, it also avoids indirect calls, which
;;; means both that we can avoid an extra memory reference, and the C
;;; compiler has a better opportunity to optimize the call, and the processor
;;; can potentially handle the code faster (in addition avoiding the extra
;;; memory reference).
;;;
;;; Design decision: Our approach to determining when a call is known is
;;; pretty simple. When we pass a closure binding we add the binding of the
;;; variable to the label to our environment, and if we encounter a call to
;;; one of these variables, we replace it with a reference to the label.
;;; This gives us good results, but it will not detect every known call that
;;; we might be able to find if we used a more expensive analysis like
;;; control-flow analysis. For our purposes, the linear-time optimization
;;; is fast and simple, but if we want a more precise analysis, and we are
;;; willing to pay the additional cost (slightly less than cubic for 0CFA or
;;; exponential for 1CFA or higher), than we could perform a more precise
;;; analysis here.
;;;
(define-pass optimize-known-call : L12 (e) -> L12 ()
(LabelsBody : LabelsBody (lbody env) -> LabelsBody ())
(LambdaExpr : LambdaExpr (le env) -> LambdaExpr ())
(FreeBody : FreeBody (fbody env) -> FreeBody ())
(Expr : Expr (e [env '()]) -> Expr ()
[(closures ([,x* ,l* ,f** ...] ...) ,lbody)
(let ([env (fold-left
(lambda (env x l) (cons (cons x l) env))
env x* l*)])
(let ([lbody (LabelsBody lbody env)])
`(closures ([,x* ,l* ,f** ...] ...) ,lbody)))]
[(,x ,[e*] ...)
(cond
[(assq x env) => (lambda (as) `((label ,(cdr as)) ,e* ...))]
[else `(,x ,e* ...)])]))
;;; pass: expose-closure-prims : L12 -> L13
;;;
;;; this pass finishes closure conversion by turning our closures form into a
;;; let binding closure variables to explicit closure allocations (using the
;;; added make-closure primitive) and explicit closure set!s to fill in the
;;; code (with the closure-code-set! primitive) and free variable values of
;;; the closure (with the closre-data-set! primitive). We do this as
;;; separate creation and mutation steps, since we may have circular
;;; datastructures, where we need to place the value of a closure allocated
;;; in the let binding in a closure bound by the same let binding. We also
;;; move the labels form into plae as an expression, discard the free
;;; variable list form the body of our lambda expressions, and make explicit
;;; references to the closure code slot (with the closure-code primitive)
;;; where closures are called, and the closure data slots (with the
;;; closure-ref primitive) where a free variable is referenced.
;;;
;;; The transform looks as follows:
;;; (closures ([x* l* f** ...] ...) lbody) =>
;;; (let ([x* (primcall make-closure ---)] ...)
;;; (begin
;;; (primcall closure-code-set! x* l*) ...
;;; (primcall closure-data-set! x* 0 (car f**))
;;; (primcall closure-data-set! x* 1 (cadr f**))
;;; ...))
;;;
;;; (x e* ...) => ((closure-code x) e* ...)
;;; x => (closure-ref cp idx) ; where x is a free variable, and
;;; ; idx is the offset of the free
;;; ; variable in the closure.
;;;
;;;
;;; Design decision: We could also combine this with the lift-lambdas pass
;;; and finish lifting (our now first-order) procedures to the top-level of
;;; the program. It is reasonable to keep these separate, since their action
;;; on the code is a little different, but they could also be combined
;;; without much trouble.
;;;
(define-pass expose-closure-prims : L12 (e) -> L13 ()
(Expr : Expr (e [cp #f] [free* '()]) -> Expr ()
(definitions
(define handle-closure-ref
(lambda (x cp free*)
(let loop ([free* free*] [i 0])
(cond
[(null? free*) x]
[(eq? x (car free*)) `(primcall closure-ref ,cp (quote ,i))]
[else (loop (cdr free*) (fx+ i 1))]))))
(define build-closure-set*
(lambda (x* l* f** cp free*)
(fold-left
(lambda (e* x l f*)
(let loop ([f* f*] [i 0] [e* e*])
(if (null? f*)
(cons `(primcall closure-code-set! ,x (label ,l)) e*)
(loop (cdr f*) (fx+ i 1)
(cons `(primcall closure-data-set! ,x (quote ,i)
,(handle-closure-ref (car f*) cp free*))
e*)))))
'()
x* l* f**))))
[(closures ([,x* ,l* ,f** ...] ...)
(labels ([,l2* ,[le*]] ...) ,[body]))
(let ([size* (map length f**)])
`(let ([,x* (primcall make-closure (quote ,size*))] ...)
(labels ([,l2* ,le*] ...)
(begin
,(build-closure-set* x* l* f** cp free*) ...
,body))))]
[,x (handle-closure-ref x cp free*)]
[((label ,l) ,[e*] ...) `((label ,l) ,e* ...)]
[(,[e] ,[e*] ...) `((primcall closure-code ,e) ,e* ...)])
(LabelsBody : LabelsBody (lbody) -> Expr ())
(LambdaExpr : LambdaExpr (le) -> LambdaExpr ()
[(lambda (,x ,x* ...) (free (,f* ...) ,[body x f* -> body]))
`(lambda (,x ,x* ...) ,body)]))
;;; pass: lift-lambdas : L13 -> L14
;;;
;;; lifts all of the labels and lambda expressions to a top-level labels
;;; binding. when we generate C code, these will become top-level C
;;; functions.
;;;
;;; Design decisions: This pass is written using mutation, largely to shorten
;;; the code that would gather up the label and lambda expression lists.
;;; Another approach would be to gather these up by returning extra values
;;; from each expression that has the list of labels and lambda expressions.
;;; This would be simpler if the nanopass framework supported a way to flow
;;; extra values through the data, but it doesn't currently support this
;;; (it's on my feature todo list :).
;;;
(define-pass lift-lambdas : L13 (e) -> L14 ()
(definitions
(define *l* '())
(define *le* '()))
(Expr : Expr (e) -> Expr ()
[(labels ([,l* ,[le*]] ...) ,[body])
(set! *l* (append l* *l*))
(set! *le* (append le* *le*))
body])
(let ([e (Expr e)] [l (unique-var 'l:program)])
`(labels ([,l (lambda () ,e)] [,*l* ,*le*] ...) ,l)))
;;; pass: remove-complex-opera* : L14 -> L15
;;;
;;; this pass removes nested complex operators. strictly speaking, this is
;;; not something that we need to do since C is our target, however if we
;;; want to taret assembly or something like LLVM. If we target something
;;; like JavaScript, however, we might want to eliminate this.
;;;
;;; one reason I like this pass, is that it is a very simple pass for
;;; something that is relatively complicated because the nanopadd framework
;;; is really able to do a lot of work for us.
;;;
;;; Design decision: If we decide to remove this pass, the C code generation
;;; pass will have to be a bit smarter about how it generates code, because
;;; we will then have complex arguments, however, any decent C compiler
;;; should be able to keep up with the tricks we'd need to play.
;;;
(define-pass remove-complex-opera* : L14 (e) -> L15 ()
(definitions
(with-output-language (L15 Expr)
(define build-let
(lambda (x* e* body)
(if (null? x*)
body
`(let ([,x* ,e*] ...) ,body)))))
(define simplify*
(lambda (e* f)
(let loop ([e* e*] [t* '()] [te* '()] [re* '()])
(if (null? e*)
(build-let t* te* (f (reverse re*)))
(let ([e (car e*)])
(nanopass-case (L15 Expr) e
[,x (loop (cdr e*) t* te* (cons x re*))]
[(quote ,c) (loop (cdr e*) t* te* (cons e re*))]
[(label ,l) (loop (cdr e*) t* te* (cons e re*))]
[else (let ([t (make-tmp)])
(loop (cdr e*) (cons t t*)
(cons e te*) (cons t re*)))])))))))
(Expr : Expr (e) -> Expr ()
[(primcall ,pr ,[e*] ...)
(simplify* e*
(lambda (e*)
`(primcall ,pr ,e* ...)))]
[(,[e] ,[e*] ...)
(simplify* (cons e e*)
(lambda (e*)
`(,(car e*) ,(cdr e*) ...)))]))
;;; pass: recognize-context : L15 -> L16
;;;
;;; This pass seperates the Expr into Value, Effect, and Predicate cases.
;;; The basic idea is to recognize where we have primitive calls that are out
;;; of place for the value that they produce, the effect they perform, or the
;;; branching direction they cause us to select. This is partially necessary
;;; because we are choosing our own represenation for values, which may not
;;; be the same as C's representation, and because we require that each
;;; procedure return a value. The basic idea is pretty simple, the body of a
;;; procedure is in Value context, so this is the context we start in. When
;;; we process an 'if' form, the test position is in predicate context. In
;;; this context we need to produce a true or false value in C (i.e. 0 for
;;; true, or a non-zero integer, usually 1, for true). If we are in Value
;;; context and we encounter a 'begin' form, the expressions before the end
;;; of the 'begin' form are in effect context.
;;;
;;; The rules are as follows:
;;; In Value context:
;;; (primcall effect-prim e* ...) =>
;;; (begin (primcall effect-prim e* ...) (primcall void))
;;; (primcall pred-prim e* ...) =>
;;; (if (primcall pred-prim e* ...) (quote #t) (quote #f))
;;;
;;; In Effect context:
;;; x => (nop)
;;; (quote c) => (nop)
;;; (label l) => (nop)
;;; (primcall value-prim e* ...) => (nop)
;;; (primcall effect-prim e* ...) => (nop)
;;;
;;; In Predicate context (remember in Scheme #f is the only false value):
;;; x => (if (primcall = x #f) (false) (true))
;;; (quote #f) => (false)
;;; (quote (not #f)) => (true)
;;; (primcall value-prim e* ...) =>
;;; (if (let ([t (primcall value-prim e* ...)])
;;; (= t (quote #f)))
;;; (false)
;;; (true))
;;; (primcall effect-prim e* ...) =>
;;; (begin (primcall effect-prim e* ...) (true)) ; (void) is not #f!
;;; (se se* ...) =>
;;; (if (let ([t (se se* ...)])
;;; (primcall = t (quote #f)))
;;; (false)
;;; (true))
;;; we also do a small optimization, if we see (true) or (false) in
;;; the output of an 'if' test form, we choose either the consequent or
;;; the alternative.
;;;
;;; Design decision: We could swap recognize-context and
;;; remove-complex-expr*, which would allow us to avoid building the 'let'
;;; form when a Value prim or procedure call appears in the Predicate
;;; context. On the other hand, we would need to process three contexts of
;;; Expr, and maintain the context separation.
;;;
(define-pass recognize-context : L15 (e) -> L16 ()
(Value : Expr (e) -> Value ()
[(primcall ,pr ,[se*] ...)
(guard (value-primitive? pr))
`(primcall ,pr ,se* ...)]
[(primcall ,pr ,[se*] ...)
(guard (predicate-primitive? pr))
`(if (primcall ,pr ,se* ...) (quote #t) (quote #f))]
[(primcall ,pr ,[se*] ...)
(guard (effect-primitive? pr))
`(begin (primcall ,pr ,se* ...) (primcall void))]
[(primcall ,pr ,se* ...)
(error who "unexpected primitive found" pr)])
(Effect : Expr (e) -> Effect ()
[,se `(nop)]
[(primcall ,pr ,[se*] ...)
(guard (effect-primitive? pr))
`(primcall ,pr ,se* ...)]
[(primcall ,pr ,[se*] ...)
(guard (or (value-primitive? pr) (predicate-primitive? pr)))
`(nop)]
[(primcall ,pr ,se* ...)
(error who "unexpected primitive found" pr)])
(Predicate : Expr (e) -> Predicate ()
[(quote ,c) (if c `(true) `(false))]
[,x `(if (primcall eq? x (quote #f)) (false) (true))]
[(if ,[p0] ,[p1] ,[p2])
(nanopass-case (L16 Predicate) p0
[(true) p1]
[(false) p2]
[else `(if ,p0 ,p1 ,p2)])]
[(,[se] ,[se*] ...)
(let ([t (make-tmp)])
`(if (let ([,t (,se ,se* ...)])
(primcall = ,t (quote #f)))
(false)
(true)))]
[(primcall ,pr ,[se*] ...)
(guard (predicate-primitive? pr))
`(primcall ,pr ,se* ...)]
[(primcall ,pr ,[se*] ...)
(guard (effect-primitive? pr))
`(begin (primcall ,pr ,se* ...) (true))]
[(primcall ,pr ,[se*] ...)
(guard (value-primitive? pr))
(let ([t (make-tmp)])
`(if (let ([,t (primcall ,pr ,se* ...)])
(primcall eq? ,t (quote #f)))
(false)
(true)))]
[(primcall ,pr ,se* ...)
(error who "unexpected primitive found" pr)]))
;;; pass: expose-allocation-primitives : L16 -> L17
;;;
;;; this pass replaces the primitives that allocate new Scheme data
;;; structures with a generic alloc form that takes the number of bytes to
;;; allocate and the tag to add. (We cheat a little on the number of bytes
;;; by using the fact that our fixnum data type is going to be adjusted
;;; appropriately from representing the number of words in the data structure
;;; to the number of bytes in the data structure.) This will eliminate
;;; primitive calls to make-vector, make-closure, box, and cons and replace
;;; it with allocs and explicit sets. One thing to note is that in the case
;;; of box and cons, we want to be sure that the arguments are evaluated
;;; first, then the space is allocated, and finally the values are set in the
;;; data structure. We do this because, while we can evaluate the arguments
;;; in any order, however, we need to complete their evaluation before we
;;; start executing the primitive. In our little compiler, we could get away
;;; with cheating, but if we added a feature like call/cc our cheats would be
;;; observable.
;;;
(define-pass expose-allocation-primitives : L16 (e) -> L17 ()
(Value : Value (v) -> Value ()
[(primcall ,vpr ,[se])
(case vpr
[(make-vector)
(nanopass-case (L17 SimpleExpr) se
[(quote ,c)
(target-fixnum? c)
(let ([t (make-tmp)])
`(let ([,t (alloc ,vector-tag (quote ,(+ c 1)))])
(begin
(primcall $vector-length-set! ,t (quote ,c))
,t)))]
[else (let ([t0 (make-tmp)] [t1 (make-tmp)] [t2 (make-tmp)])
`(let ([,t0 ,se])
(let ([,t1 (primcall + ,t0 (quote 1))])
(let ([,t2 (alloc ,vector-tag ,t1)])
(begin
(primcall $vector-length-set! ,t2 ,t0)
,t2)))))])]
[(make-closure)
(nanopass-case (L17 SimpleExpr) se
[(quote ,c)
(guard (target-fixnum? c))
`(alloc ,closure-tag (quote ,(+ c 1)))]
[else (error who
"expected constant argument for make-closure primcall"
(unparse-L16 v))])]
[(box)
(let ([t0 (make-tmp)] [t1 (make-tmp)])
`(let ([,t0 ,se])
(let ([,t1 (alloc ,box-tag (quote 1))])
(begin
(primcall set-box! ,t1 ,t0)
,t1))))]
[else `(primcall ,vpr ,se)])]
[(primcall ,vpr ,[se0] ,[se1])
(case vpr
[(cons)
(let ([t0 (make-tmp)] [t1 (make-tmp)] [t2 (make-tmp)])
`(let ([,t0 ,se0] [,t1 ,se1])
(let ([,t2 (alloc ,pair-tag (quote 2))])
(begin
(primcall $set-car! ,t2 ,t0)
(primcall $set-cdr! ,t2 ,t1)
,t2))))]
[else `(primcall ,vpr ,se0 ,se1)])]))
;;; pass: return-of-set! : L17 -> L18
;;;
;;; In this pass we remove the 'let' form and replace it with set!. While
;;; this set! looks like the source-level set!, it really is not the same
;;; thing, since each of our variables only ever receive one value over the
;;; course of running the program. If we were compiling to assembly or LLVM,
;;; these set!s would directly set the variable at its allocated position,
;;; i.e. in a register or memory location. Here we leave the job of deciding
;;; where to allocate each of our single-assignemnt variables. In this pass,
;;; we also gather up all of the variables as locals, so that we can put our
;;; variable declarations at the start of the C function. (This is not
;;; required in a modern C compiler, but it does make our job easier, since
;;; we don't have to worry about needing to create variables in C contexts
;;; where it might not be allowed.) This latter job is what causes all of
;;; the extra work, since there is not a good way to gather up the values
;;; without returning from every form in each of our three contexts.
;;;
;;; Design decision: We could simplify this pass by putting it before the
;;; recognize-context pass, but that would compilcate the recognize-context
;;; pass. With all of these types of decisions, it is largely a balancing
;;; act of managing the complexity of individual passes, to try to keep the
;;; compiler as simple as possible.
;;;
(define-pass return-of-set! : L17 (e) -> L18 ()
(definitions
(with-output-language (L18 Effect)
(define build-set*!
(lambda (x* v* body build-begin)
(build-begin
(map (lambda (x v) `(set! ,x ,v)) x* v*)
body)))))
(SimpleExpr : SimpleExpr (se) -> SimpleExpr ('()))
(Value : Value (v) -> Value ('())
(definitions
(define build-begin
(lambda (e* v)
(nanopass-case (L18 Value) v
[(begin ,e1* ... ,v)
(build-begin (append e* e1*) v)]
[else
(if (null? e*)
v
(let loop ([e* e*] [re* '()])
(if (null? e*)
`(begin ,(reverse re*) ... ,v)
(let ([e (car e*)])
(nanopass-case (L18 Effect) e
[(nop) (loop (cdr e*) re*)]
[(begin ,e0* ... ,e0)
(loop (append e0* (cons e0 (cdr e*))) re*)]
[else (loop (cdr e*) (cons (car e*) re*))])))))]))))
[(if ,[p0 var0*] ,[v1 var1*] ,[v2 var2*])
(values `(if ,p0 ,v1 ,v2) (append var0* var1* var2*))]
[(begin ,[e* var**] ... ,[v var*])
(values (build-begin e* v) (apply append var* var**))]
[(primcall ,vpr ,[se* var**] ...)
(values `(primcall ,vpr ,se* ...) (apply append var**))]
[(,[se var*] ,[se* var**] ...)
(values `(,se ,se* ...) (apply append var* var**))]
[(let ([,x* ,[v* var**]] ...) ,[body var*])
(values
(build-set*! x* v* body build-begin)
(apply append x* var* var**))])
(Effect : Effect (e) -> Effect ('())
(definitions
(define build-begin
(lambda (e* e)
(nanopass-case (L18 Effect) e
[(begin ,e1* ... ,e)
(build-begin (append e* e1*) e)]
[else
(if (null? e*)
e
(let loop ([e* e*] [re* '()])
(if (null? e*)
`(begin ,(reverse re*) ... ,e)
(let ([e (car e*)])
(nanopass-case (L18 Effect) e
[(nop) (loop (cdr e*) re*)]
[(begin ,e0* ... ,e0)
(loop (append e0* (cons e0 (cdr e*))) re*)]
[else (loop (cdr e*) (cons (car e*) re*))])))))]))))
[(if ,[p0 var0*] ,[e1 var1*] ,[e2 var2*])
(values `(if ,p0 ,e1 ,e2) (append var0* var1* var2*))]
[(begin ,[e* var**] ... ,[e var*])
(values (build-begin e* e) (apply append var* var**))]
[(primcall ,epr ,[se* var**] ...)
(values `(primcall ,epr ,se* ...) (apply append var**))]
[(,[se var*] ,[se* var**] ...)
(values `(,se ,se* ...) (apply append var* var**))]
[(let ([,x* ,[v* var**]] ...) ,[e var*])
(values
(build-set*! x* v* e build-begin)
(apply append x* var* var**))])
(Predicate : Predicate (p) -> Predicate ('())
(definitions
(define build-begin
(lambda (e* p)
(nanopass-case (L18 Predicate) p
[(begin ,e1* ... ,p)
(build-begin (append e* e1*) p)]
[else
(if (null? e*)
p
(let loop ([e* e*] [re* '()])
(if (null? e*)
`(begin ,(reverse re*) ... ,p)
(let ([e (car e*)])
(nanopass-case (L18 Effect) e
[(nop) (loop (cdr e*) re*)]
[(begin ,e0* ... ,e0)
(loop (append e0* (cons e0 (cdr e*))) re*)]
[else (loop (cdr e*) (cons (car e*) re*))])))))]))))
[(if ,[p0 var0*] ,[p1 var1*] ,[p2 var2*])
(values `(if ,p0 ,p1 ,p2) (append var0* var1* var2*))]
[(begin ,[e* var**] ... ,[p var*])
(values (build-begin e* p) (apply append var* var**))]
[(primcall ,ppr ,[se* var**] ...)
(values `(primcall ,ppr ,se* ...) (apply append var**))]
[(let ([,x* ,[v* var**]] ...) ,[p var*])
(values
(build-set*! x* v* p build-begin)
(apply append x* var* var**))])
(LambdaExpr : LambdaExpr (le) -> LambdaExpr ()
[(lambda (,x* ...) ,[body var*])
`(lambda (,x* ...) (locals (,var* ...) ,body))]))
;;; pass: flatten-set! : L18 -> L19
;;;
;;; In the previous pass we remove the 'let' form, but we now may have set!
;;; expressions on the right-hand side of a set!, such as the following:
;;;
;;; (set! x.0 (begin
;;; (set! y.1 5)
;;; (set! z.2 7)
;;; (primcall + y.1 z.2)))
;;;
;;; However, while this is legal in C, we'd like to avoid this, which will
;;; help us generate a little easier to read code, and again if we were
;;; targeting something like assembly, would be required. We can transform
;;; our example above into:
;;;
;;; (begin
;;; (set! y.1 5)
;;; (set! z.2 7)
;;; (set! x.0 (primcall + y.1 z.2)))
;;;
(define-pass flatten-set! : L18 (e) -> L19 ()
(SimpleExpr : SimpleExpr (se) -> SimpleExpr ())
(Effect : Effect (e) -> Effect ()
[(set! ,x ,v) (flatten v x)])
(flatten : Value (v x) -> Effect ()
[,se `(set! ,x ,(SimpleExpr se))]
[(primcall ,vpr ,[se*] ...) `(set! ,x (primcall ,vpr ,se* ...))]
[(alloc ,i ,[se]) `(set! ,x (alloc ,i ,se))]
[(,[se] ,[se*] ...) `(set! ,x (,se ,se* ...))]))
;;; pass: push-if : L19 -> L20
;;;
;;; It turns out I was a little overzealous with this pass and didn't quite
;;; handle all of the cases. In particular, in my hustle, I did not think
;;; about the `(if p0 p1 p2) where the result expressions contain effects...
;;; i.e. (if (begin ,e0* ... ,p0) (begin ,e1* ... ,p1) (begin ,e2* ...
;;; ,p2)) can only be handled if:
;;; 1. we are willing to copy the code for the tail of our ifs (we aren't,
;;; this can lead to exponential code explosion) or
;;; 2. if we are willing to flatten this code and use labels and gotos in
;;; our generated code.
;;; Number 2 is a more reasonable solution, but lucky for us, C will allow us
;;; to generate code like the following:
;;;
;;; (if (begin ,e0* ... ,p0) (begin ,e1* ... ,p1) (begin ,e2* ... ,p2)) =>
;;;
;;; (((e0*[0]), (e0*[1]), ..., (e0*[n]), p0) ?
;;; ((e1*[0]), (e1*[1]), ..., (e1*[n]), p1) :
;;; ((e2*[0]), (e2*[1]), ..., (e2*[n]), p2))
;;;
;;; I've left the pass here as an example that even when we think we've got a
;;; pass written and working, it easy to miss things, which is why we test,
;;; and why we need to think carefully as we work through the compiler.
;;;
; (define-pass push-if : L19 (e) -> L20 ()
; (Value : Value (v) -> Value ()
; (definitions
; (define build-begin
; (lambda (e* v)
; (if (null? e*) v `(begin ,e* ... ,v)))))
; [(if ,[p0 e*] ,[v1] ,[v2]) (build-begin e* `(if ,p0 ,v1 ,v2))])
; (Effect : Effect (e) -> Effect ()
; (definitions
; (define build-begin
; (lambda (e* e)
; (if (null? e*) e `(begin ,e* ... ,e)))))
; [(if ,[p0 e*] ,[e1] ,[e2]) (build-begin e* `(if ,p0 ,e1 ,e2))])
; (Predicate : Predicate (p) -> Predicate ('())
; [(begin ,[e*] ... ,[p more-e*]) (values p (append e* more-e*))]
; [(if ,[p0 e0*] ,[p1 e1*] ,[p2 e2*])
; (values `(if ,p0 (begin ,e1* ... p1) (begin ,e2* ... ,p2)) e0*)]))
;;; pass: specify-constant-representation : L19 -> L21
;;;
;;; This pass replaces our quoted constants with the explicit ptr
;;; representation we've decided to use. This effectively replaces each of our
;;; constants with a 64-bit integer. The conversion is pretty simple:
;;;
;;; #f => false-rep
;;; #t => true-rep
;;; '() => null-rep
;;; fixnum => fixnum << fixnum-shift (yielding 64-bit integer)
;;;
(define-pass specify-constant-representation : L19 (e) -> L21 ()
(SimpleExpr : SimpleExpr (se) -> SimpleExpr ()
[(quote ,c)
(cond
[(eq? c #f) false-rep]
[(eq? c #t) true-rep]
[(null? c) null-rep]
[(target-fixnum? c)
(bitwise-arithmetic-shift-left c fixnum-shift)])]))
;;; pass: expand-primitives : L21 -> L22
;;;
;;; this pass expands our Scheme primitives into something close to their
;;; C-language equivalents. This changes our math primitives to do the
;;; adjustments required by changing the representation of fixnums (it works
;;; fine for + and -, but * and / require us to do some shifting in order to
;;; have a fixnum as a result). We also translate all of our memory
;;; referencing primitives to mrefs and memory setting primitives into
;;; msets!. When we generate C code for these, we will do the pointer
;;; arithmetic required and then dereference the calculated address.
;;; Remember, that because of our tags, we need to do some pointer arithmetic
;;; for any dereference we wish to perform. This pointer arithmetic, though,
;;; can be handled in a single memory reference argument on an x86_64 (which
;;; is our assumed target platform).
;;;
;;; Design decision: Right now each of our "instructions" is a separate form
;;; in the language, however, if we were to extend our source language and
;;; primitive set much farther, it is likely that we would want to revisit
;;; this to choose a representation where a single form could represent
;;; several of these instructions. This might also be desirable if we change
;;; the representation to LLVM or asm.js.
;;;;
(define-pass expand-primitives : L21 (e) -> L22 ()
(Value : Value (v) -> Value ()
(definitions
(define build-begin
(lambda (e* v)
(nanopass-case (L22 Value) v
[(begin ,e1* ... ,v)
(build-begin (append e* e1*) v)]
[else
(if (null? e*)
v
(let loop ([e* e*] [re* '()])
(if (null? e*)
`(begin ,(reverse re*) ... ,v)
(let ([e (car e*)])
(nanopass-case (L22 Effect) e
[(nop) (loop (cdr e*) re*)]
[(begin ,e0* ... ,e0)
(loop (append e0* (cons e0 (cdr e*))) re*)]
[else (loop (cdr e*) (cons (car e*) re*))])))))]))))
[(begin ,[e*] ... ,[v]) (build-begin e* v)])
(Rhs : Rhs (rhs) -> Rhs ()
[(primcall ,vpr)
(case vpr
[(void) void-rep]
[else (error who "unexpected value primitive" vpr)])]
[(primcall ,vpr ,[se])
(case vpr
[(car) `(mref ,se #f ,(- pair-tag))]
[(cdr) `(mref ,se #f ,(- word-size pair-tag))]
[(unbox) `(mref ,se #f ,(- box-tag))]
[(closure-code) `(mref ,se #f ,(- closure-tag))]
[(vector-length) `(mref ,se #f ,(- vector-tag))]
[else (error who "unexpected value primitive" vpr)])]
[(primcall ,vpr ,[se0] ,[se1])
(case vpr
[(closure-ref) `(mref ,se0 ,se1 ,(- word-size closure-tag))]
[(vector-ref) `(mref ,se0 ,se1 ,(- word-size vector-tag))]
[(+) `(add ,se0 ,se1)]
[(-) `(subtract ,se0 ,se1)]
;; when we multiply or divide, we need to shift either one of the
;; arguments or the result. we could also be a bit more clever here,
;; if one of the arguments is a constant, we can perform the shift
;; ahead of time (assuming the constant still fits within the 64-bit
;; width
[(*) `(multiply ,se0 (shift-right ,se1 ,fixnum-shift))]
[(/) `(shift-left (divide ,se0 ,se1) ,fixnum-shift)]
[else (error who "unexpected value primitive" vpr)])]
[(primcall ,vpr ,se* ...)
(error who "unexpected value primitive" vpr)])
(Effect : Effect (e) -> Effect ()
(definitions
(define build-begin
(lambda (e* e)
(nanopass-case (L22 Effect) e
[(begin ,e1* ... ,e)
(build-begin (append e* e1*) e)]
[else
(if (null? e*)
e
(let loop ([e* e*] [re* '()])
(if (null? e*)
`(begin ,(reverse re*) ... ,e)
(let ([e (car e*)])
(nanopass-case (L22 Effect) e
[(nop) (loop (cdr e*) re*)]
[(begin ,e0* ... ,e0)
(loop (append e0* (cons e0 (cdr e*))) re*)]
[else (loop (cdr e*) (cons (car e*) re*))])))))]))))
[(begin ,[e*] ... ,[e]) (build-begin e* e)]
[(primcall ,epr ,[se0] ,[se1])
(case epr
[(set-box!) `(mset! ,se0 #f ,(- box-tag) ,se1)]
[($set-car!) `(mset! ,se0 #f ,(- pair-tag) ,se1)]
[($set-cdr!) `(mset! ,se0 #f ,(- word-size pair-tag) ,se1)]
[($vector-length-set!) `(mset! ,se0 #f ,(- vector-tag) ,se1)]
[(closure-code-set!) `(mset! ,se0 #f ,(- closure-tag) ,se1)]
[else (error who "unexpected effect primitive" epr)])]
[(primcall ,epr ,[se0] ,[se1] ,[se2])
(case epr
[(vector-set!) `(mset! ,se0 ,se1 ,(- word-size vector-tag) ,se2)]
[(closure-data-set!)
`(mset! ,se0 ,se1 ,(- word-size closure-tag) ,se2)]
[else (error who "unexpected effect primitive" epr)])]
[(primcall ,epr ,se* ...)
(error who "unexpected effect primitive" epr)])
(Predicate : Predicate (p) -> Predicate ()
(definitions
(define build-begin
(lambda (e* p)
(nanopass-case (L22 Predicate) p
[(begin ,e1* ... ,p)
(build-begin (append e* e1*) p)]
[else
(if (null? e*)
p
(let loop ([e* e*] [re* '()])
(if (null? e*)
`(begin ,(reverse re*) ... ,p)
(let ([e (car e*)])
(nanopass-case (L22 Effect) e
[(nop) (loop (cdr e*) re*)]
[(begin ,e0* ... ,e0)
(loop (append e0* (cons e0 (cdr e*))) re*)]
[else (loop (cdr e*) (cons (car e*) re*))])))))]))))
[(begin ,[e*] ... ,[p]) (build-begin e* p)]
[(primcall ,ppr ,[se])
(case ppr
[(pair?) `(= (logand ,se ,pair-mask) ,pair-tag)]
[(null?) `(= ,se ,null-rep)]
[(boolean?) `(= (logand ,se ,boolean-mask) ,boolean-tag)]
[(vector?) `(= (logand ,se ,vector-mask) ,vector-tag)]
[(box?) `(= (logand ,se ,box-mask) ,box-tag)]
[else (error who "unexpected predicate primitive" ppr)])]
[(primcall ,ppr ,[se0] ,[se1])
(case ppr
[(eq? =) `(= ,se0 ,se1)]
[(<) `(< ,se0 ,se1)]
[(<=) `(<= ,se0 ,se1)]
[(>) `(<= ,se1 ,se0)]
[(>=) `(< ,se1 ,se0)]
[else (error who "unexpected predicate primitive" ppr)])]
[(primcall ,ppr ,se* ...)
(error who "unexpected predicate primitive" ppr)]))
;;; pass: generate-C : L22 -> printed-output
;;;
;;; this pass takes a program in the L22 language and produces a printed C
;;; program. using a string or file port, the results of this can be
;;; captured in a string or sent to a file to be compiled. The code that it
;;; produces can be a little difficult to read, particularly with all of the
;;; casts to and from ptr values.
;;;
;;; TODO: this pass is fairly convoluted, and could use some refactoring. We
;;; might also want to try to pretty-print the C code so that it prints
;;; out a bit better.
;;;
(define-pass generate-c : L22 (e) -> * ()
(definitions
(define string-join
(lambda (str* jstr)
(cond
[(null? str*) ""]
[(null? (cdr str*)) (car str*)]
[else (string-append (car str*) jstr (string-join (cdr str*) jstr))])))
;;; symbol->c-id - converts any Scheme symbol into a valid C identifier.
(define symbol->c-id
(lambda (sym)
(let ([ls (string->list (symbol->string sym))])
(if (null? ls)
"_"
(let ([fst (car ls)])
(list->string
(cons
(if (char-alphabetic? fst) fst #\_)
(map (lambda (c)
(if (or (char-alphabetic? c)
(char-numeric? c))
c
#\_))
(cdr ls)))))))))
;;; emit-function-header - generates a function header to be used in the
;;; declaration of a function or the definition of a function.
(define format-function-header
(lambda (l x*)
(format "ptr ~a(~a)" l
(string-join
(map
(lambda (x)
(format "ptr ~a" (symbol->c-id x)))
x*)
", "))))
(define format-label-call
(lambda (l se*)
(format " ~a(~a)" (symbol->c-id l)
(string-join
(map (lambda (se)
(format "(ptr)~a" (format-simple-expr se)))
se*)
", "))))
(define format-general-call
(lambda (se se*)
(format "((ptr (*)(~a))~a)(~a)"
(string-join (make-list (length se*) "ptr") ", ")
(format-simple-expr se)
(string-join
(map (lambda (se)
(format "(ptr)~a" (format-simple-expr se)))
se*)
", "))))
(define format-binop
(lambda (op se0 se1)
(format "((long)~a ~a (long)~a)"
(format-simple-expr se0)
op
(format-simple-expr se1))))
(define format-set!
(lambda (x rhs)
(format "~a = (ptr)~a" (symbol->c-id x) (format-rhs rhs)))))
;; transformer to print our function declarations
(emit-function-decl : LambdaExpr (le l) -> * ()
[(lambda (,x* ...) ,lbody)
(printf "~a;~%" (format-function-header l x*))])
;; transformer to print our function definitions
(emit-function-def : LambdaExpr (le l) -> * ()
[(lambda (,x* ...) ,lbody)
(printf "~a {~%" (format-function-header l x*))
(emit-function-body lbody)
(printf "}~%~%")])
;; transformer to emit the body of a function
(emit-function-body : LocalsBody (lbody) -> * ()
[(locals (,x* ...) ,body)
(for-each (lambda (x) (printf " ptr ~a;~%" (symbol->c-id x))) x*)
(emit-value body x*)])
;; transformer to emit expressions in value context
(emit-value : Value (v locals*) -> * ()
[(if ,p0 ,v1 ,v2)
(printf " if (~a) {~%" (format-predicate p0))
(emit-value v1 locals*)
(printf " } else {~%")
(emit-value v2 locals*)
(printf " }~%")]
[(begin ,e* ... ,v)
(for-each emit-effect e*)
(emit-value v locals*)]
[,rhs (printf " return (ptr)~a;\n" (format-rhs rhs))])
;; transformer to format Predicate expressions into strings
(format-predicate : Predicate (p) -> * (str)
[(if ,p0 ,p1 ,p2)
(format "((~a) ? (~a) : (~a))"
(format-predicate p0)
(format-predicate p1)
(format-predicate p2))]
[(<= ,se0 ,se1) (format-binop "<=" se0 se1)]
[(< ,se0 ,se1) (format-binop "<" se0 se1)]
[(= ,se0 ,se1) (format-binop "==" se0 se1)]
[(true) "1"]
[(false) "0"]
[(begin ,e* ... ,p)
(string-join
(fold-right (lambda (e s*) (cons (format-effect e) s*))
(list (format-predicate p)) e*)
", ")])
;; transformer to format effects in predicate context into strings
(format-effect : Effect (e) -> * (str)
[(if ,p0 ,e1 ,e2)
(format "((~a) ? (~a) : (~a))"
(format-predicate p0)
(format-effect e1)
(format-effect e2))]
[((label ,l) ,se* ...) (format-label-call l se*)]
[(,se ,se* ...) (format-general-call se se*)]
[(set! ,x ,rhs) (format-set! x rhs)]
[(nop) "0"]
[(begin ,e* ... ,e)
(string-join
(fold-right (lambda (e s*) (cons (format-effect e) s*))
(list (format-effect e)) e*)
", ")]
[(mset! ,se0 ,se1? ,i ,se2)
(if se1?
(format "((*((ptr*)((long)~a + (long)~a + ~d))) = (ptr)~a)"
(format-simple-expr se0) (format-simple-expr se1?)
i (format-simple-expr se2))
(format "((*((ptr*)((long)~a + ~d))) = (ptr)~a)"
(format-simple-expr se0) i (format-simple-expr se2)))])
;; formats simple expressions in to strings
(format-simple-expr : SimpleExpr (se) -> * (str)
[,x (symbol->c-id x)]
[,i (number->string i)]
[(label ,l) (format "(*~a)" (symbol->c-id l))]
[(logand ,se0 ,se1) (format-binop "&" se0 se1)]
[(shift-right ,se0 ,se1) (format-binop ">>" se0 se1)]
[(shift-left ,se0 ,se1) (format-binop "<<" se0 se1)]
[(divide ,se0 ,se1) (format-binop "/" se0 se1)]
[(multiply ,se0 ,se1) (format-binop "*" se0 se1)]
[(subtract ,se0 ,se1) (format-binop "-" se0 se1)]
[(add ,se0 ,se1) (format-binop "+" se0 se1)]
[(mref ,se0 ,se1? ,i)
(if se1?
(format "(*((ptr)((long)~a + (long)~a + ~d)))"
(format-simple-expr se0)
(format-simple-expr se1?) i)
(format "(*((ptr)((long)~a + ~d)))" (format-simple-expr se0) i))])
;; prints expressions in effect position into C statements
(emit-effect : Effect (e) -> * ()
[(if ,p0 ,e1 ,e2)
(printf " if (~a) {~%" (format-predicate p0))
(emit-effect e1)
(printf " } else {~%")
(emit-effect e2)
(printf " }~%")]
[((label ,l) ,se* ...) (printf " ~a;\n" (format-label-call l se*))]
[(,se ,se* ...) (printf " ~a;\n" (format-general-call se se*))]
[(set! ,x ,rhs) (printf " ~a;\n" (format-set! x rhs))]
[(nop) (if #f #f)]
[(begin ,e* ... ,e)
(for-each emit-effect e*)
(emit-effect e)]
[(mset! ,se0 ,se1? ,i ,se2)
(if se1?
(printf "(*((ptr*)((long)~a + (long)~a + ~d))) = (ptr)~a;\n"
(format-simple-expr se0) (format-simple-expr se1?)
i (format-simple-expr se2))
(printf "(*((ptr*)((long)~a + ~d))) = (ptr)~a;\n"
(format-simple-expr se0) i (format-simple-expr se2)))])
;; formats the right-hand side of a set! into a C expression
(format-rhs : Rhs (rhs) -> * (str)
[((label ,l) ,se* ...) (format-label-call l se*)]
[(,se ,se* ...) (format-general-call se se*)]
[(alloc ,i ,se)
(if (use-boehm?)
(format "(ptr)((long)GC_MALLOC(~a) + ~dl)"
(format-simple-expr se) i)
(format "(ptr)((long)malloc(~a) + ~dl)"
(format-simple-expr se) i))]
[,se (format-simple-expr se)])
;; emits a C program for our progam expression
(Program : Program (p) -> * ()
[(labels ([,l* ,le*] ...) ,l)
(let ([l (symbol->c-id l)] [l* (map symbol->c-id l*)])
(define-syntax emit-include
(syntax-rules ()
[(_ name) (printf "#include <~s>\n" 'name)]))
(define-syntax emit-predicate
(syntax-rules ()
[(_ PRED_P mask tag)
(emit-c-macro PRED_P (x) "(((long)x & ~d) == ~d)" mask tag)]))
(define-syntax emit-eq-predicate
(syntax-rules ()
[(_ PRED_P rep)
(emit-c-macro PRED_P (x) "((long)x == ~d)" rep)]))
(define-syntax emit-c-macro
(lambda (x)
(syntax-case x()
[(_ NAME (x* ...) fmt args ...)
#'(printf "#define ~s(~a) ~a\n" 'NAME
(string-join (map symbol->string '(x* ...)) ", ")
(format fmt args ...))])))
;; the following printfs output the tiny C runtime we are using
;; to wrap the result of our compiled Scheme program.
(emit-include stdio.h)
(if (use-boehm?)
(emit-include gc.h)
(emit-include stdlib.h))
(emit-predicate FIXNUM_P fixnum-mask fixnum-tag)
(emit-predicate PAIR_P pair-mask pair-tag)
(emit-predicate BOX_P box-mask box-tag)
(emit-predicate VECTOR_P vector-mask vector-tag)
(emit-predicate PROCEDURE_P closure-mask closure-tag)
(emit-eq-predicate TRUE_P true-rep)
(emit-eq-predicate FALSE_P false-rep)
(emit-eq-predicate NULL_P null-rep)
(emit-eq-predicate VOID_P void-rep)
(printf "typedef long* ptr;\n")
(emit-c-macro FIX (x) "((long)x << ~d)" fixnum-shift)
(emit-c-macro UNFIX (x) "((long)x >> ~d)" fixnum-shift)
(emit-c-macro UNBOX (x) "((ptr)*((ptr)((long)x - ~d)))" box-tag)
(emit-c-macro VECTOR_LENGTH_S (x) "((ptr)*((ptr)((long)x - ~d)))" vector-tag)
(emit-c-macro VECTOR_LENGTH_C (x) "UNFIX(VECTOR_LENGTH_S(x))")
(emit-c-macro VECTOR_REF (x i) "((ptr)*((ptr)((long)x - ~d + ((i+1) * ~d))))" vector-tag word-size)
(emit-c-macro CAR (x) "((ptr)*((ptr)((long)x - ~d)))" pair-tag)
(emit-c-macro CDR (x) "((ptr)*((ptr)((long)x - ~d + ~d)))" pair-tag word-size)
(printf "void print_scheme_value(ptr x) {\n")
(printf " long i, veclen;\n")
(printf " ptr p;\n")
(printf " if (TRUE_P(x)) {\n")
(printf " printf(\"#t\");\n")
(printf " } else if (FALSE_P(x)) {\n")
(printf " printf(\"#f\");\n")
(printf " } else if (NULL_P(x)) {\n")
(printf " printf(\"()\");\n")
(printf " } else if (VOID_P(x)) {\n")
(printf " printf(\"(void)\");\n")
(printf " } else if (FIXNUM_P(x)) {\n")
(printf " printf(\"%ld\", UNFIX(x));\n")
(printf " } else if (PAIR_P(x)) {\n")
(printf " printf(\"(\");\n")
(printf " for (p = x; PAIR_P(p); p = CDR(p)) {\n")
(printf " print_scheme_value(CAR(p));\n")
(printf " if (PAIR_P(CDR(p))) { printf(\" \"); }\n")
(printf " }\n")
(printf " if (NULL_P(p)) {\n")
(printf " printf(\")\");\n")
(printf " } else {\n")
(printf " printf(\" . \");\n")
(printf " print_scheme_value(p);\n")
(printf " printf(\")\");\n")
(printf " }\n")
(printf " } else if (BOX_P(x)) {\n")
(printf " printf(\"#(box \");\n")
(printf " print_scheme_value(UNBOX(x));\n")
(printf " printf(\")\");\n")
(printf " } else if (VECTOR_P(x)) {\n")
(printf " veclen = VECTOR_LENGTH_C(x);\n")
(printf " printf(\"#(\");\n")
(printf " for (i = 0; i < veclen; i += 1) {\n")
(printf " print_scheme_value(VECTOR_REF(x,i));\n")
(printf " if (i < veclen) { printf(\" \"); } \n")
(printf " }\n")
(printf " printf(\")\");\n")
(printf " } else if (PROCEDURE_P(x)) {\n")
(printf " printf(\"#(procedure)\");\n")
(printf " }\n")
(printf "}\n")
(map emit-function-decl le* l*)
(map emit-function-def le* l*)
(printf "int main(int argc, char * argv[]) {\n")
(printf " print_scheme_value(~a());\n" l)
(printf " printf(\"\\n\");\n")
(printf " return 0;\n")
(printf "}\n"))]))
;;; a little helper mostly shamelesly stolen from
;;; the Chez Scheme User's Guide
(define-syntax with-implicit
(syntax-rules ()
[(_ (tid id ...) body0 ... body1)
(with-syntax ([id (datum->syntax #'tid 'id)] ...)
body0 ... body1)]))
;;; a little macro to make building a compiler with tracing that we can turn
;;; off and on easier. no support for looping in this, but the syntax is very
;;; simple:
;;; (define-compiler my-compiler-name
;;; (pass1 unparser)
;;; (pass2 unparser)
;;; ...
;;; pass-to-generate-c)
;;;
(define-syntax define-compiler
(lambda (x)
(syntax-case x ()
[(_ name (pass unparser) ... gen-c)
(with-implicit (name all-passes trace-passes)
#`(begin
(define all-passes '(pass ... gen-c))
(define trace-passes
(let ([passes '()])
(case-lambda
[() passes]
[(x)
(cond
[(symbol? x)
(unless (memq x all-passes)
(error 'trace-passes "invalid pass name" x))
(set! passes (list x))]
[(list? x)
(unless (for-all (lambda (x) (memq x all-passes)) x)
(error 'trace-passes
"one or more invalid pass names" x))
(set! passes x)]
[(eq? x #t) (set! passes all-passes)]
[(eq? x #f) (set! passes '())]
[else (error 'trace-passes
"invalid pass specifier" x)])])))
(define name
(lambda (x)
#,(let loop ([pass* #'(pass ...)]
[unparser* #'(unparser ...)])
(if (null? pass*)
#'(begin
(when (file-exists? "t.c") (delete-file "t.c"))
(with-output-to-file "t.c"
(lambda () (gen-c x)))
(when (memq 'gen-c (trace-passes))
(printf "output of pass ~s~%" 'gen-c)
(call-with-input-file "t.c"
(lambda (ip)
(let f ()
(let ([s (get-string-n ip 512)])
(unless (eof-object? s)
(display s)
(f)))))))
(system
(format "gcc -m64 ~a t.c -o t"
(if (use-boehm?) "-lgc" "")))
(when (file-exists? "t.out")
(delete-file "t.out"))
(system "./t > t.out")
(call-with-input-file "t.out" read))
(with-syntax ([pass (car pass*)]
[unparser (car unparser*)])
#`(let ([x (pass x)])
(when (memq 'pass (trace-passes))
(printf "output of pass ~s~%" 'pass)
(pretty-print (unparser x)))
#,(loop (cdr pass*)
(cdr unparser*))))))))))])))
;;; the definition of our compiler that pulls in all of our passes and runs
;;; them in sequence checking to see if the programmer wants them traced.
(define-compiler my-tiny-compile
(parse-and-rename unparse-Lsrc)
(remove-one-armed-if unparse-L1)
(remove-and-or-not unparse-L2)
(make-begin-explicit unparse-L3)
(inverse-eta-raw-primitives unparse-L4)
(quote-constants unparse-L5)
(remove-complex-constants unparse-L6)
(identify-assigned-variables unparse-L7)
(purify-letrec unparse-L8)
(optimize-direct-call unparse-L8)
(find-let-bound-lambdas unparse-L8)
(remove-anonymous-lambda unparse-L9)
(convert-assignments unparse-L10)
(uncover-free unparse-L11)
(convert-closures unparse-L12)
(optimize-known-call unparse-L12)
(expose-closure-prims unparse-L13)
(lift-lambdas unparse-L14)
(remove-complex-opera* unparse-L15)
(recognize-context unparse-L16)
(expose-allocation-primitives unparse-L17)
(return-of-set! unparse-L18)
(flatten-set! unparse-L19)
; (push-if unparse-L20)
(specify-constant-representation unparse-L21)
(expand-primitives unparse-L22)
generate-c))