;;; Copyright (c) 2013 Andrew W. Keep ;;; See the accompanying file Copyright for details ;;; ;;; A nanopass compiler developed to use as a demo during Clojure Conj 2013. ;;; The source language for the compiler is: ;;; ;;; Expr --> ;;; | ;;; | ;;; | (quote ) ;;; | (if ) ;;; | (if ) ;;; | (or ...) ;;; | (and ...) ;;; | (not ) ;;; | (begin ... ) ;;; | (lambda ( ...) ... ) ;;; | (let ([ ] ...) ... ) ;;; | (letrec ([ ] ...) ... ) ;;; | (set! ) ;;; | ( ...) ;;; ;;; Primitive --> car | cdr | cons | pair? | null? | boolean? | make-vector ;;; | vector-ref | vector-set! | vector? | vector-length | box ;;; | unbox | box-set! | box? | + | - | * | / | = | < | <= | > ;;; | >= | eq? ;;; Var --> symbol ;;; Const --> #t | #f | '() | integer between -2^60 and 2^60 - 1 ;;; Datum --> | ( . ) | #( ...) ;;; ;;; or in nanopass parlance: ;;; (define-language Lsrc ;;; (terminals ;;; (symbol (x)) ;;; (primitive (pr)) ;;; (constant (c)) ;;; (datum (d))) ;;; (Expr (e body) ;;; pr ;;; x ;;; c ;;; (quote d) ;;; (if e0 e1) ;;; (if e0 e1 e2) ;;; (or e* ...) ;;; (and e* ...) ;;; (not e) ;;; (begin e* ... e) ;;; (lambda (x* ...) body* ... body) ;;; (let ([x* e*] ...) body* ... body) ;;; (letrec ([x* e*] ...) body* ... body) ;;; (set! x e) ;;; (e e* ...))) ;;; ;;; The following exports are defined for this library: ;;; ;;; (my-tiny-compile ) ;;; my-tiny-compile is the main interface the compiler, where is ;;; a quoted expression for the compiler to evaluate. This procedure will ;;; run the nanopass parts of the compiler, produce a C output file in t.c, ;;; compile it using gcc to a program t, run the program t, directing its ;;; output to t.out, and finally use the Scheme reader to read t.out and ;;; return the value to the host Scheme system. For example, if we wanted ;;; to run a program that calculates the factorial of 5, we could do the ;;; following: ;;; (my-tiny-compile '(letrec ([f (lambda (n) ;;; (if (= n 0) ;;; 1 ;;; (* n (f (- n 1)))))]) ;;; (f 10))) ;;; ;;; (trace-passes) ;;; (trace-passes ) ;;; trace-passes is a parameter used by my-tiny-compile to decide what ;;; passees should have their output printed. When it is called without ;;; any arguments, it returns the list of passes to be traced. When it ;;; is called with an argument, the argument should be one of the ;;; following: ;;; ' - sets this pass to be traced ;;; '( ...) - set the list of passes to trace ;;; #t - traces all passes ;;; #f - turns off trace passing ;;; ;;; all-passes ;;; lists all passes in the compiler. ;;; ;;; (use-boehm?) ;;; (use-boehm? ) ;;; use-boehm? is a parameter that indicates if the generated C code should ;;; attempt to use the boehm garbage collector. This feature is, as of ;;; yet, untested. ;;; ;;; Internals that are exported to make them available for programmers ;;; experimenting with the compiler. ;;; ;;; TBD ;;; ;;; (library (c) (export Lsrc unparse-Lsrc L1 unparse-L1 L2 unparse-L2 L3 unparse-L3 L4 unparse-L4 L5 unparse-L5 L6 unparse-L6 L7 unparse-L7 L8 unparse-L8 L9 unparse-L9 L10 unparse-L10 L11 unparse-L11 L12 unparse-L12 L13 unparse-L13 L14 unparse-L14 L15 unparse-L15 L16 unparse-L16 L17 unparse-L17 L18 unparse-L18 L19 unparse-L19 ; L20 unparse-L20 L21 unparse-L21 L22 unparse-L22 unique-var user-alloc-value-prims user-non-alloc-value-prims user-pred-prims user-effect-prims user-prims void+user-non-alloc-value-prims void+user-prims closure+user-alloc-value-prims closure+void+user-non-alloc-value-prims closure+user-effect-prims internal+closure+user-effect-prims closure+void+user-prims primitive? void+primitive? closure+void+primitive? effect-free-prim? predicate-primitive? effect-primitive? value-primitive? non-alloc-value-primitive? effect+internal-primitive? target-fixnum? constant? datum? integer-64? set-cons union difference intersect parse-and-rename remove-one-armed-if remove-and-or-not make-begin-explicit inverse-eta-raw-primitives quote-constants remove-complex-constants identify-assigned-variables purify-letrec optimize-direct-call find-let-bound-lambdas remove-anonymous-lambda convert-assignments uncover-free convert-closures optimize-known-call expose-closure-prims lift-lambdas remove-complex-opera* recognize-context expose-allocation-primitives return-of-set! flatten-set! ; push-if specify-constant-representation expand-primitives generate-c use-boehm? my-tiny-compile trace-passes all-passes) (import (nanopass) (rnrs) (only (implementation-helpers) printf format system pretty-print)) ;;; As of yet untested feature for using the boehm GC ;;; in the compiled output of our compiler. (define use-boehm? (let ([use? #f]) (case-lambda [() use?] [(u?) (set! use? u?)]))) ;;; Representation of our data types. ;;; We use tagged pointers, because all of our pointers are 8-byte aligned, ;;; leaving te bottom 3 bits always being 0. Using these 3 bits for tags ;;; lets us store things like fixnums as pointers, and differentiate them ;;; from pointers like closures and vectors. It also saves us using a word ;;; for a tag when in our representation of vectors, closures, etc. (define fixnum-tag #b000) (define fixnum-mask #b111) (define pair-tag #b001) (define pair-mask #b111) (define box-tag #b010) (define box-mask #b111) (define vector-tag #b011) (define vector-mask #b111) (define closure-tag #b100) (define closure-mask #b111) ;;; NOTE: #b101 is used for constants (define boolean-tag #b1101) (define boolean-mask #b1111) (define true-rep #b111101) (define false-rep #b101101) (define null-rep #b100101) (define void-rep #b110101) (define fixnum-shift 3) (define word-size 8) ;;; Helper function for representing unique variables as symbols by adding a ;;; number to the variables (so if we start with f we get f.n where n might ;;; be 1, 2, 3, etc, and is unique). (define unique-var (let () (define count 0) (lambda (name) (let ([c count]) (set! count (+ count 1)) (string->symbol (string-append (symbol->string name) "." (number->string c))))))) ;;; Convenience procedure for building temporaries in the compiler. (define make-tmp (lambda () (unique-var 't))) ;;; Helpers for the various sets of primitives we have over the course of the ;;; compiler ;;; All primitives: ;;; ;;; | | | Langauge | Language | ;;; primitive | arity | context | introduced | removed | ;;; --------------------+-------+---------+------------+----------+ ;;; cons | 2 | value | Lsrc | L17 | ;;; make-vector | 1 | value | Lsrc | L17 | ;;; box | 1 | value | Lsrc | L17 | ;;; car | 1 | value | Lsrc | L22 | ;;; cdr | 1 | value | Lsrc | L22 | ;;; vector-ref | 2 | value | Lsrc | L22 | ;;; vector-length | 1 | value | Lsrc | L22 | ;;; unbox | 1 | value | Lsrc | L22 | ;;; + | 2 | value | Lsrc | L22 | ;;; - | 2 | value | Lsrc | L22 | ;;; * | 2 | value | Lsrc | L22 | ;;; / | 2 | value | Lsrc | L22 | ;;; pair? | 1 | pred | Lsrc | L22 | ;;; null? | 1 | pred | Lsrc | L22 | ;;; boolean? | 1 | pred | Lsrc | L22 | ;;; vector? | 1 | pred | Lsrc | L22 | ;;; box? | 1 | pred | Lsrc | L22 | ;;; = | 2 | pred | Lsrc | L22 | ;;; < | 2 | pred | Lsrc | L22 | ;;; <= | 2 | pred | Lsrc | L22 | ;;; > | 2 | pred | Lsrc | L22 | ;;; >= | 2 | pred | Lsrc | L22 | ;;; eq? | 2 | pred | Lsrc | L22 | ;;; vector-set! | 3 | effect | Lsrc | L22 | ;;; box-set! | 2 | effect | Lsrc | L22 | ;;; --------------------+-------+---------+------------+----------+ ;;; void | 0 | value | L1 | L22 | ;;; --------------------+-------+---------+------------+----------+ ;;; make-closure | 1 | value | L13 | L17 | ;;; closure-code | 2 | value | L13 | L22 | ;;; closure-ref | 2 | value | L13 | L22 | ;;; closure-code-set! | 2 | effect | L13 | L22 | ;;; closure-data-set! | 3 | effect | L13 | L22 | ;;; --------------------+-------+---------+------------+----------+ ;;; $vector-length-set! | 2 | effect | L17 | L22 | ;;; $set-car! | 2 | effect | L17 | L22 | ;;; $set-cdr! | 2 | effect | L17 | L22 | ;;; ;;; This is a slightly cleaned up version, but this might still be better ;;; cleaned up by adding a define-primitives form, perhaps even one that can ;;; be used in the later parts of the compiler. ;;; user value primitives that perform allocation (define user-alloc-value-prims '((cons . 2) (make-vector . 1) (box . 1))) ;;; user value primitives that do not perform allocation (define user-non-alloc-value-prims '((car . 1) (cdr . 1) (vector-ref . 2) (vector-length . 1) (unbox . 1) (+ . 2) (- . 2) (* . 2) (/ . 2))) ;;; user predicate primitives ;;; TODO: add procedure? (define user-pred-prims '((pair? . 1) (null? . 1) (boolean? . 1) (vector? . 1) (box? . 1) (= . 2) (< . 2) (<= . 2) (> . 2) (>= . 2) (eq? . 2))) ;;; user effect primitives (define user-effect-prims '((vector-set! . 3) (box-set! . 2))) ;;; an association list with the user primitives (define user-prims (append user-alloc-value-prims user-non-alloc-value-prims user-pred-prims user-effect-prims)) ;;; void primitive + non-allocation user value primitives (define void+user-non-alloc-value-prims (cons '(void . 0) user-non-alloc-value-prims)) ;;; an association list with void and all the user primitives (define void+user-prims (append user-alloc-value-prims void+user-non-alloc-value-prims user-pred-prims user-effect-prims)) ;;; all the allocation value primitives, including make-closure primitive (define closure+user-alloc-value-prims (cons '(make-closure . 1) user-alloc-value-prims)) ;;; all the non-allocation value primitives, include the closure primitives (define closure+void+user-non-alloc-value-prims (cons* '(closure-code . 2) '(closure-ref . 2) void+user-non-alloc-value-prims)) ;; all the user effect primitives with the closure primitives (define closure+user-effect-prims (cons* '(closure-code-set! . 2) '(closure-data-set! . 3) user-effect-prims)) ;; all the user effect primitives, closure primitives, and internal primitives (define internal+closure+user-effect-prims (cons* '($vector-length-set! . 2) '($set-car! . 2) '($set-cdr! . 2) closure+user-effect-prims)) ;; association list including all prims except the three final internal ;; primitives (define closure+void+user-prims (append closure+user-alloc-value-prims closure+void+user-non-alloc-value-prims user-pred-prims closure+user-effect-prims)) ;;; various predicates for determining if a primitve is a valid prim. (define primitive? (lambda (x) (assq x user-prims))) (define void+primitive? (lambda (x) (assq x void+user-prims))) (define closure+void+primitive? (lambda (x) (assq x closure+void+user-prims))) (define effect-free-prim? (lambda (x) (assq x (append void+user-non-alloc-value-prims user-alloc-value-prims user-pred-prims)))) (define predicate-primitive? (lambda (x) (assq x user-pred-prims))) (define effect-primitive? (lambda (x) (assq x closure+user-effect-prims))) (define value-primitive? (lambda (x) (assq x (append closure+user-alloc-value-prims closure+void+user-non-alloc-value-prims)))) (define non-alloc-value-primitive? (lambda (x) (assq x closure+void+user-non-alloc-value-prims))) (define effect+internal-primitive? (lambda (x) (assq x internal+closure+user-effect-prims))) ;;;;;;;;;; ;;; Helper functions for identifying terminals in the nanopass languages. ;;; determine if we have a 61-bit signed integer (define target-fixnum? (lambda (x) (and (and (integer? x) (exact? x)) (<= (- (expt 2 60)) x (- (expt 2 60) 1))))) ;;; determine if we have a constant: #t, #f, '(), or 61-bit signed integer. (define constant? (lambda (x) (or (target-fixnum? x) (boolean? x) (null? x)))) ;;; determine if we have a valid datum (a constant, a pair of datum, or a ;;; vector of datum) (define datum? (lambda (x) (or (constant? x) (and (pair? x) (datum? (car x)) (datum? (cdr x))) (and (vector? x) (let loop ([i (vector-length x)]) (or (fx=? i 0) (let ([i (fx- i 1)]) (and (datum? (vector-ref x i)) (loop i))))))))) ;;; determine if we have a 64-bit signed integer (used later in the compiler ;;; to hold the ptr representation). (define integer-64? (lambda (x) (and (and (integer? x) (exact? x)) (<= (- (expt 2 63)) x (- (expt 2 63) 1))))) ;;; Random helper available on most Scheme systems, but irritatingly not in ;;; the R6RS standard. (define make-list (case-lambda [(n) (make-list n (if #f #f))] [(n v) (let loop ([n n] [ls '()]) (if (zero? n) ls (loop (fx- n 1) (cons v ls))))])) ;;;;;;;; ;;; The standard (not very efficient) Scheme representation of sets as lists ;;; add an item to a set (define set-cons (lambda (x set) (if (memq x set) set (cons x set)))) ;;; construct the intersection of 0 to n sets (define intersect (lambda set* (if (null? set*) '() (fold-left (lambda (seta setb) (let loop ([seta seta] [fset '()]) (if (null? seta) fset (let ([a (car seta)]) (if (memq a setb) (loop (cdr seta) (cons a fset)) (loop (cdr seta) fset)))))) (car set*) (cdr set*))))) ;;; construct the union of 0 to n sets (define union (lambda set* (if (null? set*) '() (fold-left (lambda (seta setb) (let loop ([setb setb] [seta seta]) (if (null? setb) seta (loop (cdr setb) (set-cons (car setb) seta))))) (car set*) (cdr set*))))) ;;; construct the difference of 0 to n sets (define difference (lambda set* (if (null? set*) '() (fold-right (lambda (setb seta) (let loop ([seta seta] [final '()]) (if (null? seta) final (let ([a (car seta)]) (if (memq a setb) (loop (cdr seta) final) (loop (cdr seta) (cons a final))))))) (car set*) (cdr set*))))) ;;; Language definitions for Lsrc and L1 to L22 ;;; Both the language extension and the fully specified language is included ;;; (though the fully specified language may be out of date). This can be ;;; regenerated by doing: ;;; > (import (c)) ;;; > (import (nanopass)) ;;; > (language->s-expression L10) => generates L10 definition (define-language Lsrc (terminals (symbol (x)) (primitive (pr)) (constant (c)) (datum (d))) (Expr (e body) pr x c (quote d) (if e0 e1) (if e0 e1 e2) (or e* ...) (and e* ...) (not e) (begin e* ... e) (lambda (x* ...) body* ... body) (let ([x* e*] ...) body* ... body) (letrec ([x* e*] ...) body* ... body) (set! x e) (e e* ...))) ;;; Language 1: removes one-armed if and adds the void primitive ; ; (define-language L1 ; (terminals (void+primitive (pr)) ; (symbol (x)) ; (constant (c)) ; (datum (d))) ; (Expr (e body) ; pr ; x ; c ; (quote d) ; (if e0 e1 e2) ; (or e* ...) ; (and e* ...) ; (not e) ; (begin e* ... e) ; (lambda (x* ...) body* ... body) ; (let ([x* e*] ...) body* ... body) ; (letrec ([x* e*] ...) body* ... body) ; (set! x e) ; (e e* ...))) ; (define-language L1 (extends Lsrc) (terminals (- (primitive (pr))) (+ (void+primitive (pr)))) (Expr (e body) (- (if e0 e1)))) ;;; Language 2: removes or, and, and not forms ; ; (define-language L2 ; (terminals (void+primitive (pr)) ; (symbol (x)) ; (constant (c)) ; (datum (d))) ; (Expr (e body) ; pr ; x ; c ; (quote d) ; (if e0 e1 e2) ; (begin e* ... e) ; (lambda (x* ...) body* ... body) ; (let ([x* e*] ...) body* ... body) ; (letrec ([x* e*] ...) body* ... body) ; (set! x e) ; (e e* ...))) ; (define-language L2 (extends L1) (Expr (e body) (- (or e* ...) (and e* ...) (not e)))) ;;; Language 3: removes multiple expressions from the body of lambda, let, ;;; and letrec (to be replaced with a single begin expression that contains ;;; the expressions from the body). ; ; (define-language L3 ; (terminals (void+primitive (pr)) ; (symbol (x)) ; (constant (c)) ; (datum (d))) ; (Expr (e body) ; (letrec ([x* e*] ...) body) ; (let ([x* e*] ...) body) ; (lambda (x* ...) body) ; pr ; x ; c ; (quote d) ; (if e0 e1 e2) ; (begin e* ... e) ; (set! x e) ; (e e* ...))) ; (define-language L3 (extends L2) (Expr (e body) (- (lambda (x* ...) body* ... body) (let ([x* e*] ...) body* ... body) (letrec ([x* e*] ...) body* ... body)) (+ (lambda (x* ...) body) (let ([x* e*] ...) body) (letrec ([x* e*] ...) body)))) ;;; Language 4: removes raw primitives (to be replaced with a lambda and a ;;; primitive call). ; ; (define-language L4 ; (terminals (void+primitive (pr)) ; (symbol (x)) ; (constant (c)) ; (datum (d))) ; (Expr (e body) ; (primcall pr e* ...) ; (letrec ([x* e*] ...) body) ; (let ([x* e*] ...) body) ; (lambda (x* ...) body) ; x ; c ; (quote d) ; (if e0 e1 e2) ; (begin e* ... e) ; (set! x e) ; (e e* ...))) ; (define-language L4 (extends L3) (Expr (e body) (- pr) (+ (primcall pr e* ...) => (pr e* ...)))) ;;; Language 5: removes raw constants (to be replaced with quoted constant). ; ; (define-language L5 ; (terminals ; (void+primitive (pr)) ; (symbol (x)) ; (datum (d))) ; (Expr (e body) ; (primcall pr e* ...) ; (letrec ([x* e*] ...) body) ; (let ([x* e*] ...) body) ; (lambda (x* ...) body) ; x ; (quote d) ; (if e0 e1 e2) ; (begin e* ... e) ; (set! x e) ; (e e* ...))) ; (define-language L5 (extends L4) (terminals (- (constant (c)))) (Expr (e body) (- c))) ;;; Language 6: removes quoted datum (to be replaced with explicit calls to ;;; cons and make-vector+vector-set!). ; ; (define-language L6 ; (terminals ; (constant (c)) ; (void+primitive (pr)) ; (symbol (x))) ; (Expr (e body) ; (quote c) ; (primcall pr e* ...) ; (letrec ([x* e*] ...) body) ; (let ([x* e*] ...) body) ; (lambda (x* ...) body) ; x ; (if e0 e1 e2) ; (begin e* ... e) ; (set! x e) ; (e e* ...))) ; (define-language L6 (extends L5) (terminals (- (datum (d))) (+ (constant (c)))) (Expr (e body) (- (quote d)) (+ (quote c)))) ;;; Language 7: adds a listing of assigned variables to the body of the ;;; binding forms: let, letrec, and lambda. ; (define-language L7 ; (terminals ; (symbol (x a)) ; (constant (c)) ; (void+primitive (pr))) ; (Expr (e body) ; (letrec ([x* e*] ...) abody) ; (let ([x* e*] ...) abody) ; (lambda (x* ...) abody) ; (quote c) ; (primcall pr e* ...) ; x ; (if e0 e1 e2) ; (begin e* ... e) ; (set! x e) ; (e e* ...)) ; (AssignedBody (abody) ; (assigned (a* ...) body))) ; (define-language L7 (extends L6) (terminals (- (symbol (x))) (+ (symbol (x a)))) (Expr (e body) (- (lambda (x* ...) body) (let ([x* e*] ...) body) (letrec ([x* e*] ...) body)) (+ (lambda (x* ...) abody) (let ([x* e*] ...) abody) (letrec ([x* e*] ...) abody))) (AssignedBody (abody) (+ (assigned (a* ...) body) => body))) ;;; Language 8: letrec binding is changed to only bind variables to lambdas. ; ; (define-language L8 ; (terminals (symbol (x a)) ; (constant (c)) ; (void+primitive (pr))) ; (Expr (e body) ; (letrec ([x* le*] ...) body) ; le ; (let ([x* e*] ...) abody) ; (quote c) ; (primcall pr e* ...) ; x ; (if e0 e1 e2) ; (begin e* ... e) ; (set! x e) ; (e e* ...)) ; (AssignedBody (abody) ; (assigned (a* ...) body)) ; (LambdaExpr (le) ; (lambda (x* ...) abody))) ; (define-language L8 (extends L7) (Expr (e body) (- (lambda (x* ...) abody) (letrec ([x* e*] ...) abody)) (+ le (letrec ([x* le*] ...) body))) (LambdaExpr (le) (+ (lambda (x* ...) abody)))) ;;; Language 9: removes lambda expressions from expression context, ;;; effectively meaning we can only have lambdas bound in the right-hand-side ;;; of letrec expressions. ; ; (define-language L9 ; (terminals ; (symbol (x a)) ; (constant (c)) ; (void+primitive (pr))) ; (Expr (e body) ; (letrec ([x* le*] ...) body) ; (let ([x* e*] ...) abody) ; (quote c) ; (primcall pr e* ...) ; x ; (if e0 e1 e2) ; (begin e* ... e) ; (set! x e) ; (e e* ...)) ; (AssignedBody (abody) ; (assigned (a* ...) body)) ; (LambdaExpr (le) ; (lambda (x* ...) abody))) ; (define-language L9 (extends L8) (Expr (e body) (- le))) ;;; Language 10: removes set! and assigned bodies (to be replaced by set-box! ;;; primcall for set!, and unbox primcall for references of assigned variables). ; ; (define-language L10 ; (terminals ; (symbol (x)) ; (constant (c)) ; (void+primitive (pr))) ; (Expr (e body) ; (let ([x* e*] ...) body) ; (letrec ([x* le*] ...) body) ; (quote c) ; (primcall pr e* ...) ; x ; (if e0 e1 e2) ; (begin e* ... e) ; (e e* ...)) ; (LambdaExpr (le) ; (lambda (x* ...) body))) ; (define-language L10 (extends L9) (terminals (- (symbol (x a))) (+ (symbol (x)))) (Expr (e body) (- (let ([x* e*] ...) abody) (set! x e)) (+ (let ([x* e*] ...) body))) (LambdaExpr (le) (- (lambda (x* ...) abody)) (+ (lambda (x* ...) body))) (AssignedBody (abody) (- (assigned (a* ...) body)))) ;;; Language 11: add a list of free variables to the body of lambda ;;; expressions (starting closure conversion code). ; ; (define-language L11 ; (terminals ; (symbol (x f)) ; (constant (c)) ; (void+primitive (pr))) ; (Expr (e body) ; (let ([x* e*] ...) body) ; (letrec ([x* le*] ...) body) ; (quote c) ; (primcall pr e* ...) ; x ; (if e0 e1 e2) ; (begin e* ... e) ; (e e* ...)) ; (LambdaExpr (le) ; (lambda (x* ...) fbody)) ; (FreeBody (fbody) ; (free (f* ...) body))) ; (define-language L11 (extends L10) (terminals (- (symbol (x))) (+ (symbol (x f)))) (LambdaExpr (le) (- (lambda (x* ...) body)) (+ (lambda (x* ...) fbody))) (FreeBody (fbody) (+ (free (f* ...) body)))) ;;; Language L12: removes the letrec form and adds closure and labels forms ;;; to replace it. The closure form binds a variable to a label (code ;;; pointer) and its set of free variables, and the labels form binds labels ;;; (code pointer) to lambda expressions. ; ; (define-language L12 ; (terminals ; (symbol (x f l)) ; (constant (c)) ; (void+primitive (pr))) ; (Expr (e body) ; (label l) ; (closures ((x* l* f** ...) ...) lbody) ; (let ([x* e*] ...) body) ; (quote c) ; (primcall pr e* ...) ; x ; (if e0 e1 e2) ; (begin e* ... e) ; (e e* ...)) ; (LambdaExpr (le) ; (lambda (x* ...) fbody)) ; (FreeBody (fbody) ; (free (f* ...) body)) ; (LabelsBody (lbody) ; (labels ([l* le*] ...) body))) ; (define-language L12 (extends L11) (terminals (- (symbol (x f))) (+ (symbol (x f l)))) (Expr (e body) (- (letrec ([x* le*] ...) body)) (+ (closures ([x* l* f** ...] ...) lbody) (label l))) (LabelsBody (lbody) (+ (labels ([l* le*] ...) body)))) ;;; Language 13: finishes closure conversion, removes the closures form, ;;; replacing it with primitive calls to deal with closure objects, and ;;; raises the labels from into the Expr non-terminal. ; ; (define-language L13 ; (terminals ; (closure+void+primitive (pr)) ; (symbol (x f l)) ; (constant (c))) ; (Expr (e body) ; (labels ([l* le*] ...) body) ; (label l) ; (let ([x* e*] ...) body) ; (quote c) ; (primcall pr e* ...) ; x ; (if e0 e1 e2) ; (begin e* ... e) ; (e e* ...)) ; (LambdaExpr (le) ; (lambda (x* ...) body))) ; (define-language L13 (extends L12) (terminals (- (void+primitive (pr))) (+ (closure+void+primitive (pr)))) (Expr (e body) (- (closures ([x* l* f** ...] ...) lbody)) (+ (labels ([l* le*] ...) body))) (LabelsBody (lbody) (- (labels ([l* le*] ...) body))) (LambdaExpr (le) (- (lambda (x* ...) fbody)) (+ (lambda (x* ...) body))) (FreeBody (fbody) (- (free (f* ...) body)))) ;;; Language 14: removes labels form from the Expr nonterminal and puts a ;;; single labels form at the top. Essentially this raises all of our ;;; closure converted functions to the top. ; ; (define-language L14 ; (entry Program) ; (terminals ; (closure+void+primitive (pr)) ; (symbol (x f l)) ; (constant (c))) ; (Expr (e body) ; (label l) ; (let ([x* e*] ...) body) ; (quote c) ; (primcall pr e* ...) ; x ; (if e0 e1 e2) ; (begin e* ... e) ; (e e* ...)) ; (LambdaExpr (le) ; (lambda (x* ...) body)) ; (Program (p) ; (labels ([l* le*] ...) l))) ; (define-language L14 (extends L13) (entry Program) (Program (p) (+ (labels ([l* le*] ...) l))) (Expr (e body) (- (labels ([l* le*] ...) body)))) ;;; Language 15: moves simple expressions (constants and variable references) ;;; out of the Expr nonterminal, and replaces expressions referred to in ;;; calls and primcalls with simple expressions. This effectively removes ;;; complex operands to calls and primcalls. ; ; (define-language L15 ; (entry Program) ; (terminals ; (closure+void+primitive (pr)) ; (symbol (x f l)) ; (constant (c))) ; (Expr (e body) ; (se se* ...) ; (primcall pr se* ...) ; se ; (label l) ; (let ([x* e*] ...) body) ; (if e0 e1 e2) ; (begin e* ... e)) ; (LambdaExpr (le) ; (lambda (x* ...) body)) ; (Program (p) ; (labels ([l* le*] ...) l)) ; (SimpleExpr (se) ; x ; (quote c))) ; (define-language L15 (extends L14) (Expr (e body) (- x (quote c) (primcall pr e* ...) (e e* ...)) (+ se (primcall pr se* ...) (se se* ...))) (SimpleExpr (se) (+ x (quote c)))) ;;; Language 16: separates the Expr nonterminal into the Value, Effect, and ;;; Predicate nonterminals. This is needed to translate from our expression ;;; language into a language like C that has statements (effects) and ;;; expressions (values) and predicates that need to be simply values. (define-language L16 (terminals (symbol (x l)) (value-primitive (vpr)) (effect-primitive (epr)) (predicate-primitive (ppr)) (constant (c))) (Program (prog) (labels ([l* le*] ...) l)) (LambdaExpr (le) (lambda (x* ...) body)) (SimpleExpr (se) x (label l) (quote c)) (Value (v body) se (if p0 v1 v2) (begin e* ... v) (let ([x* v*] ...) body) (primcall vpr se* ...) (se se* ...)) (Effect (e) (nop) (if p0 e1 e2) (begin e* ... e) (let ([x* v*] ...) e) (primcall epr se* ...) (se se* ...)) (Predicate (p) (true) (false) (if p0 p1 p2) (begin e* ... p) (let ([x* v*] ...) p) (primcall ppr se* ...))) ;;; Language 17: removes the allocation primitives: cons, box, make-vector, ;;; and make-closure and adds a generic alloc form for specifying allocation. It ;;; also adds raw integers for specifying type tags in the alloc form. ; ; (define-language L17 ; (entry Program) ; (terminals ; (integer-64 (i)) ; (effect+internal-primitive (epr)) ; (non-alloc-value-primitive (vpr)) ; (symbol (x l)) ; (predicate-primitive (ppr)) ; (constant (c))) ; (Program (prog) ; (labels ([l* le*] ...) l)) ; (LambdaExpr (le) ; (lambda (x* ...) body)) ; (SimpleExpr (se) ; x ; (label l) ; (quote c)) ; (Value (v body) ; (alloc i se) ; se ; (if p0 v1 v2) ; (begin e* ... v) ; (let ([x* v*] ...) body) ; (primcall vpr se* ...) ; (se se* ...)) ; (Effect (e) ; (nop) ; (if p0 e1 e2) ; (begin e* ... e) ; (let ([x* v*] ...) e) ; (primcall epr se* ...) ; (se se* ...)) ; (Predicate (p) ; (true) ; (false) ; (if p0 p1 p2) ; (begin e* ... p) ; (let ([x* v*] ...) p) ; (primcall ppr se* ...))) ; (define-language L17 (extends L16) (terminals (- (value-primitive (vpr)) (effect-primitive (epr))) (+ (non-alloc-value-primitive (vpr)) (effect+internal-primitive (epr)) (integer-64 (i)))) (Value (v body) (+ (alloc i se)))) ;;; Language L18: removes let forms and replaces them with a top-level locals ;;; form that indicates which variables are bound in the function (so they ;;; can be listed at the top of our C function) and set! that do simple ;;; assignments. ; ; (define-language L18 ; (entry Program) ; (terminals ; (integer-64 (i)) ; (effect+internal-primitive (epr)) ; (non-alloc-value-primitive (vpr)) ; (symbol (x l)) ; (predicate-primitive (ppr)) ; (constant (c))) ; (Program (prog) ; (labels ([l* le*] ...) l)) ; (SimpleExpr (se) ; x ; (label l) ; (quote c)) ; (Value (v body) ; (alloc i se) ; se ; (if p0 v1 v2) ; (begin e* ... v) ; (primcall vpr se* ...) ; (se se* ...)) ; (Effect (e) ; (set! x v) ; (nop) ; (if p0 e1 e2) ; (begin e* ... e) ; (primcall epr se* ...) ; (se se* ...)) ; (Predicate (p) ; (true) ; (false) ; (if p0 p1 p2) ; (begin e* ... p) ; (primcall ppr se* ...)) ; (LocalsBody (lbody) ; (locals (x* ...) body)) ; (LambdaExpr (le) ; (lambda (x* ...) lbody))) ; (define-language L18 (extends L17) (Value (v body) (- (let ([x* v*] ...) body))) (Effect (e) (- (let ([x* v*] ...) e)) (+ (set! x v))) (Predicate (p) (- (let ([x* v*] ...) p))) (LambdaExpr (le) (- (lambda (x* ...) body)) (+ (lambda (x* ...) lbody))) (LocalsBody (lbody) (+ (locals (x* ...) body)))) ;;; Language 19: simplify the right-hand-side of a set! so that it can ;;; contain, simple expression, allocations, primcalls, and function calls, ;;; but not ifs, or begins. ; ; (define-language L19 ; (terminals ; (integer-64 (i)) ; (effect+internal-primitive (epr)) ; (non-alloc-value-primitive (vpr)) ; (symbol (x l)) ; (predicate-primitive (ppr)) ; (constant (c))) ; (Program (prog) ; (labels ([l* le*] ...) l)) ; (SimpleExpr (se) ; x ; (label l) ; (quote c)) ; (Value (v body) ; rhs ; (if p0 v1 v2) ; (begin e* ... v)) ; (Effect (e) ; (set! x rhs) ; (nop) ; (if p0 e1 e2) ; (begin e* ... e) ; (primcall epr se* ...) ; (se se* ...)) ; (Predicate (p) ; (true) ; (false) ; (if p0 p1 p2) ; (begin e* ... p) ; (primcall ppr se* ...)) ; (LocalsBody (lbody) ; (locals (x* ...) body)) ; (LambdaExpr (le) ; (lambda (x* ...) lbody)) ; (Rhs (rhs) ; se ; (alloc i se) ; (primcall vpr se* ...) ; (se se* ...))) ; (define-language L19 (extends L18) (Value (v body) (- se (alloc i se) (primcall vpr se* ...) (se se* ...)) (+ rhs)) (Rhs (rhs) (+ se (alloc i se) (primcall vpr se* ...) (se se* ...))) (Effect (e) (- (set! x v)) (+ (set! x rhs)))) ;;; Language L20: remove begin from the predicate production (effectively ;;; forcing the if to only have if, true, false, and predicate primitive ;;; calls). ;;; TODO: removed this language because our push-if pass was buggy, and ;;; fixing it requires us to flatten code into something like ;;; basic blocks, and we can avoid doing this since our target ;;; is C. We could revisit it for other backend targets. ; ; (define-language L20 ; (terminals ; (integer-64 (i)) ; (effect+internal-primitive (epr)) ; (non-alloc-value-primitive (vpr)) ; (symbol (x l)) ; (predicate-primitive (ppr)) ; (constant (c))) ; (Program (prog) ; (labels ([l* le*] ...) l)) ; (SimpleExpr (se) ; x ; (label l) ; (quote c)) ; (Value (v body) ; rhs ; (if p0 v1 v2) ; (begin e* ... v)) ; (Effect (e) ; (set! x rhs) ; (nop) ; (if p0 e1 e2) ; (begin e* ... e) ; (primcall epr se* ...) ; (se se* ...)) ; (Predicate (p) ; (true) ; (false) ; (if p0 p1 p2) ; (primcall ppr se* ...)) ; (LocalsBody (lbody) ; (locals (x* ...) body)) ; (LambdaExpr (le) ; (lambda (x* ...) lbody)) ; (Rhs (rhs) ; se ; (alloc i se) ; (primcall vpr se* ...) ; (se se* ...))) ; ; (define-language L20 ; (extends L19) ; (Predicate (p) ; (- (begin e* ... p)))) ;;; Language 21: remove quoted constants and replace it with our raw ptr ;;; representation (i.e. 64-bit integers) ; ; (define-language L21 ; (terminals ; (integer-64 (i)) ; (effect+internal-primitive (epr)) ; (non-alloc-value-primitive (vpr)) ; (symbol (x l)) ; (predicate-primitive (ppr))) ; (Program (prog) ; (labels ([l* le*] ...) l)) ; (SimpleExpr (se) ; i ; x ; (label l)) ; (Value (v body) ; rhs ; (if p0 v1 v2) ; (begin e* ... v)) ; (Effect (e) ; (set! x rhs) ; (nop) ; (if p0 e1 e2) ; (begin e* ... e) ; (primcall epr se* ...) ; (se se* ...)) ; (Predicate (p) ; (true) ; (false) ; (if p0 p1 p2) ; (primcall ppr se* ...)) ; (LocalsBody (lbody) ; (locals (x* ...) body)) ; (LambdaExpr (le) ; (lambda (x* ...) lbody)) ; (Rhs (rhs) ; se ; (alloc i se) ; (primcall vpr se* ...) ; (se se* ...))) ; (define-language L21 (extends L19) (terminals (- (constant (c)))) (SimpleExpr (se) (- (quote c)) (+ i))) ;;; Language 22: remove the primcalls and replace them with mref (memory ;;; references), add, subtract, multiply, divide, shift-right, shift-left, ;;; logand, mset! (memory set), =, <, and <=. ;;; ;;; TODO: we should probably replace this with "machine" instructions ;;; instead, so that we can more easily extend the language and generate C ;;; code from it. ; ; (define-language L22 ; (terminals ; (integer-64 (i)) ; (effect+internal-primitive (epr)) ; (non-alloc-value-primitive (vpr)) ; (symbol (x l)) ; (predicate-primitive (ppr))) ; (Program (prog) ; (labels ([l* le*] ...) l)) ; (SimpleExpr (se) ; (logand se0 se1) ; (shift-left se0 se1) ; (shift-right se0 se1) ; (divide se0 se1) ; (multiply se0 se1) ; (subtract se0 se1) ; (add se0 se1) ; (mref se0 (maybe se1?) i) ; i ; x ; (label l)) ; (Value (v body) ; rhs ; (if p0 v1 v2) ; (begin e* ... v)) ; (Effect (e) ; (mset! se0 (maybe se1?) i se2) ; (set! x rhs) ; (nop) ; (if p0 e1 e2) ; (begin e* ... e) ; (se se* ...)) ; (Predicate (p) ; (<= se0 se1) ; (< se0 se1) ; (= se0 se1) ; (true) ; (false) ; (if p0 p1 p2)) ; (LocalsBody (lbody) ; (locals (x* ...) body)) ; (LambdaExpr (le) ; (lambda (x* ...) lbody)) ; (Rhs (rhs) ; se ; (alloc i se) ; (se se* ...))) ; (define-language L22 (extends L21) (Rhs (rhs) (- (primcall vpr se* ...))) (SimpleExpr (se) (+ (mref se0 (maybe se1?) i) (add se0 se1) (subtract se0 se1) (multiply se0 se1) (divide se0 se1) (shift-right se0 se1) (shift-left se0 se1) (logand se0 se1))) (Effect (e) (- (primcall epr se* ...)) (+ (mset! se0 (maybe se1?) i se2))) (Predicate (p) (- (primcall ppr se* ...)) (+ (= se0 se1) (< se0 se1) (<= se0 se1)))) ;;;;;;;;; ;;; beginning of our pass listings ;;; pass: parse-and-rename : S-expression -> Lsrc (or error) ;;; ;;; parses an S-expression, and, if it conforms to the input language, ;;; renames the local variables to be represented with a unique variable. ;;; This helps us to separate keywords from varialbes and recognize one ;;; variable binding as different from another. This step is also called ;;; alpha-renaming or alpha-conversion. The output will be in the Lsrc ;;; language forms, represented as records. ;;; ;;; Some design decisions here: We could have decided to have this pass ;;; remove one-armed ifs, remove and, or, and not, setup begins in the body ;;; of our letrec, let, and lambda, and potentially quoted constants and ;;; eta-expanded raw primitives, rather than doing each of these as separate ;;; passes. I have not done this here, primarily for educational reasons, ;;; since these simple passes are a gentle introduction to how the passes are ;;; written. ;;; (define-pass parse-and-rename : * (e) -> Lsrc () ;;; Helper functions for this pass. (definitions ;;; process-body - used to process the body of begin, let, letrec, and ;;; lambda expressions. since all four of these have the same pattern in ;;; the body. (define process-body (lambda (who env body* f) (when (null? body*) (error who "invalid empty body")) (let loop ([body (car body*)] [body* (cdr body*)] [rbody* '()]) (if (null? body*) (f (reverse rbody*) (Expr body env)) (loop (car body*) (cdr body*) (cons (Expr body env) rbody*)))))) ;;; vars-unique? - processes the list of bindings to make sure all of the ;;; variable names are different (i.e. we don't want to allow ;;; (lambda (x x) x), since we would not know which x is which). (define vars-unique? (lambda (fmls) (let loop ([fmls fmls]) (or (null? fmls) (and (not (memq (car fmls) (cdr fmls))) (loop (cdr fmls))))))) ;;; unique-vars - builds a list of unique variables based on a set of ;;; formals and extends the environment. it takes a function as an ;;; argument (effectively a continuation), and passes it the updated ;;; environment and a list of unique variables. (define unique-vars (lambda (env fmls f) (unless (vars-unique? fmls) (error 'unique-vars "invalid formals" fmls)) (let loop ([fmls fmls] [env env] [rufmls '()]) (if (null? fmls) (f env (reverse rufmls)) (let* ([fml (car fmls)] [ufml (unique-var fml)]) (loop (cdr fmls) (cons (cons fml ufml) env) (cons ufml rufmls))))))) ;;; process-bindings - processes the bindings of a let or letrec and ;;; produces bindings for unique variables for each of the original ;;; variables. it also processes the right-hand sides of the variable ;;; bindings and selects either the original environment (for let) or the ;;; updated environment (for letrec). (define process-bindings (lambda (rec? env bindings f) (let loop ([bindings bindings] [rfml* '()] [re* '()]) (if (null? bindings) (unique-vars env rfml* (lambda (new-env rufml*) (let ([env (if rec? new-env env)]) (let loop ([rufml* rufml*] [re* re*] [ufml* '()] [e* '()]) (if (null? rufml*) (f new-env ufml* e*) (loop (cdr rufml*) (cdr re*) (cons (car rufml*) ufml*) (cons (Expr (car re*) env) e*))))))) (let ([binding (car bindings)]) (loop (cdr bindings) (cons (car binding) rfml*) (cons (cadr binding) re*))))))) ;;; Expr* - helper to process a list of expressions. (define Expr* (lambda (e* env) (map (lambda (e) (Expr e env)) e*))) ;;; with-output-language rebinds quasiquote so that it will build ;;; language records. (with-output-language (Lsrc Expr) ;;; build-primitive - this is a helper function to build entries in the ;;; initial environment for our user primitives. the initial ;;; enviornment contains a mapping of keywords and primitives to ;;; processing functions that check their arity (in the case of ;;; primitives) or their forms (in the case of keywords). These are ;;; put into an environment, because keywords and primitives can be ;;; rebound. (i.e. (lambda (lambda) (lambda lambda)) is a perfectly ;;; valid function in Scheme that takes a function as an argument and ;;; applies the argument to itself). (define build-primitive (lambda (as) (let ([name (car as)] [argc (cdr as)]) (cons name (if (< argc 0) (error who "primitives with arbitrary counts are not currently supported" name) ;;; we'd love to support arbitrary argument lists, but we'd ;;; need to either: ;;; 1. get rid of raw primitives, or ;;; 2. add function versions of our raw primitives with ;;; arbitrary arguments, or (possibly and) ;;; 3. add general handling for functions with arbitrary ;;; arguments. (i.e. support for (lambda args ) ;;; or (lambda (x y . args) ), which we don't ;;; currently support. #;(let ([argc (bitwise-not argc)]) (lambda (env . e*) (if (>= (length e*) argc) `(,name ,(Expr* e* env) ...) (error name "invalid argument count" (cons name e*))))) (lambda (env . e*) (if (= (length e*) argc) `(,name ,(Expr* e* env) ...) (error name "invalid argument count" (cons name e*))))))))) ;;; initial-env - this is our initial environment, expressed as an ;;; association list of keywords and primitives (represented as ;;; symbols) to procedure handlers (represented as procedures). As the ;;; program is processed through this pass, it will be extended with ;;; local bidings from variables (represented as symbols) to unique ;;; variables (represented as symbols with a format of symbol.number). (define initial-env (cons* (cons 'quote (lambda (env d) (unless (datum? d) (error 'quote "invalid datum" d)) `(quote ,d))) (cons 'if (case-lambda [(env e0 e1) `(if ,(Expr e0 env) ,(Expr e1 env))] [(env e0 e1 e2) `(if ,(Expr e0 env) ,(Expr e1 env) ,(Expr e2 env))] [x (error 'if (if (< (length x) 3) "too few arguments" "too many arguments") x)])) (cons 'or (lambda (env . e*) `(or ,(Expr* e* env) ...))) (cons 'and (lambda (env . e*) `(and ,(Expr* e* env) ...))) (cons 'not (lambda (env e) `(not ,(Expr e env)))) (cons 'begin (lambda (env . e*) (process-body env e* (lambda (e* e) `(begin ,e* ... ,e))))) (cons 'lambda (lambda (env fmls . body*) (unique-vars env fmls (lambda (env fmls) (process-body 'lambda env body* (lambda (body* body) `(lambda (,fmls ...) ,body* ... ,body))))))) (cons 'let (lambda (env bindings . body*) (process-bindings #f env bindings (lambda (env x* e*) (process-body 'let env body* (lambda (body* body) `(let ([,x* ,e*] ...) ,body* ... ,body))))))) (cons 'letrec (lambda (env bindings . body*) (process-bindings #t env bindings (lambda (env x* e*) (process-body 'letrec env body* (lambda (body* body) `(letrec ([,x* ,e*] ...) ,body* ... ,body))))))) (cons 'set! (lambda (env x e) (cond [(assq x env) => (lambda (as) (let ([v (cdr as)]) (if (symbol? v) `(set! ,v ,(Expr e env)) (error 'set! "invalid syntax" (list 'set! x e)))))] [else (error 'set! "set to unbound variable" (list 'set! x e))]))) (map build-primitive user-prims))) ;;; App - helper for handling applications. (define App (lambda (e env) (let ([e (car e)] [e* (cdr e)]) `(,(Expr e env) ,(Expr* e* env) ...)))))) ;;; transformer: Expr: S-expression -> LSrc:Expr (or error) ;;; ;;; parses an S-expression, looking for a pair (which indicates, a ;;; keyword use, a primitive call, or a normal function call), a symbol ;;; (which indicates a variable reference or a primitive reference), or one of ;;; our constants (which indicates a raw constant). (Expr : * (e env) -> Expr () (cond [(pair? e) (cond [(assq (car e) env) => (lambda (as) (let ([v (cdr as)]) (if (procedure? v) (apply v env (cdr e)) (App e env))))] [else (App e env)])] [(symbol? e) (cond [(assq e env) => (lambda (as) (let ([v (cdr as)]) (cond [(symbol? v) v] [(primitive? e) e] [else (error who "invalid syntax" e)])))] [else (error who "unbound variable" e)])] [(constant? e) e] [else (error who "invalid expression" e)])) ;;; kick off processing the S-expression by handing Expr our initial ;;; S-expression and the initial environment. (Expr e initial-env)) ;;; pass: remove-one-armed-if : Lsrc -> L1 ;;; ;;; this pass replaces the (if e0 e1) form with an if that will explicitly ;;; produce a void value when the predicate expression returns false. In ;;; other words: ;;; (if e0 e1) => (if e0 e1 (void)) ;;; ;;; Design descision: kept seperate from parse-and-rename to make it easier ;;; to understand how the nanopass framework can be used. ;;; (define-pass remove-one-armed-if : Lsrc (e) -> L1 () (Expr : Expr (e) -> Expr () [(if ,[e0] ,[e1]) `(if ,e0 ,e1 (void))])) ;;; pass: remove-and-or-not : L1 -> L2 ;;; ;;; this pass looks for references to and, or, and not and replaces it with ;;; the appropriate if expressions. this pass follows the standard ;;; expansions and has one small optimization: ;;; ;;; (if (not e0) e1 e2) => (if e0 e2 e1) [optimization] ;;; (and) => #t [from Scheme standard] ;;; (or) => #f [from Scheme standard] ;;; (and e e* ...) => (if e (and e* ...) #f) [standard expansion] ;;; (or e e* ...) => (let ([t e]) [standard expansion - ;;; (if t t (or e* ...))) avoids computing e twice] ;;; ;;; Design decision: again kept separate from parse-and-rename to simplify ;;; the discussion of this pass (adding it to parse-and-rename doesn't really ;;; make parse-and-rename much more complicated, and if we had a macro ;;; system, which would likely be implemented in parse-and-rename, or before ;;; it, we would probably want and, or, and not defined as macros, rather ;;; than forms in the language, in which case this pass would be ;;; unnecessary). ;;; (define-pass remove-and-or-not : L1 (e) -> L2 () (Expr : Expr (e) -> Expr () [(if (not ,[e0]) ,[e1] ,[e2]) `(if ,e0 ,e2 ,e1)] [(not ,[e0]) `(if ,e0 #f #t)] [(and) #t] [(and ,[e] ,[e*] ...) ;; tiny inline loop (not tail recursive, so called f instead of loop) (let f ([e e] [e* e*]) (if (null? e*) e `(if ,e ,(f (car e*) (cdr e*)) #f)))] [(or) #f] [(or ,[e] ,[e*] ...) ;; tiny inline loop (not tail recursive, so called f instead of loop) (let f ([e e] [e* e*]) (if (null? e*) e (let ([t (make-tmp)]) `(let ([,t ,e]) (if ,t ,t ,(f (car e*) (cdr e*)))))))])) ;;; pass: make-being-explicit : L2 -> L3 ;;; ;;; this pass takes the L2 let, letrec, and lambda expressions (which have ;;; bodies that can contain more than one expression), and converts them into ;;; bodies with a single expression, wrapped in a begin if necessary. To ;;; avoid polluting the output with extra begins that contain only one ;;; expression the build-begin helper checks to see if there is more then one ;;; expression and if there is builds a begin. ;;; ;;; Effectively this does the following: ;;; (let ([x* e*] ...) body0 body* ... body1) => ;;; (let ([x* e*] ...) (begin body0 body* ... body1)) ;;; (letrec ([x* e*] ...) body0 body* ... body1) => ;;; (letrec ([x* e*] ...) (begin body0 body* ... body1)) ;;; (lambda (x* ...) body0 body* ... body1) => ;;; (lambda (x* ...) (begin body0 body* ... body1)) ;;; ;;; Design Decision: This could have been included with rename-and-parse, ;;; without making it significantly more compilicated, but was separated out ;;; to continue with simpler nanopass passes to help make it more obvious ;;; what is going on here. ;;; (define-pass make-begin-explicit : L2 (e) -> L3 () (Expr : Expr (e) -> Expr () ;;; Note: the defitions are within the body of the Expr transformer ;;; instead of being within the body of the pass. This means the ;;; quasiquote is bound to the Expr form, and we don't need to use ;;; with-output-language. (definitions ;;; build-begin - helper function to build a begin only when the body ;;; contains more then one expression. (define build-begin (lambda (body* body) (if (null? body*) body `(begin ,body* ... ,body))))) [(let ([,x* ,[e*]] ...) ,[body*] ... ,[body]) `(let ([,x* ,e*] ...) ,(build-begin body* body))] [(letrec ([,x* ,[e*]] ...) ,[body*] ... ,[body]) `(letrec ([,x* ,e*] ...) ,(build-begin body* body))] [(lambda (,x* ...) ,[body*] ... ,[body]) `(lambda (,x* ...) ,(build-begin body* body))])) ;;; pass : inverse-eta-raw-primitives : L3 -> L4 ;;; ;;; Eta reduction recognizes a function that takes a set of arguments and ;;; passes those arguments directly to another function, and unwraps the ;;; function. For instance, the function: ;;; (lambda (x y) (f x y)) ;;; can be eta reduced to: ;;; f ;;; Eta reduction is not always a semantics preserving transformation because ;;; it can change the termination properties of the program, for instance a ;;; program that terminates, could turn into one that does not because a ;;; function is applied directly, rather than a function that might never be ;;; applied. ;;; ;;; In this pass, we are applying the inverse operation and adding a lambda ;;; wrapper when we see a primitive. We do this so that primitives, which we ;;; are going to open code into a C-code equivalent, can still be treated as ;;; though it was a Scheme procedure. This allows us to map over primitives, ;;; which would otherwise not be possible with our code generation. Our ;;; transformation looks for primitives in call position, marking them as ;;; primitive calls, and primitives not in call position are eta-expanded to move ;;; them into call position. ;;; ;;; (pr e* ...) => (primcall pr e* ...) ;;; pr => (lambda (x* ...) (primcall pr x* ...)) ;;; ;;; Design decision: Another way to handle this would be to create a single ;;; function for each primitive, and lift these definitions to the top-level ;;; of the program, including just those primitives that are used. This ;;; would avoid the potential to re-creating the same procedure over and over ;;; again, as we are now. ;;; (define-pass inverse-eta-raw-primitives : L3 (e) -> L4 () (Expr : Expr (e) -> Expr () [(,pr ,[e*] ...) `(primcall ,pr ,e* ...)] [,pr (cond [(assq pr void+user-prims) => (lambda (as) (do ((i (cdr as) (fx- i 1)) (x* '() (cons (make-tmp) x*))) ((fx=? i 0) `(lambda (,x* ...) (primcall ,pr ,x* ...)))))] [else (error who "unexpected primitive" pr)])])) ;;; pass: quote-constants : L4 -> L5 ;;; ;;; A simple pass to find raw constants and wrap them in a quote. ;;; c => (quote c) ;;; ;;; Design decision: This could simply be included in the next pass. ;;; (define-pass quote-constants : L4 (e) -> L5 () (Expr : Expr (e) -> Expr () [,c `(quote ,c)])) ;;; pass: remove-complex-constants : L5 -> L6 ;;; ;;; Lifts creation of constants composed of vectors or pairs outside the body ;;; of the program and makes their creation explicit. In place of the ;;; constants, a temporary variable reference is created. The total ;;; transform looks something like the following: ;;; ;;; (letrec ([add-pair-parts (lambda (p) (+ (car p) (cdr p)))]) ;;; (+ (add-pair-parts '(4 . 5)) (add-pair-parts '(6 .7)))) => ;;; (let ([t0 (cons 4 5)] [t1 (cons 6 7)]) ;;; (letrec ([add-pair-parts (lambda (p) (+ (car p) (cdr p)))]) ;;; (+ (add-pair-parts t0) (add-pair-parts t1)))) ;;; ;;; Design decision: Another possibility is to simply convert the constants ;;; into their memory-layed out variations, rather than treating it in pieces ;;; like this. We could extend our C run-time support to know about these ;;; pre-layed out items so that we do not need to construct them when the ;;; program starts running. ;;; (define-pass remove-complex-constants : L5 (e) -> L6 () (definitions ;;; t* and e* used to gather up our final constant bindings (via set!) (define t* '()) (define e* '())) (Expr : Expr (e) -> Expr () (definitions ;;; datum->expr - a helper function for recurring through the parts of ;;; a vector or pair datum to construct its parts, until it reaches the ;;; constants in the leaves of the datum. We put this definition ;;; within the Expr transformer so that quasiquote will be bound to the ;;; L6:Expr nonterminal creation code. (define datum->expr (lambda (x) (cond [(pair? x) ;; if we have a pair, cons its recurred parts. `(primcall cons ,(datum->expr (car x)) ,(datum->expr (cdr x)))] [(vector? x) ;; if we have a vector ... (let ([l (vector-length x)] [t (make-tmp)]) ;; 1. create a vector of the proper size `(let ([,t (primcall make-vector (quote ,l))]) (begin ;; 2. set each elemenet in the vector with its recurred ;; parts. ,(let loop ([l l] [e* '()]) (if (fx=? l 0) e* (let ([l (fx- l 1)]) (loop l (cons `(primcall vector-set! ,t (quote ,l) ,(datum->expr (vector-ref x l))) e*))))) ... ;; and return the vector as the final expression ,t)))] ;; if it is a constant, simply quote it. [(constant? x) `(quote ,x)])))) [(quote ,d) ;; look for quoted constants (if (constant? d) ;; if they are already simple `(quote ,d) ;; quote them (let ([t (make-tmp)]) ;; otherwise create a binding for them (set! t* (cons t t*)) (set! e* (cons (datum->expr d) e*)) t))]) ;; in the body, call the Expr transformer, and if t* is null (indicating we ;; did not find any complex constants) don't bother creating the empty let ;; around it. (let ([e (Expr e)]) (if (null? t*) e `(let ([,t* ,e*] ...) ,e)))) ;;; pass: identify-assigned-variables : L6 -> L7 ;;; ;;; This pass identifies which variables are assigned using set!. This is the ;;; first step in a process known as assignment conversion. We separate ;;; assigned varaibles from unassigned variables, and assigned variables are ;;; converted into reference cells that can be manipulated through ;;; primitives. In this compiler, we use the existing box type to create the ;;; cells (using the box primitive), reference the cells (using the unbox ;;; primitive), and mutating the cells (using the set-box! primitive). One ;;; of the reasons we perform assignment conversion is it allows multiple ;;; closures to capture the same mutable variable and all of the closures ;;; will see the same, up-to-date, value for that variable since they all ;;; simply contain a pointer to the reference cell. If we didn't do this ;;; conversion, we would need to figure out a different way to handle set! so ;;; that the updates are propagated to all the closures that close over the ;;; variable. The eventual effect of assignemnt conversion is the following: ;;; (let ([x 5]) ;;; (set! x (+ x 5)) ;;; (+ x x)) => ;;; (let ([t0 5]) ;;; (let ([x (box t0)]) ;;; (primcall set-box! x (+ (unbox x) 5) ;;; (+ (unbox x) (unbox x)))) ;;; (of course in this example, we could have simply shadowed x) ;;; ;;; This pass, however, is simply an analysis pass. It gathers up the set of ;;; assigned variables and deposits them in an AssignedBody just inside their ;;; binding points. The transformation in this pass is: ;;; ;;; (let ([x 5] [y 7] [z 10]) ;;; (set! x (+ x y)) ;;; (+ x z)) => ;;; (let ([x 5] [y 7] [z 10]) ;;; (assigned (x) ;;; (set! x (+ x y)) ;;; (+ x z))) ;;; ;;; The key operations we depend on are: ;;; set-cons - to extend a set with a newly found assigned variable. ;;; intersect - to determine which assigned variables are bound by a lambda, ;;; let, or letrec. ;;; difference - to remove assigned variables from a set once we locate their ;;; binding form. ;;; union - to gather assigned variables from sub-expressions into a ;;; single set. ;;; ;;; Note: we are using a relatively inefficient representation of sets here, ;;; simply representing them as lists and using our set-cons, intersect, ;;; difference, and union procedures to maintain their set-ness. We could ;;; choose a more efficient set representation, perhaps leveraging insertion ;;; sort or something similar, or we could choose to represent our variables ;;; using a mutable record, with a field to indicate if it is assigned. ;;; Either approach will improve the worst case performance of this pass, ;;; though the mutable record version will get us down to a linear cost, ;;; which is the best case for any pass in the current version of the ;;; nanopass framework. ;;; (define-pass identify-assigned-variables : L6 (e) -> L7 () (Expr : Expr (e) -> Expr ('()) ;; identify an assigned variable [(set! ,x ,[e assigned*]) (values `(set! ,x ,e) (set-cons x assigned*))] ;; deposit assigned variables at lambda, let, and letrec binding sites [(lambda (,x* ...) ,[body assigned*]) (values `(lambda (,x* ...) (assigned (,(intersect x* assigned*) ...) ,body)) (difference assigned* x*))] [(let ([,x* ,[e* assigned**]] ...) ,[body assigned*]) (values `(let ([,x* ,e*] ...) (assigned (,(intersect x* assigned*) ...) ,body)) (apply union (difference assigned* x*) assigned**))] [(letrec ([,x* ,[e* assigned**]] ...) ,[body assigned*]) (let ([assigned* (apply union assigned* assigned**)]) (values `(letrec ([,x* ,e*] ...) (assigned (,(intersect x* assigned*) ...) ,body)) (difference assigned* x*)))] ;; traverse forms with nested expressions to gather up the assignments ;; from each sub-expression. this could be simplified if the nanopass ;; framework had a way to automatically combine these. [(primcall ,pr ,[e* assigned**] ...) (values `(primcall ,pr ,e* ...) (apply union assigned**))] [(if ,[e0 assigned0*] ,[e1 assigned1*] ,[e2 assigned2*]) (values `(if ,e0 ,e1 ,e2) (union assigned0* assigned1* assigned2*))] [(begin ,[e* assigned**] ... ,[e assigned*]) (values `(begin ,e* ... ,e) (apply union assigned* assigned**))] [(,[e assigned*] ,[e* assigned**] ...) (values `(,e ,e* ...) (apply union assigned* assigned**))]) ;; in the body, call (let-values ([(e assigned*) (Expr e)]) (unless (null? assigned*) (error who "found one or more unbound variables" assigned*)) e)) ;;; pass: purify-letrec : L7 -> L8 ;;; ;;; this pass looks for places where letrec is used to bind something other ;;; than a lambda expression, or where a letrec bound variable is assigned ;;; and moves these bindings into let bindings. when the pass is done all of ;;; the letrecs in our program will be immutable and bind only lambda ;;; expressions. For instance, the following example: ;;; ;;; (letrec ([f (lambda (g x) (g x))] ;;; [a 5] ;;; [b (+ 5 7)] ;;; [g (lambda (h) (f h 5))] ;;; [c (let ([x 10]) ((letrec ([zero? (lambda (n) (= n 0))] ;;; [f (lambda (n) ;;; (if (zero? n) ;;; 1 ;;; (* n (f (- n 1)))))]) ;;; f) ;;; x))] ;;; [m 10] ;;; [z (lambda (x) x)]) ;;; (set! z (lambda (x) (+ x x))) ;;; (set! m (+ m m)) ;;; (+ (+ (+ (f z a) (f z b)) (f z c)) (g z)))) ;;; => ;;; (let ([z (quote #f)] [m '#f] [c '#f]) ;;; (let ([b (+ '5 '7)] [a '5]) ;;; (letrec ([g (lambda (h) (f h '5))] ;;; [f (lambda (g x) (g x))]) ;;; (begin ;;; (set! z (lambda (x) x)) ;;; (set! m '10) ;;; (set! c ;;; (let ([x '10]) ;;; ((letrec ([f (lambda (n) ;;; (if (zero? n) ;;; '1 ;;; (* n (f (- n '1)))))] ;;; [zero? (lambda (n) (= n '0))]) ;;; f) ;;; x))) ;;; (begin ;;; (set! z (lambda (x) (+ x x))) ;;; (set! m (+ m m)) ;;; (+ (+ (+ (f z a) (f z b)) (f z c)) (g z))))))) ;;; ;;; The algorithm for doing this is fairly simple. We attempt to separate ;;; the bindings into simple bindings, lambda bindings, and complex bindings. ;;; Simple bindings bind a constant, a variable reference not bound in this ;;; letrec, the call to an effect free primitive, a begin that contains only ;;; simple expressions, or an if that contains only simple expressions to an ;;; immutable variable. The simple? predicate is used for determining when an ;;; expression is simple. A lambda expression is simply a lambda, and a ;;; complex expression is any other expression. ;;; ;;; Design decision: There are many other approaches that we could use, ;;; including those described in the "Fixing Letrec: A Faithful Yet Efficient ;;; Implementation of Scheme’s Recursive Binding Construct" by Waddell, et. ;;; al. and "Fixing Letrec (reloaded)" by Ghuloum and Dybvig. Earlier ;;; versions of Chez Scheme used the earlier paper, which documented how to ;;; properly handle R5RS letrecs, and newer versions use the latter paper ;;; which described how to properly handle R6RS letrec and letrec*. ;;; (define-pass purify-letrec : L7 (e) -> L8 () (definitions ;; lambda? - use nanopass case to determine if an L8:Expr is a lambda ;; expression. (define lambda? (lambda (e) (nanopass-case (L8 Expr) e [(lambda (,x* ...) ,abody) #t] [else #f]))) ;; simple? - use nanopass case to deteremin if an L8:Expr is a "simple", ;; effect free expression. (define simple? (lambda (x bound* assigned*) (let f ([x x]) (nanopass-case (L8 Expr) x [(quote ,c) #t] [,x (not (or (memq x bound*) (memq x assigned*)))] [(primcall ,pr ,e* ...) (and (effect-free-prim? pr) (for-all f e*))] [(begin ,e* ... ,e) (and (for-all f e*) e)] [(if ,e0 ,e1 ,e2) (and (f e0) (f e1) (f e2))] [else #f]))))) (Expr : Expr (e) -> Expr () (definitions ;; build a let, when there are bindings, otherwise, just return the ;; body. (define build-let (lambda (x* e* a* body) (if (null? x*) body `(let ([,x* ,e*] ...) (assigned (,a* ...) ,body))))) ;; build a letrec, when there are bindings, otherwise, just return the ;; body (define build-letrec (lambda (x* e* body) (if (null? x*) body `(letrec ([,x* ,e*] ...) ,body)))) ;; build a begin when we have more then one expression, otherwise just ;; return the one expression. (define build-begin (lambda (body* body) (if (null? body*) body `(begin ,body* ... ,body))))) [(letrec ([,x* ,[e*]] ...) (assigned (,a* ...) ,[body])) ;; loop through our bindings, separating them into simple, lambda, and ;; complex. (let loop ([xb* x*] [e* e*] [xs* '()] [es* '()] [xl* '()] [el* '()] [xc* '()] [ec* '()]) (if (null? xb*) ;; when we're done bind the complex bindings to #f, they are now ;; all assigned, then bind the simple bindings, then create a ;; letrec binding for the lambda expressions (eliminate the ;; assigned body, since we know none of them are assigned), and ;; finally use set! to set the values of our complex bindings. (build-let xc* (make-list (length xc*) `(quote #f)) xc* (build-let xs* es* '() (build-letrec xl* el* (build-begin (map (lambda (xc ec) `(set! ,xc ,ec)) xc* ec*) body)))) (let ([x (car xb*)] [e (car e*)]) (cond [(and (not (memq x a*)) (lambda? e)) (loop (cdr xb*) (cdr e*) xs* es* (cons x xl*) (cons e el*) xc* ec*)] [(and (not (memq x a*)) (simple? e x* a*)) (loop (cdr xb*) (cdr e*) (cons x xs*) (cons e es*) xl* el* xc* ec*)] [else (loop (cdr xb*) (cdr e*) xs* es* xl* el* (cons x xc*) (cons e ec*))]))))])) ;;; pass: optimize-direct-call : L8 -> L8 ;;; ;;; one of our simplest optimizations, we convert a directly applied lambdas ;;; into a let. this allows us to avoid the creation of a closure for the ;;; let, and allows us instead to create a local binding within a function. ;;; the transform is simple: ;;; ;;; ((lambda (x* ...) body) e* ...) => (let ([x* e*] ...) body) ;;; where (length x*) == (length e*) ;;; (define-pass optimize-direct-call : L8 (e) -> L8 () (Expr : Expr (e) -> Expr () [((lambda (,x* ...) ,[abody]) ,[e* -> e*] ...) (guard (fx=? (length x*) (length e*))) `(let ([,x* ,e*] ...) ,abody)])) ;;; pass: find-let-bound-lambdas : L8 -> L8 ;;; ;;; this pass looks for places where let is used to bind a lambda expression ;;; to an immutable variable and converts this binding into a letrec binding. ;;; Part of the reason we can do this here, is because we have uniquely named ;;; each of our variables and none of these variables can be referenced in ;;; the right-hand side of the let bindings. If it was still possible for ;;; variables to have same name, this would not be a legal transformation, ;;; since it might cause a lambda that did not capture a variable bound in ;;; this let to bind the variable in the resulting letrec. The ;;; transformation looks like: ;;; ;;; (let ([x 5] [f (lambda (n) (+ n n))] [g (lambda (x) x)] [m 10]) ;;; (assigned (g m) ;;; body)) => ;;; (let ([x 5] [g (lambda (x) x)] [m 10]) ;;; (assigned (g m) ;;; (letrec ([f (lambda (n) (+ n n))]) ;;; body))) ;;; ;;; Design decisions: Handling of let can be incorporated into the handling ;;; of letrec, either through one of the algorithms described in the design ;;; decisions of the purify-letrec pass, or in the existing letrec pass. It ;;; is kept separate here, largely to make the letrec pass more straight ;;; forward to understand. ;;; (define-pass find-let-bound-lambdas : L8 (e) -> L8 () (Expr : Expr (e) -> Expr () (definitions ;; build-let - constructs a let if any variables are bound by the let, ;; or simply returns the body if there are no bindings. (define build-let (lambda (x* e* a* body) (if (null? x*) body `(let ([,x* ,e*] ...) (assigned (,a* ...) ,body))))) ;; build-letrec - constructs a letrec if any variables are bound by the ;; letrec, or simple returns the body if there are no bindings. (define build-letrec (lambda (x* le* body) (if (null? x*) body `(letrec ([,x* ,le*] ...) ,body))))) [(let ([,x* ,[e*]] ...) (assigned (,a* ...) ,[body])) ;; executes a similar algorithm to the purify-letrec pass, though it ;; does not separate simple from complex bindings, since we currently ;; handle both in the let. (let loop ([xb* x*] [e* e*] [xl* '()] [el* '()] [xo* '()] [eo* '()]) (if (null? xb*) (build-let xo* eo* a* (build-letrec xl* el* body)) (let ([x (car xb*)] [e (car e*)]) (nanopass-case (L8 Expr) e [(lambda (,x* ...) ,abody) (guard (not (memq x e*))) (loop (cdr xb*) (cdr e*) (cons x xl*) (cons e el*) xo* eo*)] [else (loop (cdr xb*) (cdr e*) xl* el* (cons x xo*) (cons e eo*))]))))])) ;;; pass: remove-anonymous-lambda : L8 -> L9 ;;; ;;; since we are generating a C function for each Scheme lambda, we need to ;;; have a name for each of these lambdas. In addition we need a name to use ;;; as the code pointer label, so that we can lift the lambdas to the top ;;; level of the program. The transformation is fairly simple. If we find a ;;; lambda in expression position (i.e. not in the right-hand side of a ;;; letrec binding) then we wrap a letrec around it that gives it a new name. ;;; ;;; (letrec ([l* (lambda (x** ...) body*)] ...) body) => (no change) ;;; (letrec ([l* (lambda (x** ...) body*)] ...) body) ;;; ;;; (lambda (x* ...) body) => (letrec ([t0 (lambda (x* ...) body)]) t0) ;;; (define-pass remove-anonymous-lambda : L8 (e) -> L9 () (Expr : Expr (e) -> Expr () [(lambda (,x* ...) ,[abody]) (let ([t (make-tmp)]) `(letrec ([,t (lambda (,x* ...) ,abody)]) ,t))])) ;;; pass: convert-assignments : L9 -> L10 ;;; ;;; this pass completes the assignment conversion process that we started in ;;; identify-assigned-variables. We use the assigned variable list through ;;; our previous passes to make decisions about how bindings were separated. ;;; Now, we are ready to change these explicitly to the box, unbox, and ;;; set-box! primitive calls described in the identified-assigned-variable ;;; pass. We also introduce new temporaries to contain the value that will ;;; be put in the box. this is largely because we don't want our ;;; representation of assigned variables to be observable from inside the ;;; program, and if we were to introduce an operator like call/cc to our ;;; implementation, then the order our variables were setup could potentially ;;; be identified by seeing that the allocation and computation of the values ;;; are intermixed. Instead, we want all the computation to happen, then the ;;; allocation, and then the allocated locations are updated with the values. ;;; ;;; Our transform thus looks like the following: ;;; ;;; (let ([x0 e0] [x1 e1] ... [xa0 ea0] [xa1 xa0] ...) ;;; (assigned (xa0 xa1 ...) ;;; body)) ;;; => ;;; (let ([x0 e0] [x1 e1] ... [t0 ea0] [t1 ea1] ...) ;;; (let ([xa0 (primcall box t0)] [xa1 (primcall box t1)] ...) ;;; body^)) ;;; ;;; (lambda (x0 x1 ... xa0 xa1 ...) (assigned (xa0 xa1 ...) body)) ;;; => ;;; (lambda (x0 x1 ... t0 t1 ...) ;;; (let ([xa0 (primcall box t0)] [xa1 (primcall box t1)] ...) ;;; body^)) ;;; ;;; where ;;; (set! xa0 e) => (primcall set-box! xa0 e^) ;;; and ;;; xa0 => (primcall unbox xa0) ;;; in body^ and e^ ;;; ;;; We could choose another data structure, or even create a new data ;;; structure to perform the conversion with, however, we've choosen the box ;;; because it contains exactly one cell, and takes up just one word in ;;; memory, where as our pair and vector take at least two words in memory. ;;; This decision might be different if we had other constraints on how we ;;; lay out memory. ;;; (define-pass convert-assignments : L9 (e) -> L10 () (definitions ;; lookup - looks for assigned variables in the environment and maps them ;; to their temporaries. (define lookup (lambda (env) (lambda (x) (cond [(assq x env) => cdr] [else x])))) ;; build-env - generates temporaries, extends the environment, and ;; returns the final list of unassigned binding variables, the list of ;; emporaries, and the updated environment, through the call to f (define build-env (lambda (x* a* env f) (let ([t* (map (lambda (x) (make-tmp)) a*)]) (let ([env (append (map cons a* t*) env)]) (f (map (lookup env) x*) t* env))))) (with-output-language (L10 Expr) ;; make-boxes - build the calls to box to create the storage locations ;; for our assigned variables. (define make-boxes (lambda (t*) (map (lambda (t) `(primcall box ,t)) t*))) ;; build-let - builds a let if there are any bindings, or returns the ;; body if there are none. (define build-let (lambda (x* e* body) (if (null? x*) body `(let ([,x* ,e*] ...) ,body)))))) (Expr : Expr (e [env '()]) -> Expr () [(let ([,x* ,[e*]] ...) (assigned (,a* ...) ,body)) (build-env x* a* env (lambda (x* t* env) (let ([box* (make-boxes t*)] [body (Expr body env)]) `(let ([,x* ,e*] ...) (let ([,a* ,box*] ...) ,body)))))] [,x (if (assq x env) `(primcall unbox ,x) x)] [(set! ,x ,[e]) `(primcall box-set! ,x ,e)]) (LambdaExpr : LambdaExpr (le env) -> LambdaExpr () [(lambda (,x* ...) (assigned (,a* ...) ,body)) (build-env x* a* env (lambda (x* t* env) (let ([box* (make-boxes t*)] [body (Expr body env)]) `(lambda (,x* ...) (let ([,a* ,box*] ...) ,body)))))])) ;;; pass: uncover-free : L10 -> L11 ;;; ;;; this pass performs the first step in closure conversion, determining the ;;; set of free-variables for each lambda expression. this list of free ;;; variables is an approximation of the values that need to be available to ;;; a procedure as its captured environment when a procedure is executed. ;;; there are numerous ways to shrink, or even eliminate this list, but in ;;; this compiler we are currently skipping any of these steps, and simply ;;; taking this set of free variables as the set we need to capture. (For ;;; one possible closure optimization technique see "Optimizing Flat ;;; Closures" by Keep et. al. or Chapter 4. of "A Nanopass Compiler for ;;; Commercial Compiler Development" by Keep). This is an analysis pass, ;;; so we are just gathering up the free variables. This will look somewhat ;;; similar to the identify-assigned-variables, except we care about all ;;; variable references, but only the free variables at lambdas. ;;; (define-pass uncover-free : L10 (e) -> L11 () (Expr : Expr (e) -> Expr (free*) ;; quoted constants have no variable references [(quote ,c) (values `(quote ,c) '())] ;; gather up variable references [,x (values x (list x))] ;; if we find a let or a letrec remove the bound variables from the list ;; of references. [(let ([,x* ,[e* free**]] ...) ,[e free*]) (values `(let ([,x* ,e*] ...) ,e) (apply union (difference free* x*) free**))] [(letrec ([,x* ,[le* free**]] ...) ,[body free*]) (values `(letrec ([,x* ,le*] ...) ,body) (difference (apply union free* free**) x*))] ;; in all the other cases, we simply want to gather up the ;; variable references from each sub expression [(if ,[e0 free0*] ,[e1 free1*] ,[e2 free2*]) (values `(if ,e0 ,e1 ,e2) (union free0* free1* free2*))] [(begin ,[e* free**] ... ,[e free*]) (values `(begin ,e* ... ,e) (apply union free* free**))] [(primcall ,pr ,[e* free**]...) (values `(primcall ,pr ,e* ...) (apply union free**))] [(,[e free*] ,[e* free**] ...) (values `(,e ,e* ...) (apply union free* free**))]) (LambdaExpr : LambdaExpr (le) -> LambdaExpr (free*) ;; at the lambda expression, remove our bound variables, everything else ;; is free. we continue to return the free variables until we find their ;; binding forms. [(lambda (,x* ...) ,[body free*]) (let ([free* (difference free* x*)]) (values `(lambda (,x* ...) (free (,free* ...) ,body)) free*))]) ;; in the body, we kick off with the Expr call, and make sure that we have ;; an empty free list when we reach the top, because we expect our programs ;; to be self-contained with no free-references. (let-values ([(e free*) (Expr e)]) (unless (null? free*) (error who "found unbound variables" free*)) e)) (define-pass convert-closures : L11 (e) -> L12 () (Expr : Expr (e) -> Expr () [(letrec ([,x* (lambda (,x** ...) (free (,f** ...) ,[body*]))] ...) ,[body]) (let ([l* (map unique-var x*)] [cp* (map unique-var x*)]) `(closures ([,x* ,l* ,f** ...] ...) (labels ([,l* (lambda (,cp* ,x** ...) (free (,f** ...) ,body*))] ...) ,body)))] [(,x ,[e*] ...) `(,x ,x ,e* ...)] [(,[e] ,[e*] ...) (let ([t (make-tmp)]) `(let ([,t ,e]) (,t ,t ,e* ...)))])) (define-pass optimize-known-call : L12 (e) -> L12 () (LabelsBody : LabelsBody (lbody env) -> LabelsBody ()) (LambdaExpr : LambdaExpr (le env) -> LambdaExpr ()) (Expr : Expr (e [env '()]) -> Expr () [(closures ([,x* ,l* ,f** ...] ...) ,lbody) (let ([env (fold-left (lambda (env x l) (cons (cons x l) env)) env x* l*)]) (let ([lbody (LabelsBody lbody env)]) `(closures ([,x* ,l* ,f** ...] ...) ,lbody)))] [(,x ,[e*] ...) (cond [(assq x env) => (lambda (as) `((label ,(cdr as)) ,e* ...))] [else `(,x ,e* ...)])])) (define-pass expose-closure-prims : L12 (e) -> L13 () (Expr : Expr (e [cp #f] [free* '()]) -> Expr () (definitions (define handle-closure-ref (lambda (x cp free*) (let loop ([free* free*] [i 0]) (cond [(null? free*) x] [(eq? x (car free*)) `(primcall closure-ref ,cp (quote ,i))] [else (loop (cdr free*) (fx+ i 1))])))) (define build-closure-set* (lambda (x* l* f** cp free*) (fold-left (lambda (e* x l f*) (let loop ([f* f*] [i 0] [e* e*]) (if (null? f*) (cons `(primcall closure-code-set! ,x (label ,l)) e*) (loop (cdr f*) (fx+ i 1) (cons `(primcall closure-data-set! ,x (quote ,i) ,(handle-closure-ref (car f*) cp free*)) e*))))) '() x* l* f**)))) [(closures ([,x* ,l* ,f** ...] ...) ,lbody) (let ([size* (map length f**)]) `(let ([,x* (primcall make-closure (quote ,size*))] ...) (begin ,(build-closure-set* x* l* f** cp free*) ... ,(LabelsBody lbody))))] [,x (handle-closure-ref x cp free*)] [((label ,l) ,[e*] ...) `((label ,l) ,e* ...)] [(,[e] ,[e*] ...) `((primcall closure-code ,e) ,e* ...)]) (LabelsBody : LabelsBody (lbody) -> Expr ()) (LambdaExpr : LambdaExpr (le) -> LambdaExpr () [(lambda (,x ,x* ...) (free (,f* ...) ,body)) `(lambda (,x ,x* ...) ,(Expr body x f*))])) (define-pass lift-lambdas : L13 (e) -> L14 () (definitions (define *l* '()) (define *le* '())) (Expr : Expr (e) -> Expr () [(labels ([,l* ,[le*]] ...) ,[body]) (set! *l* (append l* *l*)) (set! *le* (append le* *le*)) body]) (let ([e (Expr e)] [l (make-tmp)]) `(labels ([,l (lambda () ,e)] [,*l* ,*le*] ...) ,l))) (define-pass remove-complex-opera* : L14 (e) -> L15 () (definitions (with-output-language (L15 Expr) (define build-let (lambda (x* e* body) (if (null? x*) body `(let ([,x* ,e*] ...) ,body))))) (define simplify* (lambda (e* f) (let loop ([e* e*] [t* '()] [te* '()] [re* '()]) (if (null? e*) (build-let t* te* (f (reverse re*))) (let ([e (car e*)]) (nanopass-case (L15 Expr) e [,x (loop (cdr e*) t* te* (cons x re*))] [(quote ,c) (loop (cdr e*) t* te* (cons e re*))] [else (let ([t (make-tmp)]) (loop (cdr e*) (cons t t*) (cons e te*) (cons t re*)))]))))))) (Expr : Expr (e) -> Expr () [(primcall ,pr ,[e*] ...) (simplify* e* (lambda (e*) `(primcall ,pr ,e* ...)))] [(,[e] ,[e*] ...) (simplify* (cons e e*) (lambda (e*) `(,(car e*) ,(cdr e*) ...)))])) (define-pass recognize-context : L15 (e) -> L16 () (Value : Expr (e) -> Value () [(primcall ,pr ,[se*] ...) (guard (value-primitive? pr)) `(primcall ,pr ,se* ...)] [(primcall ,pr ,[se*] ...) (guard (predicate-primitive? pr)) `(if (primcall ,pr ,se* ...) (quote #t) (quote #f))] [(primcall ,pr ,[se*] ...) (guard (effect-primitive? pr)) `(begin (primcall ,pr ,se* ...) (primcall void))] [(primcall ,pr ,se* ...) (error who "unexpected primitive found" pr)]) (Effect : Expr (e) -> Effect () [,se `(nop)] [(primcall ,pr ,[se*] ...) (guard (effect-primitive? pr)) `(primcall ,pr ,se* ...)] [(primcall ,pr ,[se*] ...) (guard (or (value-primitive? pr) (predicate-primitive? pr))) `(nop)] [(primcall ,pr ,se* ...) (error who "unexpected primitive found" pr)]) (Predicate : Expr (e) -> Predicate () [(quote ,c) (if c `(true) `(false))] [,x `(if (primcall eq? x (quote #f)) (false) (true))] [(if ,[p0] ,[p1] ,[p2]) (nanopass-case (L16 Predicate) p0 [(true) p1] [(false) p2] [else `(if ,p0 ,p1 ,p2)])] [(,[se] ,[se*] ...) (let ([t (make-tmp)]) `(if (let ([,t (,se ,se* ...)]) (primcall = ,t (quote #f))) (false) (true)))] [(primcall ,pr ,[se*] ...) (guard (predicate-primitive? pr)) `(primcall ,pr ,se* ...)] [(primcall ,pr ,[se*] ...) (guard (effect-primitive? pr)) `(begin (primcall ,pr ,se* ...) (true))] [(primcall ,pr ,[se*] ...) (guard (value-primitive? pr)) (let ([t (make-tmp)]) `(if (let ([,t (primcall ,pr ,se* ...)]) (primcall eq? ,t (quote #f))) (false) (true)))] [(primcall ,pr ,se* ...) (error who "unexpected primitive found" pr)])) (define-pass expose-allocation-primitives : L16 (e) -> L17 () (Value : Value (v) -> Value () [(primcall ,vpr ,[se]) (case vpr [(make-vector) (nanopass-case (L17 SimpleExpr) se [(quote ,c) (target-fixnum? c) (let ([t (make-tmp)]) `(let ([,t (alloc ,vector-tag (quote ,(+ c 1)))]) (begin (primcall $vector-length-set! ,t (quote ,c)) ,t)))] [else (let ([t0 (make-tmp)] [t1 (make-tmp)] [t2 (make-tmp)]) `(let ([,t0 ,se]) (let ([,t1 (primcall + ,t0 (quote 1))]) (let ([,t2 (alloc ,vector-tag ,t1)]) (begin (primcall $vector-length-set! ,t2 ,t0) ,t2)))))])] [(make-closure) (nanopass-case (L17 SimpleExpr) se [(quote ,c) (guard (target-fixnum? c)) `(alloc ,closure-tag (quote ,(+ c 1)))] [else (error who "expected constant argument for make-closure primcall" (unparse-L16 v))])] [(box) (let ([t0 (make-tmp)] [t1 (make-tmp)]) `(let ([,t0 ,se]) (let ([,t1 (alloc ,box-tag (quote 1))]) (begin (primcall box-set! ,t1 ,t0) ,t1))))] [else `(primcall ,vpr ,se)])] [(primcall ,vpr ,[se0] ,[se1]) (case vpr [(cons) (let ([t0 (make-tmp)] [t1 (make-tmp)] [t2 (make-tmp)]) `(let ([,t0 ,se0] [,t1 ,se1]) (let ([,t2 (alloc ,pair-tag (quote 2))]) (begin (primcall $set-car! ,t2 ,t0) (primcall $set-cdr! ,t2 ,t1) ,t2))))] [else `(primcall ,vpr ,se0 ,se1)])])) (define-pass return-of-set! : L17 (e) -> L18 () (definitions (with-output-language (L18 Effect) (define build-set*! (lambda (x* v* body build-begin) (build-begin (map (lambda (x v) `(set! ,x ,v)) x* v*) body))))) (SimpleExpr : SimpleExpr (se) -> SimpleExpr ('())) (Value : Value (v) -> Value ('()) (definitions (define build-begin (lambda (e* v) (if (null? e*) v `(begin ,e* ... ,v))))) [(if ,[p0 var0*] ,[v1 var1*] ,[v2 var2*]) (values `(if ,p0 ,v1 ,v2) (append var0* var1* var2*))] [(begin ,[e* var**] ... ,[v var*]) (values `(begin ,e* ... ,v) (apply append var* var**))] [(primcall ,vpr ,[se* var**] ...) (values `(primcall ,vpr ,se* ...) (apply append var**))] [(,[se var*] ,[se* var**] ...) (values `(,se ,se* ...) (apply append var* var**))] [(let ([,x* ,[v* var**]] ...) ,[body var*]) (values (build-set*! x* v* body build-begin) (apply append x* var* var**))]) (Effect : Effect (e) -> Effect ('()) (definitions (define build-begin (lambda (e* e) (if (null? e*) e `(begin ,e* ... ,e))))) [(if ,[p0 var0*] ,[e1 var1*] ,[e2 var2*]) (values `(if ,p0 ,e1 ,e2) (append var0* var1* var2*))] [(begin ,[e* var**] ... ,[e var*]) (values `(begin ,e* ... ,e) (apply append var* var**))] [(primcall ,epr ,[se* var**] ...) (values `(primcall ,epr ,se* ...) (apply append var**))] [(,[se var*] ,[se* var**] ...) (values `(,se ,se* ...) (apply append var* var**))] [(let ([,x* ,[v* var**]] ...) ,[e var*]) (values (build-set*! x* v* e build-begin) (apply append x* var* var**))]) (Predicate : Predicate (p) -> Predicate ('()) (definitions (define build-begin (lambda (e* p) (if (null? e*) p `(begin ,e* ... ,p))))) [(if ,[p0 var0*] ,[p1 var1*] ,[p2 var2*]) (values `(if ,p0 ,p1 ,p2) (append var0* var1* var2*))] [(begin ,[e* var**] ... ,[p var*]) (values `(begin ,e* ... ,p) (apply append var* var**))] [(primcall ,ppr ,[se* var**] ...) (values `(primcall ,ppr ,se* ...) (apply append var**))] [(let ([,x* ,[v* var**]] ...) ,[p var*]) (values (build-set*! x* v* p build-begin) (apply append x* var* var**))]) (LambdaExpr : LambdaExpr (le) -> LambdaExpr () [(lambda (,x* ...) ,[body var*]) `(lambda (,x* ...) (locals (,var* ...) ,body))])) (define-pass flatten-set! : L18 (e) -> L19 () (SimpleExpr : SimpleExpr (se) -> SimpleExpr ()) (Effect : Effect (e) -> Effect () [(set! ,x ,v) (Value v x)]) (Value : Value (v x) -> Effect () [,se `(set! ,x ,(SimpleExpr se))] [(primcall ,vpr ,[se*] ...) `(set! ,x (primcall ,vpr ,se* ...))] [(alloc ,i ,[se]) `(set! ,x (alloc ,i ,se))] [(,[se] ,[se*] ...) `(set! ,x (,se ,se* ...))])) ;;; pass: push-if : L19 -> L20 ;;; ;;; It turns out I was a little overzealous with this pass and didn't quite ;;; handle all of the cases. In particular, in my hustle, I did not think ;;; about the `(if p0 p1 p2) where the result expressions contain effects... ;;; i.e. (if (begin ,e0* ... ,p0) (begin ,e1* ... ,p1) (begin ,e2* ... ;;; ,p2)) can only be handled if: ;;; 1. we are willing to copy the code for the tail of our ifs (we aren't, ;;; this can lead to exponential code explosion) or ;;; 2. if we are willing to flatten this code and use labels and gotos in ;;; our generated code. ;;; Number 2 is a more reasonable solution, but lucky for us, C will allow us ;;; to generate code like the following: ;;; ;;; (if (begin ,e0* ... ,p0) (begin ,e1* ... ,p1) (begin ,e2* ... ,p2)) => ;;; ;;; (((e0*[0]), (e0*[1]), ..., (e0*[n]), p0) ? ;;; ((e1*[0]), (e1*[1]), ..., (e1*[n]), p1) : ;;; ((e2*[0]), (e2*[1]), ..., (e2*[n]), p2)) ;;; ;;; I've left the pass here as an example that even when we think we've got a ;;; pass written and working, it easy to miss things, which is why we test, ;;; and why we need to think carefully as we work through the compiler. ;;; ; (define-pass push-if : L19 (e) -> L20 () ; (Value : Value (v) -> Value () ; (definitions ; (define build-begin ; (lambda (e* v) ; (if (null? e*) v `(begin ,e* ... ,v))))) ; [(if ,[p0 e*] ,[v1] ,[v2]) (build-begin e* `(if ,p0 ,v1 ,v2))]) ; (Effect : Effect (e) -> Effect () ; (definitions ; (define build-begin ; (lambda (e* e) ; (if (null? e*) e `(begin ,e* ... ,e))))) ; [(if ,[p0 e*] ,[e1] ,[e2]) (build-begin e* `(if ,p0 ,e1 ,e2))]) ; (Predicate : Predicate (p) -> Predicate ('()) ; [(begin ,[e*] ... ,[p more-e*]) (values p (append e* more-e*))] ; [(if ,[p0 e0*] ,[p1 e1*] ,[p2 e2*]) ; (values `(if ,p0 (begin ,e1* ... p1) (begin ,e2* ... ,p2)) e0*)])) ;;; pass: specify-constant-representation : L19 -> L21 ;;; ;;; This pass replaces our quoted constants with the explicit ptr ;;; representation we've decided to use. This effectively replaces each of our ;;; constants with a 64-bit integer. The conversion is pretty simple: ;;; ;;; #f => false-rep ;;; #t => true-rep ;;; '() => null-rep ;;; fixnum => fixnum << fixnum-shift (yielding 64-bit integer) ;;; (define-pass specify-constant-representation : L19 (e) -> L21 () (SimpleExpr : SimpleExpr (se) -> SimpleExpr () [(quote ,c) (cond [(eq? c #f) false-rep] [(eq? c #t) true-rep] [(null? c) null-rep] [(target-fixnum? c) (bitwise-arithmetic-shift-left c fixnum-shift)])])) ;;; pass: expand-primitives : L21 -> L22 ;;; ;;; this pass expands our Scheme primitives into something close to their ;;; C-language equivalents. This changes our math primitives to do the ;;; adjustments required by changing the representation of fixnums (it works ;;; fine for + and -, but * and / require us to do some shifting in order to ;;; have a fixnum as a result). We also translate all of our memory ;;; referencing primitives to mrefs and memory setting primitives into ;;; msets!. When we generate C code for these, we will do the pointer ;;; arithmetic required and then dereference the calculated address. ;;; Remember, that because of our tags, we need to do some pointer arithmetic ;;; for any dereference we wish to perform. This pointer arithmetic, though, ;;; can be handled in a single memory reference argument on an x86_64 (which ;;; is our assumed target platform). ;;; ;;; Design decision: Right now each of our "instructions" is a separate form ;;; in the language, however, if we were to extend our source language and ;;; primitive set much farther, it is likely that we would want to revisit ;;; this to choose a representation where a single form could represent ;;; several of these instructions. This might also be desirable if we change ;;; the representation to LLVM or asm.js. ;;;; (define-pass expand-primitives : L21 (e) -> L22 () #;(Value : Value (v) -> Value () (definitions (define build-begin (lambda (e* v) (nanopass-case (L22 Value) v [(begin ,e1* ... ,v) (build-begin (append e* e1*) v)] [else (let loop ([e* e*] [re* '()]) (if (null? e*) `(begin ,(reverse re*) ... ,v) (let ([e (car e*)]) (nanopass-case (L22 Effect) e [(nop) (loop (cdr e*) re*)] [(begin ,e0* ... ,e0) (loop (append e0* (cons e0 (cdr e*))) re*)] [else (loop (cdr e*) (cons (car e*) re*))]))))])))) [(begin ,[e*] ... ,[v]) (build-begin e* v)]) (Rhs : Rhs (rhs) -> Rhs () [(primcall ,vpr) (case vpr [(void) void-rep] [else (error who "unexpected value primitive" vpr)])] [(primcall ,vpr ,[se]) (case vpr [(car) `(mref ,se #f ,(- pair-tag))] [(cdr) `(mref ,se #f ,(- word-size pair-tag))] [(unbox) `(mref ,se #f ,(- box-tag))] [(closure-code) `(mref ,se #f ,(- closure-tag))] [(vector-length) `(mref ,se #f ,(- vector-tag))] [else (error who "unexpected value primitive" vpr)])] [(primcall ,vpr ,[se0] ,[se1]) (case vpr [(closure-ref) `(mref ,se0 ,se1 ,(- word-size closure-tag))] [(vector-ref) `(mref ,se0 ,se1 ,(- word-size vector-tag))] [(+) `(add ,se0 ,se1)] [(-) `(subtract ,se0 ,se1)] ;; when we multiply or divide, we need to shift either one of the ;; arguments or the result. we could also be a bit more clever here, ;; if one of the arguments is a constant, we can perform the shift ;; ahead of time (assuming the constant still fits within the 64-bit ;; width [(*) `(multiply ,se0 (shift-right ,se1 ,fixnum-shift))] [(/) `(shift-left (divide ,se0 ,se1) ,fixnum-shift)] [else (error who "unexpected value primitive" vpr)])] [(primcall ,vpr ,se* ...) (error who "unexpected value primitive" vpr)]) (Effect : Effect (e) -> Effect () #;(definitions (define build-begin (lambda (e* e) (nanopass-case (L22 Effect) e [(begin ,e1* ... ,e) (build-begin (append e* e1*) e)] [else (let loop ([e* e*] [re* '()]) (if (null? e*) `(begin ,(reverse re*) ... ,e) (let ([e (car e*)]) (nanopass-case (L22 Effect) e [(nop) (loop (cdr e*) re*)] [(begin ,e0* ... ,e0) (loop (append e0* (cons e0 (cdr e*))) re*)] [else (loop (cdr e*) (cons (car e*) re*))]))))])))) #;[(begin ,[e*] ... ,[e]) (build-begin e* e)] [(primcall ,epr ,[se0] ,[se1]) (case epr [(box-set!) `(mset! ,se0 #f ,(- box-tag) ,se1)] [($set-car!) `(mset! ,se0 #f ,(- pair-tag) ,se1)] [($set-cdr!) `(mset! ,se0 #f ,(- word-size pair-tag) ,se1)] [($vector-length-set!) `(mset! ,se0 #f ,(- vector-tag) ,se1)] [(closure-code-set!) `(mset! ,se0 #f ,(- closure-tag) ,se1)] [else (error who "unexpected effect primitive" epr)])] [(primcall ,epr ,[se0] ,[se1] ,[se2]) (case epr [(vector-set!) `(mset! ,se0 ,se1 ,(- word-size vector-tag) ,se2)] [(closure-data-set!) `(mset! ,se0 ,se1 ,(- word-size closure-tag) ,se2)] [else (error who "unexpected effect primitive" epr)])] [(primcall ,epr ,se* ...) (error who "unexpected effect primitive" epr)]) (Predicate : Predicate (p) -> Predicate () #;(definitions (define build-begin (lambda (e* p) (nanopass-case (L22 Predicate) p [(begin ,e1* ... ,p) (build-begin (append e* e1*) p)] [else (let loop ([e* e*] [re* '()]) (if (null? e*) `(begin ,(reverse re*) ... ,p) (let ([e (car e*)]) (nanopass-case (L22 Effect) e [(nop) (loop (cdr e*) re*)] [(begin ,e0* ... ,e0) (loop (append e0* (cons e0 (cdr e*))) re*)] [else (loop (cdr e*) (cons (car e*) re*))]))))])))) #;[(begin ,[e*] ... ,[p]) (build-begin e* p)] [(primcall ,ppr ,[se]) (case ppr [(pair?) `(= (logand ,se ,pair-mask) ,pair-tag)] [(null?) `(= ,se ,null-rep)] [(boolean?) `(= (logand ,se ,boolean-mask) ,boolean-tag)] [(vector?) `(= (logand ,se ,vector-mask) ,vector-tag)] [(box?) `(= (logand ,se ,box-mask) ,box-tag)] [else (error who "unexpected predicate primitive" ppr)])] [(primcall ,ppr ,[se0] ,[se1]) (case ppr [(eq? =) `(= ,se0 ,se1)] [(<) `(< ,se0 ,se1)] [(<=) `(<= ,se0 ,se1)] [(>) `(<= ,se1 ,se0)] [(>=) `(< ,se1 ,se0)] [else (error who "unexpected predicate primitive" ppr)])] [(primcall ,ppr ,se* ...) (error who "unexpected predicate primitive" ppr)])) ;;; pass: generate-C : L22 -> printed-output ;;; ;;; this pass takes a program in the L22 language and produces a printed C ;;; program. using a string or file port, the results of this can be ;;; captured in a string or sent to a file to be compiled. The code that it ;;; produces can be a little difficult to read, particularly with all of the ;;; casts to and from ptr values. ;;; ;;; TODO: this pass is fairly convoluted, and could use some refactoring. We ;;; might also want to try to pretty-print the C code so that it prints ;;; out a bit better. ;;; (define-pass generate-c : L22 (e) -> * () (definitions ;;; symbol->c-id - converts any Scheme symbol into a valid C identifier. (define symbol->c-id (lambda (sym) (let ([ls (string->list (symbol->string sym))]) (if (null? ls) "" (let ([fst (car ls)]) (list->string (cons (if (char-alphabetic? fst) fst #\_) (map (lambda (c) (if (or (char-alphabetic? c) (char-numeric? c)) c #\_)) (cdr ls))))))))) ;;; emit-function-header - generates a function header to be used in the ;;; declaration of a function or the definition of a function. (define emit-function-header (lambda (l x*) (printf "ptr ~a(" l) (unless (null? x*) (let loop ([x (car x*)] [x* (cdr x*)]) (if (null? x*) (printf "ptr ~a" (symbol->c-id x)) (begin (printf "ptr ~a, " (symbol->c-id x)) (loop (car x*) (cdr x*)))))) (printf ")")))) ;; transformer to print our function declarations (emit-function-decl : LambdaExpr (le l) -> * () [(lambda (,x* ...) ,lbody) (emit-function-header l x*) (printf ";~%")]) ;; transformer to print our function definitions (emit-function-def : LambdaExpr (le l) -> * () [(lambda (,x* ...) ,lbody) (emit-function-header l x*) (printf " {~%") (emit-function-body lbody) (printf "}~%~%")]) ;; transformer to emit the body of a function (emit-function-body : LocalsBody (lbody) -> * () [(locals (,x* ...) ,body) (for-each (lambda (x) (printf " ptr ~a;~%" (symbol->c-id x))) x*) (emit-value body x*)]) ;; transformer to emit expressions in value context (emit-value : Value (v locals*) -> * () [(if ,p0 ,v1 ,v2) (printf " if (~a) {~%" (format-predicate p0)) (emit-value v1 locals*) (printf " } else {~%") (emit-value v2 locals*) (printf " }~%")] [(begin ,e* ... ,v) (for-each emit-effect e*) (emit-value v locals*)] [,rhs (let ([rhs (format-rhs rhs)]) (printf " return (ptr)~a;\n" rhs))]) ;; transformer to format Predicate expressions into strings (format-predicate : Predicate (p) -> * (str) [(if ,p0 ,p1 ,p2) (format "((~a) ? (~a) : (~a))" (format-predicate p0) (format-predicate p1) (format-predicate p2))] [(<= ,se0 ,se1) (format "((long)~a <= (long)~a)" (format-simple-expr se0) (format-simple-expr se1))] [(< ,se0 ,se1) (format "((long)~a < (long)~a)" (format-simple-expr se0) (format-simple-expr se1))] [(= ,se0 ,se1) (format "((long)~a == (long)~a)" (format-simple-expr se0) (format-simple-expr se1))] [(true) "1"] [(false) "0"] [(begin ,e* ... ,p) (let loop ([e* e*] [str ""]) (if (null? e*) (string-append str (format-predicate p)) (loop (cdr e*) (string-append str (format-effect (car e*)) ", "))))]) ;; transformer to format effects in predicate context into strings (format-effect : Effect (e) -> * (str) [(if ,p0 ,e1 ,e2) (format "((~a) ? (~a) : (~a))" (format-predicate p0) (format-effect e1) (format-effect e2))] [((label ,l) ,se* ...) (format "~a(~a)" (symbol->c-id l) (let f ([se* se*]) (if (null? se*) "" (let ([se (car se*)] [se* (cdr se*)]) (format "~a~a~a" (format-simple-expr se) (if (null? se*) "" ", ") (f se*))))))] [(,se ,se* ...) (format "((ptr (*)(~a))~a)(~a)" (let f ([i (length se*)]) (cond [(fxzero? i) ""] [(fx=? i 1) "ptr"] [else (format "ptr, ~a" (f (fx- i 1)))])) (format-simple-expr se) (let f ([se* se*]) (if (null? se*) "" (let ([se (car se*)] [se* (cdr se*)]) (format "~a~a~a" (format-simple-expr se) (if (null? se*) "" ", ") (f se*))))))] [(set! ,x ,rhs) (format "~a = (ptr)~a" (symbol->c-id x) (format-rhs rhs))] [(nop) "0"] [(begin ,e* ... ,e) (let f ([e* e*]) (if (null? e*) (format-effect e) (string-append (format-effect (car e*)) ", " (f (cdr e*)))))] [(mset! ,se0 ,se1? ,i ,se2) (if se1? (format "((*((ptr*)((long)~a + (long)~a + ~d))) = (ptr)~a)" (format-simple-expr se0) (format-simple-expr se1?) i (format-simple-expr se2)) (format "((*((ptr*)((long)~a + ~d))) = (ptr)~a)" (format-simple-expr se0) i (format-simple-expr se2)))]) ;; formats simple expressions in to strings (format-simple-expr : SimpleExpr (se) -> * (str) [,x (symbol->c-id x)] [,i (number->string i)] [(label ,l) (format "(*~a)" (symbol->c-id l))] [(logand ,se0 ,se1) (format "((long)~a & (long)~a)" (format-simple-expr se0) (format-simple-expr se1))] [(shift-right ,se0 ,se1) (format "((long)~a >> (long)~a)" (format-simple-expr se0) (format-simple-expr se1))] [(shift-left ,se0 ,se1) (format "((long)~a << (long)~a)" (format-simple-expr se0) (format-simple-expr se1))] [(divide ,se0 ,se1) (format "((long)~a / (long)~a)" (format-simple-expr se0) (format-simple-expr se1))] [(multiply ,se0 ,se1) (format "((long)~a * (long)~a)" (format-simple-expr se0) (format-simple-expr se1))] [(subtract ,se0 ,se1) (format "((long)~a - (long)~a)" (format-simple-expr se0) (format-simple-expr se1))] [(add ,se0 ,se1) (format "((long)~a + (long)~a)" (format-simple-expr se0) (format-simple-expr se1))] [(mref ,se0 ,se1? ,i) (if se1? (format "(*((ptr)((long)~a + (long)~a + ~d)))" (format-simple-expr se0) (format-simple-expr se1?) i) (format "(*((ptr)((long)~a + ~d)))" (format-simple-expr se0) i))]) ;; prints expressions in effect position into C statements (emit-effect : Effect (e) -> * () [(if ,p0 ,e1 ,e2) (printf " if (~a) {~%" (format-predicate p0)) (emit-effect e1) (printf " } else {~%") (emit-effect e2) (printf " }~%")] [((label ,l) ,se* ...) (printf " ~a(" (symbol->c-id l)) (unless (null? se*) (let loop ([se (car se*)] [se* (cdr se*)]) (if (null? se*) (printf "(ptr)~a" (format-simple-expr se)) (begin (printf "(ptr)~a, " (format-simple-expr se)) (loop (car se*) (cdr se*)))))) (printf ");\n")] [(,se ,se* ...) (printf " ((ptr (*)(~a))~a)(" (let loop ([i (length se*)]) (cond [(fxzero? i) ""] [(fx=? i 1) "ptr"] [else (format "ptr, ~a" (loop (fx- i 1)))])) (format-simple-expr se)) (unless (null? se*) (let loop ([se (car se*)] [se* (cdr se*)]) (if (null? se*) (printf "(ptr)~a" (format-simple-expr se)) (begin (printf "(ptr)~a, " (format-simple-expr se)) (loop (car se*) (cdr se*)))))) (printf ");\n")] [(set! ,x ,rhs) (printf " ~a = (ptr)" (symbol->c-id x)) (printf "~a;\n" (format-rhs rhs))] [(nop) (if #f #f)] [(begin ,e* ... ,e) (for-each emit-effect e*) (emit-effect e)] [(mset! ,se0 ,se1? ,i ,se2) (if se1? (printf "(*((ptr*)((long)~a + (long)~a + ~d))) = (ptr)~a;\n" (format-simple-expr se0) (format-simple-expr se1?) i (format-simple-expr se2)) (printf "(*((ptr*)((long)~a + ~d))) = (ptr)~a;\n" (format-simple-expr se0) i (format-simple-expr se2)))]) ;; formats the right-hand side of a set! into a C expression (format-rhs : Rhs (rhs) -> * (str) [((label ,l) ,se* ...) (format " ~a(~a)" (symbol->c-id l) (if (null? se*) "" (let loop ([se (car se*)] [se* (cdr se*)]) (if (null? se*) (format "(ptr)~a" (format-simple-expr se)) (format "(ptr)~a, ~a" (format-simple-expr se) (loop (car se*) (cdr se*)))))))] [(,se ,se* ...) (format " ((ptr (*)(~a))~a)(~a)" (let loop ([i (length se*)]) (cond [(zero? i) ""] [(fx=? i 1) "ptr"] [else (format "ptr, ~a" (loop (fx- i 1)))])) (format-simple-expr se) (let loop ([se (car se*)] [se* (cdr se*)]) (if (null? se*) (format "(ptr)~a" (format-simple-expr se)) (format "(ptr)~a, ~a" (format-simple-expr se) (loop (car se*) (cdr se*))))))] [(alloc ,i ,se) (if (use-boehm?) (format "(ptr)((long)GC_MALLOC(~a) + ~dl)" (format-simple-expr se) i) (format "(ptr)((long)malloc(~a) + ~dl)" (format-simple-expr se) i))] [,se (format-simple-expr se)]) ;; emits a C program for our progam expression (Program : Program (p) -> * () [(labels ([,l* ,le*] ...) ,l) (let ([l (symbol->c-id l)] [l* (map symbol->c-id l*)]) (define-syntax emit-include (syntax-rules () [(_ name) (printf "~s\n" 'name)])) (define-syntax emit-predicate (syntax-rules () [(_ PRED_P mask tag) (emit-c-macro PRED_P (x) "(((long)x & ~d) == ~d)" mask tag)])) (define-syntax emit-c-macro (lambda (x) (syntax-case x() [(_ NAME (x* ...) fmt args ...) #'(printf "#define NAME(xlist) ~a" (format fmt args ...))]))) ;; the following printfs output the tiny C runtime we are using ;; to wrap the result of our compiled Scheme program. (printf "#include \n\ ~a\n\ typedef long* ptr;\n\ " (if (use-boehm?) "#include " "#include ")) (printf "#define FIXNUM_P(x) (((long)x & ~d) == ~d)\n" fixnum-mask fixnum-tag) (printf "#define FIX(x) ((long)x << ~d)\n" fixnum-shift) (printf "#define UNFIX(x) ((long)x >> ~d)\n" fixnum-shift) (printf "#define PAIR_P(x) (((long)x & ~d) == ~d)\n" pair-mask pair-tag) (printf "#define BOX_P(x) (((long)x & ~d) == ~d)\n" box-mask box-tag) (printf "#define UNBOX(x) ((ptr)*((ptr)((long)x - ~d)))\n" box-tag) (printf "#define VECTOR_P(x) (((long)x & ~d) == ~d)\n" vector-mask vector-tag) (printf "#define VECTOR_LENGTH_S(x) ((long)*((ptr)((long)x - ~d)))\n" vector-tag) (printf "#define VECTOR_LENGTH_C(x) UNFIX(((long)*((ptr)((long)x - ~d))))\n" vector-tag) (printf "#define VECTOR_REF(x,i) ((ptr)*((ptr)((long)x + ((i + 1) * ~d) - ~d)))\n" word-size vector-tag) (printf "#define TRUE_P(x) ((long)x == ~d)\n" true-rep) (printf "#define FALSE_P(x) ((long)x == ~d)\n" false-rep) (printf "#define NULL_P(x) ((long)x == ~d)\n" null-rep) (printf "#define VOID_P(x) ((long)x == ~d)\n" void-rep) (printf "#define CAR(x) ((ptr)*((ptr)((long)x - ~d)))\n" pair-tag) (printf "#define CDR(x) ((ptr)*((ptr)((long)x + ~d - ~d)))\n" word-size pair-tag) (printf "#define PROCEDURE_P(x) (((long)x & ~d) == ~d)\n" closure-mask closure-tag) (printf "void print_scheme_value(ptr x) {\n") (printf " long i, veclen;\n") (printf " ptr p;\n") (printf " if (TRUE_P(x)) {\n") (printf " printf(\"#t\");\n") (printf " } else if (FALSE_P(x)) {\n") (printf " printf(\"#f\");\n") (printf " } else if (NULL_P(x)) {\n") (printf " printf(\"()\");\n") (printf " } else if (VOID_P(x)) {\n") (printf " printf(\"(void)\");\n") (printf " } else if (FIXNUM_P(x)) {\n") (printf " printf(\"%ld\", UNFIX(x));\n") (printf " } else if (PAIR_P(x)) {\n") (printf " printf(\"(\");\n") (printf " for (p = x; PAIR_P(p); p = CDR(p)) {\n") (printf " print_scheme_value(CAR(p));\n") (printf " if (PAIR_P(CDR(p))) { printf(\" \"); }\n") (printf " }\n") (printf " if (NULL_P(p)) {\n") (printf " printf(\")\");\n") (printf " } else {\n") (printf " printf(\" . \");\n") (printf " print_scheme_value(p);\n") (printf " printf(\")\");\n") (printf " }\n") (printf " } else if (BOX_P(x)) {\n") (printf " printf(\"#(box \");\n") (printf " print_scheme_value(UNBOX(x));\n") (printf " printf(\")\");\n") (printf " } else if (VECTOR_P(x)) {\n") (printf " veclen = VECTOR_LENGTH_C(x);\n") (printf " printf(\"#(\");\n") (printf " for (i = 0; i < veclen; i += 1) {\n") (printf " print_scheme_value(VECTOR_REF(x,i));\n") (printf " if (i < veclen) { printf(\" \"); } \n") (printf " }\n") (printf " printf(\")\");\n") (printf " } else if (PROCEDURE_P(x)) {\n") (printf " printf(\"#(procedure)\");\n") (printf " }\n") (printf "}\n") (map emit-function-decl le* l*) (map emit-function-def le* l*) (printf "int main(int argc, char * argv[]) {\n") (printf " print_scheme_value(~a());\n" l) (printf " printf(\"\\n\");\n") (printf " return 0;\n") (printf "}\n"))])) ;;; a little helper mostly shamelesly stolen from ;;; the Chez Scheme User's Guide (define-syntax with-implicit (syntax-rules () [(_ (tid id ...) body0 ... body1) (with-syntax ([id (datum->syntax #'tid 'id)] ...) body0 ... body1)])) ;;; a little macro to make building a compiler with tracing that we can turn ;;; off and on easier. no support for looping in this, but the syntax is very ;;; simple: ;;; (define-compiler my-compiler-name ;;; (pass1 unparser) ;;; (pass2 unparser) ;;; ... ;;; pass-to-generate-c) ;;; (define-syntax define-compiler (lambda (x) (syntax-case x () [(_ name (pass unparser) ... gen-c) (with-implicit (name all-passes trace-passes) #`(begin (define all-passes '(pass ... gen-c)) (define trace-passes (let ([passes '()]) (case-lambda [() passes] [(x) (cond [(symbol? x) (unless (memq x all-passes) (error 'trace-passes "invalid pass name" x)) (set! passes (list x))] [(list? x) (unless (for-all (lambda (x) (memq x all-passes)) x) (error 'trace-passes "one or more invalid pass names" x)) (set! passes x)] [(eq? x #t) (set! passes all-passes)] [(eq? x #f) (set! passes '())] [else (error 'trace-passes "invalid pass specifier" x)])]))) (define name (lambda (x) #,(let loop ([pass* #'(pass ...)] [unparser* #'(unparser ...)]) (if (null? pass*) #'(begin (when (file-exists? "t.c") (delete-file "t.c")) (with-output-to-file "t.c" (lambda () (gen-c x))) (when (memq 'gen-c (trace-passes)) (printf "output of pass ~s~%" 'gen-c) (call-with-input-file "t.c" (lambda (ip) (let f () (let ([s (get-string-n ip 512)]) (unless (eof-object? s) (display s) (f))))))) (system (format "gcc -m64 ~a t.c -o t" (if (use-boehm?) "-lgc" ""))) (when (file-exists? "t.out") (delete-file "t.out")) (system "t > t.out") (call-with-input-file "t.out" read)) (with-syntax ([pass (car pass*)] [unparser (car unparser*)]) #`(let ([x (pass x)]) (when (memq 'pass (trace-passes)) (printf "output of pass ~s~%" 'pass) (pretty-print (unparser x))) #,(loop (cdr pass*) (cdr unparser*))))))))))]))) ;;; the definition of our compiler that pulls in all of our passes and runs ;;; them in sequence checking to see if the programmer wants them traced. (define-compiler my-tiny-compile (parse-and-rename unparse-Lsrc) (remove-one-armed-if unparse-L1) (remove-and-or-not unparse-L2) (make-begin-explicit unparse-L3) (inverse-eta-raw-primitives unparse-L4) (quote-constants unparse-L5) (remove-complex-constants unparse-L6) (identify-assigned-variables unparse-L7) (purify-letrec unparse-L8) (optimize-direct-call unparse-L8) (find-let-bound-lambdas unparse-L8) (remove-anonymous-lambda unparse-L9) (convert-assignments unparse-L10) (uncover-free unparse-L11) (convert-closures unparse-L12) (optimize-known-call unparse-L12) (expose-closure-prims unparse-L13) (lift-lambdas unparse-L14) (remove-complex-opera* unparse-L15) (recognize-context unparse-L16) (expose-allocation-primitives unparse-L17) (return-of-set! unparse-L18) (flatten-set! unparse-L19) ; (push-if unparse-L20) (specify-constant-representation unparse-L21) (expand-primitives unparse-L22) generate-c))