From 9320340cb9c03e992106a07d7a0f4b8347f824a7 Mon Sep 17 00:00:00 2001 From: Vincent St-Amour Date: Wed, 7 Jul 2010 18:41:25 -0400 Subject: [PATCH] Improved unboxed complex operations. Intermediate results are kept as unboxed floats as long as we stay within complex arithmetic code. original commit: 7853d333495376f065c63a19866ac2eba6f8926a --- collects/typed-scheme/private/optimize.rkt | 135 +++++++++++++++------ 1 file changed, 99 insertions(+), 36 deletions(-) diff --git a/collects/typed-scheme/private/optimize.rkt b/collects/typed-scheme/private/optimize.rkt index 8dc2fed0..a58227ca 100644 --- a/collects/typed-scheme/private/optimize.rkt +++ b/collects/typed-scheme/private/optimize.rkt @@ -1,7 +1,7 @@ #lang scheme/base (require syntax/parse (for-template scheme/base scheme/flonum scheme/fixnum scheme/unsafe/ops racket/private/for) - "../utils/utils.rkt" "../utils/tc-utils.rkt" unstable/match scheme/match unstable/syntax + "../utils/utils.rkt" "../utils/tc-utils.rkt" unstable/match scheme/match unstable/syntax unstable/values (rep type-rep) syntax/id-table racket/dict (types abbrev type-table utils subtype)) (provide optimize) @@ -60,10 +60,21 @@ #:when (dict-ref tbl #'i #f) #:with unsafe (dict-ref tbl #'i))) +;; to generate temporary symbols in a predictable manner +;; these identifiers are unique within a sequence of unboxed operations +;; necessary to have predictable symbols to add in the hand-optimized versions +;; of the optimizer tests (which check for equality of expanded code) +(define *unboxed-gensym-counter* 0) +(define (unboxed-gensym) + (set! *unboxed-gensym-counter* (add1 *unboxed-gensym-counter*)) + (format-unique-id #'here "unboxed-gensym-~a" *unboxed-gensym-counter*)) + (define-syntax-class inexact-complex-opt-expr (pattern e:unboxed-inexact-complex-opt-expr - #:with opt #'(let-values (((real imag) e.opt)) - (unsafe-make-flrectangular real imag)))) + #:with opt + (begin (set! *unboxed-gensym-counter* 0) + #'(let* (e.bindings ...) + (unsafe-make-flrectangular e.real-part e.imag-part))))) ;; it's faster to take apart a complex number and use unsafe operations on ;; its parts than it is to use generic operations ;; we keep the real and imaginary parts unboxed as long as we stay within @@ -73,52 +84,104 @@ c1:unboxed-inexact-complex-opt-expr c2:unboxed-inexact-complex-opt-expr cs:unboxed-inexact-complex-opt-expr ...) - #:with opt - (begin (log-optimization "binary inexact complex" #'op) - (for/fold ([o #'c1.opt]) - ([e (syntax->list #'(c2.opt cs.opt ...))]) - #`(let-values (((t1-real t1-imag) #,o) - ((t2-real t2-imag) #,e)) - (values - (op.unsafe t1-real t2-real) - (op.unsafe t1-imag t2-imag)))))) + #:with real-part (unboxed-gensym) + #:with imag-part (unboxed-gensym) + #:with (bindings ...) + (begin (log-optimization "unboxed binary inexact complex" #'op) + #`(#,@(append (syntax->list #'(c1.bindings ... c2.bindings ... cs.bindings ... ...)) + (list #`(real-part #,(for/fold ((o #'c1.real-part)) + ((e (syntax->list #'(c2.real-part cs.real-part ...)))) + #`(op.unsafe #,o #,e))) + #`(imag-part #,(for/fold ((o #'c1.imag-part)) + ((e (syntax->list #'(c2.imag-part cs.imag-part ...)))) + #`(op.unsafe #,o #,e)))))))) (pattern (#%plain-app (~and op (~literal *)) c1:unboxed-inexact-complex-opt-expr c2:unboxed-inexact-complex-opt-expr cs:unboxed-inexact-complex-opt-expr ...) - #:with opt - (begin (log-optimization "binary inexact complex" #'op) - (for/fold ([o #'c1.opt]) - ([e (syntax->list #'(c2.opt cs.opt ...))]) - #`(let-values (((a b) #,o) - ((c d) #,e)) - (values - (unsafe-fl- (unsafe-fl* a c) (unsafe-fl* b d)) - (unsafe-fl+ (unsafe-fl* b c) (unsafe-fl* a d))))))) + #:with real-part (unboxed-gensym) + #:with imag-part (unboxed-gensym) + #:with (bindings ...) + (begin (log-optimization "unboxed binary inexact complex" #'op) + #`(c1.bindings ... c2.bindings ... cs.bindings ... ... + ;; we want to bind the intermediate results to reuse them + ;; the final results are bound to real-part and imag-part + #,@(let loop ([o1 #'c1.real-part] + [o2 #'c1.imag-part] + [e1 (syntax->list #'(c2.real-part cs.real-part ...))] + [e2 (syntax->list #'(c2.imag-part cs.imag-part ...))] + [rs (append (map (lambda (x) (unboxed-gensym)) + (syntax->list #'(cs.real-part ...))) + (list #'real-part))] + [is (append (map (lambda (x) (unboxed-gensym)) + (syntax->list #'(cs.imag-part ...))) + (list #'imag-part))] + [res '()]) + (if (null? e1) + (reverse res) + (loop (car rs) (car is) (cdr e1) (cdr e2) (cdr rs) (cdr is) + ;; complex multiplication, imag part, then real part (reverse) + (list* #`(#,(car is) + (unsafe-fl+ (unsafe-fl* #,o2 #,(car e1)) + (unsafe-fl* #,o1 #,(car e2)))) + #`(#,(car rs) + (unsafe-fl- (unsafe-fl* #,o1 #,(car e1)) + (unsafe-fl* #,o2 #,(car e2)))) + res))))))) (pattern (#%plain-app (~and op (~literal /)) c1:unboxed-inexact-complex-opt-expr c2:unboxed-inexact-complex-opt-expr cs:unboxed-inexact-complex-opt-expr ...) - #:with opt - (begin (log-optimization "binary inexact complex" #'op) - (for/fold ([o #'c1.opt]) - ([e (syntax->list #'(c2.opt cs.opt ...))]) - #`(let-values (((a b) #,o) - ((c d) #,e)) - (let ((den (unsafe-fl+ (unsafe-fl* c c) (unsafe-fl* d d)))) - (values - (unsafe-fl/ (unsafe-fl+ (unsafe-fl* a c) (unsafe-fl* b d)) - den) - (unsafe-fl/ (unsafe-fl- (unsafe-fl* b c) (unsafe-fl* a d)) - den))))))) + #:with real-part (unboxed-gensym) + #:with imag-part (unboxed-gensym) + #:with (denominators ...) + (for/list + ([e1 (syntax->list #'(c2.real-part cs.real-part ...))] + [e2 (syntax->list #'(c2.imag-part cs.imag-part ...))]) + #`(#,(unboxed-gensym) (unsafe-fl+ (unsafe-fl* #,e1 #,e1) (unsafe-fl* #,e2 #,e2)))) + #:with (bindings ...) + (begin (log-optimization "unboxed binary inexact complex" #'op) + #`(c1.bindings ... c2.bindings ... cs.bindings ... ... denominators ... + ;; we want to bind the intermediate results to reuse them + ;; the final results are bound to real-part and imag-part + #,@(let loop ([o1 #'c1.real-part] + [o2 #'c1.imag-part] + [e1 (syntax->list #'(c2.real-part cs.real-part ...))] + [e2 (syntax->list #'(c2.imag-part cs.imag-part ...))] + [d (map (lambda (x) (car (syntax-e x))) + (syntax->list #'(denominators ...)))] + [rs (append (map (lambda (x) (unboxed-gensym)) + (syntax->list #'(cs.real-part ...))) + (list #'real-part))] + [is (append (map (lambda (x) (unboxed-gensym)) + (syntax->list #'(cs.imag-part ...))) + (list #'imag-part))] + [res '()]) + (if (null? e1) + (reverse res) + (loop (car rs) (car is) (cdr e1) (cdr e2) (cdr d) (cdr rs) (cdr is) + ;; complex division, imag part, then real part (reverse) + (list* #`(#,(car is) + (unsafe-fl/ (unsafe-fl- (unsafe-fl* #,o2 #,(car e1)) + (unsafe-fl* #,o1 #,(car e2))) + #,(car d))) + #`(#,(car rs) + (unsafe-fl/ (unsafe-fl+ (unsafe-fl* #,o1 #,(car e1)) + (unsafe-fl* #,o2 #,(car e2))) + #,(car d))) + res))))))) (pattern e:opt-expr ;; can't work on inexact reals, which are a subtype of inexact ;; complexes, so this has to be equality #:when (match (type-of #'e) [(tc-result1: (== -InexactComplex type-equal?)) #t] [_ #f]) - #:with opt #'(let ((t e.opt)) - (values (unsafe-flreal-part t) - (unsafe-flimag-part t))))) + #:with e* (unboxed-gensym) + #:with real-part (unboxed-gensym) + #:with imag-part (unboxed-gensym) + #:with (bindings ...) + #'((e* e.opt) + (real-part (unsafe-flreal-part e*)) + (imag-part (unsafe-flimag-part e*))))) (define-syntax-class inexact-complex-unary-op (pattern (~or (~literal real-part) (~literal flreal-part)) #:with unsafe #'unsafe-flreal-part)