Improved unboxed complex operations.

Intermediate results are kept as unboxed floats as long as we stay
within complex arithmetic code.

original commit: 7853d333495376f065c63a19866ac2eba6f8926a
This commit is contained in:
Vincent St-Amour 2010-07-07 18:41:25 -04:00
parent f7ac316db2
commit 9320340cb9

View File

@ -1,7 +1,7 @@
#lang scheme/base
(require syntax/parse (for-template scheme/base scheme/flonum scheme/fixnum scheme/unsafe/ops racket/private/for)
"../utils/utils.rkt" "../utils/tc-utils.rkt" unstable/match scheme/match unstable/syntax
"../utils/utils.rkt" "../utils/tc-utils.rkt" unstable/match scheme/match unstable/syntax unstable/values
(rep type-rep) syntax/id-table racket/dict
(types abbrev type-table utils subtype))
(provide optimize)
@ -60,10 +60,21 @@
#:when (dict-ref tbl #'i #f)
#:with unsafe (dict-ref tbl #'i)))
;; to generate temporary symbols in a predictable manner
;; these identifiers are unique within a sequence of unboxed operations
;; necessary to have predictable symbols to add in the hand-optimized versions
;; of the optimizer tests (which check for equality of expanded code)
(define *unboxed-gensym-counter* 0)
(define (unboxed-gensym)
(set! *unboxed-gensym-counter* (add1 *unboxed-gensym-counter*))
(format-unique-id #'here "unboxed-gensym-~a" *unboxed-gensym-counter*))
(define-syntax-class inexact-complex-opt-expr
(pattern e:unboxed-inexact-complex-opt-expr
#:with opt #'(let-values (((real imag) e.opt))
(unsafe-make-flrectangular real imag))))
#:with opt
(begin (set! *unboxed-gensym-counter* 0)
#'(let* (e.bindings ...)
(unsafe-make-flrectangular e.real-part e.imag-part)))))
;; it's faster to take apart a complex number and use unsafe operations on
;; its parts than it is to use generic operations
;; we keep the real and imaginary parts unboxed as long as we stay within
@ -73,52 +84,104 @@
c1:unboxed-inexact-complex-opt-expr
c2:unboxed-inexact-complex-opt-expr
cs:unboxed-inexact-complex-opt-expr ...)
#:with opt
(begin (log-optimization "binary inexact complex" #'op)
(for/fold ([o #'c1.opt])
([e (syntax->list #'(c2.opt cs.opt ...))])
#`(let-values (((t1-real t1-imag) #,o)
((t2-real t2-imag) #,e))
(values
(op.unsafe t1-real t2-real)
(op.unsafe t1-imag t2-imag))))))
#:with real-part (unboxed-gensym)
#:with imag-part (unboxed-gensym)
#:with (bindings ...)
(begin (log-optimization "unboxed binary inexact complex" #'op)
#`(#,@(append (syntax->list #'(c1.bindings ... c2.bindings ... cs.bindings ... ...))
(list #`(real-part #,(for/fold ((o #'c1.real-part))
((e (syntax->list #'(c2.real-part cs.real-part ...))))
#`(op.unsafe #,o #,e)))
#`(imag-part #,(for/fold ((o #'c1.imag-part))
((e (syntax->list #'(c2.imag-part cs.imag-part ...))))
#`(op.unsafe #,o #,e))))))))
(pattern (#%plain-app (~and op (~literal *))
c1:unboxed-inexact-complex-opt-expr
c2:unboxed-inexact-complex-opt-expr
cs:unboxed-inexact-complex-opt-expr ...)
#:with opt
(begin (log-optimization "binary inexact complex" #'op)
(for/fold ([o #'c1.opt])
([e (syntax->list #'(c2.opt cs.opt ...))])
#`(let-values (((a b) #,o)
((c d) #,e))
(values
(unsafe-fl- (unsafe-fl* a c) (unsafe-fl* b d))
(unsafe-fl+ (unsafe-fl* b c) (unsafe-fl* a d)))))))
#:with real-part (unboxed-gensym)
#:with imag-part (unboxed-gensym)
#:with (bindings ...)
(begin (log-optimization "unboxed binary inexact complex" #'op)
#`(c1.bindings ... c2.bindings ... cs.bindings ... ...
;; we want to bind the intermediate results to reuse them
;; the final results are bound to real-part and imag-part
#,@(let loop ([o1 #'c1.real-part]
[o2 #'c1.imag-part]
[e1 (syntax->list #'(c2.real-part cs.real-part ...))]
[e2 (syntax->list #'(c2.imag-part cs.imag-part ...))]
[rs (append (map (lambda (x) (unboxed-gensym))
(syntax->list #'(cs.real-part ...)))
(list #'real-part))]
[is (append (map (lambda (x) (unboxed-gensym))
(syntax->list #'(cs.imag-part ...)))
(list #'imag-part))]
[res '()])
(if (null? e1)
(reverse res)
(loop (car rs) (car is) (cdr e1) (cdr e2) (cdr rs) (cdr is)
;; complex multiplication, imag part, then real part (reverse)
(list* #`(#,(car is)
(unsafe-fl+ (unsafe-fl* #,o2 #,(car e1))
(unsafe-fl* #,o1 #,(car e2))))
#`(#,(car rs)
(unsafe-fl- (unsafe-fl* #,o1 #,(car e1))
(unsafe-fl* #,o2 #,(car e2))))
res)))))))
(pattern (#%plain-app (~and op (~literal /))
c1:unboxed-inexact-complex-opt-expr
c2:unboxed-inexact-complex-opt-expr
cs:unboxed-inexact-complex-opt-expr ...)
#:with opt
(begin (log-optimization "binary inexact complex" #'op)
(for/fold ([o #'c1.opt])
([e (syntax->list #'(c2.opt cs.opt ...))])
#`(let-values (((a b) #,o)
((c d) #,e))
(let ((den (unsafe-fl+ (unsafe-fl* c c) (unsafe-fl* d d))))
(values
(unsafe-fl/ (unsafe-fl+ (unsafe-fl* a c) (unsafe-fl* b d))
den)
(unsafe-fl/ (unsafe-fl- (unsafe-fl* b c) (unsafe-fl* a d))
den)))))))
#:with real-part (unboxed-gensym)
#:with imag-part (unboxed-gensym)
#:with (denominators ...)
(for/list
([e1 (syntax->list #'(c2.real-part cs.real-part ...))]
[e2 (syntax->list #'(c2.imag-part cs.imag-part ...))])
#`(#,(unboxed-gensym) (unsafe-fl+ (unsafe-fl* #,e1 #,e1) (unsafe-fl* #,e2 #,e2))))
#:with (bindings ...)
(begin (log-optimization "unboxed binary inexact complex" #'op)
#`(c1.bindings ... c2.bindings ... cs.bindings ... ... denominators ...
;; we want to bind the intermediate results to reuse them
;; the final results are bound to real-part and imag-part
#,@(let loop ([o1 #'c1.real-part]
[o2 #'c1.imag-part]
[e1 (syntax->list #'(c2.real-part cs.real-part ...))]
[e2 (syntax->list #'(c2.imag-part cs.imag-part ...))]
[d (map (lambda (x) (car (syntax-e x)))
(syntax->list #'(denominators ...)))]
[rs (append (map (lambda (x) (unboxed-gensym))
(syntax->list #'(cs.real-part ...)))
(list #'real-part))]
[is (append (map (lambda (x) (unboxed-gensym))
(syntax->list #'(cs.imag-part ...)))
(list #'imag-part))]
[res '()])
(if (null? e1)
(reverse res)
(loop (car rs) (car is) (cdr e1) (cdr e2) (cdr d) (cdr rs) (cdr is)
;; complex division, imag part, then real part (reverse)
(list* #`(#,(car is)
(unsafe-fl/ (unsafe-fl- (unsafe-fl* #,o2 #,(car e1))
(unsafe-fl* #,o1 #,(car e2)))
#,(car d)))
#`(#,(car rs)
(unsafe-fl/ (unsafe-fl+ (unsafe-fl* #,o1 #,(car e1))
(unsafe-fl* #,o2 #,(car e2)))
#,(car d)))
res)))))))
(pattern e:opt-expr
;; can't work on inexact reals, which are a subtype of inexact
;; complexes, so this has to be equality
#:when (match (type-of #'e)
[(tc-result1: (== -InexactComplex type-equal?)) #t] [_ #f])
#:with opt #'(let ((t e.opt))
(values (unsafe-flreal-part t)
(unsafe-flimag-part t)))))
#:with e* (unboxed-gensym)
#:with real-part (unboxed-gensym)
#:with imag-part (unboxed-gensym)
#:with (bindings ...)
#'((e* e.opt)
(real-part (unsafe-flreal-part e*))
(imag-part (unsafe-flimag-part e*)))))
(define-syntax-class inexact-complex-unary-op
(pattern (~or (~literal real-part) (~literal flreal-part)) #:with unsafe #'unsafe-flreal-part)