Improved unboxed complex operations.
Intermediate results are kept as unboxed floats as long as we stay within complex arithmetic code. original commit: 7853d333495376f065c63a19866ac2eba6f8926a
This commit is contained in:
parent
f7ac316db2
commit
9320340cb9
|
@ -1,7 +1,7 @@
|
|||
#lang scheme/base
|
||||
|
||||
(require syntax/parse (for-template scheme/base scheme/flonum scheme/fixnum scheme/unsafe/ops racket/private/for)
|
||||
"../utils/utils.rkt" "../utils/tc-utils.rkt" unstable/match scheme/match unstable/syntax
|
||||
"../utils/utils.rkt" "../utils/tc-utils.rkt" unstable/match scheme/match unstable/syntax unstable/values
|
||||
(rep type-rep) syntax/id-table racket/dict
|
||||
(types abbrev type-table utils subtype))
|
||||
(provide optimize)
|
||||
|
@ -60,10 +60,21 @@
|
|||
#:when (dict-ref tbl #'i #f)
|
||||
#:with unsafe (dict-ref tbl #'i)))
|
||||
|
||||
;; to generate temporary symbols in a predictable manner
|
||||
;; these identifiers are unique within a sequence of unboxed operations
|
||||
;; necessary to have predictable symbols to add in the hand-optimized versions
|
||||
;; of the optimizer tests (which check for equality of expanded code)
|
||||
(define *unboxed-gensym-counter* 0)
|
||||
(define (unboxed-gensym)
|
||||
(set! *unboxed-gensym-counter* (add1 *unboxed-gensym-counter*))
|
||||
(format-unique-id #'here "unboxed-gensym-~a" *unboxed-gensym-counter*))
|
||||
|
||||
(define-syntax-class inexact-complex-opt-expr
|
||||
(pattern e:unboxed-inexact-complex-opt-expr
|
||||
#:with opt #'(let-values (((real imag) e.opt))
|
||||
(unsafe-make-flrectangular real imag))))
|
||||
#:with opt
|
||||
(begin (set! *unboxed-gensym-counter* 0)
|
||||
#'(let* (e.bindings ...)
|
||||
(unsafe-make-flrectangular e.real-part e.imag-part)))))
|
||||
;; it's faster to take apart a complex number and use unsafe operations on
|
||||
;; its parts than it is to use generic operations
|
||||
;; we keep the real and imaginary parts unboxed as long as we stay within
|
||||
|
@ -73,52 +84,104 @@
|
|||
c1:unboxed-inexact-complex-opt-expr
|
||||
c2:unboxed-inexact-complex-opt-expr
|
||||
cs:unboxed-inexact-complex-opt-expr ...)
|
||||
#:with opt
|
||||
(begin (log-optimization "binary inexact complex" #'op)
|
||||
(for/fold ([o #'c1.opt])
|
||||
([e (syntax->list #'(c2.opt cs.opt ...))])
|
||||
#`(let-values (((t1-real t1-imag) #,o)
|
||||
((t2-real t2-imag) #,e))
|
||||
(values
|
||||
(op.unsafe t1-real t2-real)
|
||||
(op.unsafe t1-imag t2-imag))))))
|
||||
#:with real-part (unboxed-gensym)
|
||||
#:with imag-part (unboxed-gensym)
|
||||
#:with (bindings ...)
|
||||
(begin (log-optimization "unboxed binary inexact complex" #'op)
|
||||
#`(#,@(append (syntax->list #'(c1.bindings ... c2.bindings ... cs.bindings ... ...))
|
||||
(list #`(real-part #,(for/fold ((o #'c1.real-part))
|
||||
((e (syntax->list #'(c2.real-part cs.real-part ...))))
|
||||
#`(op.unsafe #,o #,e)))
|
||||
#`(imag-part #,(for/fold ((o #'c1.imag-part))
|
||||
((e (syntax->list #'(c2.imag-part cs.imag-part ...))))
|
||||
#`(op.unsafe #,o #,e))))))))
|
||||
(pattern (#%plain-app (~and op (~literal *))
|
||||
c1:unboxed-inexact-complex-opt-expr
|
||||
c2:unboxed-inexact-complex-opt-expr
|
||||
cs:unboxed-inexact-complex-opt-expr ...)
|
||||
#:with opt
|
||||
(begin (log-optimization "binary inexact complex" #'op)
|
||||
(for/fold ([o #'c1.opt])
|
||||
([e (syntax->list #'(c2.opt cs.opt ...))])
|
||||
#`(let-values (((a b) #,o)
|
||||
((c d) #,e))
|
||||
(values
|
||||
(unsafe-fl- (unsafe-fl* a c) (unsafe-fl* b d))
|
||||
(unsafe-fl+ (unsafe-fl* b c) (unsafe-fl* a d)))))))
|
||||
#:with real-part (unboxed-gensym)
|
||||
#:with imag-part (unboxed-gensym)
|
||||
#:with (bindings ...)
|
||||
(begin (log-optimization "unboxed binary inexact complex" #'op)
|
||||
#`(c1.bindings ... c2.bindings ... cs.bindings ... ...
|
||||
;; we want to bind the intermediate results to reuse them
|
||||
;; the final results are bound to real-part and imag-part
|
||||
#,@(let loop ([o1 #'c1.real-part]
|
||||
[o2 #'c1.imag-part]
|
||||
[e1 (syntax->list #'(c2.real-part cs.real-part ...))]
|
||||
[e2 (syntax->list #'(c2.imag-part cs.imag-part ...))]
|
||||
[rs (append (map (lambda (x) (unboxed-gensym))
|
||||
(syntax->list #'(cs.real-part ...)))
|
||||
(list #'real-part))]
|
||||
[is (append (map (lambda (x) (unboxed-gensym))
|
||||
(syntax->list #'(cs.imag-part ...)))
|
||||
(list #'imag-part))]
|
||||
[res '()])
|
||||
(if (null? e1)
|
||||
(reverse res)
|
||||
(loop (car rs) (car is) (cdr e1) (cdr e2) (cdr rs) (cdr is)
|
||||
;; complex multiplication, imag part, then real part (reverse)
|
||||
(list* #`(#,(car is)
|
||||
(unsafe-fl+ (unsafe-fl* #,o2 #,(car e1))
|
||||
(unsafe-fl* #,o1 #,(car e2))))
|
||||
#`(#,(car rs)
|
||||
(unsafe-fl- (unsafe-fl* #,o1 #,(car e1))
|
||||
(unsafe-fl* #,o2 #,(car e2))))
|
||||
res)))))))
|
||||
(pattern (#%plain-app (~and op (~literal /))
|
||||
c1:unboxed-inexact-complex-opt-expr
|
||||
c2:unboxed-inexact-complex-opt-expr
|
||||
cs:unboxed-inexact-complex-opt-expr ...)
|
||||
#:with opt
|
||||
(begin (log-optimization "binary inexact complex" #'op)
|
||||
(for/fold ([o #'c1.opt])
|
||||
([e (syntax->list #'(c2.opt cs.opt ...))])
|
||||
#`(let-values (((a b) #,o)
|
||||
((c d) #,e))
|
||||
(let ((den (unsafe-fl+ (unsafe-fl* c c) (unsafe-fl* d d))))
|
||||
(values
|
||||
(unsafe-fl/ (unsafe-fl+ (unsafe-fl* a c) (unsafe-fl* b d))
|
||||
den)
|
||||
(unsafe-fl/ (unsafe-fl- (unsafe-fl* b c) (unsafe-fl* a d))
|
||||
den)))))))
|
||||
#:with real-part (unboxed-gensym)
|
||||
#:with imag-part (unboxed-gensym)
|
||||
#:with (denominators ...)
|
||||
(for/list
|
||||
([e1 (syntax->list #'(c2.real-part cs.real-part ...))]
|
||||
[e2 (syntax->list #'(c2.imag-part cs.imag-part ...))])
|
||||
#`(#,(unboxed-gensym) (unsafe-fl+ (unsafe-fl* #,e1 #,e1) (unsafe-fl* #,e2 #,e2))))
|
||||
#:with (bindings ...)
|
||||
(begin (log-optimization "unboxed binary inexact complex" #'op)
|
||||
#`(c1.bindings ... c2.bindings ... cs.bindings ... ... denominators ...
|
||||
;; we want to bind the intermediate results to reuse them
|
||||
;; the final results are bound to real-part and imag-part
|
||||
#,@(let loop ([o1 #'c1.real-part]
|
||||
[o2 #'c1.imag-part]
|
||||
[e1 (syntax->list #'(c2.real-part cs.real-part ...))]
|
||||
[e2 (syntax->list #'(c2.imag-part cs.imag-part ...))]
|
||||
[d (map (lambda (x) (car (syntax-e x)))
|
||||
(syntax->list #'(denominators ...)))]
|
||||
[rs (append (map (lambda (x) (unboxed-gensym))
|
||||
(syntax->list #'(cs.real-part ...)))
|
||||
(list #'real-part))]
|
||||
[is (append (map (lambda (x) (unboxed-gensym))
|
||||
(syntax->list #'(cs.imag-part ...)))
|
||||
(list #'imag-part))]
|
||||
[res '()])
|
||||
(if (null? e1)
|
||||
(reverse res)
|
||||
(loop (car rs) (car is) (cdr e1) (cdr e2) (cdr d) (cdr rs) (cdr is)
|
||||
;; complex division, imag part, then real part (reverse)
|
||||
(list* #`(#,(car is)
|
||||
(unsafe-fl/ (unsafe-fl- (unsafe-fl* #,o2 #,(car e1))
|
||||
(unsafe-fl* #,o1 #,(car e2)))
|
||||
#,(car d)))
|
||||
#`(#,(car rs)
|
||||
(unsafe-fl/ (unsafe-fl+ (unsafe-fl* #,o1 #,(car e1))
|
||||
(unsafe-fl* #,o2 #,(car e2)))
|
||||
#,(car d)))
|
||||
res)))))))
|
||||
(pattern e:opt-expr
|
||||
;; can't work on inexact reals, which are a subtype of inexact
|
||||
;; complexes, so this has to be equality
|
||||
#:when (match (type-of #'e)
|
||||
[(tc-result1: (== -InexactComplex type-equal?)) #t] [_ #f])
|
||||
#:with opt #'(let ((t e.opt))
|
||||
(values (unsafe-flreal-part t)
|
||||
(unsafe-flimag-part t)))))
|
||||
#:with e* (unboxed-gensym)
|
||||
#:with real-part (unboxed-gensym)
|
||||
#:with imag-part (unboxed-gensym)
|
||||
#:with (bindings ...)
|
||||
#'((e* e.opt)
|
||||
(real-part (unsafe-flreal-part e*))
|
||||
(imag-part (unsafe-flimag-part e*)))))
|
||||
|
||||
(define-syntax-class inexact-complex-unary-op
|
||||
(pattern (~or (~literal real-part) (~literal flreal-part)) #:with unsafe #'unsafe-flreal-part)
|
||||
|
|
Loading…
Reference in New Issue
Block a user