keep single-argument unsafe fl+
and fl*
as an unboxing hint
original commit: 054d6da58ceffcce2c5caa6eda5561a122658543
This commit is contained in:
parent
d1f20019ae
commit
af04af5aa3
12
mats/cp0.ms
12
mats/cp0.ms
|
@ -1240,8 +1240,8 @@
|
|||
(fl+) (fl+ 3.0) (fl+ 3.0 4.0) (fl+ x) (fl+ x 0.0) (fl+ x -0.0) (fl+ 0.0 x) (fl+ -0.0 x) (fl+ x 3.0)
|
||||
(fl+ x 3.0 4.0) (fl+ 3.0 x 4.0) (fl+ 3.0 x -3.0) (fl+ (truncate -.5) x (/ -inf.0)) (fl+ 3.0 x 4.0 y 5.0)
|
||||
(fl+ 3.0 x +nan.0 y 5.0))))
|
||||
'(#3%list 0.0 3.0 7.0 x (#3%fl+ 0.0 x) x (#3%fl+ 0.0 x) x (#3%fl+ 3.0 x)
|
||||
(#3%fl+ 7.0 x) (#3%fl+ 7.0 x) (#3%fl+ 0.0 x) x (#3%fl+ 12.0 x y)
|
||||
'(#3%list 0.0 3.0 7.0 (#3%fl+ x) (#3%fl+ 0.0 x) (#3%fl+ x) (#3%fl+ 0.0 x) (#3%fl+ x) (#3%fl+ 3.0 x)
|
||||
(#3%fl+ 7.0 x) (#3%fl+ 7.0 x) (#3%fl+ 0.0 x) (#3%fl+ x) (#3%fl+ 12.0 x y)
|
||||
+nan.0))
|
||||
(equivalent-expansion?
|
||||
(parameterize ([#%$suppress-primitive-inlining #f]
|
||||
|
@ -1338,8 +1338,8 @@
|
|||
(fl*) (fl* 3.0) (fl* 3.0 4.0) (fl* x) (fl* x 1.0) (fl* 1.0 x) (fl* x 3.0)
|
||||
(fl* x 3.0 4.0) (fl* 3.0 x 4.0) (fl* 3.0 x #i1/3) (fl* 3.0 x 4.0 y 5.0)
|
||||
(fl* 3.0 x 4.0 y +nan.0))))
|
||||
'(#3%list 1.0 3.0 12.0 x x x (#3%fl* 3.0 x)
|
||||
(#3%fl* 12.0 x) (#3%fl* 12.0 x) x (#3%fl* 60.0 x y)
|
||||
'(#3%list 1.0 3.0 12.0 (#3%fl* x) (#3%fl* x) (#3%fl* x) (#3%fl* 3.0 x)
|
||||
(#3%fl* 12.0 x) (#3%fl* 12.0 x) (#3%fl* x) (#3%fl* 60.0 x y)
|
||||
+nan.0))
|
||||
(equivalent-expansion?
|
||||
(parameterize ([#%$suppress-primitive-inlining #f]
|
||||
|
@ -1836,7 +1836,7 @@
|
|||
(fl+) (fl+ 3.0) (fl+ 3.0 4.0) (fl+ x) (fl+ x 0.0) (fl+ x -0.0) (fl+ 0.0 x) (fl+ -0.0 x) (fl+ x 3.0)
|
||||
(fl+ x 3.0 4.0) (fl+ 3.0 x 4.0) (fl+ 3.0 x -3.0) (fl+ (truncate -.5) x (/ -inf.0)) (fl+ 3.0 x 4.0 y 5.0)
|
||||
(fl+ +nan.0 x 3.0 y 5.0) (fl+ 3.0 x +nan.0 y 5.0))))
|
||||
'(#3%list 0.0 3.0 7.0 x (#3%fl+ x 0.0) (#3%fl+ x -0.0) (#3%fl+ 0.0 x) x (#3%fl+ x 3.0)
|
||||
'(#3%list 0.0 3.0 7.0 (#3%fl+ x) (#3%fl+ x 0.0) (#3%fl+ x -0.0) (#3%fl+ 0.0 x) (#3%fl+ x) (#3%fl+ x 3.0)
|
||||
(#3%fl+ x 3.0 4.0) (#3%fl+ 3.0 x 4.0) (#3%fl+ 3.0 x -3.0) (#3%fl+ x -0.0) (#3%fl+ 3.0 x 4.0 y 5.0)
|
||||
+nan.0 (#3%fl+ 3.0 x +nan.0 y 5.0)))
|
||||
(equivalent-expansion?
|
||||
|
@ -1942,7 +1942,7 @@
|
|||
(fl*) (fl* 3.0) (fl* 3.0 4.0) (fl* x) (fl* x 1.0) (fl* 1.0 x) (fl* x 3.0)
|
||||
(fl* x 3.0 4.0) (fl* 3.0 x 4.0) (fl* 3.0 x #i1/3) (fl* 3.0 x 4.0 y 5.0)
|
||||
(fl* +nan.0 x 3.0 y 4.0) (fl* 3.0 x 4.0 y +nan.0))))
|
||||
'(#3%list 1.0 3.0 12.0 x (#3%fl* x 1.0) x (#3%fl* x 3.0)
|
||||
'(#3%list 1.0 3.0 12.0 (#3%fl* x) (#3%fl* x 1.0) (#3%fl* x) (#3%fl* x 3.0)
|
||||
(#3%fl* x 3.0 4.0) (#3%fl* 3.0 x 4.0) (#3%fl* 3.0 x #i1/3) (#3%fl* 3.0 x 4.0 y 5.0)
|
||||
+nan.0 (#3%fl* +3.0 x 4.0 y +nan.0)))
|
||||
(equivalent-expansion?
|
||||
|
|
31
mats/fl.ms
31
mats/fl.ms
|
@ -1055,17 +1055,18 @@
|
|||
(#%$suppress-primitive-inlining)
|
||||
(let ([before (+ (bytes-allocated) (bytes-deallocated))]
|
||||
[N 100000])
|
||||
(box?
|
||||
(let loop ([i N] [bx (box 0.0)])
|
||||
(if (zero? i)
|
||||
bx
|
||||
(loop (sub1 i) (let ([v (unbox bx)])
|
||||
(box (proc v)))))))
|
||||
(let ([allocated (- (+ (bytes-allocated) (bytes-deallocated)) before)]
|
||||
[expected (* N (+ (compute-size 1.0)
|
||||
(compute-size (box #f))))])
|
||||
(printf "~s ~s\n" allocated expected)
|
||||
(<= expected allocated (* 1.2 expected)))))]))
|
||||
(and
|
||||
(box?
|
||||
(let loop ([i N] [bx (box 0.0)])
|
||||
(if (zero? i)
|
||||
bx
|
||||
(loop (sub1 i) (let ([v (unbox bx)])
|
||||
(box (proc v)))))))
|
||||
(let ([allocated (- (+ (bytes-allocated) (bytes-deallocated)) before)]
|
||||
[expected (* N (+ (compute-size 1.0)
|
||||
(compute-size (box #f))))])
|
||||
(printf "~s ~s\n" allocated expected)
|
||||
(<= expected allocated (* 1.2 expected))))))]))
|
||||
#t)
|
||||
|
||||
(check-loop-allocation (lambda (v) (fl+ v v)))
|
||||
|
@ -1128,6 +1129,14 @@
|
|||
(fl+ v 1.0)
|
||||
(fl- v 1.0))))
|
||||
|
||||
(check-loop-allocation (lambda (v)
|
||||
;; The two single-argument `fl+`s here should work as
|
||||
;; a hint for unboxing in the loop
|
||||
(let loop ([n 100] [v (fl+ v)])
|
||||
(if (fx= n 0)
|
||||
(fl+ v)
|
||||
(loop (fx- n 1) (fl+ v 1.0))))))
|
||||
|
||||
(let ([bv (make-bytevector 8 0)])
|
||||
(check-loop-allocation (lambda (v) (fl+ v (bytevector-ieee-double-native-ref bv 0)))))
|
||||
(let ([bv (make-bytevector 8 0)])
|
||||
|
|
28
s/cp0.ss
28
s/cp0.ss
|
@ -2562,7 +2562,7 @@
|
|||
(let ([folded (generic-op a d)])
|
||||
(and (target-fixnum? folded) folded)))))]
|
||||
[else #f]))))
|
||||
(define (partial-fold-plus level orig-arg* ctxt prim op generic-op ident bottom? assoc-at-level)
|
||||
(define (partial-fold-plus level orig-arg* ctxt prim op generic-op ident bottom? assoc-at-level direct-result?)
|
||||
(define fold? (make-fold? op generic-op))
|
||||
(let loop ([arg* orig-arg*] [a ident] [val* '()] [used '()] [unused '()])
|
||||
(if (null? arg*)
|
||||
|
@ -2587,7 +2587,7 @@
|
|||
(cond
|
||||
[(null? val*) `(quote ,a)]
|
||||
[(eqv? a ident)
|
||||
(if (and (fx= level 3) (null? (cdr val*)))
|
||||
(if (and (fx= level 3) (null? (cdr val*)) (direct-result? (car val*)))
|
||||
(car val*)
|
||||
(build-primcall (app-preinfo ctxt) level prim val*))]
|
||||
[else
|
||||
|
@ -2649,15 +2649,17 @@
|
|||
[(_ plus prim generic-op ident bottom?)
|
||||
(partial-folder plus prim generic-op ident bottom? #f)]
|
||||
[(_ plus prim generic-op ident bottom? assoc-at-level)
|
||||
(partial-folder plus prim generic-op ident bottom? assoc-at-level (lambda (e) #t))]
|
||||
[(_ plus prim generic-op ident bottom? assoc-at-level direct-result?)
|
||||
(begin
|
||||
(define-inline 2 prim
|
||||
; (fl+) might should return -0.0, but we force it to return +0.0 per TSPL4
|
||||
[() (residualize-seq '() '() ctxt) `(quote ,(if (eqv? ident -0.0) +0.0 ident))]
|
||||
[arg* (partial-fold-plus 2 arg* ctxt 'prim prim generic-op ident bottom? assoc-at-level)])
|
||||
[arg* (partial-fold-plus 2 arg* ctxt 'prim prim generic-op ident bottom? assoc-at-level direct-result?)])
|
||||
(define-inline 3 prim
|
||||
; (fl+) might should return -0.0, but we force it to return +0.0 per TSPL4
|
||||
[() (residualize-seq '() '() ctxt) `(quote ,(if (eqv? ident -0.0) +0.0 ident))]
|
||||
[arg* (partial-fold-plus 3 arg* ctxt 'prim prim generic-op ident bottom? assoc-at-level)]))]
|
||||
[arg* (partial-fold-plus 3 arg* ctxt 'prim prim generic-op ident bottom? assoc-at-level direct-result?)]))]
|
||||
[(_ minus prim generic-op ident)
|
||||
(begin
|
||||
(define-inline 2 prim
|
||||
|
@ -2677,9 +2679,9 @@
|
|||
[(_ plus r6rs:prim prim generic-op ident bottom? assoc-at-level)
|
||||
(begin
|
||||
(define-inline 2 r6rs:prim
|
||||
[(arg1 arg2) (partial-fold-plus 2 (list arg1 arg2) ctxt 'prim prim generic-op ident bottom? assoc-at-level)])
|
||||
[(arg1 arg2) (partial-fold-plus 2 (list arg1 arg2) ctxt 'prim prim generic-op ident bottom? assoc-at-level (lambda (x) #t))])
|
||||
(define-inline 3 r6rs:prim
|
||||
[(arg1 arg2) (partial-fold-plus 3 (list arg1 arg2) ctxt 'prim prim generic-op ident bottom? assoc-at-level)]))]
|
||||
[(arg1 arg2) (partial-fold-plus 3 (list arg1 arg2) ctxt 'prim prim generic-op ident bottom? assoc-at-level (lambda (x) #t))]))]
|
||||
[(_ minus r6rs:prim prim generic-op ident)
|
||||
(begin
|
||||
(define-inline 2 r6rs:prim
|
||||
|
@ -2691,18 +2693,28 @@
|
|||
[(arg1 arg2)
|
||||
(partial-fold-minus 3 arg1 (list arg2) ctxt 'prim prim generic-op ident)]))]))
|
||||
|
||||
(define obviously-fl?
|
||||
;; We keep single-argument `fl+` and `fl*` as an unboxing hint to the back end,
|
||||
;; but the hint is not necessary if the argument is the result of a primitive that
|
||||
;; produces fonums
|
||||
(lambda (e)
|
||||
(nanopass-case (Lsrc Expr) e
|
||||
[(quote ,d) (flonum? d)]
|
||||
[(call ,preinfo ,pr ,e* ...) (eq? 'flonum ($sgetprop (primref-name pr) '*result-type* #f))]
|
||||
[else #f])))
|
||||
|
||||
; handling nans here using the support for handling exact zero in
|
||||
; the multiply case. maybe shouldn't bother with nans anyway.
|
||||
(partial-folder plus + + 0 generic-nan?)
|
||||
(partial-folder plus fx+ + 0 (lambda (x) #f) 3)
|
||||
(r6rs-fixnum-partial-folder plus r6rs:fx+ fx+ + 0 (lambda (x) #f) 3)
|
||||
(partial-folder plus fl+ fl+ -0.0 fl-nan?)
|
||||
(partial-folder plus fl+ fl+ -0.0 fl-nan? #f obviously-fl?)
|
||||
(partial-folder plus cfl+ cfl+ -0.0 cfl-nan?)
|
||||
|
||||
(partial-folder plus * * 1 exact-zero?) ; exact zero trumps nan
|
||||
(partial-folder plus fx* * 1 exact-zero? 3)
|
||||
(r6rs-fixnum-partial-folder plus r6rs:fx* fx* * 1 exact-zero? 3)
|
||||
(partial-folder plus fl* fl* 1.0 fl-nan?)
|
||||
(partial-folder plus fl* fl* 1.0 fl-nan? #f obviously-fl?)
|
||||
(partial-folder plus cfl* cfl* 1.0 cfl-nan?)
|
||||
|
||||
; not handling nans here since we don't have support for the exact
|
||||
|
|
Loading…
Reference in New Issue
Block a user