keep single-argument unsafe fl+ and fl* as an unboxing hint

original commit: 054d6da58ceffcce2c5caa6eda5561a122658543
This commit is contained in:
Matthew Flatt 2020-06-14 10:12:16 -06:00
parent d1f20019ae
commit af04af5aa3
3 changed files with 46 additions and 25 deletions

View File

@ -1240,8 +1240,8 @@
(fl+) (fl+ 3.0) (fl+ 3.0 4.0) (fl+ x) (fl+ x 0.0) (fl+ x -0.0) (fl+ 0.0 x) (fl+ -0.0 x) (fl+ x 3.0)
(fl+ x 3.0 4.0) (fl+ 3.0 x 4.0) (fl+ 3.0 x -3.0) (fl+ (truncate -.5) x (/ -inf.0)) (fl+ 3.0 x 4.0 y 5.0)
(fl+ 3.0 x +nan.0 y 5.0))))
'(#3%list 0.0 3.0 7.0 x (#3%fl+ 0.0 x) x (#3%fl+ 0.0 x) x (#3%fl+ 3.0 x)
(#3%fl+ 7.0 x) (#3%fl+ 7.0 x) (#3%fl+ 0.0 x) x (#3%fl+ 12.0 x y)
'(#3%list 0.0 3.0 7.0 (#3%fl+ x) (#3%fl+ 0.0 x) (#3%fl+ x) (#3%fl+ 0.0 x) (#3%fl+ x) (#3%fl+ 3.0 x)
(#3%fl+ 7.0 x) (#3%fl+ 7.0 x) (#3%fl+ 0.0 x) (#3%fl+ x) (#3%fl+ 12.0 x y)
+nan.0))
(equivalent-expansion?
(parameterize ([#%$suppress-primitive-inlining #f]
@ -1338,8 +1338,8 @@
(fl*) (fl* 3.0) (fl* 3.0 4.0) (fl* x) (fl* x 1.0) (fl* 1.0 x) (fl* x 3.0)
(fl* x 3.0 4.0) (fl* 3.0 x 4.0) (fl* 3.0 x #i1/3) (fl* 3.0 x 4.0 y 5.0)
(fl* 3.0 x 4.0 y +nan.0))))
'(#3%list 1.0 3.0 12.0 x x x (#3%fl* 3.0 x)
(#3%fl* 12.0 x) (#3%fl* 12.0 x) x (#3%fl* 60.0 x y)
'(#3%list 1.0 3.0 12.0 (#3%fl* x) (#3%fl* x) (#3%fl* x) (#3%fl* 3.0 x)
(#3%fl* 12.0 x) (#3%fl* 12.0 x) (#3%fl* x) (#3%fl* 60.0 x y)
+nan.0))
(equivalent-expansion?
(parameterize ([#%$suppress-primitive-inlining #f]
@ -1836,7 +1836,7 @@
(fl+) (fl+ 3.0) (fl+ 3.0 4.0) (fl+ x) (fl+ x 0.0) (fl+ x -0.0) (fl+ 0.0 x) (fl+ -0.0 x) (fl+ x 3.0)
(fl+ x 3.0 4.0) (fl+ 3.0 x 4.0) (fl+ 3.0 x -3.0) (fl+ (truncate -.5) x (/ -inf.0)) (fl+ 3.0 x 4.0 y 5.0)
(fl+ +nan.0 x 3.0 y 5.0) (fl+ 3.0 x +nan.0 y 5.0))))
'(#3%list 0.0 3.0 7.0 x (#3%fl+ x 0.0) (#3%fl+ x -0.0) (#3%fl+ 0.0 x) x (#3%fl+ x 3.0)
'(#3%list 0.0 3.0 7.0 (#3%fl+ x) (#3%fl+ x 0.0) (#3%fl+ x -0.0) (#3%fl+ 0.0 x) (#3%fl+ x) (#3%fl+ x 3.0)
(#3%fl+ x 3.0 4.0) (#3%fl+ 3.0 x 4.0) (#3%fl+ 3.0 x -3.0) (#3%fl+ x -0.0) (#3%fl+ 3.0 x 4.0 y 5.0)
+nan.0 (#3%fl+ 3.0 x +nan.0 y 5.0)))
(equivalent-expansion?
@ -1942,7 +1942,7 @@
(fl*) (fl* 3.0) (fl* 3.0 4.0) (fl* x) (fl* x 1.0) (fl* 1.0 x) (fl* x 3.0)
(fl* x 3.0 4.0) (fl* 3.0 x 4.0) (fl* 3.0 x #i1/3) (fl* 3.0 x 4.0 y 5.0)
(fl* +nan.0 x 3.0 y 4.0) (fl* 3.0 x 4.0 y +nan.0))))
'(#3%list 1.0 3.0 12.0 x (#3%fl* x 1.0) x (#3%fl* x 3.0)
'(#3%list 1.0 3.0 12.0 (#3%fl* x) (#3%fl* x 1.0) (#3%fl* x) (#3%fl* x 3.0)
(#3%fl* x 3.0 4.0) (#3%fl* 3.0 x 4.0) (#3%fl* 3.0 x #i1/3) (#3%fl* 3.0 x 4.0 y 5.0)
+nan.0 (#3%fl* +3.0 x 4.0 y +nan.0)))
(equivalent-expansion?

View File

@ -1055,17 +1055,18 @@
(#%$suppress-primitive-inlining)
(let ([before (+ (bytes-allocated) (bytes-deallocated))]
[N 100000])
(box?
(let loop ([i N] [bx (box 0.0)])
(if (zero? i)
bx
(loop (sub1 i) (let ([v (unbox bx)])
(box (proc v)))))))
(let ([allocated (- (+ (bytes-allocated) (bytes-deallocated)) before)]
[expected (* N (+ (compute-size 1.0)
(compute-size (box #f))))])
(printf "~s ~s\n" allocated expected)
(<= expected allocated (* 1.2 expected)))))]))
(and
(box?
(let loop ([i N] [bx (box 0.0)])
(if (zero? i)
bx
(loop (sub1 i) (let ([v (unbox bx)])
(box (proc v)))))))
(let ([allocated (- (+ (bytes-allocated) (bytes-deallocated)) before)]
[expected (* N (+ (compute-size 1.0)
(compute-size (box #f))))])
(printf "~s ~s\n" allocated expected)
(<= expected allocated (* 1.2 expected))))))]))
#t)
(check-loop-allocation (lambda (v) (fl+ v v)))
@ -1128,6 +1129,14 @@
(fl+ v 1.0)
(fl- v 1.0))))
(check-loop-allocation (lambda (v)
;; The two single-argument `fl+`s here should work as
;; a hint for unboxing in the loop
(let loop ([n 100] [v (fl+ v)])
(if (fx= n 0)
(fl+ v)
(loop (fx- n 1) (fl+ v 1.0))))))
(let ([bv (make-bytevector 8 0)])
(check-loop-allocation (lambda (v) (fl+ v (bytevector-ieee-double-native-ref bv 0)))))
(let ([bv (make-bytevector 8 0)])

View File

@ -2562,7 +2562,7 @@
(let ([folded (generic-op a d)])
(and (target-fixnum? folded) folded)))))]
[else #f]))))
(define (partial-fold-plus level orig-arg* ctxt prim op generic-op ident bottom? assoc-at-level)
(define (partial-fold-plus level orig-arg* ctxt prim op generic-op ident bottom? assoc-at-level direct-result?)
(define fold? (make-fold? op generic-op))
(let loop ([arg* orig-arg*] [a ident] [val* '()] [used '()] [unused '()])
(if (null? arg*)
@ -2587,7 +2587,7 @@
(cond
[(null? val*) `(quote ,a)]
[(eqv? a ident)
(if (and (fx= level 3) (null? (cdr val*)))
(if (and (fx= level 3) (null? (cdr val*)) (direct-result? (car val*)))
(car val*)
(build-primcall (app-preinfo ctxt) level prim val*))]
[else
@ -2649,15 +2649,17 @@
[(_ plus prim generic-op ident bottom?)
(partial-folder plus prim generic-op ident bottom? #f)]
[(_ plus prim generic-op ident bottom? assoc-at-level)
(partial-folder plus prim generic-op ident bottom? assoc-at-level (lambda (e) #t))]
[(_ plus prim generic-op ident bottom? assoc-at-level direct-result?)
(begin
(define-inline 2 prim
; (fl+) might should return -0.0, but we force it to return +0.0 per TSPL4
[() (residualize-seq '() '() ctxt) `(quote ,(if (eqv? ident -0.0) +0.0 ident))]
[arg* (partial-fold-plus 2 arg* ctxt 'prim prim generic-op ident bottom? assoc-at-level)])
[arg* (partial-fold-plus 2 arg* ctxt 'prim prim generic-op ident bottom? assoc-at-level direct-result?)])
(define-inline 3 prim
; (fl+) might should return -0.0, but we force it to return +0.0 per TSPL4
[() (residualize-seq '() '() ctxt) `(quote ,(if (eqv? ident -0.0) +0.0 ident))]
[arg* (partial-fold-plus 3 arg* ctxt 'prim prim generic-op ident bottom? assoc-at-level)]))]
[arg* (partial-fold-plus 3 arg* ctxt 'prim prim generic-op ident bottom? assoc-at-level direct-result?)]))]
[(_ minus prim generic-op ident)
(begin
(define-inline 2 prim
@ -2677,9 +2679,9 @@
[(_ plus r6rs:prim prim generic-op ident bottom? assoc-at-level)
(begin
(define-inline 2 r6rs:prim
[(arg1 arg2) (partial-fold-plus 2 (list arg1 arg2) ctxt 'prim prim generic-op ident bottom? assoc-at-level)])
[(arg1 arg2) (partial-fold-plus 2 (list arg1 arg2) ctxt 'prim prim generic-op ident bottom? assoc-at-level (lambda (x) #t))])
(define-inline 3 r6rs:prim
[(arg1 arg2) (partial-fold-plus 3 (list arg1 arg2) ctxt 'prim prim generic-op ident bottom? assoc-at-level)]))]
[(arg1 arg2) (partial-fold-plus 3 (list arg1 arg2) ctxt 'prim prim generic-op ident bottom? assoc-at-level (lambda (x) #t))]))]
[(_ minus r6rs:prim prim generic-op ident)
(begin
(define-inline 2 r6rs:prim
@ -2691,18 +2693,28 @@
[(arg1 arg2)
(partial-fold-minus 3 arg1 (list arg2) ctxt 'prim prim generic-op ident)]))]))
(define obviously-fl?
;; We keep single-argument `fl+` and `fl*` as an unboxing hint to the back end,
;; but the hint is not necessary if the argument is the result of a primitive that
;; produces fonums
(lambda (e)
(nanopass-case (Lsrc Expr) e
[(quote ,d) (flonum? d)]
[(call ,preinfo ,pr ,e* ...) (eq? 'flonum ($sgetprop (primref-name pr) '*result-type* #f))]
[else #f])))
; handling nans here using the support for handling exact zero in
; the multiply case. maybe shouldn't bother with nans anyway.
(partial-folder plus + + 0 generic-nan?)
(partial-folder plus fx+ + 0 (lambda (x) #f) 3)
(r6rs-fixnum-partial-folder plus r6rs:fx+ fx+ + 0 (lambda (x) #f) 3)
(partial-folder plus fl+ fl+ -0.0 fl-nan?)
(partial-folder plus fl+ fl+ -0.0 fl-nan? #f obviously-fl?)
(partial-folder plus cfl+ cfl+ -0.0 cfl-nan?)
(partial-folder plus * * 1 exact-zero?) ; exact zero trumps nan
(partial-folder plus fx* * 1 exact-zero? 3)
(r6rs-fixnum-partial-folder plus r6rs:fx* fx* * 1 exact-zero? 3)
(partial-folder plus fl* fl* 1.0 fl-nan?)
(partial-folder plus fl* fl* 1.0 fl-nan? #f obviously-fl?)
(partial-folder plus cfl* cfl* 1.0 cfl-nan?)
; not handling nans here since we don't have support for the exact