keep single-argument unsafe fl+ and fl* as an unboxing hint

original commit: 054d6da58ceffcce2c5caa6eda5561a122658543
2020-06-14 10:12:16 -06:00 · 2020-06-14 10:12:16 -06:00 · af04af5aa3
commit af04af5aa3
parent d1f20019ae
3 changed files with 46 additions and 25 deletions
--- a/mats/cp0.ms
+++ b/mats/cp0.ms
@ -1240,8 +1240,8 @@
           (fl+) (fl+ 3.0) (fl+ 3.0 4.0) (fl+ x) (fl+ x 0.0) (fl+ x -0.0) (fl+ 0.0 x) (fl+ -0.0 x) (fl+ x 3.0)
           (fl+ x 3.0 4.0) (fl+ 3.0 x 4.0) (fl+ 3.0 x -3.0) (fl+ (truncate -.5) x (/ -inf.0)) (fl+ 3.0 x 4.0 y 5.0)
           (fl+ 3.0 x +nan.0 y 5.0))))
-    '(#3%list 0.0 3.0 7.0 x (#3%fl+ 0.0 x) x (#3%fl+ 0.0 x) x (#3%fl+ 3.0 x)
-       (#3%fl+ 7.0 x) (#3%fl+ 7.0 x) (#3%fl+ 0.0 x) x (#3%fl+ 12.0 x y)
+    '(#3%list 0.0 3.0 7.0 (#3%fl+ x) (#3%fl+ 0.0 x) (#3%fl+ x) (#3%fl+ 0.0 x) (#3%fl+ x) (#3%fl+ 3.0 x)
+       (#3%fl+ 7.0 x) (#3%fl+ 7.0 x) (#3%fl+ 0.0 x) (#3%fl+ x) (#3%fl+ 12.0 x y)
       +nan.0))
  (equivalent-expansion?
    (parameterize ([#%$suppress-primitive-inlining #f]
@ -1338,8 +1338,8 @@
           (fl*) (fl* 3.0) (fl* 3.0 4.0) (fl* x) (fl* x 1.0) (fl* 1.0 x) (fl* x 3.0)
           (fl* x 3.0 4.0) (fl* 3.0 x 4.0) (fl* 3.0 x #i1/3) (fl* 3.0 x 4.0 y 5.0)
           (fl* 3.0 x 4.0 y +nan.0))))
-    '(#3%list 1.0 3.0 12.0 x x x (#3%fl* 3.0 x)
-       (#3%fl* 12.0 x) (#3%fl* 12.0 x) x (#3%fl* 60.0 x y)
+    '(#3%list 1.0 3.0 12.0 (#3%fl* x) (#3%fl* x) (#3%fl* x) (#3%fl* 3.0 x)
+       (#3%fl* 12.0 x) (#3%fl* 12.0 x) (#3%fl* x) (#3%fl* 60.0 x y)
       +nan.0))
  (equivalent-expansion?
    (parameterize ([#%$suppress-primitive-inlining #f]
@ -1836,7 +1836,7 @@
           (fl+) (fl+ 3.0) (fl+ 3.0 4.0) (fl+ x) (fl+ x 0.0) (fl+ x -0.0) (fl+ 0.0 x) (fl+ -0.0 x) (fl+ x 3.0)
           (fl+ x 3.0 4.0) (fl+ 3.0 x 4.0) (fl+ 3.0 x -3.0) (fl+ (truncate -.5) x (/ -inf.0)) (fl+ 3.0 x 4.0 y 5.0)
           (fl+ +nan.0 x 3.0 y 5.0) (fl+ 3.0 x +nan.0 y 5.0))))
-    '(#3%list 0.0 3.0 7.0 x (#3%fl+ x 0.0) (#3%fl+ x -0.0) (#3%fl+ 0.0 x) x (#3%fl+ x 3.0)
+    '(#3%list 0.0 3.0 7.0 (#3%fl+ x) (#3%fl+ x 0.0) (#3%fl+ x -0.0) (#3%fl+ 0.0 x) (#3%fl+ x) (#3%fl+ x 3.0)
       (#3%fl+ x 3.0 4.0) (#3%fl+ 3.0 x 4.0) (#3%fl+ 3.0 x -3.0) (#3%fl+ x -0.0) (#3%fl+ 3.0 x 4.0 y 5.0)
       +nan.0 (#3%fl+ 3.0 x +nan.0 y 5.0)))
  (equivalent-expansion?
@ -1942,7 +1942,7 @@
           (fl*) (fl* 3.0) (fl* 3.0 4.0) (fl* x) (fl* x 1.0) (fl* 1.0 x) (fl* x 3.0)
           (fl* x 3.0 4.0) (fl* 3.0 x 4.0) (fl* 3.0 x #i1/3) (fl* 3.0 x 4.0 y 5.0)
           (fl* +nan.0 x 3.0 y 4.0) (fl* 3.0 x 4.0 y +nan.0))))
-    '(#3%list 1.0 3.0 12.0 x (#3%fl* x 1.0) x (#3%fl* x 3.0)
+    '(#3%list 1.0 3.0 12.0 (#3%fl* x) (#3%fl* x 1.0) (#3%fl* x) (#3%fl* x 3.0)
       (#3%fl* x 3.0 4.0) (#3%fl* 3.0 x 4.0) (#3%fl* 3.0 x #i1/3) (#3%fl* 3.0 x 4.0 y 5.0)
       +nan.0 (#3%fl* +3.0 x 4.0 y +nan.0)))
  (equivalent-expansion?
--- a/mats/fl.ms
+++ b/mats/fl.ms
@ -1055,17 +1055,18 @@
             (#%$suppress-primitive-inlining)
             (let ([before (+ (bytes-allocated) (bytes-deallocated))]
                   [N 100000])
-               (box?
-                (let loop ([i N] [bx (box 0.0)])
-                  (if (zero? i)
-                      bx
-                      (loop (sub1 i) (let ([v (unbox bx)])
-                                       (box (proc v)))))))
-               (let ([allocated (- (+ (bytes-allocated) (bytes-deallocated)) before)]
-                     [expected (* N (+ (compute-size 1.0)
-                                       (compute-size (box #f))))])
-                 (printf "~s ~s\n" allocated expected)
-                 (<= expected allocated (* 1.2 expected)))))]))
+               (and
+                (box?
+                 (let loop ([i N] [bx (box 0.0)])
+                   (if (zero? i)
+                       bx
+                       (loop (sub1 i) (let ([v (unbox bx)])
+                                        (box (proc v)))))))
+                (let ([allocated (- (+ (bytes-allocated) (bytes-deallocated)) before)]
+                      [expected (* N (+ (compute-size 1.0)
+                                        (compute-size (box #f))))])
+                  (printf "~s ~s\n" allocated expected)
+                  (<= expected allocated (* 1.2 expected))))))]))
    #t)

  (check-loop-allocation (lambda (v) (fl+ v v)))
@ -1128,6 +1129,14 @@
                                         (fl+ v 1.0)
                                         (fl- v 1.0))))

+  (check-loop-allocation (lambda (v)
+                           ;; The two single-argument `fl+`s here should work as
+                           ;; a hint for unboxing in the loop
+                           (let loop ([n 100] [v (fl+ v)])
+                             (if (fx= n 0)
+                                 (fl+ v)
+                                 (loop (fx- n 1) (fl+ v 1.0))))))
+
  (let ([bv (make-bytevector 8 0)])
    (check-loop-allocation (lambda (v) (fl+ v (bytevector-ieee-double-native-ref bv 0)))))
  (let ([bv (make-bytevector 8 0)])
--- a/s/cp0.ss
+++ b/s/cp0.ss
@ -2562,7 +2562,7 @@
                            (let ([folded (generic-op a d)])
                              (and (target-fixnum? folded) folded)))))]
                [else #f]))))
-        (define (partial-fold-plus level orig-arg* ctxt prim op generic-op ident bottom? assoc-at-level)
+        (define (partial-fold-plus level orig-arg* ctxt prim op generic-op ident bottom? assoc-at-level direct-result?)
          (define fold? (make-fold? op generic-op))
          (let loop ([arg* orig-arg*] [a ident] [val* '()] [used '()] [unused '()])
            (if (null? arg*)
@ -2587,7 +2587,7 @@
                      (cond
                        [(null? val*) `(quote ,a)]
                        [(eqv? a ident)
-                         (if (and (fx= level 3) (null? (cdr val*)))
+                         (if (and (fx= level 3) (null? (cdr val*)) (direct-result? (car val*)))
                             (car val*)
                             (build-primcall (app-preinfo ctxt) level prim val*))]
                        [else
@ -2649,15 +2649,17 @@
            [(_ plus prim generic-op ident bottom?)
             (partial-folder plus prim generic-op ident bottom? #f)]
            [(_ plus prim generic-op ident bottom? assoc-at-level)
+             (partial-folder plus prim generic-op ident bottom? assoc-at-level (lambda (e) #t))]
+            [(_ plus prim generic-op ident bottom? assoc-at-level direct-result?)
             (begin
               (define-inline 2 prim
                 ; (fl+) might should return -0.0, but we force it to return +0.0 per TSPL4
                 [() (residualize-seq '() '() ctxt) `(quote ,(if (eqv? ident -0.0) +0.0 ident))]
-                 [arg* (partial-fold-plus 2 arg* ctxt 'prim prim generic-op ident bottom? assoc-at-level)])
+                 [arg* (partial-fold-plus 2 arg* ctxt 'prim prim generic-op ident bottom? assoc-at-level direct-result?)])
               (define-inline 3 prim
                 ; (fl+) might should return -0.0, but we force it to return +0.0 per TSPL4
                 [() (residualize-seq '() '() ctxt) `(quote ,(if (eqv? ident -0.0) +0.0 ident))]
-                 [arg* (partial-fold-plus 3 arg* ctxt 'prim prim generic-op ident bottom? assoc-at-level)]))]
+                 [arg* (partial-fold-plus 3 arg* ctxt 'prim prim generic-op ident bottom? assoc-at-level direct-result?)]))]
            [(_ minus prim generic-op ident)
             (begin
               (define-inline 2 prim
@ -2677,9 +2679,9 @@
            [(_ plus r6rs:prim prim generic-op ident bottom? assoc-at-level)
             (begin
               (define-inline 2 r6rs:prim
-                 [(arg1 arg2) (partial-fold-plus 2 (list arg1 arg2) ctxt 'prim prim generic-op ident bottom? assoc-at-level)])
+                 [(arg1 arg2) (partial-fold-plus 2 (list arg1 arg2) ctxt 'prim prim generic-op ident bottom? assoc-at-level (lambda (x) #t))])
               (define-inline 3 r6rs:prim
-                 [(arg1 arg2) (partial-fold-plus 3 (list arg1 arg2) ctxt 'prim prim generic-op ident bottom? assoc-at-level)]))]
+                 [(arg1 arg2) (partial-fold-plus 3 (list arg1 arg2) ctxt 'prim prim generic-op ident bottom? assoc-at-level (lambda (x) #t))]))]
            [(_ minus r6rs:prim prim generic-op ident)
             (begin
               (define-inline 2 r6rs:prim
@ -2691,18 +2693,28 @@
                 [(arg1 arg2)
                  (partial-fold-minus 3 arg1 (list arg2) ctxt 'prim prim generic-op ident)]))]))

+        (define obviously-fl?
+          ;; We keep single-argument `fl+` and `fl*` as an unboxing hint to the back end,
+          ;; but the hint is not necessary if the argument is the result of a primitive that
+          ;; produces fonums
+          (lambda (e)
+            (nanopass-case (Lsrc Expr) e
+              [(quote ,d) (flonum? d)]
+              [(call ,preinfo ,pr ,e* ...) (eq? 'flonum ($sgetprop (primref-name pr) '*result-type* #f))]
+              [else #f])))
+
        ; handling nans here using the support for handling exact zero in
        ; the multiply case.  maybe shouldn't bother with nans anyway.
        (partial-folder plus + + 0 generic-nan?)
        (partial-folder plus fx+ + 0 (lambda (x) #f) 3)
        (r6rs-fixnum-partial-folder plus r6rs:fx+ fx+ + 0 (lambda (x) #f) 3)
-        (partial-folder plus fl+ fl+ -0.0 fl-nan?)
+        (partial-folder plus fl+ fl+ -0.0 fl-nan? #f obviously-fl?)
        (partial-folder plus cfl+ cfl+ -0.0 cfl-nan?)

        (partial-folder plus * * 1 exact-zero?)   ; exact zero trumps nan
        (partial-folder plus fx* * 1 exact-zero? 3)
        (r6rs-fixnum-partial-folder plus r6rs:fx* fx* * 1 exact-zero? 3)
-        (partial-folder plus fl* fl* 1.0 fl-nan?)
+        (partial-folder plus fl* fl* 1.0 fl-nan? #f obviously-fl?)
        (partial-folder plus cfl* cfl* 1.0 cfl-nan?)

        ; not handling nans here since we don't have support for the exact