From af04af5aa32f18dc3269f7b28c8e086bb0c2ba29 Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Sun, 14 Jun 2020 10:12:16 -0600 Subject: [PATCH] keep single-argument unsafe `fl+` and `fl*` as an unboxing hint original commit: 054d6da58ceffcce2c5caa6eda5561a122658543 --- mats/cp0.ms | 12 ++++++------ mats/fl.ms | 31 ++++++++++++++++++++----------- s/cp0.ss | 28 ++++++++++++++++++++-------- 3 files changed, 46 insertions(+), 25 deletions(-) diff --git a/mats/cp0.ms b/mats/cp0.ms index f357a1fb21..3da3b9642c 100644 --- a/mats/cp0.ms +++ b/mats/cp0.ms @@ -1240,8 +1240,8 @@ (fl+) (fl+ 3.0) (fl+ 3.0 4.0) (fl+ x) (fl+ x 0.0) (fl+ x -0.0) (fl+ 0.0 x) (fl+ -0.0 x) (fl+ x 3.0) (fl+ x 3.0 4.0) (fl+ 3.0 x 4.0) (fl+ 3.0 x -3.0) (fl+ (truncate -.5) x (/ -inf.0)) (fl+ 3.0 x 4.0 y 5.0) (fl+ 3.0 x +nan.0 y 5.0)))) - '(#3%list 0.0 3.0 7.0 x (#3%fl+ 0.0 x) x (#3%fl+ 0.0 x) x (#3%fl+ 3.0 x) - (#3%fl+ 7.0 x) (#3%fl+ 7.0 x) (#3%fl+ 0.0 x) x (#3%fl+ 12.0 x y) + '(#3%list 0.0 3.0 7.0 (#3%fl+ x) (#3%fl+ 0.0 x) (#3%fl+ x) (#3%fl+ 0.0 x) (#3%fl+ x) (#3%fl+ 3.0 x) + (#3%fl+ 7.0 x) (#3%fl+ 7.0 x) (#3%fl+ 0.0 x) (#3%fl+ x) (#3%fl+ 12.0 x y) +nan.0)) (equivalent-expansion? (parameterize ([#%$suppress-primitive-inlining #f] @@ -1338,8 +1338,8 @@ (fl*) (fl* 3.0) (fl* 3.0 4.0) (fl* x) (fl* x 1.0) (fl* 1.0 x) (fl* x 3.0) (fl* x 3.0 4.0) (fl* 3.0 x 4.0) (fl* 3.0 x #i1/3) (fl* 3.0 x 4.0 y 5.0) (fl* 3.0 x 4.0 y +nan.0)))) - '(#3%list 1.0 3.0 12.0 x x x (#3%fl* 3.0 x) - (#3%fl* 12.0 x) (#3%fl* 12.0 x) x (#3%fl* 60.0 x y) + '(#3%list 1.0 3.0 12.0 (#3%fl* x) (#3%fl* x) (#3%fl* x) (#3%fl* 3.0 x) + (#3%fl* 12.0 x) (#3%fl* 12.0 x) (#3%fl* x) (#3%fl* 60.0 x y) +nan.0)) (equivalent-expansion? (parameterize ([#%$suppress-primitive-inlining #f] @@ -1836,7 +1836,7 @@ (fl+) (fl+ 3.0) (fl+ 3.0 4.0) (fl+ x) (fl+ x 0.0) (fl+ x -0.0) (fl+ 0.0 x) (fl+ -0.0 x) (fl+ x 3.0) (fl+ x 3.0 4.0) (fl+ 3.0 x 4.0) (fl+ 3.0 x -3.0) (fl+ (truncate -.5) x (/ -inf.0)) (fl+ 3.0 x 4.0 y 5.0) (fl+ +nan.0 x 3.0 y 5.0) (fl+ 3.0 x +nan.0 y 5.0)))) - '(#3%list 0.0 3.0 7.0 x (#3%fl+ x 0.0) (#3%fl+ x -0.0) (#3%fl+ 0.0 x) x (#3%fl+ x 3.0) + '(#3%list 0.0 3.0 7.0 (#3%fl+ x) (#3%fl+ x 0.0) (#3%fl+ x -0.0) (#3%fl+ 0.0 x) (#3%fl+ x) (#3%fl+ x 3.0) (#3%fl+ x 3.0 4.0) (#3%fl+ 3.0 x 4.0) (#3%fl+ 3.0 x -3.0) (#3%fl+ x -0.0) (#3%fl+ 3.0 x 4.0 y 5.0) +nan.0 (#3%fl+ 3.0 x +nan.0 y 5.0))) (equivalent-expansion? @@ -1942,7 +1942,7 @@ (fl*) (fl* 3.0) (fl* 3.0 4.0) (fl* x) (fl* x 1.0) (fl* 1.0 x) (fl* x 3.0) (fl* x 3.0 4.0) (fl* 3.0 x 4.0) (fl* 3.0 x #i1/3) (fl* 3.0 x 4.0 y 5.0) (fl* +nan.0 x 3.0 y 4.0) (fl* 3.0 x 4.0 y +nan.0)))) - '(#3%list 1.0 3.0 12.0 x (#3%fl* x 1.0) x (#3%fl* x 3.0) + '(#3%list 1.0 3.0 12.0 (#3%fl* x) (#3%fl* x 1.0) (#3%fl* x) (#3%fl* x 3.0) (#3%fl* x 3.0 4.0) (#3%fl* 3.0 x 4.0) (#3%fl* 3.0 x #i1/3) (#3%fl* 3.0 x 4.0 y 5.0) +nan.0 (#3%fl* +3.0 x 4.0 y +nan.0))) (equivalent-expansion? diff --git a/mats/fl.ms b/mats/fl.ms index cf6c93f72a..36f1922a1f 100644 --- a/mats/fl.ms +++ b/mats/fl.ms @@ -1055,17 +1055,18 @@ (#%$suppress-primitive-inlining) (let ([before (+ (bytes-allocated) (bytes-deallocated))] [N 100000]) - (box? - (let loop ([i N] [bx (box 0.0)]) - (if (zero? i) - bx - (loop (sub1 i) (let ([v (unbox bx)]) - (box (proc v))))))) - (let ([allocated (- (+ (bytes-allocated) (bytes-deallocated)) before)] - [expected (* N (+ (compute-size 1.0) - (compute-size (box #f))))]) - (printf "~s ~s\n" allocated expected) - (<= expected allocated (* 1.2 expected)))))])) + (and + (box? + (let loop ([i N] [bx (box 0.0)]) + (if (zero? i) + bx + (loop (sub1 i) (let ([v (unbox bx)]) + (box (proc v))))))) + (let ([allocated (- (+ (bytes-allocated) (bytes-deallocated)) before)] + [expected (* N (+ (compute-size 1.0) + (compute-size (box #f))))]) + (printf "~s ~s\n" allocated expected) + (<= expected allocated (* 1.2 expected))))))])) #t) (check-loop-allocation (lambda (v) (fl+ v v))) @@ -1128,6 +1129,14 @@ (fl+ v 1.0) (fl- v 1.0)))) + (check-loop-allocation (lambda (v) + ;; The two single-argument `fl+`s here should work as + ;; a hint for unboxing in the loop + (let loop ([n 100] [v (fl+ v)]) + (if (fx= n 0) + (fl+ v) + (loop (fx- n 1) (fl+ v 1.0)))))) + (let ([bv (make-bytevector 8 0)]) (check-loop-allocation (lambda (v) (fl+ v (bytevector-ieee-double-native-ref bv 0))))) (let ([bv (make-bytevector 8 0)]) diff --git a/s/cp0.ss b/s/cp0.ss index a7142b50c0..91d8dd9133 100644 --- a/s/cp0.ss +++ b/s/cp0.ss @@ -2562,7 +2562,7 @@ (let ([folded (generic-op a d)]) (and (target-fixnum? folded) folded)))))] [else #f])))) - (define (partial-fold-plus level orig-arg* ctxt prim op generic-op ident bottom? assoc-at-level) + (define (partial-fold-plus level orig-arg* ctxt prim op generic-op ident bottom? assoc-at-level direct-result?) (define fold? (make-fold? op generic-op)) (let loop ([arg* orig-arg*] [a ident] [val* '()] [used '()] [unused '()]) (if (null? arg*) @@ -2587,7 +2587,7 @@ (cond [(null? val*) `(quote ,a)] [(eqv? a ident) - (if (and (fx= level 3) (null? (cdr val*))) + (if (and (fx= level 3) (null? (cdr val*)) (direct-result? (car val*))) (car val*) (build-primcall (app-preinfo ctxt) level prim val*))] [else @@ -2649,15 +2649,17 @@ [(_ plus prim generic-op ident bottom?) (partial-folder plus prim generic-op ident bottom? #f)] [(_ plus prim generic-op ident bottom? assoc-at-level) + (partial-folder plus prim generic-op ident bottom? assoc-at-level (lambda (e) #t))] + [(_ plus prim generic-op ident bottom? assoc-at-level direct-result?) (begin (define-inline 2 prim ; (fl+) might should return -0.0, but we force it to return +0.0 per TSPL4 [() (residualize-seq '() '() ctxt) `(quote ,(if (eqv? ident -0.0) +0.0 ident))] - [arg* (partial-fold-plus 2 arg* ctxt 'prim prim generic-op ident bottom? assoc-at-level)]) + [arg* (partial-fold-plus 2 arg* ctxt 'prim prim generic-op ident bottom? assoc-at-level direct-result?)]) (define-inline 3 prim ; (fl+) might should return -0.0, but we force it to return +0.0 per TSPL4 [() (residualize-seq '() '() ctxt) `(quote ,(if (eqv? ident -0.0) +0.0 ident))] - [arg* (partial-fold-plus 3 arg* ctxt 'prim prim generic-op ident bottom? assoc-at-level)]))] + [arg* (partial-fold-plus 3 arg* ctxt 'prim prim generic-op ident bottom? assoc-at-level direct-result?)]))] [(_ minus prim generic-op ident) (begin (define-inline 2 prim @@ -2677,9 +2679,9 @@ [(_ plus r6rs:prim prim generic-op ident bottom? assoc-at-level) (begin (define-inline 2 r6rs:prim - [(arg1 arg2) (partial-fold-plus 2 (list arg1 arg2) ctxt 'prim prim generic-op ident bottom? assoc-at-level)]) + [(arg1 arg2) (partial-fold-plus 2 (list arg1 arg2) ctxt 'prim prim generic-op ident bottom? assoc-at-level (lambda (x) #t))]) (define-inline 3 r6rs:prim - [(arg1 arg2) (partial-fold-plus 3 (list arg1 arg2) ctxt 'prim prim generic-op ident bottom? assoc-at-level)]))] + [(arg1 arg2) (partial-fold-plus 3 (list arg1 arg2) ctxt 'prim prim generic-op ident bottom? assoc-at-level (lambda (x) #t))]))] [(_ minus r6rs:prim prim generic-op ident) (begin (define-inline 2 r6rs:prim @@ -2691,18 +2693,28 @@ [(arg1 arg2) (partial-fold-minus 3 arg1 (list arg2) ctxt 'prim prim generic-op ident)]))])) + (define obviously-fl? + ;; We keep single-argument `fl+` and `fl*` as an unboxing hint to the back end, + ;; but the hint is not necessary if the argument is the result of a primitive that + ;; produces fonums + (lambda (e) + (nanopass-case (Lsrc Expr) e + [(quote ,d) (flonum? d)] + [(call ,preinfo ,pr ,e* ...) (eq? 'flonum ($sgetprop (primref-name pr) '*result-type* #f))] + [else #f]))) + ; handling nans here using the support for handling exact zero in ; the multiply case. maybe shouldn't bother with nans anyway. (partial-folder plus + + 0 generic-nan?) (partial-folder plus fx+ + 0 (lambda (x) #f) 3) (r6rs-fixnum-partial-folder plus r6rs:fx+ fx+ + 0 (lambda (x) #f) 3) - (partial-folder plus fl+ fl+ -0.0 fl-nan?) + (partial-folder plus fl+ fl+ -0.0 fl-nan? #f obviously-fl?) (partial-folder plus cfl+ cfl+ -0.0 cfl-nan?) (partial-folder plus * * 1 exact-zero?) ; exact zero trumps nan (partial-folder plus fx* * 1 exact-zero? 3) (r6rs-fixnum-partial-folder plus r6rs:fx* fx* * 1 exact-zero? 3) - (partial-folder plus fl* fl* 1.0 fl-nan?) + (partial-folder plus fl* fl* 1.0 fl-nan? #f obviously-fl?) (partial-folder plus cfl* cfl* 1.0 cfl-nan?) ; not handling nans here since we don't have support for the exact