diff --git a/collects/compiler/decompile.ss b/collects/compiler/decompile.ss index 8729a983b1..0ebc8b28a6 100644 --- a/collects/compiler/decompile.ss +++ b/collects/compiler/decompile.ss @@ -321,13 +321,15 @@ list list* vector vector-immutable box))] [(3) (memq (car a) '(eq? = <= < >= > bitwise-bit-set? char=? - + - * / quotient remainder min max bitwise-and bitwise-ior + + - * / quotient remainder min max bitwise-and bitwise-ior bitwise-xor arithmetic-shift vector-ref string-ref bytes-ref set-mcar! set-mcdr! cons mcons list list* vector vector-immutable))] [(4) (memq (car a) '(vector-set! string-set! bytes-set! - list list* vector vector-immutable))] - [else (memq (car a) '(list list* vector vector-immutable))])) + list list* vector vector-immutable + + - * / min max bitwise-and bitwise-ior bitwise-xor))] + [else (memq (car a) '(list list* vector vector-immutable + + - * / min max bitwise-and bitwise-ior bitwise-xor))])) (cons '#%in a) a)) diff --git a/collects/scribblings/guide/performance.scrbl b/collects/scribblings/guide/performance.scrbl index dd78b8d3a4..0a0fc5e165 100644 --- a/collects/scribblings/guide/performance.scrbl +++ b/collects/scribblings/guide/performance.scrbl @@ -253,9 +253,9 @@ machine's instruction to add the numbers (and check for overflow). If the two numbers are not fixnums, then the next check whether whether both are flonums; in that case, the machine's floating-point operations are used directly. For functions that take any number of -arguments, such as @scheme[+], inlining is applied only for the -two-argument case (except for @scheme[-], whose one-argument case is -also inlined). +arguments, such as @scheme[+], inlining works for two or more +arguments (except for @scheme[-], whose one-argument case is also +inlined) when the arguments are either all fixnums or all flonums. Flonums are @defterm{boxed}, which means that memory is allocated to hold every result of a flonum computation. Fortunately, the diff --git a/collects/scribblings/reference/unsafe.scrbl b/collects/scribblings/reference/unsafe.scrbl index f01ba82236..35202bb503 100644 --- a/collects/scribblings/reference/unsafe.scrbl +++ b/collects/scribblings/reference/unsafe.scrbl @@ -1,6 +1,10 @@ #lang scribble/doc @(require "mz.ss" - (for-label scheme/unsafe/ops)) + (for-label scheme/unsafe/ops + (only-in scheme/foreign + f64vector? + f64vector-ref + f64vector-set!))) @title[#:tag "unsafe"]{Unsafe Operations} @@ -165,6 +169,15 @@ Unsafe versions of @scheme[bytes-length], @scheme[bytes-ref], and fixnum).} +@deftogether[( +@defproc[(unsafe-f64vector-ref [vec f64vector?][k fixnum?]) inexact-real?] +@defproc[(unsafe-f64vector-set! [vec f64vector?][k fixnum?][n inexact-real?]) void?] +)]{ + +Unsafe versions of @scheme[f64vector-ref] and +@scheme[f64vector-set!].} + + @deftogether[( @defproc[(unsafe-struct-ref [v any/c][k fixnum?]) any/c] @defproc[(unsafe-struct-set! [v any/c][k fixnum?][val any/c]) void?] diff --git a/collects/tests/mzscheme/benchmarks/shootout/mandelbrot.ss b/collects/tests/mzscheme/benchmarks/shootout/mandelbrot.ss index 3e8306c7b8..37b3526660 100644 --- a/collects/tests/mzscheme/benchmarks/shootout/mandelbrot.ss +++ b/collects/tests/mzscheme/benchmarks/shootout/mandelbrot.ss @@ -36,7 +36,7 @@ ((> (+ zrq ziq) +limit-sqr+) 0) (else (loop (add1 i) (+ (- zrq ziq) cr) - (+ (* 2.0 (* zr zi)) ci))))))))) + (+ (* 2.0 zr zi) ci))))))))) ;; ------------------------------- diff --git a/collects/tests/mzscheme/benchmarks/shootout/nbody.ss b/collects/tests/mzscheme/benchmarks/shootout/nbody.ss index f3c3199ddd..3311cc344e 100644 --- a/collects/tests/mzscheme/benchmarks/shootout/nbody.ss +++ b/collects/tests/mzscheme/benchmarks/shootout/nbody.ss @@ -93,9 +93,10 @@ Correct output N = 1000 is (if (null? o) e (let* ([o1 (car o)] - [e (+ e (* (* 0.5 (body-mass o1)) - (+ (+ (* (body-vx o1) (body-vx o1)) - (* (body-vy o1) (body-vy o1))) + [e (+ e (* 0.5 + (body-mass o1) + (+ (* (body-vx o1) (body-vx o1)) + (* (body-vy o1) (body-vy o1)) (* (body-vz o1) (body-vz o1)))))]) (let loop-i ([i (cdr o)] [e e]) (if (null? i) @@ -104,7 +105,7 @@ Correct output N = 1000 is [dx (- (body-x o1) (body-x i1))] [dy (- (body-y o1) (body-y i1))] [dz (- (body-z o1) (body-z i1))] - [dist (sqrt (+ (+ (* dx dx) (* dy dy)) (* dz dz)))] + [dist (sqrt (+ (* dx dx) (* dy dy) (* dz dz)))] [e (- e (/ (* (body-mass o1) (body-mass i1)) dist))]) (loop-i (cdr i) e)))))))) @@ -126,7 +127,7 @@ Correct output N = 1000 is [dx (- o1x (body-x i1))] [dy (- o1y (body-y i1))] [dz (- o1z (body-z i1))] - [dist2 (+ (+ (* dx dx) (* dy dy)) (* dz dz))] + [dist2 (+ (* dx dx) (* dy dy) (* dz dz))] [mag (/ +dt+ (* dist2 (sqrt dist2)))] [dxmag (* dx mag)] [dymag (* dy mag)] diff --git a/collects/tests/mzscheme/optimize.ss b/collects/tests/mzscheme/optimize.ss index f221dca2d6..19c32bfd45 100644 --- a/collects/tests/mzscheme/optimize.ss +++ b/collects/tests/mzscheme/optimize.ss @@ -89,17 +89,38 @@ (bin0 iv op +nan.0 (exact->inexact arg2)) (unless (eq? op 'eq?) (bin0 iv op +nan.0 +nan.0))))] - [tri0 (lambda (v op get-arg1 arg2 arg3 check-effect) + [tri0 (lambda (v op get-arg1 arg2 arg3 check-effect #:wrap [wrap values]) ;; (printf "Trying ~a ~a ~a\n" op (get-arg1) arg2 arg3); - (let ([name `(,op ,get-arg1 ,arg2, arg3)]) - (test v name ((eval `(lambda (x) (,op x ,arg2 ,arg3))) (get-arg1))) + (let ([name `(,op ,get-arg1 ,arg2, arg3)] + [get-arg2 (lambda () arg2)] + [get-arg3 (lambda () arg3)]) + (test v name ((eval `(lambda (x) ,(wrap `(,op x ,arg2 ,arg3)))) (get-arg1))) (check-effect) - (test v name ((eval `(lambda (x) (,op (,get-arg1) x ,arg3))) arg2)) + (test v name ((eval `(lambda (x) ,(wrap `(,op (,get-arg1) x ,arg3)))) arg2)) (check-effect) - (test v name ((eval `(lambda (x) (,op (,get-arg1) ,arg2 x))) arg3)) + (test v name ((eval `(lambda (x) ,(wrap `(,op x (,get-arg2) ,arg3)))) (get-arg1))) (check-effect) - (test v name ((eval `(lambda (x y z) (,op x y z))) (get-arg1) arg2 arg3)) + (test v name ((eval `(lambda (x) ,(wrap `(,op (,get-arg1) (,get-arg2) x)))) arg3)) + (check-effect) + (test v name ((eval `(lambda () ,(wrap `(,op (,get-arg1) (,get-arg2) (,get-arg3))))))) + (check-effect) + (test v name ((eval `(lambda (x) ,(wrap `(,op (,get-arg1) ,arg2 x)))) arg3)) + (check-effect) + (test v name ((eval `(lambda (x y) ,(wrap `(,op (,get-arg1) x y)))) arg2 arg3)) + (check-effect) + (test v name ((eval `(lambda (x y z) ,(wrap `(,op x y z)))) (get-arg1) arg2 arg3)) (check-effect)))] + [tri (lambda (v op get-arg1 arg2 arg3 check-effect #:wrap [wrap values]) + (define (e->i n) (if (number? n) (exact->inexact n) n)) + (tri0 v op get-arg1 arg2 arg3 check-effect #:wrap wrap) + (tri0 (e->i v) op (lambda () (exact->inexact (get-arg1))) (exact->inexact arg2) (exact->inexact arg3) check-effect + #:wrap wrap) + (tri0 (e->i v) op get-arg1 (exact->inexact arg2) arg3 check-effect + #:wrap wrap))] + [tri-if (lambda (v op get-arg1 arg2 arg3 check-effect) + (tri v op get-arg1 arg2 arg3 check-effect) + (tri (if v 'true 'false) op get-arg1 arg2 arg3 check-effect + #:wrap (lambda (e) `(if ,e 'true 'false))))] [tri-exact (lambda (v op get-arg1 arg2 arg3 check-effect 3rd-all-ok?) (check-error-message op (eval `(lambda (x) (,op x ,arg2 ,arg3)))) (check-error-message op (eval `(lambda (x) (,op (,get-arg1) x ,arg3)))) @@ -188,12 +209,18 @@ (bin #t '< -200 100) (bin #f '< 100 -200) (bin #t '< 1 (expt 2 30)) + (tri-if #t '< (lambda () 1) 2 3 void) + (tri-if #f '< (lambda () 1) 3 3 void) + (tri-if #f '< (lambda () 1) -1 3 void) (bin #t '<= 100 200) (bin #f '<= 200 100) (bin #t '<= 100 100) (bin #t '<= -200 100) (bin #f '<= 100 -200) + (tri-if #t '<= (lambda () 1) 2 3 void) + (tri-if #t '<= (lambda () 1) 3 3 void) + (tri-if #f '<= (lambda () 1) -1 3 void) (bin #f '> 100 200) (bin #t '> 200 100) @@ -201,18 +228,28 @@ (bin #f '> -200 100) (bin #t '> 100 -200) (bin #f '> 1 (expt 2 30)) + (tri-if #t '> (lambda () 3) 2 1 void) + (tri-if #f '> (lambda () 3) 3 1 void) + (tri-if #f '> (lambda () 3) -1 1 void) (bin #f '>= 100 200) (bin #t '>= 200 100) (bin #t '>= 100 100) (bin #f '>= -200 100) (bin #t '>= 100 -200) + (tri-if #t '>= (lambda () 3) 2 1 void) + (tri-if #t '>= (lambda () 3) 3 1 void) + (tri-if #f '>= (lambda () 3) -1 1 void) (bin #f '= 100 200) (bin #f '= 200 100) (bin #t '= 100 100) (bin #f '= -200 100) (bin #f '= +nan.0 +nan.0) + (tri-if #t '= (lambda () 3) 3 3 void) + (tri-if #f '= (lambda () 3) 3 1 void) + (tri-if #f '= (lambda () 3) 1 3 void) + (tri-if #f '= (lambda () 1) 3 3 void) (un 3 'add1 2) (un -3 'add1 -4) @@ -247,6 +284,7 @@ (bin -3 '+ 4 -7) (bin (expt 2 30) '+ (expt 2 29) (expt 2 29)) (bin (- (expt 2 31) 2) '+ (sub1 (expt 2 30)) (sub1 (expt 2 30))) + (tri 6 '+ (lambda () 1) 2 3 void) (bin 3 '- 7 4) (bin 11 '- 7 -4) @@ -254,6 +292,7 @@ (bin (expt 2 30) '- (expt 2 29) (- (expt 2 29))) (bin (- (expt 2 30)) '- (- (expt 2 29)) (expt 2 29)) (bin (- 2 (expt 2 31)) '- (- 1 (expt 2 30)) (sub1 (expt 2 30))) + (tri 6 '- (lambda () 10) 3 1 void) (bin 4 '* 1 4) (bin 0 '* 0 4) @@ -265,6 +304,7 @@ (bin (expt 2 30) '* 2 (expt 2 29)) (bin (expt 2 31) '* 2 (expt 2 30)) (bin (- (expt 2 30)) '* 2 (- (expt 2 29))) + (tri 30 '* (lambda () 2) 3 5 void) (bin 0 '/ 0 4) (bin 1/4 '/ 1 4) @@ -273,6 +313,7 @@ (bin -4 '/ -16 4) (bin -4 '/ 16 -4) (bin 4 '/ -16 -4) + (tri 3 '/ (lambda () 30) 5 2 void) (bin-int 3 'quotient 10 3) (bin-int -3 'quotient 10 -3) @@ -289,10 +330,16 @@ (bin 3 'min 3 300) (bin -300 'min 3 -300) (bin -400 'min -400 -300) + (tri 5 'min (lambda () 10) 5 20 void) + (tri 5 'min (lambda () 5) 10 20 void) + (tri 5 'min (lambda () 20) 10 5 void) (bin 300 'max 3 300) (bin 3 'max 3 -300) (bin -3 'max -3 -300) + (tri 50 'max (lambda () 10) 50 20 void) + (tri 50 'max (lambda () 50) 10 20 void) + (tri 50 'max (lambda () 20) 10 50 void) (bin-exact 11 'bitwise-and 11 43) (bin-exact 0 'bitwise-and 11 32) @@ -301,18 +348,21 @@ (bin-exact 11 'bitwise-and 11 -1) (bin-exact -11 'bitwise-and -11 -1) (bin-exact (expt 2 50) 'bitwise-and (expt 2 50) (expt 2 50)) + (tri-exact #x10101 'bitwise-and (lambda () #x11111) #x10111 #x110101 void #f) (bin-exact 11 'bitwise-ior 8 3) (bin-exact 11 'bitwise-ior 11 3) (bin-exact -1 'bitwise-ior 11 -1) (bin-exact (sub1 (expt 2 51)) 'bitwise-ior (sub1 (expt 2 50)) (expt 2 50)) (bin-exact (add1 (expt 2 50)) 'bitwise-ior 1 (expt 2 50)) + (tri-exact #x10101 'bitwise-ior (lambda () #x1) #x100 #x10000 void #f) (bin-exact 11 'bitwise-xor 8 3) (bin-exact 8 'bitwise-xor 11 3) (bin-exact -2 'bitwise-xor 1 -1) (bin-exact (sub1 (expt 2 51)) 'bitwise-xor (sub1 (expt 2 50)) (expt 2 50)) (bin-exact (add1 (expt 2 50)) 'bitwise-xor 1 (expt 2 50)) + (tri-exact #x10101 'bitwise-xor (lambda () #x1) #x110 #x10010 void #f) (bin-exact 4 'arithmetic-shift 2 1) (bin-exact 1 'arithmetic-shift 2 -1) diff --git a/collects/tests/mzscheme/unsafe.ss b/collects/tests/mzscheme/unsafe.ss index 71f7aac5ae..fdfd47803c 100644 --- a/collects/tests/mzscheme/unsafe.ss +++ b/collects/tests/mzscheme/unsafe.ss @@ -3,7 +3,8 @@ (Section 'unsafe) -(require '#%unsafe) +(require scheme/unsafe/ops + scheme/foreign) (let () (define (test-tri result proc x y z @@ -186,6 +187,13 @@ #:post (lambda (x) (list x (string-ref v 2))) #:literal-ok? #f)) + (test-bin 9.5 'unsafe-f64vector-ref (f64vector 1.0 9.5 18.7) 1) + (let ([v (f64vector 1.0 9.5 18.7)]) + (test-tri (list (void) 27.4) 'unsafe-f64vector-set! v 2 27.4 + #:pre (lambda () (f64vector-set! v 2 0.0)) + #:post (lambda (x) (list x (f64vector-ref v 2))) + #:literal-ok? #f)) + (let () (define-struct posn (x [y #:mutable] z)) (test-bin 'a unsafe-struct-ref (make-posn 'a 'b 'c) 0 #:literal-ok? #f) @@ -195,6 +203,12 @@ #:pre (lambda () (set-posn-y! p 0)) #:post (lambda (x) (posn-y p)) #:literal-ok? #f))) + ;; test unboxing: + (test-tri 5.4 '(lambda (x y z) (unsafe-fl+ x (unsafe-f64vector-ref y z))) 1.2 (f64vector 1.0 4.2 6.7) 1) + (test-tri 3.2 '(lambda (x y z) + (unsafe-f64vector-set! y 1 (unsafe-fl+ x z)) + (unsafe-f64vector-ref y 1)) + 1.2 (f64vector 1.0 4.2 6.7) 2.0) (void)) diff --git a/doc/release-notes/mzscheme/HISTORY.txt b/doc/release-notes/mzscheme/HISTORY.txt index 1c35a4436d..fa874ddc03 100644 --- a/doc/release-notes/mzscheme/HISTORY.txt +++ b/doc/release-notes/mzscheme/HISTORY.txt @@ -1,3 +1,7 @@ +Version 4.2.3.3 +Added unsafe-f64vector-ref and unsafe-f64vector-set! +Changed JIT to inline numeric ops with more than 2 arguments + Version 4.2.3, November 2009 Changed _pointer (in scheme/foreign) to mean a pointer that does not refer to GCable memory; added _gcpointer diff --git a/src/mzscheme/src/cstartup.inc b/src/mzscheme/src/cstartup.inc index 51a29c08c7..2173ba0593 100644 --- a/src/mzscheme/src/cstartup.inc +++ b/src/mzscheme/src/cstartup.inc @@ -1,43 +1,43 @@ { - static MZCOMPILED_STRING_FAR unsigned char expr[] = {35,126,7,52,46,50,46,51,46,50,50,0,0,0,1,0,0,3,0,12,0, -17,0,20,0,27,0,40,0,47,0,51,0,58,0,63,0,68,0,72,0,78, + static MZCOMPILED_STRING_FAR unsigned char expr[] = {35,126,7,52,46,50,46,51,46,51,50,0,0,0,1,0,0,3,0,12,0, +25,0,29,0,34,0,41,0,44,0,49,0,56,0,63,0,67,0,72,0,78, 0,92,0,106,0,109,0,115,0,119,0,121,0,132,0,134,0,148,0,155,0, 177,0,179,0,193,0,4,1,33,1,44,1,55,1,65,1,101,1,134,1,167, 1,226,1,36,2,114,2,180,2,185,2,205,2,96,3,116,3,167,3,233,3, 118,4,4,5,56,5,79,5,158,5,0,0,105,7,0,0,29,11,11,68,104, -101,114,101,45,115,116,120,64,99,111,110,100,62,111,114,66,108,101,116,114,101, -99,72,112,97,114,97,109,101,116,101,114,105,122,101,66,117,110,108,101,115,115, -63,108,101,116,66,100,101,102,105,110,101,64,119,104,101,110,64,108,101,116,42, -63,97,110,100,65,113,117,111,116,101,29,94,2,13,68,35,37,107,101,114,110, +101,114,101,45,115,116,120,72,112,97,114,97,109,101,116,101,114,105,122,101,63, +97,110,100,64,108,101,116,42,66,100,101,102,105,110,101,62,111,114,64,99,111, +110,100,66,108,101,116,114,101,99,66,117,110,108,101,115,115,63,108,101,116,64, +119,104,101,110,65,113,117,111,116,101,29,94,2,13,68,35,37,107,101,114,110, 101,108,11,29,94,2,13,68,35,37,112,97,114,97,109,122,11,62,105,102,65, 98,101,103,105,110,63,115,116,120,61,115,70,108,101,116,45,118,97,108,117,101, 115,61,120,73,108,101,116,114,101,99,45,118,97,108,117,101,115,66,108,97,109, 98,100,97,1,20,112,97,114,97,109,101,116,101,114,105,122,97,116,105,111,110, 45,107,101,121,61,118,73,100,101,102,105,110,101,45,118,97,108,117,101,115,97, -35,11,8,240,168,70,0,0,95,159,2,15,35,35,159,2,14,35,35,159,2, -14,35,35,16,20,2,3,2,1,2,5,2,1,2,6,2,1,2,7,2,1, -2,8,2,1,2,9,2,1,2,10,2,1,2,4,2,1,2,11,2,1,2, -12,2,1,97,36,11,8,240,168,70,0,0,93,159,2,14,35,36,16,2,2, -2,161,2,1,36,2,2,2,1,2,2,96,11,11,8,240,168,70,0,0,16, -0,96,37,11,8,240,168,70,0,0,16,0,13,16,4,35,29,11,11,2,1, +35,11,8,240,35,79,0,0,95,159,2,15,35,35,159,2,14,35,35,159,2, +14,35,35,16,20,2,3,2,1,2,7,2,1,2,4,2,1,2,5,2,1, +2,6,2,1,2,9,2,1,2,8,2,1,2,10,2,1,2,11,2,1,2, +12,2,1,97,36,11,8,240,35,79,0,0,93,159,2,14,35,36,16,2,2, +2,161,2,1,36,2,2,2,1,2,2,96,11,11,8,240,35,79,0,0,16, +0,96,37,11,8,240,35,79,0,0,16,0,13,16,4,35,29,11,11,2,1, 11,18,16,2,99,64,104,101,114,101,8,31,8,30,8,29,8,28,8,27,93, -8,224,175,70,0,0,95,9,8,224,175,70,0,0,2,1,27,248,22,137,4, +8,224,42,79,0,0,95,9,8,224,42,79,0,0,2,1,27,248,22,137,4, 195,249,22,130,4,80,158,38,35,251,22,77,2,16,248,22,92,199,12,249,22, 67,2,17,248,22,94,201,27,248,22,137,4,195,249,22,130,4,80,158,38,35, 251,22,77,2,16,248,22,92,199,249,22,67,2,17,248,22,94,201,12,27,248, 22,69,248,22,137,4,196,28,248,22,75,193,20,15,159,36,35,36,28,248,22, 75,248,22,69,194,248,22,68,193,249,22,130,4,80,158,38,35,251,22,77,2, -16,248,22,68,199,249,22,67,2,12,248,22,69,201,11,18,16,2,101,10,8, +16,248,22,68,199,249,22,67,2,4,248,22,69,201,11,18,16,2,101,10,8, 31,8,30,8,29,8,28,8,27,16,4,11,11,2,18,3,1,8,101,110,118, -49,49,51,56,56,16,4,11,11,2,19,3,1,8,101,110,118,49,49,51,56, -57,93,8,224,176,70,0,0,95,9,8,224,176,70,0,0,2,1,27,248,22, +49,50,57,54,48,16,4,11,11,2,19,3,1,8,101,110,118,49,50,57,54, +49,93,8,224,43,79,0,0,95,9,8,224,43,79,0,0,2,1,27,248,22, 69,248,22,137,4,196,28,248,22,75,193,20,15,159,36,35,36,28,248,22,75, 248,22,69,194,248,22,68,193,249,22,130,4,80,158,38,35,250,22,77,2,20, 248,22,77,249,22,77,248,22,77,2,21,248,22,68,201,251,22,77,2,16,2, -21,2,21,249,22,67,2,4,248,22,69,204,18,16,2,101,11,8,31,8,30, -8,29,8,28,8,27,16,4,11,11,2,18,3,1,8,101,110,118,49,49,51, -57,49,16,4,11,11,2,19,3,1,8,101,110,118,49,49,51,57,50,93,8, -224,177,70,0,0,95,9,8,224,177,70,0,0,2,1,248,22,137,4,193,27, +21,2,21,249,22,67,2,7,248,22,69,204,18,16,2,101,11,8,31,8,30, +8,29,8,28,8,27,16,4,11,11,2,18,3,1,8,101,110,118,49,50,57, +54,51,16,4,11,11,2,19,3,1,8,101,110,118,49,50,57,54,52,93,8, +224,44,79,0,0,95,9,8,224,44,79,0,0,2,1,248,22,137,4,193,27, 248,22,137,4,194,249,22,67,248,22,77,248,22,68,196,248,22,69,195,27,248, 22,69,248,22,137,4,23,197,1,249,22,130,4,80,158,38,35,28,248,22,53, 248,22,131,4,248,22,68,23,198,2,27,249,22,2,32,0,89,162,8,44,36, @@ -51,8 +51,8 @@ 249,22,2,32,0,89,162,8,44,36,46,9,222,33,42,248,22,137,4,248,22, 68,201,248,22,69,198,27,248,22,69,248,22,137,4,196,27,248,22,137,4,248, 22,68,195,249,22,130,4,80,158,39,35,28,248,22,75,195,250,22,78,2,20, -9,248,22,69,199,250,22,77,2,8,248,22,77,248,22,68,199,250,22,78,2, -11,248,22,69,201,248,22,69,202,27,248,22,69,248,22,137,4,23,197,1,27, +9,248,22,69,199,250,22,77,2,11,248,22,77,248,22,68,199,250,22,78,2, +5,248,22,69,201,248,22,69,202,27,248,22,69,248,22,137,4,23,197,1,27, 249,22,1,22,81,249,22,2,22,137,4,248,22,137,4,248,22,68,199,249,22, 130,4,80,158,39,35,251,22,77,1,22,119,105,116,104,45,99,111,110,116,105, 110,117,97,116,105,111,110,45,109,97,114,107,2,24,250,22,78,1,23,101,120, @@ -62,14 +62,14 @@ 22,69,203,27,248,22,69,248,22,137,4,196,28,248,22,75,193,20,15,159,36, 35,36,249,22,130,4,80,158,38,35,27,248,22,137,4,248,22,68,197,28,249, 22,167,8,62,61,62,248,22,131,4,248,22,92,196,250,22,77,2,20,248,22, -77,249,22,77,21,93,2,25,248,22,68,199,250,22,78,2,3,249,22,77,2, +77,249,22,77,21,93,2,25,248,22,68,199,250,22,78,2,8,249,22,77,2, 25,249,22,77,248,22,101,203,2,25,248,22,69,202,251,22,77,2,16,28,249, 22,167,8,248,22,131,4,248,22,68,200,64,101,108,115,101,10,248,22,68,197, -250,22,78,2,20,9,248,22,69,200,249,22,67,2,3,248,22,69,202,100,8, +250,22,78,2,20,9,248,22,69,200,249,22,67,2,8,248,22,69,202,100,8, 31,8,30,8,29,8,28,8,27,16,4,11,11,2,18,3,1,8,101,110,118, -49,49,52,49,52,16,4,11,11,2,19,3,1,8,101,110,118,49,49,52,49, -53,93,8,224,178,70,0,0,18,16,2,158,94,10,64,118,111,105,100,8,47, -95,9,8,224,178,70,0,0,2,1,27,248,22,69,248,22,137,4,196,249,22, +49,50,57,56,54,16,4,11,11,2,19,3,1,8,101,110,118,49,50,57,56, +55,93,8,224,45,79,0,0,18,16,2,158,94,10,64,118,111,105,100,8,47, +95,9,8,224,45,79,0,0,2,1,27,248,22,69,248,22,137,4,196,249,22, 130,4,80,158,38,35,28,248,22,53,248,22,131,4,248,22,68,197,250,22,77, 2,26,248,22,77,248,22,68,199,248,22,92,198,27,248,22,131,4,248,22,68, 197,250,22,77,2,26,248,22,77,248,22,68,197,250,22,78,2,23,248,22,69, @@ -81,25 +81,25 @@ 2,3,2,4,2,5,2,6,2,7,2,8,2,9,2,10,2,11,2,12,35, 45,36,11,11,11,16,0,16,0,16,0,35,35,11,11,11,11,16,0,16,0, 16,0,35,35,16,11,16,5,2,2,20,15,159,35,35,35,35,20,102,159,35, -16,0,16,1,33,32,10,16,5,2,7,89,162,8,44,36,52,9,223,0,33, -33,35,20,102,159,35,16,1,2,2,16,0,11,16,5,2,10,89,162,8,44, +16,0,16,1,33,32,10,16,5,2,10,89,162,8,44,36,52,9,223,0,33, +33,35,20,102,159,35,16,1,2,2,16,0,11,16,5,2,12,89,162,8,44, 36,52,9,223,0,33,34,35,20,102,159,35,16,1,2,2,16,0,11,16,5, -2,12,89,162,8,44,36,52,9,223,0,33,35,35,20,102,159,35,16,1,2, -2,16,1,33,36,11,16,5,2,4,89,162,8,44,36,55,9,223,0,33,37, -35,20,102,159,35,16,1,2,2,16,1,33,38,11,16,5,2,8,89,162,8, +2,4,89,162,8,44,36,52,9,223,0,33,35,35,20,102,159,35,16,1,2, +2,16,1,33,36,11,16,5,2,7,89,162,8,44,36,55,9,223,0,33,37, +35,20,102,159,35,16,1,2,2,16,1,33,38,11,16,5,2,11,89,162,8, 44,36,57,9,223,0,33,41,35,20,102,159,35,16,1,2,2,16,0,11,16, -5,2,5,89,162,8,44,36,52,9,223,0,33,43,35,20,102,159,35,16,1, -2,2,16,0,11,16,5,2,11,89,162,8,44,36,53,9,223,0,33,44,35, -20,102,159,35,16,1,2,2,16,0,11,16,5,2,6,89,162,8,44,36,54, -9,223,0,33,45,35,20,102,159,35,16,1,2,2,16,0,11,16,5,2,3, +5,2,9,89,162,8,44,36,52,9,223,0,33,43,35,20,102,159,35,16,1, +2,2,16,0,11,16,5,2,5,89,162,8,44,36,53,9,223,0,33,44,35, +20,102,159,35,16,1,2,2,16,0,11,16,5,2,3,89,162,8,44,36,54, +9,223,0,33,45,35,20,102,159,35,16,1,2,2,16,0,11,16,5,2,8, 89,162,8,44,36,57,9,223,0,33,46,35,20,102,159,35,16,1,2,2,16, -1,33,48,11,16,5,2,9,89,162,8,44,36,53,9,223,0,33,49,35,20, +1,33,48,11,16,5,2,6,89,162,8,44,36,53,9,223,0,33,49,35,20, 102,159,35,16,1,2,2,16,0,11,16,0,94,2,14,2,15,93,2,14,9, 9,35,0}; EVAL_ONE_SIZED_STR((char *)expr, 2018); } { - static MZCOMPILED_STRING_FAR unsigned char expr[] = {35,126,7,52,46,50,46,51,46,50,59,0,0,0,1,0,0,13,0,18,0, + static MZCOMPILED_STRING_FAR unsigned char expr[] = {35,126,7,52,46,50,46,51,46,51,59,0,0,0,1,0,0,13,0,18,0, 35,0,50,0,68,0,84,0,94,0,112,0,132,0,148,0,166,0,197,0,226, 0,248,0,6,1,12,1,26,1,31,1,41,1,49,1,77,1,109,1,154,1, 199,1,223,1,6,2,8,2,65,2,155,3,196,3,31,5,135,5,239,5,100, @@ -341,12 +341,12 @@ EVAL_ONE_SIZED_STR((char *)expr, 5006); } { - static MZCOMPILED_STRING_FAR unsigned char expr[] = {35,126,7,52,46,50,46,51,46,50,8,0,0,0,1,0,0,6,0,19,0, + static MZCOMPILED_STRING_FAR unsigned char expr[] = {35,126,7,52,46,50,46,51,46,51,8,0,0,0,1,0,0,6,0,19,0, 34,0,48,0,62,0,76,0,118,0,0,0,38,1,0,0,65,113,117,111,116, 101,29,94,2,1,67,35,37,117,116,105,108,115,11,29,94,2,1,69,35,37, 110,101,116,119,111,114,107,11,29,94,2,1,68,35,37,112,97,114,97,109,122, 11,29,94,2,1,68,35,37,101,120,112,111,98,115,11,29,94,2,1,68,35, -37,107,101,114,110,101,108,11,97,35,11,8,240,46,71,0,0,98,159,2,2, +37,107,101,114,110,101,108,11,97,35,11,8,240,169,79,0,0,98,159,2,2, 35,35,159,2,3,35,35,159,2,4,35,35,159,2,5,35,35,159,2,6,35, 35,159,2,6,35,35,16,0,159,35,20,102,159,35,16,1,11,16,0,83,158, 41,20,100,144,69,35,37,98,117,105,108,116,105,110,29,11,11,11,11,11,18, @@ -360,7 +360,7 @@ EVAL_ONE_SIZED_STR((char *)expr, 331); } { - static MZCOMPILED_STRING_FAR unsigned char expr[] = {35,126,7,52,46,50,46,51,46,50,56,0,0,0,1,0,0,11,0,38,0, + static MZCOMPILED_STRING_FAR unsigned char expr[] = {35,126,7,52,46,50,46,51,46,51,56,0,0,0,1,0,0,11,0,38,0, 44,0,57,0,66,0,73,0,95,0,117,0,143,0,155,0,173,0,193,0,205, 0,221,0,244,0,0,1,31,1,38,1,43,1,48,1,53,1,58,1,67,1, 72,1,76,1,84,1,93,1,101,1,204,1,249,1,13,2,42,2,73,2,129, diff --git a/src/mzscheme/src/jit.c b/src/mzscheme/src/jit.c index 95859ffeb2..577b27be65 100644 --- a/src/mzscheme/src/jit.c +++ b/src/mzscheme/src/jit.c @@ -134,6 +134,7 @@ static void *call_original_binary_rev_arith_code; static void *call_original_unary_arith_for_branch_code; static void *call_original_binary_arith_for_branch_code; static void *call_original_binary_rev_arith_for_branch_code; +static void *call_original_nary_arith_code; static void *bad_car_code, *bad_cdr_code; static void *bad_caar_code, *bad_cdar_code, *bad_cadr_code, *bad_cddr_code; static void *bad_mcar_code, *bad_mcdr_code; @@ -1047,7 +1048,7 @@ static int mz_remap_it(mz_jit_state *jitter, int i) while (p && (j >= 0)) { c = jitter->mappings[p]; if (c & 0x1) { - /* native push */ + /* native push or skip */ c >>= 1; i += c; if (c < 0) @@ -2097,6 +2098,19 @@ static int is_constant_and_avoids_r1(Scheme_Object *obj) return (t >= _scheme_compiled_values_types_); } +static int avoids_r1(Scheme_Object *obj) +{ + Scheme_Type t = SCHEME_TYPE(obj); + + if (SAME_TYPE(t, scheme_toplevel_type)) { + return 1; + } else if (SAME_TYPE(t, scheme_local_type) + || SAME_TYPE(t, scheme_local_unbox_type)) { + return 1; + } else + return is_constant_and_avoids_r1(obj); +} + /*========================================================================*/ /* application codegen */ /*========================================================================*/ @@ -3063,7 +3077,7 @@ static void register_helper_func(mz_jit_state *jitter, void *code) #endif } -int do_generate_shared_call(mz_jit_state *jitter, void *_data) +static int do_generate_shared_call(mz_jit_state *jitter, void *_data) { Generate_Call_Data *data = (Generate_Call_Data *)_data; @@ -3351,7 +3365,7 @@ static int generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_ } if (num_rands) { - if (!direct_prim || (num_rands > 1)) { + if (!direct_prim || (num_rands > 1) || (no_call == 2)) { mz_rs_dec(num_rands); need_safety = num_rands; CHECK_RUNSTACK_OVERFLOW(); @@ -3425,7 +3439,7 @@ static int generate_app(Scheme_App_Rec *app, Scheme_Object **alt_rands, int num_ /* Move rator back to register: */ mz_rs_ldxi(JIT_V1, i + offset); } - if ((!direct_prim || (num_rands > 1)) + if ((!direct_prim || (num_rands > 1) || (no_call == 2)) && (!direct_self || !is_tail || no_call || (i + 1 < num_rands))) { mz_rs_stxi(i + offset, JIT_R0); } @@ -3571,6 +3585,7 @@ static int is_unboxable_op(Scheme_Object *obj, int flag) if (IS_NAMED_PRIM(obj, "unsafe-fl/")) return 1; if (IS_NAMED_PRIM(obj, "unsafe-flabs")) return 1; if (IS_NAMED_PRIM(obj, "unsafe-fx->fl")) return 1; + if (IS_NAMED_PRIM(obj, "unsafe-f64vector-ref")) return 1; return 0; } @@ -3618,7 +3633,7 @@ static int can_unbox(Scheme_Object *obj, int fuel, int regs) if (!can_unbox(app->rand1, fuel - 1, regs)) return 0; return can_unbox(app->rand2, fuel - 1, regs - 1); - } + } case scheme_toplevel_type: case scheme_local_type: case scheme_local_unbox_type: @@ -3740,6 +3755,7 @@ static int can_fast_double(int arith, int cmp, int two_args) #define jit_ldi_d_fppush(rd, is) jit_ldi_d(rd, is) #define jit_ldr_d_fppush(rd, rs) jit_ldr_d(rd, rs) #define jit_ldxi_d_fppush(rd, rs, is) jit_ldxi_d(rd, rs, is) +#define jit_ldxr_d_fppush(rd, rs, is) jit_ldxr_d(rd, rs, is) #define jit_addr_d_fppop(rd,s1,s2) jit_addr_d(rd,s1,s2) #define jit_subr_d_fppop(rd,s1,s2) jit_subr_d(rd,s1,s2) #define jit_subrr_d_fppop(rd,s1,s2) jit_subrr_d(rd,s1,s2) @@ -3751,6 +3767,7 @@ static int can_fast_double(int arith, int cmp, int two_args) #define jit_sti_d_fppop(id, rs) jit_sti_d(id, rs) #define jit_str_d_fppop(id, rd, rs) jit_str_d(id, rd, rs) #define jit_stxi_d_fppop(id, rd, rs) jit_stxi_d(id, rd, rs) +#define jit_stxr_d_fppop(id, rd, rs) jit_stxr_d(id, rd, rs) #define jit_bger_d_fppop(d, s1, s2) jit_bger_d(d, s1, s2) #define jit_bantiger_d_fppop(d, s1, s2) jit_bantiger_d(d, s1, s2) #define jit_bler_d_fppop(d, s1, s2) jit_bler_d(d, s1, s2) @@ -3990,7 +4007,7 @@ static int generate_double_arith(mz_jit_state *jitter, int arith, int cmp, int r static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Object *rand, Scheme_Object *rand2, int orig_args, int arith, int cmp, int v, jit_insn **for_branch, int branch_short, - int unsafe_fx, int unsafe_fl) + int unsafe_fx, int unsafe_fl, GC_CAN_IGNORE jit_insn *overflow_refslow) /* needs de-sync */ /* Either arith is non-zero or it's a cmp; the value of each determines the operation: arith = 1 -> + or add1 (if !rand2) @@ -4013,7 +4030,12 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj cmp = +/-1 -> >=/<= cmp = +/-2 -> >/< or positive/negative? cmp = 3 -> bitwise-bit-test? - */ + If rand is NULL, then we're generating part of the fast path for an + nary arithmatic over a binary operator; the first argument is + already in R0 (fixnum or min/max) or a floating-point register + (flonum) and the second arguement is in R1 (fixnum or min/max) or a + floating-point register (flonum). +*/ { GC_CAN_IGNORE jit_insn *ref, *ref2, *ref3, *ref4, *refd = NULL, *refdt = NULL, *refslow; int skipped, simple_rand, simple_rand2, reversed = 0, has_fixnum_fast = 1; @@ -4021,11 +4043,16 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj LOG_IT(("inlined %s\n", ((Scheme_Primitive_Proc *)rator)->name)); if (unsafe_fl - && can_unbox(rand, 5, JIT_FPR_NUM-2) - && (!rand2 || can_unbox(rand2, 5, JIT_FPR_NUM-3))) { + && (!rand + || (can_unbox(rand, 5, JIT_FPR_NUM-2) + && (!rand2 || can_unbox(rand2, 5, JIT_FPR_NUM-3))))) { /* Unsafe, unboxed floating-point ops. */ - jitter->unbox++; - if (!rand2) { + int args_unboxed = ((arith != 9) && (arith != 10)); + if (args_unboxed) + jitter->unbox++; + if (!rand) { + CHECK_LIMIT(); + } else if (!rand2) { mz_runstack_skipped(jitter, 1); generate(rand, jitter, 0, 1, JIT_R0); CHECK_LIMIT(); @@ -4038,12 +4065,15 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj CHECK_LIMIT(); mz_runstack_unskipped(jitter, 2); } - --jitter->unbox; - jitter->unbox_depth -= (rand2 ? 2 : 1); + if (args_unboxed) { + --jitter->unbox; + jitter->unbox_depth -= (rand2 ? 2 : 1); + } if (for_branch) mz_rs_sync(); /* needed if arguments were unboxed */ - generate_double_arith(jitter, arith, cmp, 0, !!rand2, 0, - &refd, &refdt, branch_short, 1, 1, jitter->unbox); + generate_double_arith(jitter, arith, cmp, reversed, !!rand2, 0, + &refd, &refdt, branch_short, 1, + args_unboxed, jitter->unbox); CHECK_LIMIT(); ref3 = NULL; ref = NULL; @@ -4056,261 +4086,274 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj /* While generating a fixnum op, don't unbox! */ jitter->unbox = 0; - if (rand2) { - if (SCHEME_INTP(rand2) - && SCHEME_INT_SMALL_ENOUGH(rand2) - && ((arith != 6) - || ((SCHEME_INT_VAL(rand2) <= MAX_TRY_SHIFT) - && (SCHEME_INT_VAL(rand2) >= -MAX_TRY_SHIFT))) - && ((cmp != 3) - || ((SCHEME_INT_VAL(rand2) <= MAX_TRY_SHIFT) - && (SCHEME_INT_VAL(rand2) >= 0)))) { - /* Second is constant, so use constant mode. - For arithmetic shift, only do this if the constant - is in range. */ - v = SCHEME_INT_VAL(rand2); - rand2 = NULL; - } else if (SCHEME_INTP(rand) - && SCHEME_INT_SMALL_ENOUGH(rand) - && (arith != 6) && (arith != -6) - && (cmp != 3)) { - /* First is constant; swap argument order and use constant mode. */ - v = SCHEME_INT_VAL(rand); - cmp = -cmp; - rand = rand2; - rand2 = NULL; - reversed = 1; - } else if ((ok_to_move_local(rand2) - || SCHEME_INTP(rand2)) - && !(ok_to_move_local(rand) - || SCHEME_INTP(rand))) { - /* Second expression is side-effect-free, unlike the first; - swap order and use the fast path for when the first arg is - side-effect free. */ - Scheme_Object *t = rand2; - rand2 = rand; - rand = t; - cmp = -cmp; - reversed = 1; - } - } - - if ((arith == -1) && (orig_args == 1) && !v) { - /* Unary subtract */ + if (!rand) { + /* generating for an nary operation; first arg in R0, + second in R1 */ reversed = 1; - } - - if (rand2) { - simple_rand = (ok_to_move_local(rand) - || SCHEME_INTP(rand)); - if (!simple_rand) - simple_rand2 = SAME_TYPE(SCHEME_TYPE(rand2), scheme_local_type); - else - simple_rand2 = 0; + cmp = -cmp; + refslow = overflow_refslow; + refd = NULL; + refdt = NULL; + ref3 = NULL; + ref = NULL; + ref4 = NULL; } else { - simple_rand = 0; - simple_rand2 = 0; - } + if (rand2) { + if (SCHEME_INTP(rand2) + && SCHEME_INT_SMALL_ENOUGH(rand2) + && ((arith != 6) + || ((SCHEME_INT_VAL(rand2) <= MAX_TRY_SHIFT) + && (SCHEME_INT_VAL(rand2) >= -MAX_TRY_SHIFT))) + && ((cmp != 3) + || ((SCHEME_INT_VAL(rand2) <= MAX_TRY_SHIFT) + && (SCHEME_INT_VAL(rand2) >= 0)))) { + /* Second is constant, so use constant mode. + For arithmetic shift, only do this if the constant + is in range. */ + v = SCHEME_INT_VAL(rand2); + rand2 = NULL; + } else if (SCHEME_INTP(rand) + && SCHEME_INT_SMALL_ENOUGH(rand) + && (arith != 6) && (arith != -6) + && (cmp != 3)) { + /* First is constant; swap argument order and use constant mode. */ + v = SCHEME_INT_VAL(rand); + cmp = -cmp; + rand = rand2; + rand2 = NULL; + reversed = 1; + } else if ((ok_to_move_local(rand2) + || SCHEME_INTP(rand2)) + && !(ok_to_move_local(rand) + || SCHEME_INTP(rand))) { + /* Second expression is side-effect-free, unlike the first; + swap order and use the fast path for when the first arg is + side-effect free. */ + Scheme_Object *t = rand2; + rand2 = rand; + rand = t; + cmp = -cmp; + reversed = 1; + } + } - if (rand2 && !simple_rand && !simple_rand2) - skipped = orig_args - 1; - else - skipped = orig_args; + if ((arith == -1) && (orig_args == 1) && !v) { + /* Unary subtract */ + reversed = 1; + } + + if (rand2) { + simple_rand = (ok_to_move_local(rand) + || SCHEME_INTP(rand)); + if (!simple_rand) + simple_rand2 = SAME_TYPE(SCHEME_TYPE(rand2), scheme_local_type); + else + simple_rand2 = 0; + } else { + simple_rand = 0; + simple_rand2 = 0; + } - mz_runstack_skipped(jitter, skipped); + if (rand2 && !simple_rand && !simple_rand2) + skipped = orig_args - 1; + else + skipped = orig_args; - if (rand2 && !simple_rand && !simple_rand2) { - mz_runstack_skipped(jitter, 1); - generate_non_tail(rand, jitter, 0, 1); /* sync'd later */ - CHECK_LIMIT(); - mz_runstack_unskipped(jitter, 1); - mz_rs_dec(1); - CHECK_RUNSTACK_OVERFLOW(); - mz_runstack_pushed(jitter, 1); - mz_rs_str(JIT_R0); - } - /* not sync'd... */ + mz_runstack_skipped(jitter, skipped); - if (simple_rand2) { - if (SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type)) - generate(rand, jitter, 0, 0, JIT_R1); /* sync'd below */ - else { - generate_non_tail(rand, jitter, 0, 1); /* sync'd below */ + if (rand2 && !simple_rand && !simple_rand2) { + mz_runstack_skipped(jitter, 1); + generate_non_tail(rand, jitter, 0, 1); /* sync'd later */ CHECK_LIMIT(); - jit_movr_p(JIT_R1, JIT_R0); + mz_runstack_unskipped(jitter, 1); + mz_rs_dec(1); + CHECK_RUNSTACK_OVERFLOW(); + mz_runstack_pushed(jitter, 1); + mz_rs_str(JIT_R0); + } + /* not sync'd... */ + + if (simple_rand2) { + if (SAME_TYPE(SCHEME_TYPE(rand), scheme_local_type)) + generate(rand, jitter, 0, 0, JIT_R1); /* sync'd below */ + else { + generate_non_tail(rand, jitter, 0, 1); /* sync'd below */ + CHECK_LIMIT(); + jit_movr_p(JIT_R1, JIT_R0); + } + CHECK_LIMIT(); + generate(rand2, jitter, 0, 0, JIT_R0); /* sync'd below */ + } else { + generate_non_tail(rand2 ? rand2 : rand, jitter, 0, 1); /* sync'd below */ } CHECK_LIMIT(); - generate(rand2, jitter, 0, 0, JIT_R0); /* sync'd below */ - } else { - generate_non_tail(rand2 ? rand2 : rand, jitter, 0, 1); /* sync'd below */ - } - CHECK_LIMIT(); - /* sync'd in three branches below */ + /* sync'd in three branches below */ - if (arith == -2) { - if (rand2 || (v != 1) || reversed) - has_fixnum_fast = 0; - } + if (arith == -2) { + if (rand2 || (v != 1) || reversed) + has_fixnum_fast = 0; + } - /* rand2 in R0, and rand in R1 unless it's simple */ + /* rand2 in R0, and rand in R1 unless it's simple */ - if (simple_rand || simple_rand2) { - int pos, va; + if (simple_rand || simple_rand2) { + int pos, va; - if (simple_rand && SCHEME_INTP(rand)) { - (void)jit_movi_p(JIT_R1, rand); - va = JIT_R0; - } else { - if (simple_rand) { - pos = mz_remap(SCHEME_LOCAL_POS(rand)); - mz_rs_ldxi(JIT_R1, pos); + if (simple_rand && SCHEME_INTP(rand)) { + (void)jit_movi_p(JIT_R1, rand); + va = JIT_R0; + } else { + if (simple_rand) { + pos = mz_remap(SCHEME_LOCAL_POS(rand)); + mz_rs_ldxi(JIT_R1, pos); + } + if (!unsafe_fx && !unsafe_fl) { + /* check both fixnum bits at once by ANDing into R2: */ + jit_andr_ul(JIT_R2, JIT_R0, JIT_R1); + va = JIT_R2; + } } + if (!unsafe_fx && !unsafe_fl) { + mz_rs_sync(); + + __START_TINY_JUMPS__(1); + ref2 = jit_bmsi_ul(jit_forward(), va, 0x1); + __END_TINY_JUMPS__(1); + } else { + ref2 = NULL; + if (for_branch) mz_rs_sync(); + } + + if (unsafe_fl || (!unsafe_fx && !SCHEME_INTP(rand) && can_fast_double(arith, cmp, 1))) { + /* Maybe they're both doubles... */ + if (unsafe_fl) mz_rs_sync(); + generate_double_arith(jitter, arith, cmp, reversed, 1, 0, &refd, &refdt, branch_short, unsafe_fl, 0, 0); + CHECK_LIMIT(); + } + + if (!unsafe_fx && !unsafe_fl) { + if (!has_fixnum_fast) { + __START_TINY_JUMPS__(1); + mz_patch_branch(ref2); + __END_TINY_JUMPS__(1); + } + + /* Slow path */ + refslow = generate_arith_slow_path(jitter, rator, &ref, &ref4, for_branch, orig_args, reversed, arith, 0, 0); + + if (has_fixnum_fast) { + __START_TINY_JUMPS__(1); + mz_patch_branch(ref2); + __END_TINY_JUMPS__(1); + } + } else { + refslow = overflow_refslow; + ref = NULL; + ref4 = NULL; + } + CHECK_LIMIT(); + } else if (rand2) { + /* Move rand result back into R1 */ + mz_rs_ldr(JIT_R1); + mz_rs_inc(1); + mz_runstack_popped(jitter, 1); + + if (!unsafe_fx && !unsafe_fl) { + mz_rs_sync(); + /* check both fixnum bits at once by ANDing into R2: */ jit_andr_ul(JIT_R2, JIT_R0, JIT_R1); - va = JIT_R2; - } - } - - if (!unsafe_fx && !unsafe_fl) { - mz_rs_sync(); - - __START_TINY_JUMPS__(1); - ref2 = jit_bmsi_ul(jit_forward(), va, 0x1); - __END_TINY_JUMPS__(1); - } else { - ref2 = NULL; - if (for_branch) mz_rs_sync(); - } - - if (unsafe_fl || (!unsafe_fx && !SCHEME_INTP(rand) && can_fast_double(arith, cmp, 1))) { - /* Maybe they're both doubles... */ - if (unsafe_fl) mz_rs_sync(); - generate_double_arith(jitter, arith, cmp, reversed, 1, 0, &refd, &refdt, branch_short, unsafe_fl, 0, 0); - CHECK_LIMIT(); - } - - if (!unsafe_fx && !unsafe_fl) { - if (!has_fixnum_fast) { __START_TINY_JUMPS__(1); - mz_patch_branch(ref2); + ref2 = jit_bmsi_ul(jit_forward(), JIT_R2, 0x1); __END_TINY_JUMPS__(1); + CHECK_LIMIT(); + } else { + if (for_branch) mz_rs_sync(); + ref2 = NULL; + CHECK_LIMIT(); } - /* Slow path */ - refslow = generate_arith_slow_path(jitter, rator, &ref, &ref4, for_branch, orig_args, reversed, arith, 0, 0); - - if (has_fixnum_fast) { - __START_TINY_JUMPS__(1); - mz_patch_branch(ref2); - __END_TINY_JUMPS__(1); - } - } else { - refslow = NULL; - ref = NULL; - ref4 = NULL; - } - CHECK_LIMIT(); - } else if (rand2) { - /* Move rand result back into R1 */ - mz_rs_ldr(JIT_R1); - mz_rs_inc(1); - mz_runstack_popped(jitter, 1); - - if (!unsafe_fx && !unsafe_fl) { - mz_rs_sync(); - - /* check both fixnum bits at once by ANDing into R2: */ - jit_andr_ul(JIT_R2, JIT_R0, JIT_R1); - __START_TINY_JUMPS__(1); - ref2 = jit_bmsi_ul(jit_forward(), JIT_R2, 0x1); - __END_TINY_JUMPS__(1); - CHECK_LIMIT(); - } else { - if (for_branch) mz_rs_sync(); - ref2 = NULL; - CHECK_LIMIT(); - } - - if (unsafe_fl || (!unsafe_fx && can_fast_double(arith, cmp, 1))) { - /* Maybe they're both doubles... */ - if (unsafe_fl) mz_rs_sync(); - generate_double_arith(jitter, arith, cmp, reversed, 1, 0, &refd, &refdt, branch_short, unsafe_fl, 0, 0); - CHECK_LIMIT(); - } - - if (!unsafe_fx && !unsafe_fl) { - if (!has_fixnum_fast) { - __START_TINY_JUMPS__(1); - mz_patch_branch(ref2); - __END_TINY_JUMPS__(1); + if (unsafe_fl || (!unsafe_fx && can_fast_double(arith, cmp, 1))) { + /* Maybe they're both doubles... */ + if (unsafe_fl) mz_rs_sync(); + generate_double_arith(jitter, arith, cmp, reversed, 1, 0, &refd, &refdt, branch_short, unsafe_fl, 0, 0); + CHECK_LIMIT(); } - /* Slow path */ - refslow = generate_arith_slow_path(jitter, rator, &ref, &ref4, for_branch, orig_args, reversed, arith, 0, 0); + if (!unsafe_fx && !unsafe_fl) { + if (!has_fixnum_fast) { + __START_TINY_JUMPS__(1); + mz_patch_branch(ref2); + __END_TINY_JUMPS__(1); + } + + /* Slow path */ + refslow = generate_arith_slow_path(jitter, rator, &ref, &ref4, for_branch, orig_args, reversed, arith, 0, 0); - if (has_fixnum_fast) { - /* Fixnum branch: */ - __START_TINY_JUMPS__(1); - mz_patch_branch(ref2); - __END_TINY_JUMPS__(1); - } - CHECK_LIMIT(); - } else { - refslow = NULL; - ref = NULL; - ref4 = NULL; - } - } else { - /* Only one argument: */ - if (!unsafe_fx && !unsafe_fl) { - mz_rs_sync(); - __START_TINY_JUMPS__(1); - ref2 = jit_bmsi_ul(jit_forward(), JIT_R0, 0x1); - __END_TINY_JUMPS__(1); - } else { - if (for_branch) mz_rs_sync(); - ref2 = NULL; - } - - if (unsafe_fl - || ((orig_args != 2) /* <- heuristic: we could generate code when an exact argument is - given, but the extra FP code is probably not worthwhile. */ - && !unsafe_fx - && can_fast_double(arith, cmp, 0) - /* watch out: divide by 0 is special: */ - && ((arith != -2) || v || reversed))) { - /* Maybe it's a double... */ - generate_double_arith(jitter, arith, cmp, reversed, 0, v, &refd, &refdt, branch_short, unsafe_fl, 0, 0); - CHECK_LIMIT(); - } - - if (!unsafe_fx && !unsafe_fl) { - if (!has_fixnum_fast) { - __START_TINY_JUMPS__(1); - mz_patch_branch(ref2); - __END_TINY_JUMPS__(1); - } - - /* Slow path */ - refslow = generate_arith_slow_path(jitter, rator, &ref, &ref4, for_branch, orig_args, reversed, arith, 1, v); - - if (has_fixnum_fast) { - __START_TINY_JUMPS__(1); - mz_patch_branch(ref2); - __END_TINY_JUMPS__(1); + if (has_fixnum_fast) { + /* Fixnum branch: */ + __START_TINY_JUMPS__(1); + mz_patch_branch(ref2); + __END_TINY_JUMPS__(1); + } + CHECK_LIMIT(); + } else { + refslow = overflow_refslow; + ref = NULL; + ref4 = NULL; } } else { - refslow = NULL; - ref = NULL; - ref4 = NULL; + /* Only one argument: */ + if (!unsafe_fx && !unsafe_fl) { + mz_rs_sync(); + __START_TINY_JUMPS__(1); + ref2 = jit_bmsi_ul(jit_forward(), JIT_R0, 0x1); + __END_TINY_JUMPS__(1); + } else { + if (for_branch) mz_rs_sync(); + ref2 = NULL; + } + + if (unsafe_fl + || ((orig_args != 2) /* <- heuristic: we could generate code when an exact argument is + given, but the extra FP code is probably not worthwhile. */ + && !unsafe_fx + && can_fast_double(arith, cmp, 0) + /* watch out: divide by 0 is special: */ + && ((arith != -2) || v || reversed))) { + /* Maybe it's a double... */ + generate_double_arith(jitter, arith, cmp, reversed, 0, v, &refd, &refdt, branch_short, unsafe_fl, 0, 0); + CHECK_LIMIT(); + } + + if (!unsafe_fx && !unsafe_fl) { + if (!has_fixnum_fast) { + __START_TINY_JUMPS__(1); + mz_patch_branch(ref2); + __END_TINY_JUMPS__(1); + } + + /* Slow path */ + refslow = generate_arith_slow_path(jitter, rator, &ref, &ref4, for_branch, orig_args, reversed, arith, 1, v); + + if (has_fixnum_fast) { + __START_TINY_JUMPS__(1); + mz_patch_branch(ref2); + __END_TINY_JUMPS__(1); + } + } else { + refslow = overflow_refslow; + ref = NULL; + ref4 = NULL; + } } + + CHECK_LIMIT(); + + mz_runstack_unskipped(jitter, skipped); } - CHECK_LIMIT(); - - mz_runstack_unskipped(jitter, skipped); - __START_SHORT_JUMPS__(branch_short); if (!unsafe_fl) { @@ -4325,7 +4368,7 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj /* First arg is in JIT_R1, second is in JIT_R0 */ if (arith == 1) { jit_andi_ul(JIT_R2, JIT_R1, (~0x1)); - if (unsafe_fx) + if (unsafe_fx && !overflow_refslow) jit_addr_l(JIT_R2, JIT_R2, JIT_R0); else (void)jit_boaddr_l(refslow, JIT_R2, JIT_R0); @@ -4333,13 +4376,13 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj } else if (arith == -1) { if (reversed) { jit_movr_p(JIT_R2, JIT_R0); - if (unsafe_fx) + if (unsafe_fx && !overflow_refslow) jit_subr_l(JIT_R2, JIT_R2, JIT_R1); else (void)jit_bosubr_l(refslow, JIT_R2, JIT_R1); } else { jit_movr_p(JIT_R2, JIT_R1); - if (unsafe_fx) + if (unsafe_fx && !overflow_refslow) (void)jit_subr_l(JIT_R2, JIT_R2, JIT_R0); else (void)jit_bosubr_l(refslow, JIT_R2, JIT_R0); @@ -4348,7 +4391,7 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj } else if (arith == 2) { jit_andi_ul(JIT_R2, JIT_R1, (~0x1)); jit_rshi_l(JIT_V1, JIT_R0, 0x1); - if (unsafe_fx) + if (unsafe_fx && !overflow_refslow) jit_mulr_l(JIT_V1, JIT_V1, JIT_R2); else (void)jit_bomulr_l(refslow, JIT_V1, JIT_R2); @@ -4363,14 +4406,14 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj jit_rshi_l(JIT_V1, JIT_R0, 0x1); jit_rshi_l(JIT_R2, JIT_R1, 0x1); if (reversed) { - if (!unsafe_fx) + if (!unsafe_fx || overflow_refslow) (void)jit_beqi_l(refslow, JIT_R2, 0); if (arith == -3) jit_divr_l(JIT_R0, JIT_V1, JIT_R2); else jit_modr_l(JIT_R0, JIT_V1, JIT_R2); } else { - if (!unsafe_fx) + if (!unsafe_fx || overflow_refslow) (void)jit_beqi_l(refslow, JIT_V1, 0); if (arith == -3) jit_divr_l(JIT_R0, JIT_R2, JIT_V1); @@ -4397,14 +4440,14 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj int v2 = (reversed ? JIT_R1 : JIT_R0); jit_insn *refi, *refc; - if (!unsafe_fx) + if (!unsafe_fx || overflow_refslow) refi = jit_bgei_l(jit_forward(), v2, (long)scheme_make_integer(0)); else refi = NULL; - if (!unsafe_fx || (arith == -6)) { + if (!unsafe_fx || overflow_refslow || (arith == -6)) { /* Right shift */ - if (!unsafe_fx) { + if (!unsafe_fx || overflow_refslow) { /* check for a small enough shift */ (void)jit_blti_l(refslow, v2, scheme_make_integer(-MAX_TRY_SHIFT)); jit_notr_l(JIT_V1, v2); @@ -4412,7 +4455,7 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj } else { jit_rshi_l(JIT_V1, v2, 0x1); } - if (!unsafe_fx) + if (!unsafe_fx || overflow_refslow) jit_addi_l(JIT_V1, JIT_V1, 0x1); CHECK_LIMIT(); #ifdef MZ_USE_JIT_I386 @@ -4423,7 +4466,7 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj jit_rshr_l(JIT_R2, v1, JIT_V1); #endif jit_ori_l(JIT_R0, JIT_R2, 0x1); - if (!unsafe_fx) + if (!unsafe_fx || overflow_refslow) refc = jit_jmpi(jit_forward()); else refc = NULL; @@ -4432,10 +4475,10 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj refc = NULL; /* Left shift */ - if (!unsafe_fx || (arith == 6)) { + if (!unsafe_fx || overflow_refslow || (arith == 6)) { if (refi) mz_patch_branch(refi); - if (!unsafe_fx) + if (!unsafe_fx || overflow_refslow) (void)jit_bgti_l(refslow, v2, (long)scheme_make_integer(MAX_TRY_SHIFT)); jit_rshi_l(JIT_V1, v2, 0x1); jit_andi_l(v1, v1, (~0x1)); @@ -4449,8 +4492,8 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj CHECK_LIMIT(); /* If shifting back right produces a different result, that's overflow... */ jit_rshr_l(JIT_V1, JIT_R2, JIT_V1); - /* !! In case we go refslow, it nseed to add back tag to v1 !! */ - if (!unsafe_fx) + /* !! In case we go refslow, it needs to add back tag to v1 !! */ + if (!unsafe_fx || overflow_refslow) (void)jit_bner_p(refslow, JIT_V1, v1); /* No overflow. */ jit_ori_l(JIT_R0, JIT_R2, 0x1); @@ -4479,7 +4522,7 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj /* Non-constant arg is in JIT_R0 */ if (arith == 1) { jit_movr_p(JIT_R2, JIT_R0); - if (unsafe_fx) + if (unsafe_fx && !overflow_refslow) jit_addi_l(JIT_R2, JIT_R2, v << 1); else (void)jit_boaddi_l(refslow, JIT_R2, v << 1); @@ -4487,14 +4530,14 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj } else if (arith == -1) { if (reversed) { (void)jit_movi_p(JIT_R2, scheme_make_integer(v)); - if (unsafe_fx) + if (unsafe_fx && !overflow_refslow) jit_subr_l(JIT_R2, JIT_R2, JIT_R0); else (void)jit_bosubr_l(refslow, JIT_R2, JIT_R0); jit_addi_ul(JIT_R0, JIT_R2, 0x1); } else { jit_movr_p(JIT_R2, JIT_R0); - if (unsafe_fx) + if (unsafe_fx && !overflow_refslow) jit_subi_l(JIT_R2, JIT_R2, v << 1); else (void)jit_bosubi_l(refslow, JIT_R2, v << 1); @@ -4509,7 +4552,7 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj (void)jit_movi_p(JIT_R1, scheme_make_integer(v)); jit_andi_ul(JIT_R2, JIT_R1, (~0x1)); jit_rshi_l(JIT_V1, JIT_R0, 0x1); - if (unsafe_fx) + if (unsafe_fx && !overflow_refslow) jit_mulr_l(JIT_V1, JIT_V1, JIT_R2); else (void)jit_bomulr_l(refslow, JIT_V1, JIT_R2); @@ -4549,7 +4592,7 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj } else { jit_andi_l(JIT_R0, JIT_R0, (~0x1)); jit_lshi_l(JIT_R2, JIT_R0, v); - if (!unsafe_fx) { + if (!unsafe_fx && !overflow_refslow) { /* If shifting back right produces a different result, that's overflow... */ jit_rshi_l(JIT_V1, JIT_R2, v); /* !! In case we go refslow, it nseed to add back tag to JIT_R0 !! */ @@ -4584,7 +4627,7 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj refc = jit_bgei_l(jit_forward(), JIT_R0, (long)scheme_make_integer(0)); __END_INNER_TINY__(branch_short); /* watch out for most negative fixnum! */ - if (!unsafe_fx) + if (!unsafe_fx || overflow_refslow) (void)jit_beqi_p(refslow, JIT_R0, (void *)(((long)1 << ((8 * JIT_WORD_SIZE) - 1)) | 0x1)); (void)jit_movi_p(JIT_R1, scheme_make_integer(0)); jit_subr_l(JIT_R0, JIT_R1, JIT_R0); @@ -4624,7 +4667,7 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj switch (cmp) { case -3: if (rand2) { - if (!unsafe_fx) { + if (!unsafe_fx || overflow_refslow) { (void)jit_blti_l(refslow, JIT_R1, 0); (void)jit_bgti_l(refslow, JIT_R1, (long)scheme_make_integer(MAX_TRY_SHIFT)); } @@ -4677,7 +4720,7 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj default: case 3: if (rand2) { - if (!unsafe_fx) { + if (!unsafe_fx || overflow_refslow) { (void)jit_blti_l(refslow, JIT_R0, 0); (void)jit_bgti_l(refslow, JIT_R0, (long)scheme_make_integer(MAX_TRY_SHIFT)); } @@ -4728,6 +4771,263 @@ static int generate_arith(mz_jit_state *jitter, Scheme_Object *rator, Scheme_Obj return 1; } +#define MAX_NON_SIMPLE_ARGS 5 + +static int extract_nary_arg(int reg, int n, mz_jit_state *jitter, Scheme_App_Rec *app, Scheme_Object **alt_args) +{ + if (!alt_args) { + jit_ldxi_p(reg, JIT_RUNSTACK, WORDS_TO_BYTES(n)); + if (jitter->unbox) + generate_unboxing(jitter); + } else if (is_constant_and_avoids_r1(app->args[n+1])) { + generate(app->args[n+1], jitter, 0, 0, reg); + } else { + int i, j = 0; + for (i = 0; i < n; i++) { + if (!is_constant_and_avoids_r1(app->args[i+1])) + j++; + } + jit_ldxi_p(reg, JIT_RUNSTACK, WORDS_TO_BYTES(j)); + if (jitter->unbox) + generate_unboxing(jitter); + } + CHECK_LIMIT(); + return 1; +} + +static void patch_nary_branches(mz_jit_state *jitter, jit_insn **refs, GC_CAN_IGNORE jit_insn *reffalse) +{ + if (refs[0]) { + mz_patch_branch_at(refs[0], reffalse); + } + if (refs[1]) { + mz_patch_branch_at(refs[1], reffalse); + } + if (refs[2]) { + jit_patch_movi(refs[2], reffalse); + } +} + +static int generate_nary_arith(mz_jit_state *jitter, Scheme_App_Rec *app, + int arith, int cmp, jit_insn **for_branch, int branch_short) +{ + int c, i, non_simple_c = 0, stack_c, use_fl = 1, use_fx = 1, trigger_arg = 0; + Scheme_Object *non_simples[1+MAX_NON_SIMPLE_ARGS], **alt_args, *v; + GC_CAN_IGNORE jit_insn *refslow, *reffx, *refdone; + GC_CAN_IGNORE jit_insn *refs[3], *reffalse = NULL, *refdone3 = NULL; +#ifdef INLINE_FP_OPS + int args_unboxed; + GC_CAN_IGNORE jit_insn *reffl, *refdone2; +#endif + + if (arith == -2) { + /* can't inline fixnum '/' */ + use_fx = 0; + } else if ((arith == 3) + || (arith == 4) + || (arith == 5)) { + /* bitwise operators are fixnum, only */ + use_fl = 0; + } + + c = app->num_args; + for (i = 0; i < c; i++) { + v = app->args[i+1]; + if (!is_constant_and_avoids_r1(v)) { + if (non_simple_c < MAX_NON_SIMPLE_ARGS) + non_simples[1+non_simple_c] = v; + non_simple_c++; + } + if (SCHEME_INTP(v)) { + use_fl = 0; + if (trigger_arg == i) + trigger_arg++; + } else if (SCHEME_FLOATP(v)) { + use_fx = 0; + if (trigger_arg == i) + trigger_arg++; + } + } + if ((non_simple_c <= MAX_NON_SIMPLE_ARGS) && (non_simple_c < c)) { + stack_c = non_simple_c; + alt_args = non_simples; + non_simples[0] = app->args[0]; + mz_runstack_skipped(jitter, c - stack_c); + } else { + stack_c = c; + alt_args = NULL; + } + + if (stack_c) + generate_app(app, alt_args, stack_c, jitter, 0, 0, 2); + CHECK_LIMIT(); + mz_rs_sync(); + + __START_SHORT_JUMPS__(c < 100); + + if (trigger_arg > c) { + /* we don't expect this to happen, since constant-folding would + have collapsed it */ + trigger_arg = 0; + } + + extract_nary_arg(JIT_R0, trigger_arg, jitter, app, alt_args); + CHECK_LIMIT(); + /* trigger argument a fixnum? */ + reffx = jit_bmsi_ul(jit_forward(), JIT_R0, 0x1); + +#ifdef INLINE_FP_OPS + if (use_fl) { + /* First argument a flonum? */ + jit_ldxi_s(JIT_R0, JIT_R0, &((Scheme_Object *)0x0)->type); + reffl = jit_beqi_p(jit_forward(), JIT_R0, scheme_double_type); + CHECK_LIMIT(); + } else { + reffl = NULL; + } +#endif + + if (!use_fx) { + mz_patch_branch(reffx); + } + + refslow = _jit.x.pc; + /* slow path */ + if (alt_args) { + /* get all args on runstack */ + int delta = stack_c - c; + for (i = 0; i < c; i++) { + if (delta) { + extract_nary_arg(JIT_R0, i, jitter, app, alt_args); + CHECK_LIMIT(); + jit_stxi_p(WORDS_TO_BYTES(i+delta), JIT_RUNSTACK, JIT_R0); + } else + break; + } + jit_subi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(c - stack_c)); + } + (void)jit_movi_p(JIT_V1, ((Scheme_Primitive_Proc *)app->args[0])->prim_val); + (void)jit_movi_i(JIT_R1, c); + (void)jit_calli(call_original_nary_arith_code); + if (alt_args) { + jit_addi_p(JIT_RUNSTACK, JIT_RUNSTACK, WORDS_TO_BYTES(c - stack_c)); + } + refdone = jit_jmpi(jit_forward()); + if (!arith) { + reffalse = _jit.x.pc; + jit_movi_p(JIT_R0, &scheme_false); + refdone3 = jit_jmpi(jit_forward()); + } else { + reffalse = NULL; + } + +#ifdef INLINE_FP_OPS + if (use_fl) { + /* Flonum branch: */ + mz_patch_branch(reffl); + for (i = 0; i < c; i++) { + if (i != trigger_arg) { + v = app->args[i+1]; + if (!SCHEME_FLOATP(v)) { + extract_nary_arg(JIT_R0, i, jitter, app, alt_args); + (void)jit_bmsi_ul(refslow, JIT_R0, 0x1); + jit_ldxi_s(JIT_R0, JIT_R0, &((Scheme_Object *)0x0)->type); + (void)jit_bnei_p(refslow, JIT_R0, scheme_double_type); + CHECK_LIMIT(); + } + } + } + /* All flonums, so inline fast flonum combination */ + args_unboxed = ((arith != 9) && (arith != 10)); /* no unboxing for min & max */ + if (args_unboxed) + jitter->unbox++; + extract_nary_arg(JIT_R0, 0, jitter, app, alt_args); + CHECK_LIMIT(); + for (i = 1; i < c; i++) { + if (!arith && (i > 1)) + extract_nary_arg(JIT_R0, i - 1, jitter, app, alt_args); + extract_nary_arg((args_unboxed ? JIT_R0 : JIT_R1), i, jitter, app, alt_args); + if ((i == c - 1) && args_unboxed) --jitter->unbox; /* box last result */ + if (!arith) memset(refs, 0, sizeof(refs)); + __END_SHORT_JUMPS__(c < 100); + generate_arith(jitter, NULL, NULL, scheme_void, 2, arith, cmp, 0, + !arith ? refs : NULL, c < 100, 0, 1, NULL); + __START_SHORT_JUMPS__(c < 100); + if (!arith) patch_nary_branches(jitter, refs, reffalse); + CHECK_LIMIT(); + } + if (use_fx) { + refdone2 = jit_jmpi(jit_forward()); + } else { + refdone2 = NULL; + } + } else { + refdone2 = NULL; + } +#endif + + if (use_fx) { + /* Fixnum branch */ + mz_patch_branch(reffx); + for (i = 0; i < c; i++) { + if (i != trigger_arg) { + v = app->args[i+1]; + if (!SCHEME_INTP(v)) { + extract_nary_arg(JIT_R0, i, jitter, app, alt_args); + CHECK_LIMIT(); + (void)jit_bmci_ul(refslow, JIT_R0, 0x1); + CHECK_LIMIT(); + } + } + } + /* All fixnums, so inline fast fixnum combination; + on overflow, bail out to refslow. */ + extract_nary_arg(JIT_R0, 0, jitter, app, alt_args); + for (i = 1; i < c; i++) { + if (!arith && (i > 1)) + extract_nary_arg(JIT_R0, i - 1, jitter, app, alt_args); + extract_nary_arg(JIT_R1, i, jitter, app, alt_args); + CHECK_LIMIT(); + if (!arith) memset(refs, 0, sizeof(refs)); + __END_SHORT_JUMPS__(c < 100); + generate_arith(jitter, NULL, NULL, scheme_void, 2, arith, cmp, 0, + !arith ? refs : NULL, c < 100, 1, 0, refslow); + __START_SHORT_JUMPS__(c < 100); + if (!arith) patch_nary_branches(jitter, refs, reffalse); + CHECK_LIMIT(); + } + } + +#ifdef INLINE_FP_OPS + if (use_fl && use_fx) { + mz_patch_ucbranch(refdone2); + } +#endif + if (!arith) { + jit_movi_p(JIT_R0, scheme_true); + } + mz_patch_ucbranch(refdone); + if (refdone3) + mz_patch_ucbranch(refdone3); + + __END_SHORT_JUMPS__(c < 100); + + if (stack_c) { + mz_rs_inc(stack_c); /* no sync */ + mz_runstack_popped(jitter, stack_c); + } + if (c > stack_c) + mz_runstack_unskipped(jitter, c - stack_c); + + if (!arith && for_branch) { + __START_SHORT_JUMPS__(branch_short); + for_branch[0] = jit_beqi_p(jit_forward(), JIT_R0, &scheme_false); + __END_SHORT_JUMPS__(branch_short); + } + + return 1; +} + static int generate_inlined_constant_test(mz_jit_state *jitter, Scheme_App2_Rec *app, Scheme_Object *cnst, Scheme_Object *cnst2, jit_insn **for_branch, int branch_short, int need_sync) @@ -4978,13 +5278,13 @@ static int generate_inlined_unary(mz_jit_state *jitter, Scheme_App2_Rec *app, in generate_inlined_constant_test(jitter, app, scheme_eof, NULL, for_branch, branch_short, need_sync); return 1; } else if (IS_NAMED_PRIM(rator, "zero?")) { - generate_arith(jitter, rator, app->rand, NULL, 1, 0, 0, 0, for_branch, branch_short, 0, 0); + generate_arith(jitter, rator, app->rand, NULL, 1, 0, 0, 0, for_branch, branch_short, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "negative?")) { - generate_arith(jitter, rator, app->rand, NULL, 1, 0, -2, 0, for_branch, branch_short, 0, 0); + generate_arith(jitter, rator, app->rand, NULL, 1, 0, -2, 0, for_branch, branch_short, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "positive?")) { - generate_arith(jitter, rator, app->rand, NULL, 1, 0, 2, 0, for_branch, branch_short, 0, 0); + generate_arith(jitter, rator, app->rand, NULL, 1, 0, 2, 0, for_branch, branch_short, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "exact-nonnegative-integer?") || IS_NAMED_PRIM(rator, "exact-positive-integer?")) { @@ -5303,34 +5603,34 @@ static int generate_inlined_unary(mz_jit_state *jitter, Scheme_App2_Rec *app, in return 1; } else if (IS_NAMED_PRIM(rator, "add1")) { - generate_arith(jitter, rator, app->rand, NULL, 1, 1, 0, 1, NULL, 1, 0, 0); + generate_arith(jitter, rator, app->rand, NULL, 1, 1, 0, 1, NULL, 1, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "sub1")) { - generate_arith(jitter, rator, app->rand, NULL, 1, -1, 0, 1, NULL, 1, 0, 0); + generate_arith(jitter, rator, app->rand, NULL, 1, -1, 0, 1, NULL, 1, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "-")) { - generate_arith(jitter, rator, app->rand, NULL, 1, -1, 0, 0, NULL, 1, 0, 0); + generate_arith(jitter, rator, app->rand, NULL, 1, -1, 0, 0, NULL, 1, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "abs")) { - generate_arith(jitter, rator, app->rand, NULL, 1, 11, 0, 0, NULL, 1, 0, 0); + generate_arith(jitter, rator, app->rand, NULL, 1, 11, 0, 0, NULL, 1, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fxabs")) { - generate_arith(jitter, rator, app->rand, NULL, 1, 11, 0, 0, NULL, 1, 1, 0); + generate_arith(jitter, rator, app->rand, NULL, 1, 11, 0, 0, NULL, 1, 1, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-flabs")) { - generate_arith(jitter, rator, app->rand, NULL, 1, 11, 0, 0, NULL, 1, 0, 1); + generate_arith(jitter, rator, app->rand, NULL, 1, 11, 0, 0, NULL, 1, 0, 1, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "exact->inexact")) { - generate_arith(jitter, rator, app->rand, NULL, 1, 12, 0, 0, NULL, 1, 0, 0); + generate_arith(jitter, rator, app->rand, NULL, 1, 12, 0, 0, NULL, 1, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fx->fl")) { - generate_arith(jitter, rator, app->rand, NULL, 1, 12, 0, 0, NULL, 1, 1, 0); + generate_arith(jitter, rator, app->rand, NULL, 1, 12, 0, 0, NULL, 1, 1, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "bitwise-not")) { - generate_arith(jitter, rator, app->rand, NULL, 1, 7, 0, 9, NULL, 1, 0, 0); + generate_arith(jitter, rator, app->rand, NULL, 1, 7, 0, 9, NULL, 1, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fxnot")) { - generate_arith(jitter, rator, app->rand, NULL, 1, 7, 0, 9, NULL, 1, 1, 0); + generate_arith(jitter, rator, app->rand, NULL, 1, 7, 0, 9, NULL, 1, 1, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "vector-immutable") || IS_NAMED_PRIM(rator, "vector")) { @@ -5712,134 +6012,134 @@ static int generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i return 1; } else if (IS_NAMED_PRIM(rator, "=")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, 0, 0, for_branch, branch_short, 0, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, 0, 0, for_branch, branch_short, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fx=")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, 0, 0, for_branch, branch_short, 1, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, 0, 0, for_branch, branch_short, 1, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fl=")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, 0, 0, for_branch, branch_short, 0, 1); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, 0, 0, for_branch, branch_short, 0, 1, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "<=")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, -1, 0, for_branch, branch_short, 0, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, -1, 0, for_branch, branch_short, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fx<=")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, -1, 0, for_branch, branch_short, 1, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, -1, 0, for_branch, branch_short, 1, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fl<=")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, -1, 0, for_branch, branch_short, 0, 1); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, -1, 0, for_branch, branch_short, 0, 1, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "<")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, -2, 0, for_branch, branch_short, 0, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, -2, 0, for_branch, branch_short, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fx<")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, -2, 0, for_branch, branch_short, 1, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, -2, 0, for_branch, branch_short, 1, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fl<")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, -2, 0, for_branch, branch_short, 0, 1); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, -2, 0, for_branch, branch_short, 0, 1, NULL); return 1; } else if (IS_NAMED_PRIM(rator, ">=")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, 1, 0, for_branch, branch_short, 0, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, 1, 0, for_branch, branch_short, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fx>=")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, 1, 0, for_branch, branch_short, 1, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, 1, 0, for_branch, branch_short, 1, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fl>=")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, 1, 0, for_branch, branch_short, 0, 1); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, 1, 0, for_branch, branch_short, 0, 1, NULL); return 1; } else if (IS_NAMED_PRIM(rator, ">")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, 2, 0, for_branch, branch_short, 0, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, 2, 0, for_branch, branch_short, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fx>")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, 2, 0, for_branch, branch_short, 1, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, 2, 0, for_branch, branch_short, 1, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fl>")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, 2, 0, for_branch, branch_short, 0, 1); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, 2, 0, for_branch, branch_short, 0, 1, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "bitwise-bit-set?")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, 3, 0, for_branch, branch_short, 0, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 0, 3, 0, for_branch, branch_short, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "char=?")) { generate_binary_char(jitter, app, for_branch, branch_short); return 1; } else if (!for_branch) { if (IS_NAMED_PRIM(rator, "+")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 1, 0, 0, NULL, 1, 0, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 1, 0, 0, NULL, 1, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fx+")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 1, 0, 0, NULL, 1, 1, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 1, 0, 0, NULL, 1, 1, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fl+")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 1, 0, 0, NULL, 1, 0, 1); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 1, 0, 0, NULL, 1, 0, 1, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "-")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, -1, 0, 0, NULL, 1, 0, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, -1, 0, 0, NULL, 1, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fx-")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, -1, 0, 0, NULL, 1, 1, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, -1, 0, 0, NULL, 1, 1, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fl-")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, -1, 0, 0, NULL, 1, 0, 1); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, -1, 0, 0, NULL, 1, 0, 1, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "*")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 2, 0, 0, NULL, 1, 0, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 2, 0, 0, NULL, 1, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fx*")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 2, 0, 0, NULL, 1, 1, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 2, 0, 0, NULL, 1, 1, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fl*")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 2, 0, 0, NULL, 1, 0, 1); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 2, 0, 0, NULL, 1, 0, 1, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "/")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, -2, 0, 0, NULL, 1, 0, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, -2, 0, 0, NULL, 1, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fl/")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, -2, 0, 0, NULL, 1, 0, 1); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, -2, 0, 0, NULL, 1, 0, 1, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "quotient")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, -3, 0, 0, NULL, 1, 0, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, -3, 0, 0, NULL, 1, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fxquotient")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, -3, 0, 0, NULL, 1, 1, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, -3, 0, 0, NULL, 1, 1, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "remainder")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, -4, 0, 0, NULL, 1, 0, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, -4, 0, 0, NULL, 1, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fxremainder")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, -4, 0, 0, NULL, 1, 1, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, -4, 0, 0, NULL, 1, 1, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "min")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 9, 0, 0, NULL, 1, 0, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 9, 0, 0, NULL, 1, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "max")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 10, 0, 0, NULL, 1, 0, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 10, 0, 0, NULL, 1, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "bitwise-and")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 3, 0, 0, NULL, 1, 0, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 3, 0, 0, NULL, 1, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fxand")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 3, 0, 0, NULL, 1, 1, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 3, 0, 0, NULL, 1, 1, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "bitwise-ior")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 4, 0, 0, NULL, 1, 0, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 4, 0, 0, NULL, 1, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fxior")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 4, 0, 0, NULL, 1, 1, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 4, 0, 0, NULL, 1, 1, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "bitwise-xor")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 5, 0, 0, NULL, 1, 0, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 5, 0, 0, NULL, 1, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fxxor")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 5, 0, 0, NULL, 1, 1, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 5, 0, 0, NULL, 1, 1, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "arithmetic-shift")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 6, 0, 0, NULL, 1, 0, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 6, 0, 0, NULL, 1, 0, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fxlshift")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, 6, 0, 0, NULL, 1, 1, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, 6, 0, 0, NULL, 1, 1, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "unsafe-fxrshift")) { - generate_arith(jitter, rator, app->rand1, app->rand2, 2, -6, 0, 0, NULL, 1, 1, 0); + generate_arith(jitter, rator, app->rand1, app->rand2, 2, -6, 0, 0, NULL, 1, 1, 0, NULL); return 1; } else if (IS_NAMED_PRIM(rator, "vector-ref") || IS_NAMED_PRIM(rator, "unsafe-vector-ref") @@ -5961,6 +6261,35 @@ static int generate_inlined_binary(mz_jit_state *jitter, Scheme_App3_Rec *app, i mz_runstack_unskipped(jitter, 2); } + return 1; + } else if (IS_NAMED_PRIM(rator, "unsafe-f64vector-ref")) { + int fpr0, unbox = jitter->unbox; + + jitter->unbox = 0; /* no unboxing of vector and index arguments */ + generate_two_args(app->rand1, app->rand2, jitter, 1, 2); + jitter->unbox = unbox; + CHECK_LIMIT(); + + jit_ldxi_p(JIT_R0, JIT_R0, (long)&(((Scheme_Structure *)0x0)->slots[0])); + jit_ldxi_p(JIT_R0, JIT_R0, (long)&SCHEME_CPTR_VAL(0x0)); + jit_rshi_ul(JIT_R1, JIT_R1, 1); + jit_lshi_ul(JIT_R1, JIT_R1, 3); /* 3 = log(sizeof(double)) */ + + if (jitter->unbox) + fpr0 = JIT_FPR(jitter->unbox_depth); + else + fpr0 = JIT_FPR0; + + jit_ldxr_d_fppush(fpr0, JIT_R0, JIT_R1); + CHECK_LIMIT(); + + if (jitter->unbox) + jitter->unbox_depth++; + else { + mz_rs_sync(); + generate_alloc_double(jitter); + } + return 1; } else if (IS_NAMED_PRIM(rator, "set-mcar!") || IS_NAMED_PRIM(rator, "set-mcdr!")) { @@ -6119,7 +6448,22 @@ static int generate_inlined_nary(mz_jit_state *jitter, Scheme_App_Rec *app, int scheme_direct_call_count++; - if (!for_branch) { + if (IS_NAMED_PRIM(rator, "=")) { + generate_nary_arith(jitter, app, 0, 0, for_branch, branch_short); + return 1; + } else if (IS_NAMED_PRIM(rator, "<")) { + generate_nary_arith(jitter, app, 0, -2, for_branch, branch_short); + return 1; + } else if (IS_NAMED_PRIM(rator, ">")) { + generate_nary_arith(jitter, app, 0, 2, for_branch, branch_short); + return 1; + } else if (IS_NAMED_PRIM(rator, "<=")) { + generate_nary_arith(jitter, app, 0, -1, for_branch, branch_short); + return 1; + } else if (IS_NAMED_PRIM(rator, ">=")) { + generate_nary_arith(jitter, app, 0, 1, for_branch, branch_short); + return 1; + } else if (!for_branch) { if (IS_NAMED_PRIM(rator, "vector-set!") || IS_NAMED_PRIM(rator, "unsafe-vector-set!") || IS_NAMED_PRIM(rator, "unsafe-struct-set!") @@ -6277,6 +6621,44 @@ static int generate_inlined_nary(mz_jit_state *jitter, Scheme_App_Rec *app, int mz_runstack_unskipped(jitter, 3 - pushed); + return 1; + } else if (IS_NAMED_PRIM(rator, "unsafe-f64vector-set!")) { + if (avoids_r1(app->args[1]) + && is_constant_and_avoids_r1(app->args[2]) + && can_unbox(app->args[3], 5, JIT_FPR_NUM-1)) { + mz_runstack_skipped(jitter, 3); + jitter->unbox++; + generate(app->args[3], jitter, 0, 0, JIT_R0); /* to FP reg */ + CHECK_LIMIT(); + --jitter->unbox; + jitter->unbox_depth -= 1; + generate(app->args[2], jitter, 0, 0, JIT_R1); + CHECK_LIMIT(); + generate(app->args[1], jitter, 0, 0, JIT_R0); + mz_runstack_unskipped(jitter, 3); + } else { + generate_app(app, NULL, 3, jitter, 0, 0, 2); + CHECK_LIMIT(); + + mz_rs_ldxi(JIT_R0, 2); + jit_ldxi_d_fppush(JIT_FPR0, JIT_R0, &((Scheme_Double *)0x0)->double_val); + mz_rs_ldr(JIT_R0); + mz_rs_ldxi(JIT_R1, 1); + + mz_rs_inc(3); /* no sync */ + mz_runstack_popped(jitter, 3); + } + CHECK_LIMIT(); + + jit_ldxi_p(JIT_R0, JIT_R0, (long)&(((Scheme_Structure *)0x0)->slots[0])); + jit_ldxi_p(JIT_R0, JIT_R0, (long)&SCHEME_CPTR_VAL(0x0)); + jit_rshi_ul(JIT_R1, JIT_R1, 1); + jit_lshi_ul(JIT_R1, JIT_R1, 3); /* 3 = log(sizeof(double)) */ + jit_stxr_d_fppop(JIT_R1, JIT_R0, JIT_FPR0); + CHECK_LIMIT(); + + jit_movi_p(JIT_R0, &scheme_void); + return 1; } else if (IS_NAMED_PRIM(rator, "vector-immutable") || IS_NAMED_PRIM(rator, "vector")) { @@ -6318,6 +6700,24 @@ static int generate_inlined_nary(mz_jit_state *jitter, Scheme_App_Rec *app, int } return 1; + } else if (IS_NAMED_PRIM(rator, "+")) { + return generate_nary_arith(jitter, app, 1, 0, NULL, 1); + } else if (IS_NAMED_PRIM(rator, "-")) { + return generate_nary_arith(jitter, app, -1, 0, NULL, 1); + } else if (IS_NAMED_PRIM(rator, "*")) { + return generate_nary_arith(jitter, app, 2, 0, NULL, 1); + } else if (IS_NAMED_PRIM(rator, "/")) { + return generate_nary_arith(jitter, app, -2, 0, NULL, 1); + } else if (IS_NAMED_PRIM(rator, "bitwise-and")) { + return generate_nary_arith(jitter, app, 3, 0, NULL, 1); + } else if (IS_NAMED_PRIM(rator, "bitwise-ior")) { + return generate_nary_arith(jitter, app, 4, 0, NULL, 1); + } else if (IS_NAMED_PRIM(rator, "bitwise-xor")) { + return generate_nary_arith(jitter, app, 5, 0, NULL, 1); + } else if (IS_NAMED_PRIM(rator, "min")) { + return generate_nary_arith(jitter, app, 9, 0, NULL, 1); + } else if (IS_NAMED_PRIM(rator, "max")) { + return generate_nary_arith(jitter, app, 10, 0, NULL, 1); } else if (IS_NAMED_PRIM(rator, "checked-procedure-check-and-extract")) { generate_app(app, NULL, 5, jitter, 0, 0, 2); /* sync'd below */ CHECK_LIMIT(); @@ -8257,7 +8657,7 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) { /* May use JIT_R0 and create local branch: */ mz_generate_direct_prim(jit_pusharg_p(JIT_RUNSTACK), - jit_pusharg_p(JIT_R1), + jit_pusharg_i(JIT_R1), JIT_R2, noncm_prim_indirect); } CHECK_LIMIT(); @@ -8283,6 +8683,32 @@ static int do_generate_common(mz_jit_state *jitter, void *_data) } } + /* *** call_original_nary_arith_code *** */ + /* rator is in V1, count is in R1, args are on runstack */ + { + void *code; + + code = jit_get_ip().ptr; + call_original_nary_arith_code = code; + + mz_prolog(JIT_R2); + JIT_UPDATE_THREAD_RSPTR(); + mz_prepare_direct_prim(2); + { + /* May use JIT_R0 and create local branch: */ + mz_generate_direct_prim(jit_pusharg_p(JIT_RUNSTACK), + jit_pusharg_i(JIT_R1), + JIT_V1, noncm_prim_indirect); + } + CHECK_LIMIT(); + jit_retval(JIT_R0); + VALIDATE_RESULT(JIT_R0); + mz_epilog(JIT_R2); + CHECK_LIMIT(); + + register_sub_func(jitter, code, scheme_false); + } + /* *** on_demand_jit_[arity_]code *** */ /* Used as the code stub for a closure whose code is not yet compiled. See generate_function_prolog diff --git a/src/mzscheme/src/lightning/i386/fp.h b/src/mzscheme/src/lightning/i386/fp.h index d9f54757ff..a36f5e205d 100644 --- a/src/mzscheme/src/lightning/i386/fp.h +++ b/src/mzscheme/src/lightning/i386/fp.h @@ -201,6 +201,8 @@ union jit_double_imm { ((rd) == 0 ? (FSTPr (0), FPX(), FLDLm(0, (s1), (s2), 1)) \ : (FPX(), FLDLm(0, (s1), (s2), 1), FSTPr ((rd) + 1))) +#define jit_ldxr_d_fppush(rd, s1, s2) (FPX(), FLDLm(0, (s1), (s2), 1)) + #define jit_extr_i_d(rd, rs) (PUSHLr((rs)), \ ((rd) == 0 ? (FSTPr (0), FILDLm(0, _ESP, 0, 0)) \ : (FILDLm(0, _ESP, 0, 0), FSTPr ((rd) + 1))), \ @@ -235,9 +237,10 @@ union jit_double_imm { #define jit_sti_d_fppop(id, rs) (FPX(), FSTPLm((id), 0, 0, 0)) #endif -#define jit_stxi_d_fppop(id, rd, rs) (FPX(), FSTPLm((id), (rd), 0, 0)) +#define jit_stxi_d_fppop(id, rd, rs) (FPX(), FSTPLm((id), (rd), 0, 0)) #define jit_str_d_fppop(rd, rs) (FPX(), FSTPLm(0, (rd), 0, 0)) +#define jit_stxr_d_fppop(d1, d2, rs) (FPX(), FSTPLm(0, (d1), (d2), 1)) /* Assume round to near mode */ #define jit_floorr_d_i(rd, rs) \ diff --git a/src/mzscheme/src/numarith.c b/src/mzscheme/src/numarith.c index a090f37e6c..605b894782 100644 --- a/src/mzscheme/src/numarith.c +++ b/src/mzscheme/src/numarith.c @@ -63,20 +63,24 @@ void scheme_init_numarith(Scheme_Env *env) scheme_add_global_constant("sub1", p, env); p = scheme_make_folding_prim(plus, "+", 0, -1, 1); - SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED; + SCHEME_PRIM_PROC_FLAGS(p) |= (SCHEME_PRIM_IS_BINARY_INLINED + | SCHEME_PRIM_IS_NARY_INLINED); scheme_add_global_constant("+", p, env); p = scheme_make_folding_prim(minus, "-", 1, -1, 1); SCHEME_PRIM_PROC_FLAGS(p) |= (SCHEME_PRIM_IS_BINARY_INLINED - | SCHEME_PRIM_IS_UNARY_INLINED); + | SCHEME_PRIM_IS_UNARY_INLINED + | SCHEME_PRIM_IS_NARY_INLINED); scheme_add_global_constant("-", p, env); p = scheme_make_folding_prim(mult, "*", 0, -1, 1); - SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED; + SCHEME_PRIM_PROC_FLAGS(p) |= (SCHEME_PRIM_IS_BINARY_INLINED + | SCHEME_PRIM_IS_NARY_INLINED); scheme_add_global_constant("*", p, env); p = scheme_make_folding_prim(div_prim, "/", 1, -1, 1); - SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED; + SCHEME_PRIM_PROC_FLAGS(p) |= (SCHEME_PRIM_IS_BINARY_INLINED + | SCHEME_PRIM_IS_NARY_INLINED); scheme_add_global_constant("/", p, env); p = scheme_make_folding_prim(scheme_abs, "abs", 1, 1, 1); diff --git a/src/mzscheme/src/number.c b/src/mzscheme/src/number.c index dbd681096d..af7445c20d 100644 --- a/src/mzscheme/src/number.c +++ b/src/mzscheme/src/number.c @@ -105,6 +105,8 @@ static Scheme_Object *fx_not (int argc, Scheme_Object *argv[]); static Scheme_Object *fx_lshift (int argc, Scheme_Object *argv[]); static Scheme_Object *fx_rshift (int argc, Scheme_Object *argv[]); static Scheme_Object *fx_to_fl (int argc, Scheme_Object *argv[]); +static Scheme_Object *fl_ref (int argc, Scheme_Object *argv[]); +static Scheme_Object *fl_set (int argc, Scheme_Object *argv[]); static double not_a_number_val; @@ -312,15 +314,18 @@ scheme_init_number (Scheme_Env *env) env); p = scheme_make_folding_prim(scheme_bitwise_and, "bitwise-and", 0, -1, 1); - SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED; + SCHEME_PRIM_PROC_FLAGS(p) |= (SCHEME_PRIM_IS_BINARY_INLINED + | SCHEME_PRIM_IS_NARY_INLINED); scheme_add_global_constant("bitwise-and", p, env); p = scheme_make_folding_prim(bitwise_or, "bitwise-ior", 0, -1, 1); - SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED; + SCHEME_PRIM_PROC_FLAGS(p) |= (SCHEME_PRIM_IS_BINARY_INLINED + | SCHEME_PRIM_IS_NARY_INLINED); scheme_add_global_constant("bitwise-ior", p, env); p = scheme_make_folding_prim(bitwise_xor, "bitwise-xor", 0, -1, 1); - SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED; + SCHEME_PRIM_PROC_FLAGS(p) |= (SCHEME_PRIM_IS_BINARY_INLINED + | SCHEME_PRIM_IS_NARY_INLINED); scheme_add_global_constant("bitwise-xor", p, env); p = scheme_make_folding_prim(bitwise_not, "bitwise-not", 1, 1, 1); @@ -525,6 +530,18 @@ void scheme_init_unsafe_number(Scheme_Env *env) if (scheme_can_inline_fp_op()) SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_UNARY_INLINED; scheme_add_global_constant("unsafe-fx->fl", p, env); + + p = scheme_make_noncm_prim(fl_ref, "unsafe-f64vector-ref", + 2, 2); + if (scheme_can_inline_fp_op()) + SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED; + scheme_add_global_constant("unsafe-f64vector-ref", p, env); + + p = scheme_make_noncm_prim(fl_set, "unsafe-f64vector-set!", + 3, 3); + if (scheme_can_inline_fp_op()) + SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_NARY_INLINED; + scheme_add_global_constant("unsafe-f64vector-set!", p, env); } @@ -2814,3 +2831,20 @@ static Scheme_Object *fx_to_fl (int argc, Scheme_Object *argv[]) v = SCHEME_INT_VAL(argv[0]); return scheme_make_double(v); } + +static Scheme_Object *fl_ref (int argc, Scheme_Object *argv[]) +{ + double v; + Scheme_Object *p; + p = ((Scheme_Structure *)argv[0])->slots[0]; + v = ((double *)SCHEME_CPTR_VAL(p))[SCHEME_INT_VAL(argv[1])]; + return scheme_make_double(v); +} + +static Scheme_Object *fl_set (int argc, Scheme_Object *argv[]) +{ + Scheme_Object *p; + p = ((Scheme_Structure *)argv[0])->slots[0]; + ((double *)SCHEME_CPTR_VAL(p))[SCHEME_INT_VAL(argv[1])] = SCHEME_DBL_VAL(argv[2]); + return scheme_void; +} diff --git a/src/mzscheme/src/numcomp.c b/src/mzscheme/src/numcomp.c index 6e678070d6..1095be750e 100644 --- a/src/mzscheme/src/numcomp.c +++ b/src/mzscheme/src/numcomp.c @@ -57,23 +57,28 @@ void scheme_init_numcomp(Scheme_Env *env) Scheme_Object *p; p = scheme_make_folding_prim(eq, "=", 2, -1, 1); - SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED; + SCHEME_PRIM_PROC_FLAGS(p) |= (SCHEME_PRIM_IS_BINARY_INLINED + | SCHEME_PRIM_IS_NARY_INLINED); scheme_add_global_constant("=", p, env); p = scheme_make_folding_prim(lt, "<", 2, -1, 1); - SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED; + SCHEME_PRIM_PROC_FLAGS(p) |= (SCHEME_PRIM_IS_BINARY_INLINED + | SCHEME_PRIM_IS_BINARY_INLINED); scheme_add_global_constant("<", p, env); p = scheme_make_folding_prim(gt, ">", 2, -1, 1); - SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED; + SCHEME_PRIM_PROC_FLAGS(p) |= (SCHEME_PRIM_IS_BINARY_INLINED + | SCHEME_PRIM_IS_NARY_INLINED); scheme_add_global_constant(">", p, env); p = scheme_make_folding_prim(lt_eq, "<=", 2, -1, 1); - SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED; + SCHEME_PRIM_PROC_FLAGS(p) |= (SCHEME_PRIM_IS_BINARY_INLINED + | SCHEME_PRIM_IS_NARY_INLINED); scheme_add_global_constant("<=", p, env); p = scheme_make_folding_prim(gt_eq, ">=", 2, -1, 1); - SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED; + SCHEME_PRIM_PROC_FLAGS(p) |= (SCHEME_PRIM_IS_BINARY_INLINED + | SCHEME_PRIM_IS_NARY_INLINED); scheme_add_global_constant(">=", p, env); p = scheme_make_folding_prim(zero_p, "zero?", 1, 1, 1); @@ -89,11 +94,13 @@ void scheme_init_numcomp(Scheme_Env *env) scheme_add_global_constant("negative?", p, env); p = scheme_make_folding_prim(sch_max, "max", 1, -1, 1); - SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED; + SCHEME_PRIM_PROC_FLAGS(p) |= (SCHEME_PRIM_IS_BINARY_INLINED + | SCHEME_PRIM_IS_NARY_INLINED); scheme_add_global_constant("max", p, env); p = scheme_make_folding_prim(sch_min, "min", 1, -1, 1); - SCHEME_PRIM_PROC_FLAGS(p) |= SCHEME_PRIM_IS_BINARY_INLINED; + SCHEME_PRIM_PROC_FLAGS(p) |= (SCHEME_PRIM_IS_BINARY_INLINED + | SCHEME_PRIM_IS_NARY_INLINED); scheme_add_global_constant("min", p, env); } diff --git a/src/mzscheme/src/schminc.h b/src/mzscheme/src/schminc.h index 35c3b7afb3..ab4f352503 100644 --- a/src/mzscheme/src/schminc.h +++ b/src/mzscheme/src/schminc.h @@ -14,7 +14,7 @@ #define USE_COMPILED_STARTUP 1 #define EXPECTED_PRIM_COUNT 959 -#define EXPECTED_UNSAFE_COUNT 47 +#define EXPECTED_UNSAFE_COUNT 49 #ifdef MZSCHEME_SOMETHING_OMITTED # undef USE_COMPILED_STARTUP diff --git a/src/mzscheme/src/schvers.h b/src/mzscheme/src/schvers.h index a7586b1a72..82a03ebd66 100644 --- a/src/mzscheme/src/schvers.h +++ b/src/mzscheme/src/schvers.h @@ -13,12 +13,12 @@ consistently.) */ -#define MZSCHEME_VERSION "4.2.3.2" +#define MZSCHEME_VERSION "4.2.3.3" #define MZSCHEME_VERSION_X 4 #define MZSCHEME_VERSION_Y 2 #define MZSCHEME_VERSION_Z 3 -#define MZSCHEME_VERSION_W 2 +#define MZSCHEME_VERSION_W 3 #define MZSCHEME_VERSION_MAJOR ((MZSCHEME_VERSION_X * 100) + MZSCHEME_VERSION_Y) #define MZSCHEME_VERSION_MINOR ((MZSCHEME_VERSION_Z * 1000) + MZSCHEME_VERSION_W)