diff --git a/makefiles/Mf-boot.in b/makefiles/Mf-boot.in index a079dd5888..6744c7407a 100644 --- a/makefiles/Mf-boot.in +++ b/makefiles/Mf-boot.in @@ -20,7 +20,7 @@ doit: $(bootfiles) %.boot: ( cd .. ; ./workarea $* xc-$* ) ( cd ../xc-$*/s ; make -f Mf-cross base=../../$(workarea) --jobs=2 m=$(m) xm=$* ) - for x in `echo scheme.boot petite.boot scheme.h equates.h` ; do\ + for x in `echo scheme.boot petite.boot scheme.h equates.h gc-oce.inc gc-ocd.inc vfasl.inc` ; do\ if [ ! -h ../xc-$*/boot/$*/$$x ] ; then \ mv -f ../xc-$*/boot/$*/$$x ../boot/$*/$$x ;\ fi ;\ diff --git a/makefiles/Mf-install.in b/makefiles/Mf-install.in index 8f02ec796a..6a11f00fd6 100644 --- a/makefiles/Mf-install.in +++ b/makefiles/Mf-install.in @@ -62,7 +62,7 @@ InstallLZ4Target= # no changes should be needed below this point # ############################################################################### -Version=csv9.5.3.28 +Version=csv9.5.3.29 Include=boot/$m PetiteBoot=boot/$m/petite.boot SchemeBoot=boot/$m/scheme.boot diff --git a/mats/fl.ms b/mats/fl.ms index 4e38a1b14b..8a06277a3e 100644 --- a/mats/fl.ms +++ b/mats/fl.ms @@ -1038,3 +1038,71 @@ '((3.0 . -2.0) (-2.0 . -2.0) (-3.0 . -2.0) (2.0 . -2.0) (0.0 . 4.0) (0.0 . -4.0) (-0.0 . 4.0) (0.0 . -4.0))) ) + +(mat fp-unboxing + (begin + (define-syntax check-loop-allocation + (syntax-rules () + [(_ proc) ; proc should allocate only its result flonum + (or (eq? (current-eval) interpret) + (let ([before (+ (bytes-allocated) (bytes-deallocated))] + [N 100000]) + (box? + (let loop ([i N] [bx (box 0.0)]) + (if (zero? i) + bx + (loop (sub1 i) (let ([v (unbox bx)]) + (box (proc v))))))) + (let ([allocated (- (+ (bytes-allocated) (bytes-deallocated)) before)] + [expected (* N (+ (compute-size 1.0) + (compute-size (box #f))))]) + (printf "~s ~s\n" allocated expected) + (<= expected allocated (* 1.2 expected)))))])) + #t) + + (check-loop-allocation (lambda (v) (fl+ v v))) + (check-loop-allocation (lambda (v) (fl* v v))) + (check-loop-allocation (lambda (v) (fl- v 1.0))) + (check-loop-allocation (lambda (v) (fl/ v 2.0))) + + (check-loop-allocation (lambda (v) (fl+ v 2.0 v))) + (check-loop-allocation (lambda (v) (fl+ v (fl* 2.0 v)))) + + (check-loop-allocation (lambda (v) (fl+ v v v))) + (check-loop-allocation (lambda (v) (fl+ v (fl* v v) (fl/ v 2.0)))) + + (check-loop-allocation (lambda (v) (flabs v))) + (check-loop-allocation (lambda (v) (fl- v))) + + (check-loop-allocation (lambda (v) (flabs (fl+ v v)))) + (check-loop-allocation (lambda (v) (fl- (fl+ v v)))) + + (let ([i 0]) + (check-loop-allocation (lambda (v) (begin + (set! i (add1 i)) + (fl+ v (fixnum->flonum i)))))) + + (check-loop-allocation (lambda (v) (if (fl= (fl+ v (fl* 2.0 v)) 7.0) + (fl+ v 1.0) + (fl- v 1.0)))) + (check-loop-allocation (lambda (v) (if (fl< (fl+ v v) v) + (fl+ v 1.0) + (fl- v 1.0)))) + (check-loop-allocation (lambda (v) (if (fl> (fl+ v v) v) + (fl+ v 1.0) + (fl- v 1.0)))) + (check-loop-allocation (lambda (v) (if (fl<= (fl+ v v) v) + (fl+ v 1.0) + (fl- v 1.0)))) + (check-loop-allocation (lambda (v) (if (fl>= (fl+ v v) v) + (fl+ v 1.0) + (fl- v 1.0)))) + + (let ([bv (make-bytevector 8 0)]) + (check-loop-allocation (lambda (v) (fl+ v (bytevector-ieee-double-native-ref bv 0))))) + (let ([bv (make-bytevector 8 0)]) + (check-loop-allocation (lambda (v) (begin + (bytevector-ieee-double-native-set! bv 0 (fl+ v 0.1)) + (fl* v 0.99))))) + + ) diff --git a/mats/ieee.ms b/mats/ieee.ms index 829ed57117..81cfdbe3db 100644 --- a/mats/ieee.ms +++ b/mats/ieee.ms @@ -159,7 +159,7 @@ (mat fl= (let ((n (read (open-input-string "+nan.0")))) (not (fl= n n))) - (not (fl= (nan))) + (fl= (nan)) (not (fl= (nan) +inf.0)) (not (fl= (nan) -inf.0)) (not (fl= (nan) (nan))) @@ -171,7 +171,7 @@ ) (mat fl< - (not (fl< (nan))) + (fl< (nan)) (not (fl< (nan) (nan))) (not (fl< (nan) 0.0)) (not (fl< 0.0 (nan))) @@ -179,7 +179,7 @@ ) (mat fl> - (not (fl> (nan))) + (fl> (nan)) (not (fl> (nan) (nan))) (not (fl> (nan) 0.0)) (not (fl> 0.0 (nan))) @@ -189,14 +189,14 @@ ) (mat fl<= - (not (fl<= (nan))) + (fl<= (nan)) (not (fl<= (nan) (nan))) (not (fl<= (nan) 0.0)) (not (fl<= 0.0 (nan))) ) (mat fl>= - (not (fl>= (nan))) + (fl>= (nan)) (not (fl>= (nan) (nan))) (not (fl>= (nan) 0.0)) (not (fl>= 0.0 (nan))) diff --git a/mats/misc.ms b/mats/misc.ms index 5f0727b6fc..250c17b17c 100644 --- a/mats/misc.ms +++ b/mats/misc.ms @@ -5293,6 +5293,13 @@ (condition-wait c m) (loop))) (mutex-release m) + ;; Wait for threads to exit + (let () + (define $threads (foreign-procedure "(cs)threads" () scheme-object)) + (let loop () + (unless (= 1 (length ($threads))) + (sleep (make-time 'time-duration 10000 0)) + (loop)))) #t)) ) diff --git a/s/a6fb.def b/s/a6fb.def index 6f62f5e5e9..2ac6740a12 100644 --- a/s/a6fb.def +++ b/s/a6fb.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 8) (define-constant asm-arg-reg-max 5) (define-constant asm-arg-reg-cnt 3) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "long int") (define-constant typedef-uptr "unsigned long int") diff --git a/s/a6le.def b/s/a6le.def index d1d38dce01..dec9dc8148 100644 --- a/s/a6le.def +++ b/s/a6le.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 8) (define-constant asm-arg-reg-max 5) (define-constant asm-arg-reg-cnt 3) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "long int") (define-constant typedef-uptr "unsigned long int") diff --git a/s/a6nb.def b/s/a6nb.def index a0d51758d3..6b16231122 100644 --- a/s/a6nb.def +++ b/s/a6nb.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 8) (define-constant asm-arg-reg-max 5) (define-constant asm-arg-reg-cnt 3) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "long int") (define-constant typedef-uptr "unsigned long int") diff --git a/s/a6nt.def b/s/a6nt.def index 858ec529a9..4d6044e2ad 100644 --- a/s/a6nt.def +++ b/s/a6nt.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 8) (define-constant asm-arg-reg-max 5) (define-constant asm-arg-reg-cnt 3) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "long long int") (define-constant typedef-uptr "unsigned long long int") diff --git a/s/a6ob.def b/s/a6ob.def index a1bac80483..d845fd00fe 100644 --- a/s/a6ob.def +++ b/s/a6ob.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 8) (define-constant asm-arg-reg-max 5) (define-constant asm-arg-reg-cnt 3) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "long int") (define-constant typedef-uptr "unsigned long int") diff --git a/s/a6osx.def b/s/a6osx.def index dbf65de27d..b5c4d8ae48 100644 --- a/s/a6osx.def +++ b/s/a6osx.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 8) (define-constant asm-arg-reg-max 5) (define-constant asm-arg-reg-cnt 3) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "long int") (define-constant typedef-uptr "unsigned long int") diff --git a/s/a6s2.def b/s/a6s2.def index 85342c1769..fc53782722 100644 --- a/s/a6s2.def +++ b/s/a6s2.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 8) (define-constant asm-arg-reg-max 5) (define-constant asm-arg-reg-cnt 3) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "long int") (define-constant typedef-uptr "unsigned long int") diff --git a/s/arm32.ss b/s/arm32.ss index d4600323e1..3a2e371b89 100644 --- a/s/arm32.ss +++ b/s/arm32.ss @@ -60,53 +60,52 @@ (define-registers (reserved - [%tc %r9 #t 9] - [%sfp %r10 #t 10] - [%ap %r5 #t 5] + [%tc %r9 #t 9 uptr] + [%sfp %r10 #t 10 uptr] + [%ap %r5 #t 5 uptr] #;[%esp] #;[%eap] - [%trap %r8 #t 8]) + [%trap %r8 #t 8 uptr]) (allocable - [%ac0 %r4 #t 4] - [%xp %r6 #t 6] - [%ts %ip #f 12] - [%td %r11 #t 11] + [%ac0 %r4 #t 4 uptr] + [%xp %r6 #t 6 uptr] + [%ts %ip #f 12 uptr] + [%td %r11 #t 11 uptr] #;[%ret] - [%cp %r7 #t 7] + [%cp %r7 #t 7 uptr] #;[%ac1] #;[%yp] - [ %r0 %Carg1 %Cretval #f 0] - [ %r1 %Carg2 #f 1] - [ %r2 %Carg3 #f 2] - [ %r3 %Carg4 #f 3] - [ %lr #f 14] ; %lr is trashed by 'c' calls including calls to hand-coded routines like get-room + [ %r0 %Carg1 %Cretval #f 0 uptr] + [ %r1 %Carg2 #f 1 uptr] + [ %r2 %Carg3 #f 2 uptr] + [ %r3 %Carg4 #f 3 uptr] + [ %lr #f 14 uptr] ; %lr is trashed by 'c' calls including calls to hand-coded routines like get-room + [%fp1 %Cfparg5 %d4 %s8 #f 8 fp] + [%fp2 %Cfparg6 %d5 %s10 #f 10 fp] ) (machine-dependent - [%sp #t 13] - [%pc #f 15] - [%Cfparg1 %Cfpretval %d0 %s0 #f 0] ; < 32: low bit goes in D, N, or M bit, high bits go in Vd, Vn, Vm - [%Cfparg1b %s1 #f 1] - [%Cfparg2 %d1 %s2 #f 2] - [%Cfparg2b %s3 #f 3] - [%Cfparg3 %d2 %s4 #f 4] - [%Cfparg3b %s5 #f 5] - [%Cfparg4 %d3 %s6 #f 6] - [%Cfparg4b %s7 #f 7] - [%Cfparg5 %d4 %s8 #f 8] - [%Cfparg5b %s9 #f 9] - [%Cfparg6 %d5 %s10 #f 10] - [%Cfparg6b %s11 #f 11] - [%Cfparg7 %d6 %s12 #f 12] - [%Cfparg7b %s13 #f 13] - [%Cfparg8 %d7 %s14 #f 14] - [%Cfparg8b %s15 #f 15] - [%flreg1 %d8 %s16 #f 16] - [%flreg2 %d9 %s18 #f 18] + [%sp #t 13 uptr] + [%pc #f 15 uptr] + [%Cfparg1 %Cfpretval %d0 %s0 #f 0 fp] ; < 32: low bit goes in D, N, or M bit, high bits go in Vd, Vn, Vm + [%Cfparg1b %s1 #f 1 fp] + [%Cfparg2 %d1 %s2 #f 2 fp] + [%Cfparg2b %s3 #f 3 fp] + [%Cfparg3 %d2 %s4 #f 4 fp] + [%Cfparg3b %s5 #f 5 fp] + [%Cfparg4 %d3 %s6 #f 6 fp] + [%Cfparg4b %s7 #f 7 fp] + [%Cfparg5b %s9 #f 9 fp] + [%Cfparg6b %s11 #f 11 fp] + [%Cfparg7 %fptmp1 %d6 %s12 #f 12 fp] + [%Cfparg7b %fptmp2 %s13 #f 13 fp] + [%Cfparg8 %d7 %s14 #f 14 fp] + [%Cfparg8b %s15 #f 15 fp] + ;; etc., but other FP registers are preserved + #;[ %d16 #t 32 fp] ; >= 32: high bit goes in D, N, or M bit, low bits go in Vd, Vn, Vm + #;[ %d17 #t 33 fp] ; etc. - #;[ %d16 #f 32] ; >= 32: high bit goes in D, N, or M bit, low bits go in Vd, Vn, Vm - #;[ %d17 #f 33] - ; etc. - )) + ) + (reify-support %ts %lr %r3 %r2)) ;;; SECTION 2: instructions (module (md-handle-jump) ; also sets primitive handlers @@ -130,6 +129,18 @@ (lambda (x) (or (lmem? x) (literal@? x)))) + (define fpmem? + (lambda (x) + (nanopass-case (L15c Triv) x + [(mref ,lvalue0 ,lvalue1 ,imm ,type) (eq? type 'fp)] + [else #f]))) + + (define-syntax mem-of-type? + (lambda (stx) + (syntax-case stx (mem fpmem) + [(_ mem e) #'(lmem? e)] + [(_ fpmem e) #'(fpmem? e)]))) + (define imm-funky12? (lambda (x) (nanopass-case (L15c Triv) x @@ -206,42 +217,42 @@ (define mref->mref (lambda (a k) (define return - (lambda (x0 x1 imm) + (lambda (x0 x1 imm type) ; arm load & store instructions support index or offset but not both (safe-assert (or (eq? x1 %zero) (eqv? imm 0))) - (k (with-output-language (L15d Triv) `(mref ,x0 ,x1 ,imm))))) + (k (with-output-language (L15d Triv) `(mref ,x0 ,x1 ,imm ,type))))) (nanopass-case (L15c Triv) a - [(mref ,lvalue0 ,lvalue1 ,imm) + [(mref ,lvalue0 ,lvalue1 ,imm ,type) (lvalue->ur lvalue0 (lambda (x0) (lvalue->ur lvalue1 (lambda (x1) (cond [(and (eq? x1 %zero) (or (unsigned12? imm) (unsigned12? (- imm)))) - (return x0 %zero imm)] + (return x0 %zero imm type)] [(funky12 imm) => ; NB: dubious value? check to see if it's exercised (lambda (imm) (let ([u (make-tmp 'u)]) (seq (build-set! ,u (asm ,null-info ,(asm-add #f) ,x0 (immediate ,imm))) - (return u x1 0))))] + (return u x1 0 type))))] [(funky12 (- imm)) => ; NB: dubious value? check to see if it's exercised (lambda (imm) (let ([u (make-tmp 'u)]) (seq (build-set! ,u (asm ,null-info ,(asm-sub #f) ,x0 (immediate ,imm))) - (return u x1 0))))] + (return u x1 0 type))))] [else (let ([u (make-tmp 'u)]) (seq (build-set! ,u (immediate ,imm)) (if (eq? x1 %zero) - (return x0 u 0) + (return x0 u 0 type) (seq (build-set! ,u (asm ,null-info ,(asm-add #f) ,u ,x1)) - (return x0 u 0)))))])))))]))) + (return x0 u 0 type)))))])))))]))) (define mem->mem (lambda (a k) @@ -250,14 +261,46 @@ (let ([u (make-tmp 'u)]) (seq (build-set! ,u ,(literal@->literal a)) - (k (with-output-language (L15d Lvalue) `(mref ,u ,%zero 0)))))] + (k (with-output-language (L15d Lvalue) `(mref ,u ,%zero 0 uptr)))))] [else (mref->mref a k)]))) + (define fpmem->fpmem + (lambda (a k) + (define return + (lambda (x0 x1 imm) + (k (with-output-language (L15d Triv) `(mref ,x0 ,x1 ,imm fp))))) + (nanopass-case (L15c Triv) a + [(mref ,lvalue0 ,lvalue1 ,imm ,type) + (lvalue->ur lvalue0 + (lambda (x0) + (lvalue->ur lvalue1 + (lambda (x1) + (cond + [(not (and (<= 0 imm #x3FF) + (fx= 0 (fxand imm #b11)))) + ;; offset not aligned or out of range + (let ([u (make-tmp 'umov)]) + (seq + (build-set! ,u (asm ,null-info ,(asm-add #f) ,x0 (immediate ,imm))) + (if (eq? x1 %zero) + (return u %zero 0) + (seq + (build-set! ,u (asm ,null-info ,(asm-add #f) ,u ,x1)) + (return u %zero 0)))))] + [(not (eq? x1 %zero)) + (let ([u (make-tmp 'umov)]) + (seq + (build-set! ,u (asm ,null-info ,(asm-add #f) ,x0 ,x1)) + (return u %zero imm)))] + [else + (return x0 %zero imm)])))))]))) + (define-syntax coercible? (syntax-rules () [(_ ?a ?aty*) (let ([a ?a] [aty* ?aty*]) - (or (memq 'ur aty*) + (or (and (memq 'ur aty*) (not (or (fpmem? a) (fpur? a)))) + (and (memq 'fpur aty*) (or (fpmem? a) (fpur? a))) (and (memq 'funky12 aty*) (imm-funky12? a)) (and (memq 'negate-funky12 aty*) (imm-negate-funky12? a)) (and (memq 'lognot-funky12 aty*) (imm-lognot-funky12? a)) @@ -298,6 +341,18 @@ (build-set! ,u ,a) (k u)))))] [else (sorry! 'coerce-opnd "unexpected operand ~s" a)])] + [(memq 'fpur aty*) + (cond + [(fpur? a) (k a)] + [(fpmem? a) + (fpmem->fpmem a + (lambda (a) + (let ([u (make-tmp 'u 'fp)]) + (seq + (build-set! ,u ,a) + (k u)))))] + [else + (sorry! 'coerce-opnd "unexpected operand ~s" a)])] [else (sorry! 'coerce-opnd "cannot coerce ~s to ~s" a aty*)]))])) (define set-ur=mref @@ -332,9 +387,15 @@ (define-syntax define-instruction (lambda (x) + (define mem-type? + (lambda (t) + (syntax-case t (mem fpmem) + [mem #t] + [fpmem #t] + [else #f]))) (define make-value-clause (lambda (fmt) - (syntax-case fmt (mem ur) + (syntax-case fmt (mem fpmem ur fpur) [(op (c mem) (a ur)) #`(lambda (c a) (if (lmem? c) @@ -344,6 +405,20 @@ (lambda (c) (rhs c a))))) (next c a)))] + [(op (c fpmem) (a aty ...) ...) + #`(lambda (c a ...) + (if (and (fpmem? c) (coercible? a '(aty ...)) ...) + #,(let f ([a* #'(a ...)] [aty** #'((aty ...) ...)]) + (cond + [(null? a*) + #'(fpmem->fpmem c + (lambda (c) + (rhs c a ...)))] + [else + #`(coerce-opnd #,(car a*) '#,(car aty**) + (lambda (#,(car a*)) + #,(f (cdr a*) (cdr aty**))))])) + (next c a ...)))] [(op (c ur) (a aty ...) ...) #`(lambda (c a ...) (if (and (coercible? a '(aty ...)) ...) @@ -359,6 +434,22 @@ (build-set! ,c ,u)))))) #`(coerce-opnd #,(car a*) '#,(car aty**) (lambda (#,(car a*)) #,(f (cdr a*) (cdr aty**)))))) + (next c a ...)))] + [(op (c fpur) (a aty ...) ...) + #`(lambda (c a ...) + (if (and (coercible? a '(aty ...)) ...) + #,(let f ([a* #'(a ...)] [aty** #'((aty ...) ...)]) + (if (null? a*) + #'(if (fpur? c) + (rhs c a ...) + (let ([u (make-tmp 'u 'fp)]) + (seq + (rhs u a ...) + (fpmem->fpmem c + (lambda (c) + (build-set! ,c ,u)))))) + #`(coerce-opnd #,(car a*) '#,(car aty**) + (lambda (#,(car a*)) #,(f (cdr a*) (cdr aty**)))))) (next c a ...)))]))) (define-who make-pred-clause @@ -679,31 +770,58 @@ (lambda (x1) (with-flonum-data-pointers (x2 ...) e1 e2 ...)))]))) - (define-instruction effect (flt) - [(op (x ur) (y ur)) - (with-flonum-data-pointers (y) - `(asm ,info ,asm-flt ,x ,y))]) + (define (fpmem->mem mem dir) + (with-output-language (L15d Triv) + (nanopass-case (L15d Triv) mem + [(mref ,x1 ,x2 ,imm ,type) + (safe-assert (eq? type 'fp)) + (let ([delta (constant-case native-endianness + [(little) (if (eq? dir 'lo) 0 4)] + [(big) (if (eq? dir 'hi) 0 4)])]) + `(mref ,x1 ,x2 ,(fx+ imm delta) uptr))] + [else (sorry! 'fpmem->mem "unexpected reference ~s" mem)]))) - (define-instruction effect (fl+ fl- fl/ fl*) - [(op (x ur) (y ur) (z ur)) - (with-flonum-data-pointers (x y z) - `(asm ,info ,(asm-flop-2 op) ,x ,y ,z))]) + (define-instruction value (fpt) + [(op (x fpur) (y ur)) + `(set! ,(make-live-info) ,x (asm ,info ,asm-fpt ,y))]) - (define-instruction effect (flsqrt) - [(op (x ur) (y ur)) - (with-flonum-data-pointers (x y) - `(asm ,info ,asm-flsqrt ,x ,y))]) + (define-instruction value (fpmove) + [(op (x fpmem) (y fpur)) `(set! ,(make-live-info) ,x ,y)] + [(op (x fpur) (y fpur)) `(set! ,(make-live-info) ,x ,y)]) + + (define-instruction value (fpcastto/hi) + [(op (x ur) (y fpmem)) `(set! ,(make-live-info) ,x ,(fpmem->mem y 'hi))] + [(op (x ur) (y fpur)) `(set! ,(make-live-info) ,x (asm ,info ,(asm-fpcastto 'hi) ,y))]) + + (define-instruction value (fpcastto/lo) + [(op (x ur) (y fpmem)) `(set! ,(make-live-info) ,x ,(fpmem->mem y 'lo))] + [(op (x ur) (y fpur)) `(set! ,(make-live-info) ,x (asm ,info ,(asm-fpcastto 'lo) ,y))]) + + (define-instruction value (fpcastfrom) + [(op (x fpmem) (hi ur) (lo ur)) (seq + `(set! ,(make-live-info) ,(fpmem->mem x 'lo) ,lo) + `(set! ,(make-live-info) ,(fpmem->mem x 'hi) ,hi))] + [(op (x fpur) (hi ur) (lo ur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpcastfrom ,lo ,hi))]) + + + + (define-instruction value (fp+ fp- fp/ fp*) + [(op (x fpur) (y fpur) (z fpur)) + `(set! ,(make-live-info) ,x (asm ,info ,(asm-fpop-2 op) ,y ,z))]) + + (define-instruction value (fpsqrt) + [(op (x fpur) (y fpur)) + `(set! ,(make-live-info) ,x (asm ,info ,asm-fpsqrt ,y))]) (define-instruction value (trunc) [(op (z ur) (x ur)) (with-flonum-data-pointers (x) `(set! ,(make-live-info) ,z (asm ,info ,asm-trunc ,x)))]) - (define-instruction pred (fl= fl< fl<=) - [(op (x ur) (y ur)) - (with-flonum-data-pointers (x y) - (let ([info (make-info-condition-code op #f #f)]) - (values '() `(asm ,info ,(asm-fl-relop info) ,x ,y))))])) + (define-instruction pred (fp= fp< fp<=) + [(op (x fpur) (y fpur)) + (let ([info (make-info-condition-code op #f #f)]) + (values '() `(asm ,info ,(asm-fp-relop info) ,x ,y)))])) (define-instruction effect (inc-cc-counter) [(op (x ur) (w ur funky12) (z funky12 ur)) @@ -891,15 +1009,15 @@ asm-move asm-move/extend asm-load asm-store asm-swap asm-library-call asm-library-call! asm-library-jump asm-mul asm-smull asm-cmp/shift asm-add asm-sub asm-rsb asm-logand asm-logor asm-logxor asm-bic asm-pop-multiple asm-shiftop asm-logand asm-lognot - asm-logtest asm-fl-relop asm-relop asm-push-multiple asm-vpush-multiple + asm-logtest asm-fp-relop asm-relop asm-push-multiple asm-vpush-multiple asm-indirect-jump asm-literal-jump asm-direct-jump asm-return-address asm-jump asm-conditional-jump asm-data-label asm-rp-header asm-rp-compact-header asm-indirect-call asm-condition-code asm-fl-load/store - asm-fl-load/cvt asm-fl-store/cvt asm-flt asm-trunc + asm-fl-load/cvt asm-fl-store/cvt asm-fpt asm-fpmove asm-fpcastto asm-fpcastfrom asm-trunc asm-lock asm-lock+/- asm-cas - asm-flop-2 asm-flsqrt asm-c-simple-call + asm-fpop-2 asm-fpsqrt asm-c-simple-call asm-save-flrv asm-restore-flrv asm-return asm-c-return asm-size asm-enter asm-foreign-call asm-foreign-callable asm-read-counter @@ -1079,8 +1197,10 @@ (define-op vstr.sgl vldr/vstr-op #b1010 #b00) (define-op vstr.dbl vldr/vstr-op #b1011 #b00) - (define-op vmov.gpr->s32 vmov-op #b0) - (define-op vmov.s32->gpr vmov-op #b1) + (define-op vmov.gpr->s32 vmov.gpr-op #b0) + (define-op vmov.s32->gpr vmov.gpr-op #b1) + (define-op vmov.gprgpr->s64 vmov.gpr64-op #b0) + (define-op vmov.fpr vmov.fpr-op) (define-op vcvt.sgl->dbl vcvt-op #b01 #b110111) (define-op vcvt.dbl->sgl vcvt-op #b11 #b110111) @@ -1347,6 +1467,8 @@ (define vldr/vstr-op (lambda (op opc1 opc2 flreg reg offset code*) + (safe-assert (and (<= 0 offset #x3FF) + (fx= 0 (fxand offset #b11)))) (let-values ([(d vd) (ax-flreg->bits flreg)]) (emit-code (op flreg reg offset code*) [28 (ax-cond 'al)] @@ -1360,9 +1482,9 @@ [8 opc1] [0 (fxsrl offset 2)])))) - (define vmov-op - (lambda (op dir flreg gpreg code*) - (let-values ([(n vn) (ax-flreg->bits flreg)]) + (define vmov.gpr-op + (lambda (op dir flreg flreg-delta gpreg code*) + (let-values ([(n vn) (ax-flreg->bits flreg flreg-delta)]) (emit-code (op flreg gpreg code*) [28 (ax-cond 'al)] [21 #b1110000] @@ -1373,6 +1495,44 @@ [7 n] [0 #b0010000])))) + (define vmov.gpr64-op + (lambda (op dir flreg gpreglo gpreghi code*) + (let-values ([(n vn) (ax-flreg->bits flreg)]) + (emit-code (op flreg gpreglo gpreghi code*) + [28 (ax-cond 'al)] + [23 #b11000] + [22 1] + [21 0] + [20 dir] ; 0 to fp, 1 from fp + [16 (ax-ea-reg-code gpreghi)] + [12 (ax-ea-reg-code gpreglo)] + [10 #b10] + [8 #b11] + [6 #b00] + [5 n] + [4 1] + [0 vn])))) + + (define vmov.fpr-op + (lambda (op destreg srcreg code*) + (let-values ([(d vd) (ax-flreg->bits destreg)] + [(m vm) (ax-flreg->bits srcreg)]) + (emit-code (op destreg srcreg code*) + [28 (ax-cond 'al)] + [23 #b11101] + [22 0] ; D + [20 #b11] + [19 d] + [16 #b000] + [12 vd] + [10 #b10] + [8 #b11] + [7 0] + [6 1] + [5 m] + [4 0] + [00 vm])))) + (define vcvt-op (lambda (op szop opc2 dest src code*) (let-values ([(d vd) (ax-flreg->bits dest)] @@ -1588,11 +1748,13 @@ [else ($oops who "unsupported op ~s" op)]))) (define ax-flreg->bits - (lambda (flreg) - (let ([n (reg-mdinfo flreg)]) + (case-lambda + [(flreg) (ax-flreg->bits flreg 0)] + [(flreg flreg-delta) + (let ([n (fx+ (reg-mdinfo flreg) flreg-delta)]) (if (fx< n 32) (values (fxlogand n 1) (fxsrl n 1)) - (values (fxsrl n 4) (fxlogand n #b1111)))))) + (values (fxsrl n 4) (fxlogand n #b1111))))])) (define-syntax emit-code (lambda (x) @@ -1803,11 +1965,11 @@ (Trivit (base offset) (case op [(load-single->double) - (emit vldr.sgl %flreg2 base (ax-imm-data offset) - (emit vcvt.sgl->dbl flreg %flreg2 code*))] + (emit vldr.sgl %fptmp2 base (ax-imm-data offset) + (emit vcvt.sgl->dbl flreg %fptmp2 code*))] [(load-double->single) - (emit vldr.dbl %flreg2 base (ax-imm-data offset) - (emit vcvt.dbl->sgl flreg %flreg2 code*))] + (emit vldr.dbl %fptmp2 base (ax-imm-data offset) + (emit vcvt.dbl->sgl flreg %fptmp2 code*))] [else (sorry! who "unrecognized op ~s" op)]))))) (define-who asm-fl-store/cvt @@ -1816,8 +1978,8 @@ (Trivit (base offset) (case op [(store-single->double) - (emit vcvt.sgl->dbl %flreg2 flreg - (emit vstr.dbl %flreg2 base (ax-imm-data offset) code*))] + (emit vcvt.sgl->dbl %fptmp2 flreg + (emit vstr.dbl %fptmp2 base (ax-imm-data offset) code*))] [else (sorry! who "unrecognized op ~s" op)]))))) (define-who asm-fl-load/store @@ -1884,40 +2046,66 @@ [else (sorry! who "unexpected mref type ~s" type)]))] [else (sorry! who "expected %zero index or 0 offset, got ~s and ~s" index offset)]))))))) - (define-who asm-flop-2 + (define-who asm-fpop-2 (lambda (op) - (lambda (code* src1 src2 dest) - (Trivit (src1 src2 dest) - (emit vldr.dbl %flreg1 src1 0 - (emit vldr.dbl %flreg2 src2 0 - (let ([code* (emit vstr.dbl %flreg1 dest 0 code*)]) - (case op - [(fl+) (emit vadd %flreg1 %flreg1 %flreg2 code*)] - [(fl-) (emit vsub %flreg1 %flreg1 %flreg2 code*)] - [(fl*) (emit vmul %flreg1 %flreg1 %flreg2 code*)] - [(fl/) (emit vdiv %flreg1 %flreg1 %flreg2 code*)] - [else (sorry! who "unrecognized op ~s" op)])))))))) + (lambda (code* dest src1 src2) + (case op + [(fp+) (emit vadd dest src1 src2 code*)] + [(fp-) (emit vsub dest src1 src2 code*)] + [(fp*) (emit vmul dest src1 src2 code*)] + [(fp/) (emit vdiv dest src1 src2 code*)] + [else (sorry! who "unrecognized op ~s" op)])))) - (define asm-flsqrt - (lambda (code* src dest) - (Trivit (src dest) - (emit vldr.dbl %flreg1 src 0 - (emit vsqrt %flreg1 %flreg1 - (emit vstr.dbl %flreg1 dest 0 code*)))))) + (define asm-fpsqrt + (lambda (code* dest src) + (emit vsqrt dest src code*))) (define asm-trunc (lambda (code* dest flonumreg) (Trivit (dest flonumreg) - (emit vldr.dbl %flreg1 flonumreg 0 - (emit vcvt.dbl->s32 %flreg1 %flreg1 - (emit vmov.s32->gpr %flreg1 dest code*)))))) + (emit vldr.dbl %fptmp1 flonumreg 0 + (emit vcvt.dbl->s32 %fptmp1 %fptmp1 + (emit vmov.s32->gpr %fptmp1 0 dest code*)))))) - (define asm-flt - (lambda (code* src flonumreg) - (Trivit (src flonumreg) - (emit vmov.gpr->s32 %flreg1 src - (emit vcvt.s32->dbl %flreg1 %flreg1 - (emit vstr.dbl %flreg1 flonumreg 0 code*)))))) + (define asm-fpt + (lambda (code* dest src) + (Trivit (src) + (emit vmov.gpr->s32 %fptmp1 0 src + (emit vcvt.s32->dbl %fptmp1 dest code*))))) + + (define-who asm-fpmove + ;; fpmove pseudo instruction is used by set! case in + ;; select-instructions! and generate-code; at most one of src or + ;; dest can be an mref, and then the offset is double-aligned + (lambda (code* dest src) + (let ([dest-it dest] + [src-it src]) + (Trivit (dest-it src-it) + (record-case dest-it + [(disp) (imm reg) + (safe-assert (fx= 0 (fxand imm #b11))) + (emit vstr.dbl src (cons 'reg reg) imm code*)] + [(index) (n ireg breg) (sorry! who "cannot handle indexed fp dest ref")] + [else + (record-case src-it + [(disp) (imm reg) + (safe-assert (fx= 0 (fxand imm #b11))) + (emit vldr.dbl dest (cons 'reg reg) imm code*)] + [(index) (n ireg breg) (sorry! who "cannot handle indexed fp src ref")] + [else (emit vmov.fpr dest src code*)])]))))) + + (define asm-fpcastto + (lambda (part) + (lambda (code* dest src) + (Trivit (dest) + (if (eq? part 'lo) + (emit vmov.gpr->s32 src 0 dest code*) + (emit vmov.gpr->s32 src 1 dest code*)))))) + + (define asm-fpcastfrom + (lambda (code* dest lo-src hi-src) + (Trivit (lo-src hi-src) + (emit vmov.gprgpr->s64 dest lo-src hi-src code*)))) (define-who asm-swap (lambda (type) @@ -1982,16 +2170,12 @@ (emit cmpi tmp2 0 code*)))))))) - (define asm-fl-relop + (define asm-fp-relop (lambda (info) (lambda (l1 l2 offset x y) - (Trivit (x y) - (values - (emit vldr.dbl %flreg1 x 0 - (emit vldr.dbl %flreg2 y 0 - (emit vcmp %flreg1 %flreg2 - (emit fpscr->apsr '())))) - (asm-conditional-jump info l1 l2 offset)))))) + (values + (emit vcmp x y (emit fpscr->apsr '())) + (asm-conditional-jump info l1 l2 offset))))) (define-who asm-relop (lambda (info) @@ -2231,9 +2415,9 @@ [(overflow) (i? bvc bvs)] [(multiply-overflow) (i? beq bne)] ; result of comparing sign bit of low word with all bits in high word: eq if no overflow, ne if oveflow [(carry) (i? bcc bcs)] - [(fl<) (i? (r? ble bcs) (r? bgt bcc))] - [(fl<=) (i? (r? blt bhi) (r? bge bls))] - [(fl=) (i? bne beq)])))))) + [(fp<) (i? (r? ble bcs) (r? bgt bcc))] + [(fp<=) (i? (r? blt bhi) (r? bge bls))] + [(fp=) (i? bne beq)])))))) (define asm-data-label (lambda (code* l offset func code-size) @@ -2387,14 +2571,14 @@ (lambda (offset) (lambda (x) ; requires var (%seq - (inline ,(make-info-loadfl %flreg1) ,%load-double ,x ,%zero ,(%constant flonum-data-disp)) - (inline ,(make-info-loadfl %flreg1) ,%store-double ,%sp ,%zero (immediate ,offset)))))] + (inline ,(make-info-loadfl %fptmp1) ,%load-double ,x ,%zero ,(%constant flonum-data-disp)) + (inline ,(make-info-loadfl %fptmp1) ,%store-double ,%sp ,%zero (immediate ,offset)))))] [load-single-stack (lambda (offset) (lambda (x) ; requires var (%seq - (inline ,(make-info-loadfl %flreg1) ,%load-double->single ,x ,%zero ,(%constant flonum-data-disp)) - (inline ,(make-info-loadfl %flreg1) ,%store-single ,%sp ,%zero (immediate ,offset)))))] + (inline ,(make-info-loadfl %fptmp1) ,%load-double->single ,x ,%zero ,(%constant flonum-data-disp)) + (inline ,(make-info-loadfl %fptmp1) ,%store-single ,%sp ,%zero (immediate ,offset)))))] [load-int-stack (lambda (offset) (lambda (rhs) ; requires rhs @@ -2737,14 +2921,14 @@ (lambda (offset) (lambda (x) ; requires var (%seq - (inline ,(make-info-loadfl %flreg1) ,%load-double ,%sp ,%zero (immediate ,offset)) - (inline ,(make-info-loadfl %flreg1) ,%store-double ,x ,%zero ,(%constant flonum-data-disp)))))) + (inline ,(make-info-loadfl %fptmp1) ,%load-double ,%sp ,%zero (immediate ,offset)) + (inline ,(make-info-loadfl %fptmp1) ,%store-double ,x ,%zero ,(%constant flonum-data-disp)))))) (define load-single-stack (lambda (offset) (lambda (x) ; requires var (%seq - (inline ,(make-info-loadfl %flreg1) ,%load-single->double ,%sp ,%zero (immediate ,offset)) - (inline ,(make-info-loadfl %flreg1) ,%store-double ,x ,%zero ,(%constant flonum-data-disp)))))) + (inline ,(make-info-loadfl %fptmp1) ,%load-single->double ,%sp ,%zero (immediate ,offset)) + (inline ,(make-info-loadfl %fptmp1) ,%store-double ,x ,%zero ,(%constant flonum-data-disp)))))) (define load-int-stack (lambda (type offset) (lambda (lvalue) diff --git a/s/arm32le.def b/s/arm32le.def index e9657b4381..cb03c253ab 100644 --- a/s/arm32le.def +++ b/s/arm32le.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 8) (define-constant asm-arg-reg-max 5) (define-constant asm-arg-reg-cnt 3) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "int") (define-constant typedef-uptr "unsigned int") diff --git a/s/bytevector.ss b/s/bytevector.ss index 067d2bd71b..be491650ed 100644 --- a/s/bytevector.ss +++ b/s/bytevector.ss @@ -732,16 +732,11 @@ (set-who! bytevector-ieee-double-native-ref (lambda (v i) - (if ($bytevector-ref-check? 64 v i) - (#3%bytevector-ieee-double-native-ref v i) - (if (bytevector? v) - (invalid-index who v i) - (not-a-bytevector who v))))) + (#2%bytevector-ieee-double-native-ref v i))) (set-who! bytevector-ieee-single-native-set! (lambda (v i x) (if ($bytevector-set!-check? 32 v i) - ; inline routine checks to make sure x is a real number (#3%bytevector-ieee-single-native-set! v i x) (if (mutable-bytevector? v) (invalid-index who v i) @@ -749,12 +744,7 @@ (set-who! bytevector-ieee-double-native-set! (lambda (v i x) - (if ($bytevector-set!-check? 64 v i) - ; inline routine checks to make sure x is a real number - (#3%bytevector-ieee-double-native-set! v i x) - (if (mutable-bytevector? v) - (invalid-index who v i) - (not-a-mutable-bytevector who v))))) + (#2%bytevector-ieee-double-native-set! v i x))) (set-who! bytevector-copy (lambda (v) diff --git a/s/cmacros.ss b/s/cmacros.ss index 30830e603c..3cc1fac565 100644 --- a/s/cmacros.ss +++ b/s/cmacros.ss @@ -328,7 +328,7 @@ [(_ foo e1 e2) e1] ... [(_ bar e1 e2) e2]))))]))) -(define-constant scheme-version #x0905031C) +(define-constant scheme-version #x0905031D) (define-syntax define-machine-types (lambda (x) @@ -412,6 +412,11 @@ (define-constant ptr-alignment (/ (constant byte-alignment) (constant ptr-bytes))) +;; Stack alignment may be needed for unboxed floating-point values: +(constant-case ptr-bits + [(32) (define-constant stack-word-alignment 2)] + [(64) (define-constant stack-word-alignment 1)]) + ;; seginfo offsets, must be consistent with `seginfo` in "types.h" (define-constant seginfo-space-disp 0) (define-constant seginfo-generation-disp 1) @@ -1448,7 +1453,8 @@ [void* lz4-out-buffer] [U64 instr-counter] [U64 alloc-counter] - [ptr parameters])) + [ptr parameters] + [double fpregs (constant asm-fpreg-max)])) (define tc-field-list (let f ([ls (oblist)] [params '()]) @@ -1686,6 +1692,7 @@ (unsafe #b00001000000000000000000) (unrestricted #b00010000000000000000000) (safeongoodargs #b00100000000000000000000) + (unboxed-arguments #b10000000000000000000000) ; always accepts unboxed 'flonum arguments, up to inline-args-limit (cptypes2 #b01000000000000000000000) (cptypes3 cptypes2) (cptypes2x cptypes2) @@ -1694,7 +1701,9 @@ (alloc (or proc discard true)) ; would be nice to check that these and only these actually have cp0 partial folders (partial-folder (or cp02 cp03)) -) + ) + +(define-constant inline-args-limit 10) (define-flags cp0-info-mask (pure-known #b0000000001) @@ -1804,7 +1813,10 @@ (syntax-rules () ((_ x) (float-type-case - [(ieee) (fx= ($flonum-exponent x) #x7ff)])))) + [(ieee) (fx= ($flonum-exponent x) #x7ff)])))) + +;; #t => incompatibility with older Chez Scheme: +(define-constant nan-single-comparison-true? #t) (define-syntax on-reset (syntax-rules () @@ -2503,6 +2515,7 @@ (cfl/ #f 2 #f #t) (negate #f 1 #f #t) (flnegate #f 1 #t #t) + (flabs #f 1 #t #t) (call-error #f 0 #f #f) (unsafe-unread-char #f 2 #f #t) (map-car #f 1 #f #t) @@ -2523,6 +2536,7 @@ (fxsll #f 2 #f #t) (fxsrl #f 2 #t #t) (fxsra #f 2 #t #t) + (fixnum->flonum #f 1 #t #t) (append #f 2 #f #t) (values-error #f 0 #f #f) (dooverflow #f 0 #f #f) @@ -2640,6 +2654,8 @@ (bytevector-s8-set! #f 3 #f #t) (bytevector-u8-set! #f 3 #f #t) (bytevector=? #f 2 #f #f) + (bytevector-ieee-double-native-ref #f 2 #t #t) + (bytevector-ieee-double-native-set! #f 2 #t #t) (real->flonum #f 2 #f #t) (unsafe-port-eof? #f 1 #f #t) (unsafe-lookahead-u8 #f 1 #f #t) diff --git a/s/cp0.ss b/s/cp0.ss index fb607515b9..8b8e4e959d 100644 --- a/s/cp0.ss +++ b/s/cp0.ss @@ -550,7 +550,13 @@ ; pure OR body to be pure, since we can't separate non-pure ; RHS and body expressions [(letrec ([,x* ,e*] ...) ,body) - (guard (or (ivory? body) (andmap ivory1? e*))) + (guard (and (or (ivory? body) (andmap ivory1? e*)) + ;; don't break apart (potential) loops + (not (and (fx= (length x*) 1) + (nanopass-case (Lsrc Expr) body + [(call ,preinfo (ref ,maybe-src ,x) ,e* ...) + (eq? x (car x*))] + [else #f]))))) ; assocate each lhs with cooked operand for corresponding rhs. see note above. (for-each (lambda (x e) (prelex-operand-set! x (build-cooked-opnd e)) (operand-name-set! opnd (prelex-name x))) x* e*) (values (make-lifted #f x* e*) body)] diff --git a/s/cpnanopass.ss b/s/cpnanopass.ss index bd38f64418..32e10b499d 100644 --- a/s/cpnanopass.ss +++ b/s/cpnanopass.ss @@ -500,11 +500,11 @@ (define-syntax define-reserved-registers (lambda (x) (syntax-case x () - [(_ [regid alias ... callee-save? mdinfo] ...) + [(_ [regid alias ... callee-save? mdinfo type] ...) (syntax-case #'(regid ...) (%tc %sfp) [(%tc %sfp . others) #t] [_ #f]) #'(begin (begin - (define regid (make-reg 'regid 'mdinfo (tc-disp regid) callee-save?)) + (define regid (make-reg 'regid 'mdinfo (tc-disp regid) callee-save? 'type)) (module (alias ...) (define x regid) (define alias x) ...)) ...)]))) @@ -512,17 +512,27 @@ (lambda (x) (assert (fx<= (constant asm-arg-reg-cnt) (constant asm-arg-reg-max))) (syntax-case x () - [(_ regvec arg-registers extra-registers with-initialized-registers [regid reg-alias ... callee-save? mdinfo] ...) - (with-syntax ([((tc-disp ...) (arg-regid ...) (extra-regid ...)) - (syntax-case #'(regid ...) (%ac0 %xp %ts %td) - [(%ac0 %xp %ts %td other ...) + [(_ regvec arg-registers extra-registers extra-fpregisters with-initialized-registers + [regid reg-alias ... callee-save? mdinfo type] ...) + (with-syntax ([((tc-disp ...) (arg-regid ...) (extra-regid ...) (extra-fpregid ...)) + (syntax-case #'([regid type] ...) (%ac0 %xp %ts %td uptr) + [([%ac0 _] [%xp _] [%ts _] [%td _] [other other-type] ...) (let f ([other* #'(other ...)] + [other-type* #'(other-type ...)] [rtc-disp* '()] [arg-offset (constant tc-arg-regs-disp)] - [rextra* '()]) + [fp-offset (constant tc-fpregs-disp)] + [rextra* '()] + [rfpextra* '()]) (if (null? other*) - (if (fx= (length rextra*) (constant asm-arg-reg-max)) - (let ([extra* (reverse rextra*)]) + (cond + [(not (fx= (length rextra*) (constant asm-arg-reg-max))) + (syntax-error x (format "asm-arg-reg-max extra registers are not specified ~s" (syntax->datum rextra*)))] + [(not (fx= (length rfpextra*) (constant asm-fpreg-max))) + (syntax-error x (format "asm-fpreg-max extra registers are not specified ~s" (syntax->datum rfpextra*)))] + [else + (let ([extra* (reverse rextra*)] + [fpextra* (reverse rfpextra*)]) (list (list* (constant tc-ac0-disp) @@ -531,14 +541,17 @@ (constant tc-td-disp) (reverse rtc-disp*)) (list-head extra* (constant asm-arg-reg-cnt)) - (list-tail extra* (constant asm-arg-reg-cnt)))) - (syntax-error x (format "asm-arg-reg-max extra registers are not specified ~s" (syntax->datum rextra*)))) + (list-tail extra* (constant asm-arg-reg-cnt)) + fpextra*))]) (let ([other (car other*)]) (if (memq (syntax->datum other) '(%ac1 %yp %cp %ret)) - (f (cdr other*) (cons #`(tc-disp #,other) rtc-disp*) - arg-offset rextra*) - (f (cdr other*) (cons arg-offset rtc-disp*) - (fx+ arg-offset (constant ptr-bytes)) (cons other rextra*))))))] + (f (cdr other*) (cdr other-type*) (cons #`(tc-disp #,other) rtc-disp*) + arg-offset fp-offset rextra* rfpextra*) + (if (eq? (syntax->datum (car other-type*)) 'fp) + (f (cdr other*) (cdr other-type*) (cons fp-offset rtc-disp*) + arg-offset (fx+ fp-offset 8) rextra* (cons other rfpextra*)) + (f (cdr other*) (cdr other-type*) (cons arg-offset rtc-disp*) + (fx+ arg-offset (constant ptr-bytes)) fp-offset (cons other rextra*) rfpextra*))))))] [_ (syntax-error x "missing or out-of-order required registers")])] [(regid-loc ...) (generate-temporaries #'(regid ...))]) #'(begin @@ -560,36 +573,44 @@ (define-squawking-parameter regvec regvec-loc) (define-squawking-parameter arg-registers arg-registers-loc) (define-squawking-parameter extra-registers extra-registers-loc) + (define-squawking-parameter extra-fpregisters extra-fpregisters-loc) (define-syntax with-initialized-registers (syntax-rules () [(_ b1 b2 (... ...)) - (parameterize ([regid-loc (make-reg 'regid 'mdinfo tc-disp callee-save?)] ...) + (parameterize ([regid-loc (make-reg 'regid 'mdinfo tc-disp callee-save? 'type)] ...) (parameterize ([regvec-loc (vector regid ...)] [arg-registers-loc (list arg-regid ...)] - [extra-registers-loc (list extra-regid ...)]) + [extra-registers-loc (list extra-regid ...)] + [extra-fpregisters-loc (list extra-fpregid ...)]) (let () b1 b2 (... ...))))]))))]))) (define-syntax define-machine-dependent-registers (lambda (x) (syntax-case x () - [(_ [regid alias ... callee-save? mdinfo] ...) + [(_ [regid alias ... callee-save? mdinfo type] ...) #'(begin (begin - (define regid (make-reg 'regid 'mdinfo #f callee-save?)) + (define regid (make-reg 'regid 'mdinfo #f callee-save? 'type)) (module (alias ...) (define x regid) (define alias x) ...)) ...)]))) (define-syntax define-registers (lambda (x) (syntax-case x (reserved allocable machine-dependent) - [(k (reserved [rreg rreg-alias ... rreg-callee-save? rreg-mdinfo] ...) - (allocable [areg areg-alias ... areg-callee-save? areg-mdinfo] ...) - (machine-depdendent [mdreg mdreg-alias ... mdreg-callee-save? mdreg-mdinfo] ...)) - (with-implicit (k regvec arg-registers extra-registers real-register? with-initialized-registers) + [(k (reserved [rreg rreg-alias ... rreg-callee-save? rreg-mdinfo rreg-type] ...) + (allocable [areg areg-alias ... areg-callee-save? areg-mdinfo areg-type] ...) + (machine-depdendent [mdreg mdreg-alias ... mdreg-callee-save? mdreg-mdinfo mdreg-type] ...) + (reify-support reify-reg ...)) + (with-implicit (k regvec arg-registers extra-registers extra-fpregisters real-register? + cons-reify-registers with-initialized-registers) #`(begin - (define-reserved-registers [rreg rreg-alias ... rreg-callee-save? rreg-mdinfo] ...) - (define-allocable-registers regvec arg-registers extra-registers with-initialized-registers [areg areg-alias ... areg-callee-save? areg-mdinfo] ...) - (define-machine-dependent-registers [mdreg mdreg-alias ... mdreg-callee-save? mdreg-mdinfo] ...) + (define-reserved-registers [rreg rreg-alias ... rreg-callee-save? rreg-mdinfo rreg-type] ...) + (define-allocable-registers regvec arg-registers extra-registers extra-fpregisters with-initialized-registers + [areg areg-alias ... areg-callee-save? areg-mdinfo areg-type] ...) + (define-machine-dependent-registers [mdreg mdreg-alias ... mdreg-callee-save? mdreg-mdinfo mdreg-type] ...) + (define-syntax cons-reify-registers + (syntax-rules () + [(_ reg*) (cons* reify-reg ... reg*)])) (define-syntax real-register? (with-syntax ([real-reg* #''(rreg ... rreg-alias ... ... areg ... areg-alias ... ... mdreg ... mdreg-alias ... ...)]) (syntax-rules () @@ -598,7 +619,7 @@ (architecture registers) ; pseudo register used for mref's with no actual index - (define %zero (make-reg 'zero #f #f #f)) + (define %zero (make-reg 'zero #f #f #f #f)) ; define %ref-ret to be sfp[0] on machines w/no ret register (define-syntax %ref-ret @@ -625,10 +646,18 @@ (make-libspec-label 'event-detour (lookup-libspec event-detour) (reg-cons* %ret %cp %ac0 arg-registers)))) - (module (frame-vars get-fv) + ;; Both 'fp or both not + (define (compatible-var-types? t1 t2) + (cond + [(eq? t1 'fp) (eq? t2 'fp)] + [else (not (eq? t2 'fp))])) + + (module (frame-vars get-fv get-ptr-fv get-ret-fv compatible-fv?) (define-threaded frame-vars) (define get-fv - (lambda (x) + (case-lambda + [(x) (get-fv x 'uptr)] + [(x type) (let ([n (vector-length frame-vars)]) (when (fx>= x n) (let ([new-vec (make-vector (fxmax (fx+ x 1) (fx* n 2)) #f)]) @@ -639,9 +668,25 @@ (loop n)))) (set! frame-vars new-vec)))) (or (vector-ref frame-vars x) - (let ([fv ($make-fv x)]) + (let ([fv ($make-fv x (constant-case stack-word-alignment + [(2) (if (and (eq? type 'fp) + (fxodd? x)) + 'ptr + type)] + [(1) type]))]) (vector-set! frame-vars x fv) - fv))))) + fv))])) + (define get-ptr-fv + (lambda (x) + (let ([fv (get-fv x)]) + (safe-assert (not (memq (fv-type fv) '(fp reserved)))) + fv))) + (define get-ret-fv + (lambda () + (get-ptr-fv 0))) + (define (compatible-fv? fv type) + (and (not (eq? (fv-type fv) 'reserved)) + (compatible-var-types? (fv-type fv) type)))) (define-syntax reg-cons* (lambda (x) @@ -649,9 +694,12 @@ [(_ ?reg ... ?reg*) (fold-right (lambda (reg reg*) - (if (real-register? (syntax->datum reg)) - #`(cons #,reg #,reg*) - reg*)) + (cond + [(eq? (syntax->datum reg) 'reify-support) + #`(cons-reify-registers #,reg*)] + [(real-register? (syntax->datum reg)) + #`(cons #,reg #,reg*)] + [else reg*])) #'?reg* #'(?reg ...))]))) (define-syntax reg-list @@ -702,10 +750,10 @@ #`(cons* (ref-reg in) ... #,(if (memq 'scheme-args in*) (if (memq 'extra-regs in*) - #'(append arg-registers extra-registers) + #'(append arg-registers extra-registers extra-fpregisters) #'arg-registers) (if (memq 'extra-regs in*) - #'extra-registers + #'(append extra-registers extra-fpregisters) #''())))))]))) (define-syntax get-tcslot (lambda (x) @@ -713,7 +761,7 @@ [(_ k reg) (with-implicit (k in-context %mref) #'(in-context Lvalue - (%mref ,%tc ,(reg-tc-disp reg))))]))) + (%mref ,%tc ,%zero ,(reg-tc-disp reg) ,(reg-type reg))))]))) (define-syntax $save-scheme-state (lambda (x) (syntax-case x () @@ -914,8 +962,8 @@ (declare-intrinsic dofretu32* dofretu32* (%ac0 %ts %td %cp %ac1) (%ac0) (%xp)) (declare-intrinsic get-room get-room () (%xp) (%xp)) (declare-intrinsic scan-remembered-set scan-remembered-set () () ()) - (declare-intrinsic reify-1cc reify-1cc (%xp %ac0 %ts) () (%td)) - (declare-intrinsic maybe-reify-cc maybe-reify-cc (%xp %ac0 %ts) () (%td)) + (declare-intrinsic reify-1cc reify-1cc (%xp %ac0 reify-support) () (%td)) + (declare-intrinsic maybe-reify-cc maybe-reify-cc (%xp %ac0 reify-support) () (%td)) (declare-intrinsic dooverflow dooverflow () () ()) (declare-intrinsic dooverflood dooverflood () (%xp) ()) ; a dorest routine takes all of the register and frame arguments from the rest @@ -1004,6 +1052,10 @@ (sealed #t) (fields)) + (define-record-type info-unboxed-args (nongenerative) + (parent info) + (fields unboxed?*)) + (module () (record-writer (record-type-descriptor info-load) (lambda (x p wr) @@ -1127,12 +1179,15 @@ (define-syntax %mref (lambda (x) (syntax-case x () + [(k e0 e1 imm type) + (with-implicit (k quasiquote) + #'`(mref e0 e1 imm type))] [(k e0 e1 imm) (with-implicit (k quasiquote) - #'`(mref e0 e1 imm))] + #'`(mref e0 e1 imm uptr))] [(k e0 imm) (with-implicit (k quasiquote) - #'`(mref e0 ,%zero imm))]))) + #'`(mref e0 ,%zero imm uptr))]))) (define-syntax %inline (lambda (x) @@ -2690,8 +2745,8 @@ [(immediate ,imm) (values body 0 0)] [(quote ,d) (values body 0 0)] [(goto ,l) (values body 1 1)] - [(mref ,[loop : e1 -> e1-promise e1-size e1-new-size] ,[loop : e2 -> e2-promise e2-size e2-new-size] ,imm) - (values (delay `(mref ,(force e1-promise) ,(force e2-promise) ,imm)) + [(mref ,[loop : e1 -> e1-promise e1-size e1-new-size] ,[loop : e2 -> e2-promise e2-size e2-new-size] ,imm ,type) + (values (delay `(mref ,(force e1-promise) ,(force e2-promise) ,imm ,type)) (fx+ e1-size e2-size 1) (fx+ e1-new-size e2-new-size 1))] [,lvalue (values body 1 1)] @@ -2813,7 +2868,7 @@ body)]))) (Lvalue : Lvalue (ir rename-ht) -> Lvalue () [,x (eq-hashtable-ref rename-ht x x)] - [(mref ,[e1] ,[e2] ,imm) `(mref ,e1 ,e2 ,imm)]) + [(mref ,[e1] ,[e2] ,imm ,type) `(mref ,e1 ,e2 ,imm ,type)]) (Expr : Expr (ir rename-ht) -> Expr () [(loop ,x (,[Lvalue : x* rename-ht -> x*] ...) ,body) ;; NB: with-fresh is so well designed that it can't handle this case @@ -2850,6 +2905,86 @@ ir)))])) (set! $loop-unroll-limit loop-unroll-limit)) + (define (known-flonum-result? e) + (let flonum-result? ([e e] [fuel 10]) + (and + (fx> fuel 0) + (nanopass-case (L7 Expr) e + [,x (and (uvar? x) (eq? (uvar-type x) 'fp))] + [(quote ,d) (flonum? d)] + [(call ,info ,mdcl ,pr ,e* ...) + (eq? 'flonum ($sgetprop (primref-name pr) '*result-type* #f))] + [(seq ,e0 ,e1) (flonum-result? e1 (fx- fuel 1))] + [(let ([,x* ,e*] ...) ,body) (flonum-result? body (fx- fuel 1))] + [else #f])))) + + (define-pass np-unbox-fp-vars! : L7 (ir) -> L7 () + (definitions + (define ensure-not-unboxed! + (lambda (x) + (when (and (uvar? x) (eq? (uvar-type x) 'fp)) + (uvar-type-set! x 'ptr) + (let ([l (uvar-location x)]) + (when l + (uvar-location-set! x #f) + (for-each ensure-not-unboxed! l))))))) + (Expr : Expr (ir) -> Expr () + [(let ([,x* ,e*] ...) ,body) + (for-each (lambda (x e) + (nanopass-case (L7 Expr) e + [,x1 + (guard (and (uvar? x1) (eq? (uvar-type x1) 'fp))) + ;; propagate fp-ness + (uvar-location-set! x (cons x1 (or (uvar-location x) '())))] + [else + (Expr e)]) + (when (known-flonum-result? e) + (uvar-type-set! x 'fp))) + x* e*) + (Expr body) + (for-each (lambda (x) + (when (eq? (uvar-type x) 'fp) + (uvar-location-set! x #f))) + x*) + ir] + [(call ,info ,mdcl ,pr ,e* ...) + (guard (and (all-set? (prim-mask unboxed-arguments) (primref-flags pr)) + (fx<= (length e*) (constant inline-args-limit)))) + (for-each (lambda (e) + (nanopass-case (L7 Expr) e + [,x (void)] ; allow x to keep 'fp type + [else (Expr e)])) + e*) + ir] + [(loop ,x (,x* ...) ,body) + (safe-assert (uvar-loop? x)) + (uvar-location-set! x x*) + (Expr body) + (uvar-location-set! x #f) + ir] + [(call ,info ,mdcl ,x ,e* ...) + (guard (uvar-loop? x)) + (let ([x* (uvar-location x)]) + (for-each (lambda (x e) + (cond + [(eq? (uvar-type x) 'fp) + (nanopass-case (L7 Expr) e + [,x1 + (guard (and (uvar? x1) (eq? (uvar-type x1) 'fp))) + (uvar-location-set! x (cons x1 (or (uvar-location x) '()))) + (uvar-location-set! x1 (cons x (or (uvar-location x1) '())))] + [else + (Expr e) + (unless (known-flonum-result? e) + (ensure-not-unboxed! x))])] + [else (Expr e)])) + x* e*)) + ir]) + (Lvalue : Lvalue (ir) -> Lvalue () + [,x + (ensure-not-unboxed! x) + ir])) + (define target-fixnum? (if (and (= (constant most-negative-fixnum) (most-negative-fixnum)) (= (constant most-positive-fixnum) (most-positive-fixnum))) @@ -2890,6 +3025,12 @@ #'reg (with-implicit (k %mref) #`(%mref ,%tc ,(tc-disp reg))))]))) + ;; After the `np-expand-primitives` pass, some expression produce + ;; double (i.e., floating-point) values instead of pointer values. + ;; Those expression results always flow to an `inline` primitive + ;; that expects double values. The main consequence is that a later + ;; pass must only put such returns in a temporary with type 'fp. + ; TODO: recognize a direct call when it is at the end of a sequence, closures, or let form ; TODO: push call into if? (would need to pull arguments into temporaries to ensure order of evaluation ; TODO: how does this interact with mvcall? @@ -2899,10 +3040,10 @@ (define ht2 (make-hashtable symbol-hash eq?)) (define ht3 (make-hashtable symbol-hash eq?)) (define handle-prim - (lambda (src sexpr level name e*) + (lambda (src sexpr can-unbox-fp? level name e*) (let ([handler (or (and (fx= level 3) (symbol-hashtable-ref ht3 name #f)) (symbol-hashtable-ref ht2 name #f))]) - (and handler (handler src sexpr e*))))) + (and handler (handler src sexpr can-unbox-fp? e*))))) (define-syntax Symref (lambda (x) (syntax-case x () @@ -2919,7 +3060,7 @@ [(immediate ,imm) #t] [(literal ,info) #t] [(label-ref ,l ,offset) #t] - [(mref ,e1 ,e2 ,imm) #t] + [(mref ,e1 ,e2 ,imm ,type) #t] [(quote ,d) #t] [,pr #t] [(call ,info ,mdcl ,pr ,e* ...) @@ -2948,24 +3089,42 @@ `(values ,(make-info-call #f #f #f #f #f) ,e))))] [(e) (ensure-single-valued e (fx= (optimize-level) 3))])) (define-pass np-expand-primitives : L7 (ir) -> L9 () + (definitions + (define Expr1 + (lambda (e) + (let-values ([(e unboxed-fp?) (Expr e #f)]) + e))) + (define Expr* + (lambda (e*) + (map Expr1 e*))) + (define (fp-lvalue? lvalue) + (nanopass-case (L9 Lvalue) lvalue + [,x (and (uvar? x) (eq? (uvar-type x) 'fp))] + [(mref ,e1 ,e2 ,imm ,type) (eq? type 'fp)]))) (Program : Program (ir) -> Program () [(labels ([,l* ,le*] ...) ,l) (fluid-let ([new-l* '()] [new-le* '()]) (let ([le* (map CaseLambdaExpr le*)]) `(labels ([,l* ,le*] ... [,new-l* ,new-le*] ...) ,l)))]) (CaseLambdaExpr : CaseLambdaExpr (ir) -> CaseLambdaExpr ()) - (CaseLambdaClause : CaseLambdaClause (ir) -> CaseLambdaClause ()) - (Expr : Expr (ir) -> Expr () + (CaseLambdaClause : CaseLambdaClause (ir) -> CaseLambdaClause () + [(clause (,x* ...) ,mcp ,interface ,[body #f -> body unboxed-fp?]) + `(clause (,x* ...) ,mcp ,interface ,body)]) + (Expr : Expr (ir [can-unbox-fp? #f]) -> Expr (#f) [(quote ,d) - (cond - [(ptr->imm d) => (lambda (i) `(immediate ,i))] - [else `(literal ,(make-info-literal #f 'object d 0))])] - [,pr (Symref (primref-name pr))] + (values (cond + [(ptr->imm d) => (lambda (i) `(immediate ,i))] + [else `(literal ,(make-info-literal #f 'object d 0))]) + #f)] + [,pr (values (Symref (primref-name pr)) #f)] + [(unboxed-fp ,[e can-unbox-fp? -> e unboxed-fp?]) + (safe-assert can-unbox-fp?) + (values e #t)] [(call ,info0 ,mdcl0 (call ,info1 ,mdcl1 ,pr (quote ,d)) - ,[e*] ...) + ,[e* #f -> e* unboxed-fp?*] ...) (guard (and (eq? (primref-name pr) '$top-level-value) (symbol? d))) - `(call ,info0 ,mdcl0 ,(Symref d) ,e* ...)] + (values `(call ,info0 ,mdcl0 ,(Symref d) ,e* ...) #f)] [(call ,info ,mdcl ,pr ,e* ...) (cond [(and @@ -2973,19 +3132,21 @@ ;; Note: single-valued also implies that the primitive doesn't ;; tail-call an arbitary function (which might inspect attachments): (all-set? (prim-mask single-valued) (primref-flags pr))) - (handle-prim (info-call-src info) (info-call-sexpr info) (primref-level pr) (primref-name pr) e*)) + (handle-prim (info-call-src info) (info-call-sexpr info) can-unbox-fp? (primref-level pr) (primref-name pr) e*)) => (lambda (e) - (let ([e (Expr e)]) - (cond - [(info-call-shift-attachment? info) - (let ([t (make-tmp 't)]) - `(let ([,t ,e]) - (seq - (attachment-set pop #f) - ,t)))] - [else e])))] + (let-values ([(e unboxed-fp?) (Expr e can-unbox-fp?)]) + (values + (cond + [(info-call-shift-attachment? info) + (let ([t (make-tmp 't (if unboxed-fp? 'fp 'ptr))]) + `(let ([,t ,e]) + (seq + (attachment-set pop #f) + ,t)))] + [else e]) + unboxed-fp?)))] [else - (let ([e* (map Expr e*)]) + (let ([e* (Expr* e*)]) ; NB: expand calls through symbol top-level values similarly (let ([info (if (any-set? (prim-mask abort-op) (primref-flags pr)) (make-info-call (info-call-src info) (info-call-sexpr info) @@ -2993,7 +3154,89 @@ (info-call-shift-attachment? info) (info-call-shift-consumer-attachment?* info)) info)]) - `(call ,info ,mdcl ,(Symref (primref-name pr)) ,e* ...)))])])) + (values `(call ,info ,mdcl ,(Symref (primref-name pr)) ,e* ...) + #f)))])] + [(call ,info ,mdcl ,x ,e* ...) + (guard (uvar-loop? x)) + (let ([e* (map (lambda (x1 e) + (let ([unbox? (eq? (uvar-type x1) 'fp)]) + (let-values ([(e unboxed-fp?) (Expr e unbox?)]) + (cond + [(and unbox? (not unboxed-fp?)) + (%mref ,e ,%zero ,(constant flonum-data-disp) fp)] + [else e])))) + (uvar-location x) e*)]) + (values `(call ,info ,mdcl ,x ,e* ...) #f))] + [(call ,info ,mdcl ,e ,e* ...) + (let ([e (and e (Expr1 e))] + [e* (Expr* e*)]) + (values `(call ,info ,mdcl ,e ,e* ...) #f))] + [(inline ,info ,prim ,e* ...) + (cond + [(info-unboxed-args? info) + (let ([e* (map (lambda (e unbox-arg?) + (let-values ([(e unboxed-arg?) (Expr e unbox-arg?)]) + (if (and unbox-arg? (not unboxed-arg?)) + (%mref ,e ,%zero ,(constant flonum-data-disp) fp) + e))) + e* + (info-unboxed-args-unboxed?* info))]) + (values `(inline ,info ,prim ,e* ...) + ;; Especially likely to be replaced by enclosing `unboxed-fp` wrapper: + #f))] + [else + (let ([e* (Expr* e*)]) + (values `(inline ,info ,prim ,e* ...) #f))])] + [(set! ,[lvalue #f -> lvalue unboxed-fp?l] ,e) + (let ([fp? (fp-lvalue? lvalue)]) + (let-values ([(e unboxed?) (Expr e fp?)]) + (let ([e (if (and fp? (not unboxed?)) + (%mref ,e ,%zero ,(constant flonum-data-disp) fp) + e)]) + (values `(set! ,lvalue ,e) #f))))] + [(values ,info ,[e* #f -> e* unboxed-fp?*] ...) (values `(values ,info ,e* ...) #f)] + [(alloc ,info ,e) (values `(alloc ,info ,(Expr1 e)) #f)] + [(if ,[e0 #f -> e0 unboxed-fp?0] ,[e1 can-unbox-fp? -> e1 unboxed-fp?1] ,[e2 can-unbox-fp? -> e2 unboxed-fp?2]) + (let* ([unboxed-fp? (or unboxed-fp?1 unboxed-fp?2)] + [e1 (if (and unboxed-fp? (not unboxed-fp?1)) + (%mref ,e1 ,%zero ,(constant flonum-data-disp) fp) + e1)] + [e2 (if (and unboxed-fp? (not unboxed-fp?2)) + (%mref ,e2 ,%zero ,(constant flonum-data-disp) fp) + e2)]) + (values `(if ,e0 ,e1 ,e2) unboxed-fp?))] + [(seq ,[e0 #f -> e0 unboxed-fp?0] ,[e1 can-unbox-fp? -> e1 unboxed-fp?]) + (values `(seq ,e0 ,e1) unboxed-fp?)] + [(let ([,x* ,e*] ...) ,body) + (let ([e* (map (lambda (x e) + (if (eq? (uvar-type x) 'fp) + (let-values ([(e unboxed?) (Expr e #t)]) + (if (not unboxed?) + (%mref ,e ,%zero ,(constant flonum-data-disp) fp) + e)) + (Expr1 e))) + x* e*)]) + (let-values ([(body unboxed-fp?) (Expr body can-unbox-fp?)]) + (values `(let ([,x* ,e*] ...) ,body) unboxed-fp?)))] + [(loop ,x (,x* ...) ,body) + (uvar-location-set! x x*) + (let-values ([(body unboxed-fp?) (Expr body can-unbox-fp?)]) + (uvar-location-set! x #f) + (values `(loop ,x (,x* ...) ,body) unboxed-fp?))] + [(attachment-set ,aop ,e) (values `(attachment-set ,aop ,(and e (Expr1 e))) #f)] + [(attachment-get ,reified ,e) (values `(attachment-get ,reified ,(and e (Expr1 e))) #f)] + [(attachment-consume ,reified ,e) (values `(attachment-consume ,reified ,(and e (Expr1 e))) #f)] + [(continuation-set ,cop ,e1 ,e2) (values `(continuation-set ,cop ,(Expr1 e1) ,(Expr1 e2)) #f)] + [(label ,l ,[body can-unbox-fp? -> body unboxed-fp?]) (values `(label ,l ,body) unboxed-fp?)] + [(foreign-call ,info ,e ,e* ...) (values `(foreign-call ,info ,(Expr1 e) ,(Expr* e*) ...) #f)] + [(mvcall ,info ,e1 ,e2) (values `(mvcall ,info ,(Expr1 e1) ,(Expr1 e2)) #f)] + [(mvlet ,e ((,x** ...) ,interface* ,body*) ...) + (values `(mvlet ,(Expr1 e) ((,x** ...) ,interface* ,(map Expr1 body*)) ...) #f)]) + (Lvalue : Lvalue (ir [unboxed-fp? #f]) -> Lvalue (#f) + [(mref ,e1 ,e2 ,imm ,type) (values `(mref ,(Expr1 e1) ,(Expr1 e2) ,imm ,type) #f)] + [,x + (safe-assert (or unboxed-fp? (not (and (uvar? x) (eq? (uvar-type x) 'fp))))) + (values x (and (uvar? x) (eq? (uvar-type x) 'fp)))])) (define-who unhandled-arity (lambda (name args) (sorry! who "unhandled argument count ~s for ~s" (length args) 'name))) @@ -3035,10 +3278,10 @@ (unless (= (bitmaskify arity) (bitmaskify (map compute-interface #'(clause ...)))) (syntax-error x (format "arity mismatch for ~s" name)))))) (check-and-record level #'id) - (with-implicit (k src sexpr moi) + (with-implicit (k src sexpr moi can-unbox-fp?) #`(symbol-hashtable-set! #,(if (eqv? level 2) #'ht2 #'ht3) 'id (rec moi - (lambda (src sexpr args) + (lambda (src sexpr can-unbox-fp? args) (apply (case-lambda clause ... [rest #f]) args))))))])))) (define no-need-to-bind? (lambda (multiple-ref? e) @@ -3057,8 +3300,10 @@ (values e values) (let ([t (make-tmp 't type)]) (values t - (lambda (body) - `(let ([,t ,e]) ,body))))))) + (lambda (body) + (nanopass-case (L7 Expr) body + [(unboxed-fp ,body) `(unboxed-fp (let ([,t ,e]) ,body))] + [else `(let ([,t ,e]) ,body)]))))))) (define list-binder (lambda (multiple-ref? type e*) (if (null? e*) @@ -3225,6 +3470,9 @@ (if (null? e*) check (build-and check (f e*)))))))))) + (define build-fl= + (lambda (e1 e2) ; must be bound + `(inline ,(make-info-unboxed-args '(#t #t)) ,%fp= ,e1 ,e2))) (define build-chars? (lambda (e1 e2) (define char-constant? @@ -3337,7 +3585,7 @@ ,(f (cdr args) (fx+ offset (constant ptr-bytes))))))))))))) (define build-$real->flonum (lambda (src sexpr x who) - (if (constant? flonum? x) + (if (known-flonum-result? x) x (bind #t (x) (bind #f (who) @@ -3676,8 +3924,10 @@ (case-lambda [(swapped? type base offset-expr) (let-values ([(index offset) (offset-expr->index+offset offset-expr)]) - (build-object-ref swapped? type base index offset))] + (build-object-ref swapped? type base index offset #f))] [(swapped? type base index offset) + (build-object-ref swapped? type base index offset #f)] + [(swapped? type base index offset can-unbox-fp?) (case type [(scheme-object) `(inline ,(make-info-load ptr-type swapped?) ,%load ,base ,index (immediate ,offset))] [(double-float) @@ -3703,13 +3953,17 @@ (immediate ,offset))) ,t)))]) (bind #f (base index) - (bind #t ([t (%constant-alloc type-flonum (constant size-flonum))]) - (%seq - (inline ,(make-info-loadfl %flreg1) ,%load-double - ,base ,index (immediate ,offset)) - (inline ,(make-info-loadfl %flreg1) ,%store-double - ,t ,%zero ,(%constant flonum-data-disp)) - ,t))))] + (cond + [can-unbox-fp? + `(unboxed-fp ,(%mref ,base ,index ,offset fp))] + [else + (bind #t ([t (%constant-alloc type-flonum (constant size-flonum))]) + (%seq + (inline ,(make-info-loadfl %fptmp1) ,%load-double + ,base ,index (immediate ,offset)) + (inline ,(make-info-loadfl %fptmp1) ,%store-double + ,t ,%zero ,(%constant flonum-data-disp)) + ,t))])))] [(single-float) (if swapped? (bind #f (base index) @@ -3718,17 +3972,17 @@ (set! ,(%mref ,t ,(constant flonum-data-disp)) (inline ,(make-info-load 'unsigned-32 #t) ,%load ,base ,index (immediate ,offset))) - (inline ,(make-info-loadfl %flreg1) ,%load-single->double + (inline ,(make-info-loadfl %fptmp1) ,%load-single->double ,t ,%zero ,(%constant flonum-data-disp)) - (inline ,(make-info-loadfl %flreg1) ,%store-double + (inline ,(make-info-loadfl %fptmp1) ,%store-double ,t ,%zero ,(%constant flonum-data-disp)) ,t))) (bind #f (base index) (bind #t ([t (%constant-alloc type-flonum (constant size-flonum))]) (%seq - (inline ,(make-info-loadfl %flreg1) ,%load-single->double + (inline ,(make-info-loadfl %fptmp1) ,%load-single->double ,base ,index (immediate ,offset)) - (inline ,(make-info-loadfl %flreg1) ,%store-double + (inline ,(make-info-loadfl %fptmp1) ,%store-double ,t ,%zero ,(%constant flonum-data-disp)) ,t))))] [(integer-8 integer-16 integer-24 integer-32 integer-40 integer-48 integer-56 integer-64) @@ -3806,18 +4060,22 @@ (case type [(scheme-object) (build-dirty-store base index offset value)] [(double-float) + (bind #f (base index) + (bind #f fp (value) + `(set! ,(%mref ,base ,index ,offset fp) ,value))) + #; (bind #f (base index) (%seq - (inline ,(make-info-loadfl %flreg1) ,%load-double + (inline ,(make-info-loadfl %fptmp1) ,%load-double ,value ,%zero ,(%constant flonum-data-disp)) - (inline ,(make-info-loadfl %flreg1) ,%store-double + (inline ,(make-info-loadfl %fptmp1) ,%store-double ,base ,index (immediate ,offset))))] [(single-float) (bind #f (base index) (%seq - (inline ,(make-info-loadfl %flreg1) ,%load-double->single + (inline ,(make-info-loadfl %fptmp1) ,%load-double->single ,value ,%zero ,(%constant flonum-data-disp)) - (inline ,(make-info-loadfl %flreg1) ,%store-single + (inline ,(make-info-loadfl %fptmp1) ,%store-single ,base ,index (immediate ,offset))))] ; 40-bit+ only on 64-bit machines [(integer-8 integer-16 integer-24 integer-32 integer-40 integer-48 integer-56 integer-64 @@ -3984,7 +4242,7 @@ ,(%inline logor ,(%mref ,e-v ,(constant type-disp)) (immediate ,(constant immutable-flag))))))])) - (define inline-args-limit 10) + (define inline-args-limit (constant inline-args-limit)) (define reduce-equality (lambda (src sexpr moi e1 e2 e*) (and (fx<= (length e*) (fx- inline-args-limit 2)) @@ -3993,8 +4251,8 @@ (list-bind #f (e*) (let compare ([src src] [e2 e2] [e* e*]) (if (null? e*) - (moi src sexpr (list e1 e2)) - `(if ,(moi src sexpr (list e1 e2)) + (moi src sexpr #f (list e1 e2)) + `(if ,(moi src sexpr #f (list e1 e2)) ,(compare #f (car e*) (cdr e*)) (quote #f)))))))))) (define reduce-inequality @@ -4006,8 +4264,8 @@ (let compare ([src src] [e* (cons e1 (reverse (cons e2 re*)))]) (let ([more-args (cddr e*)]) (if (null? more-args) - (moi src sexpr e*) - `(if ,(moi src sexpr (list (car e*) (cadr e*))) + (moi src sexpr #f e*) + `(if ,(moi src sexpr #f (list (car e*) (cadr e*))) ,(compare #f (cdr e*)) (quote #f)))))) (bind #t ([e2 e2]) (f (car e*) (cdr e*) (cons e2 re*)))))))) @@ -4019,7 +4277,24 @@ (let reduce ([src src] [e e] [e* e*]) (if (null? e*) e - (reduce #f (moi src sexpr (list e (car e*))) (cdr e*))))))))) + (reduce #f (moi src sexpr #f (list e (car e*))) (cdr e*))))))))) + (define reduce-fp-compare ; suitable for arguments known or assumed to produce flonums + (lambda (reduce) + (lambda (src sexpr moi e1 e2 e*) + (and (fx<= (length e*) (fx- inline-args-limit 2)) + (bind #t fp (e1) + (bind #f fp (e2) + (list-bind #f fp (e*) + (reduce src sexpr moi e1 e2 e*)))))))) + (define reduce-fp ; specialized reducer supports unboxing for nesting + (lambda (src sexpr level name e e*) + (and (fx<= (length e*) (fx- inline-args-limit 1)) + (let ([pr (lookup-primref level name)]) + (let reduce ([e e] [src src] [sexpr sexpr] [e* e*]) + (if (null? e*) + e + (reduce `(call ,(make-info-call src sexpr #f #f #f) #f ,pr ,e ,(car e*)) + #f #f (cdr e*)))))))) (module (relop-length RELOP< RELOP<= RELOP= RELOP>= RELOP>) (define RELOP< -2) (define RELOP<= -1) @@ -6332,7 +6607,7 @@ (if ($nan? d) ;; NaN: invert `fl=` on self (bind #t (e2) - (build-not (%inline fl= ,e2 ,e2))) + (build-not (build-fl= e2 e2))) ;; Non-NaN: compare bits (constant-case ptr-bits [(32) @@ -6926,13 +7201,13 @@ (define-relop-inline >= r6rs:>= RELOP>= >=) (define-relop-inline > r6rs:> RELOP> >)) (define-inline 3 positive? ; 3 so opt-level 2 errors come from positive? - [(e) (handle-prim src sexpr 3 '> (list e `(quote 0)))]) + [(e) (handle-prim src sexpr #f 3 '> (list e `(quote 0)))]) (define-inline 3 nonnegative? ; 3 so opt-level 2 errors come from nonnegative? - [(e) (handle-prim src sexpr 3 '>= (list e `(quote 0)))]) + [(e) (handle-prim src sexpr #f 3 '>= (list e `(quote 0)))]) (define-inline 3 negative? ; 3 so opt-level 2 errors come from negative? - [(e) (handle-prim src sexpr 3 '< (list e `(quote 0)))]) + [(e) (handle-prim src sexpr #f 3 '< (list e `(quote 0)))]) (define-inline 3 nonpositive? ; 3 so opt-level 2 errors come from nonpositive? - [(e) (handle-prim src sexpr 3 '<= (list e `(quote 0)))]) + [(e) (handle-prim src sexpr #f 3 '<= (list e `(quote 0)))]) (define-inline 2 zero? [(e) (or (relop-length RELOP= e) @@ -7078,7 +7353,7 @@ ,(build-libcall #t src sexpr logtest e1 e2)))]) (define-inline 3 $flhash [(e) (bind #t (e) - `(if ,(%inline fl= ,e ,e) + `(if ,(build-fl= e e) ,(%inline logand ,(%inline srl ,(constant-case ptr-bits @@ -7107,7 +7382,7 @@ (%inline sll ,body (immediate ,(fx- 0 cnt))) body))) (immediate ,mask))))) - +< (define-inline 3 fllp [(e) (build-flonum-extractor 19 12 e)]) @@ -7120,7 +7395,7 @@ (define-inline 3 $fleqv? [(e1 e2) (bind #t (e1 e2) - `(if ,(%inline fl= ,e1 ,e1) ; check e1 not +nan.0 + `(if ,(build-fl= e1 e1) ; check e1 not +nan.0 ,(constant-case ptr-bits [(32) (build-and (%inline eq? @@ -7136,110 +7411,78 @@ "$fleqv doesn't handle ptr-bits = ~s" (constant ptr-bits))]) ;; If e1 is +nan.0, see if e2 is +nan.0: - ,(build-not (%inline fl= ,e2 ,e2))))]) + ,(build-not (build-fl= e2 e2))))]) (let () - (define build-flop-1 - ; NB: e must be bound - (lambda (op e) - (bind #t ([t (%constant-alloc type-flonum (constant size-flonum))]) - `(seq (inline ,null-info ,op ,e ,t) ,t)))) - (define build-flop-2 - ; NB: e1 and e2 must be bound - (lambda (op e1 e2) - (bind #t ([t (%constant-alloc type-flonum (constant size-flonum))]) - `(seq (inline ,null-info ,op ,e1 ,e2 ,t) ,t)))) + (define build-fp-boxed + (lambda (can-unbox-fp? e) + (if can-unbox-fp? + `(unboxed-fp ,e) + (bind #t ([t (%constant-alloc type-flonum (constant size-flonum))]) + `(seq + (set! ,(%mref ,t ,%zero ,(constant flonum-data-disp) fp) (unboxed-fp ,e)) + ,t))))) + (define (unboxed-explicit->implicit e) + (nanopass-case (L7 Expr) e + [(unboxed-fp ,e) e] + [else (%mref ,e ,%zero ,(constant flonum-data-disp) fp)])) + (define build-fp-op-1 + (lambda (can-unbox-fp? op e) + (bind #f fp (e) + (build-fp-boxed can-unbox-fp? (if (procedure? op) (op e) `(inline ,(make-info-unboxed-args '(#t)) ,op ,e)))))) + (define build-fp-op-2 + (lambda (can-unbox-fp? op e1 e2) + (bind #f fp (e1 e2) + (build-fp-boxed can-unbox-fp? `(inline ,(make-info-unboxed-args '(#t #t)) ,op ,e1 ,e2))))) + (define build-fl-adjust-sign + (lambda (e can-unbox-fp? combine base) + (build-fp-boxed + can-unbox-fp? + (constant-case ptr-bits + [(64) + (let ([t (make-tmp 'flsgn)]) + `(let ([,t (inline ,(make-info-unboxed-args '(#t)) ,%fpcastto ,e)]) + (inline ,null-info ,%fpcastfrom (inline ,null-info ,combine ,t ,base))))] + [(32) + (let ([thi (make-tmp 'flsgnh)] + [tlo (make-tmp 'flsgnl)]) + (bind #t fp (e) + `(let ([,thi (inline ,(make-info-unboxed-args '(#t)) ,%fpcastto/hi ,e)] + [,tlo (inline ,(make-info-unboxed-args '(#t)) ,%fpcastto/lo ,e)]) + (inline ,null-info ,%fpcastfrom (inline ,null-info ,combine ,thi ,base) ,tlo))))])))) (define build-flabs - (lambda (e) - (bind (constant-case ptr-bits [(32) #t] [(64) #f]) (e) - (bind #t ([t (%constant-alloc type-flonum (constant size-flonum))]) - (%seq - ,(constant-case ptr-bits - [(64) - `(set! ,(%mref ,t ,(constant flonum-data-disp)) - ,(%inline logand - ,(%mref ,e ,(constant flonum-data-disp)) - ,(%inline srl (immediate -1) (immediate 1))))] - [(32) - (let () - (constant-case native-endianness - [(big) - (begin - (define disp-high (constant flonum-data-disp)) - (define disp-low (fx+ (constant flonum-data-disp) 4)))] - [(little) - (begin - (define disp-low (constant flonum-data-disp)) - (define disp-high (fx+ (constant flonum-data-disp) 4)))]) - (%seq - (set! ,(%mref ,t ,disp-high) - ,(%inline logand - ,(%mref ,e ,disp-high) - ,(%inline srl (immediate -1) (immediate 1)))) - (set! ,(%mref ,t ,disp-low) - ,(%mref ,e ,disp-low))))]) - ,t))))) + (lambda (e can-unbox-fp?) + (build-fl-adjust-sign e can-unbox-fp? %logand (%inline srl (immediate -1) (immediate 1))))) (define build-flneg - (lambda (e) - (bind (constant-case ptr-bits [(32) #t] [(64) #f]) (e) - (bind #t ([t (%constant-alloc type-flonum (constant size-flonum))]) - (%seq - ,(constant-case ptr-bits - [(64) - `(set! ,(%mref ,t ,(constant flonum-data-disp)) - ,(%inline logxor - ,(%mref ,e ,(constant flonum-data-disp)) - ,(%inline sll (immediate 1) (immediate 63))))] - [(32) - (let () - (constant-case native-endianness - [(big) - (begin - (define disp-high (constant flonum-data-disp)) - (define disp-low (fx+ (constant flonum-data-disp) 4)))] - [(little) - (begin - (define disp-low (constant flonum-data-disp)) - (define disp-high (fx+ (constant flonum-data-disp) 4)))]) - (%seq - (set! ,(%mref ,t ,disp-high) - ,(%inline logxor - ,(%mref ,e ,disp-high) - ,(%inline sll (immediate 1) (immediate 31)))) - (set! ,(%mref ,t ,disp-low) - ,(%mref ,e ,disp-low))))]) - ,t))))) - - ;; TODO: Rather then reducing here, (which will allocate a new flonum for each interim result) - ;; we could allocate a single flonum and reuse it until the final result is calculated. - ;; Better yet, we could do this across nested fl operations, so that only one flonum is - ;; allocated across nested fl+, fl*, fl-, fl/ etc. operation + (lambda (e can-unbox-fp?) + (build-fl-adjust-sign e can-unbox-fp? %logxor (%inline sll (immediate -1) (immediate ,(fx- (constant ptr-bits) 1)))))) + (define-inline 3 fl+ [() `(quote 0.0)] [(e) (ensure-single-valued e)] - [(e1 e2) (bind #f (e1 e2) (build-flop-2 %fl+ e1 e2))] - [(e1 . e*) (reduce src sexpr moi e1 e*)]) - + [(e1 e2) (build-fp-op-2 can-unbox-fp? %fp+ e1 e2)] + [(e1 . e*) (reduce-fp src sexpr 3 'fl+ e1 e*)]) + (define-inline 3 fl* [() `(quote 1.0)] [(e) (ensure-single-valued e)] - [(e1 e2) (bind #f (e1 e2) (build-flop-2 %fl* e1 e2))] - [(e1 . e*) (reduce src sexpr moi e1 e*)]) + [(e1 e2) (build-fp-op-2 can-unbox-fp? %fp* e1 e2)] + [(e1 . e*) (reduce-fp src sexpr 3 'fl* e1 e*)]) (define-inline 3 fl- - [(e) (build-flneg e)] - [(e1 e2) (bind #f (e1 e2) (build-flop-2 %fl- e1 e2))] - [(e1 . e*) (reduce src sexpr moi e1 e*)]) + [(e) (build-flneg e can-unbox-fp?)] + [(e1 e2) (build-fp-op-2 can-unbox-fp? %fp- e1 e2)] + [(e1 . e*) (reduce-fp src sexpr 3 'fl- e1 e*)]) (define-inline 3 fl/ - [(e) (bind #f (e) (build-flop-2 %fl/ `(quote 1.0) e))] - [(e1 e2) (bind #f (e1 e2) (build-flop-2 %fl/ e1 e2))] - [(e1 . e*) (reduce src sexpr moi e1 e*)]) + [(e) (build-fp-op-2 can-unbox-fp? %fp/ `(quote 1.0) e)] + [(e1 e2) (build-fp-op-2 can-unbox-fp? %fp/ e1 e2)] + [(e1 . e*) (reduce-fp src sexpr 3 'fl/ e1 e*)]) (define-inline 3 flsqrt [(e) (constant-case architecture - [(x86 x86_64 arm32) (bind #f (e) (build-flop-1 %flsqrt e))] + [(x86 x86_64 arm32) (build-fp-op-1 can-unbox-fp? %fpsqrt e)] [(ppc32) #f])]) (define-inline 3 flround @@ -7247,7 +7490,7 @@ [(e) (build-libcall #f src sexpr flround e)]) (define-inline 3 flabs - [(e) (build-flabs e)]) + [(e) (build-flabs e can-unbox-fp?)]) (let () (define build-fl-make-rectangular @@ -7258,13 +7501,13 @@ (set! ,(%mref ,t ,(constant inexactnum-type-disp)) ,(%constant type-inexactnum)) ,(%seq - (inline ,(make-info-loadfl %flreg1) ,%load-double + (inline ,(make-info-loadfl %fptmp1) ,%load-double ,e1 ,%zero ,(%constant flonum-data-disp)) - (inline ,(make-info-loadfl %flreg1) ,%store-double + (inline ,(make-info-loadfl %fptmp1) ,%store-double ,t ,%zero ,(%constant inexactnum-real-disp)) - (inline ,(make-info-loadfl %flreg1) ,%load-double + (inline ,(make-info-loadfl %fptmp1) ,%load-double ,e2 ,%zero ,(%constant flonum-data-disp)) - (inline ,(make-info-loadfl %flreg1) ,%store-double + (inline ,(make-info-loadfl %fptmp1) ,%store-double ,t ,%zero ,(%constant inexactnum-imag-disp)) ,t)))))) @@ -7274,10 +7517,10 @@ (define-inline 3 cfl- [(e) (bind #t (e) `(if ,(%type-check mask-flonum type-flonum ,e) - ,(build-flneg e) + ,(build-flneg e #f) ,(build-fl-make-rectangular - (build-flneg (build-$inexactnum-real-part e)) - (build-flneg (build-$inexactnum-imag-part e)))))] + (build-flneg (build-$inexactnum-real-part e) #f) + (build-flneg (build-$inexactnum-imag-part e) #f))))] [(e1 e2) (build-libcall #f src sexpr cfl- e1 e2)] ; TODO: add 3 argument version of cfl- library function #;[(e1 e2 e3) (build-libcall #f src sexpr cfl- e1 e2 e3)] @@ -7312,7 +7555,7 @@ ,e ,(build-fl-make-rectangular (build-$inexactnum-real-part e) - (build-flneg (build-$inexactnum-imag-part e)))))])) + (build-flneg (build-$inexactnum-imag-part e) #f))))])) (define-inline 3 $make-exactnum [(e1 e2) (bind #f (e1 e2) @@ -7325,9 +7568,15 @@ ,t)))]) (let () - (define (build-fl< e1 e2) (%inline fl< ,e1 ,e2)) - (define (build-fl= e1 e2) (%inline fl= ,e1 ,e2)) - (define (build-fl<= e1 e2) (%inline fl<= ,e1 ,e2)) + (define (build-fl< e1 e2) `(inline ,(make-info-unboxed-args '(#t #t)) ,%fp< ,e1 ,e2)) + (define build-fl= + (case-lambda + [(e) (if (constant nan-single-comparison-true?) + (%seq ,e (quote #t)) + (bind #t fp (e) (build-fl= e e)))] + [(e1 e2) (bind #f fp (e1 e2) + `(inline ,(make-info-unboxed-args '(#t #t)) ,%fp= ,e1 ,e2))])) + (define (build-fl<= e1 e2) `(inline ,(make-info-unboxed-args '(#t #t)) ,%fp<= ,e1 ,e2)) (let () (define-syntax define-fl-cmp-inline @@ -7336,11 +7585,11 @@ [(_ op r6rs:op builder inequality? swapped?) (with-syntax ([(args ...) (if (datum swapped?) #'(e2 e1) #'(e1 e2))] [reducer (if (datum inequality?) - #'reduce-inequality - #'reduce-equality)]) + #'(reduce-fp-compare reduce-inequality) + #'(reduce-fp-compare reduce-equality))]) #'(begin (define-inline 3 op - [(e) (bind #t (e) (build-fl= e e))] + [(e) (build-fl= e)] [(e1 e2) (builder args ...)] [(e1 e2 . e*) (reducer src sexpr moi e1 e2 e*)]) (define-inline 3 r6rs:op @@ -7356,25 +7605,76 @@ (define-syntax build-bind-and-check (syntax-rules () [(_ src sexpr op e1 e2 body) - (bind #t (e1 e2) - `(if ,(build-and - (%type-check mask-flonum type-flonum ,e1) - (%type-check mask-flonum type-flonum ,e2)) - ,body - ,(build-libcall #t src sexpr op e1 e2)))])) + (if (known-flonum-result? e1) + (if (known-flonum-result? e2) + body + (bind #t (e2) + `(if ,(%type-check mask-flonum type-flonum ,e2) + ,body + ,(build-libcall #t src sexpr op e2 e2)))) + (if (known-flonum-result? e2) + (bind #t (e1) + `(if ,(%type-check mask-flonum type-flonum ,e1) + ,body + ,(build-libcall #t src sexpr op e1 e1))) + (bind #t (e1 e2) + `(if ,(build-and + (%type-check mask-flonum type-flonum ,e1) + (%type-check mask-flonum type-flonum ,e2)) + ,body + ,(build-libcall #t src sexpr op e1 e2)))))])) + (define build-check-fp-arguments + (lambda (e* build-libcall k) + (let loop ([e* e*] [check-e* '()] [all-e* '()]) + (cond + [(null? e*) + (let loop ([check-e* (reverse check-e*)]) + (cond + [(null? check-e*) (apply k (reverse all-e*))] + [(null? (cdr check-e*)) + (let ([e1 (car check-e*)]) + `(if ,(%type-check mask-flonum type-flonum ,e1) + ,(loop '()) + ,(build-libcall e1 e1)))] + [else + (let ([e1 (car check-e*)] + [e2 (cadr check-e*)]) + `(if ,(build-and + (%type-check mask-flonum type-flonum ,e1) + (%type-check mask-flonum type-flonum ,e2)) + ,(loop (cddr check-e*)) + ,(build-libcall e1 e2)))]))] + [else + (let ([e1 (car e*)]) + (if (known-flonum-result? e1) + (loop (cdr e*) check-e* (cons e1 all-e*)) + (bind #t (e1) + (loop (cdr e*) (cons e1 check-e*) (cons e1 all-e*)))))])))) (define-syntax define-fl-cmp-inline (lambda (x) (syntax-case x () [(_ op r6rs:op builder inequality? swapped?) - (with-syntax ([(args ...) (if (datum swapped?) #'(e2 e1) #'(e1 e2))]) + (with-syntax ([(args ...) (if (datum swapped?) #'(e2 e1) #'(e1 e2))] + [reducer (if (datum inequality?) + #'(reduce-fp-compare reduce-inequality) + #'(reduce-fp-compare reduce-equality))]) #'(begin (define-inline 2 op - [(e) #f] + [(e1) (if (known-flonum-result? e1) + (build-fl= e1) + (bind #t (e1) + `(if ,(%type-check mask-flonum type-flonum ,e1) + ,(build-fl= e1) + ,(build-libcall #t src sexpr op e1 e1))))] [(e1 e2) (build-bind-and-check src sexpr op e1 e2 (builder args ...))] - [(e1 e2 . e*) #f]) + [(e1 e2 . e*) (build-check-fp-arguments (cons* e1 e2 e*) + (lambda (e1 e2) (build-libcall #t src sexpr op e1 e2)) + (lambda (e1 e2 . e*) (reducer src sexpr moi e1 e2 e*)))]) (define-inline 2 r6rs:op [(e1 e2) (build-bind-and-check src sexpr r6rs:op e1 e2 (builder args ...))] - [(e1 e2 . e*) #f])))]))) + [(e1 e2 . e*) (build-check-fp-arguments (cons* e1 e2 e*) + (lambda (e1 e2) (build-libcall #t src sexpr r6rs:op e1 e2)) + (lambda (e1 e2 . e*) (reducer src sexpr moi e1 e2 e*)))])))]))) (define-fl-cmp-inline fl= fl=? build-fl= #f #f) (define-fl-cmp-inline fl< flimplicit (build-flneg e #t))) can-unbox-fp? + (lambda (e) + (build-libcall #t src sexpr flnegate e)))] + [(e1 e2) (build-checked-fp-op e1 e2 %fp- can-unbox-fp? (lambda (e1 e2) (build-libcall #t src sexpr fl- e1 e2)))] - ; TODO: add 3 argument fl- library function - #;[(e1 e2 e3) (build-checked flop e1 e2 e3 %fl- - (lambda (e1 e2 e3) - (build-libcall #t src sexpr fl- e1 e2 e3)))] - [(e1 . e*) #f]) + [(e1 . e*) (reduce-fp src sexpr 2 'fl- e1 e*)]) (define-inline 2 fl/ - [(e) (build-checked-flop `(quote 1.0) e %fl/ + [(e) (build-checked-fp-op `(quote 1.0) e %fp/ can-unbox-fp? (lambda (e1 e2) (build-libcall #t src sexpr fl/ e1 e2)))] - [(e1 e2) (build-checked-flop e1 e2 %fl/ + [(e1 e2) (build-checked-fp-op e1 e2 %fp/ can-unbox-fp? (lambda (e1 e2) (build-libcall #t src sexpr fl/ e1 e2)))] - ; TODO: add 3 argument fl/ library function - #;[(e1 e2 e3) (build-checked flop e1 e2 e3 %fl/ - (lambda (e1 e2 e3) - (build-libcall #t src sexpr fl/ e1 e2 e3)))] - [(e1 . e*) #f]))) + [(e1 . e*) (reduce-fp src sexpr 2 'fl/ e1 e*)]) + + (define-inline 2 flabs + [(e) (build-checked-fp-op e (lambda (e) (unboxed-explicit->implicit (build-flabs e #t))) can-unbox-fp? + (lambda (e) + (build-libcall #t src sexpr flabs e)))]))) ; NB: assuming that we have a trunc instruction for now, will need to change to support Sparc (define-inline 3 flonum->fixnum @@ -7496,20 +7810,30 @@ (let () (define build-fixnum->flonum ; NB: x must already be bound in order to ensure it is done before the flonum is allocated - (lambda (e-x) - (bind #t ([t (%constant-alloc type-flonum (constant size-flonum))]) - (%seq - ,(%inline flt ,(build-unfix e-x) ,t) - ,t)))) + (lambda (e-x can-unbox-fp? k) + (let ([e (%inline fpt ,(build-unfix e-x))]) + (if can-unbox-fp? + `(unboxed-fp ,(k e)) + (k (bind #t ([t (%constant-alloc type-flonum (constant size-flonum))]) + (%seq + (set! ,(%mref ,t ,%zero ,(constant flonum-data-disp) fp) (unboxed-fp ,e)) + ,t))))))) (define-inline 3 fixnum->flonum - [(e-x) (bind #f (e-x) (build-fixnum->flonum e-x))]) + [(e-x) (bind #f (e-x) (build-fixnum->flonum e-x can-unbox-fp? values))]) + (define-inline 2 fixnum->flonum + [(e-x) (bind #t (e-x) + (build-fixnum->flonum e-x can-unbox-fp? + (lambda (e) + `(if ,(%type-check mask-fixnum type-fixnum ,e-x) + ,e + ,(build-libcall #t src sexpr fixnum->flonum e-x)))))]) (define-inline 2 real->flonum [(e-x) - (if (constant? flonum? e-x) + (if (known-flonum-result? e-x) e-x (bind #t (e-x) `(if ,(%type-check mask-fixnum type-fixnum ,e-x) - ,(build-fixnum->flonum e-x) + ,(build-fixnum->flonum e-x #f values) (if ,(%type-check mask-flonum type-flonum ,e-x) ,e-x ,(build-libcall #t src sexpr real->flonum e-x `(quote real->flonum))))))])) @@ -8041,7 +8365,7 @@ (label ,L2 (seq ,(%inline pause) - (if ,(%inline eq? (mref ,e-base ,e-index ,imm-offset) (immediate 0)) + (if ,(%inline eq? (mref ,e-base ,e-index ,imm-offset uptr) (immediate 0)) (goto ,L1) (goto ,L2)))))))))))])) (let () @@ -8948,7 +9272,7 @@ (define-inline 2 name [(e-bv e-offset) (bind #t (e-bv e-offset) - `(if ,(handle-prim #f #f 3 '$bytevector-ref-check? (list `(quote 8) e-bv e-offset)) + `(if ,(handle-prim #f #f #f 3 '$bytevector-ref-check? (list `(quote 8) e-bv e-offset)) ,(let-values ([(e-index imm-offset) (bv-index-offset e-offset)]) (build-object-ref #f 'type e-bv e-index imm-offset)) ,(build-libcall #t src sexpr name e-bv e-offset)))])])) @@ -8964,7 +9288,7 @@ #'(define-inline 3 name [(e-bv e-offset) (let-values ([(e-index imm-offset) (bv-index-offset e-offset)]) - (build-object-ref #f 'type e-bv e-index imm-offset))])]))) + (build-object-ref #f 'type e-bv e-index imm-offset can-unbox-fp?))])]))) (define-bv-native-ref-inline bytevector-s8-ref integer-8) (define-bv-native-ref-inline bytevector-u8-ref unsigned-8) @@ -8979,7 +9303,16 @@ (define-bv-native-ref-inline bytevector-u64-native-ref unsigned-64) (define-bv-native-ref-inline bytevector-ieee-single-native-ref single-float) - (define-bv-native-ref-inline bytevector-ieee-double-native-ref double-float)) + (define-bv-native-ref-inline bytevector-ieee-double-native-ref double-float) + + ;; Inline to enable unboxing: + (define-inline 2 bytevector-ieee-double-native-ref + [(e-bv e-offset) + (bind #t (e-bv e-offset) + (let ([info (make-info-call #f #f #f #f #f)]) + `(if (call ,info ,#f ,(lookup-primref 3 '$bytevector-ref-check?) (quote 64) ,e-bv ,e-offset) + (call ,info ,#f ,(lookup-primref 3 'bytevector-ieee-double-native-ref) ,e-bv ,e-offset) + ,(build-libcall #t src sexpr bytevector-ieee-double-native-ref e-bv e-offset))))])) (let () (define-syntax define-bv-native-int-set!-inline @@ -9020,7 +9353,17 @@ (build-$real->flonum src sexpr e-val `(quote name)))))])]))) (define-bv-native-ieee-set!-inline bytevector-ieee-single-native-set! single-float) - (define-bv-native-ieee-set!-inline bytevector-ieee-double-native-set! double-float)) + (define-bv-native-ieee-set!-inline bytevector-ieee-double-native-set! double-float) + + ;; Inline to enable unboxing: + (define-inline 2 bytevector-ieee-double-native-set! + [(e-bv e-offset e-val) + (bind #t (e-bv e-offset) + (let ([info (make-info-call #f #f #f #f #f)]) + `(if (call ,info ,#f ,(lookup-primref 3 '$bytevector-set!-check?) (quote 64) ,e-bv ,e-offset) + ;; checks to make sure e-val produces a real number: + (call ,info ,#f ,(lookup-primref 3 'bytevector-ieee-double-native-set!) ,e-bv ,e-offset ,e-val) + ,(build-libcall #t src sexpr bytevector-ieee-double-native-set! e-bv e-offset))))])) (let () (define-syntax define-bv-int-ref-inline @@ -9073,7 +9416,7 @@ (bv-offset-okay? e-offset mask)) (constant? (lambda (x) (eq? x (constant native-endianness))) e-eness) (let-values ([(e-index imm-offset) (bv-index-offset e-offset)]) - (build-object-ref #f 'type e-bv e-index imm-offset)))])]))) + (build-object-ref #f 'type e-bv e-index imm-offset can-unbox-fp?)))])]))) (define-bv-ieee-ref-inline bytevector-ieee-single-ref single-float 3) (define-bv-ieee-ref-inline bytevector-ieee-double-ref double-float 7)) @@ -9998,12 +10341,12 @@ [(e-bop e-x) (bind #t (e-x) (build-libcall #f src sexpr name e-bop e-x `(immediate 0) - (handle-prim #f #f 3 'x-length (list e-x))))] + (handle-prim #f #f #f 3 'x-length (list e-x))))] [(e-bop e-x e-start) (bind #t (e-x e-start) (build-libcall #f src sexpr name e-bop e-x e-start (%inline - - ,(handle-prim #f #f 3 'x-length (list e-x)) + ,(handle-prim #f #f #f 3 'x-length (list e-x)) ,e-start)))] [(e-bop e-x e-start e-count) (build-libcall #f src sexpr name e-bop e-x e-start e-count)])])) @@ -10077,7 +10420,7 @@ (and (or (goto-oc? goto) (not (local-label-overflow-check (goto-label goto)))) (or (goto-tc? goto) (not (local-label-trap-check (goto-label goto)))))))) (Lvalue : Lvalue (ir oc? tc?) -> Lvalue () - [(mref ,[e0] ,[e1] ,imm) `(mref ,e0 ,e1 ,imm)]) + [(mref ,[e0] ,[e1] ,imm ,type) `(mref ,e0 ,e1 ,imm ,type)]) (Expr : Expr (ir oc? tc?) -> Expr () [(overflow-check ,[e #t tc? -> e]) (if oc? e `(overflow-check ,e))] [(trap-check ,ioc ,[e oc? #t -> e]) (if tc? e `(trap-check ,(if oc? #f ioc) ,e))] @@ -10117,8 +10460,8 @@ `(trap-check ,overflow? ,e) e))))) (Lvalue : Lvalue (ir) -> Lvalue ('no 'no) - [(mref ,[e0 #f -> e0 oc0 tc0] ,[e1 #f -> e1 oc1 tc1] ,imm) - (values `(mref ,e0 ,e1 ,imm) (combine-seq oc0 oc1) (combine-seq tc0 tc1))]) + [(mref ,[e0 #f -> e0 oc0 tc0] ,[e1 #f -> e1 oc1 tc1] ,imm ,type) + (values `(mref ,e0 ,e1 ,imm ,type) (combine-seq oc0 oc1) (combine-seq tc0 tc1))]) (Expr : Expr (ir tail?) -> Expr ('no 'no) [(goto ,l) (if (local-label? l) @@ -10260,7 +10603,7 @@ [,x (guard (uvar? x)) (cond [(uvar-in-prefix? x) - (let ([t (make-tmp 't)]) + (let ([t (make-tmp 't (uvar-type x))]) (uvar-location-set! x t) (uvar-in-prefix! x #f) (set! x* (cons x x*)) @@ -10376,16 +10719,16 @@ (definitions (define local*) (define make-tmp - (lambda (x) + (lambda (x type) (import (only np-languages make-tmp)) - (let ([x (make-tmp x)]) + (let ([x (make-tmp x type)]) (set! local* (cons x local*)) x))) (define Ref (lambda (ir setup*) (if (var? ir) (values ir setup*) - (let ([tmp (make-tmp 't)]) + (let ([tmp (make-tmp 't 'uptr)]) (values tmp (cons (Rhs ir tmp) setup*)))))) (define Lvalue? (lambda (x) @@ -10393,32 +10736,38 @@ [,lvalue #t] [else #f]))) (define Triv* - (lambda (e* k) - (let f ([e* e*] [lvalue-setup* '()] [rt* '()] [setup* '()]) + (case-lambda + [(e* k) (Triv* e* #f k)] + [(e* fp?* k) + (let f ([e* e*] [fp?* fp?*] [lvalue-setup* '()] [rt* '()] [setup* '()]) (if (null? e*) (build-seq* setup* (build-seq* lvalue-setup* (k (reverse rt*)))) - (let-values ([(t t-setup*) (Triv (car e*) (null? lvalue-setup*))]) + (let-values ([(t t-setup*) (Triv (car e*) (null? lvalue-setup*) (and fp?* (car fp?*)))]) (if (and (null? lvalue-setup*) (not (null? t-setup*)) (Lvalue? t) ; uvar's are singly assigned (or (not (uvar? t)) (uvar-assigned? t))) - (f (cdr e*) t-setup* (cons t rt*) setup*) - (f (cdr e*) lvalue-setup* (cons t rt*) (append t-setup* setup*)))))))) + (f (cdr e*) (and fp?* (cdr fp?*)) t-setup* (cons t rt*) setup*) + (f (cdr e*) (and fp?* (cdr fp?*)) lvalue-setup* (cons t rt*) (append t-setup* setup*))))))])) (define Triv? (lambda (maybe-e k) (if maybe-e - (let-values ([(t setup*) (Triv maybe-e #t)]) + (let-values ([(t setup*) (Triv maybe-e #f #f)]) (build-seq* setup* (k t))) (k #f)))) + (define (fp-lvalue? lvalue) + (nanopass-case (L10 Lvalue) lvalue + [,x (and (uvar? x) (eq? (uvar-type x) 'fp))] + [(mref ,x1 ,x2 ,imm ,type) (eq? type 'fp)])) (define build-seq* (lambda (x* y) (fold-right build-seq y x*))) (with-output-language (L10 Expr) (define build-seq (lambda (x y) `(seq ,x ,y))) (define Rhs (lambda (ir lvalue) - (Expr ir + (Expr ir (fp-lvalue? lvalue) (lambda (e) (nanopass-case (L10 Expr) e [,rhs `(set! ,lvalue ,rhs)] @@ -10439,18 +10788,18 @@ (CaseLambdaClause : CaseLambdaClause (ir) -> CaseLambdaClause () [(clause (,x* ...) ,mcp ,interface ,body) (fluid-let ([local* '()]) - (let ([body (Expr body values)]) + (let ([body (Expr body #f values)]) (safe-assert (nodups x* local*)) `(clause (,x* ...) (,local* ...) ,mcp ,interface ,body)))]) - (Triv : Expr (ir lvalue-okay?) -> Triv (setup*) + (Triv : Expr (ir lvalue-okay? fp?) -> Triv (setup*) [,x (guard (or lvalue-okay? (and (uvar? x) (not (uvar-assigned? x))) (eq? x %zero))) (values x '())] - [(mref ,e1 ,e2 ,imm) + [(mref ,e1 ,e2 ,imm ,type) (guard lvalue-okay?) (let*-values ([(x1 setup*) (Ref e1 '())] [(x2 setup*) (Ref e2 setup*)]) - (values (%mref ,x1 ,x2 ,imm) setup*))] + (values (%mref ,x1 ,x2 ,imm ,type) setup*))] [(literal ,info) (values `(literal ,info) '())] [(immediate ,imm) (values `(immediate ,imm) '())] [(label-ref ,l ,offset) (values `(label-ref ,l ,offset) '())] @@ -10461,19 +10810,20 @@ (fold-right (lambda (ir lvalue setup*) (cons (Rhs ir lvalue) setup*)) setup* e* x*))] - [(seq ,[Expr : e0 values -> e0] ,[t setup*]) + [(seq ,[Expr : e0 fp? values -> e0] ,[t setup*]) (values t (cons e0 setup*))] [(pariah) (values (%constant svoid) (list (with-output-language (L10 Expr) `(pariah))))] [else - (let ([tmp (make-tmp 't)]) + (let ([tmp (make-tmp 't (if fp? 'fp 'ptr))]) (values tmp (list (Rhs ir tmp))))]) - (Expr : Expr (ir k) -> Expr () + (Expr : Expr (ir fp? k) -> Expr () [(inline ,info ,prim ,e1* ...) - (Triv* e1* + (Triv* e1* (and (info-unboxed-args? info) + (info-unboxed-args-unboxed?* info)) (lambda (t1*) (k `(inline ,info ,prim ,t1* ...))))] [(alloc ,info ,e) - (let-values ([(t setup*) (Triv e #t)]) + (let-values ([(t setup*) (Triv e #t #f)]) (build-seq* setup* (k `(alloc ,info ,t))))] [(call ,info ,mdcl ,e0? ,e1* ...) (if e0? @@ -10504,10 +10854,10 @@ (Triv* e* (lambda (t*) (k `(values ,info ,t* ...))))] - [(if ,[Expr : e0 values -> e0] ,[e1] ,[e2]) `(if ,e0 ,e1 ,e2)] - [(seq ,[Expr : e0 values -> e0] ,[e1]) `(seq ,e0 ,e1)] + [(if ,[Expr : e0 #f values -> e0] ,[e1] ,[e2]) `(if ,e0 ,e1 ,e2)] + [(seq ,[Expr : e0 #f values -> e0] ,[e1]) `(seq ,e0 ,e1)] [(set! ,lvalue ,e) - (let-values ([(lvalue setup*) (Triv lvalue #t)]) + (let-values ([(lvalue setup*) (Triv lvalue #t #f)]) ; must put lvalue setup* first to avoid potentially interleaved argument ; evaluation in, e.g.: ; @@ -10555,12 +10905,12 @@ (set! local* (append x* local*)) (safe-assert (nodups local*)) (fold-left (lambda (t x e) (build-seq (Rhs e x) t)) body x* e*)] - [(mvlet ,[Expr : e values -> e] ((,x** ...) ,interface* ,[body*]) ...) + [(mvlet ,[Expr : e #f values -> e] ((,x** ...) ,interface* ,[body*]) ...) (set! local* (append (apply append x**) local*)) (safe-assert (nodups local*)) `(mvlet ,e ((,x** ...) ,interface* ,body*) ...)] - [(mvcall ,info ,[Expr : e1 values -> e1] ,e2) - (let-values ([(t2 setup*) (Triv e2 #t)]) + [(mvcall ,info ,[Expr : e1 #f values -> e1] ,e2) + (let-values ([(t2 setup*) (Triv e2 #t #f)]) (build-seq* setup* (k `(mvcall ,info ,e1 ,t2))))] [(goto ,l) `(goto ,l)] [(label ,l ,[body]) `(label ,l ,body)] @@ -10569,7 +10919,7 @@ [(pariah) `(pariah)] [(profile ,src) `(profile ,src)] [else - (let-values ([(t setup*) (Triv ir #t)]) + (let-values ([(t setup*) (Triv ir #t fp?)]) (build-seq* setup* (k t)))])) (define-pass np-push-mrvs : L10 (ir) -> L10.5 () @@ -10744,6 +11094,10 @@ (let ([x (make-tmp x)]) (set! local* (cons x local*)) x))) + (define (fp-lvalue? lvalue) + (nanopass-case (L10.5 Lvalue) lvalue + [,x (and (uvar? x) (eq? (uvar-type x) 'fp))] + [(mref ,x1 ,x2 ,imm ,type) (eq? type 'fp)])) (define rhs-inline (lambda (lvalue info prim t*) (with-output-language (L11 Effect) @@ -10780,7 +11134,7 @@ (set! ,t ?rhs) ,(predicafy-triv ,t)))]))) [,x (predicafy-triv ,x)] - [(mref ,x1 ,x2 ,imm) (predicafy-triv ,(%mref ,x1 ,x2 ,imm))] + [(mref ,x1 ,x2 ,imm ,type) (predicafy-triv ,(%mref ,x1 ,x2 ,imm ,type))] [(literal ,info) (if (info-literal-indirect? info) (predicafy-triv (literal ,info)) @@ -10813,8 +11167,10 @@ ($oops who "unrecognized prim ~s" prim)] [(set! ,[lvalue] (inline ,info ,prim ,[t*] ...)) `(seq ,(rhs-inline lvalue info prim t*) (true))] - [(set! ,[lvalue] (mvcall ,info ,mdcl ,[t0?] ,[t1] ... (,[t*] ...))) - (guard (info-call-error? info) (fx< (debug-level) 2)) + [(set! ,[lvalue -> lvalue] (mvcall ,info ,mdcl ,[t0?] ,[t1] ... (,[t*] ...))) + (guard (info-call-error? info) (or (fx< (debug-level) 2) + ;; must really escape if fp context + (fp-lvalue? lvalue))) (%seq (tail (mvcall ,info ,mdcl ,t0? ,t1 ... (,t* ...))) (true))] @@ -10835,7 +11191,7 @@ (true))]) (Effect : Expr (ir) -> Effect () [,x `(nop)] - [(mref ,x1 ,x2 ,imm) `(nop)] + [(mref ,x1 ,x2 ,imm ,type) `(nop)] [(literal ,info) `(nop)] [(immediate ,imm) `(nop)] [(label-ref ,l ,offset) `(nop)] @@ -10850,8 +11206,10 @@ [else `(inline ,info ,prim ,t* ...)])] [(set! ,[lvalue] (inline ,info ,prim ,[t*] ...)) (rhs-inline lvalue info prim t*)] - [(set! ,[lvalue] (mvcall ,info ,mdcl ,[t0?] ,[t1] ... (,[t*] ...))) - (guard (info-call-error? info) (fx< (debug-level) 2)) + [(set! ,[lvalue -> lvalue] (mvcall ,info ,mdcl ,[t0?] ,[t1] ... (,[t*] ...))) + (guard (info-call-error? info) (or (fx< (debug-level) 2) + ;; must really escape if fp context + (fp-lvalue? lvalue))) `(tail (mvcall ,info ,mdcl ,t0? ,t1 ... (,t* ...)))] [(set! ,[lvalue] (attachment-get ,reified? ,[t?])) `(set! ,lvalue (attachment-get ,reified? ,t?))] @@ -10893,8 +11251,10 @@ ($oops who "unrecognized prim ~s" prim)] [(set! ,[lvalue] (inline ,info ,prim ,[t*] ...)) `(seq ,(rhs-inline lvalue info prim t*) ,(%constant svoid))] - [(set! ,[lvalue] (mvcall ,info ,mdcl ,[t0?] ,[t1] ... (,[t*] ...))) - (guard (info-call-error? info) (fx< (debug-level) 2)) + [(set! ,[lvalue -> lvalue] (mvcall ,info ,mdcl ,[t0?] ,[t1] ... (,[t*] ...))) + (guard (info-call-error? info) (or (fx< (debug-level) 2) + ;; must really escape if fp context + (fp-lvalue? lvalue))) `(mvcall ,info ,mdcl ,t0? ,t1 ... (,t* ...))] [(set! ,[lvalue] ,[rhs]) `(seq (set! ,lvalue ,rhs) ,(%constant svoid))] [(mvset ,info (,mdcl ,[t0?] ,[t1] ...) (,[t*] ...) ((,x** ...) ,interface* ,l*) ...) @@ -11154,15 +11514,15 @@ ;; `label-ref` offset is adjusted later if return point turns out to be compact (%seq (set! ,%ref-ret (label-ref ,rpl ,(constant size-rp-header))) ,tl) (meta-cond - [(real-register? '%ret) (%seq (set! ,%ret ,(get-fv 0)) ,tl)] + [(real-register? '%ret) (%seq (set! ,%ret ,(get-ret-fv)) ,tl)] [else tl])))) (define finish-call (lambda (argcnt? cp? t) - (safe-assert (not (eq? t (get-fv 0)))) + (safe-assert (not (eq? t (get-ret-fv)))) (let ([live-reg* (reg-cons* %ret (if cp? (reg-cons* %cp reg*) reg*))] [live-fv* (meta-cond [(real-register? '%ret) fv*] - [else (cons (get-fv 0) fv*)])]) + [else (cons (get-ret-fv) fv*)])]) ((lambda (e) (cond [shift-attachment? @@ -11171,7 +11531,7 @@ (cons (and consumer? %ac0) (nanopass-case (L13 Triv) t [,x (cons x live-reg*)] - [(mref ,x1 ,x2 ,imm) (cons x1 (cons x2 live-reg*))] + [(mref ,x1 ,x2 ,imm ,type) (cons x1 (cons x2 live-reg*))] [else live-reg*])) (%seq (set! ,%td (inline ,(intrinsic-info-asmlib reify-1cc #f) ,%asmlibcall)) @@ -11442,7 +11802,7 @@ (if (null? frame-t*) (begin (set! max-fv (fxmax max-fv i)) '()) (let ([i (fx+ i 1)]) - (cons (get-fv i) (f (cdr frame-t*) i)))))]) + (cons (get-ptr-fv i) (f (cdr frame-t*) i)))))]) (set-locs fv* frame-t* (set-locs reg* reg-t* (build-call t0 #f reg* fv* info mdcl))))) @@ -11475,7 +11835,7 @@ (if (null? frame-t*) (begin (set! max-fv (fxmax max-fv i)) '()) (let ([i (fx+ i 1)]) - (cons (get-fv i) (f (cdr frame-t*) i)))))]) + (cons (get-ptr-fv i) (f (cdr frame-t*) i)))))]) (set-locs fv* frame-t* (set-locs reg* reg-t* `(seq @@ -11484,12 +11844,12 @@ [(real-register? '%ret) (%seq ; must leave RA in %ret for values-error - (set! ,%ret ,(get-fv 0)) + (set! ,%ret ,(get-ret-fv)) ,(%mv-jump ,%ret (,%ac0 ,%ret ,reg* ... ,fv* ...)))] [else (%seq - (set! ,%xp ,(get-fv 0)) - ,(%mv-jump ,%xp (,%ac0 ,reg* ... ,(get-fv 0) ,fv* ...)))]))))))))))) + (set! ,%xp ,(get-ret-fv)) + ,(%mv-jump ,%xp (,%ac0 ,reg* ... ,(get-ret-fv) ,fv* ...)))]))))))))))) (define-syntax do-return (lambda (x) (syntax-case x () @@ -11497,7 +11857,7 @@ (with-implicit (k quasiquote) #'`(seq (set! ,%ac0 retval) - (jump ,(get-fv 0) (,%ac0))))]))) + (jump ,(get-ret-fv) (,%ac0))))]))) (define Ref (lambda (x) (when (uvar? x) (uvar-referenced! x #t)) @@ -11768,7 +12128,7 @@ (if (null? frame-x*) (begin (set! max-fv (fxmax max-fv i)) '()) (let ([i (fx+ i 1)]) - (cons (get-fv i) (f (cdr frame-x*) i)))))]) + (cons (get-ptr-fv i) (f (cdr frame-x*) i)))))]) ; add 2 for the old RA and cchain (set! max-fv (fx+ max-fv 2)) (let-values ([(c-init c-args c-result c-return) (asm-foreign-callable info)]) @@ -12130,7 +12490,7 @@ (set! ,%ret ,(%mref ,xp/cp ,(constant continuation-return-address-disp))) ,(%mv-jump ,%ret (,%ac0 ,%ret ,arg-registers ...)))] [else - (let ([fv0 (get-fv 0)]) + (let ([fv0 (get-ret-fv)]) (%seq (set! ,%xp ,(%mref ,xp/cp ,(constant continuation-return-address-disp))) (set! ,fv0 ,%xp) @@ -12290,8 +12650,8 @@ (let ([other-reg* (fold-left (lambda (live* kill) (remq kill live*)) (vector->list regvec) ;; Registers used by `reify-cc-help` output, - ;; plus `%ts` so that we have one to allocate - (reg-list %xp %td %ac0 %ts))] + ;; including some as needed per machine + (reg-list %xp %td %ac0 reify-support))] [1cc? (eq? sym 'reify-1cc)]) `(lambda ,(make-named-info-lambda (if 1cc? "reify-1cc" "maybe-reify-cc") '(0)) 0 () ,(asm-enter @@ -12447,10 +12807,10 @@ [else `(hand-coded ,sym)])]) (Lvalue : Lvalue (ir) -> Lvalue () [,x (Ref x)] - [(mref ,x1 ,x2 ,imm) (%mref ,(Ref x1) ,(Ref x2) ,imm)]) + [(mref ,x1 ,x2 ,imm ,type) (%mref ,(Ref x1) ,(Ref x2) ,imm ,type)]) (Triv : Triv (ir) -> Triv () [,x (Ref x)] ; TODO: cannot call ref in cata, as we don't allow top-level cata - [(mref ,x1 ,x2 ,imm) (%mref ,(Ref x1) ,(Ref x2) ,imm)]) + [(mref ,x1 ,x2 ,imm ,type) (%mref ,(Ref x1) ,(Ref x2) ,imm ,type)]) (Rhs : Rhs (ir) -> Rhs () [(mvcall ,info ,mdcl ,t0? ,t1* ... (,t* ...)) ($oops who "Effect is responsible for handling mvcalls")] @@ -12635,7 +12995,8 @@ (for-each (lambda (x) (uvar-referenced! x #f)) x*) (let do-frame ([x* (set-formal-registers! x*)] [fv-idx 1]) (unless (null? x*) - (let ([x (car x*)] [fv (get-fv fv-idx)]) + (let ([x (car x*)] [fv (get-ptr-fv fv-idx)]) + (safe-assert (compatible-fv? fv (uvar-type x))) (uvar-location-set! x fv) (do-frame (cdr x*) (fx+ fv-idx 1))))) (let () @@ -12664,7 +13025,7 @@ ; TODO: don't want to save ret for leaf routines ; TODO: don't necessarily want to position ret save here ,(meta-cond - [(real-register? '%ret) `(set! ,(get-fv 0) ,%ret)] + [(real-register? '%ret) `(set! ,(get-ret-fv) ,%ret)] [else `(nop)]) (overflood-check) ,(bind-formals mcp x* tlbody))))] @@ -12674,7 +13035,7 @@ `(seq ; CAUTION: fv0 must hold return address when we call into C ,(build-foreign-call info t0 t1* %ac0 #f) - (jump ,(get-fv 0) (,%ac0)))] + (jump ,(get-ret-fv) (,%ac0)))] [,rhs (do-return ,(Rhs ir))] [(values ,info ,[t]) (do-return ,t)] [(values ,info ,t* ...) (build-mv-return t*)])) @@ -13211,7 +13572,7 @@ [else (%seq (set! ,%xp ,%ref-ret) - ,(%mv-jump ,%xp (,%ac0 ,arg-registers ... ,(get-fv 0))))]))))] + ,(%mv-jump ,%xp (,%ac0 ,arg-registers ... ,(get-ret-fv))))]))))] [($apply-procedure) (let ([Lloop (make-local-label 'loop)] [Ldone (make-local-label 'done)]) @@ -13646,8 +14007,10 @@ (in %ac0 %cp scheme-args) (out %ac1 %xp %yp %ts %td extra-regs)) (new-frame ,(make-info-newframe #f #f '() '() '()) ,'() ... ,Lret) - ; NB: hack!!! - (set! ,%sfp ,(%inline - ,%sfp (immediate ,(constant ptr-bytes)))) + ; NB: hack!!! Asssuming a frame-size calculation: + (set! ,%sfp ,(%inline - ,%sfp (immediate ,(constant-case stack-word-alignment + [(2) (fx* 2 (constant ptr-bytes))] + [(1) (constant ptr-bytes)])))) (set! ,%ref-ret (label-ref ,Lret ,(constant size-rp-header))) (tail ,(do-call)) ; argcnt already in ac0 #;(asm align) @@ -13779,7 +14142,7 @@ (set! ,refeap ,(%inline - ,refeap ,(%constant ptr-bytes))) ; write through to tc so dirty-list bounds are always known in case of an ; invalid memory reference or illegal instruction - (set! (mref ,%tc ,%zero ,(tc-disp %eap)) ,refeap) + (set! (mref ,%tc ,%zero ,(tc-disp %eap) uptr) ,refeap) (set! ,(%mref ,refeap 0) ,t)) (%seq (set! ,%td ,refeap) @@ -13947,7 +14310,7 @@ (values block (cons block block*)))))) (Lvalue : Lvalue (ir target) -> * (ir) [,x x] - [(mref ,x1 ,x2 ,imm) (with-output-language (L15a Lvalue) `(mref ,x1 ,x2 ,imm))]) + [(mref ,x1 ,x2 ,imm ,type) (with-output-language (L15a Lvalue) `(mref ,x1 ,x2 ,imm ,type))]) (Triv : Triv (ir target) -> * (ir) [(literal ,info) (with-output-language (L15a Triv) `(literal ,info))] [(immediate ,imm) (with-output-language (L15a Triv) `(immediate ,imm))] @@ -14958,7 +15321,7 @@ (lambda (lvalue) (nanopass-case (L15a Lvalue) lvalue [,x (process-var x)] - [(mref ,x1 ,x2 ,imm) (process-var x1) (process-var x2)]))) + [(mref ,x1 ,x2 ,imm ,type) (process-var x1) (process-var x2)]))) (define Triv (lambda (t) (nanopass-case (L15a Triv) t @@ -15245,6 +15608,11 @@ (define touch-label! (lambda (l) (unless (libspec-label? l) (local-label-iteration-set! l 1)))) + (define (fp-lvalue? lvalue) + (nanopass-case (L16 Lvalue) lvalue + [,x (or (and (uvar? x) (eq? (uvar-type x) 'fp)) + (and (reg? x) (eq? (reg-type x) 'fp)))] + [(mref ,x1 ,x2 ,imm ,type) (eq? type 'fp)])) (define LambdaBody (lambda (entry-block* block* func) #;(when (#%$assembly-output) @@ -15387,7 +15755,11 @@ (let ([chunk (asm-return-address x l offset1 offset)]) (values '() (cons chunk chunk*) (fx+ (chunk-size chunk) offset)))))] [(set! ,lvalue (asm ,info ,proc ,t* ...)) (values (apply proc code* lvalue t*) chunk* offset)] - [(set! ,lvalue ,rhs) (values (asm-move code* lvalue rhs) chunk* offset)] + [(set! ,lvalue ,rhs) (values (if (fp-lvalue? lvalue) + (asm-fpmove code* lvalue rhs) + (asm-move code* lvalue rhs)) + chunk* + offset)] [(asm ,info ,proc ,t* ...) (values (apply proc code* t*) chunk* offset)]) (Pred : Pred (ir l1 l2 offset) -> * (code* chunk) [(asm ,info ,proc ,t* ...) (apply proc l1 l2 offset t*)]) @@ -15396,7 +15768,7 @@ (define-pass Triv->rand : (L16 Triv) (ir) -> * (operand) (Triv : Triv (ir) -> * (operand) [,x (cons 'reg x)] - [(mref ,x1 ,x2 ,imm) + [(mref ,x1 ,x2 ,imm ,type) (if (eq? x2 %zero) `(disp ,imm ,x1) `(index ,imm ,x2 ,x1))] @@ -15553,6 +15925,18 @@ (tree-extract (cset-tree cset) (cset-size cset) v))) ) + ;; Alignment to support unboxed doubles + (define stack-align + (lambda (n) + (constant-case stack-word-alignment + [(2) (if (fxodd? n) (fx+ n 1) n)] + [(1) n]))) + (define stack-aligned-first-argument? + (lambda (n) + (constant-case stack-word-alignment + [(2) (fxodd? n)] + [(1) #t]))) + (define do-live-analysis! (lambda (live-size entry-block*) (define add-var (make-add-var live-size)) @@ -15569,7 +15953,7 @@ (define Triv (lambda (out t) (nanopass-case (L15a Triv) t - [(mref ,x1 ,x2 ,imm) (add-var (add-var out x2) x1)] + [(mref ,x1 ,x2 ,imm ,type) (add-var (add-var out x2) x1)] [,x (add-var out x)] [else out]))) (define Rhs @@ -15628,7 +16012,7 @@ (begin (live-info-live-set! live-info out) (Rhs out rhs)))] - [(set! ,live-info (mref ,x1 ,x2 ,imm) ,rhs) + [(set! ,live-info (mref ,x1 ,x2 ,imm ,type) ,rhs) (live-info-live-set! live-info out) (Rhs (add-var (add-var out x1) x2) rhs)] [(inline ,live-info ,info ,effect-prim ,t* ...) @@ -15713,7 +16097,7 @@ (if (or (null? nfv*) (fx> i max-fv)) next (loop (cdr nfv*) (fx+ i 1) - (let ([new-next (remove-var next (get-fv i))]) + (let ([new-next (remove-var next (get-ptr-fv i))]) (if (eq? new-next next) next (add-var next (car nfv*)))))))))] @@ -15739,7 +16123,7 @@ (reg-cons* %ret %ac0 arg-registers) (info-newframe-cnfv* newframe-info) (info-newframe-nfv** newframe-info))) - (get-fv 0))]) + (get-ret-fv))]) (newframe-block-live-call-set! block call) call)))]) (let ([out (union-live @@ -16025,7 +16409,7 @@ (if (conflict-fv? x0 fv) (loop move* work*) (begin - (safe-assert (not (eq? fv (get-fv 0)))) + (safe-assert (not (eq? fv (get-ret-fv)))) (begin (clear-seen!) (succ fv)))))) (if (fv? var) (try-fv var) @@ -16039,20 +16423,33 @@ (lambda (spill max-fv first-open) (define return (lambda (home max-fv first-open) + (safe-assert (compatible-fv? home (uvar-type spill))) (uvar-location-set! spill home) (update-conflict! home spill) - (values max-fv first-open))) + (let ([max-fv + (constant-case ptr-bits + [(32) + (cond + [(eq? (uvar-type spill) 'fp) + ;; Make sure next slot is unused + (let ([fv (get-fv (fx+ 1 (fv-offset home)) 'reserved)]) + (safe-assert (eq? (fv-type fv) 'reserved))) + (fxmax max-fv (fx+ 1 (fv-offset home)))] + [else max-fv])] + [(64) max-fv])]) + (values max-fv first-open)))) (find-move-related-home spill (lambda (home) (return home max-fv first-open)) (lambda () (let f ([first-open first-open]) - (let* ([fv (get-fv first-open)] [cset (var-spillable-conflict* fv)]) + (let* ([fv (get-fv first-open (uvar-type spill))] [cset (var-spillable-conflict* fv)]) (if (and cset (cset-full? cset)) (f (fx+ first-open 1)) (let ([spill-offset (var-index spill)]) (let f ([fv-offset first-open] [fv fv] [cset cset]) - (if (and cset (conflict-bit-set? cset spill-offset)) - (let* ([fv-offset (fx+ fv-offset 1)] [fv (get-fv fv-offset)] [cset (var-spillable-conflict* fv)]) + (if (or (and cset (conflict-bit-set? cset spill-offset)) + (not (compatible-fv? fv (uvar-type spill)))) + (let* ([fv-offset (fx+ fv-offset 1)] [fv (get-fv fv-offset (uvar-type spill))] [cset (var-spillable-conflict* fv)]) (f fv-offset fv cset)) (return fv (fxmax fv-offset max-fv) first-open))))))))))) (define find-homes! @@ -16081,8 +16478,10 @@ (let loop ([nfv* nfv*] [offset base]) (or (null? nfv*) (and (or (not (car nfv*)) - (let ([cset (var-spillable-conflict* (get-fv offset))]) - (not (and cset (conflict-bit-set? cset (var-index (car nfv*))))))) + (let ([fv (get-fv offset)]) + (and (compatible-fv? fv 'ptr) + (let ([cset (var-spillable-conflict* fv)]) + (not (and cset (conflict-bit-set? cset (var-index (car nfv*))))))))) (loop (cdr nfv*) (fx+ offset 1))))))) (define assign-new-frame! (lambda (cnfv* nfv** call-live*) @@ -16090,14 +16489,17 @@ (lambda (nfv* offset) (if (null? nfv*) (set! max-fv (fxmax offset max-fv)) - (let ([nfv (car nfv*)] [home (get-fv offset)]) + (let* ([nfv (car nfv*)] [home (get-fv offset (uvar-type nfv))]) + (safe-assert (compatible-fv? home (uvar-type nfv))) (uvar-location-set! nfv home) (update-conflict! home nfv) (set-offsets! (cdr nfv*) (fx+ offset 1)))))) (let ([arg-offset (fx+ (length cnfv*) 1)]) ; +1 for return address slot (let loop ([base (fx+ (find-max-fv call-live*) 1)]) (let ([arg-base (fx+ base arg-offset)]) - (if (and (cool? base cnfv*) (andmap (lambda (nfv*) (cool? arg-base nfv*)) nfv**)) + (if (and (stack-aligned-first-argument? arg-base) + (cool? base cnfv*) + (andmap (lambda (nfv*) (cool? arg-base nfv*)) nfv**)) (begin (set! max-fs@call (fxmax max-fs@call base)) ; max frame size @ call in ptrs (set-offsets! cnfv* base) @@ -16161,8 +16563,9 @@ [(and (uvar? x) (uvar-iii x)) => (lambda (index) (safe-assert - (let ([name.offset (vector-ref (ctci-live ctci) index)]) - (logbit? (fx- (cdr name.offset) 1) lpm))) + (or (eq? (uvar-type x) 'fp) + (let ([name.offset (vector-ref (ctci-live ctci) index)]) + (logbit? (fx- (cdr name.offset) 1) lpm)))) (cons index i*))] [else i*])) '() call-live*))]) @@ -16216,7 +16619,7 @@ (safe-assert (not (fx= frame-words 0))) (let ([shift-offset (fx* frame-words (constant ptr-bytes))]) (safe-assert (fx> shift-offset 0)) - (cons `(set! ,live-info (mref ,reg ,%zero ,imm) (mref ,reg ,%zero ,shift-offset)) new-effect*))))] + (cons `(set! ,live-info (mref ,reg ,%zero ,imm ptr) (mref ,reg ,%zero ,shift-offset ptr)) new-effect*))))] [(check-live ,live-info ,reg* ...) (let ([live (fold-left (lambda (live reg) (let ([t (remove-var live reg)]) @@ -16314,11 +16717,11 @@ (lambda (x cur-off) (if (fv? x) (with-output-language (L15c Lvalue) - `(mref ,%sfp ,%zero ,(fx- (fx* (fv-offset x) (constant ptr-bytes)) cur-off))) + `(mref ,%sfp ,%zero ,(fx- (fx* (fv-offset x) (constant ptr-bytes)) cur-off) ,(fv-type x))) x)))) (Lvalue : Lvalue (ir cur-off) -> Lvalue () - [(mref ,x0 ,x1 ,imm) - `(mref ,(fv->mref (var->loc x0) cur-off) ,(fv->mref (var->loc x1) cur-off) ,imm)] + [(mref ,x0 ,x1 ,imm ,type) + `(mref ,(fv->mref (var->loc x0) cur-off) ,(fv->mref (var->loc x1) cur-off) ,imm ,type)] [,x (fv->mref (var->loc x) cur-off)]) ; NB: defining Triv & Rhs with cur-off argument so we actually get to our version of Lvalue (Triv : Triv (ir cur-off) -> Triv ()) @@ -16357,13 +16760,16 @@ block*) `(dummy))) + ;; updates live-variable info as instructions are expanded (module (select-instructions!) (define make-tmp - (lambda (x) + (case-lambda + [(x) (make-tmp x 'uptr)] + [(x type) (import (only np-languages make-unspillable)) - (let ([tmp (make-unspillable x)]) + (let ([tmp (make-unspillable x type)]) (set! unspillable* (cons tmp unspillable*)) - tmp))) + tmp)])) (define make-restricted-unspillable (lambda (x reg*) (import (only np-languages make-restricted-unspillable)) @@ -16426,24 +16832,25 @@ (define mref? (lambda (x) (nanopass-case (L15c Triv) x - [(mref ,lvalue1 ,lvalue2 ,imm) #t] + [(mref ,lvalue1 ,lvalue2 ,imm ,type) #t] [else #f]))) (define same? (lambda (a b) (or (eq? a b) (nanopass-case (L15c Triv) a - [(mref ,lvalue11 ,lvalue12 ,imm1) + [(mref ,lvalue11 ,lvalue12 ,imm1 ,type1) (nanopass-case (L15c Triv) b - [(mref ,lvalue21 ,lvalue22 ,imm2) + [(mref ,lvalue21 ,lvalue22 ,imm2 ,type2) (and (or (and (eq? lvalue11 lvalue21) (eq? lvalue12 lvalue22)) (and (eq? lvalue11 lvalue22) (eq? lvalue12 lvalue21))) - (eqv? imm1 imm2))] + (eqv? imm1 imm2) + (eq? type1 type2))] [else #f])] [else #f])))) (define-pass imm->imm : (L15c Triv) (ir) -> (L15d Triv) () (Lvalue : Lvalue (ir) -> Lvalue () - [(mref ,lvalue1 ,lvalue2 ,imm) (sorry! who "unexpected mref ~s" ir)]) + [(mref ,lvalue1 ,lvalue2 ,imm ,type) (sorry! who "unexpected mref ~s" ir)]) (Triv : Triv (ir) -> Triv ())) (define-pass literal@->literal : (L15c Triv) (ir) -> (L15d Triv) () @@ -16461,7 +16868,7 @@ (define Triv (lambda (out t) (nanopass-case (L15d Triv) t - [(mref ,x1 ,x2 ,imm) (add-var (add-var out x2) x1)] + [(mref ,x1 ,x2 ,imm ,type) (add-var (add-var out x2) x1)] [,x (add-var out x)] [else out]))) (define Rhs @@ -16518,10 +16925,10 @@ (if force-overflow? (fxmax (fx- (fx* max-fs@call (constant ptr-bytes)) 0) - (fx- (fx* (fx+ max-fv 1) (constant ptr-bytes)) (fx- (constant stack-slop) (fx* (constant stack-frame-limit) 2)))) + (fx- (fx* (fx+ (stack-align max-fv) 1) (constant ptr-bytes)) (fx- (constant stack-slop) (fx* (constant stack-frame-limit) 2)))) (fxmax (fx- (fx* max-fs@call (constant ptr-bytes)) (constant stack-frame-limit)) - (fx- (fx* (fx+ max-fv 1) (constant ptr-bytes)) (fx- (constant stack-slop) (constant stack-frame-limit))))))) + (fx- (fx* (fx+ (stack-align max-fv) 1) (constant ptr-bytes)) (fx- (constant stack-slop) (constant stack-frame-limit))))))) (define overage (compute-overage max-fs@call)) (define handle-overflow-check (lambda (reg info new-effect* live) @@ -16531,7 +16938,7 @@ (meta-cond [(real-register? '%esp) %esp] [else (with-output-language (L15c Triv) - `(mref ,%tc ,%zero ,(tc-disp %esp)))])) + `(mref ,%tc ,%zero ,(tc-disp %esp) uptr))])) live)]) (append xnew-effect* (cons (with-output-language (L15d Effect) @@ -16573,11 +16980,17 @@ (begin (assert (not (checks-cc? block))) (f e*)))) - e*)))) + e*))) + (define (fp-lvalue? lvalue) + (nanopass-case (L15c Lvalue) lvalue + [,x (or (and (uvar? x) (eq? (uvar-type x) 'fp)) + (and (reg? x) (eq? (reg-type x) 'fp)))] + [(mref ,lvalue1 ,lvalue2 ,imm ,type) (eq? type 'fp)]))) (Rhs : Rhs (ir lvalue new-effect* live) -> * (new-effect*) [(inline ,info ,value-prim ,t* ...) (handle-value-inline lvalue value-prim info new-effect* t* live)] - [else (handle-value-inline lvalue %move null-info new-effect* (list ir) live)]) + [else (let ([op (if (fp-lvalue? lvalue) %fpmove %move)]) + (handle-value-inline lvalue op null-info new-effect* (list ir) live))]) (Tail : Tail (ir) -> Tail () [(jump ,live-info ,t) (handle-jump t (live-info-live live-info))] [(goto ,l) (values '() `(goto ,l))] @@ -16700,7 +17113,7 @@ (define Triv (lambda (unspillable* t) (nanopass-case (L15d Triv) t - [(mref ,x1 ,x2 ,imm) (add-unspillable (add-unspillable unspillable* x2) x1)] + [(mref ,x1 ,x2 ,imm ,type) (add-unspillable (add-unspillable unspillable* x2) x1)] [,x (add-unspillable unspillable* x)] [else unspillable*]))) (define Rhs @@ -16721,6 +17134,8 @@ (define Effect* (lambda (e* unspillable*) (if (null? e*) + ;; If this assertion fails, then an unspillable was referenced + ;; without a preceding assignment: (safe-assert (null? unspillable*)) (Effect* (cdr e*) (nanopass-case (L15d Effect) (car e*) @@ -16775,7 +17190,12 @@ (define-who assign-registers! (lambda (lambda-info varvec unvarvec) - (define k (vector-length regvec)) + (define total-k (vector-length regvec)) + (define ptr-k (let loop ([ptr-k total-k]) + (if (eq? (reg-type (vector-ref regvec (fx- ptr-k 1))) 'fp) + (loop (fx- ptr-k 1)) + ptr-k))) + (define fp-k (fx- total-k ptr-k)) (define uvar-weight (lambda (x) (fx- (uvar-ref-weight x) (uvar-save-weight x)))) @@ -16809,8 +17229,9 @@ (lambda (x) (define conflict? (lambda (reg x) - (let ([cset (if (uvar-unspillable? x) (var-unspillable-conflict* reg) (var-spillable-conflict* reg))]) - (conflict-bit-set? cset (var-index x))))) + (or (not (compatible-var-types? (reg-type reg) (uvar-type x))) + (let ([cset (if (uvar-unspillable? x) (var-unspillable-conflict* reg) (var-spillable-conflict* reg))]) + (conflict-bit-set? cset (var-index x)))))) (define find-move-related-home (lambda (x0 succ fail) (let f ([x x0] [work* '()] [clear-seen! void]) @@ -16846,17 +17267,19 @@ (find-move-related-home x set-home! (lambda () - (let f ([offset (fx- k 1)]) + (let f ([offset (fx- total-k 1)]) (cond [(fx< offset 0) (uvar-spilled! x #t) (when (uvar-unspillable? x) - (sorry! who "spilled unspillable ~s" x))] + (sorry! who "spilled unspillable ~s in ~s" x lambda-info))] [(conflict? (vector-ref regvec offset) x) (f (fx- offset 1))] [else (set-home! (vector-ref regvec offset))])))))) (define pick-victims (lambda (x*) - (define low-degree? (lambda (x) (fx< (uvar-degree x) k))) + (define low-degree? (lambda (x) (fx< (uvar-degree x) (if (eq? (uvar-type x) 'fp) + fp-k + ptr-k)))) (define pick-potential-spill ; x* is already sorted by weight, so this effectively picks uvar with ; the highest degree among those with the lowest weight @@ -16981,7 +17404,12 @@ [else (f i (cdr spillable*))]) (let ([v (f (fx+ i 1) (cdr spillable*))]) (uvar-iii-set! spillable i) - (vector-set! v i (cons (unannotate source) (fv-offset (uvar-location spillable)))) + (vector-set! v i (cons (let ([name (unannotate source)]) + ;; A boxed symbol means an "unboxed" variable + (if (eq? (uvar-type spillable) 'fp) + (box name) + name)) + (fv-offset (uvar-location spillable)))) v)))] [else (f i (cdr spillable*))]))))))]))) @@ -16993,7 +17421,7 @@ (or (uvar-location x) (sorry! who "no location assigned to uvar ~s" x)) x)))) (Lvalue : Lvalue (ir) -> Lvalue () - [(mref ,x0 ,x1 ,imm) `(mref ,(var->loc x0) ,(var->loc x1) ,imm)] + [(mref ,x0 ,x1 ,imm ,type) `(mref ,(var->loc x0) ,(var->loc x1) ,imm ,type)] [,x (var->loc x)]) (Pred : Pred (ir) -> Pred ()) (Tail : Tail (ir) -> Tail ()) @@ -17191,7 +17619,8 @@ (let* ([kunspillable (length unspillable*)] [unvarvec (make-vector kunspillable)]) ; set up var indices & unvarvec mapping from indices to unspillables (fold-left (lambda (i x) (var-index-set! x i) (vector-set! unvarvec i x) (fx+ i 1)) 0 unspillable*) - ; rerun intra-block live analysis and record (reg v spillable v unspillable) x unspillable conflicts + ; select-instrcutions! kept intra-block live analysis up-to-date, so now + ; record (reg v spillable v unspillable) x unspillable conflicts (RApass unparse-L15d do-unspillable-conflict! kfv kspillable varvec live-size kunspillable unvarvec block*) #;(show-conflicts (info-lambda-name info) varvec unvarvec) (RApass unparse-L15d assign-registers! info varvec unvarvec) @@ -17398,6 +17827,7 @@ ir ((pass np-profile-unroll-loops unparse-L7) ir))) (pass np-simplify-if unparse-L7) + (pass np-unbox-fp-vars! unparse-L7) (pass np-expand-primitives unparse-L9) (pass np-place-overflow-and-trap unparse-L9.5) (pass np-rebind-on-ruined-path unparse-L9.5) diff --git a/s/i3fb.def b/s/i3fb.def index 5f1593d4b6..a7ea2d5629 100644 --- a/s/i3fb.def +++ b/s/i3fb.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 4) (define-constant asm-arg-reg-max 1) (define-constant asm-arg-reg-cnt 1) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "int") (define-constant typedef-uptr "unsigned int") diff --git a/s/i3le.def b/s/i3le.def index faf7eac137..98ffafb924 100644 --- a/s/i3le.def +++ b/s/i3le.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 4) (define-constant asm-arg-reg-max 1) (define-constant asm-arg-reg-cnt 1) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "int") (define-constant typedef-uptr "unsigned int") diff --git a/s/i3nb.def b/s/i3nb.def index 7878f9239d..e464c9f919 100644 --- a/s/i3nb.def +++ b/s/i3nb.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 4) (define-constant asm-arg-reg-max 1) (define-constant asm-arg-reg-cnt 1) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "int") (define-constant typedef-uptr "unsigned int") diff --git a/s/i3nt.def b/s/i3nt.def index 9bb96f4d53..4efbd53764 100644 --- a/s/i3nt.def +++ b/s/i3nt.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 8) (define-constant asm-arg-reg-max 1) (define-constant asm-arg-reg-cnt 1) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "int") (define-constant typedef-uptr "unsigned int") diff --git a/s/i3ob.def b/s/i3ob.def index ed01492a8a..7a8879ed36 100644 --- a/s/i3ob.def +++ b/s/i3ob.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 4) (define-constant asm-arg-reg-max 1) (define-constant asm-arg-reg-cnt 1) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "int") (define-constant typedef-uptr "unsigned int") diff --git a/s/i3osx.def b/s/i3osx.def index f44d51c4b7..02ecd7bf03 100644 --- a/s/i3osx.def +++ b/s/i3osx.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 4) (define-constant asm-arg-reg-max 1) (define-constant asm-arg-reg-cnt 1) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "int") (define-constant typedef-uptr "unsigned int") diff --git a/s/i3qnx.def b/s/i3qnx.def index 5da2c7fb15..a0b291de2e 100644 --- a/s/i3qnx.def +++ b/s/i3qnx.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 8) (define-constant asm-arg-reg-max 1) (define-constant asm-arg-reg-cnt 1) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "int") (define-constant typedef-uptr "unsigned int") diff --git a/s/i3s2.def b/s/i3s2.def index 544e308685..a8f39705df 100644 --- a/s/i3s2.def +++ b/s/i3s2.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 4) (define-constant asm-arg-reg-max 1) (define-constant asm-arg-reg-cnt 1) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "int") (define-constant typedef-uptr "unsigned int") diff --git a/s/inspect.ss b/s/inspect.ss index ebf0ad714f..1bd56e3fd8 100644 --- a/s/inspect.ss +++ b/s/inspect.ss @@ -2281,8 +2281,12 @@ (values (make-vector count) count cp)) (let ([obj (vector-ref vals i)] [var* (vector-ref vars i)]) (cond - [(eq? obj cookie) - (unless (null? var*) ($oops who "expected value for ~s but it was not in lpm" (car var*))) + [(and (eq? obj cookie) + (or (null? var*) + ;; unboxed variable? + (not (and (pair? var*) (box? (car var*)) (null? (cdr var*)))))) + (unless (null? var*) + ($oops who "expected value for ~s but it was not in lpm" (car var*))) (f (fx1+ i) count cp cpvar*)] [(null? var*) (let-values ([(v frame-count cp) (f (fx1+ i) (fx1+ count) cp cpvar*)]) @@ -2310,7 +2314,12 @@ (vector->list var)))] [else (let-values ([(v frame-count cp) (g (cdr var*) (fx1+ count) cp cpvar*)]) - (vector-set! v count (make-variable-object obj var)) + (vector-set! v count (cond + [(box? var) + ;; unboxed variable + (make-variable-object ' (unbox var))] + [else + (make-variable-object obj var)])) (values v frame-count cp))])))))])))) (lambda (v frame-count cp) (real-make-continuation-object x (rp-info-src rpi) (rp-info-sexpr rpi) cp v frame-count pos))))))] diff --git a/s/library.ss b/s/library.ss index 33f967e43e..d0f8087934 100644 --- a/s/library.ss +++ b/s/library.ss @@ -303,6 +303,11 @@ (define index-oops (lambda (who x i) ($oops who "~s is not a valid index for ~s" i x))) + (define bytevector-index-oops + ;; for consistency with error before library entry was introduced: + (lambda (who x i) + ($oops who "invalid index ~s for bytevector ~s" i x))) + (define stencil-vector-oops (lambda (who x) ($oops who "~s is not a vector" x))) @@ -400,6 +405,16 @@ (define-library-entry (stencil-vector-mask v) (stencil-vector-oops 'stencil-vector-mask v)) + (define-library-entry (bytevector-ieee-double-native-ref v i) + (if (bytevector? v) + (bytevector-index-oops 'bytevector-ieee-double-native-ref v i) + (bytevector-oops 'bytevector-ieee-double-native-ref v))) + + (define-library-entry (bytevector-ieee-double-native-set! v i) + (if (mutable-bytevector? v) + (bytevector-index-oops 'bytevector-ieee-double-native-set! v i) + (mutable-bytevector-oops 'bytevector-ieee-double-native-set! v))) + (define-library-entry (char=? x y) (char-oops 'char=? (if (char? x) y x))) (define-library-entry (char? x y) (char-oops 'char>? (if (char? x) y x))) @@ -523,6 +538,7 @@ (define-library-entry (fxxor x y) (fxnonfixnum2 'fxxor x y)) (define-library-entry (fxand x y) (fxnonfixnum2 'fxand x y)) (define-library-entry (fxnot x) (fxnonfixnum1 'fxnot x)) +(define-library-entry (fixnum->flonum x) (fxnonfixnum1 'fixnum->flonum x)) (define-library-entry (fxpopcount x) ($oops 'fxpopcount32 "~s is not a non-negative fixnum" x)) (define-library-entry (fxpopcount32 x) ($oops 'fxpopcount32 "~s is not a 32-bit fixnum" x)) (define-library-entry (fxpopcount16 x) ($oops 'fxpopcount16 "~s is not a 16-bit fixnum" x)) @@ -658,6 +674,7 @@ (define-library-entry (fl* x y) (flonum-oops 'fl* (if (flonum? x) y x))) (define-library-entry (fl/ x y) (flonum-oops 'fl/ (if (flonum? x) y x))) (define-library-entry (flnegate x) (flonum-oops 'fl- x)) + (define-library-entry (flabs x) (flonum-oops 'flabs x)) ) (define-library-entry (flround x) diff --git a/s/mathprims.ss b/s/mathprims.ss index 41d0e80805..741eea4edc 100644 --- a/s/mathprims.ss +++ b/s/mathprims.ss @@ -272,8 +272,7 @@ (set! flabs (lambda (x) - (unless (flonum? x) (flargerr 'flabs x)) - (#3%flabs x))) + (#2%flabs x))) (set! flround (lambda (x) @@ -682,8 +681,7 @@ (set! fixnum->flonum (lambda (x) - (unless (fixnum? x) (fxargerr 'fixnum->flonum x)) - (#3%fixnum->flonum x))) + (#2%fixnum->flonum x))) (set-who! fxlength (lambda (x) diff --git a/s/np-languages.ss b/s/np-languages.ss index 250f8c26c0..1e46e69e48 100644 --- a/s/np-languages.ss +++ b/s/np-languages.ss @@ -17,7 +17,7 @@ (module np-languages () (export sorry! var? var-index var-index-set! prelex->uvar make-tmp make-assigned-tmp make-unspillable make-cpvar make-restricted-unspillable - uvar? uvar-name uvar-type uvar-source + uvar? uvar-name uvar-type uvar-type-set! uvar-source uvar-referenced? uvar-referenced! uvar-assigned? uvar-assigned! uvar-was-closure-ref? uvar-was-closure-ref! uvar-unspillable? uvar-spilled? uvar-spilled! uvar-local-save? uvar-local-save! @@ -29,13 +29,13 @@ uvar-ref-weight uvar-ref-weight-set! uvar-save-weight uvar-save-weight-set! uvar-live-count uvar-live-count-set! uvar - fv-offset + fv-offset fv-type var-spillable-conflict* var-spillable-conflict*-set! var-unspillable-conflict* var-unspillable-conflict*-set! uvar-degree uvar-degree-set! uvar-info-lambda uvar-info-lambda-set! uvar-iii uvar-iii-set! - ur? + ur? fpur? block make-block block? block-label block-effect* block-src* block-pseudo-src block-in-link* block-flags block-label-set! block-effect*-set! block-src*-set! block-pseudo-src-set! block-in-link*-set! block-flags-set! block-live-in block-live-in-set! block-fp-offset block-fp-offset-set! @@ -57,7 +57,7 @@ live-info make-live-info live-info-live live-info-live-set! live-info-useless live-info-useless-set! primitive-pure? primitive-type primitive-handler primitive-handler-set! %primitive value-primitive? pred-primitive? effect-primitive? - fv? $make-fv make-reg reg? reg-name reg-tc-disp reg-callee-save? reg-mdinfo + fv? $make-fv make-reg reg? reg-name reg-tc-disp reg-callee-save? reg-mdinfo reg-type reg-precolored reg-precolored-set! label? label-name libspec-label? make-libspec-label libspec-label-libspec libspec-label-live-reg* @@ -92,13 +92,13 @@ (define-record-type (fv $make-fv fv?) (parent var) - (fields offset) + (fields offset type) (nongenerative) (sealed #t) (protocol (lambda (pargs->new) - (lambda (offset) - ((pargs->new) offset))))) + (lambda (offset type) + ((pargs->new) offset type))))) (module () (record-writer (record-type-descriptor fv) @@ -107,13 +107,13 @@ (define-record-type reg (parent var) - (fields name mdinfo tc-disp callee-save? (mutable precolored)) + (fields name mdinfo tc-disp callee-save? type (mutable precolored)) (nongenerative) (sealed #t) (protocol (lambda (pargs->new) - (lambda (name mdinfo tc-disp callee-save?) - ((pargs->new) name mdinfo tc-disp callee-save? #f))))) + (lambda (name mdinfo tc-disp callee-save? type) + ((pargs->new) name mdinfo tc-disp callee-save? type #f))))) (module () (record-writer (record-type-descriptor reg) @@ -169,7 +169,7 @@ (fields name source - type + (mutable type) conflict* (mutable flags) (mutable info-lambda) @@ -206,8 +206,8 @@ [(name) (make-assigned-tmp name 'ptr)] [(name type) ($make-uvar name #f type '() (uvar-flags-mask referenced assigned))])) (define make-unspillable - (lambda (name) - ($make-uvar name #f 'ptr '() (uvar-flags-mask referenced unspillable)))) + (lambda (name type) + ($make-uvar name #f type '() (uvar-flags-mask referenced unspillable)))) (define make-cpvar (lambda () (include "types.ss") @@ -220,7 +220,9 @@ (module () (record-writer (record-type-descriptor uvar) (lambda (x p wr) - (write (lookup-unique-uvar x) p)))) + (write (lookup-unique-uvar x) p) + (when (eq? (uvar-type x) 'fp) + (write 'fp p))))) (define lookup-unique-uvar (let ([ht (make-eq-hashtable)]) @@ -439,6 +441,12 @@ (- (clause (x* ...) interface body)) (+ (clause (x* ...) mcp interface body)))) + (define (mref-type? t) + ;; Currently, only 'fp vesus non-'fp matters + (or (eq? t 'ptr) + (eq? t 'uptr) + (eq? t 'fp))) + ; move labels to top level and expands closures forms to more primitive operations (define-language L7 (extends L6) (terminals @@ -446,7 +454,8 @@ (fixnum (interface))) (+ (var (x)) (primitive (prim)) ; moved up one language to support closure instrumentation - (fixnum (interface offset)))) + (fixnum (interface offset)) + (mref-type (type)))) (entry Program) (Program (prog) (+ (labels ([l* le*] ...) l) => (labels ([l* le*] ...) (l)))) @@ -454,7 +463,7 @@ (+ (fcallable info l) => (fcallable info l))) (Lvalue (lvalue) (+ x - (mref e1 e2 imm))) + (mref e1 e2 imm type))) (Expr (e body) (- x (fcallable info) @@ -471,7 +480,9 @@ (set! lvalue e) ; these two forms are added here so expand-inline handlers can expand into them (values info e* ...) - (goto l)))) + (goto l) + ; for floating-point unboxing during expand-line: + (unboxed-fp e)))) (define-record-type primitive (fields name type pure? (mutable handler)) @@ -525,14 +536,8 @@ (declare-primitive c-simple-call effect #f) (declare-primitive c-simple-return effect #f) (declare-primitive deactivate-thread effect #f) ; threaded version only - (declare-primitive fl* effect #f) - (declare-primitive fl+ effect #f) - (declare-primitive fl- effect #f) - (declare-primitive fl/ effect #f) (declare-primitive fldl effect #f) ; x86 (declare-primitive flds effect #f) ; x86 - (declare-primitive flsqrt effect #f) ; not implemented for some ppc32 (so we don't use it) - (declare-primitive flt effect #f) (declare-primitive inc-cc-counter effect #f) (declare-primitive inc-profile-counter effect #f) (declare-primitive invoke-prelude effect #f) @@ -567,9 +572,9 @@ (declare-primitive >= pred #t) (declare-primitive condition-code pred #t) (declare-primitive eq? pred #t) - (declare-primitive fl< pred #t) - (declare-primitive fl<= pred #t) - (declare-primitive fl= pred #t) + (declare-primitive fp< pred #t) + (declare-primitive fp<= pred #t) + (declare-primitive fp= pred #t) (declare-primitive lock! pred #f) (declare-primitive logtest pred #t) (declare-primitive log!test pred #t) @@ -615,6 +620,19 @@ (declare-primitive zext16 value #t) (declare-primitive zext32 value #t) ; 64-bit only + (declare-primitive fpmove value #t) + (declare-primitive fp+ value #t) + (declare-primitive fp- value #t) + (declare-primitive fp* value #t) + (declare-primitive fp/ value #t) + (declare-primitive fpt value #t) + (declare-primitive fpsqrt value #t) ; not implemented for some ppc32 (so we don't use it) + + (declare-primitive fpcastto value #t) ; 64-bit only + (declare-primitive fpcastto/hi value #t) ; 32-bit only + (declare-primitive fpcastto/lo value #t) ; 32-bit only + (declare-primitive fpcastfrom value #t) ; 64-bit: 1 argument; 32-bit: 2 arguments + (define immediate? (let ([low (- (bitwise-arithmetic-shift-left 1 (fx- (constant ptr-bits) 1)))] [high (- (bitwise-arithmetic-shift-left 1 (constant ptr-bits)) 1)]) @@ -652,7 +670,8 @@ (+ (hand-coded sym))) (Expr (e body) (- (quote d) - pr))) + pr + (unboxed-fp e)))) ; determine where we should be placing interrupt and overflow (define-language L9.5 (extends L9) @@ -683,8 +702,8 @@ (- (clause (x* ...) mcp interface body)) (+ (clause (x* ...) (local* ...) mcp interface body))) (Lvalue (lvalue) - (- (mref e1 e2 imm)) - (+ (mref x1 x2 imm))) + (- (mref e1 e2 imm type)) + (+ (mref x1 x2 imm type))) (Triv (t) (+ lvalue (literal info) => info @@ -854,7 +873,8 @@ (label (l rpl)) (source-object (src)) (symbol (sym)) - (boolean (as-fallthrough))) + (boolean (as-fallthrough)) + (mref-type (type))) (Program (prog) (labels ([l* le*] ...) l) => (letrec ([l* le*] ...) (l))) (CaseLambdaExpr (le) @@ -862,7 +882,7 @@ (hand-coded sym)) (Lvalue (lvalue) x - (mref x1 x2 imm)) + (mref x1 x2 imm type)) (Triv (t) lvalue (literal info) => info @@ -985,7 +1005,8 @@ (return-label (mrvl)) (boolean (error-on-values as-fallthrough)) (fixnum (max-fv offset)) - (block (block entry-block))) + (block (block entry-block)) + (mref-type (type))) (Program (pgm) (labels ([l* le*] ...) l) => (letrec ([l* le*] ...) (l))) (CaseLambdaExpr (le) @@ -993,7 +1014,7 @@ (Dummy (dumdum) (dummy)) (Lvalue (lvalue) x - (mref x1 x2 imm)) + (mref x1 x2 imm type)) (Triv (t) lvalue (literal info) => info @@ -1049,14 +1070,21 @@ (lambda (x) (or (reg? x) (uvar? x)))) + (define fpur? + (lambda (x) + (or (and (reg? x) + (eq? (reg-type x) 'fp)) + (and (uvar? x) + (eq? (uvar-type x) 'fp))))) + (define-language L15c (extends L15b) (terminals (- (var (x var))) (+ (ur (x)))) ; NB: base and index are really either regs or (mref %sfp %zero imm) (Lvalue (lvalue) - (- (mref x1 x2 imm)) - (+ (mref lvalue1 lvalue2 imm))) + (- (mref x1 x2 imm type)) + (+ (mref lvalue1 lvalue2 imm type))) (Effect (e) (- (fp-offset live-info imm)))) @@ -1068,8 +1096,8 @@ (+ (procedure (proc)) => $procedure-name)) (entry Program) (Lvalue (lvalue) - (- (mref lvalue1 lvalue2 imm)) - (+ (mref x1 x2 imm))) + (- (mref lvalue1 lvalue2 imm type)) + (+ (mref x1 x2 imm type))) (Rhs (rhs) (- (inline info value-prim t* ...)) (+ (asm info proc t* ...) => (asm proc t* ...))) diff --git a/s/primdata.ss b/s/primdata.ss index 9ad7f102e9..a0f61bebb4 100644 --- a/s/primdata.ss +++ b/s/primdata.ss @@ -91,11 +91,11 @@ (define-symbol-flags* ([libraries (rnrs) (rnrs arithmetic flonums)] [flags primitive proc]) (flonum? [sig [(ptr) -> (boolean)]] [flags pure unrestricted mifoldable discard]) (real->flonum [sig [(real) -> (flonum)]] [flags arith-op mifoldable discard safeongoodargs]) - (fl=? [sig [(flonum flonum flonum ...) -> (boolean)]] [flags pure mifoldable discard safeongoodargs]) ; restricted to 2+ arguments - (fl (boolean)]] [flags pure mifoldable discard safeongoodargs]) ; restricted to 2+ arguments - (fl<=? [sig [(flonum flonum flonum ...) -> (boolean)]] [flags pure mifoldable discard safeongoodargs]) ; restricted to 2+ arguments - (fl>? [sig [(flonum flonum flonum ...) -> (boolean)]] [flags pure mifoldable discard safeongoodargs]) ; restricted to 2+ arguments - (fl>=? [sig [(flonum flonum flonum ...) -> (boolean)]] [flags pure mifoldable discard safeongoodargs]) ; restricted to 2+ arguments + (fl=? [sig [(flonum flonum flonum ...) -> (boolean)]] [flags pure mifoldable discard safeongoodargs unboxed-arguments]) ; restricted to 2+ arguments + (fl (boolean)]] [flags pure mifoldable discard safeongoodargs unboxed-arguments]) ; restricted to 2+ arguments + (fl<=? [sig [(flonum flonum flonum ...) -> (boolean)]] [flags pure mifoldable discard safeongoodargs unboxed-arguments]) ; restricted to 2+ arguments + (fl>? [sig [(flonum flonum flonum ...) -> (boolean)]] [flags pure mifoldable discard safeongoodargs unboxed-arguments]) ; restricted to 2+ arguments + (fl>=? [sig [(flonum flonum flonum ...) -> (boolean)]] [flags pure mifoldable discard safeongoodargs unboxed-arguments]) ; restricted to 2+ arguments (flinteger? [sig [(flonum) -> (boolean)]] [flags pure mifoldable discard safeongoodargs]) (flzero? [sig [(flonum) -> (boolean)]] [flags pure mifoldable discard safeongoodargs]) (flpositive? [sig [(flonum) -> (boolean)]] [flags pure mifoldable discard safeongoodargs]) @@ -107,11 +107,11 @@ (flnan? [sig [(flonum) -> (boolean)]] [flags pure mifoldable discard safeongoodargs]) (flmax [sig [(flonum flonum ...) -> (flonum)]] [flags arith-op mifoldable discard safeongoodargs]) (flmin [sig [(flonum flonum ...) -> (flonum)]] [flags arith-op mifoldable discard safeongoodargs]) - (fl* [sig [(flonum ...) -> (flonum)]] [flags arith-op partial-folder safeongoodargs]) - (fl+ [sig [(flonum ...) -> (flonum)]] [flags arith-op partial-folder safeongoodargs]) - (fl- [sig [(flonum flonum ...) -> (flonum)]] [flags arith-op partial-folder safeongoodargs]) - (fl/ [sig [(flonum flonum ...) -> (flonum)]] [flags arith-op partial-folder safeongoodargs]) - (flabs [sig [(flonum) -> (flonum)]] [flags arith-op mifoldable discard safeongoodargs]) + (fl* [sig [(flonum ...) -> (flonum)]] [flags arith-op partial-folder safeongoodargs unboxed-arguments]) + (fl+ [sig [(flonum ...) -> (flonum)]] [flags arith-op partial-folder safeongoodargs unboxed-arguments]) + (fl- [sig [(flonum flonum ...) -> (flonum)]] [flags arith-op partial-folder safeongoodargs unboxed-arguments]) + (fl/ [sig [(flonum flonum ...) -> (flonum)]] [flags arith-op partial-folder safeongoodargs unboxed-arguments]) + (flabs [sig [(flonum) -> (flonum)]] [flags arith-op mifoldable discard safeongoodargs unboxed-arguments]) (fldiv-and-mod [sig [(flonum flonum) -> (flonum flonum)]] [flags discard]) (fldiv [sig [(flonum flonum) -> (flonum)]] [flags arith-op mifoldable discard]) (flmod [sig [(flonum flonum) -> (flonum)]] [flags arith-op mifoldable discard]) @@ -1327,10 +1327,10 @@ (flonum->fixnum [sig [(flonum) -> (fixnum)]] [flags arith-op cp02]) (flnonpositive? [sig [(flonum) -> (boolean)]] [flags pure mifoldable discard safeongoodargs]) (flnonnegative? [sig [(flonum) -> (boolean)]] [flags pure mifoldable discard safeongoodargs]) - (fl= [sig [(flonum flonum ...) -> (boolean)]] [flags pure mifoldable discard safeongoodargs]) ; not restricted to 2+ arguments - (fl< [sig [(flonum flonum ...) -> (boolean)]] [flags pure mifoldable discard safeongoodargs]) ; not restricted to 2+ arguments - (fl<= [sig [(flonum flonum ...) -> (boolean)]] [flags pure mifoldable discard safeongoodargs]) ; not restricted to 2+ arguments - (fl> [sig [(flonum flonum ...) -> (boolean)]] [flags pure mifoldable discard safeongoodargs]) ; not restricted to 2+ arguments + (fl= [sig [(flonum flonum ...) -> (boolean)]] [flags pure mifoldable discard safeongoodargs unboxed-arguments]) ; not restricted to 2+ arguments + (fl< [sig [(flonum flonum ...) -> (boolean)]] [flags pure mifoldable discard safeongoodargs unboxed-arguments]) ; not restricted to 2+ arguments + (fl<= [sig [(flonum flonum ...) -> (boolean)]] [flags pure mifoldable discard safeongoodargs unboxed-arguments]) ; not restricted to 2+ arguments + (fl> [sig [(flonum flonum ...) -> (boolean)]] [flags pure mifoldable discard safeongoodargs unboxed-arguments]) ; not restricted to 2+ arguments (fl>= [sig [(flonum flonum ...) -> (boolean)]] [flags pure mifoldable discard safeongoodargs]) ; not restricted to 2+ arguments (flush-output-port [sig [() (output-port) -> (void)]] [flags true]) ; not restricted to 1 argument (foreign-entry? [sig [(string) -> (boolean)]] [flags discard]) diff --git a/s/ta6fb.def b/s/ta6fb.def index f92d222abe..72f79b4d44 100644 --- a/s/ta6fb.def +++ b/s/ta6fb.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 8) (define-constant asm-arg-reg-max 5) (define-constant asm-arg-reg-cnt 3) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "long int") (define-constant typedef-uptr "unsigned long int") diff --git a/s/ta6le.def b/s/ta6le.def index af06b6a07f..ecdc95ffad 100644 --- a/s/ta6le.def +++ b/s/ta6le.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 8) (define-constant asm-arg-reg-max 5) (define-constant asm-arg-reg-cnt 3) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "long int") (define-constant typedef-uptr "unsigned long int") diff --git a/s/ta6nb.def b/s/ta6nb.def index 9917918934..3cf1d39ad8 100644 --- a/s/ta6nb.def +++ b/s/ta6nb.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 8) (define-constant asm-arg-reg-max 5) (define-constant asm-arg-reg-cnt 3) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "long int") (define-constant typedef-uptr "unsigned long int") diff --git a/s/ta6nt.def b/s/ta6nt.def index 567f30463b..8e4674bbb4 100644 --- a/s/ta6nt.def +++ b/s/ta6nt.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 8) (define-constant asm-arg-reg-max 5) (define-constant asm-arg-reg-cnt 3) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "long long int") (define-constant typedef-uptr "unsigned long long int") diff --git a/s/ta6ob.def b/s/ta6ob.def index 3fe1a6c169..3d1e019f4d 100644 --- a/s/ta6ob.def +++ b/s/ta6ob.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 8) (define-constant asm-arg-reg-max 5) (define-constant asm-arg-reg-cnt 3) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "long int") (define-constant typedef-uptr "unsigned long int") diff --git a/s/ta6osx.def b/s/ta6osx.def index b0fba2c935..438b13b681 100644 --- a/s/ta6osx.def +++ b/s/ta6osx.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 8) (define-constant asm-arg-reg-max 5) (define-constant asm-arg-reg-cnt 3) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "long int") (define-constant typedef-uptr "unsigned long int") diff --git a/s/ta6s2.def b/s/ta6s2.def index 326db66e1e..4c91ab8d9c 100644 --- a/s/ta6s2.def +++ b/s/ta6s2.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 8) (define-constant asm-arg-reg-max 5) (define-constant asm-arg-reg-cnt 3) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "long int") (define-constant typedef-uptr "unsigned long int") diff --git a/s/ti3fb.def b/s/ti3fb.def index 18cc4bc36e..9752f79f2f 100644 --- a/s/ti3fb.def +++ b/s/ti3fb.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 4) (define-constant asm-arg-reg-max 1) (define-constant asm-arg-reg-cnt 1) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "int") (define-constant typedef-uptr "unsigned int") diff --git a/s/ti3le.def b/s/ti3le.def index e25b8422a8..a5db0b2b1f 100644 --- a/s/ti3le.def +++ b/s/ti3le.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 4) (define-constant asm-arg-reg-max 1) (define-constant asm-arg-reg-cnt 1) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "int") (define-constant typedef-uptr "unsigned int") diff --git a/s/ti3nb.def b/s/ti3nb.def index 127f7c0a05..4cd1fc60f4 100644 --- a/s/ti3nb.def +++ b/s/ti3nb.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 4) (define-constant asm-arg-reg-max 1) (define-constant asm-arg-reg-cnt 1) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "int") (define-constant typedef-uptr "unsigned int") diff --git a/s/ti3nt.def b/s/ti3nt.def index 4cbe1486bf..391625d27a 100644 --- a/s/ti3nt.def +++ b/s/ti3nt.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 8) (define-constant asm-arg-reg-max 1) (define-constant asm-arg-reg-cnt 1) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "int") (define-constant typedef-uptr "unsigned int") diff --git a/s/ti3ob.def b/s/ti3ob.def index 4b285d71b7..ba8fa99c1f 100644 --- a/s/ti3ob.def +++ b/s/ti3ob.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 4) (define-constant asm-arg-reg-max 1) (define-constant asm-arg-reg-cnt 1) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "int") (define-constant typedef-uptr "unsigned int") diff --git a/s/ti3osx.def b/s/ti3osx.def index 7427f15e08..47ffb4ed7f 100644 --- a/s/ti3osx.def +++ b/s/ti3osx.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 4) (define-constant asm-arg-reg-max 1) (define-constant asm-arg-reg-cnt 1) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "int") (define-constant typedef-uptr "unsigned int") diff --git a/s/ti3s2.def b/s/ti3s2.def index be35e15492..4b02a94bf6 100644 --- a/s/ti3s2.def +++ b/s/ti3s2.def @@ -29,6 +29,7 @@ (define-constant max-integer-alignment 4) (define-constant asm-arg-reg-max 1) (define-constant asm-arg-reg-cnt 1) +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "int") (define-constant typedef-uptr "unsigned int") diff --git a/s/x86.ss b/s/x86.ss index a59ca5bd5e..edeb6b8aca 100644 --- a/s/x86.ss +++ b/s/x86.ss @@ -16,27 +16,30 @@ ;;; SECTION 1: registers (define-registers (reserved - [%tc %edi #t 7] - [%sfp %ebp #t 5] + [%tc %edi #t 7 uptr] + [%sfp %ebp #t 5 uptr] #;[%ap] #;[%esp] #;[%eap] #;[%trap]) (allocable ; keep in sync with all-but-byte-registers below - [%ac0 %edx #f 2] - [%xp %ecx #f 1] - [%ts %eax #f 0] - [%td %ebx #t 3] + [%ac0 %edx #f 2 uptr] + [%xp %ecx #f 1 uptr] + [%ts %eax #f 0 uptr] + [%td %ebx #t 3 uptr] #;[%ret] #;[%cp] #;[%ac1] #;[%yp] - [%esi #t 6]) + [%esi #t 6 uptr] + [%fp1 %Cfparg3 #f 2 fp] + [%fp2 %Cfparg4 #f 3 fp]) (machine-dependent - [%flreg1 #f 0] - [%flreg2 #f 1] - [%sp #t 4] - #;[%esi #f 6])) + [%fptmp1 #f 0 fp] + [%fptmp2 #f 1 fp] + [%sp #t 4 uptr] + #;[%esi #f 6]) + (reify-support %ts)) ;;; SECTION 2: instructions (module (md-handle-jump) ; also sets primitive handlers @@ -66,6 +69,18 @@ (lambda (x) (or (lmem? x) (literal@? x)))) + (define fpmem? + (lambda (x) + (nanopass-case (L15c Triv) x + [(mref ,lvalue0 ,lvalue1 ,imm ,type) (eq? type 'fp)] + [else #f]))) + + (define-syntax mem-of-type? + (lambda (stx) + (syntax-case stx (mem fpmem) + [(_ mem e) #'(lmem? e)] + [(_ fpmem e) #'(fpmem? e)]))) + (define real-imm32? (lambda (x) (nanopass-case (L15c Triv) x @@ -100,12 +115,12 @@ (lambda (a k) (nanopass-case (L15c Triv) a ; NOTE: x86_64 and risc arch's will need to deal with limitations on the offset - [(mref ,lvalue0 ,lvalue1 ,imm) + [(mref ,lvalue0 ,lvalue1 ,imm ,type) (lvalue->ur lvalue0 (lambda (x0) (lvalue->ur lvalue1 (lambda (x1) - (k (with-output-language (L15d Triv) `(mref ,x0 ,x1 ,imm)))))))]))) + (k (with-output-language (L15d Triv) `(mref ,x0 ,x1 ,imm ,type)))))))]))) (define mem->mem (lambda (a k) @@ -117,13 +132,15 @@ (syntax-rules () [(_ ?a ?aty*) (let ([a ?a] [aty* ?aty*]) - (or (memq 'ur aty*) + (or (and (memq 'ur aty*) (not (or (fpmem? a) (fpur? a)))) + (and (memq 'fpur aty*) (or (fpmem? a) (fpur? a))) (or (and (memq 'imm32 aty*) (imm32? a)) (and (memq 'imm aty*) (imm? a)) (and (memq 'zero aty*) (imm0? a)) (and (memq 'real-imm32 aty*) (real-imm32? a)) (and (memq 'negatable-real-imm32 aty*) (negatable-real-imm32? a)) - (and (memq 'mem aty*) (mem? a)))))])) + (and (memq 'mem aty*) (mem? a)) + (and (memq 'fpmem aty*) (fpmem? a)))))])) (define-syntax coerce-opnd ; passes k something compatible with aty* (syntax-rules () @@ -131,6 +148,7 @@ (let ([a ?a] [aty* ?aty*] [k ?k]) (cond [(and (memq 'mem aty*) (mem? a)) (mem->mem a k)] + [(and (memq 'fpmem aty*) (fpmem? a)) (mem->mem a k)] [(and (memq 'imm32 aty*) (imm32? a)) (k (imm->imm a))] [(and (memq 'imm aty*) (imm? a)) (k (imm->imm a))] [(and (memq 'zero aty*) (imm0? a)) (k (imm->imm a))] @@ -152,6 +170,18 @@ (build-set! ,u ,a) (k u)))))] [else (sorry! 'coerce-opnd "unexpected operand ~s" a)])] + [(memq 'fpur aty*) + (cond + [(fpur? a) (k a)] + [(fpmem? a) + (mem->mem a + (lambda (a) + (let ([u (make-tmp 'u 'fp)]) + (seq + (build-set! ,u ,a) + (k u)))))] + [else + (sorry! 'coerce-opnd "unexpected operand ~s" a)])] [else (sorry! 'coerce-opnd "cannot coerce ~s to ~s" a aty*)]))])) (define set-ur=mref @@ -212,22 +242,29 @@ [(mref? c) (nanopass-case (L15c Triv) c ; NOTE: x86_64 and risc arch's will need to deal with limitations on the offset - [(mref ,lvalue0 ,lvalue1 ,imm) + [(mref ,lvalue0 ,lvalue1 ,imm ,type) (lvalue->ur lvalue0 (lambda (x0) (lvalue->ur lvalue1 (lambda (x1) (let ([u (make-tmp 'u)]) (seq - (build-set! ,u (mref ,x0 ,x1 ,imm)) + (build-set! ,u (mref ,x0 ,x1 ,imm ,type)) (#,k u b) - (build-set! (mref ,x0 ,x1 ,imm) ,u)))))))])] + (build-set! (mref ,x0 ,x1 ,imm ,type) ,u)))))))])] [else (sorry! '#,(datum->syntax #'* who) "unexpected operand ~s" c)]))) (next c a b))))) + (define mem-type? + (lambda (t) + (syntax-case t (mem fpmem) + [mem #t] + [fpmem #t] + [else #f]))) + (define make-value-clause (lambda (fmt) - (syntax-case fmt (mem ur xp) + (syntax-case fmt (mem ur fpur xp) [(op (c mem) (a ?c) (b bty* ...)) (bound-identifier=? #'?c #'c) (acsame-mem #'c #'a #'b #'(bty* ...) #'(lambda (c b) (rhs c c b)))] @@ -240,9 +277,10 @@ [(op (c ur) (a aty* ...) (b ?c)) (bound-identifier=? #'?c #'c) (acsame-ur #'c #'b #'a #'(aty* ...) #'(lambda (c a) (rhs c a c)))] - [(op (c mem) (a aty ...) (b bty ...)) + [(op (c xmem) (a aty ...) (b bty ...)) + (mem-type? #'xmem) #`(lambda (c a b) - (if (and (lmem? c) (coercible? a '(aty ...)) (coercible? b '(bty ...))) + (if (and (mem-of-type? xmem c) (coercible? a '(aty ...)) (coercible? b '(bty ...))) (coerce-opnd b '(bty ...) (lambda (b) (coerce-opnd a '(aty ...) @@ -265,6 +303,22 @@ (lambda (c) (build-set! ,c ,u)))))))))) (next c a b)))] + [(op (c fpur) (a aty ...) (b bty ...)) + #`(lambda (c a b) + (if (and (coercible? a '(aty ...)) (coercible? b '(bty ...))) + (coerce-opnd b '(bty ...) + (lambda (b) + (coerce-opnd a '(aty ...) + (lambda (a) + (if (fpur? c) + (rhs c a b) + (let ([u (make-tmp 'u 'fp)]) + (seq + (rhs u a b) + (mref->mref c + (lambda (c) + (build-set! ,c ,u)))))))))) + (next c a b)))] ; four-operand case below can require four unspillables [(op (c ur) (a ur) (b ur) (d dty ...)) (not (memq 'mem (datum (dty ...)))) @@ -307,9 +361,10 @@ (rhs u u) (build-set! ,c ,u)))))) (next c a)))] - [(op (c mem) (a aty ...)) + [(op (c xmem) (a aty ...)) + (mem-type? #'xmem) #`(lambda (c a) - (if (and (lmem? c) (coercible? a '(aty ...))) + (if (and (mem-of-type? xmem c) (coercible? a '(aty ...))) (coerce-opnd a '(aty ...) (lambda (a) (mem->mem c @@ -330,6 +385,20 @@ (rhs u a) (build-set! ,c ,u)))))))) (next c a)))] + [(op (c fpur) (a aty ...)) + #`(lambda (c a) + (if (coercible? a '(aty ...)) + (coerce-opnd a '(aty ...) + (lambda (a) + (if (fpur? c) + (rhs c a) + (mem->mem c + (lambda (c) + (let ([u (make-tmp 'u 'fp)]) + (seq + (rhs u a) + (build-set! ,c ,u)))))))) + (next c a)))] [(op (c ur)) #`(lambda (c) (if (ur? c) @@ -754,14 +823,31 @@ [(op (x ur) (y ur) (z imm32)) `(asm ,info ,(asm-fl-load op (info-loadfl-flreg info)) ,x ,y ,z)]) - (define-instruction effect (flt) - [(op (x mem ur) (y ur)) `(asm ,info ,asm-flt ,x ,y)]) + (define-instruction value (fpt) + [(op (x fpur) (y ur)) `(asm ,info ,asm-fpt ,x ,y)]) - (define-instruction effect (fl+ fl- fl/ fl*) - [(op (x ur) (y ur) (z ur)) `(asm ,info ,(asm-flop-2 op) ,x ,y ,z)]) + (define-instruction value (fpmove) + [(op (x fpmem) (y fpur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpmove ,y))] + [(op (x fpur) (y fpmem fpur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpmove ,y))]) - (define-instruction effect (flsqrt) - [(op (x ur) (y ur)) `(asm ,info ,asm-flsqrt ,x ,y)]) + (define-instruction value (fpcastto/hi) ; little endian: high bytes are at +4 + [(op (x ur) (y fpmem)) `(set! ,(make-live-info) ,x (asm ,info ,(asm-movefrom 4) ,y))] + [(op (x ur) (y fpur)) `(set! ,(make-live-info) ,x (asm ,info ,(asm-fpcastto 32) ,y))]) + + (define-instruction value (fpcastto/lo) ; little endian: low byte are immediate bytes + [(op (x ur) (y fpmem)) `(set! ,(make-live-info) ,x (asm ,info ,asm-move ,y))] + [(op (x ur) (y fpur)) `(set! ,(make-live-info) ,x (asm ,info ,(asm-fpcastto 0) ,y))]) + + (define-instruction value (fpcastfrom) + [(op (x fpmem) (hi ur) (lo ur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpmovefrom ,lo ,hi))] + [(op (x fpur) (hi ur) (lo ur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpcastfrom ,lo ,hi))]) + + (define-instruction value (fp+ fp- fp* fp/) + [(op (x fpur) (y fpmem fpur) (z fpmem fpur)) + `(set! ,(make-live-info) ,x (asm ,info ,(asm-fpop-2 op) ,y ,z))]) + + (define-instruction value (fpsqrt) + [(op (x fpur) (y fpmem fpur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpsqrt ,y))]) (define-instruction effect inc-cc-counter [(op (x ur) (y imm32 ur) (z imm32 ur)) `(asm ,info ,asm-inc-cc-counter ,x ,y ,z)]) @@ -807,10 +893,13 @@ (define-instruction value pop [(op (z ur)) `(set! ,(make-live-info) ,z (asm ,info ,asm-pop))]) - (define-instruction pred (fl= fl< fl<=) - [(op (x ur) (y ur)) + (define-instruction pred (fp= fp< fp<=) + [(op (x fpmem) (y fpur)) (let ([info (make-info-condition-code op #t #f)]) ; NB: reversed? flag is assumed to be #t - (values '() `(asm ,info ,(asm-fl-relop info) ,x ,y)))]) + (values '() `(asm ,info ,(asm-fp-relop info) ,x ,y)))] + [(op (x fpur) (y fpur)) + (let ([info (make-info-condition-code op #t #f)]) ; NB: reversed? flag is assumed to be #t + (values '() `(asm ,info ,(asm-fp-relop info) ,x ,y)))]) (define-instruction pred (eq? u< < > <= >=) ; the idea (following from the intel x86/x86_64 documentation) @@ -871,7 +960,7 @@ `(set! ,(make-live-info) ,uts (immediate 1)) `(set! ,(make-live-info) ,uts (asm ,info ,asm-exchange ,uts - (mref ,x ,y ,imm)))))]) + (mref ,x ,y ,imm uptr)))))]) `(asm ,info-cc-eq ,asm-eq ,uts (immediate 0))))])) (define-instruction effect (locked-incr!) @@ -926,23 +1015,23 @@ (constant-case machine-type-name [(i3osx ti3osx) (seq - `(set! ,(make-live-info) ,%tc (mref ,%sp ,%zero 4)) + `(set! ,(make-live-info) ,%tc (mref ,%sp ,%zero 4 uptr)) `(set! ,(make-live-info) ,%sp (asm ,info ,asm-sub ,%sp (immediate 12))))] - [else `(set! ,(make-live-info) ,%tc (mref ,%sp ,%zero 4))])]) + [else `(set! ,(make-live-info) ,%tc (mref ,%sp ,%zero 4 uptr))])]) ) ;;; SECTION 3: assembler (module asm-module (; required exports - asm-move asm-move/extend asm-load asm-store asm-swap asm-library-call asm-library-jump + asm-move asm-move/extend asm-movefrom asm-load asm-store asm-swap asm-library-call asm-library-jump asm-mul asm-muli asm-addop asm-add asm-sub asm-negate asm-sub-negate asm-pop asm-shiftop asm-sll asm-logand asm-lognot - asm-logtest asm-fl-relop asm-relop asm-push asm-indirect-jump asm-literal-jump + asm-logtest asm-fp-relop asm-relop asm-push asm-indirect-jump asm-literal-jump asm-direct-jump asm-return-address asm-jump asm-conditional-jump asm-data-label asm-rp-header asm-rp-compact-header asm-lea1 asm-lea2 asm-indirect-call asm-fstpl asm-fstps asm-fldl asm-flds asm-condition-code - asm-fl-cvt asm-fl-store asm-fl-load asm-flt asm-trunc asm-div + asm-fl-cvt asm-fl-store asm-fl-load asm-fpt asm-trunc asm-div asm-exchange asm-pause asm-locked-incr asm-locked-decr asm-locked-cmpxchg - asm-flop-2 asm-flsqrt asm-c-simple-call + asm-fpop-2 asm-fpmove asm-fpmovefrom asm-fpcastfrom asm-fpcastto asm-fpsqrt asm-c-simple-call asm-save-flrv asm-restore-flrv asm-return asm-c-return asm-size asm-enter asm-foreign-call asm-foreign-callable asm-inc-profile-counter @@ -961,6 +1050,9 @@ [(x) (record-case x [(reg) r #t] [else #f])] [(x reg) (record-case x [(reg) r (eq? r reg)] [else #f])])) + (define ax-fp-register? + (lambda (x) (record-case x [(reg) r (eq? 'fp (reg-type r))] [else #f]))) + (define ax-ea-reg-code (lambda (ea) (record-case ea @@ -1131,6 +1223,9 @@ (define-op sse.subsd sse-op1 #xF2 #x5C) (define-op sse.ucomisd sse-op1 #x66 #x2E) (define-op sse.xorpd sse-op1 #x66 #x57) + (define-op sse.psllq sse-shift 6) + (define-op sse.psrlq sse-shift 2) + (define-op sse.orpd sse-op1 #x66 #x56) (define sse-op1 (lambda (op prefix-code op-code source dest-reg code*) @@ -1145,7 +1240,7 @@ (define sse-op2 (lambda (op prefix-code dstreg-op-code srcreg-op-code source dest code*) (cond - [(ax-register? source) + [(ax-fp-register? source) (emit-code (op source dest code*) (build byte prefix-code) (build byte #x0F) @@ -1153,7 +1248,7 @@ (ax-ea-modrm-reg dest source) (ax-ea-sib dest) (ax-ea-addr-disp dest))] - [(ax-register? dest) + [(ax-fp-register? dest) (emit-code (op source dest code*) (build byte prefix-code) (build byte #x0F) @@ -1162,7 +1257,16 @@ (ax-ea-sib source) (ax-ea-addr-disp source))] [else - ($oops 'assembler-internal "sse-op2 source=~s dest=~s" source dest)]))) + ($oops 'assembler-internal "sse-op2 source=~s dest=~s" source dest)]))) + + (define sse-shift + (lambda (op op-code dest-reg amt code*) + (emit-code (op dest-reg amt code*) + (build byte #x66) + (build byte #x0F) + (build byte #x73) + (ax-ea-modrm-ttt dest-reg op-code) + (build byte amt)))) (define float-op2 (lambda (op op-code1 op-code2 source-ea code*) @@ -1660,6 +1764,13 @@ [(word) 2] [else 4]))) + (define shift-address + (lambda (src offset) + (record-case src + [(disp) (imm x1) `(disp ,(fx+ imm offset) ,x1)] + [(index) (imm x2 x1) `(index ,(fx+ imm offset) ,x2 ,x1)] + [else ($oops 'shift-address "unexpected shift-address argument ~s" src)]))) + (define asm-move (lambda (code* dest src) (Trivit (dest src) @@ -1682,6 +1793,12 @@ [(zext16) (emit movzw src dest code*)] [else (sorry! who "unexpected op ~s" op)]))))) + (define asm-movefrom + (lambda (offset) + (lambda (code* dest src) + (Trivit (dest src) + (emit mov (shift-address src offset) dest code*))))) + (define asm-fstpl (lambda (code* dest) (Trivit (dest) @@ -1726,34 +1843,69 @@ [(load-single) (emit sse.movss src (cons 'reg flreg) code*)] [(load-double) (emit sse.movsd src (cons 'reg flreg) code*)]))))) - (define asm-flt - (lambda (code* src flonumreg) - (Trivit (src) - (let ([dest `(disp ,(constant flonum-data-disp) ,flonumreg)] - [flreg (cons 'reg %flreg1)]) - (emit sse.cvtsi2sd src flreg - (emit sse.movsd flreg dest code*)))))) + (define asm-fpt + (lambda (code* dest src) + (Trivit (dest src) + (emit sse.cvtsi2sd src dest code*)))) - (define asm-flop-2 + (define asm-fpop-2 (lambda (op) - (lambda (code* src1 src2 dest) - (let ([src1 `(disp ,(constant flonum-data-disp) ,src1)] - [src2 `(disp ,(constant flonum-data-disp) ,src2)] - [dest `(disp ,(constant flonum-data-disp) ,dest)]) - (let ([code* (emit sse.movsd (cons 'reg %flreg1) dest code*)]) - (let ([code* (case op - [(fl+) (emit sse.addsd src2 (cons 'reg %flreg1) code*)] - [(fl-) (emit sse.subsd src2 (cons 'reg %flreg1) code*)] - [(fl*) (emit sse.mulsd src2 (cons 'reg %flreg1) code*)] - [(fl/) (emit sse.divsd src2 (cons 'reg %flreg1) code*)])]) - (emit sse.movsd src1 (cons 'reg %flreg1) code*))))))) + (lambda (code* dest-reg src1 src2) + (define (emit-it src dest code*) + (case op + [(fp+) (emit sse.addsd src dest code*)] + [(fp-) (emit sse.subsd src dest code*)] + [(fp*) (emit sse.mulsd src dest code*)] + [(fp/) (emit sse.divsd src dest code*)])) + (cond + [(eq? dest-reg src1) + (Trivit (dest-reg src2) + (emit-it src2 dest-reg code*))] + [(and (eq? dest-reg src2) + (memq op '(fp+ fp*))) + (Trivit (dest-reg src1) + (emit-it src1 dest-reg code*))] + [else + (Trivit (dest-reg src1 src2) + (emit sse.movsd src2 (cons 'reg %fptmp1) + (emit sse.movsd src1 dest-reg + (emit-it (cons 'reg %fptmp1) dest-reg code*))))])))) - (define asm-flsqrt - (lambda (code* src dest) - (let ([src `(disp ,(constant flonum-data-disp) ,src)] - [dest `(disp ,(constant flonum-data-disp) ,dest)]) - (emit sse.sqrtsd src (cons 'reg %flreg1) - (emit sse.movsd (cons 'reg %flreg1) dest code*))))) + (define asm-fpsqrt + (lambda (code* dest-reg src) + (Trivit (dest-reg src) + (emit sse.sqrtsd src dest-reg code*)))) + + (define asm-fpmove + (lambda (code* dest src) + (Trivit (dest src) + (emit sse.movsd src dest code*)))) + + (define asm-fpmovefrom + (lambda (code* dest src1 src2) + (Trivit (dest src1 src2) + (emit mov src1 dest + (emit mov src2 (shift-address dest 4) code*))))) + + (define asm-fpcastfrom + (lambda (code* dest-reg src1 src2) + (Trivit (dest-reg src1 src2) + (emit sse.movd src1 dest-reg + (emit sse.movd src2 (cons 'reg %fptmp1) + (emit sse.psllq (cons 'reg %fptmp1) 32 + (emit sse.orpd (cons 'reg %fptmp1) dest-reg code*))))))) + + (define asm-fpcastto + (lambda (shift) + (lambda (code* dest src) + (Trivit (dest src) + (cond + [(eqv? shift 0) + (emit sse.movd src dest code*)] + [else + (emit sse.movsd src (cons 'reg %fptmp1) + (emit sse.psrlq (cons 'reg %fptmp1) shift + (emit sse.movd (cons 'reg %fptmp1) dest code*)))]))))) (define asm-trunc (lambda (code* dest flonumreg) @@ -2073,14 +2225,12 @@ (let-values ([(l1 l2) (if i? (values l2 l1) (values l1 l2))]) (asm-conditional-jump info l2 l1 offset))))))) - (define asm-fl-relop + (define asm-fp-relop (lambda (info) (lambda (l1 l2 offset x y) (values - (let ([x `(disp ,(constant flonum-data-disp) ,x)] - [y `(disp ,(constant flonum-data-disp) ,y)]) - (emit sse.movsd y (cons 'reg %flreg1) - (emit sse.ucomisd x (cons 'reg %flreg1) '()))) + (Trivit (x y) + (emit sse.ucomisd x y '())) (asm-conditional-jump info l1 l2 offset))))) (define asm-relop @@ -2277,11 +2427,11 @@ [(carry) (i? bcc bcs)] ; unordered: zf,pf,cf <- 111; gt: 000; lt: 001; eq: 100 ; reversed & inverted: !(fl< y x) = !(fl> x y) iff zf = 1 & cf = 1 - [(fl<) bls] + [(fp<) bls] ; reversed & inverted: !(fl<= y x) = !(fl>= x y) iff cf = 1 - [(fl<=) bcs] + [(fp<=) bcs] ; inverted: !(fl= x y) iff zf = 0 or cf (or pf) = 1 - [(fl=) (or bne bcs)])))))) + [(fp=) (or bne bcs)])))))) (define asm-data-label (lambda (code* l offset func code-size) @@ -2403,14 +2553,14 @@ (lambda (offset) (lambda (x) ; requires var (%seq - (inline ,(make-info-loadfl %flreg1) ,%load-double ,x ,%zero ,(%constant flonum-data-disp)) - (inline ,(make-info-loadfl %flreg1) ,%store-double ,%sp ,%zero (immediate ,offset)))))] + (inline ,(make-info-loadfl %fptmp1) ,%load-double ,x ,%zero ,(%constant flonum-data-disp)) + (inline ,(make-info-loadfl %fptmp1) ,%store-double ,%sp ,%zero (immediate ,offset)))))] [load-single-stack (lambda (offset) (lambda (x) ; requires var (%seq - (inline ,(make-info-loadfl %flreg1) ,%load-double->single ,x ,%zero ,(%constant flonum-data-disp)) - (inline ,(make-info-loadfl %flreg1) ,%store-single ,%sp ,%zero (immediate ,offset)))))] + (inline ,(make-info-loadfl %fptmp1) ,%load-double->single ,x ,%zero ,(%constant flonum-data-disp)) + (inline ,(make-info-loadfl %fptmp1) ,%store-single ,%sp ,%zero (immediate ,offset)))))] [load-stack (lambda (offset) (lambda (rhs) ; requires rhs @@ -2703,14 +2853,14 @@ (lambda (offset) (lambda (x) ; requires var (%seq - (inline ,(make-info-loadfl %flreg1) ,%load-double ,%sp ,%zero (immediate ,offset)) - (inline ,(make-info-loadfl %flreg1) ,%store-double ,x ,%zero ,(%constant flonum-data-disp)))))) + (inline ,(make-info-loadfl %fptmp1) ,%load-double ,%sp ,%zero (immediate ,offset)) + (inline ,(make-info-loadfl %fptmp1) ,%store-double ,x ,%zero ,(%constant flonum-data-disp)))))) (define load-single-stack (lambda (offset) (lambda (x) ; requires var (%seq - (inline ,(make-info-loadfl %flreg1) ,%load-single->double ,%sp ,%zero (immediate ,offset)) - (inline ,(make-info-loadfl %flreg1) ,%store-double ,x ,%zero ,(%constant flonum-data-disp)))))) + (inline ,(make-info-loadfl %fptmp1) ,%load-single->double ,%sp ,%zero (immediate ,offset)) + (inline ,(make-info-loadfl %fptmp1) ,%store-double ,x ,%zero ,(%constant flonum-data-disp)))))) (define load-stack (lambda (type offset) (lambda (lvalue) ; requires lvalue diff --git a/s/x86_64.ss b/s/x86_64.ss index 9cb5b3e869..91f6d5f9fb 100644 --- a/s/x86_64.ss +++ b/s/x86_64.ss @@ -17,66 +17,68 @@ (if-feature windows (define-registers (reserved - [%tc %r14 #t 14] - [%sfp %r13 #t 13] - [%ap %rdi #t 7] + [%tc %r14 #t 14 uptr] + [%sfp %r13 #t 13 uptr] + [%ap %rdi #t 7 uptr] #;[%esp] #;[%eap] #;[%trap]) (allocable - [%ac0 %rbp #t 5] - [%xp %r12 #t 12] - [%ts %rax %Cretval #f 0] - [%td %rbx #t 3] - [%ac1 %r10 %deact #f 10] - [%yp %r11 #f 11] - [%cp %r15 #t 15] - [#;%ret %rsi #t 6] - [ %rdx %Carg2 #f 2] - [ %r8 %Carg3 #f 8] - [ %r9 %Carg4 #f 9] - [ %rcx %Carg1 #f 1]) ; last to avoid use as a Scheme argument + [%ac0 %rbp #t 5 uptr] + [%xp %r12 #t 12 uptr] + [%ts %rax %Cretval #f 0 uptr] + [%td %rbx #t 3 uptr] + [%ac1 %r10 %deact #f 10 uptr] + [%yp %r11 #f 11 uptr] + [%cp %r15 #t 15 uptr] + [#;%ret %rsi #t 6 uptr] + [ %rdx %Carg2 #f 2 uptr] + [ %r8 %Carg3 #f 8 uptr] + [ %r9 %Carg4 #f 9 uptr] + [ %rcx %Carg1 #f 1 uptr] ; last to avoid use as a Scheme argument + [%fp1 %Cfparg3 #f 2 fp] + [%fp2 %Cfparg4 #f 3 fp]) (machine-dependent - [%Cfparg1 %Cfpretval #f 0] - [%Cfparg2 #f 1] - [%Cfparg3 #f 2] - [%Cfparg4 #f 3] - [%flreg1 #f 4] ; xmm 0-5 are caller-save - [%flreg2 #f 5] ; xmm 6-15 are callee-save - [%sp #t 4])) + [%Cfparg1 %Cfpretval #f 0 fp] + [%Cfparg2 #f 1 fp] + [%fptmp1 #f 4 fp] ; xmm 0-5 are caller-save + [%fptmp2 #f 5 fp] ; xmm 6-15 are callee-save + [%sp #t 4 fp]) + (reify-support %ts)) (define-registers (reserved - [%tc %r14 #t 14] - [%sfp %r13 #t 13] - [%ap %r9 %Carg6 #f 9] + [%tc %r14 #t 14 uptr] + [%sfp %r13 #t 13 uptr] + [%ap %r9 %Carg6 #f 9 uptr] #;[%esp] #;[%eap] #;[%trap]) (allocable - [%ac0 %rbp #t 5] - [%xp %r12 #t 12] - [%ts %rax %Cretval #f 0] - [%td %rbx #t 3] - [%ac1 %r10 %deact #f 10] - [%yp %r11 #f 11] - [%cp %r15 #t 15] - [#;%ret %r8 %Carg5 #f 8] - [ %rdi %Carg1 #f 7] - [ %rsi %Carg2 #f 6] - [ %rdx %Carg3 #f 2] - [ %rcx %Carg4 #f 1]) + [%ac0 %rbp #t 5 uptr] + [%xp %r12 #t 12 uptr] + [%ts %rax %Cretval #f 0 uptr] + [%td %rbx #t 3 uptr] + [%ac1 %r10 %deact #f 10 uptr] + [%yp %r11 #f 11 uptr] + [%cp %r15 #t 15 uptr] + [#;%ret %r8 %Carg5 #f 8 uptr] + [ %rdi %Carg1 #f 7 uptr] + [ %rsi %Carg2 #f 6 uptr] + [ %rdx %Carg3 #f 2 uptr] + [ %rcx %Carg4 #f 1 uptr] + [%fp1 %Cfparg3 #f 2 fp] + [%fp2 %Cfparg4 #f 3 fp]) (machine-dependent - [%Cfparg1 %Cfpretval #f 0] - [%Cfparg2 #f 1] - [%Cfparg3 #f 2] - [%Cfparg4 #f 3] - [%Cfparg5 #f 4] - [%Cfparg6 #f 5] - [%Cfparg7 #f 6] - [%Cfparg8 #f 7] - [%flreg1 #f 8] - [%flreg2 #f 9] - [%sp #t 4]))) + [%Cfparg1 %Cfpretval #f 0 fp] + [%Cfparg2 #f 1 fp] + [%Cfparg5 #f 4 fp] + [%Cfparg6 #f 5 fp] + [%Cfparg7 #f 6 fp] + [%Cfparg8 #f 7 fp] + [%fptmp1 #f 8 fp] + [%fptmp2 #f 9 fp] + [%sp #t 4 uptr]) + (reify-support %ts))) ;;; SECTION 2: instructions (module (md-handle-jump) ; also sets primitive handlers @@ -100,6 +102,18 @@ (lambda (x) (or (lmem? x) (literal@? x)))) + (define fpmem? + (lambda (x) + (nanopass-case (L15c Triv) x + [(mref ,lvalue0 ,lvalue1 ,imm ,type) (eq? type 'fp)] + [else #f]))) + + (define-syntax mem-of-type? + (lambda (stx) + (syntax-case stx (mem fpmem) + [(_ mem e) #'(lmem? e)] + [(_ fpmem e) #'(fpmem? e)]))) + (define real-imm32? (lambda (x) (nanopass-case (L15c Triv) x @@ -127,24 +141,24 @@ (define mref->mref (lambda (a k) (define return - (lambda (x0 x1 imm) - (k (with-output-language (L15d Triv) `(mref ,x0 ,x1 ,imm))))) + (lambda (x0 x1 imm type) + (k (with-output-language (L15d Triv) `(mref ,x0 ,x1 ,imm ,type))))) (nanopass-case (L15c Triv) a - [(mref ,lvalue0 ,lvalue1 ,imm) + [(mref ,lvalue0 ,lvalue1 ,imm ,type) (lvalue->ur lvalue0 (lambda (x0) (lvalue->ur lvalue1 (lambda (x1) (if (signed-32? imm) - (return x0 x1 imm) + (return x0 x1 imm type) (let ([u (make-tmp 'u)]) (seq (build-set! ,u (immediate ,imm)) (if (eq? x1 %zero) - (return x0 u 0) + (return x0 u 0 type) (seq (build-set! ,u (asm ,null-info ,asm-add ,u ,x1)) - (return x0 u 0))))))))))]))) + (return x0 u 0 type))))))))))]))) (define mem->mem (lambda (a k) @@ -153,20 +167,27 @@ (let ([u (make-tmp 'u)]) (seq (build-set! ,u ,(literal@->literal a)) - (k (with-output-language (L15d Lvalue) `(mref ,u ,%zero 0)))))] + (k (with-output-language (L15d Lvalue) `(mref ,u ,%zero 0 ptr)))))] [else (mref->mref a k)]))) + (define literal->literal + (lambda (a) + (nanopass-case (L15c Triv) a + [(literal ,info) (with-output-language (L15d Triv) `(literal ,info))]))) + (define-syntax coercible? (syntax-rules () [(_ ?a ?aty*) (let ([a ?a] [aty* ?aty*]) - (or (memq 'ur aty*) + (or (and (memq 'ur aty*) (not (or (fpmem? a) (fpur? a)))) + (and (memq 'fpur aty*) (or (fpmem? a) (fpur? a))) (or (and (memq 'imm32 aty*) (imm32? a)) (and (memq 'imm aty*) (imm? a)) (and (memq 'zero aty*) (imm0? a)) (and (memq 'real-imm32 aty*) (real-imm32? a)) (and (memq 'negatable-real-imm32 aty*) (negatable-real-imm32? a)) - (and (memq 'mem aty*) (mem? a)))))])) + (and (memq 'mem aty*) (mem? a)) + (and (memq 'fpmem aty*) (fpmem? a)))))])) (define-syntax coerce-opnd ; passes k something compatible with aty* (syntax-rules () @@ -174,6 +195,7 @@ (let ([a ?a] [aty* ?aty*] [k ?k]) (cond [(and (memq 'mem aty*) (mem? a)) (mem->mem a k)] + [(and (memq 'fpmem aty*) (fpmem? a)) (mem->mem a k)] [(and (memq 'imm32 aty*) (imm32? a)) (k (imm->imm a))] [(and (memq 'imm aty*) (imm? a)) (k (imm->imm a))] [(and (memq 'zero aty*) (imm0? a)) (k (imm->imm a))] @@ -195,6 +217,18 @@ (build-set! ,u ,a) (k u)))))] [else (sorry! 'coerce-opnd "unexpected operand ~s" a)])] + [(memq 'fpur aty*) + (cond + [(fpur? a) (k a)] + [(fpmem? a) + (mem->mem a + (lambda (a) + (let ([u (make-tmp 'u 'fp)]) + (seq + (build-set! ,u ,a) + (k u)))))] + [else + (sorry! 'coerce-opnd "unexpected operand ~s" a)])] [else (sorry! 'coerce-opnd "cannot coerce ~s to ~s" a aty*)]))])) (define set-ur=mref @@ -254,7 +288,7 @@ [(ur? c) (#,k c b)] [(mref? c) (nanopass-case (L15c Triv) c - [(mref ,lvalue0 ,lvalue1 ,imm) + [(mref ,lvalue0 ,lvalue1 ,imm ,type) ; TODO: does this use too many registers? (no longer special casing fv x0, x1 case) (lvalue->ur lvalue0 (lambda (x0) @@ -263,23 +297,30 @@ (let ([u1 (make-tmp 'u)]) (if (signed-32? imm) (seq - (build-set! ,u1 (mref ,x0 ,x1 ,imm)) + (build-set! ,u1 (mref ,x0 ,x1 ,imm ,type)) (#,k u1 b) - (build-set! (mref ,x0 ,x1 ,imm) ,u1)) + (build-set! (mref ,x0 ,x1 ,imm ,type) ,u1)) (let ([u2 (make-tmp 'u)]) (seq (build-set! ,u2 ,imm) (build-set! ,x1 (asm ,null-info ,asm-add ,x1 ,u2)) - (build-set! ,u1 (mref ,x0 ,x1 0)) + (build-set! ,u1 (mref ,x0 ,x1 0 ,type)) (#,k u1 b) - (build-set! (mref ,x0 ,x1 0) ,u1)))))))))])] + (build-set! (mref ,x0 ,x1 0 ,type) ,u1)))))))))])] ; can't be literal@ since literals can't be lvalues [else (sorry! '#,(datum->syntax #'* who) "unexpected operand ~s" c)]))) (next c a b))))) + (define mem-type? + (lambda (t) + (syntax-case t (mem fpmem) + [mem #t] + [fpmem #t] + [else #f]))) + (define make-value-clause (lambda (fmt) - (syntax-case fmt (mem ur xp) + (syntax-case fmt (mem fpmem ur fpur xp) [(op (c mem) (a ?c) (b bty* ...)) (bound-identifier=? #'?c #'c) (acsame-mem #'c #'a #'b #'(bty* ...) #'(lambda (c b) (rhs c c b)))] @@ -292,9 +333,10 @@ [(op (c ur) (a aty* ...) (b ?c)) (bound-identifier=? #'?c #'c) (acsame-ur #'c #'b #'a #'(aty* ...) #'(lambda (c a) (rhs c a c)))] - [(op (c mem) (a aty ...) (b bty ...)) + [(op (c xmem) (a aty ...) (b bty ...)) + (mem-type? #'xmem) #`(lambda (c a b) - (if (and (lmem? c) (coercible? a '(aty ...)) (coercible? b '(bty ...))) + (if (and (mem-of-type? xmem c) (coercible? a '(aty ...)) (coercible? b '(bty ...))) (coerce-opnd b '(bty ...) (lambda (b) (coerce-opnd a '(aty ...) @@ -317,6 +359,22 @@ (lambda (c) (build-set! ,c ,u)))))))))) (next c a b)))] + [(op (c fpur) (a aty ...) (b bty ...)) + #`(lambda (c a b) + (if (and (coercible? a '(aty ...)) (coercible? b '(bty ...))) + (coerce-opnd b '(bty ...) + (lambda (b) + (coerce-opnd a '(aty ...) + (lambda (a) + (if (fpur? c) + (rhs c a b) + (let ([u (make-tmp 'u 'fp)]) + (seq + (rhs u a b) + (mref->mref c + (lambda (c) + (build-set! ,c ,u)))))))))) + (next c a b)))] ; four-operand case below can require four unspillables [(op (c ur) (a ur) (b ur) (d dty ...)) (not (memq 'mem (datum (dty ...)))) @@ -359,9 +417,10 @@ (rhs u u) (build-set! ,c ,u)))))) (next c a)))] - [(op (c mem) (a aty ...)) + [(op (c xmem) (a aty ...)) + (mem-type? #'xmem) #`(lambda (c a) - (if (and (lmem? c) (coercible? a '(aty ...))) + (if (and (mem-of-type? xmem c) (coercible? a '(aty ...))) (coerce-opnd a '(aty ...) (lambda (a) (mem->mem c @@ -382,6 +441,20 @@ (rhs u a) (build-set! ,c ,u)))))))) (next c a)))] + [(op (c fpur) (a aty ...)) + #`(lambda (c a) + (if (coercible? a '(aty ...)) + (coerce-opnd a '(aty ...) + (lambda (a) + (if (fpur? c) + (rhs c a) + (mem->mem c + (lambda (c) + (let ([u (make-tmp 'u 'fp)]) + (seq + (rhs u a) + (build-set! ,c ,u)))))))) + (next c a)))] [(op (c ur)) #`(lambda (c) (if (ur? c) @@ -392,9 +465,10 @@ (seq (rhs u) (build-set! ,c ,u)))))))] - [(op (c mem)) + [(op (c xmem)) + (mem-type? #'xmem) #`(lambda (c) - (if (lmem? c) + (if (mem-of-type? xmem c) (mem->mem c (lambda (c) (rhs c))) @@ -813,14 +887,29 @@ `(set! ,(make-live-info) ,z (asm ,info ,(asm-get-double (info-loadfl-flreg info))))]) - (define-instruction effect (flt) - [(op (x mem ur) (y ur)) `(asm ,info ,asm-flt ,x ,y)]) + (define-instruction value (fpt) + [(op (x fpur) (y ur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpt ,y))]) - (define-instruction effect (fl+ fl- fl/ fl*) - [(op (x ur) (y ur) (z ur)) `(asm ,info ,(asm-flop-2 op) ,x ,y ,z)]) + (define-instruction value (fpmove) + [(op (x fpmem) (y fpur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpmove ,y))] + [(op (x fpur) (y fpmem fpur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpmove ,y))]) - (define-instruction effect (flsqrt) - [(op (x ur) (y ur)) `(asm ,info ,asm-flsqrt ,x ,y)]) + (define-instruction value (fpcastto) + [(op (x mem) (y fpur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpmove ,y))] + [(op (x ur) (y fpmem)) `(set! ,(make-live-info) ,x (asm ,info ,asm-move ,y))] + [(op (x ur) (y fpur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpcast ,y))]) + + (define-instruction value (fpcastfrom) + [(op (x fpmem) (y ur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-move ,y))] + [(op (x fpur) (y mem)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpmove ,y))] + [(op (x fpur) (y ur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpcast ,y))]) + + (define-instruction value (fp+ fp- fp* fp/) + [(op (x fpur) (y fpmem fpur) (z fpmem fpur)) + `(set! ,(make-live-info) ,x (asm ,info ,(asm-fpop-2 op) ,y ,z))]) + + (define-instruction value (fpsqrt) + [(op (x fpur) (y fpmem fpur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpsqrt ,y))]) (define-instruction effect inc-cc-counter [(op (x ur) (y imm32 ur) (z imm32 ur)) `(asm ,info ,asm-inc-cc-counter ,x ,y ,z)]) @@ -878,10 +967,13 @@ (define-instruction value pop [(op (z ur)) `(set! ,(make-live-info) ,z (asm ,info ,asm-pop))]) - (define-instruction pred (fl= fl< fl<=) - [(op (x ur) (y ur)) + (define-instruction pred (fp= fp< fp<=) + [(op (x fpmem) (y fpur)) (let ([info (make-info-condition-code op #t #f)]) ; NB: reversed? flag is assumed to be #t - (values '() `(asm ,info ,(asm-fl-relop info) ,x ,y)))]) + (values '() `(asm ,info ,(asm-fp-relop info) ,x ,y)))] + [(op (x fpur) (y fpur)) + (let ([info (make-info-condition-code op #t #f)]) ; NB: reversed? flag is assumed to be #t + (values '() `(asm ,info ,(asm-fp-relop info) ,x ,y)))]) (define-instruction pred (eq? u< < > <= >=) ; the idea (following from the intel x86/x86_64 documentation) @@ -942,7 +1034,7 @@ `(set! ,(make-live-info) ,uts (immediate 1)) `(set! ,(make-live-info) ,uts (asm ,info ,asm-exchange ,uts - (mref ,x ,y ,imm)))))]) + (mref ,x ,y ,imm uptr)))))]) `(asm ,info-cc-eq ,asm-eq ,uts (immediate 0))))])) (define-instruction effect (locked-incr!) @@ -1018,13 +1110,14 @@ asm-move asm-move/extend asm-load asm-store asm-swap asm-library-call asm-library-jump asm-mul asm-muli asm-addop asm-add asm-sub asm-negate asm-sub-negate asm-pop asm-shiftop asm-sll asm-logand asm-lognot - asm-logtest asm-fl-relop asm-relop asm-push asm-indirect-jump asm-literal-jump + asm-logtest asm-fp-relop asm-relop asm-push asm-indirect-jump asm-literal-jump asm-direct-jump asm-return-address asm-jump asm-conditional-jump asm-data-label asm-rp-header asm-rp-compact-header asm-lea1 asm-lea2 asm-indirect-call asm-condition-code - asm-fl-cvt asm-fl-store asm-fl-load asm-flt asm-trunc asm-div asm-popcount + asm-fl-cvt asm-fl-store asm-fl-load asm-fpt asm-trunc asm-div asm-popcount asm-exchange asm-pause asm-locked-incr asm-locked-decr asm-locked-cmpxchg - asm-flop-2 asm-flsqrt asm-c-simple-call + asm-fpsqrt asm-fpop-2 asm-fpmove asm-fpcast + asm-c-simple-call asm-save-flrv asm-restore-flrv asm-return asm-c-return asm-size asm-enter asm-foreign-call asm-foreign-callable asm-inc-profile-counter @@ -1040,6 +1133,9 @@ [(x) (record-case x [(reg) r #t] [else #f])] [(x reg) (record-case x [(reg) r (eq? r reg)] [else #f])])) + (define ax-fp-register? + (lambda (x) (record-case x [(reg) r (eq? 'fp (reg-type r))] [else #f]))) + (define ax-ea-reg-code (lambda (ea) (record-case ea @@ -1223,7 +1319,7 @@ (define sse-op2 (lambda (op prefix-code dstreg-op-code srcreg-op-code w source dest code*) (cond - [(ax-register? source) + [(ax-fp-register? source) (emit-code (op source dest code*) (build byte prefix-code) (ax-ea-rex w dest source #f) @@ -1232,7 +1328,7 @@ (ax-ea-modrm-reg dest source) (ax-ea-sib dest) (ax-ea-addr-disp dest))] - [(ax-register? dest) + [(ax-fp-register? dest) (emit-code (op source dest code*) (build byte prefix-code) (ax-ea-rex w source dest #f) @@ -1943,34 +2039,48 @@ (lambda (code* dst) (emit sse.movd (cons 'reg flreg) (cons 'reg dst) code*)))) - (define asm-flt - (lambda (code* src flonumreg) - (Trivit (src) - (let ([dest `(disp ,(constant flonum-data-disp) ,flonumreg)] - [flreg (cons 'reg %flreg1)]) - (emit sse.cvtsi2sd src flreg - (emit sse.movsd flreg dest code*)))))) + (define asm-fpt + (lambda (code* dest src) + (Trivit (dest src) + (emit sse.cvtsi2sd src dest code*)))) - (define asm-flop-2 + (define asm-fpop-2 (lambda (op) - (lambda (code* src1 src2 dest) - (let ([src1 `(disp ,(constant flonum-data-disp) ,src1)] - [src2 `(disp ,(constant flonum-data-disp) ,src2)] - [dest `(disp ,(constant flonum-data-disp) ,dest)]) - (let ([code* (emit sse.movsd (cons 'reg %flreg1) dest code*)]) - (let ([code* (case op - [(fl+) (emit sse.addsd src2 (cons 'reg %flreg1) code*)] - [(fl-) (emit sse.subsd src2 (cons 'reg %flreg1) code*)] - [(fl*) (emit sse.mulsd src2 (cons 'reg %flreg1) code*)] - [(fl/) (emit sse.divsd src2 (cons 'reg %flreg1) code*)])]) - (emit sse.movsd src1 (cons 'reg %flreg1) code*))))))) + (lambda (code* dest-reg src1 src2) + (define (emit-it src dest code*) + (case op + [(fp+) (emit sse.addsd src dest code*)] + [(fp-) (emit sse.subsd src dest code*)] + [(fp*) (emit sse.mulsd src dest code*)] + [(fp/) (emit sse.divsd src dest code*)])) + (cond + [(eq? dest-reg src1) + (Trivit (dest-reg src2) + (emit-it src2 dest-reg code*))] + [(and (eq? dest-reg src2) + (memq op '(fp+ fp*))) + (Trivit (dest-reg src1) + (emit-it src1 dest-reg code*))] + [else + (Trivit (dest-reg src1 src2) + (emit sse.movsd src2 (cons 'reg %fptmp1) + (emit sse.movsd src1 dest-reg + (emit-it (cons 'reg %fptmp1) dest-reg code*))))])))) - (define asm-flsqrt - (lambda (code* src dest) - (let ([src `(disp ,(constant flonum-data-disp) ,src)] - [dest `(disp ,(constant flonum-data-disp) ,dest)]) - (emit sse.sqrtsd src (cons 'reg %flreg1) - (emit sse.movsd (cons 'reg %flreg1) dest code*))))) + (define asm-fpsqrt + (lambda (code* dest-reg src) + (Trivit (dest-reg src) + (emit sse.sqrtsd src dest-reg code*)))) + + (define asm-fpmove + (lambda (code* dest src) + (Trivit (dest src) + (emit sse.movsd src dest code*)))) + + (define asm-fpcast + (lambda (code* dest src) + (Trivit (dest src) + (emit sse.movd src dest code*)))) (define asm-trunc (lambda (code* dest flonumreg) @@ -2305,14 +2415,12 @@ (let-values ([(l1 l2) (if i? (values l2 l1) (values l1 l2))]) (asm-conditional-jump info l2 l1 offset))))))) - (define asm-fl-relop + (define asm-fp-relop (lambda (info) (lambda (l1 l2 offset x y) (values - (let ([x `(disp ,(constant flonum-data-disp) ,x)] - [y `(disp ,(constant flonum-data-disp) ,y)]) - (emit sse.movsd y (cons 'reg %flreg1) - (emit sse.ucomisd x (cons 'reg %flreg1) '()))) + (Trivit (x y) + (emit sse.ucomisd x y '())) (asm-conditional-jump info l1 l2 offset))))) (define asm-relop @@ -2527,11 +2635,11 @@ [(carry) (i? bcc bcs)] ; unordered: zf,pf,cf <- 111; gt: 000; lt: 001; eq: 100 ; reversed & inverted: !(fl< y x) = !(fl> x y) iff zf = 1 & cf = 1 - [(fl<) bls] + [(fp<) bls] ; reversed & inverted: !(fl<= y x) = !(fl>= x y) iff cf = 1 - [(fl<=) bcs] + [(fp<=) bcs] ; inverted: !(fl= x y) iff zf = 0 or cf (or pf) = 1 - [(fl=) (or bne bcs)])))))) + [(fp=) (or bne bcs)])))))) (define asm-data-label (lambda (code* l offset func code-size) @@ -2754,14 +2862,14 @@ (lambda (offset) (lambda (x) ; requires var (%seq - (inline ,(make-info-loadfl %flreg1) ,%load-double ,x ,%zero ,(%constant flonum-data-disp)) - (inline ,(make-info-loadfl %flreg1) ,%store-double ,%sp ,%zero (immediate ,offset)))))] + (inline ,(make-info-loadfl %fptmp1) ,%load-double ,x ,%zero ,(%constant flonum-data-disp)) + (inline ,(make-info-loadfl %fptmp1) ,%store-double ,%sp ,%zero (immediate ,offset)))))] [load-single-stack (lambda (offset) (lambda (x) ; requires var (%seq - (inline ,(make-info-loadfl %flreg1) ,%load-double->single ,x ,%zero ,(%constant flonum-data-disp)) - (inline ,(make-info-loadfl %flreg1) ,%store-single ,%sp ,%zero (immediate ,offset)))))] + (inline ,(make-info-loadfl %fptmp1) ,%load-double->single ,x ,%zero ,(%constant flonum-data-disp)) + (inline ,(make-info-loadfl %fptmp1) ,%store-single ,%sp ,%zero (immediate ,offset)))))] [load-int-stack (lambda (offset) (lambda (rhs) ; requires rhs @@ -3215,14 +3323,14 @@ (lambda (offset) (lambda (x) ; requires var (%seq - (inline ,(make-info-loadfl %flreg1) ,%load-double ,%sp ,%zero (immediate ,offset)) - (inline ,(make-info-loadfl %flreg1) ,%store-double ,x ,%zero ,(%constant flonum-data-disp)))))) + (inline ,(make-info-loadfl %fptmp1) ,%load-double ,%sp ,%zero (immediate ,offset)) + (inline ,(make-info-loadfl %fptmp1) ,%store-double ,x ,%zero ,(%constant flonum-data-disp)))))) (define load-single-stack (lambda (offset) (lambda (x) ; requires var (%seq - (inline ,(make-info-loadfl %flreg1) ,%load-single->double ,%sp ,%zero (immediate ,offset)) - (inline ,(make-info-loadfl %flreg1) ,%store-double ,x ,%zero ,(%constant flonum-data-disp)))))) + (inline ,(make-info-loadfl %fptmp1) ,%load-single->double ,%sp ,%zero (immediate ,offset)) + (inline ,(make-info-loadfl %fptmp1) ,%store-double ,x ,%zero ,(%constant flonum-data-disp)))))) (define load-int-stack (lambda (type offset) (lambda (lvalue)