From f1a839069d59fc80bce1277a7cb12e644cff0fbc Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Thu, 12 Sep 2019 20:48:38 -0600 Subject: [PATCH] fix `__collect_safe` and arguments/results in x86_64 floating-point registers original commit: d8cb953259faeb8cbab8f66c365ac87eb37ad0f6 --- mats/foreign.ms | 45 +++++++++++++++++++++++++ s/x86_64.ss | 87 +++++++++++++++++++++++++++---------------------- 2 files changed, 93 insertions(+), 39 deletions(-) diff --git a/mats/foreign.ms b/mats/foreign.ms index 7dc06569ab..a4bd7ac9df 100644 --- a/mats/foreign.ms +++ b/mats/foreign.ms @@ -2969,6 +2969,51 @@ (check-union [x i64 43] [y int 0]) (check-union [x float 58.0] [y int 0]) (check-union [x double 68.0] [y int 0]) + + ;; Check that `__collect_safe` saves a argument and result floating-point registers + ;; while activating and deacttiving a thread + (let () + (define-ftype T (struct [d double] [i integer-8] [n int])) + (define sum_pre_double_double_double_double_double_double_double_double + (foreign-procedure __collect_safe + "f4_sum_pre_double_double_double_double_double_double_double_double_struct_double_i8_int" + (double double double double double double double double (& T)) + double)) + (let* ([p (foreign-alloc (ftype-sizeof T))] + [a (make-ftype-pointer T p)]) + (ftype-set! T (d) a 1.25) + (ftype-set! T (i) a 10) + (ftype-set! T (n) a 100) + (let loop ([i 1000000]) + (cond + [(zero? i) (foreign-free p) #t] + [else + (let ([v (sum_pre_double_double_double_double_double_double_double_double 8.25 9.25 10.25 11.25 12.25 13.25 14.25 15.25 a)]) + (and (= 205.25 v) + (loop (sub1 i))))])))) + (let () + (define-ftype T (struct [d double] [i integer-8] [n int])) + (define-ftype callback (function __collect_safe ((& T)) double)) + (define cb_send (foreign-procedure __collect_safe + "f4_cb_send_struct_double_i8_int" + ((* callback)) double)) + (let ([cb (make-ftype-pointer + callback + (lambda (r) + (+ (ftype-ref T (d) r) + (ftype-ref T (i) r) + (ftype-ref T (n) r))))]) + (let loop ([i 1000000]) + (cond + [(zero? i) + (unlock-object + (foreign-callable-code-object + (ftype-pointer-address cb))) + #t] + [else + (let ([v (cb_send cb)]) + (and (= v 112.25) + (loop (sub1 i))))])))) ) (mat collect-safe diff --git a/s/x86_64.ss b/s/x86_64.ss index 7f86c2df8c..726a48155b 100644 --- a/s/x86_64.ss +++ b/s/x86_64.ss @@ -2718,37 +2718,37 @@ (set! ,reg ,(%inline sll ,reg (immediate 16))) ,(loop tmp 2 x-offset) (set! ,reg ,(%inline + ,reg ,tmp))))]))]))))] - [add-int-regs - (lambda (ints iint vint regs) + [add-regs + (lambda (ints ir vr regs) (cond [(fx= 0 ints) regs] [else - (add-int-regs (fx- ints 1) (fx+ iint 1) vint - (cons (vector-ref vint iint) regs))]))] + (add-regs (fx- ints 1) (fx+ ir 1) vr + (cons (vector-ref vr ir) regs))]))] [do-args (lambda (types vint vfp) (if-feature windows - (let loop ([types types] [locs '()] [regs '()] [i 0] [isp 0]) + (let loop ([types types] [locs '()] [regs '()] [fp-regs '()] [i 0] [isp 0]) (if (null? types) - (values isp 0 locs regs) + (values isp 0 locs regs fp-regs) (nanopass-case (Ltype Type) (car types) [(fp-double-float) (if (< i 4) (let ([reg (vector-ref vint i)]) (loop (cdr types) (cons (load-double-reg2 (vector-ref vfp i) reg) locs) - (cons reg regs) (fx+ i 1) isp)) + (cons reg regs) (cons (vector-ref vfp i) fp-regs) (fx+ i 1) isp)) (loop (cdr types) (cons (load-double-stack isp) locs) - regs i (fx+ isp 8)))] + regs fp-regs i (fx+ isp 8)))] [(fp-single-float) (if (< i 4) (loop (cdr types) (cons (load-single-reg (vector-ref vfp i)) locs) - regs (fx+ i 1) isp) + regs (cons (vector-ref vfp i) fp-regs) (fx+ i 1) isp) (loop (cdr types) (cons (load-single-stack isp) locs) - regs i (fx+ isp 8)))] + regs fp-regs i (fx+ isp 8)))] [(fp-ftd& ,ftd) (cond [(memv ($ftd-size ftd) '(1 2 4 8)) @@ -2762,17 +2762,17 @@ ;; float or double (loop (cdr types) (cons (load-content-regs '(sse) ($ftd-size ftd) i i vint vfp) locs) - (add-int-regs 1 i vint regs) (fx+ i 1) isp)] + (add-regs 1 i vint regs) (add-regs 1 i vfp fp-regs) (fx+ i 1) isp)] [else ;; integer (loop (cdr types) (cons (load-content-regs '(integer) ($ftd-size ftd) i i vint vfp) locs) - (add-int-regs 1 i vint regs) (fx+ i 1) isp)])] + (add-regs 1 i vint regs) fp-regs(fx+ i 1) isp)])] [else ;; pass as value on the stack (loop (cdr types) (cons (load-content-stack isp ($ftd-size ftd)) locs) - regs i (fx+ isp (align ($ftd-size ftd) 8)))])] + regs fp-regs i (fx+ isp (align ($ftd-size ftd) 8)))])] [else ;; pass by reference in register or by reference on the stack (cond @@ -2781,42 +2781,42 @@ (let ([reg (vector-ref vint i)]) (loop (cdr types) (cons (load-int-reg (car types) reg) locs) - (cons reg regs) (fx+ i 1) isp))] + (cons reg regs) fp-regs (fx+ i 1) isp))] [else ;; pass by reference on the stack (loop (cdr types) (cons (load-int-stack isp) locs) - regs i (fx+ isp 8))])])] + regs fp-rregs i (fx+ isp 8))])])] [else (if (< i 4) (let ([reg (vector-ref vint i)]) (loop (cdr types) (cons (load-int-reg (car types) reg) locs) - (cons reg regs) + (cons reg regs) fp-regs (fx+ i 1) isp)) (loop (cdr types) (cons (load-int-stack isp) locs) - regs i (fx+ isp 8)))]))) - (let loop ([types types] [locs '()] [regs '()] [iint 0] [ifp 0] [isp 0]) + regs fp-regs i (fx+ isp 8)))]))) + (let loop ([types types] [locs '()] [regs '()] [fp-regs '()] [iint 0] [ifp 0] [isp 0]) (if (null? types) - (values isp ifp locs regs) + (values isp ifp locs regs fp-regs) (nanopass-case (Ltype Type) (car types) [(fp-double-float) (if (< ifp 8) (loop (cdr types) (cons (load-double-reg (vector-ref vfp ifp)) locs) - regs iint (fx+ ifp 1) isp) + regs (cons (vector-ref vfp ifp) fp-regs) iint (fx+ ifp 1) isp) (loop (cdr types) (cons (load-double-stack isp) locs) - regs iint ifp (fx+ isp 8)))] + regs fp-regs iint ifp (fx+ isp 8)))] [(fp-single-float) (if (< ifp 8) (loop (cdr types) (cons (load-single-reg (vector-ref vfp ifp)) locs) - regs iint (fx+ ifp 1) isp) + regs (cons (vector-ref vfp ifp) fp-regs) iint (fx+ ifp 1) isp) (loop (cdr types) (cons (load-single-stack isp) locs) - regs iint ifp (fx+ isp 8)))] + regs fp-regs iint ifp (fx+ isp 8)))] [(fp-ftd& ,ftd) (let* ([classes (classify-eightbytes ftd)] [ints (count 'integer classes)] @@ -2826,23 +2826,23 @@ ;; pass on the stack (loop (cdr types) (cons (load-content-stack isp ($ftd-size ftd)) locs) - regs iint ifp (fx+ isp (align ($ftd-size ftd) 8)))] + regs fp-regs iint ifp (fx+ isp (align ($ftd-size ftd) 8)))] [else ;; pass in registers (loop (cdr types) (cons (load-content-regs classes ($ftd-size ftd) iint ifp vint vfp) locs) - (add-int-regs ints iint vint regs) + (add-regs ints iint vint regs) (add-regs fps ifp vfp fp-regs) (fx+ iint ints) (fx+ ifp fps) isp)]))] [else (if (< iint 6) (let ([reg (vector-ref vint iint)]) (loop (cdr types) (cons (load-int-reg (car types) reg) locs) - (cons reg regs) + (cons reg regs) fp-regs (fx+ iint 1) ifp isp)) (loop (cdr types) (cons (load-int-stack isp) locs) - regs iint ifp (fx+ isp 8)))])))))]) + regs fp-regs iint ifp (fx+ isp 8)))])))))]) (define (add-deactivate adjust-active? t0 live* result-live* e) (cond [adjust-active? @@ -2961,14 +2961,14 @@ [fill-result-here? (result-fits-in-registers? result-classes)] [adjust-active? (if-feature pthreads (memq 'adjust-active conv*) #f)]) (with-values (do-args (if fill-result-here? (cdr arg-type*) arg-type*) (make-vint) (make-vfp)) - (lambda (frame-size nfp locs live*) + (lambda (frame-size nfp locs live* fp-live*) (with-values (add-save-fill-target fill-result-here? frame-size locs) (lambda (frame-size locs) (returnem frame-size locs (lambda (t0) (let* ([t (if adjust-active? %deact t0)] ; need a register if `adjust-active?` [c-call - (add-deactivate adjust-active? t0 live* + (add-deactivate adjust-active? t0 (append fp-live* live*) (get-result-regs fill-result-here? result-type result-classes) (if-feature windows (%seq @@ -3298,12 +3298,14 @@ [int* (list %rax %rdx)] [fp* (list %Cfpretval %Cfparg2)] [accum '()] - [live* '()]) + [live* '()] + [fp-live* '()]) (cond [(null? result-classes) (values (lambda () (if (pair? (cdr accum)) `(seq ,(car accum) ,(cadr accum)) (car accum))) - live*)] + live* + fp-live*)] [(eq? (car result-classes) 'integer) (loop (cdr result-classes) (fx+ offset 8) @@ -3311,7 +3313,8 @@ fp* (cons `(set! ,(car int*) ,(%mref ,%sp ,offset)) accum) - (cons (car int*) live*))] + (cons (car int*) live*) + fp-live*)] [(eq? (car result-classes) 'sse) (loop (cdr result-classes) (fx+ offset 8) @@ -3319,29 +3322,35 @@ (cdr fp*) (cons `(inline ,(make-info-loadfl (car fp*)) ,%load-double ,%sp ,%zero (immediate ,offset)) accum) - live*)]))] + live* + (cons (car fp*) fp-live*))]))] [else (values (lambda () ;; Return pointer that was filled; destination was the first argument `(set! ,%Cretval ,(%mref ,%sp ,(if-feature windows (if adjust-active? 96 80) 48)))) - (list %Cretval))])] + (list %Cretval) + '())])] [(fp-double-float) (values (lambda (x) `(inline ,(make-info-loadfl %Cfpretval) ,%load-double ,x ,%zero ,(%constant flonum-data-disp))) - '())] + '() + (list %Cfpretval))] [(fp-single-float) (values (lambda (x) `(inline ,(make-info-loadfl %Cfpretval) ,%load-double->single ,x ,%zero ,(%constant flonum-data-disp))) - '())] + '() + (list %Cfpretval))] [(fp-void) (values (lambda () `(nop)) + '() '())] [else (values(lambda (x) `(set! ,%Cretval ,x)) - (list %Cretval))])) + (list %Cretval) + '())])) (define (unactivate result-regs) (let ([e `(seq (set! ,%Carg1 ,(%mref ,%sp ,(+ (push-registers-size result-regs) (if-feature windows 72 176)))) @@ -3361,7 +3370,7 @@ [synthesize-first? (and result-classes (result-fits-in-registers? result-classes))] [locs (do-stack (if synthesize-first? (cdr arg-type*) arg-type*) adjust-active?)]) - (let-values ([(get-result result-regs) (do-result result-type result-classes adjust-active?)]) + (let-values ([(get-result result-regs result-fp-regs) (do-result result-type result-classes adjust-active?)]) (values (lambda () (%seq @@ -3418,7 +3427,7 @@ ((lambda (e) (if adjust-active? (%seq - ,(unactivate result-regs) + ,(unactivate (append result-fp-regs result-regs)) ,e) e)) (%seq