diff --git a/racket/src/ChezScheme/s/ppc32.ss b/racket/src/ChezScheme/s/ppc32.ss index b941a99883..5483cb04c6 100644 --- a/racket/src/ChezScheme/s/ppc32.ss +++ b/racket/src/ChezScheme/s/ppc32.ss @@ -2327,37 +2327,62 @@ ,(load/store-integer 'load tmp (fxmin size 4) 'unsigned rhs delta) ,(load/store-integer 'store tmp (fxmin size 4) 'unsigned %sp (fx+ offset delta)) ,(loop (fx+ delta 4) (fx- size 4))))))))) - (define load-double-int-regs - (lambda (hireg loreg isp indirect?) + (define load-double-reg+int-regs + (lambda (fpreg hireg loreg isp indirect?) (if indirect? (lambda (x) ; requires var (%seq + (set! ,fpreg ,(%mref ,x ,%zero 0 fp)) (set! ,loreg ,(%mref ,x ,4)) (set! ,hireg ,(%mref ,x ,0)))) (lambda (x) ; unboxed (%seq + (set! ,fpreg ,x) (set! ,(%mref ,%sp ,%zero ,isp fp) ,x) (set! ,loreg ,(%mref ,%sp ,(fx+ isp 4))) (set! ,hireg ,(%mref ,%sp ,isp))))))) - (define load-double-int-reg+stack - (lambda (hireg isp indirect?) + (define load-single-reg+int-regs + (lambda (fpreg hireg loreg isp indirect?) (if indirect? (lambda (x) ; requires var (%seq + (set! ,fpreg ,(%inline load-single->double ,(%mref ,x ,%zero 0 fp))) + (set! ,(%mref ,%sp ,%zero ,isp fp) ,fpreg) + (set! ,loreg ,(%mref ,%sp ,(fx+ isp 4))) + (set! ,hireg ,(%mref ,%sp ,isp)))) + (load-double-reg+int-regs fpreg hireg loreg isp indirect?)))) + (define load-double-reg+stack + (lambda (fpreg isp indirect?) + (if indirect? + (lambda (x) ; requires var + (%seq + (set! ,fpreg ,(%mref ,x ,%zero 0 fp)) + (set! ,(%mref ,%sp ,%zero ,isp fp) ,fpreg))) + (lambda (x) ; unboxed + (%seq + (set! ,fpreg ,x) + (set! ,(%mref ,%sp ,%zero ,isp fp) ,fpreg)))))) + (define load-double-reg+int-reg+stack + (lambda (fpreg hireg isp indirect?) + (if indirect? + (lambda (x) ; requires var + (%seq + (set! ,fpreg ,(%mref ,x ,%zero 0 fp)) (set! ,(%mref ,%sp ,(fx+ isp 4)) ,(%mref ,x 4)) (set! ,hireg ,(%mref ,x 0)))) (lambda (x) ; unboxed (%seq + (set! ,fpreg ,x) (set! ,(%mref ,%sp ,%zero ,isp fp) ,x) (set! ,hireg ,(%mref ,%sp ,isp))))))) (constant-case machine-type-name [(ppc32osx tppc32osx) ;; Mac OS X variant of `do-args` ;; ----------------------------- - ;; On varargs: the PPC ABI says that unknown arguments should be in both FP - ;; and int registers, but in practice it seems to mean int arguments; the - ;; ABI actually talks about "known" and "unknown", so only "..." arguments - ;; are in int registers, but we just assume that all but the first is "...". + ;; On varargs: we can pass arguments in a way that works in both + ;; varargs mode and non-varargs mode, so we do that unless a specific + ;; 'atomic mode is used (for primitve flonum operations) to insists on + ;; a more efficient path (define register+stack-arguments-starting-offset ;; after linkage area: 24) @@ -2365,6 +2390,14 @@ ;; after inkage area plus parameter area reserved for registers: (+ register+stack-arguments-starting-offset 32)) (define (maybe-cdr p) (if (pair? p) (cdr p) p)) + (define (rest-in-fp-regs? types flt* int*) + (cond + [(null? types) #t] + [(or (null? flt*) (null? int*) (null? (cdr int*))) #f] + [else (nanopass-case (Ltype Type) (car types) + [(fp-double-float) (rest-in-fp-regs? (cdr types) (cdr flt*) (cddr int*))] + [(fp-single-float) (rest-in-fp-regs? (cdr types) (cdr flt*) (cddr int*))] + [else #f])])) (define do-args (lambda (types varargs?) ;; NB: start stack pointer at `stack-arguments-starting-offset` to put arguments above the linkage area @@ -2379,32 +2412,36 @@ (nanopass-case (Ltype Type) (car types) [(fp-double-float) (cond - [(or (null? flt*) - (and varargs? (null? int*))) + [(null? flt*) ;; on stack (loop (cdr types) (cons (load-double-stack isp (and indirect? 0)) locs) live* int* '() (fx+ isp 8) fp-live-count #f)] - [(or (not varargs?) - ;; hack: varargs requires at least one argument - (fx= isp register+stack-arguments-starting-offset)) + [(not varargs?) ;; in FP register (loop (cdr types) (cons (load-double-reg (car flt*) (and indirect? 0)) locs) live* (maybe-cdr (maybe-cdr int*)) (cdr flt*) (fx+ isp 8) (fx+ fp-live-count 1) #f)] [else ; => varargs - ;; in integer register... maybe only halfway - (if (null? (cdr int*)) - (loop (cdr types) - (cons (load-double-int-reg+stack (car int*) isp indirect?) locs) - (cons (car int*) live*) '() flt* (fx+ isp 8) fp-live-count - #f) - (loop (cdr types) - (cons (load-double-int-regs (car int*) (cadr int*) isp indirect?) locs) - (cons* (car int*) (cadr int*) live*) (cdr (cdr int*)) flt* (fx+ isp 8) fp-live-count - #f))])] + ;; in FP registers but also in integer register or on stack... maybe only halfway + (cond + [(null? int*) + (loop (cdr types) + (cons (load-double-reg+stack (car flt*) isp indirect?) locs) + live* '() (cdr flt*) (fx+ isp 8) (fx+ fp-live-count 1) + #f)] + [(null? (cdr int*)) + (loop (cdr types) + (cons (load-double-reg+int-reg+stack (car flt*) (car int*) isp indirect?) locs) + (cons (car int*) live*) '() (cdr flt*) (fx+ isp 8) (fx+ fp-live-count 1) + #f)] + [else + (loop (cdr types) + (cons (load-double-reg+int-regs (car flt*) (car int*) (cadr int*) isp indirect?) locs) + (cons* (car int*) (cadr int*) live*) (cdr (cdr int*)) (cdr flt*) (fx+ isp 8) (fx+ fp-live-count 1) + #f)])])] [(fp-single-float) (cond [(null? flt*) @@ -2413,11 +2450,23 @@ (cons (load-single-stack isp (and indirect? 0)) locs) live* int* '() (fx+ isp 4) fp-live-count #f)] - [(not varargs?) + [(or (not varargs?) + (null? int*) + (null? (cdr int*)) + (not (rest-in-fp-regs? (cdr types) (cdr flt*) (cddr int*)))) ;; in FP register (loop (cdr types) (cons (load-single-reg (car flt*) (and indirect? 0)) locs) live* (maybe-cdr int*) (cdr flt*) (fx+ isp 4) (fx+ fp-live-count 1) + #f)] + [else ; => varargs + ;; Although the float type is not normally allowed with `__varargs`, + ;; we might be pessimistically setting up for varargs, treating the + ;; float as a double for varargs; this trick is only going to work as + ;; long as it doesn't matter how many integer registers we use + (loop (cdr types) + (cons (load-single-reg+int-regs (car flt*) (car int*) (cadr int*) isp indirect?) locs) + (cons* (car int*) (cadr int*) live*) (cdr (cdr int*)) (cdr flt*) (fx+ isp 8) (fx+ fp-live-count 1) #f)])] [(fp-ftd& ,ftd) (let ([members ($ftd->members ftd)]) @@ -2684,7 +2733,7 @@ ,(save-and-restore result-live* result-fp-live-count (fp-result-regs) `(set! ,%Cretval ,(%inline activate-thread)))))) (lambda (info) (safe-assert (reg-callee-save? %tc)) ; no need to save-restore - (let* ([varargs? (memq 'varargs (info-foreign-conv* info))] + (let* ([varargs? (not (memq 'atomic (info-foreign-conv* info)))] ; pessimistic for Mac OS [arg-type* (info-foreign-arg-type* info)] [result-type (info-foreign-result-type info)] [fill-result-here? (indirect-result-that-fits-in-registers? result-type)] @@ -3008,8 +3057,11 @@ ;; we push all of the int reg args with one push instruction and all of the ;; float reg args with another (v)push instruction. It's possible for an argument ;; to be split across a register and the stack --- but in that case, there's - ;; room just before on the stack to copy in the register. See foreign-call - ;; for information on varrags. + ;; room just before on the stack to copy in the register. For varrags, the + ;; `__varargs` isn't technically enough information, because the ABI is specified + ;; in terms of specific unknown arguments (i.e., the part in the "..."); we + ;; just assume that all argument except the first are in "...", and that assumption + ;; only matters for `double` arguments. (lambda (types gp-reg-count fp-reg-count init-int-reg-offset float-reg-offset stack-arg-offset synthesize-first-argument? varargs? return-space-offset) (let loop ([types (if synthesize-first-argument? (cdr types) types)] @@ -3455,7 +3507,7 @@ (push-registers result-regs result-num-fp-regs (fp-result-regs) stash-offset e)))) (lambda (info) - (define callee-save-regs (list %r16 %r17 %r18 %r19 %r20 %r21 %r22 %r23 %r24 %r25 %r26 %r27 %r28 %r29 %r30 %r31)) + (define callee-save-regs (list %r14 %r15 %r16 %r17 %r18 %r19 %r20 %r21 %r22 %r23 %r24 %r25 %r26 %r27 %r28 %r29 %r30 %r31)) (define callee-save-fp-regs (list %fpreg1 %fpreg2)) (define isaved (length callee-save-regs)) (define fpsaved (length callee-save-fp-regs))