16-byte alignment on non-Windows x86

Maintain 16-byte stack alignment (if incoming frames are 16-byte
aligned), because newer versions of gcc count on 16-bit alignment for
some generated SSE instructions.

[This patch is extracted from cisco/ChezScheme#518.
 Merging the rest of that change will take longer.]

original commit: 88d57da163ca287ab37534fd858361a035ce7e4a
This commit is contained in:
dyb 2020-06-24 17:11:37 -06:00 committed by Matthew Flatt
parent 9bdc112b4d
commit a958dec07f

View File

@ -1031,11 +1031,11 @@
(define-instruction effect invoke-prelude
[(op)
(constant-case machine-type-name
[(i3osx ti3osx)
[(i3nt ti3nt) `(set! ,(make-live-info) ,%tc (mref ,%sp ,%zero 4))]
[else
(seq
`(set! ,(make-live-info) ,%tc (mref ,%sp ,%zero 4 uptr))
`(set! ,(make-live-info) ,%sp (asm ,info ,asm-sub ,%sp (immediate 12))))]
[else `(set! ,(make-live-info) ,%tc (mref ,%sp ,%zero 4 uptr))])])
`(set! ,(make-live-info) ,%sp (asm ,info ,asm-sub ,%sp (immediate 12))))])])
)
;;; SECTION 3: assembler
@ -2080,8 +2080,8 @@
(lambda ()
(constant-case machine-type-name
; remove padding added by asm-enter
[(i3osx ti3osx) (emit addi '(imm 12) (cons 'reg %sp) (emit ret '()))]
[else (emit ret '())])))
[(i3nt ti3nt) (emit ret '())]
[else (emit addi '(imm 12) (cons 'reg %sp) (emit ret '()))])))
(define asm-c-return
(lambda (info)
@ -2272,17 +2272,17 @@
(define asm-save-flrv
(lambda (code*)
; we normally need 8 to store the floating point return variable, but
; on the x86 mac we need 16 in order to get the required 16-byte alignment
(emit subi `(imm ,(constant-case machine-type-name [(i3osx ti3osx) 16] [else 8]))
; on some OS's we need 16 in order to get the required 16-byte alignment
(emit subi `(imm ,(constant-case machine-type-name [(i3nt ti3nt) 8] [else 16]))
(cons 'reg %sp)
(emit fstpl `(disp 0 ,%sp) code*))))
(define asm-restore-flrv
(lambda (code*)
; we normally need 8 to store the floating point return variable, but
; on the x86 mac we need 16 in order to get the required 16-byte alignment
; on some OS's we need 16 in order to get the required 16-byte alignment
(emit fldl `(disp 0 ,%sp)
(emit addi `(imm ,(constant-case machine-type-name [(i3osx ti3osx) 16] [else 8]))
(emit addi `(imm ,(constant-case machine-type-name [(i3nt ti3nt) 8] [else 16]))
(cons 'reg %sp) code*))))
(define asm-library-jump
@ -2489,7 +2489,8 @@
code*)))
(constant-case machine-type-name
[(i3osx ti3osx)
[(i3nt ti3nt) (define asm-enter values)]
[else
(define-syntax asm-enter
(lambda (x)
(syntax-case x ()
@ -2498,8 +2499,7 @@
#'(%seq
; adjust to 16-byte boundary, accounting for 4-byte return address pushed by call
(set! ,%sp ,(%inline - ,%sp (immediate 12)))
,e))])))]
[else (define asm-enter values)])
,e))])))])
(define callee-expects-result-pointer?
(lambda (result-type)
@ -2542,10 +2542,10 @@
;; will be pushed later, before a function call
(let ([offset (fx+ (fx* 4 (length regs)) (fx* 8 fp-reg-count))])
(constant-case machine-type-name
[(i3osx ti3osx)
[(i3nt ti3nt) offset]
[else
(fx- (fxlogand (fx+ offset (fx* 4 arg-count) 15) -16)
(fx* 4 arg-count))]
[else offset])))
(fx* 4 arg-count))])))
(define (push-registers regs fp-reg-count arg-count)
(let ([offset (push-registers-size regs fp-reg-count arg-count)])
(move-registers regs fp-reg-count #f offset
@ -2715,8 +2715,8 @@
(let ([frame-size (constant-case machine-type-name
; maintain 16-byte alignment not including the return address pushed
; by the call instruction, which counts as part of callee's frame
[(i3osx ti3osx) (fxlogand (fx+ orig-frame-size 15) -16)]
[else orig-frame-size])])
[(i3nt ti3nt) orig-frame-size]
[else (fxlogand (fx+ orig-frame-size 15) -16)])])
(values (lambda ()
(if (fx= frame-size 0)
`(nop)
@ -2837,7 +2837,7 @@
| |
| incoming stack args |
sp+X+Y+Z: | |
+---------------------------+ <- i3osx: 16-byte boundary
+---------------------------+ <- i3nt/ti3nt: 4-byte boundary. other: 16-byte boundary
| incoming return address | one word
+---------------------------+
| |
@ -2846,9 +2846,9 @@
+---------------------------+
sp+X: | unactivate mode | 0 words or 1 word
+---------------------------+
| indirect result space | i3osx: 3 words
| (for & results via regs) | other: 2 words
sp+0: +---------------------------+<- i3osx: 16-byte boundary
| indirect result space | i3nt/ti3nt: 2 words
| (for & results via regs) | other: 3 words
sp+0: +---------------------------+<- i3nt/ti3nt: 4-byte boundary. other: 16-byte boundary
|#
@ -3012,13 +3012,13 @@
[arg-type* (info-foreign-arg-type* info)]
[result-type (info-foreign-result-type info)]
[indirect-result-space (constant-case machine-type-name
[(i3osx ti3osx)
;; maintain 16-bit alignment for i3osx, taking into account
[(i3nt ti3nt) (if adjust-active? 12 8)]
[else
;; maintain 16-bit alignment, taking into account
;; 16 bytes pushed above + 4 for RA pushed by asmCcall;
;; 8 of these bytes are used for &-return space, if needed;
;; the extra 4 bytes may be used for the unactivate mode
12]
[else (if adjust-active? 12 8)])]
12])]
[init-stack-offset (fx+ 20 indirect-result-space)]
[indirect-result-to-registers? (fill-result-pointer-from-registers? result-type)])
(let-values ([(get-result result-regs result-num-fp-regs)