16-byte alignment on non-Windows x86
Maintain 16-byte stack alignment (if incoming frames are 16-byte aligned), because newer versions of gcc count on 16-byte alignment for some generated SSE instructions. [This patch is extracted from cisco/ChezScheme#518. Merging the rest of that change will take longer.] original commit: 88d57da163ca287ab37534fd858361a035ce7e4a
This commit is contained in:
parent
9bdc112b4d
commit
a958dec07f
50
s/x86.ss
50
s/x86.ss
|
@ -1031,11 +1031,11 @@
|
||||||
(define-instruction effect invoke-prelude
|
(define-instruction effect invoke-prelude
|
||||||
[(op)
|
[(op)
|
||||||
(constant-case machine-type-name
|
(constant-case machine-type-name
|
||||||
[(i3osx ti3osx)
|
[(i3nt ti3nt) `(set! ,(make-live-info) ,%tc (mref ,%sp ,%zero 4))]
|
||||||
|
[else
|
||||||
(seq
|
(seq
|
||||||
`(set! ,(make-live-info) ,%tc (mref ,%sp ,%zero 4 uptr))
|
`(set! ,(make-live-info) ,%tc (mref ,%sp ,%zero 4 uptr))
|
||||||
`(set! ,(make-live-info) ,%sp (asm ,info ,asm-sub ,%sp (immediate 12))))]
|
`(set! ,(make-live-info) ,%sp (asm ,info ,asm-sub ,%sp (immediate 12))))])])
|
||||||
[else `(set! ,(make-live-info) ,%tc (mref ,%sp ,%zero 4 uptr))])])
|
|
||||||
)
|
)
|
||||||
|
|
||||||
;;; SECTION 3: assembler
|
;;; SECTION 3: assembler
|
||||||
|
@ -2080,8 +2080,8 @@
|
||||||
(lambda ()
|
(lambda ()
|
||||||
(constant-case machine-type-name
|
(constant-case machine-type-name
|
||||||
; remove padding added by asm-enter
|
; remove padding added by asm-enter
|
||||||
[(i3osx ti3osx) (emit addi '(imm 12) (cons 'reg %sp) (emit ret '()))]
|
[(i3nt ti3nt) (emit ret '())]
|
||||||
[else (emit ret '())])))
|
[else (emit addi '(imm 12) (cons 'reg %sp) (emit ret '()))])))
|
||||||
|
|
||||||
(define asm-c-return
|
(define asm-c-return
|
||||||
(lambda (info)
|
(lambda (info)
|
||||||
|
@ -2272,17 +2272,17 @@
|
||||||
(define asm-save-flrv
|
(define asm-save-flrv
|
||||||
(lambda (code*)
|
(lambda (code*)
|
||||||
; we normally need 8 to store the floating point return variable, but
|
; we normally need 8 to store the floating point return variable, but
|
||||||
; on the x86 mac we need 16 in order to get the required 16-byte alignment
|
; on some OS's we need 16 in order to get the required 16-byte alignment
|
||||||
(emit subi `(imm ,(constant-case machine-type-name [(i3osx ti3osx) 16] [else 8]))
|
(emit subi `(imm ,(constant-case machine-type-name [(i3nt ti3nt) 8] [else 16]))
|
||||||
(cons 'reg %sp)
|
(cons 'reg %sp)
|
||||||
(emit fstpl `(disp 0 ,%sp) code*))))
|
(emit fstpl `(disp 0 ,%sp) code*))))
|
||||||
|
|
||||||
(define asm-restore-flrv
|
(define asm-restore-flrv
|
||||||
(lambda (code*)
|
(lambda (code*)
|
||||||
; we normally need 8 to store the floating point return variable, but
|
; we normally need 8 to store the floating point return variable, but
|
||||||
; on the x86 mac we need 16 in order to get the required 16-byte alignment
|
; on some OS's we need 16 in order to get the required 16-byte alignment
|
||||||
(emit fldl `(disp 0 ,%sp)
|
(emit fldl `(disp 0 ,%sp)
|
||||||
(emit addi `(imm ,(constant-case machine-type-name [(i3osx ti3osx) 16] [else 8]))
|
(emit addi `(imm ,(constant-case machine-type-name [(i3nt ti3nt) 8] [else 16]))
|
||||||
(cons 'reg %sp) code*))))
|
(cons 'reg %sp) code*))))
|
||||||
|
|
||||||
(define asm-library-jump
|
(define asm-library-jump
|
||||||
|
@ -2489,7 +2489,8 @@
|
||||||
code*)))
|
code*)))
|
||||||
|
|
||||||
(constant-case machine-type-name
|
(constant-case machine-type-name
|
||||||
[(i3osx ti3osx)
|
[(i3nt ti3nt) (define asm-enter values)]
|
||||||
|
[else
|
||||||
(define-syntax asm-enter
|
(define-syntax asm-enter
|
||||||
(lambda (x)
|
(lambda (x)
|
||||||
(syntax-case x ()
|
(syntax-case x ()
|
||||||
|
@ -2498,8 +2499,7 @@
|
||||||
#'(%seq
|
#'(%seq
|
||||||
; adjust to 16-byte boundary, accounting for 4-byte return address pushed by call
|
; adjust to 16-byte boundary, accounting for 4-byte return address pushed by call
|
||||||
(set! ,%sp ,(%inline - ,%sp (immediate 12)))
|
(set! ,%sp ,(%inline - ,%sp (immediate 12)))
|
||||||
,e))])))]
|
,e))])))])
|
||||||
[else (define asm-enter values)])
|
|
||||||
|
|
||||||
(define callee-expects-result-pointer?
|
(define callee-expects-result-pointer?
|
||||||
(lambda (result-type)
|
(lambda (result-type)
|
||||||
|
@ -2542,10 +2542,10 @@
|
||||||
;; will be pushed later, before a function call
|
;; will be pushed later, before a function call
|
||||||
(let ([offset (fx+ (fx* 4 (length regs)) (fx* 8 fp-reg-count))])
|
(let ([offset (fx+ (fx* 4 (length regs)) (fx* 8 fp-reg-count))])
|
||||||
(constant-case machine-type-name
|
(constant-case machine-type-name
|
||||||
[(i3osx ti3osx)
|
[(i3nt ti3nt) offset]
|
||||||
|
[else
|
||||||
(fx- (fxlogand (fx+ offset (fx* 4 arg-count) 15) -16)
|
(fx- (fxlogand (fx+ offset (fx* 4 arg-count) 15) -16)
|
||||||
(fx* 4 arg-count))]
|
(fx* 4 arg-count))])))
|
||||||
[else offset])))
|
|
||||||
(define (push-registers regs fp-reg-count arg-count)
|
(define (push-registers regs fp-reg-count arg-count)
|
||||||
(let ([offset (push-registers-size regs fp-reg-count arg-count)])
|
(let ([offset (push-registers-size regs fp-reg-count arg-count)])
|
||||||
(move-registers regs fp-reg-count #f offset
|
(move-registers regs fp-reg-count #f offset
|
||||||
|
@ -2715,8 +2715,8 @@
|
||||||
(let ([frame-size (constant-case machine-type-name
|
(let ([frame-size (constant-case machine-type-name
|
||||||
; maintain 16-byte alignment not including the return address pushed
|
; maintain 16-byte alignment not including the return address pushed
|
||||||
; by the call instruction, which counts as part of callee's frame
|
; by the call instruction, which counts as part of callee's frame
|
||||||
[(i3osx ti3osx) (fxlogand (fx+ orig-frame-size 15) -16)]
|
[(i3nt ti3nt) orig-frame-size]
|
||||||
[else orig-frame-size])])
|
[else (fxlogand (fx+ orig-frame-size 15) -16)])])
|
||||||
(values (lambda ()
|
(values (lambda ()
|
||||||
(if (fx= frame-size 0)
|
(if (fx= frame-size 0)
|
||||||
`(nop)
|
`(nop)
|
||||||
|
@ -2837,7 +2837,7 @@
|
||||||
| |
|
| |
|
||||||
| incoming stack args |
|
| incoming stack args |
|
||||||
sp+X+Y+Z: | |
|
sp+X+Y+Z: | |
|
||||||
+---------------------------+ <- i3osx: 16-byte boundary
|
+---------------------------+ <- i3nt/ti3nt: 4-byte boundary. other: 16-byte boundary
|
||||||
| incoming return address | one word
|
| incoming return address | one word
|
||||||
+---------------------------+
|
+---------------------------+
|
||||||
| |
|
| |
|
||||||
|
@ -2846,9 +2846,9 @@
|
||||||
+---------------------------+
|
+---------------------------+
|
||||||
sp+X: | unactivate mode | 0 words or 1 word
|
sp+X: | unactivate mode | 0 words or 1 word
|
||||||
+---------------------------+
|
+---------------------------+
|
||||||
| indirect result space | i3osx: 3 words
|
| indirect result space | i3nt/ti3nt: 2 words
|
||||||
| (for & results via regs) | other: 2 words
|
| (for & results via regs) | other: 3 words
|
||||||
sp+0: +---------------------------+<- i3osx: 16-byte boundary
|
sp+0: +---------------------------+<- i3nt/ti3nt: 4-byte boundary. other: 16-byte boundary
|
||||||
|#
|
|#
|
||||||
|
|
||||||
|
|
||||||
|
@ -3012,13 +3012,13 @@
|
||||||
[arg-type* (info-foreign-arg-type* info)]
|
[arg-type* (info-foreign-arg-type* info)]
|
||||||
[result-type (info-foreign-result-type info)]
|
[result-type (info-foreign-result-type info)]
|
||||||
[indirect-result-space (constant-case machine-type-name
|
[indirect-result-space (constant-case machine-type-name
|
||||||
[(i3osx ti3osx)
|
[(i3nt ti3nt) (if adjust-active? 12 8)]
|
||||||
;; maintain 16-byte alignment for i3osx, taking into account
|
[else
|
||||||
|
;; maintain 16-byte alignment, taking into account
|
||||||
;; 16 bytes pushed above + 4 for RA pushed by asmCcall;
|
;; 16 bytes pushed above + 4 for RA pushed by asmCcall;
|
||||||
;; 8 of these bytes are used for &-return space, if needed;
|
;; 8 of these bytes are used for &-return space, if needed;
|
||||||
;; the extra 4 bytes may be used for the unactivate mode
|
;; the extra 4 bytes may be used for the unactivate mode
|
||||||
12]
|
12])]
|
||||||
[else (if adjust-active? 12 8)])]
|
|
||||||
[init-stack-offset (fx+ 20 indirect-result-space)]
|
[init-stack-offset (fx+ 20 indirect-result-space)]
|
||||||
[indirect-result-to-registers? (fill-result-pointer-from-registers? result-type)])
|
[indirect-result-to-registers? (fill-result-pointer-from-registers? result-type)])
|
||||||
(let-values ([(get-result result-regs result-num-fp-regs)
|
(let-values ([(get-result result-regs result-num-fp-regs)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user