From 45381612b2fbcd45c44c1ddf3a2ef3b2ab7ba87f Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Sat, 11 Jan 2020 15:48:06 -0700 Subject: [PATCH] fix popcount support to work on Windows Avoid RDI, since it is a callee-saved (preserved) register in the Windows x64 ABI. original commit: 68b2f597ec67ed8752998807bd0c9fc66667c752 --- c/fasl.c | 2 +- makefiles/Mf-install.in | 2 +- s/cmacros.ss | 2 +- s/cpnanopass.ss | 6 ++--- s/x86_64.ss | 52 ++++++++++++++++++++--------------------- 5 files changed, 32 insertions(+), 32 deletions(-) diff --git a/c/fasl.c b/c/fasl.c index 18e0f04933..ed625f5fe8 100644 --- a/c/fasl.c +++ b/c/fasl.c @@ -1561,7 +1561,7 @@ static void x86_64_set_popcount(void *address, uptr item) { *((octet *)address + 4) = 0x48; /* REX */ *((octet *)address + 5) = 0x0F; /* POPCNT */ *((octet *)address + 6) = 0xB8; - *((octet *)address + 7) = 0xC7; /* RDI -> RAX */ + *((octet *)address + 7) = 0xC1; /* RCX -> RAX */ /* 4-byte nop: */ *((octet *)address + 8) = 0x0F; *((octet *)address + 9) = 0x1F; diff --git a/makefiles/Mf-install.in b/makefiles/Mf-install.in index 90a424ab62..905f2f5898 100644 --- a/makefiles/Mf-install.in +++ b/makefiles/Mf-install.in @@ -62,7 +62,7 @@ InstallLZ4Target= # no changes should be needed below this point # ############################################################################### -Version=csv9.5.3.11 +Version=csv9.5.3.12 Include=boot/$m PetiteBoot=boot/$m/petite.boot SchemeBoot=boot/$m/scheme.boot diff --git a/s/cmacros.ss b/s/cmacros.ss index f9b1b16448..c794884b49 100644 --- a/s/cmacros.ss +++ b/s/cmacros.ss @@ -328,7 +328,7 @@ [(_ foo e1 e2) e1] ... [(_ bar e1 e2) e2]))))]))) -(define-constant scheme-version #x0905030B) +(define-constant scheme-version #x0905030C) (define-syntax define-machine-types (lambda (x) diff --git a/s/cpnanopass.ss b/s/cpnanopass.ss index 3912b559d2..912823ce10 100644 --- a/s/cpnanopass.ss +++ b/s/cpnanopass.ss @@ -13396,7 +13396,7 @@ [(x86_64) `(seq (set! 
,%rax (inline ,(make-info-inline) ,%popcount ,%rdi)) - (asm-c-return ,null-info ,%rdi ,%rax))] + (asm-c-return ,null-info ,%rax))] [else ;; Generate anything, since this should not get called `(seq @@ -13407,9 +13407,9 @@ ,(constant-case architecture [(x86_64) (%seq - (set! ,%rdi ,%rbx) ; %rbx must be preserved + (set! ,%r8 ,%rbx) ; %rbx must be preserved (set! ,%rax (inline ,(make-info-kill* (reg-list %rbx %rcx %rdx)) ,%cpuid)) - (set! ,%rbx ,%rdi) + (set! ,%rbx ,%r8) (asm-c-return ,null-info ,%rax ,%rbx))] [else ;; Generate anything, since this should not get called diff --git a/s/x86_64.ss b/s/x86_64.ss index 1a12b14a58..6db9b7326c 100644 --- a/s/x86_64.ss +++ b/s/x86_64.ss @@ -706,11 +706,11 @@ `(set! ,(make-live-info) ,z (asm ,info ,(asm-popcount (info-inline? info)) ,x)) ;; Link-editable variant, for corresponding `popcount-op`: - (let ([urdi (make-precolored-unspillable 'urdi %rdi)] + (let ([urcx (make-precolored-unspillable 'urcx %rcx)] [urax (make-precolored-unspillable 'urax %rax)]) (seq - `(set! ,(make-live-info) ,urdi ,x) - `(set! ,(make-live-info) ,urax (asm ,info ,(asm-popcount (info-inline? info)) ,urdi)) + `(set! ,(make-live-info) ,urcx ,x) + `(set! ,(make-live-info) ,urax (asm ,info ,(asm-popcount (info-inline? info)) ,urcx)) `(set! ,(make-live-info) ,z ,urax)))]) (define-instruction value move @@ -1627,13 +1627,13 @@ ;; Link-editable variant: (define popcount-op (let ([target `(x86_64-popcount ,(constant code-data-disp) (library ,(lookup-libspec popcount-slow)))]) - (lambda (op size dest-rax src-rdi inline? code*) + (lambda (op size dest-rax src-rcx inline? code*) (safe-assert (and (ax-register? dest-rax) (ax-register? src-dir))) (record-case dest-rax [(reg) dest-rax - (record-case src-rdi - [(reg) src-rdi - (safe-assert (and (eq? dest-rax %rax) (eq? src-rdi %rdi))) + (record-case src-rcx + [(reg) src-rcx + (safe-assert (and (eq? dest-rax %rax) (eq? src-rcx %rcx))) (cond [(not inline?) 
;; Set up a call to `popcount-slow`, which the linker @@ -1644,30 +1644,30 @@ ;; This is the sequence generated by LLVM's __builtin_popcountl() ;; __builtin_popcountl() intrinsic, but with pushes and pops ;; to save used registers other than the result register %rax. - (emit-literal-code (op dest-rax src-rdi code*) - 57 ; push %rdi - 51 ; push %rcx - 48 89 f8 ; movq %rdi, %rax + (emit-literal-code (op dest-rax src-rcx code*) + 51 ; pushq %rcx + 57 ; pushq %rdi + 48 89 c8 ; movq %rcx, %rax 48 d1 e8 ; shrq %rax - 48 b9 55 55 55 55 55 55 55 55 ; movabsq $6148914691236517205, %rcx - 48 21 c1 ; andq %rax, %rcx - 48 29 cf ; subq %rcx, %rdi - 48 b8 33 33 33 33 33 33 33 33 ; movabsq $3689348814741910323, %rax - 48 89 f9 ; movq %rdi, %rcx - 48 21 c1 ; andq %rax, %rcx - 48 c1 ef 02 ; shrq $2, %rdi + 48 bf 55 55 55 55 55 55 55 55 ; movabsq $6148914691236517205, %rdi 48 21 c7 ; andq %rax, %rdi - 48 01 cf ; addq %rcx, %rdi - 48 89 f8 ; movq %rdi, %rax - 48 c1 e8 04 ; shrq $4, %rax - 48 8d 04 38 ; leaq (%rax,%rdi), %rax - 48 b9 0f 0f 0f 0f 0f 0f 0f 0f ; movabsq $1085102592571150095, %rcx + 48 29 f9 ; subq %rdi, %rcx + 48 b8 33 33 33 33 33 33 33 33 ; movabsq $3689348814741910323, %rax + 48 89 cf ; movq %rcx, %rdi + 48 21 c7 ; andq %rax, %rdi + 48 c1 e9 02 ; shrq $2, %rcx 48 21 c1 ; andq %rax, %rcx + 48 01 f9 ; addq %rdi, %rcx + 48 89 c8 ; movq %rcx, %rax + 48 c1 e8 04 ; shrq $4, %rax + 48 8d 04 08 ; leaq (%rax,%rcx), %rax + 48 bf 0f 0f 0f 0f 0f 0f 0f 0f ; movabsq $1085102592571150095, %rdi + 48 21 c7 ; andq %rax, %rdi 48 b8 01 01 01 01 01 01 01 01 ; movabsq $72340172838076673, %rax - 48 0f af c1 ; imulq %rcx, %rax + 48 0f af c7 ; imulq %rdi, %rax 48 c1 e8 38 ; shrq $56, %rax - 59 ; pop %rcx - 5f)])])])))) ; pop %rdi + 5f ; popq %rdi + 59)])])])))) ; popq %rcx (define-syntax emit-code (lambda (x)