Update gmp invert_limb from arm (#3111)
Extract the invert_limb code for ARM from GMP 6.2. To check for Thumb mode availability, use the predefined macro `__thumb__`, which in turn requires processing the source file with `gcc` instead of `as` in order to access the preprocessor (tested with `clang` as well). Fixes #3050
This commit is contained in:
parent
876aff85c2
commit
392dc33ceb
|
@ -302,7 +302,7 @@ vector.@LTO@: $(srcdir)/vector.c
|
|||
gmp_alpha_gcc.@LTO@: $(srcdir)/gmp/gmp_alpha_gcc.s
|
||||
$(AS) -o gmp_alpha_gcc.@LTO@ $(srcdir)/gmp/gmp_alpha_gcc.s
|
||||
# The ARM invert_limb source is gmp_arm_gcc.S (capital S) so that $(CC) runs the C preprocessor on it; the prerequisite must name the same file the recipe compiles.
gmp_arm_gcc.@LTO@: $(srcdir)/gmp/gmp_arm_gcc.S
|
||||
$(AS) -o gmp_arm_gcc.@LTO@ $(srcdir)/gmp/gmp_arm_gcc.s
|
||||
$(CC) -o gmp_arm_gcc.@LTO@ -c $(srcdir)/gmp/gmp_arm_gcc.S
|
||||
|
||||
SCONFIG = $(srcdir)/../sconfig.h $(srcdir)/../uconfig.h ../mzconfig.h
|
||||
|
||||
|
|
71
racket/src/racket/src/gmp/gmp_arm_gcc.S
Normal file
71
racket/src/racket/src/gmp/gmp_arm_gcc.S
Normal file
|
@ -0,0 +1,71 @@
|
|||
/* RETURN(x): expands to the instruction used to return through register x. When the compiler predefines __thumb__ it is `bx x`; otherwise it is `mov pc, x`. Needs the C preprocessor, hence the .S suffix. */
#if defined(__thumb__)
|
||||
#define RETURN(x) bx x
|
||||
#else
|
||||
#define RETURN(x) mov pc, x
|
||||
#endif
|
||||
|
||||
/*
 * scheme_gmpn_invert_limb
 *   In:      r0 = 32-bit limb d. The table lookup below only lands inside
 *            approx_tab when (d >> 23) >= 256, i.e. when bit 31 of d is set.
 *   Out:     r0 = result of the refinement sequence below.
 *            NOTE(review): presumably the GMP invert_limb value
 *            floor((2^64 - 1) / d) - 2^32 -- confirm against GMP 6.2.
 *   Scratch: r1, r2, r3, r12 and the condition flags; no stack is used.
 *   NOTE(review): the "-8" in .Lptr0 matches pc reading 8 bytes ahead of the
 *   current instruction in ARM state; confirm this file is never assembled
 *   as Thumb code, where that bias would differ.
 */
.align 3
|
||||
.globl scheme_gmpn_invert_limb
|
||||
.type scheme_gmpn_invert_limb,#function
|
||||
scheme_gmpn_invert_limb:
|
||||
ldr r2, .Lptr0  /* r2 = pc-relative offset of (approx_tab - 512) */
|
||||
.Lbas0: add r2, r2, r15  /* add pc: r2 = address of approx_tab - 512 */
|
||||
mov r3, r0, lsr #23  /* r3 = d >> 23 (top 9 bits of d) */
|
||||
mov r3, r3, asl #1  /* scale by 2: table entries are halfwords */
|
||||
ldrh r3, [r3, r2]  /* r3 = 16-bit table entry for those top bits */
|
||||
mov r1, r3, asl #17  /* r1 = entry << 17 */
|
||||
mul r12, r3, r3  /* r12 = entry * entry */
|
||||
umull r3, r2, r12, r0  /* r2:r3 = r12 * d (r2 = high word) */
|
||||
sub r1, r1, r2, asl #1  /* r1 = r1 - 2 * high word */
|
||||
umull r3, r2, r1, r1  /* r2:r3 = r1 * r1 */
|
||||
umull r12, r3, r0, r3  /* r3 = high word of d * low(r1^2) */
|
||||
umull r2, r12, r0, r2  /* r12:r2 = d * high(r1^2) */
|
||||
adds r2, r2, r3  /* accumulate the partial products, setting carry */
|
||||
adc r12, r12, #0  /* propagate the carry into the high word */
|
||||
rsb r1, r12, r1  /* r1 = r1 - r12 */
|
||||
mvn r2, r2, lsr #30  /* r2 = ~(r2 >> 30) */
|
||||
add r2, r2, r1, asl #2  /* r2 = r2 + (r1 << 2): refined estimate */
|
||||
umull r12, r3, r0, r2  /* r3:r12 = d * estimate */
|
||||
adds r1, r12, r0  /* low word + d; only the carry out is used afterwards */
|
||||
adc r3, r3, r0  /* r3 = high word + d + carry */
|
||||
rsb r0, r3, r2  /* result: r0 = estimate - r3 */
|
||||
RETURN(r14)  /* return through lr */
|
||||
|
||||
.Lptr0: .word approx_tab-512-.Lbas0-8  /* link-time offset consumed at .Lbas0 */
|
||||
.size scheme_gmpn_invert_limb,.-scheme_gmpn_invert_limb
|
||||
|
||||
/*
 * approx_tab: 256 halfword entries (32 rows of 8), placed in .rodata.
 * scheme_gmpn_invert_limb reads it with a base of approx_tab - 512 and a byte
 * offset of 2 * (d >> 23), so entry 0 corresponds to (d >> 23) == 256 and the
 * last entry to (d >> 23) == 511.
 * ".align 1" requests 2-byte alignment (on ARM GAS the operand is a power of two).
 */
.section .rodata
|
||||
.align 1
|
||||
approx_tab:
|
||||
.short 0xffc0,0xfec0,0xfdc0,0xfcc0,0xfbc0,0xfac0,0xfa00,0xf900
|
||||
.short 0xf800,0xf700,0xf640,0xf540,0xf440,0xf380,0xf280,0xf180
|
||||
.short 0xf0c0,0xefc0,0xef00,0xee00,0xed40,0xec40,0xeb80,0xeac0
|
||||
.short 0xe9c0,0xe900,0xe840,0xe740,0xe680,0xe5c0,0xe500,0xe400
|
||||
.short 0xe340,0xe280,0xe1c0,0xe100,0xe040,0xdf80,0xdec0,0xde00
|
||||
.short 0xdd40,0xdc80,0xdbc0,0xdb00,0xda40,0xd980,0xd8c0,0xd800
|
||||
.short 0xd740,0xd680,0xd600,0xd540,0xd480,0xd3c0,0xd340,0xd280
|
||||
.short 0xd1c0,0xd140,0xd080,0xcfc0,0xcf40,0xce80,0xcdc0,0xcd40
|
||||
.short 0xcc80,0xcc00,0xcb40,0xcac0,0xca00,0xc980,0xc8c0,0xc840
|
||||
.short 0xc780,0xc700,0xc640,0xc5c0,0xc540,0xc480,0xc400,0xc380
|
||||
.short 0xc2c0,0xc240,0xc1c0,0xc100,0xc080,0xc000,0xbf80,0xbec0
|
||||
.short 0xbe40,0xbdc0,0xbd40,0xbc80,0xbc00,0xbb80,0xbb00,0xba80
|
||||
.short 0xba00,0xb980,0xb900,0xb840,0xb7c0,0xb740,0xb6c0,0xb640
|
||||
.short 0xb5c0,0xb540,0xb4c0,0xb440,0xb3c0,0xb340,0xb2c0,0xb240
|
||||
.short 0xb1c0,0xb140,0xb0c0,0xb080,0xb000,0xaf80,0xaf00,0xae80
|
||||
.short 0xae00,0xad80,0xad40,0xacc0,0xac40,0xabc0,0xab40,0xaac0
|
||||
.short 0xaa80,0xaa00,0xa980,0xa900,0xa8c0,0xa840,0xa7c0,0xa740
|
||||
.short 0xa700,0xa680,0xa600,0xa5c0,0xa540,0xa4c0,0xa480,0xa400
|
||||
.short 0xa380,0xa340,0xa2c0,0xa240,0xa200,0xa180,0xa140,0xa0c0
|
||||
.short 0xa080,0xa000,0x9f80,0x9f40,0x9ec0,0x9e80,0x9e00,0x9dc0
|
||||
.short 0x9d40,0x9d00,0x9c80,0x9c40,0x9bc0,0x9b80,0x9b00,0x9ac0
|
||||
.short 0x9a40,0x9a00,0x9980,0x9940,0x98c0,0x9880,0x9840,0x97c0
|
||||
.short 0x9780,0x9700,0x96c0,0x9680,0x9600,0x95c0,0x9580,0x9500
|
||||
.short 0x94c0,0x9440,0x9400,0x93c0,0x9340,0x9300,0x92c0,0x9240
|
||||
.short 0x9200,0x91c0,0x9180,0x9100,0x90c0,0x9080,0x9000,0x8fc0
|
||||
.short 0x8f80,0x8f40,0x8ec0,0x8e80,0x8e40,0x8e00,0x8d80,0x8d40
|
||||
.short 0x8d00,0x8cc0,0x8c80,0x8c00,0x8bc0,0x8b80,0x8b40,0x8b00
|
||||
.short 0x8a80,0x8a40,0x8a00,0x89c0,0x8980,0x8940,0x88c0,0x8880
|
||||
.short 0x8840,0x8800,0x87c0,0x8780,0x8740,0x8700,0x8680,0x8640
|
||||
.short 0x8600,0x85c0,0x8580,0x8540,0x8500,0x84c0,0x8480,0x8440
|
||||
.short 0x8400,0x8380,0x8340,0x8300,0x82c0,0x8280,0x8240,0x8200
|
||||
.short 0x81c0,0x8180,0x8140,0x8100,0x80c0,0x8080,0x8040,0x8000
|
|
@ -1,71 +0,0 @@
|
|||
/*
 * scheme_gmpn_invert_limb (implementation removed by this commit)
 *   In:      r0 = 32-bit limb d. The table index is (d >> 23) - 256, so it is
 *            only non-negative when bit 31 of d is set.
 *   Out:     r0 = r2 or r2 + 1, chosen by the final flag test.
 *            NOTE(review): presumably the same reciprocal value as the
 *            replacement routine -- confirm.
 *   Scratch: r1, r2, r3, ip and flags; r4 and lr are saved and restored on the stack.
 */
.globl scheme_gmpn_invert_limb
|
||||
scheme_gmpn_invert_limb:
|
||||
stmfd sp!, {r4, lr}  /* save r4 and the return address */
|
||||
mov r3, r0, lsr #23  /* r3 = d >> 23 */
|
||||
sub r3, r3, #256  /* rebase the index to start at 0 */
|
||||
add r2, pc, #invtab-.-8  /* r2 = address of invtab (pc-relative) */
|
||||
mov r3, r3, lsl #1  /* scale by 2: table entries are halfwords */
|
||||
ldrh r1, [r2, r3]  /* r1 = 16-bit table entry */
|
||||
mov r2, r1, lsl #6  /* r2 = entry << 6 */
|
||||
mul ip, r2, r2  /* ip = r2 * r2 */
|
||||
umull lr, r4, ip, r0  /* r4:lr = ip * d */
|
||||
mov r2, r4, lsl #1  /* r2 = 2 * high word */
|
||||
rsb r2, r2, r1, lsl #23  /* r2 = (entry << 23) - r2 */
|
||||
umull ip, r3, r2, r2  /* r3:ip = r2 * r2 */
|
||||
umull lr, r4, r3, r0  /* r4:lr = high(r2^2) * d */
|
||||
umull r3, r1, ip, r0  /* r1 = high word of low(r2^2) * d */
|
||||
adds lr, lr, r1  /* accumulate the partial products */
|
||||
addcs r4, r4, #1  /* propagate the carry into the high word */
|
||||
mov r3, lr, lsr #30  /* r3 = top two bits of lr */
|
||||
orr r4, r3, r4, lsl #2  /* r4 = (r4 << 2) | r3, i.e. r4:lr shifted left by 2 (high word) */
|
||||
mov lr, lr, lsl #2  /* low word of the same shift */
|
||||
cmn lr, #1  /* sets carry only when lr == 0xffffffff; consumed by rsc */
|
||||
rsc r2, r4, r2, lsl #2  /* r2 = (r2 << 2) - r4 - (1 - carry) */
|
||||
umull ip, r1, r0, r2  /* r1:ip = d * estimate */
|
||||
add r1, r1, r0  /* r1 = high word + d */
|
||||
cmn r1, #1  /* Z set when r1 == 0xffffffff */
|
||||
beq $l1  /* in that case skip the first adjustment */
|
||||
adds ip, ip, r0  /* add d to the low word */
|
||||
adc r1, r1, #0  /* propagate the carry */
|
||||
add r2, r2, #1  /* bump the estimate */
|
||||
$l1:
|
||||
adds r3, ip, r0  /* low word + d; only the carry matters */
|
||||
adcs r1, r1, #0  /* Z set when r1 + carry == 0 */
|
||||
moveq r0, r2  /* return r2 when that sum is zero */
|
||||
addne r0, r2, #1  /* otherwise return r2 + 1 */
|
||||
ldmfd sp!, {r4, pc}  /* restore r4 and return */
|
||||
|
||||
/*
 * invtab: 256 halfword entries (32 rows of 8), emitted directly after the code.
 * The removed scheme_gmpn_invert_limb loads the entry at byte offset
 * 2 * ((d >> 23) - 256) from this label.
 */
invtab:
|
||||
.short 1023,1020,1016,1012,1008,1004,1000,996
|
||||
.short 992,989,985,981,978,974,970,967
|
||||
.short 963,960,956,953,949,946,942,939
|
||||
.short 936,932,929,926,923,919,916,913
|
||||
.short 910,907,903,900,897,894,891,888
|
||||
.short 885,882,879,876,873,870,868,865
|
||||
.short 862,859,856,853,851,848,845,842
|
||||
.short 840,837,834,832,829,826,824,821
|
||||
.short 819,816,814,811,809,806,804,801
|
||||
.short 799,796,794,791,789,787,784,782
|
||||
.short 780,777,775,773,771,768,766,764
|
||||
.short 762,759,757,755,753,751,748,746
|
||||
.short 744,742,740,738,736,734,732,730
|
||||
.short 728,726,724,722,720,718,716,714
|
||||
.short 712,710,708,706,704,702,700,699
|
||||
.short 697,695,693,691,689,688,686,684
|
||||
.short 682,680,679,677,675,673,672,670
|
||||
.short 668,667,665,663,661,660,658,657
|
||||
.short 655,653,652,650,648,647,645,644
|
||||
.short 642,640,639,637,636,634,633,631
|
||||
.short 630,628,627,625,624,622,621,619
|
||||
.short 618,616,615,613,612,611,609,608
|
||||
.short 606,605,604,602,601,599,598,597
|
||||
.short 595,594,593,591,590,589,587,586
|
||||
.short 585,583,582,581,579,578,577,576
|
||||
.short 574,573,572,571,569,568,567,566
|
||||
.short 564,563,562,561,560,558,557,556
|
||||
.short 555,554,553,551,550,549,548,547
|
||||
.short 546,544,543,542,541,540,539,538
|
||||
.short 537,536,534,533,532,531,530,529
|
||||
.short 528,527,526,525,524,523,522,521
|
||||
.short 520,519,518,517,516,515,514,513
|
||||
|
Loading…
Reference in New Issue
Block a user