fix bignum quotient on 32-bit ARM
An assembly implementation of `gmpn_invert_limb` is needed for bignum division to work correctly within the embedded slice of GMP. Relevant to #2773.
This commit is contained in:
parent
a1631424bf
commit
17bc626293
|
@ -4834,6 +4834,10 @@ case "$host_os" in
|
|||
i386|i486|i586|i686)
|
||||
enable_parallel_by_default=yes
|
||||
;;
|
||||
arm*)
|
||||
EXTRA_GMP_OBJ="gmp_arm_gcc.o"
|
||||
EXTRA_GMP_DEP='$(EXTRA_GMP_DEP_FILE)'
|
||||
;;
|
||||
*)
|
||||
;;
|
||||
esac
|
||||
|
|
|
@ -604,6 +604,10 @@ case "$host_os" in
|
|||
i386|i486|i586|i686)
|
||||
enable_parallel_by_default=yes
|
||||
;;
|
||||
arm*)
|
||||
EXTRA_GMP_OBJ="gmp_arm_gcc.o"
|
||||
EXTRA_GMP_DEP='$(EXTRA_GMP_DEP_FILE)'
|
||||
;;
|
||||
*)
|
||||
;;
|
||||
esac
|
||||
|
|
|
@ -209,6 +209,7 @@ fun.@LTO@: $(srcdir)/fun.c
|
|||
future.@LTO@: $(srcdir)/future.c
|
||||
$(CC) $(ALL_CFLAGS) -c $(srcdir)/future.c -o future.@LTO@
|
||||
gmp.@LTO@: $(srcdir)/gmp/gmp.c $(srcdir)/gmp/gmplonglong.h \
|
||||
$(srcdir)/gmp/gmp.h $(srcdir)/gmp/gmp-impl.h $(srcdir)/gmp/gmp-mparam.h \
|
||||
$(srcdir)/../include/schthread.h $(srcdir)/../sconfig.h
|
||||
$(CC) $(ALL_CFLAGS) -c $(srcdir)/gmp/gmp.c -o gmp.@LTO@
|
||||
hash.@LTO@: $(srcdir)/hash.c $(srcdir)/hamt_subset.inc
|
||||
|
@ -300,6 +301,8 @@ vector.@LTO@: $(srcdir)/vector.c
|
|||
|
||||
gmp_alpha_gcc.@LTO@: $(srcdir)/gmp/gmp_alpha_gcc.s
|
||||
$(AS) -o gmp_alpha_gcc.@LTO@ $(srcdir)/gmp/gmp_alpha_gcc.s
|
||||
# Assemble the ARM helper source gmp/gmp_arm_gcc.s with $(AS) into gmp_arm_gcc.@LTO@.
gmp_arm_gcc.@LTO@: $(srcdir)/gmp/gmp_arm_gcc.s
|
||||
$(AS) -o gmp_arm_gcc.@LTO@ $(srcdir)/gmp/gmp_arm_gcc.s
|
||||
|
||||
SCONFIG = $(srcdir)/../sconfig.h $(srcdir)/../uconfig.h ../mzconfig.h
|
||||
|
||||
|
|
|
@ -720,6 +720,10 @@ extern mp_size_t __gmp_default_fp_limb_precision;
|
|||
#define TARGET_REGISTER_STARVED 0
|
||||
#endif
|
||||
|
||||
/* When __arm__ is defined, mark mpn_invert_limb as natively available so
   that the HAVE_NATIVE_mpn_invert_limb test that follows maps invert_limb
   onto the library function. */
#if defined (__arm__)
|
||||
# define HAVE_NATIVE_mpn_invert_limb 1
|
||||
#endif
|
||||
|
||||
/* Use a library function for invert_limb, if available. */
|
||||
#if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
|
||||
#define mpn_invert_limb __MPN(invert_limb)
|
||||
|
|
71
racket/src/racket/src/gmp/gmp_arm_gcc.s
Normal file
71
racket/src/racket/src/gmp/gmp_arm_gcc.s
Normal file
|
@ -0,0 +1,71 @@
|
|||
@ scheme_gmpn_invert_limb -- 32-bit ARM routine, GNU as syntax.
@ In:    r0 = operand limb (called below "d").
@ Out:   r0 = result limb.
@ Saves/restores r4 and lr on the stack; uses r1, r2, r3 and ip as scratch
@ and modifies the condition flags.
@ NOTE(review): the table index computed below is only within 0..255 when
@ bit 31 of r0 is set (a normalized operand) -- confirm that all callers
@ guarantee this.
@ NOTE(review): the multiply/subtract sequence looks like successive
@ refinement of a table-based reciprocal estimate -- confirm against the
@ upstream GMP source before relying on this description.
.globl scheme_gmpn_invert_limb
|
||||
scheme_gmpn_invert_limb:
|
||||
stmfd sp!, {r4, lr} @ push r4 and the return address
|
||||
mov r3, r0, lsr #23 @ r3 = d >> 23 (top 9 bits of d)
|
||||
sub r3, r3, #256 @ r3 = (d >> 23) - 256, the table index
|
||||
add r2, pc, #invtab-.-8 @ r2 = address of invtab, pc-relative; the -8 presumably offsets the ARM-state pc read-ahead
|
||||
mov r3, r3, lsl #1 @ index -> byte offset (entries are .short, 2 bytes each)
|
||||
ldrh r1, [r2, r3] @ r1 = invtab[index], zero-extended halfword
|
||||
mov r2, r1, lsl #6 @ r2 = r1 << 6
|
||||
mul ip, r2, r2 @ ip = low 32 bits of r2 * r2
|
||||
umull lr, r4, ip, r0 @ r4:lr = ip * d (64-bit unsigned product)
|
||||
mov r2, r4, lsl #1 @ r2 = high word of that product << 1
|
||||
rsb r2, r2, r1, lsl #23 @ r2 = (r1 << 23) - r2
|
||||
umull ip, r3, r2, r2 @ r3:ip = r2 * r2
|
||||
umull lr, r4, r3, r0 @ r4:lr = (high word of r2*r2) * d
|
||||
umull r3, r1, ip, r0 @ r1:r3 = (low word of r2*r2) * d
|
||||
adds lr, lr, r1 @ lr += r1, setting carry on overflow
|
||||
addcs r4, r4, #1 @ propagate that carry into r4
|
||||
mov r3, lr, lsr #30 @ r3 = top two bits of lr
|
||||
orr r4, r3, r4, lsl #2 @ r4 = (r4 << 2) | r3 : high word of r4:lr shifted left by 2
|
||||
mov lr, lr, lsl #2 @ lr = low word of r4:lr shifted left by 2
|
||||
cmn lr, #1 @ flags from lr + 1; carry is set only when lr == 0xFFFFFFFF
|
||||
rsc r2, r4, r2, lsl #2 @ r2 = (r2 << 2) - r4 - (1 - carry)
|
||||
umull ip, r1, r0, r2 @ r1:ip = d * r2
|
||||
add r1, r1, r0 @ r1 += d (flags not updated)
|
||||
cmn r1, #1 @ Z is set when r1 == 0xFFFFFFFF
|
||||
beq $l1 @ in that case skip the adjustment below
|
||||
adds ip, ip, r0 @ r1:ip += d (low word, sets carry)
|
||||
adc r1, r1, #0 @ ... high word takes the carry
|
||||
add r2, r2, #1 @ and bump r2 by one
|
||||
$l1:
|
||||
adds r3, ip, r0 @ r3 = ip + d; only the carry out is used
|
||||
adcs r1, r1, #0 @ r1 += carry; Z reflects whether r1 became zero
|
||||
moveq r0, r2 @ r1 == 0: result is r2
|
||||
addne r0, r2, #1 @ r1 != 0: result is r2 + 1
|
||||
ldmfd sp!, {r4, pc} @ restore r4 and return by loading the saved lr into pc
|
||||
|
||||
@ invtab: 256 16-bit entries (32 rows of 8), decreasing from 1023 to 513.
@ scheme_gmpn_invert_limb reads it with ldrh at byte offset
@ 2 * ((r0 >> 23) - 256) from the label.
@ NOTE(review): entry i appears to approximate 2^18 / (256 + i), with the
@ first entry capped at 1023 -- confirm against the upstream GMP table.
invtab:
|
||||
.short 1023,1020,1016,1012,1008,1004,1000,996
|
||||
.short 992,989,985,981,978,974,970,967
|
||||
.short 963,960,956,953,949,946,942,939
|
||||
.short 936,932,929,926,923,919,916,913
|
||||
.short 910,907,903,900,897,894,891,888
|
||||
.short 885,882,879,876,873,870,868,865
|
||||
.short 862,859,856,853,851,848,845,842
|
||||
.short 840,837,834,832,829,826,824,821
|
||||
.short 819,816,814,811,809,806,804,801
|
||||
.short 799,796,794,791,789,787,784,782
|
||||
.short 780,777,775,773,771,768,766,764
|
||||
.short 762,759,757,755,753,751,748,746
|
||||
.short 744,742,740,738,736,734,732,730
|
||||
.short 728,726,724,722,720,718,716,714
|
||||
.short 712,710,708,706,704,702,700,699
|
||||
.short 697,695,693,691,689,688,686,684
|
||||
.short 682,680,679,677,675,673,672,670
|
||||
.short 668,667,665,663,661,660,658,657
|
||||
.short 655,653,652,650,648,647,645,644
|
||||
.short 642,640,639,637,636,634,633,631
|
||||
.short 630,628,627,625,624,622,621,619
|
||||
.short 618,616,615,613,612,611,609,608
|
||||
.short 606,605,604,602,601,599,598,597
|
||||
.short 595,594,593,591,590,589,587,586
|
||||
.short 585,583,582,581,579,578,577,576
|
||||
.short 574,573,572,571,569,568,567,566
|
||||
.short 564,563,562,561,560,558,557,556
|
||||
.short 555,554,553,551,550,549,548,547
|
||||
.short 546,544,543,542,541,540,539,538
|
||||
.short 537,536,534,533,532,531,530,529
|
||||
.short 528,527,526,525,524,523,522,521
|
||||
.short 520,519,518,517,516,515,514,513
|
||||
|
|
@ -221,17 +221,63 @@ extern UDItype __MPN(udiv_qrnnd) _PROTO ((UDItype, UDItype, UDItype, UDItype *))
|
|||
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
|
||||
__asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3" \
|
||||
: "=r" (sh), "=&r" (sl) \
|
||||
: "%r" (ah), "rI" (bh), "%r" (al), "rI" (bl))
|
||||
: "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
|
||||
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
|
||||
__asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
|
||||
: "=r" (sh), "=&r" (sl) \
|
||||
: "r" (ah), "rI" (bh), "r" (al), "rI" (bl))
|
||||
#if 1 || defined (__arm_m__) /* `M' series has widening multiply support */
|
||||
do { \
|
||||
if (__builtin_constant_p (al)) \
|
||||
{ \
|
||||
if (__builtin_constant_p (ah)) \
|
||||
__asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2" \
|
||||
: "=r" (sh), "=&r" (sl) \
|
||||
: "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
|
||||
else \
|
||||
__asm__ ("rsbs\t%1, %5, %4\n\tsbc\t%0, %2, %3" \
|
||||
: "=r" (sh), "=&r" (sl) \
|
||||
: "r" (ah), "rI" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
|
||||
} \
|
||||
else if (__builtin_constant_p (ah)) \
|
||||
{ \
|
||||
if (__builtin_constant_p (bl)) \
|
||||
__asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2" \
|
||||
: "=r" (sh), "=&r" (sl) \
|
||||
: "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
|
||||
else \
|
||||
__asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2" \
|
||||
: "=r" (sh), "=&r" (sl) \
|
||||
: "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
|
||||
} \
|
||||
else if (__builtin_constant_p (bl)) \
|
||||
{ \
|
||||
if (__builtin_constant_p (bh)) \
|
||||
__asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
|
||||
: "=r" (sh), "=&r" (sl) \
|
||||
: "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
|
||||
else \
|
||||
__asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2" \
|
||||
: "=r" (sh), "=&r" (sl) \
|
||||
: "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
|
||||
} \
|
||||
else /* only bh might be a constant */ \
|
||||
__asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
|
||||
: "=r" (sh), "=&r" (sl) \
|
||||
: "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC);\
|
||||
} while (0)
|
||||
#if 1 || defined (__arm_m__) /* `M' series has widening multiply support */
|
||||
#define umul_ppmm(xh, xl, a, b) \
|
||||
__asm__ ("umull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
|
||||
#define UMUL_TIME 5
|
||||
#define smul_ppmm(xh, xl, a, b) \
|
||||
__asm__ ("smull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
|
||||
#define UMUL_TIME 5
|
||||
#ifndef LONGLONG_STANDALONE
|
||||
/* udiv_qrnnd(q, r, n1, n0, d): call __MPN(invert_limb) on d, store the
   result in the local __di, then pass q, r, n1, n0, d and __di on to
   udiv_qrnnd_preinv.  Only defined when LONGLONG_STANDALONE is not. */
#define udiv_qrnnd(q, r, n1, n0, d) \
|
||||
do { UWtype __di; \
|
||||
__di = __MPN(invert_limb) (d); \
|
||||
udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \
|
||||
} while (0)
|
||||
#define UDIV_PREINV_ALWAYS 1
|
||||
#define UDIV_NEEDS_NORMALIZATION 1
|
||||
#define UDIV_TIME 70
|
||||
#endif /* LONGLONG_STANDALONE */
|
||||
#else
|
||||
#define umul_ppmm(xh, xl, a, b) \
|
||||
__asm__ ("%@ Inlined umul_ppmm\n" \
|
||||
|
@ -251,8 +297,16 @@ extern UDItype __MPN(udiv_qrnnd) _PROTO ((UDItype, UDItype, UDItype, UDItype *))
|
|||
: "r" (a), "r" (b) \
|
||||
: "r0", "r1", "r2")
|
||||
#define UMUL_TIME 20
|
||||
#ifndef LONGLONG_STANDALONE
|
||||
#define udiv_qrnnd(q, r, n1, n0, d) \
|
||||
do { UWtype __r; \
|
||||
(q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \
|
||||
(r) = __r; \
|
||||
} while (0)
|
||||
extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
|
||||
#define UDIV_TIME 200
|
||||
#endif /* LONGLONG_STANDALONE */
|
||||
#endif
|
||||
#define UDIV_TIME 100
|
||||
#endif /* __arm__ */
|
||||
|
||||
#if defined (__clipper__) && W_TYPE_SIZE == 32
|
||||
|
|
Loading…
Reference in New Issue
Block a user