fix bignum quotient on 32-bit ARM

The assembly implementation of `gmpn_invert_limb` is needed to
correctly implement bignum division within the embedded slice of GMP.

Relevant to #2773
Matthew Flatt 2019-07-29 18:21:26 -06:00
parent a1631424bf
commit 17bc626293
6 changed files with 147 additions and 7 deletions
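
Background, not part of the commit itself: GMP's `mpn_invert_limb` computes the
reciprocal that `udiv_qrnnd_preinv` uses to replace division by an invariant,
normalized limb with multiplication. A minimal C reference for 32-bit limbs,
using the hypothetical name `invert_limb_ref`, could look like this:

#include <stdint.h>

/* Reference only, not the shipped code: for a normalized divisor d
   (top bit set) and limb base B = 2^32, the inverse is
   floor((B^2 - 1) / d) - B, which always fits in 32 bits. */
static uint32_t invert_limb_ref(uint32_t d)
{
    return (uint32_t)(UINT64_MAX / d - ((uint64_t)1 << 32));
}

The new gmp_arm_gcc.s below produces the same value without a 64-by-32
division, starting from a table lookup and refining it with umull-based
Newton-style steps.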

@@ -4834,6 +4834,10 @@ case "$host_os" in
i386|i486|i586|i686)
enable_parallel_by_default=yes
;;
arm*)
EXTRA_GMP_OBJ="gmp_arm_gcc.o"
EXTRA_GMP_DEP='$(EXTRA_GMP_DEP_FILE)'
;;
*)
;;
esac

@@ -604,6 +604,10 @@ case "$host_os" in
i386|i486|i586|i686)
enable_parallel_by_default=yes
;;
arm*)
EXTRA_GMP_OBJ="gmp_arm_gcc.o"
EXTRA_GMP_DEP='$(EXTRA_GMP_DEP_FILE)'
;;
*)
;;
esac

@@ -209,6 +209,7 @@ fun.@LTO@: $(srcdir)/fun.c
future.@LTO@: $(srcdir)/future.c
$(CC) $(ALL_CFLAGS) -c $(srcdir)/future.c -o future.@LTO@
gmp.@LTO@: $(srcdir)/gmp/gmp.c $(srcdir)/gmp/gmplonglong.h \
$(srcdir)/gmp/gmp.h $(srcdir)/gmp/gmp-impl.h $(srcdir)/gmp/gmp-mparam.h \
$(srcdir)/../include/schthread.h $(srcdir)/../sconfig.h
$(CC) $(ALL_CFLAGS) -c $(srcdir)/gmp/gmp.c -o gmp.@LTO@
hash.@LTO@: $(srcdir)/hash.c $(srcdir)/hamt_subset.inc
@@ -300,6 +301,8 @@ vector.@LTO@: $(srcdir)/vector.c
gmp_alpha_gcc.@LTO@: $(srcdir)/gmp/gmp_alpha_gcc.s
$(AS) -o gmp_alpha_gcc.@LTO@ $(srcdir)/gmp/gmp_alpha_gcc.s
gmp_arm_gcc.@LTO@: $(srcdir)/gmp/gmp_arm_gcc.s
$(AS) -o gmp_arm_gcc.@LTO@ $(srcdir)/gmp/gmp_arm_gcc.s
SCONFIG = $(srcdir)/../sconfig.h $(srcdir)/../uconfig.h ../mzconfig.h

@@ -720,6 +720,10 @@ extern mp_size_t __gmp_default_fp_limb_precision;
#define TARGET_REGISTER_STARVED 0
#endif
#if defined (__arm__)
# define HAVE_NATIVE_mpn_invert_limb 1
#endif
/* Use a library function for invert_limb, if available. */
#if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb
#define mpn_invert_limb __MPN(invert_limb)

@@ -0,0 +1,71 @@
.globl scheme_gmpn_invert_limb
scheme_gmpn_invert_limb:
stmfd sp!, {r4, lr}
mov r3, r0, lsr #23
sub r3, r3, #256
add r2, pc, #invtab-.-8
mov r3, r3, lsl #1
ldrh r1, [r2, r3]
mov r2, r1, lsl #6
mul ip, r2, r2
umull lr, r4, ip, r0
mov r2, r4, lsl #1
rsb r2, r2, r1, lsl #23
umull ip, r3, r2, r2
umull lr, r4, r3, r0
umull r3, r1, ip, r0
adds lr, lr, r1
addcs r4, r4, #1
mov r3, lr, lsr #30
orr r4, r3, r4, lsl #2
mov lr, lr, lsl #2
cmn lr, #1
rsc r2, r4, r2, lsl #2
umull ip, r1, r0, r2
add r1, r1, r0
cmn r1, #1
beq $l1
adds ip, ip, r0
adc r1, r1, #0
add r2, r2, #1
$l1:
adds r3, ip, r0
adcs r1, r1, #0
moveq r0, r2
addne r0, r2, #1
ldmfd sp!, {r4, pc}
invtab:
.short 1023,1020,1016,1012,1008,1004,1000,996
.short 992,989,985,981,978,974,970,967
.short 963,960,956,953,949,946,942,939
.short 936,932,929,926,923,919,916,913
.short 910,907,903,900,897,894,891,888
.short 885,882,879,876,873,870,868,865
.short 862,859,856,853,851,848,845,842
.short 840,837,834,832,829,826,824,821
.short 819,816,814,811,809,806,804,801
.short 799,796,794,791,789,787,784,782
.short 780,777,775,773,771,768,766,764
.short 762,759,757,755,753,751,748,746
.short 744,742,740,738,736,734,732,730
.short 728,726,724,722,720,718,716,714
.short 712,710,708,706,704,702,700,699
.short 697,695,693,691,689,688,686,684
.short 682,680,679,677,675,673,672,670
.short 668,667,665,663,661,660,658,657
.short 655,653,652,650,648,647,645,644
.short 642,640,639,637,636,634,633,631
.short 630,628,627,625,624,622,621,619
.short 618,616,615,613,612,611,609,608
.short 606,605,604,602,601,599,598,597
.short 595,594,593,591,590,589,587,586
.short 585,583,582,581,579,578,577,576
.short 574,573,572,571,569,568,567,566
.short 564,563,562,561,560,558,557,556
.short 555,554,553,551,550,549,548,547
.short 546,544,543,542,541,540,539,538
.short 537,536,534,533,532,531,530,529
.short 528,527,526,525,524,523,522,521
.short 520,519,518,517,516,515,514,513
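
An observation about the table above, not something stated in the commit: the
256 invtab entries are consistent with floor((2^18 - 1) / (2^8 + i)) for
i = 0..255, i.e. an initial ~10-bit reciprocal approximation indexed by the
divisor's top 9 bits, which the umull sequence above refines to the full
32-bit inverse. A hypothetical generator:

#include <stdio.h>

/* Hypothetical invtab generator, assuming the entries are
   floor((2^18 - 1) / (2^8 + i)) for i = 0..255. */
int main(void)
{
    for (int i = 0; i < 256; i++)
        printf("%s%u", (i % 8) ? "," : "\n.short ",
               ((1u << 18) - 1) / (256u + i));
    printf("\n");
    return 0;
}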

@@ -221,17 +221,63 @@ extern UDItype __MPN(udiv_qrnnd) _PROTO ((UDItype, UDItype, UDItype, UDItype *))
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
__asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3" \
: "=r" (sh), "=&r" (sl) \
: "%r" (ah), "rI" (bh), "%r" (al), "rI" (bl))
: "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
__asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
: "=r" (sh), "=&r" (sl) \
: "r" (ah), "rI" (bh), "r" (al), "rI" (bl))
#if 1 || defined (__arm_m__) /* `M' series has widening multiply support */
do { \
if (__builtin_constant_p (al)) \
{ \
if (__builtin_constant_p (ah)) \
__asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2" \
: "=r" (sh), "=&r" (sl) \
: "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
else \
__asm__ ("rsbs\t%1, %5, %4\n\tsbc\t%0, %2, %3" \
: "=r" (sh), "=&r" (sl) \
: "r" (ah), "rI" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
} \
else if (__builtin_constant_p (ah)) \
{ \
if (__builtin_constant_p (bl)) \
__asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2" \
: "=r" (sh), "=&r" (sl) \
: "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
else \
__asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2" \
: "=r" (sh), "=&r" (sl) \
: "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \
} \
else if (__builtin_constant_p (bl)) \
{ \
if (__builtin_constant_p (bh)) \
__asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
: "=r" (sh), "=&r" (sl) \
: "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
else \
__asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2" \
: "=r" (sh), "=&r" (sl) \
: "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \
} \
else /* only bh might be a constant */ \
__asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \
: "=r" (sh), "=&r" (sl) \
: "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC);\
} while (0)
#if 1 || defined (__arm_m__) /* `M' series has widening multiply support */
#define umul_ppmm(xh, xl, a, b) \
__asm__ ("umull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
#define UMUL_TIME 5
#define smul_ppmm(xh, xl, a, b) \
__asm__ ("smull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
#define UMUL_TIME 5
#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
do { UWtype __di; \
__di = __MPN(invert_limb) (d); \
udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \
} while (0)
#define UDIV_PREINV_ALWAYS 1
#define UDIV_NEEDS_NORMALIZATION 1
#define UDIV_TIME 70
#endif /* LONGLONG_STANDALONE */
#else
#define umul_ppmm(xh, xl, a, b) \
__asm__ ("%@ Inlined umul_ppmm\n" \
@@ -251,8 +297,16 @@ extern UDItype __MPN(udiv_qrnnd) _PROTO ((UDItype, UDItype, UDItype, UDItype *))
: "r" (a), "r" (b) \
: "r0", "r1", "r2")
#define UMUL_TIME 20
#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
do { UWtype __r; \
(q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \
(r) = __r; \
} while (0)
extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype));
#define UDIV_TIME 200
#endif /* LONGLONG_STANDALONE */
#endif
#define UDIV_TIME 100
#endif /* __arm__ */
#if defined (__clipper__) && W_TYPE_SIZE == 32
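
For completeness, a sketch with hypothetical names (not GMP's actual
udiv_qrnnd_preinv macro) of how the precomputed inverse turns the 2-by-1 limb
division behind udiv_qrnnd into a multiply plus a small correction:

#include <stdint.h>

/* Sketch of a preinverted 2x1 division step: nh:nl is the dividend with
   nh < d, d is normalized (top bit set), and di is the inverse from
   invert_limb_ref above.  The estimate never exceeds the true quotient
   and is at most a few units low, so the loop runs at most a few times. */
static uint32_t udiv_preinv_ref(uint32_t *rem, uint32_t nh, uint32_t nl,
                                uint32_t d, uint32_t di)
{
    uint64_t n = ((uint64_t)nh << 32) | nl;
    uint32_t q = (uint32_t)((((uint64_t)nh * di) >> 32) + nh);
    uint64_t r = n - (uint64_t)q * d;
    while (r >= d) {
        q++;
        r -= d;
    }
    *rem = (uint32_t)r;
    return q;
}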