From 17bc6262938f3b29e7ec66d0e77eeb08807854cd Mon Sep 17 00:00:00 2001 From: Matthew Flatt Date: Mon, 29 Jul 2019 18:21:26 -0600 Subject: [PATCH] fix bignum quotient on 32-bit ARM The assembly implementation of `gmpn_invert_limb` is needed to correctly implement bignum division within the embedded slice of GMP. Relevant to #2773 --- racket/src/cfg-racket | 4 ++ racket/src/racket/configure.ac | 4 ++ racket/src/racket/src/Makefile.in | 3 ++ racket/src/racket/src/gmp/gmp-impl.h | 4 ++ racket/src/racket/src/gmp/gmp_arm_gcc.s | 71 +++++++++++++++++++++++++ racket/src/racket/src/gmp/gmplonglong.h | 68 ++++++++++++++++++++--- 6 files changed, 147 insertions(+), 7 deletions(-) create mode 100644 racket/src/racket/src/gmp/gmp_arm_gcc.s diff --git a/racket/src/cfg-racket b/racket/src/cfg-racket index 61487cc6e6..a797ae6b0a 100755 --- a/racket/src/cfg-racket +++ b/racket/src/cfg-racket @@ -4834,6 +4834,10 @@ case "$host_os" in i386|i486|i586|i686) enable_parallel_by_default=yes ;; + arm*) + EXTRA_GMP_OBJ="gmp_arm_gcc.o" + EXTRA_GMP_DEP='$(EXTRA_GMP_DEP_FILE)' + ;; *) ;; esac diff --git a/racket/src/racket/configure.ac b/racket/src/racket/configure.ac index ee663abb52..a7cfba5f5d 100644 --- a/racket/src/racket/configure.ac +++ b/racket/src/racket/configure.ac @@ -604,6 +604,10 @@ case "$host_os" in i386|i486|i586|i686) enable_parallel_by_default=yes ;; + arm*) + EXTRA_GMP_OBJ="gmp_arm_gcc.o" + EXTRA_GMP_DEP='$(EXTRA_GMP_DEP_FILE)' + ;; *) ;; esac diff --git a/racket/src/racket/src/Makefile.in b/racket/src/racket/src/Makefile.in index 873922c89d..38b9d2c544 100644 --- a/racket/src/racket/src/Makefile.in +++ b/racket/src/racket/src/Makefile.in @@ -209,6 +209,7 @@ fun.@LTO@: $(srcdir)/fun.c future.@LTO@: $(srcdir)/future.c $(CC) $(ALL_CFLAGS) -c $(srcdir)/future.c -o future.@LTO@ gmp.@LTO@: $(srcdir)/gmp/gmp.c $(srcdir)/gmp/gmplonglong.h \ + $(srcdir)/gmp/gmp.h $(srcdir)/gmp/gmp-impl.h $(srcdir)/gmp/gmp-mparam.h \ $(srcdir)/../include/schthread.h $(srcdir)/../sconfig.h $(CC) $(ALL_CFLAGS) -c $(srcdir)/gmp/gmp.c -o gmp.@LTO@ hash.@LTO@: $(srcdir)/hash.c $(srcdir)/hamt_subset.inc @@ -300,6 +301,8 @@ vector.@LTO@: $(srcdir)/vector.c gmp_alpha_gcc.@LTO@: $(srcdir)/gmp/gmp_alpha_gcc.s $(AS) -o gmp_alpha_gcc.@LTO@ $(srcdir)/gmp/gmp_alpha_gcc.s +gmp_arm_gcc.@LTO@: $(srcdir)/gmp/gmp_arm_gcc.s + $(AS) -o gmp_arm_gcc.@LTO@ $(srcdir)/gmp/gmp_arm_gcc.s SCONFIG = $(srcdir)/../sconfig.h $(srcdir)/../uconfig.h ../mzconfig.h diff --git a/racket/src/racket/src/gmp/gmp-impl.h b/racket/src/racket/src/gmp/gmp-impl.h index 1560a95e8e..79f69af649 100644 --- a/racket/src/racket/src/gmp/gmp-impl.h +++ b/racket/src/racket/src/gmp/gmp-impl.h @@ -720,6 +720,10 @@ extern mp_size_t __gmp_default_fp_limb_precision; #define TARGET_REGISTER_STARVED 0 #endif +#if defined (__arm__) +# define HAVE_NATIVE_mpn_invert_limb 1 +#endif + /* Use a library function for invert_limb, if available. */ #if ! defined (invert_limb) && HAVE_NATIVE_mpn_invert_limb #define mpn_invert_limb __MPN(invert_limb) diff --git a/racket/src/racket/src/gmp/gmp_arm_gcc.s b/racket/src/racket/src/gmp/gmp_arm_gcc.s new file mode 100644 index 0000000000..5d1ada7449 --- /dev/null +++ b/racket/src/racket/src/gmp/gmp_arm_gcc.s @@ -0,0 +1,71 @@ + .globl scheme_gmpn_invert_limb +scheme_gmpn_invert_limb: + stmfd sp!, {r4, lr} + mov r3, r0, lsr #23 + sub r3, r3, #256 + add r2, pc, #invtab-.-8 + mov r3, r3, lsl #1 + ldrh r1, [r2, r3] + mov r2, r1, lsl #6 + mul ip, r2, r2 + umull lr, r4, ip, r0 + mov r2, r4, lsl #1 + rsb r2, r2, r1, lsl #23 + umull ip, r3, r2, r2 + umull lr, r4, r3, r0 + umull r3, r1, ip, r0 + adds lr, lr, r1 + addcs r4, r4, #1 + mov r3, lr, lsr #30 + orr r4, r3, r4, lsl #2 + mov lr, lr, lsl #2 + cmn lr, #1 + rsc r2, r4, r2, lsl #2 + umull ip, r1, r0, r2 + add r1, r1, r0 + cmn r1, #1 + beq $l1 + adds ip, ip, r0 + adc r1, r1, #0 + add r2, r2, #1 +$l1: + adds r3, ip, r0 + adcs r1, r1, #0 + moveq r0, r2 + addne r0, r2, #1 + ldmfd sp!, {r4, pc} + +invtab: + .short 1023,1020,1016,1012,1008,1004,1000,996 + .short 992,989,985,981,978,974,970,967 + .short 963,960,956,953,949,946,942,939 + .short 936,932,929,926,923,919,916,913 + .short 910,907,903,900,897,894,891,888 + .short 885,882,879,876,873,870,868,865 + .short 862,859,856,853,851,848,845,842 + .short 840,837,834,832,829,826,824,821 + .short 819,816,814,811,809,806,804,801 + .short 799,796,794,791,789,787,784,782 + .short 780,777,775,773,771,768,766,764 + .short 762,759,757,755,753,751,748,746 + .short 744,742,740,738,736,734,732,730 + .short 728,726,724,722,720,718,716,714 + .short 712,710,708,706,704,702,700,699 + .short 697,695,693,691,689,688,686,684 + .short 682,680,679,677,675,673,672,670 + .short 668,667,665,663,661,660,658,657 + .short 655,653,652,650,648,647,645,644 + .short 642,640,639,637,636,634,633,631 + .short 630,628,627,625,624,622,621,619 + .short 618,616,615,613,612,611,609,608 + .short 606,605,604,602,601,599,598,597 + .short 595,594,593,591,590,589,587,586 + .short 585,583,582,581,579,578,577,576 + .short 574,573,572,571,569,568,567,566 + .short 564,563,562,561,560,558,557,556 + .short 555,554,553,551,550,549,548,547 + .short 546,544,543,542,541,540,539,538 + .short 537,536,534,533,532,531,530,529 + .short 528,527,526,525,524,523,522,521 + .short 520,519,518,517,516,515,514,513 + diff --git a/racket/src/racket/src/gmp/gmplonglong.h b/racket/src/racket/src/gmp/gmplonglong.h index acd71e6669..4ac9c40137 100644 --- a/racket/src/racket/src/gmp/gmplonglong.h +++ b/racket/src/racket/src/gmp/gmplonglong.h @@ -221,17 +221,63 @@ extern UDItype __MPN(udiv_qrnnd) _PROTO ((UDItype, UDItype, UDItype, UDItype *)) #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3" \ : "=r" (sh), "=&r" (sl) \ - : "%r" (ah), "rI" (bh), "%r" (al), "rI" (bl)) + : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC) #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ - __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \ - : "=r" (sh), "=&r" (sl) \ - : "r" (ah), "rI" (bh), "r" (al), "rI" (bl)) -#if 1 || defined (__arm_m__) /* `M' series has widening multiply support */ + do { \ + if (__builtin_constant_p (al)) \ + { \ + if (__builtin_constant_p (ah)) \ + __asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2" \ + : "=r" (sh), "=&r" (sl) \ + : "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \ + else \ + __asm__ ("rsbs\t%1, %5, %4\n\tsbc\t%0, %2, %3" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "rI" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \ + } \ + else if (__builtin_constant_p (ah)) \ + { \ + if (__builtin_constant_p (bl)) \ + __asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2" \ + : "=r" (sh), "=&r" (sl) \ + : "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \ + else \ + __asm__ ("rsbs\t%1, %5, %4\n\trsc\t%0, %3, %2" \ + : "=r" (sh), "=&r" (sl) \ + : "rI" (ah), "r" (bh), "rI" (al), "r" (bl) __CLOBBER_CC); \ + } \ + else if (__builtin_constant_p (bl)) \ + { \ + if (__builtin_constant_p (bh)) \ + __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \ + else \ + __asm__ ("subs\t%1, %4, %5\n\trsc\t%0, %3, %2" \ + : "=r" (sh), "=&r" (sl) \ + : "rI" (ah), "r" (bh), "r" (al), "rI" (bl) __CLOBBER_CC); \ + } \ + else /* only bh might be a constant */ \ + __asm__ ("subs\t%1, %4, %5\n\tsbc\t%0, %2, %3" \ + : "=r" (sh), "=&r" (sl) \ + : "r" (ah), "rI" (bh), "r" (al), "rI" (bl) __CLOBBER_CC);\ + } while (0) +#if 1 || defined (__arm_m__) /* `M' series has widening multiply support */ #define umul_ppmm(xh, xl, a, b) \ __asm__ ("umull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b)) +#define UMUL_TIME 5 #define smul_ppmm(xh, xl, a, b) \ __asm__ ("smull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b)) -#define UMUL_TIME 5 +#ifndef LONGLONG_STANDALONE +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { UWtype __di; \ + __di = __MPN(invert_limb) (d); \ + udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \ + } while (0) +#define UDIV_PREINV_ALWAYS 1 +#define UDIV_NEEDS_NORMALIZATION 1 +#define UDIV_TIME 70 +#endif /* LONGLONG_STANDALONE */ #else #define umul_ppmm(xh, xl, a, b) \ __asm__ ("%@ Inlined umul_ppmm\n" \ @@ -251,8 +297,16 @@ extern UDItype __MPN(udiv_qrnnd) _PROTO ((UDItype, UDItype, UDItype, UDItype *)) : "r" (a), "r" (b) \ : "r0", "r1", "r2") #define UMUL_TIME 20 +#ifndef LONGLONG_STANDALONE +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { UWtype __r; \ + (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d)); \ + (r) = __r; \ + } while (0) +extern UWtype __MPN(udiv_qrnnd) _PROTO ((UWtype *, UWtype, UWtype, UWtype)); +#define UDIV_TIME 200 +#endif /* LONGLONG_STANDALONE */ #endif -#define UDIV_TIME 100 #endif /* __arm__ */ #if defined (__clipper__) && W_TYPE_SIZE == 32