Chez Scheme: improve multiplication and scheduling

Raise the threshold for using Karatsuba. The experimentally determined
threshold (on an M1 Mac) matches the GMP default threshold, so that
seems like a good sign.

Also, adjust kernel bignum operations to decrease the trap counter.
Otherwise, a program that performs many big multiplcations or
divisions does not check for Ctl-C or swap threads often enough.
This commit is contained in:
Matthew Flatt 2021-02-24 10:25:01 -07:00
parent c8bc0c76ad
commit 0cb9643fcb
4 changed files with 27 additions and 7 deletions

View File

@ -1042,6 +1042,9 @@ ptr S_bignum(tc, n, sign) ptr tc; iptr n; IBOOL sign; {
if ((uptr)n > (uptr)maximum_bignum_length) if ((uptr)n > (uptr)maximum_bignum_length)
S_error("", "invalid bignum size request"); S_error("", "invalid bignum size request");
/* for anything that allocates bignums, make sure scheduling fuel is consumed */
USE_TRAP_FUEL(tc, n);
d = size_bignum(n); d = size_bignum(n);
newspace_find_room(tc, type_typed_object, d, p); newspace_find_room(tc, type_typed_object, d, p);
BIGTYPE(p) = (uptr)n << bignum_length_offset | sign << bignum_sign_offset | type_bignum; BIGTYPE(p) = (uptr)n << bignum_length_offset | sign << bignum_sign_offset | type_bignum;

View File

@ -36,7 +36,7 @@ static ptr big_mul PROTO((ptr tc, ptr x, ptr y, iptr xl, iptr yl, IBOOL sign));
static void big_short_trunc PROTO((ptr tc, ptr x, bigit s, iptr xl, IBOOL qs, IBOOL rs, ptr *q, ptr *r)); static void big_short_trunc PROTO((ptr tc, ptr x, bigit s, iptr xl, IBOOL qs, IBOOL rs, ptr *q, ptr *r));
static void big_trunc PROTO((ptr tc, ptr x, ptr y, iptr xl, iptr yl, IBOOL qs, IBOOL rs, ptr *q, ptr *r)); static void big_trunc PROTO((ptr tc, ptr x, ptr y, iptr xl, iptr yl, IBOOL qs, IBOOL rs, ptr *q, ptr *r));
static INT normalize PROTO((bigit *xp, bigit *yp, iptr xl, iptr yl)); static INT normalize PROTO((bigit *xp, bigit *yp, iptr xl, iptr yl));
static bigit quotient_digit PROTO((bigit *xp, bigit *yp, iptr yl)); static bigit quotient_digit PROTO((ptr tc, bigit *xp, bigit *yp, iptr yl));
static bigit qhat PROTO((bigit *xp, bigit *yp)); static bigit qhat PROTO((bigit *xp, bigit *yp));
static ptr big_short_gcd PROTO((ptr tc, ptr x, bigit y, iptr xl)); static ptr big_short_gcd PROTO((ptr tc, ptr x, bigit y, iptr xl));
static ptr big_gcd PROTO((ptr tc, ptr x, ptr y, iptr xl, iptr yl)); static ptr big_gcd PROTO((ptr tc, ptr x, ptr y, iptr xl, iptr yl));
@ -636,6 +636,9 @@ static ptr big_mul(tc, x, y, xl, yl, sign) ptr tc, x, y; iptr xl, yl; IBOOL sign
PREPARE_BIGNUM(tc, W(tc),xl+yl) PREPARE_BIGNUM(tc, W(tc),xl+yl)
for (xi = xl, zp = &BIGIT(W(tc),xl+yl-1); xi-- > 0; ) *zp-- = 0; for (xi = xl, zp = &BIGIT(W(tc),xl+yl-1); xi-- > 0; ) *zp-- = 0;
/* account for nested loop: */
USE_TRAP_FUEL(tc, xl * yl);
for (yi=yl,yp= &BIGIT(y,yl-1),zp= &BIGIT(W(tc),xl+yl-1); yi-- > 0; yp--, zp--) for (yi=yl,yp= &BIGIT(y,yl-1),zp= &BIGIT(W(tc),xl+yl-1); yi-- > 0; yp--, zp--)
if (*yp == 0) if (*yp == 0)
*(zp-xl) = 0; *(zp-xl) = 0;
@ -796,11 +799,11 @@ static void big_trunc(tc, x, y, xl, yl, qs, rs, q, r)
d = normalize(xp, yp, xl, yl); d = normalize(xp, yp, xl, yl);
if (q == (ptr *)NULL) { if (q == (ptr *)NULL) {
for (i = m; i-- > 0 ; xp++) (void) quotient_digit(xp, yp, yl); for (i = m; i-- > 0 ; xp++) (void) quotient_digit(tc, xp, yp, yl);
} else { } else {
PREPARE_BIGNUM(tc, W(tc),m) PREPARE_BIGNUM(tc, W(tc),m)
p = &BIGIT(W(tc),0); p = &BIGIT(W(tc),0);
for (i = m; i-- > 0 ; xp++) *p++ = quotient_digit(xp, yp, yl); for (i = m; i-- > 0 ; xp++) *p++ = quotient_digit(tc, xp, yp, yl);
*q = copy_normalize(tc, &BIGIT(W(tc),0),m,qs); *q = copy_normalize(tc, &BIGIT(W(tc),0),m,qs);
} }
@ -829,10 +832,13 @@ static INT normalize(xp, yp, xl, yl) bigit *xp, *yp; iptr xl, yl; {
return shft; return shft;
} }
static bigit quotient_digit(xp, yp, yl) bigit *xp, *yp; iptr yl; { static bigit quotient_digit(tc, xp, yp, yl) ptr tc; bigit *xp, *yp; iptr yl; {
bigit *p1, *p2, q, k, b, prod; bigit *p1, *p2, q, k, b, prod;
iptr i; iptr i;
/* this function is called in loops, so use fuel every time */
USE_TRAP_FUEL(tc, yl);
q = qhat(xp, yp); q = qhat(xp, yp);
for (i = yl, p1 = xp+yl, p2 = yp+yl-1, k = 0, b = 0; i-- > 0; p1--, p2--) { for (i = yl, p1 = xp+yl, p2 = yp+yl-1, k = 0, b = 0; i-- > 0; p1--, p2--) {
@ -934,6 +940,9 @@ static ptr big_gcd(tc, x, y, xl, yl) ptr tc, x, y; iptr xl, yl; {
if (asc+shft >= bigit_bits) shft -= bigit_bits; if (asc+shft >= bigit_bits) shft -= bigit_bits;
asc += shft; asc += shft;
/* account for nested loops: */
USE_TRAP_FUEL(tc, xl + yl);
/* shift left or right; adjust lengths, xp and yp */ /* shift left or right; adjust lengths, xp and yp */
if (shft < 0) { /* shift right */ if (shft < 0) { /* shift right */
for (i = yl--, p = yp++, k = 0; i-- > 0; p++) ERSH(-shft,p,&k) for (i = yl--, p = yp++, k = 0; i-- > 0; p++) ERSH(-shft,p,&k)
@ -948,7 +957,7 @@ static ptr big_gcd(tc, x, y, xl, yl) ptr tc, x, y; iptr xl, yl; {
} }
/* destructive remainder x = x rem y */ /* destructive remainder x = x rem y */
for (i = xl-yl+1; i-- > 0; xp++) (void) quotient_digit(xp, yp, yl); for (i = xl-yl+1; i-- > 0; xp++) (void) quotient_digit(tc, xp, yp, yl);
/* strip leading zero bigits. remainder is at most yl bigits long */ /* strip leading zero bigits. remainder is at most yl bigits long */
for (i = yl ; *xp == 0 && i > 0; xp++, i--); for (i = yl ; *xp == 0 && i > 0; xp++, i--);
@ -1121,7 +1130,7 @@ static double big_floatify(tc, x, y, xl, yl, sign) ptr tc, x, y; iptr xl, yl; IB
p = &BIGIT(W(tc),0); p = &BIGIT(W(tc),0);
/* compute 'enough' bigits of the quotient */ /* compute 'enough' bigits of the quotient */
for (i = enough; i-- > 0; xp++) *p++ = quotient_digit(xp, yp, yl); for (i = enough; i-- > 0; xp++) *p++ = quotient_digit(tc, xp, yp, yl);
/* set k if remainder is nonzero */ /* set k if remainder is nonzero */
k = 0; k = 0;

View File

@ -558,3 +558,11 @@ typedef struct thread_gc {
and it would be ok to round up the length to a word size. But and it would be ok to round up the length to a word size. But
probably the compiler does a fine job with plain old `mempcy`. */ probably the compiler does a fine job with plain old `mempcy`. */
#define memcpy_aligned memcpy #define memcpy_aligned memcpy
#define USE_TRAP_FUEL(tc, n) do { \
uptr _amt_ = (uptr)(n); \
if ((uptr)TRAP(tc) > _amt_) \
TRAP(tc) = (ptr)((uptr)TRAP(tc) - _amt_); \
else \
TRAP(tc) = (ptr)1; \
} while (0)

View File

@ -2223,7 +2223,7 @@
(define xl (if (bignum? x) ($bignum-length x) 0)) (define xl (if (bignum? x) ($bignum-length x) 0))
(define yl (if (bignum? y) ($bignum-length y) 0)) (define yl (if (bignum? y) ($bignum-length y) 0))
(cond (cond
[(and (fx< xl 10) (fx< yl 10)) [(and (fx< xl 30) (fx< yl 30))
(integer* x y)] (integer* x y)]
[else [else
(let* ([k (fx* (fxquotient (fxmax xl yl) 2) (constant bigit-bits))] (let* ([k (fx* (fxquotient (fxmax xl yl) 2) (constant bigit-bits))]