#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
#ifndef BIN_GOETGHELUCK_THRESHOLD
#define BIN_GOETGHELUCK_THRESHOLD 1000
#endif
#ifndef BIN_UIUI_ENABLE_SMALLDC
#define BIN_UIUI_ENABLE_SMALLDC 1
#endif
#ifndef BIN_UIUI_RECURSIVE_SMALLDC
#define BIN_UIUI_RECURSIVE_SMALLDC (GMP_NUMB_BITS > 32)
#endif
#define SOME_THRESHOLD 20
static mp_limb_t
mul1 (mp_limb_t m)
{
return m;
}
static mp_limb_t
mul2 (mp_limb_t m)
{
mp_limb_t m01 = (m | 1) * ((m + 1) >> 1);
return m01;
}
static mp_limb_t
mul3 (mp_limb_t m)
{
mp_limb_t m01 = (m + 0) * (m + 1) >> 1;
mp_limb_t m2 = (m + 2);
return m01 * m2;
}
static mp_limb_t
mul4 (mp_limb_t m)
{
mp_limb_t m01 = (m + 0) * (m + 1) >> 1;
mp_limb_t m23 = (m + 2) * (m + 3) >> 1;
return m01 * m23;
}
static mp_limb_t
mul5 (mp_limb_t m)
{
mp_limb_t m012 = (m + 0) * (m + 1) * (m + 2) >> 1;
mp_limb_t m34 = (m + 3) * (m + 4) >> 1;
return m012 * m34;
}
static mp_limb_t
mul6 (mp_limb_t m)
{
mp_limb_t m01 = (m + 0) * (m + 1);
mp_limb_t m23 = (m + 2) * (m + 3);
mp_limb_t m45 = (m + 4) * (m + 5) >> 1;
mp_limb_t m0123 = m01 * m23 >> 3;
return m0123 * m45;
}
static mp_limb_t
mul7 (mp_limb_t m)
{
mp_limb_t m01 = (m + 0) * (m + 1);
mp_limb_t m23 = (m + 2) * (m + 3);
mp_limb_t m456 = (m + 4) * (m + 5) * (m + 6) >> 1;
mp_limb_t m0123 = m01 * m23 >> 3;
return m0123 * m456;
}
static mp_limb_t
mul8 (mp_limb_t m)
{
mp_limb_t m01 = (m + 0) * (m + 1);
mp_limb_t m23 = (m + 2) * (m + 3);
mp_limb_t m45 = (m + 4) * (m + 5);
mp_limb_t m67 = (m + 6) * (m + 7);
mp_limb_t m0123 = m01 * m23 >> 3;
mp_limb_t m4567 = m45 * m67 >> 3;
return m0123 * m4567;
}
typedef mp_limb_t (* mulfunc_t) (mp_limb_t);
static const mulfunc_t mulfunc[] = {mul1,mul2,mul3,mul4,mul5,mul6,mul7,mul8};
#define M (numberof(mulfunc))
static const unsigned char tcnttab[] = {0, 1, 1, 2, 2, 4, 4, 6};
#if 1
#define MAXFACS(max,l) \
do { \
(max) = log_n_max (l); \
} while (0)
#else#endif
static const mp_limb_t facinv[] = { ONE_LIMB_ODD_FACTORIAL_INVERSES_TABLE };
static void
mpz_bdiv_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
{
int nmax, kmax, nmaxnow, numfac;
mp_ptr np, kp;
mp_size_t nn, kn, alloc;
mp_limb_t i, j, t, iii, jjj, cy, dinv;
mp_bitcnt_t i2cnt, j2cnt;
int cnt;
mp_size_t maxn;
TMP_DECL;
ASSERT (k > ODD_FACTORIAL_TABLE_LIMIT);
TMP_MARK;
maxn = 1 + n / GMP_NUMB_BITS;
alloc = SOME_THRESHOLD - 1 + MAX (3 * maxn / 2, SOME_THRESHOLD);
alloc = MIN (alloc, k) + 1;
np = TMP_ALLOC_LIMBS (alloc);
kp = TMP_ALLOC_LIMBS (SOME_THRESHOLD + 1);
MAXFACS (nmax, n);
ASSERT (nmax <= M);
MAXFACS (kmax, k);
ASSERT (kmax <= M);
ASSERT (k >= M);
i = n - k + 1;
np[0] = 1; nn = 1;
i2cnt = 0;
j2cnt = __gmp_fac2cnt_table[ODD_FACTORIAL_TABLE_LIMIT / 2 - 1];
numfac = 1;
j = ODD_FACTORIAL_TABLE_LIMIT + 1;
jjj = ODD_FACTORIAL_TABLE_MAX;
ASSERT (__gmp_oddfac_table[ODD_FACTORIAL_TABLE_LIMIT] == ODD_FACTORIAL_TABLE_MAX);
while (1)
{
kp[0] = jjj;
kn = 1;
t = k - j + 1;
kmax = MIN (kmax, t);
while (kmax != 0 && kn < SOME_THRESHOLD)
{
jjj = mulfunc[kmax - 1] (j);
j += kmax;
count_trailing_zeros (cnt, jjj);
jjj >>= cnt;
j2cnt += tcnttab[kmax - 1] + cnt;
cy = mpn_mul_1 (kp, kp, kn, jjj);
kp[kn] = cy;
kn += cy != 0;
t = k - j + 1;
kmax = MIN (kmax, t);
}
numfac = j - numfac;
while (numfac != 0)
{
nmaxnow = MIN (nmax, numfac);
iii = mulfunc[nmaxnow - 1] (i);
i += nmaxnow;
count_trailing_zeros (cnt, iii);
iii >>= cnt;
i2cnt += tcnttab[nmaxnow - 1] + cnt;
cy = mpn_mul_1 (np, np, nn, iii);
np[nn] = cy;
nn += cy != 0;
numfac -= nmaxnow;
}
ASSERT (nn < alloc);
binvert_limb (dinv, kp[0]);
nn += (np[nn - 1] >= kp[kn - 1]);
nn -= kn;
mpn_sbpi1_bdiv_q (np, np, nn, kp, MIN(kn,nn), -dinv);
if (kmax == 0)
break;
numfac = j;
jjj = mulfunc[kmax - 1] (j);
j += kmax;
count_trailing_zeros (cnt, jjj);
jjj >>= cnt;
j2cnt += tcnttab[kmax - 1] + cnt;
}
cnt = i2cnt - j2cnt;
if (cnt != 0)
{
ASSERT (cnt < GMP_NUMB_BITS);
cy = mpn_lshift (np, np, nn, cnt);
np[nn] = cy;
nn += cy != 0;
}
nn -= np[nn - 1] == 0;
kp = MPZ_NEWALLOC (r, nn);
SIZ(r) = nn;
MPN_COPY (kp, np, nn);
TMP_FREE;
}
static void
mpz_smallk_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
{
int nmax, numfac;
mp_ptr rp;
mp_size_t rn, alloc;
mp_limb_t i, iii, cy;
mp_bitcnt_t i2cnt, cnt;
count_leading_zeros (cnt, (mp_limb_t) n);
cnt = GMP_LIMB_BITS - cnt;
alloc = cnt * k / GMP_NUMB_BITS + 3;
rp = MPZ_NEWALLOC (r, alloc);
MAXFACS (nmax, n);
nmax = MIN (nmax, M);
i = n - k + 1;
nmax = MIN (nmax, k);
rp[0] = mulfunc[nmax - 1] (i);
rn = 1;
i += nmax;
i2cnt = tcnttab[nmax - 1];
numfac = k - nmax;
while (numfac != 0)
{
nmax = MIN (nmax, numfac);
iii = mulfunc[nmax - 1] (i);
i += nmax;
i2cnt += tcnttab[nmax - 1];
cy = mpn_mul_1 (rp, rp, rn, iii);
rp[rn] = cy;
rn += cy != 0;
numfac -= nmax;
}
ASSERT (rn < alloc);
mpn_pi1_bdiv_q_1 (rp, rp, rn, __gmp_oddfac_table[k], facinv[k - 2],
__gmp_fac2cnt_table[k / 2 - 1] - i2cnt);
MPN_NORMALIZE_NOT_ZERO (rp, rn);
SIZ(r) = rn;
}
static mp_limb_t
bc_bin_uiui (unsigned int n, unsigned int k)
{
return ((__gmp_oddfac_table[n] * facinv[k - 2] * facinv[n - k - 2])
<< (__gmp_fac2cnt_table[n / 2 - 1] - __gmp_fac2cnt_table[k / 2 - 1] - __gmp_fac2cnt_table[(n-k) / 2 - 1]))
& GMP_NUMB_MASK;
}
static const mp_limb_t bin2kk[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_TABLE };
static const mp_limb_t bin2kkinv[] = { ONE_LIMB_ODD_CENTRAL_BINOMIAL_INVERSE_TABLE };
static const unsigned char fac2bin[] = { CENTRAL_BINOMIAL_2FAC_TABLE };
static void
mpz_smallkdc_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
{
mp_ptr rp;
mp_size_t rn;
unsigned long int hk;
hk = k >> 1;
if ((! BIN_UIUI_RECURSIVE_SMALLDC) || hk <= ODD_FACTORIAL_TABLE_LIMIT)
mpz_smallk_bin_uiui (r, n, hk);
else
mpz_smallkdc_bin_uiui (r, n, hk);
k -= hk;
n -= hk;
if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) {
mp_limb_t cy;
rn = SIZ (r);
rp = MPZ_REALLOC (r, rn + 1);
cy = mpn_mul_1 (rp, rp, rn, bc_bin_uiui (n, k));
rp [rn] = cy;
rn += cy != 0;
} else {
mp_limb_t buffer[ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3];
mpz_t t;
ALLOC (t) = ODD_CENTRAL_BINOMIAL_TABLE_LIMIT + 3;
PTR (t) = buffer;
if ((! BIN_UIUI_RECURSIVE_SMALLDC) || k <= ODD_FACTORIAL_TABLE_LIMIT)
mpz_smallk_bin_uiui (t, n, k);
else
mpz_smallkdc_bin_uiui (t, n, k);
mpz_mul (r, r, t);
rp = PTR (r);
rn = SIZ (r);
}
mpn_pi1_bdiv_q_1 (rp, rp, rn, bin2kk[k - ODD_CENTRAL_BINOMIAL_OFFSET],
bin2kkinv[k - ODD_CENTRAL_BINOMIAL_OFFSET],
fac2bin[k - ODD_CENTRAL_BINOMIAL_OFFSET] - (k != hk));
MPN_NORMALIZE_NOT_ZERO (rp, rn);
SIZ(r) = rn;
}
#define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \
if ((PR) > (MAX_PR)) { \
(VEC)[(I)++] = (PR); \
(PR) = 1; \
}
#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \
do { \
if ((PR) > (MAX_PR)) { \
(VEC)[(I)++] = (PR); \
(PR) = (P); \
} else \
(PR) *= (P); \
} while (0)
#define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) \
__max_i = (end); \
\
do { \
++__i; \
if (((sieve)[__index] & __mask) == 0) \
{ \
(prime) = id_to_n(__i)
#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \
do { \
mp_limb_t __mask, __index, __max_i, __i; \
\
__i = (start)-(off); \
__index = __i / GMP_LIMB_BITS; \
__mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \
__i += (off); \
\
LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
#define LOOP_ON_SIEVE_STOP \
} \
__mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \
__index += __mask & 1; \
} while (__i <= __max_i) \
#define LOOP_ON_SIEVE_END \
LOOP_ON_SIEVE_STOP; \
} while (0)
#if WANT_ASSERT
static mp_limb_t
bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
#endif
static mp_limb_t
id_to_n (mp_limb_t id) { return id*3+1+(id&1); }
static mp_limb_t
n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
static mp_size_t
primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
#define COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I) \
do { \
mp_limb_t __a, __b, __prime, __ma,__mb; \
__prime = (P); \
__a = (N); __b = (K); __mb = 0; \
FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \
do { \
__mb += __b % __prime; __b /= __prime; \
__ma = __a % __prime; __a /= __prime; \
if (__ma < __mb) { \
__mb = 1; (PR) *= __prime; \
} else __mb = 0; \
} while (__a >= __prime); \
} while (0)
#define SH_COUNT_A_PRIME(P, N, K, PR, MAX_PR, VEC, I) \
do { \
mp_limb_t __prime; \
__prime = (P); \
if (((N) % __prime) < ((K) % __prime)) { \
FACTOR_LIST_STORE (__prime, PR, MAX_PR, VEC, I); \
} \
} while (0)
static mp_limb_t
limb_apprsqrt (mp_limb_t x)
{
int s;
ASSERT (x > 2);
count_leading_zeros (s, x - 1);
s = GMP_LIMB_BITS - 1 - s;
return (CNST_LIMB(1) << (s >> 1)) + (CNST_LIMB(1) << ((s - 1) >> 1));
}
static void
mpz_goetgheluck_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
{
mp_limb_t *sieve, *factors, count;
mp_limb_t prod, max_prod, j;
TMP_DECL;
ASSERT (BIN_GOETGHELUCK_THRESHOLD >= 13);
ASSERT (n >= 25);
TMP_MARK;
sieve = TMP_ALLOC_LIMBS (primesieve_size (n));
count = gmp_primesieve (sieve, n) + 1;
factors = TMP_ALLOC_LIMBS (count / log_n_max (n) + 1);
max_prod = GMP_NUMB_MAX / n;
popc_limb (count, n - k);
popc_limb (j, k);
count += j;
popc_limb (j, n);
count -= j;
prod = CNST_LIMB(1) << count;
j = 0;
COUNT_A_PRIME (3, n, k, prod, max_prod, factors, j);
{
mp_limb_t s;
{
mp_limb_t prime;
s = limb_apprsqrt(n);
s = n_to_bit (s);
LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j);
LOOP_ON_SIEVE_END;
s++;
}
ASSERT (max_prod <= GMP_NUMB_MAX / 2);
max_prod <<= 1;
ASSERT (bit_to_n (s) * bit_to_n (s) > n);
ASSERT (s <= n_to_bit (n >> 1));
{
mp_limb_t prime;
LOOP_ON_SIEVE_BEGIN (prime, s, n_to_bit (n >> 1), 0,sieve);
SH_COUNT_A_PRIME (prime, n, k, prod, max_prod, factors, j);
LOOP_ON_SIEVE_END;
}
max_prod >>= 1;
}
ASSERT (n_to_bit (n - k) < n_to_bit (n));
{
mp_limb_t prime;
LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n - k) + 1, n_to_bit (n), 0,sieve);
FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
LOOP_ON_SIEVE_END;
}
if (LIKELY (j != 0))
{
factors[j++] = prod;
mpz_prodlimbs (r, factors, j);
}
else
{
PTR (r)[0] = prod;
SIZ (r) = 1;
}
TMP_FREE;
}
#undef COUNT_A_PRIME
#undef SH_COUNT_A_PRIME
#undef LOOP_ON_SIEVE_END
#undef LOOP_ON_SIEVE_STOP
#undef LOOP_ON_SIEVE_BEGIN
#undef LOOP_ON_SIEVE_CONTINUE
void
mpz_bin_uiui (mpz_ptr r, unsigned long int n, unsigned long int k)
{
if (UNLIKELY (n < k)) {
SIZ (r) = 0;
#if BITS_PER_ULONG > GMP_NUMB_BITS
} else if (UNLIKELY (n > GMP_NUMB_MAX)) {
mpz_t tmp;
mpz_init_set_ui (tmp, n);
mpz_bin_ui (r, tmp, k);
mpz_clear (tmp);
#endif
} else {
ASSERT (n <= GMP_NUMB_MAX);
k = MIN (k, n - k);
if (k < 2) {
PTR(r)[0] = k ? n : 1;
SIZ(r) = 1;
} else if (n <= ODD_FACTORIAL_EXTTABLE_LIMIT) {
PTR(r)[0] = bc_bin_uiui (n, k);
SIZ(r) = 1;
} else if (k <= ODD_FACTORIAL_TABLE_LIMIT)
mpz_smallk_bin_uiui (r, n, k);
else if (BIN_UIUI_ENABLE_SMALLDC &&
k <= (BIN_UIUI_RECURSIVE_SMALLDC ? ODD_CENTRAL_BINOMIAL_TABLE_LIMIT : ODD_FACTORIAL_TABLE_LIMIT)* 2)
mpz_smallkdc_bin_uiui (r, n, k);
else if (ABOVE_THRESHOLD (k, BIN_GOETGHELUCK_THRESHOLD) &&
k > (n >> 4))
mpz_goetgheluck_bin_uiui (r, n, k);
else
mpz_bdiv_bin_uiui (r, n, k);
}
}