#include <gmp.h>
#include "ulong_extras.h"
#include "nmod_vec.h"
static
ulong n_ecm_primorial[] =
{
UWORD(2), UWORD(6), UWORD(30), UWORD(210), UWORD(2310), UWORD(30030),
UWORD(510510), UWORD(9699690),
#if FLINT64
UWORD(223092870), UWORD(6469693230), UWORD(200560490130),
UWORD(7420738134810), UWORD(304250263527210), UWORD(13082761331670030),
UWORD(614889782588491410)
#endif
};
#if FLINT64
#define num_n_ecm_primorials 15
#else
#define num_n_ecm_primorials 9
#endif
int
n_factor_ecm(ulong *f, ulong curves, ulong B1, ulong B2,
flint_rand_t state, ulong n)
{
ulong P, num, maxD, mmin, mmax, mdiff, prod, maxj, sig;
ulong i, j;
int ret;
n_ecm_t n_ecm_inf;
const ulong *prime_array;
n_ecm_inf->normbits = flint_clz(n);
n <<= n_ecm_inf->normbits;
n_ecm_inf->ninv = n_preinvert_limb(n);
n_ecm_inf->one = UWORD(1) << n_ecm_inf->normbits;
ret = 0;
num = n_prime_pi(B1);
prime_array = n_primes_arr_readonly(num);
maxD = n_sqrt(B2);
j = 1;
while ((j < num_n_ecm_primorials) && (n_ecm_primorial[j] < maxD))
j += 1;
P = n_ecm_primorial[j - 1];
mmin = (B1 + (P/2)) / P;
mmax = ((B2 - P/2) + P - 1)/P;
maxj = (P + 1)/2;
mdiff = mmax - mmin + 1;
n_ecm_inf->GCD_table = flint_malloc(maxj + 1);
for (j = 1; j <= maxj; j += 2)
{
if ((j%2) && n_gcd(j, P) == 1)
n_ecm_inf->GCD_table[j] = 1;
else
n_ecm_inf->GCD_table[j] = 0;
}
n_ecm_inf->prime_table = flint_malloc(mdiff * sizeof(unsigned char*));
for (i = 0; i < mdiff; i++)
n_ecm_inf->prime_table[i] = flint_malloc((maxj + 1) * sizeof(unsigned char));
for (i = 0; i < mdiff; i++)
{
for (j = 1; j <= maxj; j += 2)
{
n_ecm_inf->prime_table[i][j] = 0;
if (n_ecm_inf->GCD_table[j] == 1)
{
prod = (i + mmin)*P + j;
if (n_is_prime(prod))
n_ecm_inf->prime_table[i][j] = 1;
prod = (i + mmin)*P - j;
if (n_is_prime(prod))
n_ecm_inf->prime_table[i][j] = 1;
}
}
}
for (j = 0; j < curves; j++)
{
sig = n_randint(state, n >> n_ecm_inf->normbits);
sig = n_addmod(sig, 7, n >> n_ecm_inf->normbits);
sig <<= n_ecm_inf->normbits;
if (n_factor_ecm_select_curve(f, sig, n, n_ecm_inf))
{
(*f) >>= n_ecm_inf->normbits;
ret = -1;
goto cleanup;
}
ret = n_factor_ecm_stage_I(f, prime_array, num, B1, n, n_ecm_inf);
if (ret)
{
(*f) >>= n_ecm_inf->normbits;
ret = 1;
goto cleanup;
}
ret = n_factor_ecm_stage_II(f, B1, B2, P, n, n_ecm_inf);
if (ret)
{
(*f) >>= n_ecm_inf->normbits;
ret = 2;
goto cleanup;
}
}
cleanup:
flint_free(n_ecm_inf->GCD_table);
for (i = 0; i < mdiff; i++)
flint_free(n_ecm_inf->prime_table[i]);
flint_free(n_ecm_inf->prime_table);
return ret;
}
void
n_factor_ecm_add(ulong *x, ulong *z, ulong x1, ulong z1,
ulong x2, ulong z2, ulong x0, ulong z0,
ulong n, n_ecm_t n_ecm_inf)
{
ulong u, v, w;
if (z1 == 0)
{
*x = x2;
*z = z2;
return;
}
if (z2 == 0)
{
*x = x1;
*z = z1;
return;
}
if (z0 == 0)
{
n_factor_ecm_double(x, z, x1, z1, n, n_ecm_inf);
return;
}
u = n_submod(x2, z2, n);
v = n_addmod(x1, z1, n);
u = n_mulmod_preinv(u, v, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
v = n_submod(x1, z1, n);
w = n_addmod(x2, z2, n);
v = n_mulmod_preinv(v, w, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
w = n_addmod(u, v, n);
v = n_submod(v, u, n);
w = n_mulmod_preinv(w, w, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
v = n_mulmod_preinv(v, v, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
*x = n_mulmod_preinv(z0, w, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
*z = n_mulmod_preinv(x0, v, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
}
void
n_factor_ecm_double(ulong *x, ulong *z, ulong x0, ulong z0,
ulong n, n_ecm_t n_ecm_inf)
{
ulong u, v, w;
if (z0 == 0)
{
*x = x0;
*z = 0;
return;
}
u = n_addmod(x0, z0, n);
u = n_mulmod_preinv(u, u, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
v = n_submod(x0, z0, n);
v = n_mulmod_preinv(v, v, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
*x = n_mulmod_preinv(u, v, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
w = n_submod(u, v, n);
u = n_mulmod_preinv(w, n_ecm_inf->a24, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
u = n_addmod(u, v, n);
*z = n_mulmod_preinv(w, u, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
}
void
n_factor_ecm_mul_montgomery_ladder(ulong *x, ulong *z, ulong x0, ulong z0,
ulong k, ulong n, n_ecm_t n_ecm_inf)
{
ulong x1, z1, x2, z2, len;
if (k == 0)
{
*x = 0;
*z = 0;
return;
}
if (k == 1)
{
*x = x0;
*z = z0;
return;
}
x1 = x0;
z1 = z0;
x2 = 0;
z2 = 0;
n_factor_ecm_double(&x2, &z2, x0, z0, n, n_ecm_inf);
len = n_sizeinbase(k, 2) - 2;
while (1)
{
if (((UWORD(1) << len) & k) != 0)
{
n_factor_ecm_add(&x1, &z1, x1, z1, x2, z2, x0, z0, n, n_ecm_inf);
n_factor_ecm_double(&x2, &z2, x2, z2, n, n_ecm_inf);
}
else
{
n_factor_ecm_add(&x2, &z2, x1, z1, x2, z2, x0, z0, n, n_ecm_inf);
n_factor_ecm_double(&x1, &z1, x1, z1, n, n_ecm_inf);
}
if (len == 0)
break;
else
len -= 1;
}
*x = x1;
*z = z1;
}
int
n_factor_ecm_select_curve(ulong *f, ulong sig, ulong n, n_ecm_t n_ecm_inf)
{
ulong u, v, w, t, hi, lo;
nn_ptr a;
int ret = 0;
TMP_INIT;
TMP_START;
a = TMP_ALLOC(2 * sizeof(ulong));
u = sig;
v = n_mulmod_preinv(u, UWORD(4) << n_ecm_inf->normbits, n, n_ecm_inf->ninv,
n_ecm_inf->normbits);
w = n_mulmod_preinv(u, u, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
u = n_submod(w, UWORD(5) << n_ecm_inf->normbits, n);
w = n_mulmod_preinv(u, u, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
n_ecm_inf->x = n_mulmod_preinv(w, u, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
w = n_mulmod_preinv(v, v, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
n_ecm_inf->z = n_mulmod_preinv(w, v, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
w = n_mulmod_preinv(n_ecm_inf->x, v, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
t = n_mulmod_preinv(w, UWORD(4) << n_ecm_inf->normbits, n, n_ecm_inf->ninv,
n_ecm_inf->normbits);
w = n_mulmod_preinv(u, UWORD(3) << n_ecm_inf->normbits, n, n_ecm_inf->ninv,
n_ecm_inf->normbits);
u = n_submod(v, u, n);
v = n_addmod(v, w, n);
w = n_mulmod_preinv(u, u, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
u = n_mulmod_preinv(u, w, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
n_ecm_inf->a24 = n_mulmod_preinv(u, v, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
v = n_mulmod_preinv(t, n_ecm_inf->z, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
*f = n_gcdinv(&u, v, n);
if (*f == n)
{
goto cleanup;
}
else if (*f > n_ecm_inf->one)
{
ret = 1;
goto cleanup;
}
a[1] = UWORD(0);
a[0] = u;
mpn_lshift(a, a, 2, n_ecm_inf->normbits);
hi = a[1];
lo = a[0];
u = n_ll_mod_preinv(hi, lo, n, n_ecm_inf->ninv);
v = n_mulmod_preinv(u, t, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
n_ecm_inf->x = n_mulmod_preinv(n_ecm_inf->x, v, n, n_ecm_inf->ninv,
n_ecm_inf->normbits);
v = n_mulmod_preinv(u, n_ecm_inf->z, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
w = n_mulmod_preinv(n_ecm_inf->a24, v, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
n_ecm_inf->a24 = n_addmod(w, UWORD(2) << n_ecm_inf->normbits, n);
n_ecm_inf->a24 >>= 2;
n_ecm_inf->a24 >>= n_ecm_inf->normbits;
n_ecm_inf->a24 <<= n_ecm_inf->normbits;
n_ecm_inf->z = n_ecm_inf->one;
cleanup:
TMP_END;
return ret;
}
int
n_factor_ecm_stage_I(ulong *f, const ulong *prime_array, ulong num,
ulong B1, ulong n, n_ecm_t n_ecm_inf)
{
ulong times;
ulong i, j, p;
for (i = 0; i < num; i++)
{
p = n_flog(B1, prime_array[i]);
times = prime_array[i];
for (j = 1; j <= p; j ++)
{
n_factor_ecm_mul_montgomery_ladder(&(n_ecm_inf->x), &(n_ecm_inf->z),
n_ecm_inf->x, n_ecm_inf->z,
times, n, n_ecm_inf);
}
*f = n_gcd(n_ecm_inf->z, n);
if ((*f > n_ecm_inf->one) && (*f < n))
{
return 1;
}
}
return 0;
}
int
n_factor_ecm_stage_II(ulong *f, ulong B1, ulong B2, ulong P,
ulong n, n_ecm_t n_ecm_inf)
{
ulong g, Qx, Qz, Rx, Rz, Qdx, Qdz, a, b;
ulong mmin, mmax, maxj, Q0x2, Q0z2;
ulong i, j;
int ret;
nn_ptr arrx, arrz;
mmin = (B1 + (P/2)) / P;
mmax = ((B2 - P/2) + P - 1)/P;
maxj = (P + 1)/2;
g = n_ecm_inf->one;
arrx = _nmod_vec_init((maxj >> 1) + 1);
arrz = _nmod_vec_init((maxj >> 1) + 1);
ret = 0;
arrx[0] = n_ecm_inf->x;
arrz[0] = n_ecm_inf->z;
n_factor_ecm_double(&Q0x2, &Q0z2, arrx[0], arrz[0], n, n_ecm_inf);
n_factor_ecm_add(arrx + 1, arrz + 1, Q0x2, Q0z2, arrx[0], arrz[0],
arrx[0], arrz[0], n, n_ecm_inf);
for (j = 2; j <= (maxj >> 1); j += 1)
{
n_factor_ecm_add(arrx + j, arrz + j, arrx[j - 1], arrz[j - 1], Q0x2,
Q0z2, arrx[j - 2], arrz[j - 2], n, n_ecm_inf);
}
n_factor_ecm_mul_montgomery_ladder(&Qx, &Qz, n_ecm_inf->x, n_ecm_inf->z, P, n, n_ecm_inf);
n_factor_ecm_mul_montgomery_ladder(&Rx, &Rz, Qx, Qz, mmin, n, n_ecm_inf);
n_factor_ecm_mul_montgomery_ladder(&Qdx, &Qdz, Qx, Qz, mmin - 1, n, n_ecm_inf);
for (i = mmin; i <= mmax; i ++)
{
for (j = 1; j <= maxj; j += 2)
{
if (n_ecm_inf->prime_table[i - mmin][j] == 1)
{
a = n_mulmod_preinv(Rx, arrz[j >> 1], n, n_ecm_inf->ninv, n_ecm_inf->normbits);
b = n_mulmod_preinv(Rz, arrx[j >> 1], n, n_ecm_inf->ninv, n_ecm_inf->normbits);
a = n_submod(a, b, n);
g = n_mulmod_preinv(g, a, n, n_ecm_inf->ninv, n_ecm_inf->normbits);
}
}
a = Rx;
b = Rz;
n_factor_ecm_add(&Rx, &Rz, Rx, Rz, Qx, Qz, Qdx, Qdz, n, n_ecm_inf);
Qdx = a;
Qdz = b;
}
*f = n_gcd(g, n);
if ((*f > n_ecm_inf->one) && (*f < n))
{
ret = 1;
}
_nmod_vec_clear(arrx);
_nmod_vec_clear(arrz);
return ret;
}