#include "ulong_extras.h"
#include "fmpz.h"
#include "qsieve.h"
int qsieve_init_A(qs_t qs_inf)
{
slong i, j;
slong s, low, high, span, m, h;
ulong bits, num_factors, rem, mid;
ulong factor_bound[40];
ulong * A_ind;
ulong * curr_subset, * first_subset;
prime_t * factor_base = qs_inf->factor_base;
fmpz_t prod, temp, upper_bound, lower_bound;
int ret = 1, found_j;
fmpz_init(temp);
fmpz_init(upper_bound);
fmpz_init(lower_bound);
fmpz_init_set_ui(prod, 1);
fmpz_fdiv_q_ui(lower_bound, qs_inf->target_A, 2);
fmpz_mul_ui(upper_bound, qs_inf->target_A, 2);
bits = fmpz_bits(qs_inf->target_A);
for (i = 0; i < 40; i++)
factor_bound[i] = 0;
for (j = 0, i = qs_inf->small_primes; i < qs_inf->num_primes; i++)
{
if (qs_inf->factor_base[i].size != j)
{
factor_bound[j] = i;
j = qs_inf->factor_base[i].size;
}
}
if (bits > 210) i = 15;
else if (bits > 190) i = 13;
else if (bits > 180) i = 12;
else i = 11;
high = low = 0;
for ( ; i > 7; i--)
{
num_factors = bits / i;
rem = bits % i;
if (factor_bound[i] == 0 || num_factors == 1)
continue;
if (rem == 0 && num_factors > 2 && factor_bound[i + 1] > 0 && factor_bound[i - 1 - (i <= 10)] > 0)
{
low = factor_bound[i - 1 - (i <= 10)];
high = factor_bound[i + 1];
break;
}
else if (rem <= num_factors)
{
if (num_factors > 2 && factor_bound[i + 2] > 0 && factor_bound[i + - (i <= 9)] > 0)
{
low = factor_bound[i - (i <= 9)];
high = factor_bound[i + 2];
break;
}
}
else if (i - rem <= num_factors)
{
if (factor_bound[i + 1] > 0 && factor_bound[i - 1 - (i <= 10)] > 0)
{
num_factors++;
low = factor_bound[i - 1 - (i <= 10)];
high = factor_bound[i + 1];
break;
}
}
}
if (i == 7)
{
num_factors = (bits >= 15) ? 3 : 2;
low = qs_inf->small_primes;
high = qs_inf->num_primes;
}
s = num_factors;
qs_inf->s = s;
#if QS_DEBUG
flint_printf("s = %wd\n", s);
flint_printf("high = %wd\n", high);
flint_printf("low = %wd\n", low);
flint_printf("span = %wd\n", high - low);
#endif
qsieve_poly_init(qs_inf);
A_ind = qs_inf->A_ind;
curr_subset = qs_inf->curr_subset;
first_subset = qs_inf->first_subset;
span = high - low;
if (s <= 3)
{
m = 0;
h = s;
for (j = 0; j < s; j++)
{
curr_subset[j] = j;
first_subset[j] = j;
}
fmpz_set_ui(prod, 1);
for (j = 0; j < s; j++)
{
fmpz_mul_ui(prod, prod, factor_base[curr_subset[j] + low].p);
A_ind[j] = curr_subset[j] + low;
}
}
else
{
m = 0;
h = s - 1;
for (j = 0; j < s - 1; j++)
curr_subset[j] = j;
while (1)
{
if (4*(curr_subset[0] + s - 2)/3 >= span)
{
ret = 0;
goto init_A_cleanup;
}
fmpz_set_ui(prod, 1);
for (j = 0; j < s - 1; j++)
{
fmpz_mul_ui(prod, prod, factor_base[4*curr_subset[j]/3 + 1 + low].p);
}
i = 0;
j = span/4 - 1;
found_j = 0;
while (i < j)
{
mid = i + (j - i) / 2;
fmpz_mul_ui(temp, prod, factor_base[4*mid + low].p);
if (fmpz_cmp(lower_bound, temp) > 0)
{
i = mid + (i == mid);
}
else if (fmpz_cmp(temp, upper_bound) > 0)
{
j = mid - (j == mid);
}
else
{
j = 4*mid + low;
found_j = 1;
break;
}
}
if (found_j) break;
h = (4*(m + h + 1)/3 >= span) ? h + 1 : 1;
m = curr_subset[s - h - 1] + 1;
for (j = 0; j < h; j++)
curr_subset[s + j - h - 1] = m + j;
}
A_ind[s - 1] = j;
fmpz_mul_ui(prod, prod, qs_inf->factor_base[A_ind[s - 1]].p);
for (j = 0; j < s - 1; j++)
A_ind[j] = 4*curr_subset[j]/3 + 1 + low;
for (j = 0; j < s; j++)
first_subset[j] = curr_subset[j];
#if QS_DEBUG
flint_printf("First A_ind = (");
for (i = 0; i < s - 1; i++)
flint_printf("%ld, ", A_ind[i]);
flint_printf("%ld", A_ind[s - 1]);
flint_printf(")\n");
#endif
}
if (s > 3)
{
qs_inf->j = A_ind[s - 1];
qs_inf->A_ind_diff = 1;
}
qs_inf->h = h;
qs_inf->m = m;
qs_inf->low = low;
qs_inf->high = high;
qs_inf->span = span;
fmpz_set(qs_inf->A, prod);
fmpz_set(qs_inf->low_bound, lower_bound);
fmpz_set(qs_inf->upp_bound, upper_bound);
#if QS_DEBUG
flint_printf("number of factors in hypercube = %wd\n", qs_inf->s);
flint_printf("factor base size = %wd max prime = %wu\n", qs_inf->num_primes, qs_inf->factor_base[qs_inf->num_primes - 1].p);
flint_printf("possible candidate in range [ %wd, %wd ]\n", qs_inf->low, qs_inf->high);
flint_printf("optimal value of hypercube = ");
fmpz_print(qs_inf->target_A);
flint_printf("\n");
flint_printf("lower bound = ");
fmpz_print(lower_bound);
flint_printf("\n");
flint_printf("upper bound = ");
fmpz_print(upper_bound);
flint_printf("\n");
flint_printf("initial hypercube = ");
fmpz_print(qs_inf->A);
flint_printf("\n");
#endif
init_A_cleanup:
fmpz_clear(prod);
fmpz_clear(temp);
fmpz_clear(upper_bound);
fmpz_clear(lower_bound);
return ret;
}
void qsieve_reinit_A(qs_t qs_inf)
{
slong low, s, j;
ulong * A_ind = qs_inf->A_ind;
ulong * curr_subset = qs_inf->curr_subset;
ulong * first_subset = qs_inf->first_subset;
prime_t * factor_base = qs_inf->factor_base;
low = qs_inf->low;
s = qs_inf->s;
fmpz_set_ui(qs_inf->A, UWORD(1));
if (s <= 3)
{
for (j = 0; j < s; j++)
{
curr_subset[j] = first_subset[j];
A_ind[j] = curr_subset[j] + low;
}
} else
{
for (j = 0; j < s - 1; j++)
{
curr_subset[j] = first_subset[j];
A_ind[j] = 4*curr_subset[j]/3 + low;
}
A_ind[s - 1] = qs_inf->j;
}
for (j = 0; j < s; j++)
fmpz_mul_ui(qs_inf->A, qs_inf->A, factor_base[A_ind[j]].p);
qs_inf->m = 0;
qs_inf->h = s;
}
int qsieve_next_A(qs_t qs_inf)
{
slong i, j, mid, diff;
slong s = qs_inf->s;
slong low = qs_inf->low;
slong span = qs_inf->span;
slong h = qs_inf->h;
slong m = qs_inf->m;
ulong ret = 1;
ulong * curr_subset = qs_inf->curr_subset;
ulong * A_ind = qs_inf->A_ind;
prime_t * factor_base = qs_inf->factor_base;
fmpz_t prod, temp;
int found_j, inc_diff;
fmpz_init(prod);
fmpz_init(temp);
if (s <= 3)
{
if (curr_subset[0] != span - s + 1)
{
h = (m >= span - h) ? h + 1 : 1;
m = curr_subset[s - h] + 1;
for (j = 0; j < h; j++)
curr_subset[s + j - h] = m + j;
fmpz_set_ui(prod, UWORD(1));
for (j = 0; j < s; j++)
fmpz_mul_ui(prod, prod, factor_base[curr_subset[j] + low].p);
for (j = 0; j < s; j++)
A_ind[j] = curr_subset[j] + low;
}
else
ret = 0;
} else
{
diff = qs_inf->A_ind_diff;
inc_diff = 0;
while (1)
{
if (4*(curr_subset[0] + s + diff)/3 + 1 >= span)
{
ret = 0;
goto next_A_cleanup;
}
h = (4*(m + diff + h + 1)/3 >= span) ? h + 1 : 1;
m = curr_subset[s - 2 - h] + 1 + ((m%diff) == 0);
if (h == 2)
inc_diff = 1;
else if (h > 2)
diff = 1;
for (j = 0; j < h; j++)
curr_subset[s + j - h - 2] = m + j;
curr_subset[s - 2] = curr_subset[s - 3] + diff;
fmpz_set_ui(prod, 1);
for (j = 0; j < s - 1; j++)
fmpz_mul_ui(prod, prod, factor_base[4*curr_subset[j]/3 + 1 + low].p);
i = 0;
j = span/4 - 1;
found_j = 0;
while (i < j)
{
mid = i + (j - i) / 2;
fmpz_mul_ui(temp, prod, factor_base[4*mid + low].p);
if (fmpz_cmp(qs_inf->low_bound, temp) > 0)
{
i = mid + (i == mid);
}
else if (fmpz_cmp(temp, qs_inf->upp_bound) > 0)
{
j = mid - (j == mid);
}
else
{
j = 4*mid + low;
found_j = 1;
if (inc_diff)
{
diff += 1;
qs_inf->A_ind_diff = diff;
}
break;
}
}
if (found_j)
{
A_ind[s - 1] = j;
fmpz_mul_ui(prod, prod, qs_inf->factor_base[j].p);
for (j = 0; j < s - 1; j++)
A_ind[j] = 4*curr_subset[j]/3 + 1 + low;
break;
}
}
}
#if QS_DEBUG
flint_printf("A_ind = (");
for (i = 0; i < s - 1; i++)
flint_printf("%ld, ", A_ind[i]);
flint_printf("%ld", A_ind[s - 1]);
flint_printf(")\n");
#endif
qs_inf->h = h;
qs_inf->m = m;
fmpz_set(qs_inf->A, prod);
next_A_cleanup:
fmpz_clear(prod);
fmpz_clear(temp);
return ret;
}
void qsieve_init_poly_first(qs_t qs_inf)
{
slong i, k;
slong s = qs_inf->s;
ulong * A_ind = qs_inf->A_ind;
ulong * A_inv = qs_inf->A_inv;
ulong * B0_terms = qs_inf->B0_terms;
ulong ** A_inv2B = qs_inf->A_inv2B;
fmpz_t * B_terms = qs_inf->B_terms;
fmpz_t * A_divp = qs_inf->A_divp;
prime_t * factor_base = qs_inf->factor_base;
int * sqrts = qs_inf->sqrts;
int * soln1 = qs_inf->soln1;
int * soln2 = qs_inf->soln2;
ulong p, pinv, temp, temp2;
#if QS_DEBUG
qs_inf->poly_count += 1;
#endif
fmpz_zero(qs_inf->B);
for (i = 0; i < s; i++)
{
p = factor_base[A_ind[i]].p;
pinv = factor_base[A_ind[i]].pinv;
fmpz_divexact_ui(A_divp[i], qs_inf->A, p);
temp = fmpz_fdiv_ui(A_divp[i], p);
temp = n_invmod(temp, p);
B0_terms[i] = n_mulmod2_preinv(temp, sqrts[A_ind[i]], p, pinv);
if (B0_terms[i] > p/2)
B0_terms[i] = p - B0_terms[i];
fmpz_mul_ui(B_terms[i], A_divp[i], B0_terms[i]);
fmpz_add(qs_inf->B, qs_inf->B, B_terms[i]);
}
for (k = 3; k < qs_inf->num_primes; k++)
{
p = factor_base[k].p;
temp = fmpz_fdiv_ui(qs_inf->A, p);
A_inv[k] = temp == 0 ? 0 : n_invmod(temp, p);
}
for (k = 3; k < qs_inf->num_primes; k++)
{
p = factor_base[k].p;
pinv = factor_base[k].pinv;
for (i = 0; i < s; i++)
{
temp = fmpz_fdiv_ui(B_terms[i], p);
temp *= 2;
if (temp >= p)
temp -= p;
A_inv2B[i][k] = n_mulmod2_preinv(temp, A_inv[k], p, pinv);
}
}
for (k = 3; k < qs_inf->num_primes; k++)
{
p = factor_base[k].p;
pinv = factor_base[k].pinv;
temp = fmpz_fdiv_ui(qs_inf->B, p);
temp = sqrts[k] + p - temp;
temp = n_mulmod2_preinv(temp, A_inv[k], p, pinv);
temp += qs_inf->sieve_size / 2;
temp = n_mod2_preinv(temp, p, pinv);
temp2 = n_mulmod2_preinv(sqrts[k], A_inv[k], p, pinv);
temp2 *= 2;
if (temp2 >= p)
temp2 -= p;
temp2 = temp + p - temp2;
if (temp2 >= p)
temp2 -= p;
if (temp2 > temp)
{
soln1[k] = temp;
soln2[k] = temp2;
} else
{
soln1[k] = temp2;
soln2[k] = temp;
}
}
for (i = 0; i < s; i++)
{
soln1[A_ind[i]] = soln2[A_ind[i]] = 0;
}
}
void qsieve_init_poly_next(qs_t qs_inf, slong i)
{
slong j, v;
slong s = qs_inf->s;
prime_t * factor_base = qs_inf->factor_base;
int * soln1 = qs_inf->soln1;
int * soln2 = qs_inf->soln2;
ulong ** A_inv2B = qs_inf->A_inv2B;
ulong sign, p, r1, r2;
fmpz_t temp;
fmpz_init(temp);
#if QS_DEBUG
qs_inf->poly_count += 1;
#endif
for (v = 0; v < s; v++)
{
if (((i >> v) & 1)) break;
}
sign = (i >> v) & 2;
fmpz_mul_ui(temp, qs_inf->B_terms[v], UWORD(2));
if (sign)
fmpz_add(qs_inf->B, qs_inf->B, temp);
else
fmpz_sub(qs_inf->B, qs_inf->B, temp);
for (j = 3; j < qs_inf->num_primes; j++)
{
p = factor_base[j].p;
if (sign)
{
r1 = soln1[j] + p - A_inv2B[v][j];
r2 = soln2[j] + p - A_inv2B[v][j];
} else
{
r1 = soln1[j] + A_inv2B[v][j];
r2 = soln2[j] + A_inv2B[v][j];
}
if (r1 >= p)
r1 -= p;
if (r2 >= p)
r2 -= p;
if (r1 < r2)
{
soln1[j] = r1;
soln2[j] = r2;
} else
{
soln1[j] = r2;
soln2[j] = r1;
}
}
fmpz_clear(temp);
}
void qsieve_compute_C(fmpz_t C, qs_t qs_inf, qs_poly_t poly)
{
fmpz_t r;
fmpz_init(r);
fmpz_mul(C, poly->B, poly->B);
fmpz_sub(C, C, qs_inf->kn);
#if QS_DEBUG
fmpz_mod(r, C, qs_inf->A);
if (!fmpz_is_zero(r))
{
flint_throw(FLINT_ERROR, "B^2 - kn not divisible by A\n");
}
#endif
fmpz_divexact(C, C, qs_inf->A);
fmpz_clear(r);
}
void qsieve_poly_copy(qs_poly_t poly, qs_t qs_inf)
{
slong i;
fmpz_set(poly->B, qs_inf->B);
for (i = 0; i < qs_inf->num_primes; i++)
{
poly->soln1[i] = qs_inf->soln1[i];
poly->soln2[i] = qs_inf->soln2[i];
}
}