#include <string.h>
#include "thread_pool.h"
#include "ulong_extras.h"
#include "fmpz.h"
#include "qsieve.h"
void qsieve_do_sieving(qs_t qs_inf, unsigned char * sieve, qs_poly_t poly)
{
slong num_primes = qs_inf->num_primes;
int * soln1 = poly->soln1;
int * soln2 = poly->soln2;
prime_t * factor_base = qs_inf->factor_base;
ulong p;
unsigned char * end = sieve + qs_inf->sieve_size;
register unsigned char * pos1;
register unsigned char * pos2;
register unsigned char * bound;
slong size;
slong diff;
slong pind;
memset(sieve, qs_inf->sieve_fill, qs_inf->sieve_size + sizeof(ulong));
*end = (char) 255;
for (pind = qs_inf->small_primes; pind < num_primes; pind++)
{
if (soln2[pind] == 0) continue;
p = factor_base[pind].p;
size = factor_base[pind].size;
pos1 = sieve + soln1[pind];
pos2 = sieve + soln2[pind];
diff = pos2 - pos1;
bound = end - 2*p;
while (bound - pos1 > 0)
{
(*pos1) += size, (*(pos1 + diff)) += size, pos1 += p;
(*pos1) += size, (*(pos1 + diff)) += size, pos1 += p;
}
while ((end - pos1 > 0) && (end - pos1 - diff > 0))
{
(*pos1) += size, (*(pos1 + diff)) += size, pos1 += p;
}
pos2 = pos1 + diff;
if (end - pos2 > 0)
{
(*pos2) += size;
}
if (end - pos1 > 0)
{
(*pos1) += size;
}
}
}
void qsieve_do_sieving2(qs_t qs_inf, unsigned char * sieve, qs_poly_t poly)
{
slong b, d1, d2, i;
slong pind, size;
ulong p;
slong num_primes = qs_inf->num_primes;
int * soln1 = poly->soln1;
int * soln2 = poly->soln2;
int * posn1 = poly->posn1;
int * posn2 = poly->posn2;
prime_t * factor_base = qs_inf->factor_base;
unsigned char * B;
register unsigned char * Bp;
register unsigned char * pos;
memset(sieve, qs_inf->sieve_fill, qs_inf->sieve_size + sizeof(ulong));
sieve[qs_inf->sieve_size] = (char) 255;
for (i = 0; i < num_primes; i++)
{
posn1[i] = soln1[i];
posn2[i] = soln2[i] - posn1[i];
}
for (b = 1; b <= qs_inf->sieve_size / BLOCK_SIZE; b++)
{
B = sieve + b * BLOCK_SIZE;
for (pind = qs_inf->small_primes; pind < qs_inf->second_prime; pind++)
{
if (soln2[pind] == 0)
continue;
p = factor_base[pind].p;
size = factor_base[pind].size;
d1 = posn2[pind];
d2 = p - d1;
Bp = B - 2*d1 - d2;
pos = sieve + posn1[pind];
while (pos < Bp)
{
(*pos) += size, (*(pos + d1)) += size, pos += p;
(*pos) += size, (*(pos + d1)) += size, pos += p;
}
Bp = B - d1;
while (pos < Bp)
{
(*pos) += size,
(*(pos + d1)) += size, pos += p;
}
if (pos < B)
{
(*pos) += size, pos += d1;
posn2[pind] = d2;
}
else
{
posn2[pind] = d1;
}
posn1[pind] = (pos - sieve);
}
for (pind = qs_inf->second_prime; pind < num_primes; pind++)
{
p = factor_base[pind].p;
if (soln2[pind] == 0)
continue;
size = factor_base[pind].size;
pos = sieve + posn1[pind];
if (pos < B)
{
(*pos) += size;
pos += posn2[pind];
if (pos < B)
{
(*pos) += size;
pos += p - posn2[pind];
} else
{
posn2[pind] = p - posn2[pind];
}
posn1[pind] = (pos - sieve);
} else
{
posn1[pind] = (pos - sieve);
}
}
}
}
slong qsieve_evaluate_candidate(qs_t qs_inf, ulong i, unsigned char * sieve, qs_poly_t poly)
{
slong bits, exp, extra_bits;
ulong modp, prime;
slong num_primes = qs_inf->num_primes;
prime_t * factor_base = qs_inf->factor_base;
slong * small = poly->small;
fac_t * factor = poly->factor;
int * soln1 = poly->soln1;
int * soln2 = poly->soln2;
ulong * A_ind = qs_inf->A_ind;
ulong pinv;
slong num_factors = 0;
slong relations = 0;
slong j, k;
fmpz_t X, Y, C, res, p;
fmpz_init(X);
fmpz_init(Y);
fmpz_init(res);
fmpz_init(p);
fmpz_init(C);
qsieve_compute_C(C, qs_inf, poly);
fmpz_set_si(X, i - qs_inf->sieve_size / 2);
fmpz_mul(Y, X, qs_inf->A);
fmpz_add(Y, Y, poly->B);
fmpz_add(res, Y, poly->B);
fmpz_mul(res, res, X);
fmpz_add(res, res, C);
#if QS_DEBUG & 128
flint_printf("res = "); fmpz_print(res); flint_printf("\n");
flint_printf("Poly: "); fmpz_print(qs_inf->A); flint_printf("*x^2 + 2*");
fmpz_print(poly->B); flint_printf("*x + "); fmpz_print(C); flint_printf("\n");
flint_printf("x = %wd\n", i - qs_inf->sieve_size / 2);
#endif
sieve[i] -= qs_inf->sieve_fill;
bits = FLINT_ABS(fmpz_bits(res));
bits -= BITS_ADJUST;
extra_bits = 0;
if (factor_base[0].p != 1)
{
fmpz_set_ui(p, factor_base[0].p);
exp = fmpz_remove(res, res, p);
if (exp)
extra_bits += exp*qs_inf->factor_base[0].size;
small[0] = exp;
#if QS_DEBUG & 128
if (exp)
flint_printf("%ld^%ld ", factor_base[0].p, exp);
#endif
}
else
{
small[0] = 0;
}
fmpz_set_ui(p, 2);
exp = fmpz_remove(res, res, p);
#if QS_DEBUG & 128
if (exp)
flint_printf("%ld^%ld ", 2, exp);
#endif
extra_bits += exp;
small[1] = exp;
for (j = 3; j < qs_inf->small_primes; j++)
{
prime = factor_base[j].p;
pinv = factor_base[j].pinv2;
FLINT_ASSERT(prime < UWORD(1) << (FLINT_BITS / 2));
modp = n_mod_lemire(i, prime, pinv);
if (modp == soln1[j] || modp == soln2[j])
{
fmpz_set_ui(p, prime);
exp = fmpz_remove(res, res, p);
if (exp)
extra_bits += qs_inf->factor_base[j].size;
small[j] = exp;
#if QS_DEBUG & 128
if (exp)
flint_printf("%ld^%ld ", prime, exp);
#endif
}
else
{
small[j] = 0;
}
}
if (extra_bits + sieve[i] > bits)
{
sieve[i] += extra_bits;
if (factor_base[num_primes - 1].p < UWORD(1) << (FLINT_BITS / 2))
{
for (j = qs_inf->small_primes; j < num_primes && extra_bits < sieve[i]; j++)
{
prime = factor_base[j].p;
pinv = factor_base[j].pinv2;
modp = n_mod_lemire(i, prime, pinv);
if (soln2[j] != 0)
{
if (modp == soln1[j] || modp == soln2[j])
{
fmpz_set_ui(p, prime);
exp = fmpz_remove(res, res, p);
extra_bits += qs_inf->factor_base[j].size;
factor[num_factors].ind = j;
factor[num_factors++].exp = exp;
#if QS_DEBUG & 128
flint_printf("%ld^%ld ", prime, exp);
#endif
}
} else
{
fmpz_set_ui(p, prime);
exp = fmpz_remove(res, res, p);
factor[num_factors].ind = j;
factor[num_factors++].exp = exp + 1;
#if QS_DEBUG & 128
if (exp)
flint_printf("%ld^%ld ", prime, exp);
#endif
}
}
}
else
{
for (j = qs_inf->small_primes; j < num_primes && extra_bits < sieve[i]; j++)
{
prime = factor_base[j].p;
pinv = factor_base[j].pinv;
modp = n_mod2_preinv(i, prime, pinv);
if (soln2[j] != 0)
{
if (modp == soln1[j] || modp == soln2[j])
{
fmpz_set_ui(p, prime);
exp = fmpz_remove(res, res, p);
extra_bits += qs_inf->factor_base[j].size;
factor[num_factors].ind = j;
factor[num_factors++].exp = exp;
#if QS_DEBUG & 128
flint_printf("%ld^%ld ", prime, exp);
#endif
}
} else
{
fmpz_set_ui(p, prime);
exp = fmpz_remove(res, res, p);
factor[num_factors].ind = j;
factor[num_factors++].exp = exp + 1;
#if QS_DEBUG & 128
if (exp)
flint_printf("%ld^%ld ", prime, exp);
#endif
}
}
}
#if QS_DEBUG & 128
if (num_factors > 0)
flint_printf("\n");
#endif
if (fmpz_cmp_ui(res, 1) == 0 || fmpz_cmp_si(res, -1) == 0)
{
#if QS_DEBUG
if (qs_inf->full_relation % 100 == 0)
flint_printf("%ld relations\n", qs_inf->full_relation);
#endif
if (fmpz_cmp_si(res, -1) == 0)
small[2] = 1;
else
small[2] = 0;
for (k = 0; k < qs_inf->s; k++)
{
if (A_ind[k] >= j)
{
factor[num_factors].ind = A_ind[k];
factor[num_factors++].exp = 1;
}
}
poly->num_factors = num_factors;
#if FLINT_USES_PTHREAD
pthread_mutex_lock(&qs_inf->mutex);
#endif
qsieve_write_to_file(qs_inf, 1, Y, poly);
qs_inf->full_relation++;
#if FLINT_USES_PTHREAD
pthread_mutex_unlock(&qs_inf->mutex);
#endif
relations++;
} else
{
if (fmpz_sgn(res) < 0)
{
fmpz_neg(res, res);
small[2] = 1;
} else
small[2] = 0;
if (fmpz_bits(res) <= 30)
{
prime = fmpz_get_ui(res);
if (prime < 60*factor_base[qs_inf->num_primes - 1].p && n_gcd(prime, qs_inf->k) == 1)
{
for (k = 0; k < qs_inf->s; k++)
{
if (A_ind[k] >= j)
{
factor[num_factors].ind = A_ind[k];
factor[num_factors++].exp = 1;
}
}
poly->num_factors = num_factors;
#if FLINT_USES_PTHREAD
pthread_mutex_lock(&qs_inf->mutex);
#endif
qsieve_write_to_file(qs_inf, prime, Y, poly);
qs_inf->edges++;
qsieve_add_to_hashtable(qs_inf, prime);
#if FLINT_USES_PTHREAD
pthread_mutex_unlock(&qs_inf->mutex);
#endif
}
}
}
goto cleanup;
}
cleanup:
fmpz_clear(X);
fmpz_clear(Y);
fmpz_clear(C);
fmpz_clear(res);
fmpz_clear(p);
return relations;
}
slong qsieve_evaluate_sieve(qs_t qs_inf, unsigned char * sieve, qs_poly_t poly)
{
slong i = 0, j = 0;
ulong * sieve2 = (ulong *) sieve;
unsigned char bits = qs_inf->sieve_bits;
slong rels = 0;
while (j < qs_inf->sieve_size / sizeof(ulong))
{
#if FLINT64
while ((sieve2[j] & UWORD(0xC0C0C0C0C0C0C0C0)) == 0)
#else
while ((sieve2[j] & UWORD(0xC0C0C0C0)) == 0)
#endif
{
j++;
}
i = j * sizeof(ulong);
while (i < (j + 1) * sizeof(ulong) && i < qs_inf->sieve_size)
{
if (sieve[i] > bits)
rels += qsieve_evaluate_candidate(qs_inf, i, sieve, poly);
i++;
}
j++;
}
return rels;
}
typedef struct
{
qs_s * inf;
unsigned char * sieve;
slong thread_idx;
qs_poly_s * thread_poly;
unsigned char * thread_sieve;
slong rels;
}
_worker_arg_struct;
static void qsieve_collect_relations_worker(void * varg)
{
_worker_arg_struct * arg = (_worker_arg_struct *) varg;
qs_s * qs_inf = arg->inf;
qs_poly_s * thread_poly = arg->thread_poly;
unsigned char * thread_sieve = arg->thread_sieve;
slong j, iterations = (1 << (qs_inf->s - 1));
while (1)
{
#if FLINT_USES_PTHREAD
pthread_mutex_lock(&qs_inf->mutex);
#endif
j = qs_inf->index_j;
qs_inf->index_j = j + 1;
if (j < iterations)
{
if (j > 0)
qsieve_init_poly_next(qs_inf, j);
qsieve_poly_copy(thread_poly, qs_inf);
}
#if FLINT_USES_PTHREAD
pthread_mutex_unlock(&qs_inf->mutex);
#endif
if (j >= iterations)
return;
if (qs_inf->sieve_size < 2*BLOCK_SIZE)
qsieve_do_sieving(qs_inf, thread_sieve, thread_poly);
else
qsieve_do_sieving2(qs_inf, thread_sieve, thread_poly);
arg->rels += qsieve_evaluate_sieve(qs_inf, thread_sieve, thread_poly);
}
}
slong qsieve_collect_relations(qs_t qs_inf, unsigned char * sieve)
{
slong i;
_worker_arg_struct * args;
slong relations;
const thread_pool_handle* handles = qs_inf->handles;
slong num_handles = qs_inf->num_handles;
args = (_worker_arg_struct *) flint_malloc((1 + num_handles)
*sizeof(_worker_arg_struct));
qs_inf->index_j = 0;
qsieve_init_poly_first(qs_inf);
for (i = 0; i <= num_handles; i++)
{
args[i].inf = qs_inf;
args[i].thread_idx = i;
args[i].thread_poly = qs_inf->poly + i;
args[i].thread_sieve = sieve + (qs_inf->sieve_size + sizeof(ulong) + 64)*i;
args[i].rels = 0;
}
for (i = 0; i < num_handles; i++)
{
thread_pool_wake(global_thread_pool, handles[i], 0,
qsieve_collect_relations_worker, &args[i]);
}
qsieve_collect_relations_worker(&args[num_handles]);
relations = args[num_handles].rels;
for (i = 0; i < num_handles; i++)
{
thread_pool_wait(global_thread_pool, handles[i]);
relations += args[i].rels;
}
flint_free(args);
return relations;
}