#include "thread_pool.h"
#include "thread_support.h"
#include "ulong_extras.h"
#include "fmpz.h"
#include "fmpz_vec.h"
#include "fmpz_poly.h"
typedef struct
{
fmpz * poly;
const fmpz * c;
slong len;
}
worker_t;
static void
_fmpz_poly_taylor_shift_divconquer_worker(void * arg_ptr)
{
worker_t * data = (worker_t *) arg_ptr;
_fmpz_poly_taylor_shift_divconquer(data->poly, data->c, data->len);
}
void
_fmpz_poly_taylor_shift_divconquer(fmpz * poly, const fmpz_t c, slong len)
{
fmpz *tmp, *tmp2;
slong k, len1, len2;
slong bits, cutoff;
slong nt, nw;
thread_pool_handle * threads;
worker_t args[2];
if (len < 50 || fmpz_is_zero(c))
{
_fmpz_poly_taylor_shift_horner(poly, c, len);
return;
}
bits = _fmpz_vec_max_bits(poly, len);
bits = FLINT_ABS(bits);
nt = flint_get_num_threads();
cutoff = 100 + 10 * n_sqrt(FLINT_MAX(bits - FLINT_BITS, 0));
if (nt == 1)
cutoff = FLINT_MIN(cutoff, 1000);
else
cutoff = FLINT_MIN(cutoff, 300);
if (len < cutoff)
{
_fmpz_poly_taylor_shift_horner(poly, c, len);
return;
}
len1 = len / 2;
len2 = len - len1;
nw = flint_request_threads(&threads, FLINT_MIN(nt, 2));
if (len < 200 || len + bits < 2000 || nw == 0)
{
_fmpz_poly_taylor_shift_divconquer(poly, c, len1);
_fmpz_poly_taylor_shift_divconquer(poly + len1, c, len2);
}
else
{
slong nw_save;
args[0].poly = poly;
args[0].c = c;
args[0].len = len1;
args[1].poly = poly + len1;
args[1].c = c;
args[1].len = len2;
FLINT_ASSERT(nt >= 2);
nw_save = flint_set_num_workers(nt - nt/2 - 1);
thread_pool_wake(global_thread_pool, threads[0], nt/2 - 1,
_fmpz_poly_taylor_shift_divconquer_worker, &args[1]);
_fmpz_poly_taylor_shift_divconquer_worker(&args[0]);
flint_reset_num_workers(nw_save);
thread_pool_wait(global_thread_pool, threads[0]);
}
flint_give_back_threads(threads, nw);
tmp = _fmpz_vec_init(len1 + 1);
tmp2 = _fmpz_vec_init(len);
fmpz_one(tmp);
for (k = 1; k <= len1; k++)
{
if (k > len1 - k)
{
fmpz_set(tmp + k, tmp + len1 - k);
}
else
{
fmpz_mul_ui(tmp + k, tmp + k - 1, len1 + 1 - k);
fmpz_divexact_ui(tmp + k, tmp + k, k);
}
}
if (!fmpz_is_one(c))
{
if (fmpz_cmp_si(c, -1) == 0)
{
for (k = len1 - 1; k >= 0; k -= 2)
fmpz_neg(tmp + k, tmp + k);
}
else
{
fmpz_set(tmp2, c);
for (k = len1 - 1; k >= 0; k--)
{
fmpz_mul(tmp + k, tmp + k, tmp2);
fmpz_mul(tmp2, tmp2, c);
}
}
}
_fmpz_poly_mul(tmp2, tmp, len1 + 1, poly + len1, len2);
_fmpz_vec_add(poly, poly, tmp2, len1);
_fmpz_vec_set(poly + len1, tmp2 + len1, len2);
_fmpz_vec_clear(tmp, len1 + 1);
_fmpz_vec_clear(tmp2, len);
}
void
fmpz_poly_taylor_shift_divconquer(fmpz_poly_t g, const fmpz_poly_t f,
const fmpz_t c)
{
if (f != g)
fmpz_poly_set(g, f);
_fmpz_poly_taylor_shift_divconquer(g->coeffs, c, g->length);
}