#include "mpn_extras.h"
#include "fmpz.h"
#include "fmpz_vec.h"
#include "fmpz_poly.h"
/*
    Writes the low `len` coefficients of pol1 * pol2 to out, where both
    inputs supply `len` coefficients.

    Splitting each input as A = A0 + x^lo * A1 (A0 has lo = len / 2
    coefficients, A1 the remaining hi = len - lo), the truncated product is
    assembled from three smaller products:

        A0 * B0                         (full product, via mul_karatsuba)
        (A0 + A1) * (B0 + B1) mod x^hi  (recursive call)
        A1 * B1 mod x^hi                (recursive call)

    temp is scratch space: this level uses temp[0, 3 * hi) and hands the
    remainder to the recursive calls.  Inputs of length <= 6 are passed
    straight to the classical routine and use no scratch.
*/
static void
_fmpz_poly_mullow_kara_recursive(fmpz * out, const fmpz * pol1,
                                 const fmpz * pol2, fmpz * temp, slong len)
{
    const slong lo = len / 2;           /* size of the low halves */
    const slong hi = len - lo;          /* size of the high halves */
    const int is_odd = (len & 1);       /* then hi == lo + 1 */
    fmpz *t0, *t1, *t2;

    if (len <= 6)
    {
        _fmpz_poly_mullow_classical(out, pol1, len, pol2, len, len);
        return;
    }

    /* Three consecutive scratch regions of hi entries each. */
    t0 = temp;
    t1 = temp + hi;
    t2 = temp + 2 * hi;

    /* t1 = A0 + A1; for odd len the top entry of A1 has no partner in A0. */
    _fmpz_vec_add(t1, pol1, pol1 + lo, lo);
    if (is_odd)
    {
        fmpz_set(t1 + lo, pol1 + 2 * lo);
    }

    /* t2 = B0 + B1, formed the same way. */
    _fmpz_vec_add(t2, pol2, pol2 + lo, lo);
    if (is_odd)
    {
        fmpz_set(t2 + lo, pol2 + 2 * lo);
    }

    /*
        out[0, 2*lo - 1) = A0 * B0.  The slot right after the product is
        zeroed so that out[0, 2*lo) can be read and accumulated into below.
    */
    _fmpz_poly_mul_karatsuba(out, pol1, lo, pol2, lo);
    fmpz_zero(out + 2 * lo - 1);

    /* t0 = (A0 + A1) * (B0 + B1) mod x^hi; scratch begins after t2. */
    _fmpz_poly_mullow_kara_recursive(t0, t1, t2, temp + 3 * hi, hi);

    /*
        t1 = A1 * B1 mod x^hi.  The sums in t1 and t2 are no longer needed,
        so t1 takes the result and the scratch starts at t2.
    */
    _fmpz_poly_mullow_kara_recursive(t1, pol1 + lo, pol2 + lo, t2, hi);

    /* t0 = middle term (A0*B1 + A1*B0) mod x^hi. */
    _fmpz_vec_sub(t0, t0, out, hi);
    _fmpz_vec_sub(t0, t0, t1, hi);

    /*
        For odd len the constant term of A1 * B1 sits at x^(2*lo) = x^(len-1),
        which is still inside the truncation.  It has to be stored before the
        final addition, since that addition also touches out[2*lo].
    */
    if (is_odd)
    {
        fmpz_set(out + 2 * lo, t1);
    }

    /* Add the middle term at offset lo; this fills out[lo, len). */
    _fmpz_vec_add(out + lo, out + lo, t0, hi);
}
/*
    Sets res[0, n) to the low n coefficients of poly1 * poly2, where both
    inputs supply n coefficients.

    n == 1 is a single integer multiplication.  Otherwise a scratch vector
    of 3 * P entries is allocated, P being the smallest power of two that is
    >= n, and the work is delegated to the recursive routine.
*/
void
_fmpz_poly_mullow_karatsuba_n(fmpz * res, const fmpz * poly1,
                              const fmpz * poly2, slong n)
{
    slong pow2 = WORD(1);
    slong scratch_len;
    fmpz *scratch;

    if (n == 1)
    {
        fmpz_mul(res, poly1, poly2);
        return;
    }

    /* Round n up to a power of two. */
    while (pow2 < n)
    {
        pow2 <<= 1;
    }
    scratch_len = 3 * pow2;

    scratch = _fmpz_vec_init(scratch_len);
    _fmpz_poly_mullow_kara_recursive(res, poly1, poly2, scratch, n);
    _fmpz_vec_clear(scratch, scratch_len);
}
/*
    Sets res[0, n) to the low n coefficients of poly1 * poly2 for inputs of
    lengths len1 and len2.

    The worker routine reads n coefficients from each operand.  An operand
    with fewer than n coefficients is therefore replaced by a temporary
    buffer of n entries: the first entries are raw copies of the operand's
    fmpz words and the tail is zero-filled.  Since the copies are shallow,
    the buffer is released with flint_free and its entries are never cleared.
*/
void
_fmpz_poly_mullow_karatsuba(fmpz * res, const fmpz * poly1, slong len1,
                            const fmpz * poly2, slong len2, slong n)
{
    const int pad1 = (len1 < n);
    const int pad2 = (len2 < n);
    fmpz *op1 = (fmpz *) poly1;
    fmpz *op2 = (fmpz *) poly2;
    slong k;

    if (pad1)
    {
        op1 = (fmpz *) flint_malloc(n * sizeof(fmpz));
        for (k = 0; k < len1; k++)
        {
            op1[k] = poly1[k];
        }
        /* NOTE(review): treats each fmpz as one limb whose all-zero word
           is the value 0 -- matches FLINT's representation; confirm. */
        flint_mpn_zero((nn_ptr) op1 + len1, n - len1);
    }

    if (pad2)
    {
        op2 = (fmpz *) flint_malloc(n * sizeof(fmpz));
        for (k = 0; k < len2; k++)
        {
            op2[k] = poly2[k];
        }
        flint_mpn_zero((nn_ptr) op2 + len2, n - len2);
    }

    _fmpz_poly_mullow_karatsuba_n(res, op1, op2, n);

    /* Release only the buffers allocated here, never the caller's data. */
    if (pad1)
    {
        flint_free(op1);
    }
    if (pad2)
    {
        flint_free(op2);
    }
}
/*
    Sets res to poly1 * poly2 truncated to its low n coefficients.

    Each input is considered only up to its first n coefficients; if either
    is then empty the result is the zero polynomial.  n is reduced to the
    length of the full product when it exceeds it.  res may be the same
    object as poly1 or poly2: the product is then computed into a temporary
    polynomial which is swapped into res.
*/
void
fmpz_poly_mullow_karatsuba_n(fmpz_poly_t res, const fmpz_poly_t poly1,
                             const fmpz_poly_t poly2, slong n)
{
    const slong len1 = FLINT_MIN(poly1->length, n);
    const slong len2 = FLINT_MIN(poly2->length, n);
    slong full_len;

    if (len1 == 0 || len2 == 0)
    {
        fmpz_poly_zero(res);
        return;
    }

    /* No point in asking for more coefficients than the product has. */
    full_len = len1 + len2 - 1;
    if (n > full_len)
    {
        n = full_len;
    }

    if (res == poly1 || res == poly2)
    {
        /* Output aliases an input: build the result in a temporary. */
        fmpz_poly_t prod;

        fmpz_poly_init2(prod, n);
        _fmpz_poly_mullow_karatsuba(prod->coeffs, poly1->coeffs, len1,
                                    poly2->coeffs, len2, n);
        fmpz_poly_swap(res, prod);
        fmpz_poly_clear(prod);
    }
    else
    {
        fmpz_poly_fit_length(res, n);
        _fmpz_poly_mullow_karatsuba(res->coeffs, poly1->coeffs, len1,
                                    poly2->coeffs, len2, n);
    }

    /* Record the length, then drop any leading zero coefficients. */
    _fmpz_poly_set_length(res, n);
    _fmpz_poly_normalise(res);
}