#include "nmod.h"
#include "fq_nmod.h"
#include "fq_nmod_poly.h"
#include "n_poly.h"
#include "fq_nmod_mpoly/impl.h"
void n_fq_poly_product_roots_n_fq(
n_poly_t master,
const ulong * monomials,
slong mlength,
const fq_nmod_ctx_t ctx,
n_poly_stack_t St)
{
slong d = fq_nmod_ctx_degree(ctx);
slong i;
fq_nmod_poly_t p;
fq_nmod_struct * m = FLINT_ARRAY_ALLOC(mlength, fq_nmod_struct);
fq_nmod_poly_init(p, ctx);
for (i = 0; i < mlength; i++)
{
fq_nmod_init(m + i, ctx);
n_fq_get_fq_nmod(m + i, monomials + d*i, ctx);
}
fq_nmod_poly_product_roots(p, m, mlength, ctx);
n_fq_poly_set_fq_nmod_poly(master, p, ctx);
fq_nmod_poly_clear(p, ctx);
for (i = 0; i < mlength; i++)
fq_nmod_clear(m + i, ctx);
flint_free(m);
}
slong n_polyun_product_roots(
n_polyun_t M,
const n_polyun_t H,
nmod_t ctx)
{
slong i, max_length = 0;
n_polyun_fit_length(M, H->length);
M->length = H->length;
for (i = 0; i < H->length; i++)
{
slong len = H->coeffs[i].length;
M->exps[i] = H->exps[i];
max_length = FLINT_MAX(max_length, len);
n_poly_mod_product_roots_nmod_vec(M->coeffs + i, H->coeffs[i].coeffs, len, ctx);
}
return max_length;
}
slong n_fq_polyun_product_roots(
n_fq_polyun_t M,
const n_fq_polyun_t H,
const fq_nmod_ctx_t ctx,
n_poly_stack_t St)
{
slong i, max_length = 0;
n_polyun_fit_length(M, H->length);
M->length = H->length;
for (i = 0; i < H->length; i++)
{
slong len = H->coeffs[i].length;
M->exps[i] = H->exps[i];
max_length = FLINT_MAX(max_length, len);
n_fq_poly_product_roots_n_fq(M->coeffs + i, H->coeffs[i].coeffs, len, ctx, St);
}
return max_length;
}
ulong _nmod_zip_eval_step(
ulong * cur,
const ulong * inc,
const ulong * coeffs,
slong length,
nmod_t ctx)
{
slong i;
ulong t0, t1, t2, p0, p1;
t2 = t1 = t0 = 0;
for (i = 0; i < length; i++)
{
umul_ppmm(p1, p0, cur[i], coeffs[i]);
add_sssaaaaaa(t2, t1, t0, t2, t1, t0, 0, p1, p0);
cur[i] = nmod_mul(cur[i], inc[i], ctx);
}
NMOD_RED3(t0, t2, t1, t0, ctx);
return t0;
}
void _n_fq_zip_eval_step(
ulong * res,
ulong * cur,
const ulong * inc,
const ulong * coeffs,
slong length,
const fq_nmod_ctx_t ctx)
{
slong d = fq_nmod_ctx_degree(ctx);
slong i;
ulong * tmp, * sum;
TMP_INIT;
if (length < 1)
{
_n_fq_zero(res, d);
return;
}
TMP_START;
tmp = (ulong *) TMP_ALLOC(8*d*sizeof(ulong));
sum = tmp + 4*d;
i = 0;
_n_fq_mul2(sum, cur + d*i, coeffs + d*i, ctx);
_n_fq_mul(cur + d*i, cur + d*i, inc + d*i, ctx, tmp);
for (i = 1; i < length; i++)
{
_n_fq_madd2(sum, cur + d*i, coeffs + d*i, ctx, tmp);
_n_fq_mul(cur + d*i, cur + d*i, inc + d*i, ctx, tmp);
}
_n_fq_reduce2(res, sum, ctx, tmp);
TMP_END;
}
void _n_fqp_zip_eval_step(
ulong * res,
ulong * cur,
const ulong * inc,
const ulong * coeffs,
slong length,
slong d,
nmod_t mod)
{
slong i, j;
ulong p0, p1;
ulong * tmp;
TMP_INIT;
if (length < 1)
{
_n_fq_zero(res, d);
return;
}
TMP_START;
tmp = (ulong *) TMP_ALLOC(3*d*sizeof(ulong));
i = 0;
for (j = 0; j < d; j++)
{
umul_ppmm(tmp[3*j+1], tmp[3*j+0], cur[i], (coeffs + d*i)[j]);
tmp[3*j+2] = 0;
}
cur[i] = nmod_mul(cur[i], inc[i], mod);
for (i = 1; i < length; i++)
{
for (j = 0; j < d; j++)
{
umul_ppmm(p1, p0, cur[i], (coeffs + d*i)[j]);
add_sssaaaaaa(tmp[3*j+2], tmp[3*j+1], tmp[3*j+0],
tmp[3*j+2], tmp[3*j+1], tmp[3*j+0], 0, p1, p0);
}
cur[i] = nmod_mul(cur[i], inc[i], mod);
}
for (j = 0; j < d; j++)
NMOD_RED3(res[j], tmp[3*j+2], tmp[3*j+1], tmp[3*j+0], mod);
TMP_END;
}
int _nmod_zip_vand_solve(
ulong * coeffs,
const ulong * monomials,
slong mlength,
const ulong * evals,
slong elength,
const ulong * master,
ulong * scratch,
nmod_t ctx)
{
slong i, j;
ulong V, V0, V1, V2, T, S, r, p0, p1;
FLINT_ASSERT(elength >= mlength);
for (i = 0; i < mlength; i++)
{
V0 = V1 = V2 = T = S = 0;
r = monomials[i];
for (j = mlength; j > 0; j--)
{
T = nmod_add(nmod_mul(r, T, ctx), master[j], ctx);
S = nmod_add(nmod_mul(r, S, ctx), T, ctx);
umul_ppmm(p1, p0, evals[j - 1], T);
add_sssaaaaaa(V2, V1, V0, V2, V1, V0, 0, p1, p0);
}
FLINT_ASSERT(nmod_add(nmod_mul(r, T, ctx), master[0], ctx) == 0);
NMOD_RED3(V, V2, V1, V0, ctx);
S = nmod_mul(S, r, ctx);
if (S == 0)
return -1;
coeffs[i] = nmod_mul(V, nmod_inv(S, ctx), ctx);
}
for (j = 0; j < mlength; j++)
scratch[j] = nmod_pow_ui(monomials[j], mlength, ctx);
for (i = mlength; i < elength; i++)
{
V0 = V1 = V2 = S = 0;
for (j = 0; j < mlength; j++)
{
scratch[j] = nmod_mul(scratch[j], monomials[j], ctx);
umul_ppmm(p1, p0, coeffs[j], scratch[j]);
add_sssaaaaaa(V2, V1, V0, V2, V1, V0, 0, p1, p0);
}
NMOD_RED3(V, V2, V1, V0, ctx);
if (V != evals[i])
return 0;
}
return 1;
}
int _n_fq_zip_vand_solve(
ulong * coeffs,
const ulong * monomials,
slong mlength,
const ulong * evals,
slong elength,
const ulong * master,
ulong * scratch,
const fq_nmod_ctx_t ctx)
{
slong d = fq_nmod_ctx_degree(ctx);
nmod_t mod = fq_nmod_ctx_mod(ctx);
int success;
slong i, j;
ulong * tmp = FLINT_ARRAY_ALLOC(12*d, ulong);
ulong * V = tmp + 6*d;
ulong * V0 = V + d;
ulong * T = V0 + d;
ulong * S = T + d;
ulong * r = S + d;
ulong * p0 = r + d;
FLINT_ASSERT(elength >= mlength);
for (i = 0; i < mlength; i++)
{
_n_fq_zero(V0, d);
_n_fq_zero(T, d);
_n_fq_zero(S, d);
_n_fq_set(r, monomials + d*i, d);
for (j = mlength; j > 0; j--)
{
_n_fq_mul(T, r, T, ctx, tmp);
_n_fq_add(T, T, master + d*j, d, mod);
_n_fq_mul(S, r, S, ctx, tmp);
_n_fq_add(S, S, T, d, mod);
_n_fq_mul(p0, evals + d*(j - 1), T, ctx, tmp);
_n_fq_add(V0, V0, p0, d, mod);
}
#if FLINT_WANT_ASSERT
_n_fq_mul(p0, r, T, ctx, tmp);
_n_fq_add(p0, p0, master + d*0, d, mod);
FLINT_ASSERT(_n_fq_is_zero(p0, d));
#endif
_n_fq_set(V, V0, d);
_n_fq_mul(S, S, r, ctx, tmp);
if (_n_fq_is_zero(S, d))
{
success = -1;
goto cleanup;
}
_n_fq_inv(p0, S, ctx, tmp);
_n_fq_mul(p0, V, p0, ctx, tmp);
_n_fq_set(coeffs + d*i, p0, d);
}
for (j = 0; j < mlength; j++)
{
_n_fq_set(p0, monomials + d*j, d);
_n_fq_pow_ui(scratch + d*j, p0, mlength, ctx);
}
for (i = mlength; i < elength; i++)
{
_n_fq_zero(V0, d);
_n_fq_zero(S, d);
for (j = 0; j < mlength; j++)
{
_n_fq_set(p0, monomials + d*j, d);
_n_fq_mul(scratch + d*j, scratch + d*j, p0, ctx, tmp);
_n_fq_set(p0, coeffs + d*j, d);
_n_fq_mul(p0, p0, scratch + d*j, ctx, tmp);
_n_fq_add(V0, V0, p0, d, mod);
}
_n_fq_set(V, V0, d);
if (!_n_fq_equal(V, evals + d*i, d))
{
success = 0;
goto cleanup;
}
}
success = 1;
cleanup:
flint_free(tmp);
return success;
}
int _n_fqp_zip_vand_solve(
ulong * coeffs,
const ulong * monomials,
slong mlength,
const ulong * evals,
slong elength,
const ulong * master,
ulong * scratch,
const fq_nmod_ctx_t ctx)
{
slong d = fq_nmod_ctx_degree(ctx);
nmod_t mod = fq_nmod_ctx_mod(ctx);
int success;
slong i, j, k;
ulong * tmp = FLINT_ARRAY_ALLOC(d*20, ulong);
ulong * V = tmp + 6*d;
ulong * V0 = V + d;
ulong * T = V0 + d;
ulong * S = T + d;
ulong * r = S + d;
ulong * p0 = r + d;
ulong * V_p = p0 + d;
ulong r_p, T_p, S_p;
FLINT_ASSERT(elength >= mlength);
for (i = 0; i < mlength; i++)
{
_n_fq_zero(V0, d);
_n_fq_zero(T, d);
_n_fq_zero(S, d);
_nmod_vec_zero(V_p, 3*d);
T_p = S_p = 0;
r_p = monomials[i];
for (j = mlength; j > 0; j--)
{
T_p = nmod_add(nmod_mul(r_p, T_p, mod), master[j], mod);
S_p = nmod_add(nmod_mul(r_p, S_p, mod), T_p, mod);
for (k = 0; k < d; k++)
{
ulong hi, lo;
umul_ppmm(hi, lo, T_p, (evals + d*(j - 1))[k]);
add_sssaaaaaa(V_p[3*k+2], V_p[3*k+1], V_p[3*k+0],
V_p[3*k+2], V_p[3*k+1], V_p[3*k+0], 0, hi, lo);
}
}
FLINT_ASSERT(nmod_add(nmod_mul(r_p, T_p, mod), master[0], mod) == 0);
S_p = nmod_mul(S_p, r_p, mod);
if (S == 0)
return -1;
S_p = nmod_inv(S_p, mod);
for (k = 0; k < d; k++)
{
ulong vk;
NMOD_RED3(vk, V_p[3*k+2], V_p[3*k+1], V_p[3*k+0], mod);
(coeffs + d*i)[k] = nmod_mul(vk, S_p, mod);
}
}
for (j = 0; j < mlength; j++)
scratch[j] = nmod_pow_ui(monomials[j], mlength, mod);
for (i = mlength; i < elength; i++)
{
_nmod_vec_zero(V_p, 3*d);
S_p = 0;
for (j = 0; j < mlength; j++)
{
scratch[j] = nmod_mul(scratch[j], monomials[j], mod);
for (k = 0; k < d; k++)
{
ulong hi, lo;
umul_ppmm(hi, lo, scratch[j], (coeffs + d*j)[k]);
add_sssaaaaaa(V_p[3*k+2], V_p[3*k+1], V_p[3*k+0],
V_p[3*k+2], V_p[3*k+1], V_p[3*k+0], 0, hi, lo);
}
}
for (k = 0; k < d; k++)
{
ulong vk;
NMOD_RED3(vk, V_p[3*k+2], V_p[3*k+1], V_p[3*k+0], mod);
if (vk != (evals + d*i)[k])
{
success = 0;
goto cleanup;
}
}
}
success = 1;
cleanup:
flint_free(tmp);
return success;
}