#include "fmpz.h"
#include "fmpz_vec.h"
#include "mpoly.h"
#include "fmpz_mpoly.h"
/*
    Single-word merge kernel: writes the terms of B*c + D*e to
    (Acoeffs, Aexps) and returns how many terms were written.

    Each exponent occupies exactly one word.  Two exponents are ordered by
    comparing them as unsigned words after XOR with maskhi; the term with the
    larger masked key is emitted first.  When both keys agree the coefficients
    are combined with fmpz_fmma and the term is kept only if the result is
    nonzero.

    The output arrays must have room for Blen + Dlen terms.
    NOTE(review): the merge only produces a sorted result if both inputs are
    already sorted in decreasing masked order -- confirm against callers.
*/
static slong _fmpz_mpoly_scalar_fmma1(
    fmpz * Acoeffs, ulong * Aexps,
    const fmpz * Bcoeffs, const ulong * Bexps, slong Blen,
    const fmpz_t c,
    const fmpz * Dcoeffs, const ulong * Dexps, slong Dlen,
    const fmpz_t e,
    ulong maskhi)
{
    slong bi = 0;   /* next unread term of B */
    slong di = 0;   /* next unread term of D */
    slong out = 0;  /* next free slot of A   */

    /* Two-way merge while both inputs still have terms. */
    while (bi < Blen && di < Dlen)
    {
        const ulong bkey = Bexps[bi] ^ maskhi;
        const ulong dkey = Dexps[di] ^ maskhi;

        if (bkey == dkey)
        {
            /* Same monomial: A[out] = B[bi]*c + D[di]*e. */
            fmpz_fmma(Acoeffs + out, Bcoeffs + bi, c, Dcoeffs + di, e);
            Aexps[out] = Bexps[bi];

            /* A cancelled term is overwritten by the next one. */
            if (!fmpz_is_zero(Acoeffs + out))
                out++;

            bi++;
            di++;
        }
        else if (bkey > dkey)
        {
            fmpz_mul(Acoeffs + out, Bcoeffs + bi, c);
            Aexps[out] = Bexps[bi];
            bi++;
            out++;
        }
        else
        {
            fmpz_mul(Acoeffs + out, Dcoeffs + di, e);
            Aexps[out] = Dexps[di];
            di++;
            out++;
        }
    }

    /* At most one of the two tails below is non-empty. */
    for ( ; bi < Blen; bi++, out++)
    {
        fmpz_mul(Acoeffs + out, Bcoeffs + bi, c);
        Aexps[out] = Bexps[bi];
    }

    for ( ; di < Dlen; di++, out++)
    {
        fmpz_mul(Acoeffs + out, Dcoeffs + di, e);
        Aexps[out] = Dexps[di];
    }

    return out;
}
/*
    Merge kernel: writes the terms of B*c + D*e to (Acoeffs, Aexps) and
    returns how many terms were written.

    Every exponent vector occupies N words; term t of an input lives at
    offset t*N of its exponent array.  Exponents are ordered with
    mpoly_monomial_cmp under cmpmask, and the term that compares greater is
    emitted first.  Equal monomials are combined with fmpz_fmma and dropped
    when the resulting coefficient is zero.

    When N == 1 the work is handed to _fmpz_mpoly_scalar_fmma1 with
    cmpmask[0] as its mask.

    The output arrays must have room for Blen + Dlen terms.
*/
static slong _fmpz_mpoly_scalar_fmma(
    fmpz * Acoeffs, ulong * Aexps,
    const fmpz * Bcoeffs, const ulong * Bexps, slong Blen,
    const fmpz_t c,
    const fmpz * Dcoeffs, const ulong * Dexps, slong Dlen,
    const fmpz_t e,
    slong N,
    const ulong * cmpmask)
{
    slong bi, di, out;

    /* One-word exponents have a dedicated kernel. */
    if (N == 1)
        return _fmpz_mpoly_scalar_fmma1(Acoeffs, Aexps,
                                        Bcoeffs, Bexps, Blen, c,
                                        Dcoeffs, Dexps, Dlen, e, cmpmask[0]);

    bi = 0;
    di = 0;
    out = 0;

    /* Two-way merge while both inputs still have terms. */
    while (bi < Blen && di < Dlen)
    {
        const ulong * bexp = Bexps + bi*N;
        const ulong * dexp = Dexps + di*N;
        fmpz * dst = Acoeffs + out;
        ulong * dstexp = Aexps + out*N;
        int order = mpoly_monomial_cmp(bexp, dexp, N, cmpmask);

        if (order == 0)
        {
            /* Same monomial: dst = B[bi]*c + D[di]*e. */
            fmpz_fmma(dst, Bcoeffs + bi, c, Dcoeffs + di, e);
            mpoly_monomial_set(dstexp, bexp, N);

            /* A cancelled term is overwritten by the next one. */
            if (!fmpz_is_zero(dst))
                out++;

            bi++;
            di++;
        }
        else if (order > 0)
        {
            fmpz_mul(dst, Bcoeffs + bi, c);
            mpoly_monomial_set(dstexp, bexp, N);
            bi++;
            out++;
        }
        else
        {
            fmpz_mul(dst, Dcoeffs + di, e);
            mpoly_monomial_set(dstexp, dexp, N);
            di++;
            out++;
        }
    }

    /* At most one of the two tails below is non-empty. */
    for ( ; bi < Blen; bi++, out++)
    {
        fmpz_mul(Acoeffs + out, Bcoeffs + bi, c);
        mpoly_monomial_set(Aexps + out*N, Bexps + bi*N, N);
    }

    for ( ; di < Dlen; di++, out++)
    {
        fmpz_mul(Acoeffs + out, Dcoeffs + di, e);
        mpoly_monomial_set(Aexps + out*N, Dexps + di*N, N);
    }

    return out;
}
/*
    In-place update A = A*a + B*b.

    Preconditions (checked with FLINT_ASSERT): A and B are distinct objects,
    both have at least one term, and neither scalar is zero.

    Two strategies are used:
      - fast path ("doit"): if, among the last Alen/4 terms of A, one is
        found that compares >= the leading term of B, only the tail of A
        below that point is merged with B; the leading part of A is just
        scaled by a where it stands.
      - fallback: otherwise the whole of A is merged with B into a temporary
        polynomial, which is then swapped into A.
*/
static void fmpz_mpoly_scalar_fmma_inplace(
fmpz_mpoly_t A,
const fmpz_t a,
const fmpz_mpoly_t B,
const fmpz_t b,
const fmpz_mpoly_ctx_t ctx)
{
slong i, s, new_len, N;
slong Alen = A->length;
slong Blen = B->length;
ulong * Bexps, * cmpmask;
int cmp, freeBexps;
flint_bitcnt_t Abits;
fmpz_mpoly_t T;
TMP_INIT;
FLINT_ASSERT(A != B);
FLINT_ASSERT(Alen > 0);
FLINT_ASSERT(Blen > 0);
FLINT_ASSERT(!fmpz_is_zero(a));
FLINT_ASSERT(!fmpz_is_zero(b));
TMP_START;
/* Bring both exponent arrays to the common bit count max(A->bits, B->bits). */
if (A->bits <= B->bits)
{
/* B already has the target packing; widen A itself if it is narrower. */
Abits = B->bits;
if (A->bits < B->bits)
fmpz_mpoly_repack_bits_inplace(A, Abits, ctx);
N = mpoly_words_per_exp(Abits, ctx->minfo);
Bexps = B->exps;
freeBexps = 0;
}
else
{
/* B is const, so repack its exponents into a heap copy freed at cleanup. */
Abits = A->bits;
N = mpoly_words_per_exp(Abits, ctx->minfo);
Bexps = (ulong *) flint_malloc(N*Blen*sizeof(ulong));
mpoly_repack_monomials(Bexps, Abits, B->exps, B->bits, Blen, ctx->minfo);
freeBexps = 1;
}
cmpmask = (ulong *) TMP_ALLOC(N*sizeof(ulong));
mpoly_get_cmpmask(cmpmask, N, Abits, ctx->minfo);
/*
    Scan A from its last term backwards, looking at no more than Alen/4
    terms, for the first one that compares >= the leading term of B.
    On exit via "doit", s is the number of trailing terms of A that must
    take part in the merge; a term equal to B's leading term is included
    (hence the extra increment when cmp == 0).
*/
for (s = 0; s < Alen/4; s++)
{
cmp = mpoly_monomial_cmp(A->exps + N*(Alen - s - 1),
Bexps + N*0, N, cmpmask);
if (cmp >= 0)
{
s += (cmp == 0);
goto doit;
}
}
/* Fallback: no split point in the scanned tail; merge everything into T. */
fmpz_mpoly_init3(T, Alen + Blen, Abits, ctx);
T->length = _fmpz_mpoly_scalar_fmma(T->coeffs, T->exps,
A->coeffs, A->exps, Alen, a,
B->coeffs, Bexps, Blen, b, N, cmpmask);
fmpz_mpoly_swap(A, T, ctx);
fmpz_mpoly_clear(T, ctx);
goto cleanup;
doit:
/* The last s terms of A are <= B's leading term; the one before is > it. */
FLINT_ASSERT(0 <= s && s <= Alen);
FLINT_ASSERT(s == 0 || mpoly_monomial_cmp(A->exps + N*(Alen - s),
Bexps + N*0, N, cmpmask) <= 0);
FLINT_ASSERT(s == Alen || mpoly_monomial_cmp(A->exps + N*(Alen - s - 1),
Bexps + N*0, N, cmpmask) > 0);
/*
    Layout after growing A to Alen + Blen + s slots:
      [0, Alen - s)                 leading terms of A, kept in place
      [Alen - s, Alen + Blen)       destination of the merged tail
      [Alen + Blen, Alen + Blen+s)  scratch copy of A's last s terms
*/
fmpz_mpoly_fit_length(A, Alen + Blen + s, ctx);
/* Move the tail of A out to the scratch area (exponents copied, coeffs swapped). */
mpoly_copy_monomials(A->exps + N*(Alen + Blen), A->exps + N*(Alen - s), s, N);
_fmpz_vec_swap(A->coeffs + Alen + Blen, A->coeffs + Alen - s, s);
/* The untouched leading terms only need scaling by a; skipped when a is one. */
if (!fmpz_is_one(a))
_fmpz_vec_scalar_mul_fmpz(A->coeffs, A->coeffs, Alen - s, a);
/*
    Merge (scratch tail)*a + B*b into A starting at index Alen - s.
    The merge emits at most one term per input term consumed and the scratch
    area starts Blen + s slots after the write position, so a write never
    lands on a scratch term that has not been read yet.
*/
new_len = _fmpz_mpoly_scalar_fmma(
A->coeffs + Alen - s, A->exps + N*(Alen - s),
A->coeffs + (Alen + Blen), A->exps + N*(Alen + Blen), s, a,
B->coeffs, Bexps, Blen, b, N, cmpmask);
/* Scratch coefficients are no longer needed; presumably this releases any
   multiprecision storage they hold -- confirm against _fmpz_demote. */
for (i = 0; i < s; i++)
_fmpz_demote(A->coeffs + Alen + Blen + i);
/* Final length: untouched leading terms plus the merged tail. */
_fmpz_mpoly_set_length(A, Alen - s + new_len, ctx);
cleanup:
if (freeBexps)
flint_free(Bexps);
TMP_END;
return;
}
/*
    Sets A = B*c + D*e.

    A may alias B, D, or both.  Degenerate inputs are reduced to a single
    scalar multiplication:
      - B is zero or c is zero:  A = D*e
      - D is zero or e is zero:  A = B*c
      - A, B and D are the same object:  A = A*(c + e)
    If A aliases exactly one operand the in-place routine is used.
    Otherwise both operands are brought to the common bit count
    max(B->bits, D->bits) and merged directly into A.
*/
void fmpz_mpoly_scalar_fmma(
    fmpz_mpoly_t A,
    const fmpz_mpoly_t B,
    const fmpz_t c,
    const fmpz_mpoly_t D,
    const fmpz_t e,
    const fmpz_mpoly_ctx_t ctx)
{
    slong N, merged_len;
    flint_bitcnt_t bits;
    ulong * Bpacked = B->exps;
    ulong * Dpacked = D->exps;
    ulong * cmpmask;
    int ownB = 0;   /* nonzero when Bpacked is a heap copy to be freed */
    int ownD = 0;   /* nonzero when Dpacked is a heap copy to be freed */
    TMP_INIT;

    /* One summand vanishes: the result is a scalar multiple of the other. */
    if (fmpz_mpoly_is_zero(B, ctx) || fmpz_is_zero(c))
    {
        fmpz_mpoly_scalar_mul_fmpz(A, D, e, ctx);
        return;
    }

    if (fmpz_mpoly_is_zero(D, ctx) || fmpz_is_zero(e))
    {
        fmpz_mpoly_scalar_mul_fmpz(A, B, c, ctx);
        return;
    }

    /* Full aliasing: A*c + A*e = A*(c + e). */
    if (A == B && A == D)
    {
        fmpz_t sum;

        fmpz_init(sum);
        fmpz_add(sum, c, e);
        fmpz_mpoly_scalar_mul_fmpz(A, A, sum, ctx);
        fmpz_clear(sum);
        return;
    }

    /* Partial aliasing: the aliased operand's scalar goes first. */
    if (A == B)
    {
        fmpz_mpoly_scalar_fmma_inplace(A, c, D, e, ctx);
        return;
    }

    if (A == D)
    {
        fmpz_mpoly_scalar_fmma_inplace(A, e, B, c, ctx);
        return;
    }

    /* General case: A is distinct from both operands. */
    bits = FLINT_MAX(B->bits, D->bits);
    N = mpoly_words_per_exp(bits, ctx->minfo);

    TMP_START;

    cmpmask = (ulong *) TMP_ALLOC(N*sizeof(ulong));
    mpoly_get_cmpmask(cmpmask, N, bits, ctx->minfo);

    /* Repack whichever operand is not already stored with `bits` bits. */
    if (bits != B->bits)
    {
        ownB = 1;
        Bpacked = (ulong *) flint_malloc(N*B->length*sizeof(ulong));
        mpoly_repack_monomials(Bpacked, bits, B->exps, B->bits,
                                                     B->length, ctx->minfo);
    }

    if (bits != D->bits)
    {
        ownD = 1;
        Dpacked = (ulong *) flint_malloc(N*D->length*sizeof(ulong));
        mpoly_repack_monomials(Dpacked, bits, D->exps, D->bits,
                                                     D->length, ctx->minfo);
    }

    /* The merge can produce at most B->length + D->length terms. */
    fmpz_mpoly_fit_length_reset_bits(A, B->length + D->length, bits, ctx);

    merged_len = _fmpz_mpoly_scalar_fmma(A->coeffs, A->exps,
                                 B->coeffs, Bpacked, B->length, c,
                                 D->coeffs, Dpacked, D->length, e,
                                 N, cmpmask);

    _fmpz_mpoly_set_length(A, merged_len, ctx);

    if (ownB)
        flint_free(Bpacked);

    if (ownD)
        flint_free(Dpacked);

    TMP_END;
}