#include "nmod.h"
#include "nmod_vec.h"
#include "mpoly.h"
#include "nmod_mpoly.h"
static slong _nmod_mpoly_mulsub1(nmod_mpoly_t A,
const ulong * Dcoeff, const ulong * Dexp, slong Dlen,
const ulong * Bcoeff, const ulong * Bexp, slong Blen,
const ulong * Ccoeff, const ulong * Cexp, slong Clen,
ulong maskhi, nmod_t fctx)
{
slong i, j;
slong next_loc;
slong heap_len = 2;
mpoly_heap1_s * heap;
mpoly_heap_t * chain;
slong * store, * store_base;
mpoly_heap_t * x;
slong Di;
slong Alen;
ulong * Acoeff = A->coeffs;
ulong * Aexp = A->exps;
ulong exp;
slong * hind;
ulong acc0, acc1, acc2, pp1, pp0;
TMP_INIT;
FLINT_ASSERT(Blen > 0);
FLINT_ASSERT(Clen > 0);
TMP_START;
next_loc = Blen + 4;
heap = (mpoly_heap1_s *) TMP_ALLOC((Blen + 1)*sizeof(mpoly_heap1_s));
chain = (mpoly_heap_t *) TMP_ALLOC(Blen*sizeof(mpoly_heap_t));
store = store_base = (slong *) TMP_ALLOC(2*Blen*sizeof(slong));
hind = (slong *) TMP_ALLOC(Blen*sizeof(slong));
for (i = 0; i < Blen; i++)
hind[i] = 1;
x = chain + 0;
x->i = 0;
x->j = 0;
x->next = NULL;
HEAP_ASSIGN(heap[1], Bexp[0] + Cexp[0], x);
hind[0] = 2*1 + 0;
Alen = 0;
Di = 0;
while (heap_len > 1)
{
exp = heap[1].exp;
while (Di < Dlen && mpoly_monomial_gt1(Dexp[Di], exp, maskhi))
{
_nmod_mpoly_fit_length(&Acoeff, &A->coeffs_alloc,
&Aexp, &A->exps_alloc, 1, Alen + 1);
Aexp[Alen] = Dexp[Di];
Acoeff[Alen] = Dcoeff[Di];
Alen++;
Di++;
}
_nmod_mpoly_fit_length(&Acoeff, &A->coeffs_alloc,
&Aexp, &A->exps_alloc, 1, Alen + 1);
Aexp[Alen] = exp;
acc0 = acc1 = acc2 = 0;
do
{
x = _mpoly_heap_pop1(heap, &heap_len, maskhi);
hind[x->i] |= WORD(1);
*store++ = x->i;
*store++ = x->j;
umul_ppmm(pp1, pp0, Bcoeff[x->i], Ccoeff[x->j]);
add_sssaaaaaa(acc2, acc1, acc0, acc2, acc1, acc0, 0, pp1, pp0);
while ((x = x->next) != NULL)
{
hind[x->i] |= WORD(1);
*store++ = x->i;
*store++ = x->j;
umul_ppmm(pp1, pp0, Bcoeff[x->i], Ccoeff[x->j]);
add_sssaaaaaa(acc2, acc1, acc0, acc2, acc1, acc0, 0, pp1, pp0);
}
} while (heap_len > 1 && heap[1].exp == exp);
NMOD_RED3(pp0, acc2, acc1, acc0, fctx);
pp1 = 0;
if (Di < Dlen && Dexp[Di] == exp)
{
pp1 = Dcoeff[Di];
Di++;
}
Acoeff[Alen] = nmod_sub(pp1, pp0, fctx);
Alen += Acoeff[Alen] != 0;
while (store > store_base)
{
j = *--store;
i = *--store;
if ( (i + 1 < Blen)
&& (hind[i + 1] == 2*j + 1)
)
{
x = chain + i + 1;
x->i = i + 1;
x->j = j;
x->next = NULL;
hind[x->i] = 2*(x->j + 1) + 0;
_mpoly_heap_insert1(heap, Bexp[x->i] + Cexp[x->j], x,
&next_loc, &heap_len, maskhi);
}
if ( (j + 1 < Clen)
&& ((hind[i] & 1) == 1)
&& ((i == 0) || (hind[i - 1] >= 2*(j + 2) + 1))
)
{
x = chain + i;
x->i = i;
x->j = j + 1;
x->next = NULL;
hind[x->i] = 2*(x->j + 1) + 0;
_mpoly_heap_insert1(heap, Bexp[x->i] + Cexp[x->j], x,
&next_loc, &heap_len, maskhi);
}
}
}
FLINT_ASSERT(Di <= Dlen);
if (Di < Dlen)
{
_nmod_mpoly_fit_length(&Acoeff, &A->coeffs_alloc,
&Aexp, &A->exps_alloc, 1, Alen + Dlen - Di);
_nmod_vec_set(Acoeff + Alen, Dcoeff + Di, Dlen - Di);
mpoly_copy_monomials(Aexp + 1*Alen, Dexp + 1*Di, Dlen - Di, 1);
Alen += Dlen - Di;
}
A->coeffs = Acoeff;
A->exps = Aexp;
A->length = Alen;
TMP_END;
return Alen;
}
static void _nmod_mpoly_mulsub(nmod_mpoly_t A,
const ulong * Dcoeff, const ulong * Dexp, slong Dlen,
const ulong * Bcoeff, const ulong * Bexp, slong Blen,
const ulong * Ccoeff, const ulong * Cexp, slong Clen,
flint_bitcnt_t bits, slong N, const ulong * cmpmask, nmod_t fctx)
{
slong i, j;
slong next_loc;
slong heap_len = 2;
mpoly_heap_s * heap;
mpoly_heap_t * chain;
slong * store, * store_base;
mpoly_heap_t * x;
slong Di;
slong Alen;
ulong * Acoeff = A->coeffs;
ulong * Aexp = A->exps;
ulong * exp, * exps;
ulong ** exp_list;
slong exp_next;
slong * hind;
ulong acc0, acc1, acc2, pp1, pp0;
TMP_INIT;
FLINT_ASSERT(Blen > 0);
FLINT_ASSERT(Clen > 0);
if (N == 1)
{
_nmod_mpoly_mulsub1(A, Dcoeff, Dexp, Dlen,
Bcoeff, Bexp, Blen, Ccoeff, Cexp, Clen, cmpmask[0], fctx);
return;
}
TMP_START;
next_loc = Blen + 4;
heap = (mpoly_heap_s *) TMP_ALLOC((Blen + 1)*sizeof(mpoly_heap_s));
chain = (mpoly_heap_t *) TMP_ALLOC(Blen*sizeof(mpoly_heap_t));
store = store_base = (slong *) TMP_ALLOC(2*Blen*sizeof(slong));
exps = (ulong *) TMP_ALLOC(Blen*N*sizeof(ulong));
exp_list = (ulong **) TMP_ALLOC(Blen*sizeof(ulong *));
for (i = 0; i < Blen; i++)
exp_list[i] = exps + i*N;
hind = (slong *) TMP_ALLOC(Blen*sizeof(slong));
for (i = 0; i < Blen; i++)
hind[i] = 1;
exp_next = 0;
x = chain + 0;
x->i = 0;
x->j = 0;
x->next = NULL;
heap[1].next = x;
heap[1].exp = exp_list[exp_next++];
if (bits <= FLINT_BITS)
mpoly_monomial_add(heap[1].exp, Bexp + N*0, Cexp + N*0, N);
else
mpoly_monomial_add_mp(heap[1].exp, Bexp + N*0, Cexp + N*0, N);
hind[0] = 2*1 + 0;
Alen = 0;
Di = 0;
while (heap_len > 1)
{
exp = heap[1].exp;
while (Di < Dlen && mpoly_monomial_gt(Dexp + N*Di, exp, N, cmpmask))
{
_nmod_mpoly_fit_length(&Acoeff, &A->coeffs_alloc,
&Aexp, &A->exps_alloc, N, Alen + 1);
mpoly_monomial_set(Aexp + N*Alen, Dexp + N*Di, N);
Acoeff[Alen] = Dcoeff[Di];
Alen++;
Di++;
}
_nmod_mpoly_fit_length(&Acoeff, &A->coeffs_alloc,
&Aexp, &A->exps_alloc, N, Alen + 1);
mpoly_monomial_set(Aexp + N*Alen, exp, N);
acc0 = acc1 = acc2 = 0;
do
{
exp_list[--exp_next] = heap[1].exp;
x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask);
hind[x->i] |= WORD(1);
*store++ = x->i;
*store++ = x->j;
umul_ppmm(pp1, pp0, Bcoeff[x->i], Ccoeff[x->j]);
add_sssaaaaaa(acc2, acc1, acc0, acc2, acc1, acc0, 0, pp1, pp0);
while ((x = x->next) != NULL)
{
hind[x->i] |= WORD(1);
*store++ = x->i;
*store++ = x->j;
umul_ppmm(pp1, pp0, Bcoeff[x->i], Ccoeff[x->j]);
add_sssaaaaaa(acc2, acc1, acc0, acc2, acc1, acc0, 0, pp1, pp0);
}
} while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N));
NMOD_RED3(pp0, acc2, acc1, acc0, fctx);
pp1 = 0;
if (Di < Dlen && mpoly_monomial_equal(Dexp + N*Di, exp, N))
{
pp1 = Dcoeff[Di];
Di++;
}
Acoeff[Alen] = nmod_sub(pp1, pp0, fctx);
Alen += Acoeff[Alen] != 0;
while (store > store_base)
{
j = *--store;
i = *--store;
if ( (i + 1 < Blen)
&& (hind[i + 1] == 2*j + 1)
)
{
x = chain + i + 1;
x->i = i + 1;
x->j = j;
x->next = NULL;
hind[x->i] = 2*(x->j+1) + 0;
if (bits <= FLINT_BITS)
mpoly_monomial_add(exp_list[exp_next], Bexp + N*x->i,
Cexp + N*x->j, N);
else
mpoly_monomial_add_mp(exp_list[exp_next], Bexp + N*x->i,
Cexp + N*x->j, N);
exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
&next_loc, &heap_len, N, cmpmask);
}
if ( (j + 1 < Clen)
&& ((hind[i] & 1) == 1)
&& ((i == 0) || (hind[i - 1] >= 2*(j + 2) + 1))
)
{
x = chain + i;
x->i = i;
x->j = j + 1;
x->next = NULL;
hind[x->i] = 2*(x->j+1) + 0;
if (bits <= FLINT_BITS)
mpoly_monomial_add(exp_list[exp_next], Bexp + N*x->i,
Cexp + N*x->j, N);
else
mpoly_monomial_add_mp(exp_list[exp_next], Bexp + N*x->i,
Cexp + N*x->j, N);
exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
&next_loc, &heap_len, N, cmpmask);
}
}
}
FLINT_ASSERT(Di <= Dlen);
if (Di < Dlen)
{
_nmod_mpoly_fit_length(&Acoeff, &A->coeffs_alloc,
&Aexp, &A->exps_alloc, N, Alen + Dlen - Di);
_nmod_vec_set(Acoeff + Alen, Dcoeff + Di, Dlen - Di);
mpoly_copy_monomials(Aexp + N*Alen, Dexp + N*Di, Dlen - Di, N);
Alen += Dlen - Di;
}
A->coeffs = Acoeff;
A->exps = Aexp;
A->length = Alen;
TMP_END;
}
int nmod_mpolyuu_divides(
nmod_mpolyu_t Q,
const nmod_mpolyu_t A,
const nmod_mpolyu_t B,
slong nmainvars,
const nmod_mpoly_ctx_t ctx)
{
flint_bitcnt_t bits = A->bits;
nmod_mpoly_struct * Bcoeff = B->coeffs;
slong Blen = B->length;
ulong * Bexp = B->exps;
nmod_mpoly_struct * Acoeff = A->coeffs;
slong Alen = A->length;
ulong * Aexp = A->exps;
nmod_mpoly_struct * a, * b, * q;
slong N;
ulong * cmpmask;
nmod_mpoly_t T, S;
int success;
ulong maskhi = 0;
int lt_divides;
slong i, j, s;
slong next_loc, heap_len;
mpoly_heap1_s * heap;
mpoly_heap_t * chain;
slong * store, * store_base;
mpoly_heap_t * x;
slong * hind;
ulong mask, exp, maxexp = Aexp[Alen - 1];
TMP_INIT;
FLINT_ASSERT(Alen > 0);
FLINT_ASSERT(Blen > 0);
FLINT_ASSERT(bits == A->bits);
FLINT_ASSERT(bits == B->bits);
FLINT_ASSERT(bits == Q->bits);
TMP_START;
N = mpoly_words_per_exp(bits, ctx->minfo);
cmpmask = (ulong *) TMP_ALLOC(N*sizeof(ulong));
mpoly_get_cmpmask(cmpmask, N, bits, ctx->minfo);
next_loc = Blen + 4;
heap = (mpoly_heap1_s *) TMP_ALLOC((Blen + 1)*sizeof(mpoly_heap1_s));
chain = (mpoly_heap_t *) TMP_ALLOC(Blen*sizeof(mpoly_heap_t));
store = store_base = (slong *) TMP_ALLOC(2*Blen*sizeof(slong));
hind = (slong *) TMP_ALLOC(Blen*sizeof(slong));
for (i = 0; i < B->length; i++)
hind[i] = 1;
mask = 0;
for (i = 0; i < nmainvars; i++)
mask = (mask << (FLINT_BITS/nmainvars)) + (UWORD(1) << (FLINT_BITS/nmainvars - 1));
s = Blen;
heap_len = 2;
x = chain + 0;
x->i = -WORD(1);
x->j = 0;
x->next = NULL;
HEAP_ASSIGN(heap[1], Aexp[0], x);
Q->length = 0;
nmod_mpoly_init3(T, 16, bits, ctx);
nmod_mpoly_init3(S, 16, bits, ctx);
while (heap_len > 1)
{
exp = heap[1].exp;
if (mpoly_monomial_overflows1(exp, mask))
{
goto not_exact_division;
}
nmod_mpolyu_fit_length(Q, Q->length + 1, ctx);
lt_divides = mpoly_monomial_divides1(Q->exps + Q->length, exp, Bexp[0], mask);
T->length = 0;
do
{
x = _mpoly_heap_pop1(heap, &heap_len, maskhi);
do
{
*store++ = x->i;
*store++ = x->j;
if (x->i != -UWORD(1))
hind[x->i] |= WORD(1);
if (x->i == -UWORD(1))
{
a = Acoeff + x->j;
nmod_mpoly_fit_length(S, T->length + a->length, ctx);
S->length = _nmod_mpoly_add(
S->coeffs, S->exps,
T->coeffs, T->exps, T->length,
a->coeffs, a->exps, a->length,
N, cmpmask, ctx->mod);
}
else
{
b = Bcoeff + x->i;
q = Q->coeffs + x->j;
_nmod_mpoly_mulsub(S, T->coeffs, T->exps, T->length,
b->coeffs, b->exps, b->length,
q->coeffs, q->exps, q->length,
bits, N, cmpmask, ctx->mod);
}
nmod_mpoly_swap(S, T, ctx);
} while ((x = x->next) != NULL);
} while (heap_len > 1 && heap[1].exp == exp);
while (store > store_base)
{
j = *--store;
i = *--store;
if (i == -WORD(1))
{
if (j + 1 < Alen)
{
x = chain + 0;
x->i = i;
x->j = j + 1;
x->next = NULL;
_mpoly_heap_insert1(heap, Aexp[x->j], x,
&next_loc, &heap_len, maskhi);
}
}
else
{
if ( (i + 1 < Blen)
&& (hind[i + 1] == 2*j + 1)
)
{
x = chain + i + 1;
x->i = i + 1;
x->j = j;
x->next = NULL;
hind[x->i] = 2*(x->j + 1) + 0;
_mpoly_heap_insert1(heap, Bexp[x->i] + Q->exps[x->j], x,
&next_loc, &heap_len, maskhi);
}
if (j + 1 == Q->length)
{
s++;
} else if ( ((hind[i] & 1) == 1)
&& ((i == 1) || (hind[i - 1] >= 2*(j + 2) + 1))
)
{
x = chain + i;
x->i = i;
x->j = j + 1;
x->next = NULL;
hind[x->i] = 2*(x->j + 1) + 0;
_mpoly_heap_insert1(heap, Bexp[x->i] + Q->exps[x->j], x,
&next_loc, &heap_len, maskhi);
}
}
}
if (T->length == 0)
{
continue;
}
if (mpoly_monomials_overflow_test(T->exps, T->length, bits, ctx->minfo))
{
goto not_exact_division;
}
q = Q->coeffs + Q->length;
FLINT_ASSERT(q->bits == bits);
b = Bcoeff + 0;
if (!_nmod_mpoly_divides_monagan_pearce(q,
T->coeffs, T->exps, T->length,
b->coeffs, b->exps, b->length,
bits, N, cmpmask, ctx->mod))
{
goto not_exact_division;
}
if (!lt_divides || (exp^maskhi) < (maxexp^maskhi))
{
goto not_exact_division;
}
if (s > 1)
{
i = 1;
x = chain + i;
x->i = i;
x->j = Q->length;
x->next = NULL;
hind[x->i] = 2*(x->j + 1) + 0;
_mpoly_heap_insert1(heap, Bexp[x->i] + Q->exps[x->j], x,
&next_loc, &heap_len, maskhi);
}
s = 1;
Q->length++;
}
success = 1;
cleanup:
nmod_mpoly_clear(T, ctx);
nmod_mpoly_clear(S, ctx);
TMP_END;
return success;
not_exact_division:
success = 0;
Q->length = 0;
goto cleanup;
}