#include "nmod.h"
#include "fmpz.h"
#include "fmpz_vec.h"
#include "mpoly.h"
#include "nmod_mpoly.h"
static int _nmod_mpoly_divides_monagan_pearce1(
nmod_mpoly_t Q,
const ulong * coeff2, const ulong * exp2, slong len2,
const ulong * coeff3, const ulong * exp3, slong len3,
slong bits,
ulong maskhi,
nmod_t fctx)
{
int lt_divides;
slong i, j, q_len, s;
slong next_loc, heap_len;
mpoly_heap1_s * heap;
mpoly_heap_t * chain;
slong * store, * store_base;
mpoly_heap_t * x;
ulong * q_coeff = Q->coeffs;
ulong * q_exp = Q->exps;
slong * hind;
ulong mask, exp, maxexp = exp2[len2 - 1];
ulong lc_minus_inv, acc0, acc1, acc2, pp1, pp0;
TMP_INIT;
TMP_START;
next_loc = len3 + 4;
heap = (mpoly_heap1_s *) TMP_ALLOC((len3 + 1)*sizeof(mpoly_heap1_s));
chain = (mpoly_heap_t *) TMP_ALLOC(len3*sizeof(mpoly_heap_t));
store = store_base = (slong *) TMP_ALLOC(2*len3*sizeof(slong));
hind = (slong *) TMP_ALLOC(len3*sizeof(slong));
for (i = 0; i < len3; i++)
hind[i] = 1;
mask = mpoly_overflow_mask_sp(bits);
q_len = WORD(0);
s = len3;
heap_len = 2;
x = chain + 0;
x->i = -WORD(1);
x->j = 0;
x->next = NULL;
HEAP_ASSIGN(heap[1], exp2[0], x);
lc_minus_inv = fctx.n - nmod_inv(coeff3[0], fctx);
while (heap_len > 1)
{
exp = heap[1].exp;
if (mpoly_monomial_overflows1(exp, mask))
goto not_exact_division;
_nmod_mpoly_fit_length(&q_coeff, &Q->coeffs_alloc,
&q_exp, &Q->exps_alloc, 1, q_len + 1);
lt_divides = mpoly_monomial_divides1(q_exp + q_len, exp, exp3[0], mask);
acc0 = acc1 = acc2 = 0;
do {
x = _mpoly_heap_pop1(heap, &heap_len, maskhi);
do {
*store++ = x->i;
*store++ = x->j;
if (x->i == -UWORD(1))
{
add_sssaaaaaa(acc2, acc1, acc0, acc2, acc1, acc0,
WORD(0), WORD(0), fctx.n - coeff2[x->j]);
}
else
{
hind[x->i] |= WORD(1);
umul_ppmm(pp1, pp0, coeff3[x->i], q_coeff[x->j]);
add_sssaaaaaa(acc2, acc1, acc0, acc2, acc1, acc0, WORD(0), pp1, pp0);
}
} while ((x = x->next) != NULL);
} while (heap_len > 1 && heap[1].exp == exp);
NMOD_RED3(q_coeff[q_len], acc2, acc1, acc0, fctx);
while (store > store_base)
{
j = *--store;
i = *--store;
if (i == -WORD(1))
{
if (j + 1 < len2)
{
x = chain + 0;
x->i = i;
x->j = j + 1;
x->next = NULL;
_mpoly_heap_insert1(heap, exp2[x->j], x,
&next_loc, &heap_len, maskhi);
}
} else
{
if ( (i + 1 < len3)
&& (hind[i + 1] == 2*j + 1)
)
{
x = chain + i + 1;
x->i = i + 1;
x->j = j;
x->next = NULL;
hind[x->i] = 2*(x->j + 1) + 0;
_mpoly_heap_insert1(heap, exp3[x->i] + q_exp[x->j], x,
&next_loc, &heap_len, maskhi);
}
if (j + 1 == q_len)
{
s++;
} else if ( ((hind[i] & 1) == 1)
&& ((i == 1) || (hind[i - 1] >= 2*(j + 2) + 1))
)
{
x = chain + i;
x->i = i;
x->j = j + 1;
x->next = NULL;
hind[x->i] = 2*(x->j + 1) + 0;
_mpoly_heap_insert1(heap, exp3[x->i] + q_exp[x->j], x,
&next_loc, &heap_len, maskhi);
}
}
}
q_coeff[q_len] = nmod_mul(q_coeff[q_len], lc_minus_inv, fctx);
if (q_coeff[q_len] == 0)
{
continue;
}
if (!lt_divides || (exp^maskhi) < (maxexp^maskhi))
goto not_exact_division;
if (s > 1)
{
i = 1;
x = chain + i;
x->i = i;
x->j = q_len;
x->next = NULL;
hind[x->i] = 2*(x->j + 1) + 0;
_mpoly_heap_insert1(heap, exp3[x->i] + q_exp[x->j], x,
&next_loc, &heap_len, maskhi);
}
s = 1;
q_len++;
}
Q->coeffs = q_coeff;
Q->exps = q_exp;
Q->length = q_len;
TMP_END;
return 1;
not_exact_division:
Q->coeffs = q_coeff;
Q->exps = q_exp;
Q->length = 0;
TMP_END;
return 0;
}
int _nmod_mpoly_divides_monagan_pearce(
nmod_mpoly_t Q,
const ulong * coeff2, const ulong * exp2, slong len2,
const ulong * coeff3, const ulong * exp3, slong len3,
flint_bitcnt_t bits,
slong N,
const ulong * cmpmask,
nmod_t fctx)
{
int lt_divides;
slong i, j, q_len, s;
slong next_loc, heap_len;
mpoly_heap_s * heap;
mpoly_heap_t * chain;
slong * store, * store_base;
mpoly_heap_t * x;
ulong * q_coeff = Q->coeffs;
ulong * q_exp = Q->exps;
ulong * exp, * exps;
ulong ** exp_list;
slong exp_next;
ulong lc_minus_inv, acc0, acc1, acc2, pp1, pp0;
ulong mask;
slong * hind;
TMP_INIT;
if (N == 1)
return _nmod_mpoly_divides_monagan_pearce1(Q, coeff2, exp2, len2,
coeff3, exp3, len3, bits, cmpmask[0], fctx);
TMP_START;
next_loc = len3 + 4;
heap = (mpoly_heap_s *) TMP_ALLOC((len3 + 1)*sizeof(mpoly_heap_s));
chain = (mpoly_heap_t *) TMP_ALLOC(len3*sizeof(mpoly_heap_t));
store = store_base = (slong *) TMP_ALLOC(2*len3*sizeof(slong));
exps = (ulong *) TMP_ALLOC(len3*N*sizeof(ulong));
exp_list = (ulong **) TMP_ALLOC(len3*sizeof(ulong *));
exp = (ulong *) TMP_ALLOC(N*sizeof(ulong));
exp_next = 0;
for (i = 0; i < len3; i++)
exp_list[i] = exps + i*N;
hind = (slong *) TMP_ALLOC(len3*sizeof(slong));
for (i = 0; i < len3; i++)
hind[i] = 1;
mask = bits <= FLINT_BITS ? mpoly_overflow_mask_sp(bits) : 0;
q_len = WORD(0);
s = len3;
heap_len = 2;
x = chain + 0;
x->i = -WORD(1);
x->j = 0;
x->next = NULL;
heap[1].next = x;
heap[1].exp = exp_list[exp_next++];
mpoly_monomial_set(heap[1].exp, exp2, N);
lc_minus_inv = fctx.n - nmod_inv(coeff3[0], fctx);
while (heap_len > 1)
{
_nmod_mpoly_fit_length(&q_coeff, &Q->coeffs_alloc,
&q_exp, &Q->exps_alloc, N, q_len + 1);
mpoly_monomial_set(exp, heap[1].exp, N);
if (bits <= FLINT_BITS)
{
if (mpoly_monomial_overflows(exp, N, mask))
goto not_exact_division;
lt_divides = mpoly_monomial_divides(q_exp + q_len*N, exp, exp3, N, mask);
}
else
{
if (mpoly_monomial_overflows_mp(exp, N, bits))
goto not_exact_division;
lt_divides = mpoly_monomial_divides_mp(q_exp + q_len*N, exp, exp3, N, bits);
}
acc0 = acc1 = acc2 = 0;
do {
exp_list[--exp_next] = heap[1].exp;
x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask);
do {
*store++ = x->i;
*store++ = x->j;
if (x->i == -UWORD(1))
{
add_sssaaaaaa(acc2, acc1, acc0, acc2, acc1, acc0,
WORD(0), WORD(0), fctx.n - coeff2[x->j]);
}
else
{
hind[x->i] |= WORD(1);
umul_ppmm(pp1, pp0, coeff3[x->i], q_coeff[x->j]);
add_sssaaaaaa(acc2, acc1, acc0, acc2, acc1, acc0, WORD(0), pp1, pp0);
}
} while ((x = x->next) != NULL);
} while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N));
NMOD_RED3(q_coeff[q_len], acc2, acc1, acc0, fctx);
while (store > store_base)
{
j = *--store;
i = *--store;
if (i == -WORD(1))
{
if (j + 1 < len2)
{
x = chain + 0;
x->i = i;
x->j = j + 1;
x->next = NULL;
mpoly_monomial_set(exp_list[exp_next], exp2 + x->j*N, N);
if (!_mpoly_heap_insert(heap, exp_list[exp_next++], x,
&next_loc, &heap_len, N, cmpmask))
exp_next--;
}
} else
{
if ( (i + 1 < len3)
&& (hind[i + 1] == 2*j + 1)
)
{
x = chain + i + 1;
x->i = i + 1;
x->j = j;
x->next = NULL;
hind[x->i] = 2*(x->j + 1) + 0;
if (bits <= FLINT_BITS)
mpoly_monomial_add(exp_list[exp_next], exp3 + x->i*N,
q_exp + x->j*N, N);
else
mpoly_monomial_add_mp(exp_list[exp_next], exp3 + x->i*N,
q_exp + x->j*N, N);
exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
&next_loc, &heap_len, N, cmpmask);
}
if (j + 1 == q_len)
{
s++;
} else if ( ((hind[i] & 1) == 1)
&& ((i == 1) || (hind[i - 1] >= 2*(j + 2) + 1))
)
{
x = chain + i;
x->i = i;
x->j = j + 1;
x->next = NULL;
hind[x->i] = 2*(x->j + 1) + 0;
if (bits <= FLINT_BITS)
mpoly_monomial_add(exp_list[exp_next], exp3 + x->i*N,
q_exp + x->j*N, N);
else
mpoly_monomial_add_mp(exp_list[exp_next], exp3 + x->i*N,
q_exp + x->j*N, N);
exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
&next_loc, &heap_len, N, cmpmask);
}
}
}
q_coeff[q_len] = nmod_mul(q_coeff[q_len], lc_minus_inv, fctx);
if (q_coeff[q_len] == 0)
{
continue;
}
if (!lt_divides ||
mpoly_monomial_gt(exp2 + N*(len2 - 1), exp, N, cmpmask))
{
goto not_exact_division;
}
if (s > 1)
{
i = 1;
x = chain + i;
x->i = i;
x->j = q_len;
x->next = NULL;
hind[x->i] = 2*(x->j + 1) + 0;
if (bits <= FLINT_BITS)
mpoly_monomial_add(exp_list[exp_next], exp3 + x->i*N, q_exp + x->j*N, N);
else
mpoly_monomial_add_mp(exp_list[exp_next], exp3 + x->i*N, q_exp + x->j*N, N);
exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
&next_loc, &heap_len, N, cmpmask);
}
s = 1;
q_len++;
}
Q->coeffs = q_coeff;
Q->exps = q_exp;
Q->length = q_len;
TMP_END;
return 1;
not_exact_division:
Q->coeffs = q_coeff;
Q->exps = q_exp;
Q->length = 0;
TMP_END;
return 0;
}
int nmod_mpoly_divides_monagan_pearce(
nmod_mpoly_t Q,
const nmod_mpoly_t A,
const nmod_mpoly_t B,
const nmod_mpoly_ctx_t ctx)
{
slong i, N;
flint_bitcnt_t Qbits;
fmpz * Amaxfields, * Bmaxfields;
ulong * cmpmask;
ulong * exp2 = A->exps, * exp3 = B->exps, * expq;
int divides, easy_exit, free2 = 0, free3 = 0;
ulong mask = 0;
TMP_INIT;
if (B->length == 0)
{
if (A->length == 0 || nmod_mpoly_ctx_modulus(ctx) == 1)
{
nmod_mpoly_set(Q, A, ctx);
return 1;
}
else
{
flint_throw(FLINT_DIVZERO, "nmod_mpoly_divides_monagan_pearce: divide by zero");
}
}
if (A->length == 0)
{
nmod_mpoly_zero(Q, ctx);
return 1;
}
TMP_START;
Amaxfields = (fmpz *) TMP_ALLOC(ctx->minfo->nfields*sizeof(fmpz));
Bmaxfields = (fmpz *) TMP_ALLOC(ctx->minfo->nfields*sizeof(fmpz));
for (i = 0; i < ctx->minfo->nfields; i++)
{
fmpz_init(Amaxfields + i);
fmpz_init(Bmaxfields + i);
}
mpoly_max_fields_fmpz(Amaxfields, A->exps, A->length,
A->bits, ctx->minfo);
mpoly_max_fields_fmpz(Bmaxfields, B->exps, B->length,
B->bits, ctx->minfo);
easy_exit = 0;
for (i = 0; i < ctx->minfo->nfields; i++)
{
if (fmpz_cmp(Amaxfields + i, Bmaxfields + i) < 0)
easy_exit = 1;
}
Qbits = 1 + _fmpz_vec_max_bits(Amaxfields, ctx->minfo->nfields);
Qbits = FLINT_MAX(Qbits, A->bits);
Qbits = FLINT_MAX(Qbits, B->bits);
Qbits = mpoly_fix_bits(Qbits, ctx->minfo);
for (i = 0; i < ctx->minfo->nfields; i++)
{
fmpz_clear(Amaxfields + i);
fmpz_clear(Bmaxfields + i);
}
if (easy_exit)
{
nmod_mpoly_zero(Q, ctx);
divides = 0;
goto cleanup;
}
N = mpoly_words_per_exp(Qbits, ctx->minfo);
cmpmask = (ulong*) TMP_ALLOC(N*sizeof(ulong));
mpoly_get_cmpmask(cmpmask, N, Qbits, ctx->minfo);
expq = (ulong *) TMP_ALLOC(N*sizeof(ulong));
if (Qbits == A->bits && Qbits == B->bits && A->exps[N - 1] < B->exps[N - 1])
{
nmod_mpoly_zero(Q, ctx);
divides = 0;
goto cleanup;
}
if (Qbits > A->bits)
{
free2 = 1;
exp2 = (ulong *) flint_malloc(N*A->length*sizeof(ulong));
mpoly_repack_monomials(exp2, Qbits, A->exps, A->bits,
A->length, ctx->minfo);
}
if (Qbits > B->bits)
{
free3 = 1;
exp3 = (ulong *) flint_malloc(N*B->length*sizeof(ulong));
mpoly_repack_monomials(exp3, Qbits, B->exps, B->bits,
B->length, ctx->minfo);
}
if (Qbits <= FLINT_BITS)
{
for (i = 0; (ulong) i < FLINT_BITS/Qbits; i++)
mask = (mask << Qbits) + (UWORD(1) << (Qbits - 1));
if (!mpoly_monomial_divides(expq, exp2, exp3, N, mask))
{
nmod_mpoly_zero(Q, ctx);
divides = 0;
goto cleanup;
}
}
else
{
if (!mpoly_monomial_divides_mp(expq, exp2, exp3, N, Qbits))
{
nmod_mpoly_zero(Q, ctx);
divides = 0;
goto cleanup;
}
}
if (Q == A || Q == B)
{
nmod_mpoly_t temp;
nmod_mpoly_init3(temp, A->length/B->length + 1, Qbits, ctx);
divides = _nmod_mpoly_divides_monagan_pearce(temp,
A->coeffs, exp2, A->length,
B->coeffs, exp3, B->length,
Qbits, N, cmpmask, ctx->mod);
nmod_mpoly_swap(temp, Q, ctx);
nmod_mpoly_clear(temp, ctx);
}
else
{
nmod_mpoly_fit_length_reset_bits(Q, A->length/B->length + 1, Qbits, ctx);
divides = _nmod_mpoly_divides_monagan_pearce(Q,
A->coeffs, exp2, A->length,
B->coeffs, exp3, B->length,
Qbits, N, cmpmask, ctx->mod);
}
cleanup:
if (free2)
flint_free(exp2);
if (free3)
flint_free(exp3);
TMP_END;
return divides;
}