#include <math.h>
#include "gmpcompat.h"
#include "mpn_extras.h"
#include "nmod.h"
#include "nmod_mpoly.h"
#include "fmpz.h"
#include "fmpz_vec.h"
#include "mpoly.h"
#include "fmpz_mpoly.h"
static mpz_srcptr _fmpz_mpoly_get_mpz_signed_uiuiui(ulong * sm, fmpz x, mpz_ptr t)
{
mpz_ptr p;
slong i, abs_size;
ulong s;
if (!COEFF_IS_MPZ(x))
{
sm[0] = x;
sm[1] = FLINT_SIGN_EXT(x);
sm[2] = FLINT_SIGN_EXT(x);
}
else
{
p = COEFF_TO_PTR(x);
sm[0] = 0;
sm[1] = 0;
sm[2] = 0;
s = FLINT_SIGN_EXT(p->_mp_size);
abs_size = FLINT_ABS(p->_mp_size);
if (abs_size > 3 || (abs_size == 3 && p->_mp_d[2] >= COEFF_MAX))
return p;
for (i = 0; i < abs_size; i++)
sm[i] = p->_mp_d[i];
sub_dddmmmsss(sm[2], sm[1], sm[0], s^sm[2], s^sm[1], s^sm[0], s, s, s);
}
mpz_set_ui(t, 0);
return t;
}
static int _is_proved_not_square(
int count,
ulong * p,
flint_rand_t state,
const fmpz * Acoeffs,
const ulong * Aexps,
slong Alen,
flint_bitcnt_t Abits,
const mpoly_ctx_t mctx)
{
int success = 0;
slong i, N = mpoly_words_per_exp(Abits, mctx);
ulong eval, * alphas;
nmod_t mod;
ulong * t;
TMP_INIT;
FLINT_ASSERT(Alen > 0);
TMP_START;
t = (ulong *) TMP_ALLOC(FLINT_MAX(Alen, N)*sizeof(ulong));
if (count == 1)
{
success = mpoly_is_proved_not_square(Aexps, Alen, Abits, N, t);
if (success)
goto cleanup;
}
count *= 3;
alphas = (ulong *) TMP_ALLOC(mctx->nvars*sizeof(ulong));
next_p:
if (*p >= UWORD_MAX_PRIME)
*p = UWORD(1) << (SMALL_FMPZ_BITCOUNT_MAX);
*p = n_nextprime(*p, 1);
nmod_init(&mod, *p);
for (i = 0; i < mctx->nvars; i++)
alphas[i] = n_urandint(state, mod.n);
_fmpz_vec_get_nmod_vec(t, Acoeffs, Alen, mod);
eval = _nmod_mpoly_eval_all_ui(t, Aexps, Alen, Abits, alphas, mctx, mod);
success = n_jacobi_unsigned(eval, mod.n) < 0;
if (!success && --count >= 0)
goto next_p;
cleanup:
TMP_END;
return success;
}
static slong _fmpz_mpoly_sqrt_heap1(
fmpz ** polyq, ulong ** expq, slong * allocq,
const fmpz * Acoeffs, const ulong * Aexps, slong Alen,
flint_bitcnt_t bits,
const mpoly_ctx_t mctx,
int check)
{
slong i, j, Qlen, Ai;
slong next_loc, heap_len = 1, heap_alloc;
mpoly_heap1_s * heap;
mpoly_heap_t * chain_nodes[64];
mpoly_heap_t ** chain;
slong exp_alloc;
slong * store, * store_base;
mpoly_heap_t * x;
fmpz * Qcoeffs = *polyq;
ulong * Qexps = *expq;
ulong mask, exp, exp3 = 0;
ulong maskhi;
mpz_t r, acc, acc2;
mpz_srcptr acc_lg;
mpz_ptr t;
ulong acc_sm[3], acc_sm2[3], pp[3];
int lt_divides, q_rest_small;
flint_rand_t heuristic_state;
ulong heuristic_p = UWORD(1) << (SMALL_FMPZ_BITCOUNT_MAX);
int heuristic_count = 0;
ulong lc_abs = 0;
ulong lc_norm = 0;
ulong lc_n = 0;
ulong lc_i = 0;
mpz_ptr lc_lg = NULL;
FLINT_ASSERT(mpoly_words_per_exp(bits, mctx) == 1);
mpoly_get_cmpmask(&maskhi, 1, bits, mctx);
flint_rand_init(heuristic_state);
mpz_init(r);
mpz_init(acc);
mpz_init(acc2);
next_loc = 2*n_sqrt(Alen) + 4;
heap_alloc = next_loc - 3;
heap = (mpoly_heap1_s *) flint_malloc((heap_alloc + 1)*sizeof(mpoly_heap1_s));
chain_nodes[0] = (mpoly_heap_t *) flint_malloc(heap_alloc*sizeof(mpoly_heap_t));
chain = (mpoly_heap_t **) flint_malloc(heap_alloc*sizeof(mpoly_heap_t*));
store = store_base = (slong *) flint_malloc(2*heap_alloc*sizeof(mpoly_heap_t *));
for (i = 0; i < heap_alloc; i++)
chain[i] = chain_nodes[0] + i;
exp_alloc = 1;
mask = mpoly_overflow_mask_sp(bits);
Qlen = 0;
Ai = 1;
if (!fmpz_is_square(Acoeffs + 0))
goto not_sqrt;
_fmpz_mpoly_fit_length(&Qcoeffs, &Qexps, allocq, Qlen + 1, 1);
fmpz_sqrt(Qcoeffs + 0, Acoeffs + 0);
Qlen++;
fmpz_mul_2exp(Qcoeffs + 0, Qcoeffs + 0, 1);
q_rest_small = 1;
if (fmpz_abs_fits_ui(Qcoeffs + 0))
{
lc_abs = fmpz_get_ui(Qcoeffs + 0);
lc_norm = flint_clz(lc_abs);
lc_n = lc_abs << lc_norm;
lc_i = n_preinvert_limb_prenorm(lc_n);
}
else
{
lc_lg = COEFF_TO_PTR(Qcoeffs[0]);
}
if (!mpoly_monomial_halves1(Qexps + 0, Aexps[0], mask))
goto not_sqrt;
{
if (!fmpz_is_square(Acoeffs + Alen - 1))
goto not_sqrt;
if (!mpoly_monomial_halves1(&exp3, Aexps[Alen - 1], mask))
goto not_sqrt;
exp3 += Qexps[0];
}
while (heap_len > 1 || Ai < Alen)
{
_fmpz_mpoly_fit_length(&Qcoeffs, &Qexps, allocq, Qlen + 1, 1);
if (heap_len > 1 && Ai < Alen && Aexps[Ai] == heap[1].exp)
{
exp = Aexps[Ai];
acc_lg = _fmpz_mpoly_get_mpz_signed_uiuiui(acc_sm, Acoeffs[Ai], acc);
Ai++;
}
else if (heap_len > 1 && (Ai >= Alen ||
mpoly_monomial_gt1(heap[1].exp, Aexps[Ai], maskhi)))
{
exp = heap[1].exp;
mpz_set_ui(acc, 0);
acc_lg = acc;
acc_sm[2] = acc_sm[1] = acc_sm[0] = 0;
if (mpoly_monomial_overflows1(exp, mask))
goto not_sqrt;
}
else
{
FLINT_ASSERT(Ai < Alen);
exp = Aexps[Ai];
acc_lg = _fmpz_mpoly_get_mpz_signed_uiuiui(acc_sm, Acoeffs[Ai], acc);
Ai++;
if (!check && mpoly_monomial_gt1(exp3, exp, maskhi))
break;
lt_divides = mpoly_monomial_divides1(Qexps + Qlen, exp, Qexps[0], mask);
goto skip_heap;
}
lt_divides = mpoly_monomial_divides1(Qexps + Qlen, exp, Qexps[0], mask);
if (!lt_divides && !check)
{
do {
x = _mpoly_heap_pop1(heap, &heap_len, maskhi);
do {
*store++ = x->i;
*store++ = x->j;
} while ((x = x->next) != NULL);
} while (heap_len > 1 && heap[1].exp == exp);
mpz_set_ui(acc, 0);
acc_lg = acc;
}
else if (q_rest_small)
{
acc_sm2[2] = acc_sm2[1] = acc_sm2[0] = 0;
do {
x = _mpoly_heap_pop1(heap, &heap_len, maskhi);
do {
*store++ = x->i;
*store++ = x->j;
smul_ppmm(pp[1], pp[0], Qcoeffs[x->i], Qcoeffs[x->j]);
pp[2] = FLINT_SIGN_EXT(pp[1]);
if (x->i != x->j)
sub_dddmmmsss(acc_sm2[2], acc_sm2[1], acc_sm2[0],
acc_sm2[2], acc_sm2[1], acc_sm2[0],
pp[2], pp[1], pp[0]);
else
sub_dddmmmsss(acc_sm[2], acc_sm[1], acc_sm[0],
acc_sm[2], acc_sm[1], acc_sm[0],
pp[2], pp[1], pp[0]);
} while ((x = x->next) != NULL);
} while (heap_len > 1 && heap[1].exp == exp);
add_sssaaaaaa(acc_sm[2], acc_sm[1], acc_sm[0],
acc_sm[2], acc_sm[1], acc_sm[0],
acc_sm2[2], acc_sm2[1], acc_sm2[0]);
add_sssaaaaaa(acc_sm[2], acc_sm[1], acc_sm[0],
acc_sm[2], acc_sm[1], acc_sm[0],
acc_sm2[2], acc_sm2[1], acc_sm2[0]);
if (mpz_sgn(acc_lg) != 0)
{
flint_mpz_add_signed_uiuiui(acc, acc_lg,
acc_sm[2], acc_sm[1], acc_sm[0]);
acc_lg = acc;
acc_sm[2] = acc_sm[1] = acc_sm[0] = 0;
}
}
else
{
acc_sm2[2] = acc_sm2[1] = acc_sm2[0] = 0;
mpz_tdiv_q_2exp(acc2, acc_lg, 1);
mpz_tdiv_r_2exp(acc, acc_lg, 1);
do {
x = _mpoly_heap_pop1(heap, &heap_len, maskhi);
do {
fmpz Qi, Qj;
*store++ = x->i;
*store++ = x->j;
Qi = Qcoeffs[x->i];
Qj = Qcoeffs[x->j];
t = (x->i != x->j) ? acc2 : acc;
if (!COEFF_IS_MPZ(Qi) && !COEFF_IS_MPZ(Qj))
{
smul_ppmm(pp[1], pp[0], Qi, Qj);
pp[2] = FLINT_SIGN_EXT(pp[1]);
if (x->i != x->j)
sub_dddmmmsss(acc_sm2[2], acc_sm2[1], acc_sm2[0],
acc_sm2[2], acc_sm2[1], acc_sm2[0],
pp[2], pp[1], pp[0]);
else
sub_dddmmmsss(acc_sm[2], acc_sm[1], acc_sm[0],
acc_sm[2], acc_sm[1], acc_sm[0],
pp[2], pp[1], pp[0]);
}
else if (!COEFF_IS_MPZ(Qi) && COEFF_IS_MPZ(Qj))
{
if (Qi < WORD(0))
flint_mpz_addmul_ui(t, COEFF_TO_PTR(Qj), -Qi);
else
flint_mpz_submul_ui(t, COEFF_TO_PTR(Qj), Qi);
}
else if (COEFF_IS_MPZ(Qi) && !COEFF_IS_MPZ(Qj))
{
if (Qj < WORD(0))
flint_mpz_addmul_ui(t, COEFF_TO_PTR(Qi), -Qj);
else
flint_mpz_submul_ui(t, COEFF_TO_PTR(Qi), Qj);
}
else
{
mpz_submul(t, COEFF_TO_PTR(Qi), COEFF_TO_PTR(Qj));
}
} while ((x = x->next) != NULL);
} while (heap_len > 1 && heap[1].exp == exp);
add_sssaaaaaa(acc_sm[2], acc_sm[1], acc_sm[0],
acc_sm[2], acc_sm[1], acc_sm[0],
acc_sm2[2], acc_sm2[1], acc_sm2[0]);
add_sssaaaaaa(acc_sm[2], acc_sm[1], acc_sm[0],
acc_sm[2], acc_sm[1], acc_sm[0],
acc_sm2[2], acc_sm2[1], acc_sm2[0]);
flint_mpz_add_signed_uiuiui(acc, acc, acc_sm[2], acc_sm[1], acc_sm[0]);
mpz_addmul_ui(acc, acc2, 2);
acc_lg = acc;
acc_sm[2] = acc_sm[1] = acc_sm[0] = 0;
}
while (store > store_base)
{
j = *--store;
i = *--store;
if (j < i)
{
x = chain[i];
x->i = i;
x->j = j + 1;
x->next = NULL;
if (check || !mpoly_monomial_gt1(exp3, Qexps[x->i] + Qexps[x->j], maskhi))
{
_mpoly_heap_insert1(heap, Qexps[x->i] + Qexps[x->j], x,
&next_loc, &heap_len, maskhi);
}
}
}
skip_heap:
if (!check && !lt_divides)
continue;
if (mpz_sgn(acc_lg) == 0)
{
ulong d0, d1, ds = acc_sm[2];
sub_ddmmss(d1, d0, acc_sm[1]^ds, acc_sm[0]^ds, ds, ds);
if ((acc_sm[0] | acc_sm[1] | acc_sm[2]) == 0)
continue;
if (!lt_divides)
goto not_sqrt;
if (ds == FLINT_SIGN_EXT(acc_sm[1]) && d1 < lc_abs)
{
ulong qq, rr, nhi, nlo;
nhi = MPN_LEFT_SHIFT_HI(d1, d0, lc_norm);
nlo = d0 << lc_norm;
udiv_qrnnd_preinv(qq, rr, nhi, nlo, lc_n, lc_i);
if (rr != 0)
goto not_sqrt;
if (qq == 0)
continue;
if (qq <= COEFF_MAX)
{
_fmpz_demote(Qcoeffs + Qlen);
Qcoeffs[Qlen] = qq;
if (ds != 0)
Qcoeffs[Qlen] = -Qcoeffs[Qlen];
}
else
{
q_rest_small = 0;
if (ds == 0)
fmpz_set_ui(Qcoeffs + Qlen, qq);
else
fmpz_neg_ui(Qcoeffs + Qlen, qq);
}
}
else
{
flint_mpz_add_signed_uiuiui(acc, acc_lg, acc_sm[2], acc_sm[1], acc_sm[0]);
goto large_lt_divides;
}
}
else
{
flint_mpz_add_signed_uiuiui(acc, acc_lg, acc_sm[2], acc_sm[1], acc_sm[0]);
if (mpz_sgn(acc) == 0)
continue;
if (!lt_divides)
goto not_sqrt;
large_lt_divides:
t = _fmpz_promote(Qcoeffs + Qlen);
if (lc_abs > 0)
flint_mpz_fdiv_qr_ui(t, r, acc, lc_abs);
else
mpz_fdiv_qr(t, r, acc, lc_lg);
_fmpz_demote_val(Qcoeffs + Qlen);
q_rest_small = q_rest_small && !COEFF_IS_MPZ(Qcoeffs[Qlen]);
if (mpz_sgn(r) != 0)
{
Qlen++;
goto not_sqrt;
}
}
if (Qlen >= heap_alloc)
{
if (Qlen > Alen && _is_proved_not_square(
++heuristic_count, &heuristic_p, heuristic_state,
Acoeffs, Aexps, Alen, bits, mctx))
{
Qlen++;
goto not_sqrt;
}
heap_alloc *= 2;
heap = (mpoly_heap1_s *) flint_realloc(heap, (heap_alloc + 1)*sizeof(mpoly_heap1_s));
chain_nodes[exp_alloc] = (mpoly_heap_t *) flint_malloc((heap_alloc/2)*sizeof(mpoly_heap_t));
chain = (mpoly_heap_t **) flint_realloc(chain, heap_alloc*sizeof(mpoly_heap_t*));
store = store_base = (slong *) flint_realloc(store_base, 2*heap_alloc*sizeof(mpoly_heap_t *));
for (i = 0; i < heap_alloc/2; i++)
chain[i + heap_alloc/2] = chain_nodes[exp_alloc] + i;
exp_alloc++;
}
i = Qlen;
x = chain[i];
x->i = i;
x->j = 1;
x->next = NULL;
if (check || !mpoly_monomial_gt1(exp3, Qexps[i] + Qexps[1], maskhi))
{
_mpoly_heap_insert1(heap, Qexps[i] + Qexps[1], x,
&next_loc, &heap_len, maskhi);
}
Qlen++;
}
fmpz_fdiv_q_2exp(Qcoeffs + 0, Qcoeffs + 0, 1);
cleanup:
flint_rand_clear(heuristic_state);
mpz_clear(r);
mpz_clear(acc);
mpz_clear(acc2);
(*polyq) = Qcoeffs;
(*expq) = Qexps;
flint_free(heap);
flint_free(chain);
flint_free(store_base);
for (i = 0; i < exp_alloc; i++)
flint_free(chain_nodes[i]);
return Qlen;
not_sqrt:
for (i = 0; i < Qlen; i++)
_fmpz_demote(Qcoeffs + i);
Qlen = 0;
goto cleanup;
}
slong _fmpz_mpoly_sqrt_heap(
fmpz ** polyq, ulong ** expq, slong * allocq,
const fmpz * Acoeffs, const ulong * Aexps, slong Alen,
flint_bitcnt_t bits,
const mpoly_ctx_t mctx,
int check)
{
slong N = mpoly_words_per_exp(bits, mctx);
ulong * cmpmask;
slong i, j, Qlen, Ai;
slong next_loc;
slong heap_len = 1, heap_alloc;
int exp_alloc;
mpoly_heap_s * heap;
mpoly_heap_t * chain_nodes[64];
mpoly_heap_t ** chain;
slong * store, * store_base;
mpoly_heap_t * x;
fmpz * Qcoeffs = *polyq;
ulong * Qexps = *expq;
ulong * exp, * exp3;
ulong * exps[64];
ulong ** exp_list;
slong exp_next;
ulong mask;
mpz_t r, acc, acc2;
mpz_srcptr acc_lg;
mpz_ptr t;
ulong acc_sm[3], acc_sm2[3], pp[3];
int halves, use_heap, lt_divides, q_rest_small;
flint_rand_t heuristic_state;
ulong heuristic_p = UWORD(1) << (SMALL_FMPZ_BITCOUNT_MAX);
int heuristic_count = 0;
ulong lc_abs = 0;
ulong lc_norm = 0;
ulong lc_n = 0;
ulong lc_i = 0;
mpz_ptr lc_lg = NULL;
TMP_INIT;
if (N == 1)
return _fmpz_mpoly_sqrt_heap1(polyq, expq, allocq,
Acoeffs, Aexps, Alen, bits, mctx, check);
TMP_START;
cmpmask = (ulong *) TMP_ALLOC(N*sizeof(ulong));
mpoly_get_cmpmask(cmpmask, N, bits, mctx);
flint_rand_init(heuristic_state);
mpz_init(r);
mpz_init(acc);
mpz_init(acc2);
next_loc = 2*sqrt(Alen) + 4;
heap_alloc = next_loc - 3;
heap = (mpoly_heap_s *) flint_malloc((heap_alloc + 1)*sizeof(mpoly_heap_s));
chain_nodes[0] = (mpoly_heap_t *) flint_malloc(heap_alloc*sizeof(mpoly_heap_t));
chain = (mpoly_heap_t **) flint_malloc(heap_alloc*sizeof(mpoly_heap_t*));
store = store_base = (slong *) flint_malloc(2*heap_alloc*sizeof(mpoly_heap_t *));
for (i = 0; i < heap_alloc; i++)
chain[i] = chain_nodes[0] + i;
exps[0] = (ulong *) flint_malloc(heap_alloc*N*sizeof(ulong));
exp_alloc = 1;
exp_list = (ulong **) flint_malloc(heap_alloc*sizeof(ulong *));
exp = (ulong *) TMP_ALLOC(N*sizeof(ulong));
exp3 = (ulong *) TMP_ALLOC(N*sizeof(ulong));
exp_next = 0;
for (i = 0; i < heap_alloc; i++)
exp_list[i] = exps[0] + i*N;
mask = (bits <= FLINT_BITS) ? mpoly_overflow_mask_sp(bits) : 0;
Qlen = 0;
Ai = 1;
if (!fmpz_is_square(Acoeffs + 0))
goto not_sqrt;
_fmpz_mpoly_fit_length(&Qcoeffs, &Qexps, allocq, Qlen + 1, 1);
fmpz_sqrt(Qcoeffs + 0, Acoeffs + 0);
Qlen++;
fmpz_mul_2exp(Qcoeffs + 0, Qcoeffs + 0, 1);
q_rest_small = 1;
if (fmpz_abs_fits_ui(Qcoeffs + 0))
{
lc_abs = fmpz_get_ui(Qcoeffs + 0);
lc_norm = flint_clz(lc_abs);
lc_n = lc_abs << lc_norm;
lc_i = n_preinvert_limb_prenorm(lc_n);
}
else
{
lc_lg = COEFF_TO_PTR(Qcoeffs[0]);
}
if (bits <= FLINT_BITS)
halves = mpoly_monomial_halves(Qexps + 0, Aexps + 0, N, mask);
else
halves = mpoly_monomial_halves_mp(Qexps + 0, Aexps + 0, N, bits);
if (!halves)
goto not_sqrt;
{
if (!fmpz_is_square(Acoeffs + Alen - 1))
goto not_sqrt;
if (bits <= FLINT_BITS)
halves = mpoly_monomial_halves(exp3, Aexps + (Alen - 1)*N, N, mask);
else
halves = mpoly_monomial_halves_mp(exp3, Aexps + (Alen - 1)*N, N, bits);
if (!halves)
goto not_sqrt;
if (bits <= FLINT_BITS)
mpoly_monomial_add(exp3, exp3, Qexps + 0, N);
else
mpoly_monomial_add_mp(exp3, exp3, Qexps + 0, N);
}
while (heap_len > 1 || Ai < Alen)
{
_fmpz_mpoly_fit_length(&Qcoeffs, &Qexps, allocq, Qlen + 1, N);
if (heap_len > 1 && Ai < Alen &&
mpoly_monomial_equal(Aexps + N*Ai, heap[1].exp, N))
{
mpoly_monomial_set(exp, Aexps + N*Ai, N);
acc_lg = _fmpz_mpoly_get_mpz_signed_uiuiui(acc_sm, Acoeffs[Ai], acc);
Ai++;
use_heap = 1;
}
else if (heap_len > 1 && (Ai >= Alen ||
mpoly_monomial_lt(Aexps + N*Ai, heap[1].exp, N, cmpmask)))
{
mpoly_monomial_set(exp, heap[1].exp, N);
mpz_set_ui(acc, 0);
acc_lg = acc;
acc_sm[2] = acc_sm[1] = acc_sm[0] = 0;
if (bits <= FLINT_BITS ? mpoly_monomial_overflows(exp, N, mask)
: mpoly_monomial_overflows_mp(exp, N, bits))
goto not_sqrt;
use_heap = 1;
}
else
{
FLINT_ASSERT(Ai < Alen);
mpoly_monomial_set(exp, Aexps + N*Ai, N);
acc_lg = _fmpz_mpoly_get_mpz_signed_uiuiui(acc_sm, Acoeffs[Ai], acc);
Ai++;
if (!check && mpoly_monomial_gt(exp3, exp, N, cmpmask))
break;
use_heap = 0;
}
if (bits <= FLINT_BITS)
lt_divides = mpoly_monomial_divides(Qexps + N*Qlen,
exp, Qexps + 0, N, mask);
else
lt_divides = mpoly_monomial_divides_mp(Qexps + N*Qlen,
exp, Qexps + 0, N, bits);
if (!use_heap)
goto skip_heap;
if (!lt_divides && !check)
{
do {
exp_list[--exp_next] = heap[1].exp;
x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask);
do {
*store++ = x->i;
*store++ = x->j;
} while ((x = x->next) != NULL);
} while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N));
mpz_set_ui(acc, 0);
acc_lg = acc;
}
else if (q_rest_small)
{
acc_sm2[2] = acc_sm2[1] = acc_sm2[0] = 0;
do {
exp_list[--exp_next] = heap[1].exp;
x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask);
do {
*store++ = x->i;
*store++ = x->j;
smul_ppmm(pp[1], pp[0], Qcoeffs[x->i], Qcoeffs[x->j]);
pp[2] = FLINT_SIGN_EXT(pp[1]);
if (x->i != x->j)
sub_dddmmmsss(acc_sm2[2], acc_sm2[1], acc_sm2[0],
acc_sm2[2], acc_sm2[1], acc_sm2[0],
pp[2], pp[1], pp[0]);
else
sub_dddmmmsss(acc_sm[2], acc_sm[1], acc_sm[0],
acc_sm[2], acc_sm[1], acc_sm[0],
pp[2], pp[1], pp[0]);
} while ((x = x->next) != NULL);
} while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N));
add_sssaaaaaa(acc_sm[2], acc_sm[1], acc_sm[0],
acc_sm[2], acc_sm[1], acc_sm[0],
acc_sm2[2], acc_sm2[1], acc_sm2[0]);
add_sssaaaaaa(acc_sm[2], acc_sm[1], acc_sm[0],
acc_sm[2], acc_sm[1], acc_sm[0],
acc_sm2[2], acc_sm2[1], acc_sm2[0]);
if (mpz_sgn(acc_lg) != 0)
{
flint_mpz_add_signed_uiuiui(acc, acc_lg,
acc_sm[2], acc_sm[1], acc_sm[0]);
acc_lg = acc;
acc_sm[2] = acc_sm[1] = acc_sm[0] = 0;
}
}
else
{
acc_sm2[2] = acc_sm2[1] = acc_sm2[0] = 0;
mpz_tdiv_q_2exp(acc2, acc_lg, 1);
mpz_tdiv_r_2exp(acc, acc_lg, 1);
do {
exp_list[--exp_next] = heap[1].exp;
x = _mpoly_heap_pop(heap, &heap_len, N, cmpmask);
do {
fmpz Qi, Qj;
*store++ = x->i;
*store++ = x->j;
Qi = Qcoeffs[x->i];
Qj = Qcoeffs[x->j];
t = (x->i != x->j) ? acc2 : acc;
if (!COEFF_IS_MPZ(Qi) && !COEFF_IS_MPZ(Qj))
{
smul_ppmm(pp[1], pp[0], Qi, Qj);
pp[2] = FLINT_SIGN_EXT(pp[1]);
if (x->i != x->j)
sub_dddmmmsss(acc_sm2[2], acc_sm2[1], acc_sm2[0],
acc_sm2[2], acc_sm2[1], acc_sm2[0],
pp[2], pp[1], pp[0]);
else
sub_dddmmmsss(acc_sm[2], acc_sm[1], acc_sm[0],
acc_sm[2], acc_sm[1], acc_sm[0],
pp[2], pp[1], pp[0]);
}
else if (!COEFF_IS_MPZ(Qi) && COEFF_IS_MPZ(Qj))
{
if (Qi < WORD(0))
flint_mpz_addmul_ui(t, COEFF_TO_PTR(Qj), -Qi);
else
flint_mpz_submul_ui(t, COEFF_TO_PTR(Qj), Qi);
}
else if (COEFF_IS_MPZ(Qi) && !COEFF_IS_MPZ(Qj))
{
if (Qj < WORD(0))
flint_mpz_addmul_ui(t, COEFF_TO_PTR(Qi), -Qj);
else
flint_mpz_submul_ui(t, COEFF_TO_PTR(Qi), Qj);
}
else
{
mpz_submul(t, COEFF_TO_PTR(Qi), COEFF_TO_PTR(Qj));
}
} while ((x = x->next) != NULL);
} while (heap_len > 1 && mpoly_monomial_equal(heap[1].exp, exp, N));
add_sssaaaaaa(acc_sm[2], acc_sm[1], acc_sm[0],
acc_sm[2], acc_sm[1], acc_sm[0],
acc_sm2[2], acc_sm2[1], acc_sm2[0]);
add_sssaaaaaa(acc_sm[2], acc_sm[1], acc_sm[0],
acc_sm[2], acc_sm[1], acc_sm[0],
acc_sm2[2], acc_sm2[1], acc_sm2[0]);
flint_mpz_add_signed_uiuiui(acc, acc, acc_sm[2], acc_sm[1], acc_sm[0]);
mpz_addmul_ui(acc, acc2, 2);
acc_lg = acc;
acc_sm[2] = acc_sm[1] = acc_sm[0] = 0;
}
while (store > store_base)
{
j = *--store;
i = *--store;
if (j < i)
{
x = chain[i];
x->i = i;
x->j = j + 1;
x->next = NULL;
if (bits <= FLINT_BITS)
mpoly_monomial_add(exp_list[exp_next], Qexps + x->i*N,
Qexps + x->j*N, N);
else
mpoly_monomial_add_mp(exp_list[exp_next], Qexps + x->i*N,
Qexps + x->j*N, N);
if (check || !mpoly_monomial_gt(exp3 + 0, exp_list[exp_next], N, cmpmask))
{
exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
&next_loc, &heap_len, N, cmpmask);
}
}
}
skip_heap:
if (!check && !lt_divides)
continue;
if (mpz_sgn(acc_lg) == 0)
{
ulong d0, d1, ds = acc_sm[2];
sub_ddmmss(d1, d0, acc_sm[1]^ds, acc_sm[0]^ds, ds, ds);
if ((acc_sm[0] | acc_sm[1] | acc_sm[2]) == 0)
continue;
if (!lt_divides)
goto not_sqrt;
if (ds == FLINT_SIGN_EXT(acc_sm[1]) && d1 < lc_abs)
{
ulong qq, rr, nhi, nlo;
nhi = MPN_LEFT_SHIFT_HI(d1, d0, lc_norm);
nlo = d0 << lc_norm;
udiv_qrnnd_preinv(qq, rr, nhi, nlo, lc_n, lc_i);
if (rr != 0)
goto not_sqrt;
if (qq == 0)
continue;
if (qq <= COEFF_MAX)
{
_fmpz_demote(Qcoeffs + Qlen);
Qcoeffs[Qlen] = qq;
if (ds != 0)
Qcoeffs[Qlen] = -Qcoeffs[Qlen];
}
else
{
q_rest_small = 0;
if (ds == 0)
fmpz_set_ui(Qcoeffs + Qlen, qq);
else
fmpz_neg_ui(Qcoeffs + Qlen, qq);
}
}
else
{
flint_mpz_add_signed_uiuiui(acc, acc_lg, acc_sm[2], acc_sm[1], acc_sm[0]);
goto large_lt_divides;
}
}
else
{
flint_mpz_add_signed_uiuiui(acc, acc_lg, acc_sm[2], acc_sm[1], acc_sm[0]);
if (mpz_sgn(acc) == 0)
continue;
if (!lt_divides)
goto not_sqrt;
large_lt_divides:
t = _fmpz_promote(Qcoeffs + Qlen);
if (lc_abs > 0)
flint_mpz_fdiv_qr_ui(t, r, acc, lc_abs);
else
mpz_fdiv_qr(t, r, acc, lc_lg);
_fmpz_demote_val(Qcoeffs + Qlen);
q_rest_small = q_rest_small && !COEFF_IS_MPZ(Qcoeffs[Qlen]);
if (mpz_sgn(r) != 0)
{
Qlen++;
goto not_sqrt;
}
}
if (Qlen >= heap_alloc)
{
if (Qlen > Alen && _is_proved_not_square(
++heuristic_count, &heuristic_p, heuristic_state,
Acoeffs, Aexps, Alen, bits, mctx))
{
Qlen++;
goto not_sqrt;
}
heap_alloc *= 2;
heap = (mpoly_heap_s *) flint_realloc(heap, (heap_alloc + 1)*sizeof(mpoly_heap_s));
chain_nodes[exp_alloc] = (mpoly_heap_t *) flint_malloc((heap_alloc/2)*sizeof(mpoly_heap_t));
chain = (mpoly_heap_t **) flint_realloc(chain, heap_alloc*sizeof(mpoly_heap_t*));
store = store_base = (slong *) flint_realloc(store_base, 2*heap_alloc*sizeof(mpoly_heap_t *));
exps[exp_alloc] = (ulong *) flint_malloc((heap_alloc/2)*N*sizeof(ulong));
exp_list = (ulong **) flint_realloc(exp_list, heap_alloc*sizeof(ulong *));
for (i = 0; i < heap_alloc/2; i++)
{
chain[i + heap_alloc/2] = chain_nodes[exp_alloc] + i;
exp_list[i + heap_alloc/2] = exps[exp_alloc] + i*N;
}
exp_alloc++;
}
i = Qlen;
x = chain[i];
x->i = i;
x->j = 1;
x->next = NULL;
if (bits <= FLINT_BITS)
mpoly_monomial_add(exp_list[exp_next], Qexps + x->i*N,
Qexps + x->j*N, N);
else
mpoly_monomial_add_mp(exp_list[exp_next], Qexps + x->i*N,
Qexps + x->j*N, N);
if (check || !mpoly_monomial_gt(exp3 + 0, exp_list[exp_next], N, cmpmask))
{
exp_next += _mpoly_heap_insert(heap, exp_list[exp_next], x,
&next_loc, &heap_len, N, cmpmask);
}
Qlen++;
}
fmpz_fdiv_q_2exp(Qcoeffs + 0, Qcoeffs + 0, 1);
cleanup:
flint_rand_clear(heuristic_state);
mpz_clear(r);
mpz_clear(acc);
mpz_clear(acc2);
(*polyq) = Qcoeffs;
(*expq) = Qexps;
flint_free(heap);
flint_free(chain);
flint_free(store_base);
flint_free(exp_list);
for (i = 0; i < exp_alloc; i++)
{
flint_free(exps[i]);
flint_free(chain_nodes[i]);
}
TMP_END;
return Qlen;
not_sqrt:
for (i = 0; i < Qlen; i++)
_fmpz_demote(Qcoeffs + i);
Qlen = 0;
goto cleanup;
}
int fmpz_mpoly_sqrt_heap(fmpz_mpoly_t Q, const fmpz_mpoly_t A,
const fmpz_mpoly_ctx_t ctx, int check)
{
slong lenq, lenq_est;
flint_bitcnt_t exp_bits;
fmpz_mpoly_t T;
fmpz_mpoly_struct * q;
if (fmpz_mpoly_is_zero(A, ctx))
{
fmpz_mpoly_zero(Q, ctx);
return 1;
}
exp_bits = A->bits;
lenq_est = n_sqrt(A->length);
if (Q == A)
{
fmpz_mpoly_init3(T, lenq_est, exp_bits, ctx);
q = T;
}
else
{
fmpz_mpoly_fit_length_reset_bits(Q, lenq_est, exp_bits, ctx);
q = Q;
}
lenq = _fmpz_mpoly_sqrt_heap(&q->coeffs, &q->exps, &q->alloc,
A->coeffs, A->exps, A->length,
exp_bits, ctx->minfo, check);
if (Q == A)
{
fmpz_mpoly_swap(Q, T, ctx);
fmpz_mpoly_clear(T, ctx);
}
_fmpz_mpoly_set_length(Q, lenq, ctx);
return lenq != 0;
}