#include <math.h>
#include "fmpz_vec.h"
#include "fmpz_poly.h"
#include "arb_poly.h"
static void
_arb_poly_get_scale(fmpz_t scale, arb_srcptr x, slong xlen,
arb_srcptr y, slong ylen)
{
slong xa, xb, ya, yb, den;
fmpz_zero(scale);
xa = 0;
xb = xlen - 1;
while (xa < xlen && arf_is_special(arb_midref(x + xa))) xa++;
while (xb > xa && arf_is_special(arb_midref(x + xb))) xb--;
ya = 0;
yb = ylen - 1;
while (ya < ylen && arf_is_special(arb_midref(y + ya))) ya++;
while (yb > ya && arf_is_special(arb_midref(y + yb))) yb--;
if (xa <= xb && ya <= yb && (xa < xb || ya < yb))
{
fmpz_add(scale, scale, ARF_EXPREF(arb_midref(x + xb)));
fmpz_sub(scale, scale, ARF_EXPREF(arb_midref(x + xa)));
fmpz_add(scale, scale, ARF_EXPREF(arb_midref(y + yb)));
fmpz_sub(scale, scale, ARF_EXPREF(arb_midref(y + ya)));
den = (xb - xa) + (yb - ya);
fmpz_mul_2exp(scale, scale, 1);
fmpz_add_ui(scale, scale, den);
fmpz_fdiv_q_ui(scale, scale, 2 * den);
}
}
#define ALPHA 3.0
#define BETA 512
#define DOUBLE_BLOCK_MAX_LENGTH 1000
#define DOUBLE_ROUNDING_FACTOR (1.0 + 1e-9)
#define DOUBLE_BLOCK_MAX_HEIGHT 800
#define DOUBLE_BLOCK_SHIFT (DOUBLE_BLOCK_MAX_HEIGHT / 2)
static void
_mag_vec_get_fmpz_2exp_blocks(fmpz * coeffs,
double * dblcoeffs, fmpz * exps, slong * blocks, const fmpz_t scale,
arb_srcptr x, mag_srcptr xm, slong len)
{
fmpz_t top, bot, t, b, v, block_top, block_bot;
slong i, j, s, block, bits, maxheight;
int in_zero;
mag_srcptr cur;
fmpz_init(top);
fmpz_init(bot);
fmpz_init(t);
fmpz_init(b);
fmpz_init(v);
fmpz_init(block_top);
fmpz_init(block_bot);
blocks[0] = 0;
block = 0;
in_zero = 1;
maxheight = ALPHA * MAG_BITS + BETA;
if (maxheight > DOUBLE_BLOCK_MAX_HEIGHT)
flint_throw(FLINT_ERROR, "(%s): maxheight > DOUBLE_BLOCK_MAX_HEIGHT\n", __func__);
for (i = 0; i < len; i++)
{
cur = (x == NULL) ? (xm + i) : arb_radref(x + i);
if (mag_is_special(cur))
continue;
bits = MAG_BITS;
fmpz_set(top, MAG_EXPREF(cur));
fmpz_submul_ui(top, scale, i);
fmpz_sub_ui(bot, top, bits);
if (in_zero)
{
fmpz_swap(block_top, top);
fmpz_swap(block_bot, bot);
}
else
{
fmpz_max(t, top, block_top);
fmpz_min(b, bot, block_bot);
fmpz_sub(v, t, b);
if (fmpz_cmp_ui(v, maxheight) < 0)
{
fmpz_swap(block_top, t);
fmpz_swap(block_bot, b);
}
else
{
fmpz_set(exps + block, block_bot);
block++;
blocks[block] = i;
fmpz_swap(block_top, top);
fmpz_swap(block_bot, bot);
}
}
in_zero = 0;
}
fmpz_set(exps + block, block_bot);
blocks[block + 1] = len;
for (i = 0; blocks[i] != len; i++)
{
for (j = blocks[i]; j < blocks[i + 1]; j++)
{
cur = (x == NULL) ? (xm + j) : arb_radref(x + j);
if (mag_is_special(cur))
{
fmpz_zero(coeffs + j);
dblcoeffs[j] = 0.0;
}
else
{
ulong man;
double c;
man = MAG_MAN(cur);
fmpz_mul_ui(t, scale, j);
fmpz_sub(t, MAG_EXPREF(cur), t);
fmpz_sub_ui(t, t, MAG_BITS);
s = _fmpz_sub_small(t, exps + i);
if (s < 0)
flint_throw(FLINT_ERROR, "(%s): s < 0\n", __func__);
fmpz_set_ui(coeffs + j, man);
fmpz_mul_2exp(coeffs + j, coeffs + j, s);
c = man;
c = ldexp(c, s - DOUBLE_BLOCK_SHIFT);
if (c < 1e-150 || c > 1e150)
flint_throw(FLINT_ERROR, "(%s): c large or big\n", __func__);
dblcoeffs[j] = c;
}
}
}
fmpz_clear(top);
fmpz_clear(bot);
fmpz_clear(t);
fmpz_clear(b);
fmpz_clear(v);
fmpz_clear(block_top);
fmpz_clear(block_bot);
}
static void
_arb_vec_get_fmpz_2exp_blocks(fmpz * coeffs, fmpz * exps,
slong * blocks, const fmpz_t scale, arb_srcptr x, slong len, slong prec)
{
fmpz_t top, bot, t, b, v, block_top, block_bot;
slong i, j, s, block, bits, maxheight;
int in_zero;
fmpz_init(top);
fmpz_init(bot);
fmpz_init(t);
fmpz_init(b);
fmpz_init(v);
fmpz_init(block_top);
fmpz_init(block_bot);
blocks[0] = 0;
block = 0;
in_zero = 1;
if (prec == ARF_PREC_EXACT)
maxheight = ARF_PREC_EXACT;
else
maxheight = ALPHA * prec + BETA;
for (i = 0; i < len; i++)
{
bits = arf_bits(arb_midref(x + i));
if (bits == 0)
continue;
fmpz_set(top, ARF_EXPREF(arb_midref(x + i)));
fmpz_submul_ui(top, scale, i);
fmpz_sub_ui(bot, top, bits);
if (in_zero)
{
fmpz_swap(block_top, top);
fmpz_swap(block_bot, bot);
}
else
{
fmpz_max(t, top, block_top);
fmpz_min(b, bot, block_bot);
fmpz_sub(v, t, b);
if (fmpz_cmp_ui(v, maxheight) < 0)
{
fmpz_swap(block_top, t);
fmpz_swap(block_bot, b);
}
else
{
fmpz_set(exps + block, block_bot);
block++;
blocks[block] = i;
fmpz_swap(block_top, top);
fmpz_swap(block_bot, bot);
}
}
in_zero = 0;
}
fmpz_set(exps + block, block_bot);
blocks[block + 1] = len;
for (i = 0; blocks[i] != len; i++)
{
for (j = blocks[i]; j < blocks[i + 1]; j++)
{
if (arf_is_special(arb_midref(x + j)))
{
fmpz_zero(coeffs + j);
}
else
{
arf_get_fmpz_2exp(coeffs + j, bot, arb_midref(x + j));
fmpz_mul_ui(t, scale, j);
fmpz_sub(t, bot, t);
s = _fmpz_sub_small(t, exps + i);
if (s < 0)
flint_throw(FLINT_ERROR, "(%s): s < 0\n", __func__);
fmpz_mul_2exp(coeffs + j, coeffs + j, s);
}
}
}
fmpz_clear(top);
fmpz_clear(bot);
fmpz_clear(t);
fmpz_clear(b);
fmpz_clear(v);
fmpz_clear(block_top);
fmpz_clear(block_bot);
}
static void
_arb_poly_addmulmid_rad(arb_ptr z, fmpz * zz,
const fmpz * xz, const double * xdbl, const fmpz * xexps,
const slong * xblocks, slong xlen,
const fmpz * yz, const double * ydbl, const fmpz * yexps,
const slong * yblocks, slong ylen, slong nlo, slong nhi)
{
slong i, j, k, ii, xp, yp, xl, yl, bnlo, bnhi;
fmpz_t zexp;
mag_t t;
fmpz_init(zexp);
mag_init(t);
for (i = 0; (xp = xblocks[i]) != xlen; i++)
{
for (j = 0; (yp = yblocks[j]) != ylen; j++)
{
if (xp + yp >= nhi)
continue;
xl = xblocks[i + 1] - xp;
yl = yblocks[j + 1] - yp;
if (xp + yp + (xl + yl - 1) <= nlo)
continue;
bnhi = FLINT_MIN(xl + yl - 1, nhi - xp - yp);
xl = FLINT_MIN(xl, bnhi);
yl = FLINT_MIN(yl, bnhi);
bnlo = FLINT_MAX(0, nlo - xp - yp);
fmpz_add_inline(zexp, xexps + i, yexps + j);
if (xl > 1 && yl > 1 &&
(xl < DOUBLE_BLOCK_MAX_LENGTH || yl < DOUBLE_BLOCK_MAX_LENGTH))
{
fmpz_add_ui(zexp, zexp, 2 * DOUBLE_BLOCK_SHIFT);
for (k = bnlo; k < bnhi; k++)
{
double ss = 0.0;
for (ii = FLINT_MAX(0, k - yl + 1); ii <= FLINT_MIN(xl - 1, k); ii++)
{
ss += xdbl[xp + ii] * ydbl[yp + k - ii];
}
ss *= DOUBLE_ROUNDING_FACTOR;
mag_set_d_2exp_fmpz(t, ss, zexp);
mag_add(arb_radref(z + xp + yp + k - nlo),
arb_radref(z + xp + yp + k - nlo), t);
}
}
else
{
_fmpz_poly_mulmid(zz, xz + xp, xl, yz + yp, yl, bnlo, bnhi);
for (k = bnlo; k < bnhi; k++)
{
mag_set_fmpz_2exp_fmpz(t, zz + k - bnlo, zexp);
mag_add(arb_radref(z + xp + yp + k - nlo),
arb_radref(z + xp + yp + k - nlo), t);
}
}
}
}
fmpz_clear(zexp);
mag_clear(t);
}
static void
_arb_poly_addmulmid_block(arb_ptr z, fmpz * zz,
const fmpz * xz, const fmpz * xexps, const slong * xblocks, slong xlen,
const fmpz * yz, const fmpz * yexps, const slong * yblocks, slong ylen,
slong nlo, slong nhi, slong prec, int squaring)
{
slong i, j, k, xp, yp, xl, yl, bnlo, bnhi;
fmpz_t zexp;
fmpz_init(zexp);
if (squaring)
{
for (i = 0; (xp = xblocks[i]) != xlen; i++)
{
if (2 * xp >= nhi)
continue;
xl = xblocks[i + 1] - xp;
if (2 * xp + (2 * xl - 1) <= nlo)
continue;
bnhi = FLINT_MIN(2 * xl - 1, nhi - 2 * xp);
xl = FLINT_MIN(xl, bnhi);
bnlo = FLINT_MAX(0, nlo - 2 * xp);
_fmpz_poly_mulmid(zz, xz + xp, xl, xz + xp, xl, bnlo, bnhi);
_fmpz_add2_fast(zexp, xexps + i, xexps + i, 0);
for (k = bnlo; k < bnhi; k++)
arb_add_fmpz_2exp(z + 2 * xp + k - nlo, z + 2 * xp + k - nlo, zz + k - bnlo, zexp, prec);
}
}
for (i = 0; (xp = xblocks[i]) != xlen; i++)
{
for (j = squaring ? i + 1 : 0; (yp = yblocks[j]) != ylen; j++)
{
if (xp + yp >= nhi)
continue;
xl = xblocks[i + 1] - xp;
yl = yblocks[j + 1] - yp;
if (xp + yp + (xl + yl - 1) <= nlo)
continue;
bnhi = FLINT_MIN(xl + yl - 1, nhi - xp - yp);
xl = FLINT_MIN(xl, bnhi);
yl = FLINT_MIN(yl, bnhi);
bnlo = FLINT_MAX(0, nlo - xp - yp);
_fmpz_poly_mulmid(zz, xz + xp, xl, yz + yp, yl, bnlo, bnhi);
_fmpz_add2_fast(zexp, xexps + i, yexps + j, squaring);
for (k = bnlo; k < bnhi; k++)
arb_add_fmpz_2exp(z + xp + yp + k - nlo, z + xp + yp + k - nlo, zz + k - bnlo, zexp, prec);
}
}
fmpz_clear(zexp);
}
void
_arb_poly_mulmid_block(arb_ptr z, arb_srcptr x, slong xlen,
arb_srcptr y, slong ylen, slong nlo, slong nhi, slong prec)
{
slong xmlen, xrlen, ymlen, yrlen, i;
fmpz *xz, *yz, *zz;
fmpz *xe, *ye;
slong *xblocks, *yblocks;
int squaring;
fmpz_t scale, t;
xlen = FLINT_MIN(xlen, nhi);
ylen = FLINT_MIN(ylen, nhi);
slong nlo2 = (xlen + ylen - 1) - nlo;
if (xlen > nlo2)
{
slong trunc = xlen - nlo2;
x += trunc;
xlen -= trunc;
nlo -= trunc;
nhi -= trunc;
}
if (ylen > nlo2)
{
slong trunc = ylen - nlo2;
y += trunc;
ylen -= trunc;
nlo -= trunc;
nhi -= trunc;
}
squaring = (x == y) && (xlen == ylen);
xmlen = xrlen = xlen;
while (xmlen > 0 && arf_is_zero(arb_midref(x + xmlen - 1))) xmlen--;
while (xrlen > 0 && mag_is_zero(arb_radref(x + xrlen - 1))) xrlen--;
if (squaring)
{
ymlen = xmlen;
yrlen = xrlen;
}
else
{
ymlen = yrlen = ylen;
while (ymlen > 0 && arf_is_zero(arb_midref(y + ymlen - 1))) ymlen--;
while (yrlen > 0 && mag_is_zero(arb_radref(y + yrlen - 1))) yrlen--;
}
if (!_arb_vec_is_finite(x, xlen) ||
(!squaring && !_arb_vec_is_finite(y, ylen)))
{
_arb_poly_mulmid_classical(z, x, xlen, y, ylen, nlo, nhi, prec);
return;
}
xlen = FLINT_MAX(xmlen, xrlen);
ylen = FLINT_MAX(ymlen, yrlen);
_arb_vec_zero(z, nhi - nlo);
if (xlen == 0 || ylen == 0)
return;
nhi = FLINT_MIN(nhi, xlen + ylen - 1);
fmpz_init(scale);
fmpz_init(t);
xz = _fmpz_vec_init(xlen);
yz = _fmpz_vec_init(ylen);
zz = _fmpz_vec_init(nhi - nlo);
xe = _fmpz_vec_init(xlen);
ye = _fmpz_vec_init(ylen);
xblocks = flint_malloc(sizeof(slong) * (xlen + 1));
yblocks = flint_malloc(sizeof(slong) * (ylen + 1));
_arb_poly_get_scale(scale, x, xlen, y, ylen);
if (xrlen != 0 || yrlen != 0)
{
mag_ptr tmp;
double *xdbl, *ydbl;
tmp = _mag_vec_init(FLINT_MAX(xlen, ylen));
xdbl = flint_malloc(sizeof(double) * xlen);
ydbl = flint_malloc(sizeof(double) * ylen);
if (squaring)
{
_mag_vec_get_fmpz_2exp_blocks(xz, xdbl, xe, xblocks, scale, x, NULL, xrlen);
for (i = 0; i < xlen; i++)
{
arf_get_mag(tmp + i, arb_midref(x + i));
mag_mul_2exp_si(tmp + i, tmp + i, 1);
mag_add(tmp + i, tmp + i, arb_radref(x + i));
}
_mag_vec_get_fmpz_2exp_blocks(yz, ydbl, ye, yblocks, scale, NULL, tmp, xlen);
_arb_poly_addmulmid_rad(z, zz, xz, xdbl, xe, xblocks, xrlen, yz, ydbl, ye, yblocks, xlen, nlo, nhi);
}
else if (yrlen == 0)
{
_mag_vec_get_fmpz_2exp_blocks(xz, xdbl, xe, xblocks, scale, x, NULL, xrlen);
for (i = 0; i < ymlen; i++)
arf_get_mag(tmp + i, arb_midref(y + i));
_mag_vec_get_fmpz_2exp_blocks(yz, ydbl, ye, yblocks, scale, NULL, tmp, ymlen);
_arb_poly_addmulmid_rad(z, zz, xz, xdbl, xe, xblocks, xrlen, yz, ydbl, ye, yblocks, ymlen, nlo, nhi);
}
else
{
for (i = 0; i < xmlen; i++)
arf_get_mag(tmp + i, arb_midref(x + i));
_mag_vec_get_fmpz_2exp_blocks(xz, xdbl, xe, xblocks, scale, NULL, tmp, xmlen);
_mag_vec_get_fmpz_2exp_blocks(yz, ydbl, ye, yblocks, scale, y, NULL, yrlen);
_arb_poly_addmulmid_rad(z, zz, xz, xdbl, xe, xblocks, xmlen, yz, ydbl, ye, yblocks, yrlen, nlo, nhi);
if (xrlen != 0)
{
_mag_vec_get_fmpz_2exp_blocks(xz, xdbl, xe, xblocks, scale, x, NULL, xrlen);
for (i = 0; i < ylen; i++)
arb_get_mag(tmp + i, y + i);
_mag_vec_get_fmpz_2exp_blocks(yz, ydbl, ye, yblocks, scale, NULL, tmp, ylen);
_arb_poly_addmulmid_rad(z, zz, xz, xdbl, xe, xblocks, xrlen, yz, ydbl, ye, yblocks, ylen, nlo, nhi);
}
}
_mag_vec_clear(tmp, FLINT_MAX(xlen, ylen));
flint_free(xdbl);
flint_free(ydbl);
}
if (xmlen != 0 && ymlen != 0)
{
_arb_vec_get_fmpz_2exp_blocks(xz, xe, xblocks, scale, x, xmlen, prec);
if (squaring)
{
_arb_poly_addmulmid_block(z, zz, xz, xe, xblocks, xmlen, xz, xe, xblocks, xmlen, nlo, nhi, prec, 1);
}
else
{
_arb_vec_get_fmpz_2exp_blocks(yz, ye, yblocks, scale, y, ymlen, prec);
_arb_poly_addmulmid_block(z, zz, xz, xe, xblocks, xmlen, yz, ye, yblocks, ymlen, nlo, nhi, prec, 0);
}
}
if (!fmpz_is_zero(scale))
{
fmpz_mul_ui(t, scale, nlo);
for (i = nlo; i < nhi; i++)
{
arb_mul_2exp_fmpz(z + i - nlo, z + i - nlo, t);
fmpz_add(t, t, scale);
}
}
_fmpz_vec_clear(xz, xlen);
_fmpz_vec_clear(yz, ylen);
_fmpz_vec_clear(zz, nhi - nlo);
_fmpz_vec_clear(xe, xlen);
_fmpz_vec_clear(ye, ylen);
flint_free(xblocks);
flint_free(yblocks);
fmpz_clear(scale);
fmpz_clear(t);
}
void
_arb_poly_mullow_block(arb_ptr z, arb_srcptr x, slong xlen,
arb_srcptr y, slong ylen, slong n, slong prec)
{
_arb_poly_mulmid_block(z, x, xlen, y, ylen, 0, n, prec);
}
void
arb_poly_mullow_block(arb_poly_t res, const arb_poly_t poly1,
const arb_poly_t poly2, slong n, slong prec)
{
slong xlen, ylen, zlen;
xlen = poly1->length;
ylen = poly2->length;
if (xlen == 0 || ylen == 0 || n == 0)
{
arb_poly_zero(res);
return;
}
xlen = FLINT_MIN(xlen, n);
ylen = FLINT_MIN(ylen, n);
zlen = FLINT_MIN(xlen + ylen - 1, n);
if (res == poly1 || res == poly2)
{
arb_poly_t tmp;
arb_poly_init2(tmp, zlen);
_arb_poly_mullow_block(tmp->coeffs, poly1->coeffs, xlen,
poly2->coeffs, ylen, zlen, prec);
arb_poly_swap(res, tmp);
arb_poly_clear(tmp);
}
else
{
arb_poly_fit_length(res, zlen);
_arb_poly_mullow_block(res->coeffs, poly1->coeffs, xlen,
poly2->coeffs, ylen, zlen, prec);
}
_arb_poly_set_length(res, zlen);
_arb_poly_normalise(res);
}
void
arb_poly_mulmid_block(arb_poly_t res, const arb_poly_t poly1,
const arb_poly_t poly2, slong nlo, slong nhi, slong prec)
{
slong xlen, ylen, zlen;
xlen = poly1->length;
ylen = poly2->length;
if (xlen == 0 || ylen == 0 || nlo >= FLINT_MIN(nhi, xlen + ylen - 1))
{
arb_poly_zero(res);
return;
}
nhi = FLINT_MIN(nhi, xlen + ylen - 1);
zlen = nhi - nlo;
if (res == poly1 || res == poly2)
{
arb_poly_t tmp;
arb_poly_init2(tmp, zlen);
_arb_poly_mulmid_block(tmp->coeffs, poly1->coeffs, xlen,
poly2->coeffs, ylen, nlo, nhi, prec);
arb_poly_swap(res, tmp);
arb_poly_clear(tmp);
}
else
{
arb_poly_fit_length(res, zlen);
_arb_poly_mulmid_block(res->coeffs, poly1->coeffs, xlen,
poly2->coeffs, ylen, nlo, nhi, prec);
}
_arb_poly_set_length(res, zlen);
_arb_poly_normalise(res);
}