#include <string.h>
#include "mpn_extras.h"
static const signed short flint_mpn_sqrhigh_k_tab[FLINT_MPN_SQRHIGH_K_TAB_SIZE] =
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28, 29, 0, 0, 31, 0, 31,
0, 32, 0, 34, 0, 36, 36, 40, 0, 40, 40, 40, 0, 44, 0, 44, 44, 48, 0, 52, 48, 52, 44, 44, 48, 48, 48, 48, 48, 48,
48, 48, 48, 48, 52, 52, 52, 52, 52, 56, 56, 52, 56, 56, 56, 56, 56, 56, 56, 56, 60, 60, 68, 64, 60, 60, 60, 60, 64, 64,
64, 68, 68, 68, 64, 64, 68, 68, 72, 72, 68, 68, 68, 72, 76, 80, 72, 72, 72, 88, 76, 76, 80, 80, 76, 80, 76, 80, 84, 80,
88, 88, 80, 84, 88, 80, 80, 84, 92, 88, 92, 88, 88, 88, 88, 96, 88, 108, 100, 92, 88, 88, 104, 100, 100, 92, 104, 108, 100, 92,
104, 100, 104, 96, 108, 104, 96, 96, 104, 100, 100, 104, 112, 116, 108, 104, 104, 116, 108, 104, 104, 120, 116, 104, 108, 132, 116, 108, 120, 108,
108, 108, 132, 108, 120, 112, 112, 116, 132, 128, 116, 124, 128, 116, 132, 120, 132, 120, 124, 120, 132, 120, 124, 128, 120, 128, 128, 132, 144, 124,
128, 124, 140, 128, 128, 124, 136, 132, 128, 128, 140, 144, 128, 128, 140, 136, 132, 144, 148, 152, 144, 132, 160, 156, 140, 144, 156, 144, 140, 144,
140, 156, 156, 156, 140, 144, 168, 156, 156, 164, 168, 156, 156, 160, 144, 144, 180, 156, 152, 168, 156, 160, 156, 148, 180, 168, 180, 156, 164, 156,
156, 172, 156, 156, 156, 180, 180, 172, 180, 168, 164, 172, 164, 172, 176, 176, 168, 176, 172, 176, 180, 168, 176, 180, 180, 180, 192, 184, 180, 176,
204, 176, 188, 180, 188, 180, 204, 180, 192, 204, 180, 192, 180, 204, 228, 192, 188, 192, 204, 180, 192, 216, 200, 216, 228, 216, 204, 216, 188, 216,
216, 204, 216, 192, 204, 212, 228, 204, 228, 216, 204, 216, 192, 204, 204, 212, 204, 216, 204, 216, 228, 228, 224, 212, 204, 216, 216, 224, 228, 212,
216, 228, 212, 204, 204, 216, 216, 216, 228, 216, 220, 224, 228, 220, 228, 228, 236, 224, 260, 224, 228, 228, 260, 228, 248, 252, 264, 248, 216, 212,
212, 212, 220, 216, 216, 220, 216, 224, 216, 228, 232, 224, 220, 224, 240, 244, 236, 244, 232, 256, 288, 240, 236, 288, 260, 260, 264, 232, 228, 228,
228, 236, 236, 228, 236, 236, 236, 248, 256, 260, 232, 236, 264, 256, 260, 252, 284, 252, 264, 276, 244, 296, 244, 240, 244, 248, 240, 252, 256, 260,
260, 252, 252, 272, 252, 272, 260, 296, 268, 260, 256, 276, 272, 264, 312, 284, 256, 252, 252, 252, 264, 252, 256, 260, 264, 276, 292, 268, 264, 276,
264, 260, 264, 304, 288, 296, 296, 296, 288, 296, 272, 280, 264, 264, 272, 264, 288, 288, 280, 288, 272, 280, 296, 296, 280, 288, 296, 320, 344, 320,
344, 272, 344, 344, 304, 288, 280, 280, 312, 280, 280, 304, 304, 312, 304, 296, 288, 328, 320, 352, 320, 320, 328, 360, 344, 344, 360, 288, 288, 304,
288, 296, 320, 320, 312, 312, 304, 328, 336, 312, 312, 360, 336, 344, 344, 336, 360, 360, 296, 296, 304, 360, 328, 328, 312, 320, 320, 328, 312, 344,
344, 328, 344, 344, 368, 360, 352, 360, 392, 392, 368, 320, 312, 392, 312, 328, 344, 336, 344, 328, 360, 352, 352, 360, 360, 360, 368, 408, 360, 376,
392, 392, 376, 336, 344, 352, 360, 352, 344, 344, 384, 344, 360, 376, 392, 368, 360, 408, 408, 448, 432, 384, 392, 336, 360, 344, 360, 360, 368, 376,
376, 360, 368, 408, 368, 376, 376, 376, 432, 376, 384, 464, 432, 344, 432, 376, 344, 344, 384, 344, 384, 376, 400, 432, 456, 432, 456, 392, 432, 392,
448, 456, 360, 376, 456, 408, 384, 368, 376, 432, 376, 472, 464, 504, 448, 360, 408, 456, 376, 408, 424, 424, 448, 440, 456, 392, 408, 384, 408, 392,
384, 416, 424, 432, 400, 472, 480, 408, 432, 432, 464, 456, 504, 464, 456, 472, 496, 416, 392, 424, 504, 400, 440, 432, 472, 448, 456, 456, 432, 448,
456, 504, 504, 512, 496, 504, 424, 416, 408, 432, 544, 432, 440, 456, 448, 448, 464, 472, 480, 552, 552, 544, 552, 560, 552, 544, 544, 544, 552, 552,
552, 552, 552, 552, 552, 560, 552, 552, 560, 560, 560, 560, 568, 576, 568, 592, 576, 560, 560, 560, 560, 568, 568, 576, 568, 568, 576, 568, 568, 576,
568, 568, 600, 592, 576, 608, 576, 576, 576, 576, 584, 584, 576, 592, 592, 600, 584, 592, 584, 600, 600, 600, 584, 624, 584, 592, 600, 592, 616, 592,
592, 592, 592, 592, 592, 592, 600, 608, 600, 608, 600, 600, 600, 624, 616, 608, 616, 600, 632, 608, 608, 608, 616, 608, 608, 608, 608, 616, 608, 664,
664, 632, 656, 616, 664, 624, 616, 632, 632, 616, 624, 624, 624, 632, 624, 624, 624, 624, 648, 632, 624, 624, 872, 872, 656, 664, 696, 872, 872, 872,
632, 872, 872, 896, 872, 896, 872, 896, 880, 872, 896, 872, 888, 872, 872, 872, 872, 872, 888, 872, 880, 824, 872, 856, 880, 888, 800, 848, 800, 880,
848, 800, 808, 872, 872, 864, 824, 840, 872, 872, 872, 872, 872, 872, 872, 872, 872, 872, 880, 928, 872, 920, 872, 920, 888, 880, 872, 880, 872, 872,
872, 888, 880, 888, 888, 872, 888, 872, 880, 896, 920, 920, 880, 928, 904, 872, 888, 904, 872, 872, 872, 872, 880, 888, 872, 872, 872, 880, 872, 904,
920, 888, 888, 872, 880, 880, 888, 896, 896, 880, 872, 904, 880, 912, 896, 872, 904, 904, 912, 880, 880, 920, 912, 896, 928, 928, 872, 872, 872, 872,
880, 880, 888, 888, 888, 912, 880, 880, 880, 912, 896, 896, 928, 896, 912, 912, 896, 928, 928, 928, 928, 912, 880, 928, 880, 880, 896, 880, 896, 880,
896, 880, 880, 912, 912, 880, 896, 880, 896, 912, 880, 880, 928, 912, 912, 928, 928, 896, 928, 912, 928, 928, 928, 928, 928, 928, 880, 880, 880, 880,
880, 912, 896, 912, 912, 896, 928, 896, 928, 896, 928, 928, 928, 928, 912, 880, 928, 912, 896, 928, 896, 880, 912, 896, 912, 896, 896, 896, 928, 896,
912, 928, 912, 928, 912, 928, 912, 880, 928, 928, 880, 928, 928, 880, 896, 928, 880, 896, 912, 928, 928, 896, 912, 928, 912, 928, 1024, 928, 928, 1040,
912, 928, 1024, 1024, 928, 928, 928, 928, 912, 1040, 1072, 912, 1072, 1072, 1056, 1040, 1072, 1088, 1104, 1040, 1040, 928, 1024, 928, 928, 1056, 928, 928, 1056, 1040,
928, 1056, 1024, 1056, 1056, 1056, 1104, 1024, 1056, 1072, 1056, 1056, 1088, 1072, 1120, 1104, 1072, 1072, 1072, 1104, 1120, 1024, 1120, 1120, 1056, 1024, 1040, 1024, 1040, 1040,
1040, 1024, 1040, 1056, 1040, 1088, 1040, 1056, 1120, 1040, 1040, 1072, 1072, 1088, 1104, 1088, 1104, 1088, 1088, 1056, 1120, 1104, 1152, 1104, 1088, 1120, 1104, 1120, 1024, 1120,
1152, 1136, 1152, 1056, 1056, 1072, 1056, 1152, 1088, 1056, 1104, 1072, 1104, 1088, 1120, 1072, 1104, 1088, 1088, 1120, 1120, 1120, 1152, 1136, 1120, 1120, 1152, 1120, 1136, 1152,
1152, 1072, 1136, 1136, 1088, 1152, 1136, 1136, 1104, 1056, 1072, 1088, 1104, 1104, 1120, 1120, 1104, 1104, 1104, 1136, 1152, 1136, 1152, 1136, 1136, 1152, 1152, 1152, 1136, 1072,
1152, 1136, 1152, 1152, 1104, 1152, 1104, 1152, 1152, 1088, 1120, 1152, 1136, 1136, 1120, 1136, 1120, 1152, 1136, 1136, 1120, 1136, 1152, 1136, 1152, 1136, 1136, 1120, 1152, 1152,
1104, 1152, 1152, 1152, 1104, 1120, 1104, 1120, 1120, 1120, 1120, 1136, 1136, 1136, 1136, 1152, 1120, 1120, 1152, 1152, 1152, 1136, 1152, 1136, 1136, 1152, 1120, 1152, 1152, 1104,
1120, 1152, 1136, 1136, 1136, 1136, 1136, 1136, 1120, 1152, 1152, 1152, 1136, 1104, 1152, 1136, 1136, 1152, 1136, 1120, 1136, 1152, 1152, 1136, 1136, 1120, 1136, 1136, 1136, 1120,
1152, 1152, 1152, 1136, 1136, 1136, 1152, 1104, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1136, 1136, 1136, 1152, 1136, 1136, 1152, 1136, 1152, 1152, 1136, 1120, 1152,
1136, 1136, 1152, 1136, 1152, 1152, 1152, 1152, 1136, 1136, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1136, 1136, 1136, 1152, 1152, 1152, 1152, 1152,
1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1136, 1152, 1120, 1152, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152,
1120, 1152, 1136, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1152, 1152, 1152, 1152, 1152, 1152,
1136, 1152, 1152, 1152, 1152, 1152, 1136, 1136, 1152, 1136, 1152, 1120, 1152, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1136, 1136, 1136, 1152, 1152, 1152, 1136,
1152, 1152, 1152, 1104, 1120, 1152, 1136, 1152, 1152, 1136, 1152, 1136, 1152, 1152, 1152, 1136, 1136, 1152, 1152, 1120, 1152, 1136, 1152, 1152, 1120, 1152, 1120, 1136, 1152, 1152,
1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1152, 1120, 1152, 1152, 1136, 1152, 1136, 1552, 1552, 1136, 1136, 1152, 1552, 1136, 1136, 1584, 1552, 1552, 1552,
1152, 1552, 1552, 1152, 1584, 1552, 1152, 1152, 1552, 1584, 1152, 1152, 1552, 1584, 1568, 1552, 1552, 1552, 1152, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1552,
1552, 1552, 1552, 1552, 1552, 1625, 1552, 1552, 1552, 1552, 1552, 1552, 1552, 1568, 1632, 1552, 1632, 1568, 1584, 1568, 1568, 1584, 1600, 1552, 1552, 1632, 1600, 1632, 1632, 1632,
1632, 1568, 1584, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1552, 1662, 1632, 1632, 1632, 1648, 1632, 1632, 1632, 1568, 1600, 1632, 1552, 1616, 1632, 1632, 1632, 1584, 1632,
1632, 1552, 1632, 1632, 1632, 1568, 1600, 1632, 1648, 1616, 1632, 1648, 1632, 1632, 1632, 1632, 1648, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1680, 1664, 1632, 1664, 1632, 1632,
1632, 1632, 1632, 1632, 1632, 1648, 1648, 1648, 1616, 1632, 1680, 1632, 1680, 1648, 1632, 1664, 1632, 1648, 1632, 1632, 1632, 1632, 1632, 1664, 1632, 1632, 1632, 1632, 1632, 1632,
1648, 1648, 1632, 1632, 1632, 1632, 1632, 1632, 1632, 1664, 1632, 1648, 1664, 1648, 1648, 1632, 1648, 1632, 1680, 1664, 1648, 1680, 1664, 1696, 1632, 1664, 1680, 1632, 1648, 1696,
1680, 1632, 1632, 1648, 1648, 1648, 1632, 1664, 1632, 1648, 1648, 1632, 1648, 1648, 1632, 1664, 1664, 1680, 1680, 1632, 1664, 1664, 1648, 1648, 1712, 1728, 1664, 1712, 1696, 1712,
1680, 1632, 1728, 1648, 1728, 1728, 1712, 1648, 1632, 1632, 1664, 1648, 1664, 1664, 1680, 1712, 1664, 1632, 1696, 1664, 1712, 1696, 1680, 1680, 1696, 1680, 1680, 1712, 1648, 1712,
1632, 1728, 1696, 1648, 1712, 1632, 1712, 1696, 1648, 1680, 1648, 1664, 1648, 1696, 1712, 1648, 1648, 1680, 1664, 1696, 1728, 1696, 1712, 1728, 1696, 1728, 1712, 1664, 1680, 1728,
1712, 1712, 1712, 1728, 1728, 1712, 1728, 1696, 1728, 1712, 1696, 1728, 1728, 1680, 1728, 1712, 1664, 1680, 1680, 1696, 1712, 1696, 1696, 1696, 1728, 1696, 1728, 1728, 1696, 1712,
1712, 1664, 1712, 1680, 1664, 1728, 1728, 1664, 1696, 1696, 1680, 1712, 1680, 1712, 1696, 1728, 1696, 1696, 1728, 1696, 1712, 1696, 1712, 1712, 1712, 1728, 1712, 1696, 1728, 1680,
1696, 1712, 1712, 1728, 1712, 1728, 1696, 1728, 1776, 1728, 1696, 1776, 1728, 1728, 1712, 1712, 1824, 1856, 1728, 1824, 1728, 1728, 1712, 1728, 1728, 1728, 1856, 1696, 1728, 1840,
1712, 1712, 1824, 1856, 1792, 1712, 1840, 1728, 1808, 1728, 1824, 1824, 1840, 1824, 1824, 1856, 1856, 1824, 1856, 1840, 1696, 1856, 1840, 1840, 1856, 1856, 1824, 1712, 1792, 1856,
1824, 1728, 1808, 1792, 1856, 1728, 1792, 1840, 1808, 1808, 1712, 1808, 1840, 1808, 1824, 1824, 1824, 1840, 1824, 1840, 1856, 1824, 1728, 1856, 1856, 1824, 1856, 1856, 1792, 1792,
1856, 1824, 1856, 1824, 1824, 1856, 1808, 1824, 1856, 1856, 1840, 1840, 1840, 1840, 1856, 1840, 1840, 1856, 1824, 1840, 1808, 1824, 1840, 1856, 1856, 1824, 1856, 1840, 1840, 1840,
1824, 1824, 1840, 1840, 1840, 1856, 1856, 1856, };
void
_flint_mpn_sqrhigh_mulders_recursive(mp_ptr rp, mp_srcptr np, mp_size_t n)
{
mp_limb_t cy;
mp_size_t l;
slong k;
if (FLINT_HAVE_SQRHIGH_FUNC(n))
{
rp[n - 1] = flint_mpn_sqrhigh_func_tab[n](rp + n, np);
return;
}
if (n < FLINT_MPN_SQRHIGH_K_TAB_SIZE)
k = flint_mpn_sqrhigh_k_tab[n];
else
k = (n + 4) / 2;
if (k == 0)
{
rp[n - 1] = _flint_mpn_sqrhigh_basecase(rp + n, np, n);
return;
}
FLINT_ASSERT(k >= (n + 4) / 2);
if (k == n)
{
flint_mpn_sqr(rp, np, n);
return;
}
l = n - k;
flint_mpn_sqr(rp + 2 * l, np + l, k);
_flint_mpn_mulhigh_n_mulders_recursive(rp, np, np + k, l);
cy = mpn_lshift(rp + l - 1, rp + l - 1, l + 1, 1);
cy += mpn_add_n(rp + n - 1, rp + n - 1, rp + l - 1, l + 1);
MPN_INCR_U(rp + n + l, k, cy);
{
mp_limb_t hi, lo;
FLINT_ASSERT(k != l);
umul_ppmm(hi, lo, np[k - 1], np[l - 1]);
MPN_INCR_U(rp + n - 1, n + 1, hi);
MPN_INCR_U(rp + n - 1, n + 1, hi);
}
}
mp_limb_t
_flint_mpn_sqrhigh_mulders(mp_ptr res, mp_srcptr u, mp_size_t n)
{
mp_ptr tmp;
mp_limb_t bot;
TMP_INIT;
TMP_START;
tmp = TMP_ALLOC(sizeof(mp_limb_t) * (2 * n));
_flint_mpn_sqrhigh_mulders_recursive(tmp, u, n);
memcpy(res, tmp + n, sizeof(mp_limb_t) * n);
bot = tmp[n - 1];
TMP_END;
return bot;
}
mp_limb_t
_flint_mpn_sqrhigh_sqr(mp_ptr res, mp_srcptr u, mp_size_t n)
{
mp_ptr tmp;
mp_limb_t bot;
tmp = flint_malloc(sizeof(mp_limb_t) * (2 * n));
flint_mpn_sqr(tmp, u, n);
memcpy(res, tmp + n, sizeof(mp_limb_t) * n);
bot = tmp[n - 1];
flint_free(tmp);
return bot;
}
mp_limb_t
_flint_mpn_sqrhigh(mp_ptr res, mp_srcptr u, mp_size_t n)
{
if (n <= FLINT_MPN_SQRHIGH_MULDERS_CUTOFF)
return _flint_mpn_sqrhigh_basecase(res, u, n);
else if (n <= FLINT_MPN_SQRHIGH_SQR_CUTOFF)
return _flint_mpn_sqrhigh_mulders(res, u, n);
else
return _flint_mpn_sqrhigh_sqr(res, u, n);
}
mp_limb_pair_t _flint_mpn_sqrhigh_normalised(mp_ptr rp, mp_srcptr xp, mp_size_t n)
{
mp_limb_pair_t ret;
FLINT_ASSERT(n >= 1);
FLINT_ASSERT(rp != xp);
ret.m1 = flint_mpn_sqrhigh(rp, xp, n);
if (rp[n - 1] >> (FLINT_BITS - 1))
{
ret.m2 = 0;
}
else
{
ret.m2 = 1;
mpn_lshift(rp, rp, n, 1);
rp[0] |= (ret.m1 >> (FLINT_BITS - 1));
ret.m1 <<= 1;
}
return ret;
}