#include <stdio.h>
#include <stdlib.h>
#include "bootstrap.c"
#if defined (__STDC__) \
|| defined (__cplusplus) \
|| defined (_AIX) \
|| defined (__DECC) \
|| (defined (__mips) && defined (_SYSTYPE_SVR4)) \
|| defined (_MSC_VER) \
|| defined (_WIN32)
#define HAVE_CONST 1
#endif
#if ! HAVE_CONST
#define const
#endif
mpz_t *sq_res_0x100;
int nsq_res_0x100;
int sq_res_0x100_num;
double sq_res_0x100_fraction;
int mod34_bits;
int mod_bits;
int max_divisor;
int max_divisor_bits;
double total_fraction;
mpz_t pp;
mpz_t pp_norm;
mpz_t pp_inverted;
mpz_t mod_mask;
char mod34_excuse[128];
struct rawfactor_t {
int divisor;
int multiplicity;
};
struct rawfactor_t *rawfactor;
int nrawfactor;
struct factor_t {
int divisor;
mpz_t inverse;
mpz_t mask;
double fraction;
};
struct factor_t *factor;
int nfactor;
int factor_alloc;
int
f_cmp_divisor (const void *parg, const void *qarg)
{
const struct factor_t *p, *q;
p = (const struct factor_t *) parg;
q = (const struct factor_t *) qarg;
if (p->divisor > q->divisor)
return 1;
else if (p->divisor < q->divisor)
return -1;
else
return 0;
}
int
f_cmp_fraction (const void *parg, const void *qarg)
{
const struct factor_t *p, *q;
p = (const struct factor_t *) parg;
q = (const struct factor_t *) qarg;
if (p->fraction > q->fraction)
return 1;
else if (p->fraction < q->fraction)
return -1;
else
return 0;
}
#define COLLAPSE_ELEMENT(array, idx, narray) \
do { \
memmove (&(array)[idx], \
&(array)[idx+1], \
((narray)-((idx)+1)) * sizeof (array[0])); \
(narray)--; \
} while (0)
int
mul_2exp_mod (int n, int p, int m)
{
while (--p >= 0)
n = (2 * n) % m;
return n;
}
int
neg_mod (int n, int m)
{
assert (n >= 0 && n < m);
return (n == 0 ? 0 : m-n);
}
void
square_mask (mpz_t mask, int m)
{
int p, i, r, idx;
p = mul_2exp_mod (1, mod_bits, m);
p = neg_mod (p, m);
mpz_set_ui (mask, 0L);
for (i = 0; i < m; i++)
{
r = (i * i) % m;
idx = (r * p) % m;
mpz_setbit (mask, (unsigned long) idx);
}
}
void
generate_sq_res_0x100 (int limb_bits)
{
int i, res;
nsq_res_0x100 = (0x100 + limb_bits - 1) / limb_bits;
sq_res_0x100 = (mpz_t *) xmalloc (nsq_res_0x100 * sizeof (*sq_res_0x100));
for (i = 0; i < nsq_res_0x100; i++)
mpz_init_set_ui (sq_res_0x100[i], 0L);
for (i = 0; i < 0x100; i++)
{
res = (i * i) % 0x100;
mpz_setbit (sq_res_0x100[res / limb_bits],
(unsigned long) (res % limb_bits));
}
sq_res_0x100_num = 0;
for (i = 0; i < nsq_res_0x100; i++)
sq_res_0x100_num += mpz_popcount (sq_res_0x100[i]);
sq_res_0x100_fraction = (double) sq_res_0x100_num / 256.0;
}
void
generate_mod (int limb_bits, int nail_bits)
{
int numb_bits = limb_bits - nail_bits;
int i, divisor;
mpz_init_set_ui (pp, 0L);
mpz_init_set_ui (pp_norm, 0L);
mpz_init_set_ui (pp_inverted, 0L);
factor_alloc = limb_bits;
factor = (struct factor_t *) xmalloc (factor_alloc * sizeof (*factor));
rawfactor = (struct rawfactor_t *) xmalloc (factor_alloc * sizeof (*rawfactor));
if (numb_bits % 4 != 0)
{
strcpy (mod34_excuse, "GMP_NUMB_BITS % 4 != 0");
goto use_pp;
}
max_divisor = 2*limb_bits;
max_divisor_bits = log2_ceil (max_divisor);
if (numb_bits / 4 < max_divisor_bits)
{
max_divisor = limb_bits;
max_divisor_bits = log2_ceil (max_divisor);
if (numb_bits / 4 < max_divisor_bits)
{
strcpy (mod34_excuse, "GMP_NUMB_BITS / 4 too small");
goto use_pp;
}
}
{
mpz_t m, q, r;
int multiplicity;
mod34_bits = (numb_bits / 4) * 3;
mod_bits = mod34_bits + 1;
mpz_init_set_ui (m, 1L);
mpz_mul_2exp (m, m, mod34_bits);
mpz_sub_ui (m, m, 1L);
mpz_init (q);
mpz_init (r);
for (i = 3; i <= max_divisor; i+=2)
{
if (! isprime (i))
continue;
mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
if (mpz_sgn (r) != 0)
continue;
divisor = 1;
multiplicity = 0;
do
{
if (divisor > max_divisor / i)
break;
multiplicity++;
mpz_set (m, q);
mpz_tdiv_qr_ui (q, r, m, (unsigned long) i);
}
while (mpz_sgn (r) == 0);
assert (nrawfactor < factor_alloc);
rawfactor[nrawfactor].divisor = i;
rawfactor[nrawfactor].multiplicity = multiplicity;
nrawfactor++;
}
mpz_clear (m);
mpz_clear (q);
mpz_clear (r);
}
if (nrawfactor <= 2)
{
mpz_t new_pp;
sprintf (mod34_excuse, "only %d small factor%s",
nrawfactor, nrawfactor == 1 ? "" : "s");
use_pp:
max_divisor = 2*limb_bits;
max_divisor_bits = log2_ceil (max_divisor);
mpz_init (new_pp);
nrawfactor = 0;
mod_bits = MIN (numb_bits, limb_bits - max_divisor_bits);
mpz_set_ui (pp, 1L);
for (i = 3; i <= max_divisor; i+=2)
{
if (! isprime (i))
continue;
mpz_mul_ui (new_pp, pp, (unsigned long) i);
if (mpz_sizeinbase (new_pp, 2) > mod_bits)
break;
mpz_set (pp, new_pp);
assert (nrawfactor < factor_alloc);
rawfactor[nrawfactor].divisor = i;
rawfactor[nrawfactor].multiplicity = 1;
nrawfactor++;
}
for (i = nrawfactor-1; i >= 0; i--)
{
if (rawfactor[i].divisor > max_divisor / rawfactor[i].divisor)
continue;
mpz_mul_ui (new_pp, pp, (unsigned long) rawfactor[i].divisor);
if (mpz_sizeinbase (new_pp, 2) > mod_bits)
continue;
mpz_set (pp, new_pp);
rawfactor[i].multiplicity++;
}
mod_bits = mpz_sizeinbase (pp, 2);
mpz_set (pp_norm, pp);
while (mpz_sizeinbase (pp_norm, 2) < numb_bits)
mpz_add (pp_norm, pp_norm, pp_norm);
mpz_preinv_invert (pp_inverted, pp_norm, numb_bits);
mpz_clear (new_pp);
}
for (i = 0; i < nrawfactor; i++)
{
int j;
assert (nfactor < factor_alloc);
factor[nfactor].divisor = 1;
for (j = 0; j < rawfactor[i].multiplicity; j++)
factor[nfactor].divisor *= rawfactor[i].divisor;
nfactor++;
}
combine:
qsort (factor, nfactor, sizeof (factor[0]), f_cmp_divisor);
for (i = nfactor-1; i >= 1; i--)
{
if (factor[i].divisor <= max_divisor / factor[0].divisor)
{
factor[0].divisor *= factor[i].divisor;
COLLAPSE_ELEMENT (factor, i, nfactor);
goto combine;
}
}
total_fraction = 1.0;
for (i = 0; i < nfactor; i++)
{
mpz_init (factor[i].inverse);
mpz_invert_ui_2exp (factor[i].inverse,
(unsigned long) factor[i].divisor,
(unsigned long) mod_bits);
mpz_init (factor[i].mask);
square_mask (factor[i].mask, factor[i].divisor);
factor[i].fraction = (double) mpz_popcount (factor[i].mask)
/ factor[i].divisor;
total_fraction *= factor[i].fraction;
}
qsort (factor, nfactor, sizeof (factor[0]), f_cmp_fraction);
}
void
print (int limb_bits, int nail_bits)
{
int i;
mpz_t mhi, mlo;
printf ("/* This file generated by gen-psqr.c - DO NOT EDIT. */\n");
printf ("\n");
printf ("#if GMP_LIMB_BITS != %d || GMP_NAIL_BITS != %d\n",
limb_bits, nail_bits);
printf ("Error, error, this data is for %d bit limb and %d bit nail\n",
limb_bits, nail_bits);
printf ("#endif\n");
printf ("\n");
printf ("/* Non-zero bit indicates a quadratic residue mod 0x100.\n");
printf (" This test identifies %.2f%% as non-squares (%d/256). */\n",
(1.0 - sq_res_0x100_fraction) * 100.0,
0x100 - sq_res_0x100_num);
printf ("static const mp_limb_t\n");
printf ("sq_res_0x100[%d] = {\n", nsq_res_0x100);
for (i = 0; i < nsq_res_0x100; i++)
{
printf (" CNST_LIMB(0x");
mpz_out_str (stdout, 16, sq_res_0x100[i]);
printf ("),\n");
}
printf ("};\n");
printf ("\n");
if (mpz_sgn (pp) != 0)
{
printf ("/* mpn_mod_34lsub1 not used due to %s */\n", mod34_excuse);
printf ("/* PERFSQR_PP = ");
}
else
printf ("/* 2^%d-1 = ", mod34_bits);
for (i = 0; i < nrawfactor; i++)
{
if (i != 0)
printf (" * ");
printf ("%d", rawfactor[i].divisor);
if (rawfactor[i].multiplicity != 1)
printf ("^%d", rawfactor[i].multiplicity);
}
printf (" %s*/\n", mpz_sgn (pp) == 0 ? "... " : "");
printf ("#define PERFSQR_MOD_BITS %d\n", mod_bits);
if (mpz_sgn (pp) != 0)
{
printf ("#define PERFSQR_PP CNST_LIMB(0x");
mpz_out_str (stdout, 16, pp);
printf (")\n");
printf ("#define PERFSQR_PP_NORM CNST_LIMB(0x");
mpz_out_str (stdout, 16, pp_norm);
printf (")\n");
printf ("#define PERFSQR_PP_INVERTED CNST_LIMB(0x");
mpz_out_str (stdout, 16, pp_inverted);
printf (")\n");
}
printf ("\n");
mpz_init (mhi);
mpz_init (mlo);
printf ("/* This test identifies %.2f%% as non-squares. */\n",
(1.0 - total_fraction) * 100.0);
printf ("#define PERFSQR_MOD_TEST(up, usize) \\\n");
printf (" do { \\\n");
printf (" mp_limb_t r; \\\n");
if (mpz_sgn (pp) != 0)
printf (" PERFSQR_MOD_PP (r, up, usize); \\\n");
else
printf (" PERFSQR_MOD_34 (r, up, usize); \\\n");
for (i = 0; i < nfactor; i++)
{
printf (" \\\n");
printf (" /* %5.2f%% */ \\\n",
(1.0 - factor[i].fraction) * 100.0);
printf (" PERFSQR_MOD_%d (r, CNST_LIMB(%2d), CNST_LIMB(0x",
factor[i].divisor <= limb_bits ? 1 : 2,
factor[i].divisor);
mpz_out_str (stdout, 16, factor[i].inverse);
printf ("), \\\n");
printf (" CNST_LIMB(0x");
if ( factor[i].divisor <= limb_bits)
{
mpz_out_str (stdout, 16, factor[i].mask);
}
else
{
mpz_tdiv_r_2exp (mlo, factor[i].mask, (unsigned long) limb_bits);
mpz_tdiv_q_2exp (mhi, factor[i].mask, (unsigned long) limb_bits);
mpz_out_str (stdout, 16, mhi);
printf ("), CNST_LIMB(0x");
mpz_out_str (stdout, 16, mlo);
}
printf (")); \\\n");
}
printf (" } while (0)\n");
printf ("\n");
printf ("/* Grand total sq_res_0x100 and PERFSQR_MOD_TEST, %.2f%% non-squares. */\n",
(1.0 - (total_fraction * 44.0/256.0)) * 100.0);
printf ("\n");
printf ("/* helper for tests/mpz/t-perfsqr.c */\n");
printf ("#define PERFSQR_DIVISORS { 256,");
for (i = 0; i < nfactor; i++)
printf (" %d,", factor[i].divisor);
printf (" }\n");
mpz_clear (mhi);
mpz_clear (mlo);
}
int
main (int argc, char *argv[])
{
int limb_bits, nail_bits;
if (argc != 3)
{
fprintf (stderr, "Usage: gen-psqr <limbbits> <nailbits>\n");
exit (1);
}
limb_bits = atoi (argv[1]);
nail_bits = atoi (argv[2]);
if (limb_bits <= 0
|| nail_bits < 0
|| nail_bits >= limb_bits)
{
fprintf (stderr, "Invalid limb/nail bits: %d %d\n",
limb_bits, nail_bits);
exit (1);
}
generate_sq_res_0x100 (limb_bits);
generate_mod (limb_bits, nail_bits);
print (limb_bits, nail_bits);
return 0;
}