#include <math.h>
#include "profiler.h"
#include "mpn_extras.h"
#include "ulong_extras.h"
#ifdef FLINT_NUM_CLOCKS
void
mock_mulmod_preinvn(mp_ptr rxx, mp_ptr axx, mp_srcptr bxx, slong nnn, mp_srcptr nxx, mp_srcptr ninv, slong norm)
{
mp_ptr __t;
TMP_INIT;
TMP_START;
__t = TMP_ALLOC((3*(nnn)+1)*sizeof(mp_limb_t));
flint_mpn_mul_n(__t, axx, bxx, nnn);
if (norm)
mpn_rshift(__t, __t, 2*(nnn), norm);
mpn_tdiv_qr(__t + 2*(nnn), rxx, 0, __t, 2*(nnn), nxx, nnn);
TMP_END;
}
typedef struct
{
slong limbs;
int algo;
} info_t;
void sample(void * arg, ulong count)
{
info_t * info = (info_t *) arg;
slong size = info->limbs, i, j;
int algo = info->algo;
int scale = 200;
mpz_t a, b, d, r2;
gmp_randstate_t st;
mp_ptr dinv;
flint_bitcnt_t norm;
FLINT_TEST_INIT(state);
mpz_init(a);
mpz_init(b);
mpz_init(d);
gmp_randinit_default(st);
for (i = 0; i < count; i++)
{
mpz_rrandomb(a, st, size*FLINT_BITS);
mpz_rrandomb(b, st, size*FLINT_BITS);
do {
mpz_rrandomb(d, st, size*FLINT_BITS);
} while (mpz_sgn(d) == 0);
mpz_fdiv_r(a, a, d);
mpz_fdiv_r(b, b, d);
norm = flint_clz(d->_mp_d[d->_mp_size - 1]);
mpz_mul_2exp(a, a, norm);
mpz_mul_2exp(b, b, norm);
mpz_mul_2exp(d, d, norm);
dinv = flint_malloc(size*sizeof(mp_limb_t));
flint_mpn_preinvn(dinv, d->_mp_d, size);
r2->_mp_d = flint_malloc(size*sizeof(mp_limb_t));
prof_start();
if (algo == 1)
{
for (j = 0; j < scale; j++)
{
flint_mpn_mulmod_preinvn(r2->_mp_d, a->_mp_d, b->_mp_d, size, d->_mp_d, dinv, norm);
}
} else
{
for (j = 0; j < scale; j++)
{
mock_mulmod_preinvn(r2->_mp_d, a->_mp_d, b->_mp_d, size, d->_mp_d, dinv, norm);
}
}
prof_stop();
flint_free(r2->_mp_d);
flint_free(dinv);
}
mpz_clear(a);
mpz_clear(b);
mpz_clear(d);
gmp_randclear(st);
FLINT_TEST_CLEAR(state);
}
int main(void)
{
double min, max;
info_t info;
slong k, scale;
printf("1: With precomputed inverse\n");
printf("2: Without precomputed inverse\n\n");
for (k = 1; k <= 10000; k = (slong) ceil(1.1*k))
{
info.limbs = k;
info.algo = 1;
scale = 200;
prof_repeat(&min, &max, sample, (void *) &info);
flint_printf("1: limbs %wd, min %.3g ms, max %.3g ms\n",
info.limbs,
((min/(double)FLINT_CLOCK_SCALE_FACTOR)/scale)/2400000.0,
((max/(double)FLINT_CLOCK_SCALE_FACTOR)/scale)/2400000.0
);
info.algo = 2;
prof_repeat(&min, &max, sample, (void *) &info);
flint_printf("2: limbs %wd, min %.3g ms, max %.3g ms\n\n",
info.limbs,
((min/(double)FLINT_CLOCK_SCALE_FACTOR)/scale)/2400000.0,
((max/(double)FLINT_CLOCK_SCALE_FACTOR)/scale)/2400000.0
);
}
return 0;
}
#else
int main(void) { return 0; }
#endif