#ifndef REDUCER_H
#define REDUCER_H
#include <algorithm>
#include <array>
#include <cmath>
#include <cstdint> // int_fast64_t, uint_fast64_t, INT64_C
#include <cstdlib> // llabs
#include "ClassGroup.h"
#ifdef __has_attribute
# define HAS_ATTRIBUTE(x) __has_attribute(x)
#else
# define HAS_ATTRIBUTE(x) 0
#endif
#if HAS_ATTRIBUTE(weak)
# define WEAK __attribute__((weak))
#else
# define WEAK
#endif
extern "C" {
int has_lzcnt_hard();
unsigned int lzcnt64_soft(unsigned long long x);
unsigned int lzcnt64_hard(unsigned long long x) WEAK;
}
namespace {
constexpr int_fast64_t THRESH{INT64_C(1) << 31}; // |v|, |x| bound for the fast path
constexpr int_fast64_t EXP_THRESH{31};           // max exponent spread for the fast path
}
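
// alignas(64) keeps the reducer state within a single cache line.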
class alignas(64) Reducer {
public:
  explicit Reducer(ClassGroupContext &ctx_) : ctx(ctx_) {}
  ~Reducer() = default;
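
  // Reduce until isReduced() holds. Each pass snapshots (a, b, c) as 63-bit
  // approximations at a common scale; when the exponents differ by more than
  // EXP_THRESH that snapshot is too lossy, so one exact reducer() step runs
  // instead.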
void run() {
while (!isReduced()) {
int_fast64_t a, b, c;
{
int_fast64_t a_exp, b_exp, c_exp;
mpz_get_si_2exp(a, a_exp, ctx.a);
mpz_get_si_2exp(b, b_exp, ctx.b);
mpz_get_si_2exp(c, c_exp, ctx.c);
auto mm = std::minmax({a_exp, b_exp, c_exp});
if (mm.second - mm.first > EXP_THRESH) {
reducer();
continue;
}
        int_fast64_t max_exp(mm.second + 1); // one bit of headroom against rounding
        a >>= (max_exp - a_exp);
b >>= (max_exp - b_exp);
c >>= (max_exp - c_exp);
}
{
int_fast64_t u, v, w, x;
calc_uvwx(u, v, w, x, a, b, c);
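        // Apply the accumulated transform [[u, v], [w, x]] to the exact form:
        //   a' = u^2*a + u*w*b + w^2*c
        //   b' = 2*u*v*a + (u*x + v*w)*b + 2*w*x*c
        //   c' = v^2*a + v*x*b + x^2*c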
mpz_mul_si(ctx.faa, ctx.a, u * u);
mpz_mul_si(ctx.fab, ctx.b, u * w);
mpz_mul_si(ctx.fac, ctx.c, w * w);
mpz_mul_si(ctx.fba, ctx.a, uint_fast64_t(u * v) << 1);
mpz_mul_si(ctx.fbb, ctx.b, u * x + v * w);
mpz_mul_si(ctx.fbc, ctx.c, uint_fast64_t(w * x) << 1);
mpz_mul_si(ctx.fca, ctx.a, v * v);
mpz_mul_si(ctx.fcb, ctx.b, v * x);
mpz_mul_si(ctx.fcc, ctx.c, x * x);
mpz_add(ctx.a, ctx.faa, ctx.fab);
mpz_add(ctx.a, ctx.a, ctx.fac);
mpz_add(ctx.b, ctx.fba, ctx.fbb);
mpz_add(ctx.b, ctx.b, ctx.fbc);
mpz_add(ctx.c, ctx.fca, ctx.fcb);
mpz_add(ctx.c, ctx.c, ctx.fcc);
}
}
}
private:
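  // Multiply op by 2^shift as a signed value: left shift for positive shift,
  // logical right shift for negative, and 0 once the whole word is shifted
  // out. Callers keep shift well below 64 on the left-shift side.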
inline void signed_shift(uint64_t op, int64_t shift, int_fast64_t &r) {
if (shift > 0)
r = static_cast<int64_t>(op << shift);
else if (shift <= -64)
r = 0;
else
r = static_cast<int64_t>(op >> (-shift));
}
  // One-time runtime detection of the hardware LZCNT instruction.
  bool bLZCChecked = false;
  bool bLZCHasHW = false;
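
  // Approximate op by a signed 63-bit mantissa r and an exponent exp so that
  // op ~= r * 2^(exp - 63); the mantissa pulls bits from the top two limbs.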
inline void mpz_get_si_2exp(int_fast64_t &r, int_fast64_t &exp,
const mpz_t op) {
int_fast64_t size(static_cast<long>(mpz_size(op)));
uint_fast64_t last(mpz_getlimbn(op, (size - 1)));
    if (!bLZCChecked) {
      bLZCHasHW = has_lzcnt_hard();
      bLZCChecked = true;
    }
    // exp = bit length of the top limb; call the weak lzcnt64_hard symbol
    // only when the runtime check says the instruction is available.
    int_fast64_t lg2;
    if (bLZCHasHW)
      lg2 = exp = ((63 - lzcnt64_hard(last)) + 1);
    else
      lg2 = exp = ((63 - lzcnt64_soft(last)) + 1);
signed_shift(last, (63 - exp), r);
if (size > 1) {
exp += (size - 1) * 64;
uint_fast64_t prev(mpz_getlimbn(op, (size - 2)));
int_fast64_t t;
signed_shift(prev, -1 - lg2, t);
r += t;
}
if (mpz_sgn(op) < 0)
r = -r;
}
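
  // True once |b| <= min(a, c). Before returning true this also
  // canonicalizes the form: a and c are swapped (negating b) when a > c,
  // and b is made non-negative when a == c.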
inline bool isReduced() {
int a_b(mpz_cmpabs(ctx.a, ctx.b));
int c_b(mpz_cmpabs(ctx.c, ctx.b));
if (a_b < 0 || c_b < 0)
return false;
int a_c(mpz_cmp(ctx.a, ctx.c));
if (a_c > 0) {
mpz_swap(ctx.a, ctx.c);
mpz_neg(ctx.b, ctx.b);
} else if (a_c == 0 && mpz_sgn(ctx.b) < 0) {
mpz_neg(ctx.b, ctx.b);
}
return true;
}
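
  // One exact, full-precision reduction step, used when the 63-bit fast path
  // would be inaccurate: with s ~ b/(2c) rounded to nearest,
  //   (a, b, c) <- (c, 2*c*s - b, a - b*s + c*s^2).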
inline void reducer() {
    // s ~ b/(2c) rounded to nearest, computed as (b/c + 1) / 2.
    mpz_mdiv(ctx.r, ctx.b, ctx.c); // floor division (mpz_fdiv_q in current GMP)
    mpz_add_ui(ctx.r, ctx.r, 1);
    mpz_div_2exp(ctx.s, ctx.r, 1);
    mpz_mul(ctx.m, ctx.c, ctx.s);    // m = c*s
    mpz_mul_2exp(ctx.r, ctx.m, 1);   // r = 2*c*s
    mpz_sub(ctx.m, ctx.m, ctx.b);    // m = c*s - b
    mpz_sub(ctx.b, ctx.r, ctx.b);    // b' = 2*c*s - b
    mpz_swap(ctx.a, ctx.c);          // a' = old c
    mpz_addmul(ctx.c, ctx.s, ctx.m); // c' = a + s*(c*s - b) = a - b*s + c*s^2
}
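
  // Gauss-reduce the 63-bit approximations of (a, b, c), accumulating the
  // 2x2 transform [[u, v], [w, x]] that run() then applies to the exact
  // form; the loop stops before v or x can leave the window set by THRESH.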
inline void calc_uvwx(int_fast64_t &u, int_fast64_t &v, int_fast64_t &w,
int_fast64_t &x, int_fast64_t &a, int_fast64_t &b,
int_fast64_t &c) {
int below_threshold;
int_fast64_t u_{1}, v_{0}, w_{0}, x_{1};
int_fast64_t a_, b_, s;
do {
u = u_;
v = v_;
w = w_;
x = x_;
      // s ~ sign(b) * round(|b| / (2c)), using only truncating division.
      s = b >= 0 ? (b + c) / (c << 1) : -(-b + c) / (c << 1);
a_ = a;
b_ = b;
a = c;
      b = -b + (uint_fast64_t(c * s) << 1); // unsigned cast sidesteps signed-shift UB
      c = a_ - s * (b_ - c * s);            // c' = a - s*b + s^2*c (c still the old value)
u_ = v;
v_ = -u + s * v;
w_ = x;
x_ = -w + s * x;
      // (|v_| OR |x_|) <= THRESH guarantees both magnitudes stay in bounds.
      below_threshold = (llabs(v_) | llabs(x_)) <= THRESH ? 1 : 0;
} while (below_threshold && a > c && c > 0);
if (below_threshold) {
u = u_;
v = v_;
w = w_;
x = x_;
}
}
ClassGroupContext &ctx;
};
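
// Usage sketch. Assumptions (not guaranteed by this header):
// ClassGroupContext is default-constructible and its mpz_t members a, b, c
// hold the form to reduce; the coefficients below are placeholders, not a
// meaningful form.
//
//   ClassGroupContext ctx;
//   mpz_set_si(ctx.a, 2); // hypothetical input form (2, 1, 3)
//   mpz_set_si(ctx.b, 1);
//   mpz_set_si(ctx.c, 3);
//   Reducer red(ctx);
//   red.run();
//   gmp_printf("%Zd %Zd %Zd\n", ctx.a, ctx.b, ctx.c);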
#endif