use crate::int::types::compute_limbs::Limb;
#[derive(Clone, Copy)]
pub(crate) struct Mg2By1 {
d: u64,
v: u64,
}
impl Mg2By1 {
#[inline]
pub(crate) const fn new(d: u64) -> Self {
debug_assert!(d >> 63 == 1, "Mg2By1::new: divisor must be normalised");
let num = ((!d as u128) << 64) | (u64::MAX as u128);
let v = (num / (d as u128)) as u64;
Self { d, v }
}
#[inline]
pub(crate) const fn div_rem(&self, u1: u64, u0: u64) -> (u64, u64) {
debug_assert!(u1 < self.d, "Mg2By1::div_rem: high word must be < divisor");
let q128 = (self.v as u128)
.wrapping_mul(u1 as u128)
.wrapping_add(((u1 as u128) << 64) | (u0 as u128));
let mut q1 = (q128 >> 64) as u64;
let q0 = q128 as u64;
q1 = q1.wrapping_add(1);
let mut r = u0.wrapping_sub(q1.wrapping_mul(self.d));
if r > q0 {
q1 = q1.wrapping_sub(1);
r = r.wrapping_add(self.d);
}
if r >= self.d {
q1 = q1.wrapping_add(1);
r = r.wrapping_sub(self.d);
}
(q1, r)
}
}
#[derive(Clone, Copy)]
pub(crate) struct Mg3By2 {
d1: u64,
d0: u64,
dinv: u64,
}
impl Mg3By2 {
#[inline]
pub(crate) const fn new(d1: u64, d0: u64) -> Self {
debug_assert!(
d1 >> 63 == 1,
"Mg3By2::new: top divisor limb must be normalised"
);
let num = ((!d1 as u128) << 64) | (u64::MAX as u128);
let mut v = (num / (d1 as u128)) as u64;
let mut p = d1.wrapping_mul(v).wrapping_add(d0);
if p < d0 {
v = v.wrapping_sub(1);
let mask = if p >= d1 { u64::MAX } else { 0 };
p = p.wrapping_sub(d1);
v = v.wrapping_add(mask);
p = p.wrapping_sub(mask & d1);
}
let prod = (d0 as u128) * (v as u128);
let t1 = (prod >> 64) as u64;
let t0 = prod as u64;
let (new_p, carry) = p.overflowing_add(t1);
let _p_final = new_p;
if carry {
v = v.wrapping_sub(1);
if new_p >= d1 && (new_p > d1 || t0 >= d0) {
v = v.wrapping_sub(1);
}
}
Self { d1, d0, dinv: v }
}
#[inline]
pub(crate) const fn div_rem(&self, n2: u64, n1: u64, n0: u64) -> (u64, u64, u64) {
debug_assert!(
n2 < self.d1 || (n2 == self.d1 && n1 < self.d0),
"Mg3By2::div_rem: numerator high pair must be < divisor"
);
let prod = (n2 as u128)
.wrapping_mul(self.dinv as u128)
.wrapping_add(((n2 as u128) << 64) | (n1 as u128));
let mut q = (prod >> 64) as u64;
let q_lo = prod as u64;
let mut r1 = n1.wrapping_sub(q.wrapping_mul(self.d1));
let r256 = (((r1 as u128) << 64) | (n0 as u128))
.wrapping_sub(((self.d1 as u128) << 64) | (self.d0 as u128));
r1 = (r256 >> 64) as u64;
let mut r0 = r256 as u64;
let t = (self.d0 as u128).wrapping_mul(q as u128);
let r256 = (((r1 as u128) << 64) | (r0 as u128)).wrapping_sub(t);
r1 = (r256 >> 64) as u64;
r0 = r256 as u64;
q = q.wrapping_add(1);
let mask = if r1 >= q_lo { u64::MAX } else { 0 };
q = q.wrapping_add(mask); let add = ((mask & self.d1) as u128) << 64 | ((mask & self.d0) as u128);
let r256 = (((r1 as u128) << 64) | (r0 as u128)).wrapping_add(add);
r1 = (r256 >> 64) as u64;
r0 = r256 as u64;
if r1 > self.d1 || (r1 == self.d1 && r0 >= self.d0) {
q = q.wrapping_add(1);
let r256 = (((r1 as u128) << 64) | (r0 as u128))
.wrapping_sub(((self.d1 as u128) << 64) | (self.d0 as u128));
r1 = (r256 >> 64) as u64;
r0 = r256 as u64;
}
(q, r1, r0)
}
}
pub(crate) trait DivLimb: Limb {
type Recip: Copy;
type Acc: Copy;
const MAX: Self;
const ACC_ZERO: Self::Acc;
fn new_recip(v_top: Self) -> Self::Recip;
fn est_2by1(recip: &Self::Recip, hi: Self, lo: Self) -> (Self, Self);
fn mul_sub_step(q_hat: Self, v_i: Self, u_ji: Self, carry: Self::Acc) -> (Self, Self::Acc);
fn mul_sub_final(u_top: Self, carry: Self::Acc) -> (Self, bool);
fn store_quot_digit(quot: &mut [u64], j: usize, digit: Self);
}
impl DivLimb for u64 {
type Recip = Mg2By1;
type Acc = u128;
const MAX: Self = u64::MAX;
const ACC_ZERO: Self::Acc = 0;
#[inline]
fn new_recip(v_top: Self) -> Self::Recip {
Mg2By1::new(v_top)
}
#[inline]
fn est_2by1(recip: &Self::Recip, hi: Self, lo: Self) -> (Self, Self) {
recip.div_rem(hi, lo)
}
#[inline(always)]
fn mul_sub_step(q_hat: Self, v_i: Self, u_ji: Self, carry: u128) -> (Self, u128) {
let acc = carry + (q_hat as u128) * (v_i as u128);
let (res, b) = u_ji.overflowing_sub(acc as u64);
(res, (acc >> 64) + (b as u128))
}
#[inline(always)]
fn mul_sub_final(u_top: Self, carry: u128) -> (Self, bool) {
let (s, b1) = u_top.overflowing_sub(carry as u64);
(s, b1 || (carry >> 64) != 0)
}
#[inline]
fn store_quot_digit(quot: &mut [u64], j: usize, digit: Self) {
if j < quot.len() {
quot[j] = digit;
}
}
}
impl DivLimb for u128 {
type Recip = Mg3By2;
type Acc = u128;
const MAX: Self = u128::MAX;
const ACC_ZERO: Self::Acc = 0;
#[inline]
fn new_recip(v_top: Self) -> Self::Recip {
Mg3By2::new((v_top >> 64) as u64, v_top as u64)
}
#[inline]
fn est_2by1(recip: &Self::Recip, hi: Self, lo: Self) -> (Self, Self) {
let a3 = (hi >> 64) as u64;
let a2 = hi as u64;
let a1 = (lo >> 64) as u64;
let a0 = lo as u64;
let (q1, r1, r0) = recip.div_rem(a3, a2, a1);
let (q0, s1, s0) = recip.div_rem(r1, r0, a0);
let q = ((q1 as u128) << 64) | (q0 as u128);
let r = ((s1 as u128) << 64) | (s0 as u128);
(q, r)
}
#[inline(always)]
fn mul_sub_step(q_hat: Self, v_i: Self, u_ji: Self, carry: u128) -> (Self, u128) {
let (p_lo, p_hi) = <u128 as Limb>::widening_mul(q_hat, v_i);
let (sub_lo, c0) = p_lo.overflowing_add(carry);
let (res, b) = u_ji.overflowing_sub(sub_lo);
(res, p_hi + (c0 as u128) + (b as u128))
}
#[inline(always)]
fn mul_sub_final(u_top: Self, carry: u128) -> (Self, bool) {
u_top.overflowing_sub(carry)
}
#[inline]
fn store_quot_digit(quot: &mut [u64], j: usize, digit: Self) {
if 2 * j < quot.len() {
quot[2 * j] = digit as u64;
}
if 2 * j + 1 < quot.len() {
quot[2 * j + 1] = (digit >> 64) as u64;
}
}
}