#[inline]
const fn mul_128(a: u128, b: u128) -> (u128, u128) {
let (a_hi, a_lo) = (a >> 64, a & u64::MAX as u128);
let (b_hi, b_lo) = (b >> 64, b & u64::MAX as u128);
let (mid, carry1) = (a_lo * b_hi).overflowing_add(a_hi * b_lo);
let (low, carry2) = (a_lo * b_lo).overflowing_add(mid << 64);
let high = a_hi * b_hi + (mid >> 64) + ((carry1 as u128) << 64) + carry2 as u128;
(high, low)
}
#[inline]
pub(crate) const fn limbs_is_zero(a: &[u128]) -> bool {
let mut i = 0;
while i < a.len() {
if a[i] != 0 {
return false;
}
i += 1;
}
true
}
#[inline]
pub(crate) const fn limbs_eq(a: &[u128], b: &[u128]) -> bool {
let n = if a.len() > b.len() { a.len() } else { b.len() };
let mut i = 0;
while i < n {
let av = if i < a.len() { a[i] } else { 0 };
let bv = if i < b.len() { b[i] } else { 0 };
if av != bv {
return false;
}
i += 1;
}
true
}
#[inline]
pub(crate) const fn limbs_cmp(a: &[u128], b: &[u128]) -> i32 {
let n = if a.len() > b.len() { a.len() } else { b.len() };
let mut i = n;
while i > 0 {
i -= 1;
let av = if i < a.len() { a[i] } else { 0 };
let bv = if i < b.len() { b[i] } else { 0 };
if av < bv {
return -1;
}
if av > bv {
return 1;
}
}
0
}
#[inline]
pub(crate) const fn limbs_bit_len(a: &[u128]) -> u32 {
let mut i = a.len();
while i > 0 {
i -= 1;
if a[i] != 0 {
return (i as u32) * 128 + (128 - a[i].leading_zeros());
}
}
0
}
#[inline]
pub(crate) const fn limbs_add_assign(a: &mut [u128], b: &[u128]) -> bool {
let mut carry = 0u128;
let mut i = 0;
while i < a.len() {
let bv = if i < b.len() { b[i] } else { 0 };
let (s1, c1) = a[i].overflowing_add(bv);
let (s2, c2) = s1.overflowing_add(carry);
a[i] = s2;
carry = (c1 as u128) + (c2 as u128);
i += 1;
}
carry != 0
}
#[inline]
pub(crate) const fn limbs_sub_assign(a: &mut [u128], b: &[u128]) -> bool {
let mut borrow = 0u128;
let mut i = 0;
while i < a.len() {
let bv = if i < b.len() { b[i] } else { 0 };
let (d1, b1) = a[i].overflowing_sub(bv);
let (d2, b2) = d1.overflowing_sub(borrow);
a[i] = d2;
borrow = (b1 as u128) + (b2 as u128);
i += 1;
}
borrow != 0
}
pub(crate) const fn limbs_shl(a: &[u128], shift: u32, out: &mut [u128]) {
let mut z = 0;
while z < out.len() {
out[z] = 0;
z += 1;
}
let limb_shift = (shift / 128) as usize;
let bit = shift % 128;
let mut i = 0;
while i < a.len() {
let dst = i + limb_shift;
if dst < out.len() {
if bit == 0 {
out[dst] |= a[i];
} else {
out[dst] |= a[i] << bit;
if dst + 1 < out.len() {
out[dst + 1] |= a[i] >> (128 - bit);
}
}
}
i += 1;
}
}
pub(crate) const fn limbs_shr(a: &[u128], shift: u32, out: &mut [u128]) {
let mut z = 0;
while z < out.len() {
out[z] = 0;
z += 1;
}
let limb_shift = (shift / 128) as usize;
let bit = shift % 128;
let mut i = limb_shift;
while i < a.len() {
let dst = i - limb_shift;
if dst < out.len() {
if bit == 0 {
out[dst] |= a[i];
} else {
out[dst] |= a[i] >> bit;
if dst >= 1 {
out[dst - 1] |= a[i] << (128 - bit);
}
}
}
i += 1;
}
}
pub(crate) const fn limbs_mul(a: &[u128], b: &[u128], out: &mut [u128]) {
if a.len() == 2 && b.len() == 2 && out.len() >= 4 {
let (a0, a1) = (a[0], a[1]);
let (b0, b1) = (b[0], b[1]);
let (h0, l0) = mul_128(a0, b0);
let (h1, l1) = mul_128(a0, b1);
let (h2, l2) = mul_128(a1, b0);
let (h3, l3) = mul_128(a1, b1);
out[0] = l0;
let (s1, c1a) = h0.overflowing_add(l1);
let (s1, c1b) = s1.overflowing_add(l2);
out[1] = s1;
let mid_carry = (c1a as u128) + (c1b as u128);
let (s2, c2a) = h1.overflowing_add(h2);
let (s2, c2b) = s2.overflowing_add(l3);
let (s2, c2c) = s2.overflowing_add(mid_carry);
out[2] = s2;
let top_carry = (c2a as u128) + (c2b as u128) + (c2c as u128);
out[3] = h3.wrapping_add(top_carry);
return;
}
let mut i = 0;
while i < a.len() {
if a[i] != 0 {
let mut carry = 0u128;
let mut j = 0;
while j < b.len() {
if b[j] != 0 || carry != 0 {
let (hi, lo) = mul_128(a[i], b[j]);
let idx = i + j;
let (s1, c1) = out[idx].overflowing_add(lo);
let (s2, c2) = s1.overflowing_add(carry);
out[idx] = s2;
carry = hi + (c1 as u128) + (c2 as u128);
}
j += 1;
}
let mut idx = i + b.len();
while carry != 0 && idx < out.len() {
let (s, c) = out[idx].overflowing_add(carry);
out[idx] = s;
carry = c as u128;
idx += 1;
}
}
i += 1;
}
}
const KARATSUBA_MIN: usize = 32;
#[cfg(feature = "alloc")]
pub(crate) fn limbs_mul_fast(a: &[u128], b: &[u128], out: &mut [u128]) {
if a.len() == b.len() && a.len() >= KARATSUBA_MIN {
limbs_mul_karatsuba(a, b, out);
} else {
limbs_mul(a, b, out);
}
}
#[cfg(not(feature = "alloc"))]
pub(crate) fn limbs_mul_fast(a: &[u128], b: &[u128], out: &mut [u128]) {
limbs_mul(a, b, out);
}
#[cfg(feature = "alloc")]
fn limbs_mul_karatsuba(a: &[u128], b: &[u128], out: &mut [u128]) {
debug_assert_eq!(a.len(), b.len());
debug_assert!(out.len() >= 2 * a.len());
let n = a.len();
if n < KARATSUBA_MIN {
for o in out.iter_mut().take(2 * n) {
*o = 0;
}
limbs_mul(a, b, out);
return;
}
let h = n / 2;
let (a_lo, a_hi_full) = a.split_at(h);
let (b_lo, b_hi_full) = b.split_at(h);
let a_hi = a_hi_full;
let b_hi = b_hi_full;
let mut z0 = alloc::vec![0u128; 2 * h];
limbs_mul_karatsuba_padded(a_lo, b_lo, &mut z0);
let hi_len = n - h;
let mut z2 = alloc::vec![0u128; 2 * hi_len];
limbs_mul_karatsuba_padded(a_hi, b_hi, &mut z2);
let sum_len = core::cmp::max(h, hi_len) + 1;
let mut sum_a = alloc::vec![0u128; sum_len];
let mut sum_b = alloc::vec![0u128; sum_len];
sum_a[..h].copy_from_slice(a_lo);
sum_b[..h].copy_from_slice(b_lo);
limbs_add_assign(&mut sum_a[..], a_hi);
limbs_add_assign(&mut sum_b[..], b_hi);
let mut z1 = alloc::vec![0u128; 2 * sum_len];
limbs_mul_karatsuba_padded(&sum_a, &sum_b, &mut z1);
limbs_sub_assign(&mut z1[..], &z0);
limbs_sub_assign(&mut z1[..], &z2);
for o in out.iter_mut().take(2 * n) {
*o = 0;
}
let z0_take = core::cmp::min(z0.len(), out.len());
out[..z0_take].copy_from_slice(&z0[..z0_take]);
let z2_take = core::cmp::min(z2.len(), out.len().saturating_sub(2 * h));
if z2_take > 0 {
out[2 * h..2 * h + z2_take].copy_from_slice(&z2[..z2_take]);
}
let z1_take = core::cmp::min(z1.len(), out.len().saturating_sub(h));
if z1_take > 0 {
limbs_add_assign(&mut out[h..h + z1_take], &z1[..z1_take]);
}
}
#[cfg(feature = "alloc")]
fn limbs_mul_karatsuba_padded(a: &[u128], b: &[u128], out: &mut [u128]) {
if a.len() == b.len() && a.len() >= KARATSUBA_MIN {
limbs_mul_karatsuba(a, b, out);
} else {
for o in out.iter_mut() {
*o = 0;
}
limbs_mul(a, b, out);
}
}
#[inline]
const fn limbs_shl1(a: &mut [u128]) -> u128 {
let mut carry = 0u128;
let mut i = 0;
while i < a.len() {
let new_carry = a[i] >> 127;
a[i] = (a[i] << 1) | carry;
carry = new_carry;
i += 1;
}
carry
}
#[inline]
const fn limbs_fit_one(a: &[u128]) -> bool {
let mut i = 1;
while i < a.len() {
if a[i] != 0 {
return false;
}
i += 1;
}
true
}
pub(crate) const fn limbs_divmod(
num: &[u128],
den: &[u128],
quot: &mut [u128],
rem: &mut [u128],
) {
let mut z = 0;
while z < quot.len() {
quot[z] = 0;
z += 1;
}
z = 0;
while z < rem.len() {
rem[z] = 0;
z += 1;
}
let den_one_limb = limbs_fit_one(den);
if den_one_limb && limbs_fit_one(num) {
if !quot.is_empty() {
quot[0] = num[0] / den[0];
}
if !rem.is_empty() {
rem[0] = num[0] % den[0];
}
return;
}
if den_one_limb && den[0] <= u64::MAX as u128 {
let d = den[0];
let mut r: u128 = 0;
let mut top = num.len();
while top > 0 && num[top - 1] == 0 {
top -= 1;
}
let mut i = top;
while i > 0 {
i -= 1;
let hi = num[i] >> 64;
let acc_hi = (r << 64) | hi;
let q_hi = acc_hi / d;
r = acc_hi % d;
let lo = num[i] & u64::MAX as u128;
let acc_lo = (r << 64) | lo;
let q_lo = acc_lo / d;
r = acc_lo % d;
if i < quot.len() {
quot[i] = (q_hi << 64) | q_lo;
}
}
if !rem.is_empty() {
rem[0] = r;
}
return;
}
let bits = limbs_bit_len(num);
let mut i = bits;
while i > 0 {
i -= 1;
limbs_shl1(rem);
let bit = (num[(i / 128) as usize] >> (i % 128)) & 1;
rem[0] |= bit;
limbs_shl1(quot);
if limbs_cmp(rem, den) >= 0 {
limbs_sub_assign(rem, den);
quot[0] |= 1;
}
}
}
const SCRATCH_LIMBS: usize = 72;
pub(crate) fn limbs_divmod_dispatch(
num: &[u128],
den: &[u128],
quot: &mut [u128],
rem: &mut [u128],
) {
const BZ_THRESHOLD: usize = 8;
let mut n = den.len();
while n > 0 && den[n - 1] == 0 {
n -= 1;
}
assert!(n > 0, "limbs_divmod_dispatch: divide by zero");
let mut top = num.len();
while top > 0 && num[top - 1] == 0 {
top -= 1;
}
if n == 1 && top <= 1 {
limbs_divmod(num, den, quot, rem);
return;
}
if n == 1 && den[0] <= u64::MAX as u128 {
limbs_divmod(num, den, quot, rem);
return;
}
if n >= BZ_THRESHOLD && top >= 2 * n {
limbs_divmod_bz(num, den, quot, rem);
} else {
limbs_divmod_knuth(num, den, quot, rem);
}
}
#[derive(Clone, Copy)]
pub(crate) struct MG2by1 {
d: u128,
v: u128,
}
impl MG2by1 {
#[inline]
pub(crate) const fn new(d: u128) -> Self {
debug_assert!(d >> 127 == 1, "MG2by1::new: divisor must be normalised");
let (v, _r) = div_2_by_1(!d, u128::MAX, d);
Self { d, v }
}
#[inline]
pub(crate) const fn div_rem(&self, u1: u128, u0: u128) -> (u128, u128) {
debug_assert!(u1 < self.d, "MG2by1::div_rem: high word must be < divisor");
let (vu1_hi, vu1_lo) = mul_128(self.v, u1);
let (q0, c_lo) = vu1_lo.overflowing_add(u0);
let (q1, _c_hi_a) = vu1_hi.overflowing_add(u1);
let (q1, _c_hi_b) = q1.overflowing_add(c_lo as u128);
let q1 = q1.wrapping_add(1);
let r = u0.wrapping_sub(q1.wrapping_mul(self.d));
let (q1, r) = if r > q0 {
(q1.wrapping_sub(1), r.wrapping_add(self.d))
} else {
(q1, r)
};
if r >= self.d {
(q1.wrapping_add(1), r.wrapping_sub(self.d))
} else {
(q1, r)
}
}
}
#[inline]
const fn div_2_by_1(high: u128, low: u128, d: u128) -> (u128, u128) {
let mut q: u128 = 0;
let mut r = high;
let mut i = 128;
while i > 0 {
i -= 1;
let r_top = r >> 127;
r = (r << 1) | ((low >> i) & 1);
q <<= 1;
if r_top != 0 || r >= d {
r = r.wrapping_sub(d);
q |= 1;
}
}
(q, r)
}
pub(crate) fn limbs_divmod_knuth(
num: &[u128],
den: &[u128],
quot: &mut [u128],
rem: &mut [u128],
) {
for q in quot.iter_mut() {
*q = 0;
}
for r in rem.iter_mut() {
*r = 0;
}
let mut n = den.len();
while n > 0 && den[n - 1] == 0 {
n -= 1;
}
assert!(n > 0, "limbs_divmod_knuth: divide by zero");
let mut top = num.len();
while top > 0 && num[top - 1] == 0 {
top -= 1;
}
if top < n {
let copy_n = num.len().min(rem.len());
let mut i = 0;
while i < copy_n {
rem[i] = num[i];
i += 1;
}
return;
}
let shift = den[n - 1].leading_zeros();
let mut u = [0u128; SCRATCH_LIMBS];
let mut v = [0u128; SCRATCH_LIMBS];
debug_assert!(top < SCRATCH_LIMBS && n <= SCRATCH_LIMBS);
if shift == 0 {
u[..top].copy_from_slice(&num[..top]);
u[top] = 0;
v[..n].copy_from_slice(&den[..n]);
} else {
let mut carry: u128 = 0;
for i in 0..top {
let val = num[i];
u[i] = (val << shift) | carry;
carry = val >> (128 - shift);
}
u[top] = carry;
carry = 0;
for i in 0..n {
let val = den[i];
v[i] = (val << shift) | carry;
carry = val >> (128 - shift);
}
}
let m_plus_n = if u[top] != 0 { top + 1 } else { top };
debug_assert!(m_plus_n >= n);
let m = m_plus_n - n;
let mg_top = MG2by1::new(v[n - 1]);
let mut j_plus_one = m + 1;
while j_plus_one > 0 {
j_plus_one -= 1;
let j = j_plus_one;
let u_top = u[j + n];
let u_next = u[j + n - 1];
let v_top = v[n - 1];
let (mut q_hat, mut r_hat) = if u_top >= v_top {
let q = u128::MAX;
let (r, of) = u_next.overflowing_add(v_top);
if of || u_top > v_top {
(q, u128::MAX) } else {
(q, r)
}
} else {
mg_top.div_rem(u_top, u_next)
};
if n >= 2 {
let v_below = v[n - 2];
loop {
let (hi, lo) = mul_128(q_hat, v_below);
let rhs_lo = u[j + n - 2];
let rhs_hi = r_hat;
if hi < rhs_hi || (hi == rhs_hi && lo <= rhs_lo) {
break;
}
q_hat = q_hat.wrapping_sub(1);
let (new_r, of) = r_hat.overflowing_add(v_top);
if of {
break;
}
r_hat = new_r;
}
}
let mut mul_carry: u128 = 0;
let mut borrow: u128 = 0;
for i in 0..n {
let (hi, lo) = mul_128(q_hat, v[i]);
let (prod_lo, c1) = lo.overflowing_add(mul_carry);
let new_mul_carry = hi + u128::from(c1);
let (s1, b1) = u[j + i].overflowing_sub(prod_lo);
let (s2, b2) = s1.overflowing_sub(borrow);
u[j + i] = s2;
borrow = u128::from(b1) + u128::from(b2);
mul_carry = new_mul_carry;
}
let (s1, b1) = u[j + n].overflowing_sub(mul_carry);
let (s2, b2) = s1.overflowing_sub(borrow);
u[j + n] = s2;
let final_borrow = u128::from(b1) + u128::from(b2);
if final_borrow != 0 {
q_hat = q_hat.wrapping_sub(1);
let mut carry: u128 = 0;
for i in 0..n {
let (s1, c1) = u[j + i].overflowing_add(v[i]);
let (s2, c2) = s1.overflowing_add(carry);
u[j + i] = s2;
carry = u128::from(c1) + u128::from(c2);
}
u[j + n] = u[j + n].wrapping_add(carry);
}
if j < quot.len() {
quot[j] = q_hat;
}
}
if shift == 0 {
let copy_n = n.min(rem.len());
rem[..copy_n].copy_from_slice(&u[..copy_n]);
} else {
for i in 0..n {
if i < rem.len() {
let lo = u[i] >> shift;
let hi_into_lo = if i + 1 < n {
u[i + 1] << (128 - shift)
} else {
0
};
rem[i] = lo | hi_into_lo;
}
}
}
}
pub(crate) fn limbs_divmod_bz(
num: &[u128],
den: &[u128],
quot: &mut [u128],
rem: &mut [u128],
) {
const BZ_THRESHOLD: usize = 8;
let mut n = den.len();
while n > 0 && den[n - 1] == 0 {
n -= 1;
}
assert!(n > 0, "limbs_divmod_bz: divide by zero");
let mut top = num.len();
while top > 0 && num[top - 1] == 0 {
top -= 1;
}
if n < BZ_THRESHOLD || top < 2 * n {
limbs_divmod_knuth(num, den, quot, rem);
return;
}
for q in quot.iter_mut() {
*q = 0;
}
for r in rem.iter_mut() {
*r = 0;
}
let chunks = top.div_ceil(n);
let mut carry = [0u128; SCRATCH_LIMBS];
let mut buf = [0u128; SCRATCH_LIMBS];
let mut q_chunk = [0u128; SCRATCH_LIMBS];
let mut r_chunk = [0u128; SCRATCH_LIMBS];
let mut idx = chunks;
while idx > 0 {
idx -= 1;
let lo = idx * n;
let hi = ((idx + 1) * n).min(top);
buf.fill(0);
let chunk_len = hi - lo;
buf[..chunk_len].copy_from_slice(&num[lo..lo + chunk_len]);
buf[chunk_len..chunk_len + n].copy_from_slice(&carry[..n]);
let buf_len = chunk_len + n;
limbs_divmod_knuth(
&buf[..buf_len],
&den[..n],
&mut q_chunk[..buf_len],
&mut r_chunk[..n],
);
let store_end = (lo + n).min(quot.len());
let store_len = store_end.saturating_sub(lo);
quot[lo..lo + store_len].copy_from_slice(&q_chunk[..store_len]);
carry[..n].copy_from_slice(&r_chunk[..n]);
}
let rem_n = n.min(rem.len());
rem[..rem_n].copy_from_slice(&carry[..rem_n]);
}
pub(crate) fn limbs_isqrt(n: &[u128], out: &mut [u128]) {
for o in out.iter_mut() {
*o = 0;
}
let bits = limbs_bit_len(n);
if bits == 0 {
return;
}
if bits <= 1 {
out[0] = 1;
return;
}
let work = n.len() + 1;
debug_assert!(work <= SCRATCH_LIMBS, "wide-int isqrt scratch overflow");
let mut x = [0u128; SCRATCH_LIMBS];
let e = bits.div_ceil(2);
x[(e / 128) as usize] |= 1u128 << (e % 128);
loop {
let mut q = [0u128; SCRATCH_LIMBS];
let mut r = [0u128; SCRATCH_LIMBS];
limbs_divmod(n, &x[..work], &mut q[..work], &mut r[..work]);
limbs_add_assign(&mut q[..work], &x[..work]);
let mut y = [0u128; SCRATCH_LIMBS];
limbs_shr(&q[..work], 1, &mut y[..work]);
if limbs_cmp(&y[..work], &x[..work]) >= 0 {
break;
}
x = y;
}
let copy_len = if out.len() < work { out.len() } else { work };
out[..copy_len].copy_from_slice(&x[..copy_len]);
}
fn limbs_div_small(limbs: &mut [u128], radix: u128) -> u128 {
let mut rem = 0u128;
for limb in limbs.iter_mut().rev() {
let hi = (*limb) >> 64;
let lo = (*limb) & u128::from(u64::MAX);
let acc_hi = (rem << 64) | hi;
let q_hi = acc_hi / radix;
let r1 = acc_hi % radix;
let acc_lo = (r1 << 64) | lo;
let q_lo = acc_lo / radix;
rem = acc_lo % radix;
*limb = (q_hi << 64) | q_lo;
}
rem
}
pub(crate) fn limbs_fmt_into<'a>(
limbs: &[u128],
radix: u128,
lower: bool,
buf: &'a mut [u8],
) -> &'a str {
let digits: &[u8] = if lower {
b"0123456789abcdef"
} else {
b"0123456789ABCDEF"
};
if limbs_is_zero(limbs) {
let last = buf.len() - 1;
buf[last] = b'0';
return core::str::from_utf8(&buf[last..]).unwrap();
}
let mut work = [0u128; SCRATCH_LIMBS];
work[..limbs.len()].copy_from_slice(limbs);
let wl = limbs.len();
let mut pos = buf.len();
while !limbs_is_zero(&work[..wl]) {
let r = limbs_div_small(&mut work[..wl], radix);
pos -= 1;
buf[pos] = digits[r as usize];
}
core::str::from_utf8(&buf[pos..]).unwrap()
}
#[inline]
pub(crate) const fn limbs_is_zero_u64(a: &[u64]) -> bool {
let mut i = 0;
while i < a.len() {
if a[i] != 0 {
return false;
}
i += 1;
}
true
}
#[inline]
pub(crate) const fn limbs_eq_u64(a: &[u64], b: &[u64]) -> bool {
let n = if a.len() > b.len() { a.len() } else { b.len() };
let mut i = 0;
while i < n {
let av = if i < a.len() { a[i] } else { 0 };
let bv = if i < b.len() { b[i] } else { 0 };
if av != bv {
return false;
}
i += 1;
}
true
}
#[inline]
pub(crate) const fn limbs_cmp_u64(a: &[u64], b: &[u64]) -> i32 {
let n = if a.len() > b.len() { a.len() } else { b.len() };
let mut i = n;
while i > 0 {
i -= 1;
let av = if i < a.len() { a[i] } else { 0 };
let bv = if i < b.len() { b[i] } else { 0 };
if av < bv {
return -1;
}
if av > bv {
return 1;
}
}
0
}
#[inline]
pub(crate) const fn limbs_bit_len_u64(a: &[u64]) -> u32 {
let mut i = a.len();
while i > 0 {
i -= 1;
if a[i] != 0 {
return (i as u32) * 64 + (64 - a[i].leading_zeros());
}
}
0
}
#[inline]
pub(crate) const fn limbs_add_assign_u64(a: &mut [u64], b: &[u64]) -> bool {
let mut carry: u64 = 0;
let mut i = 0;
while i < a.len() {
let bv = if i < b.len() { b[i] } else { 0 };
let (s1, c1) = a[i].overflowing_add(bv);
let (s2, c2) = s1.overflowing_add(carry);
a[i] = s2;
carry = (c1 as u64) + (c2 as u64);
i += 1;
}
carry != 0
}
#[inline]
pub(crate) const fn limbs_sub_assign_u64(a: &mut [u64], b: &[u64]) -> bool {
let mut borrow: u64 = 0;
let mut i = 0;
while i < a.len() {
let bv = if i < b.len() { b[i] } else { 0 };
let (d1, b1) = a[i].overflowing_sub(bv);
let (d2, b2) = d1.overflowing_sub(borrow);
a[i] = d2;
borrow = (b1 as u64) + (b2 as u64);
i += 1;
}
borrow != 0
}
pub(crate) const fn limbs_shl_u64(a: &[u64], shift: u32, out: &mut [u64]) {
let mut z = 0;
while z < out.len() {
out[z] = 0;
z += 1;
}
let limb_shift = (shift / 64) as usize;
let bit = shift % 64;
let mut i = 0;
while i < a.len() {
let dst = i + limb_shift;
if dst < out.len() {
if bit == 0 {
out[dst] |= a[i];
} else {
out[dst] |= a[i] << bit;
if dst + 1 < out.len() {
out[dst + 1] |= a[i] >> (64 - bit);
}
}
}
i += 1;
}
}
pub(crate) const fn limbs_shr_u64(a: &[u64], shift: u32, out: &mut [u64]) {
let mut z = 0;
while z < out.len() {
out[z] = 0;
z += 1;
}
let limb_shift = (shift / 64) as usize;
let bit = shift % 64;
let mut i = limb_shift;
while i < a.len() {
let dst = i - limb_shift;
if dst < out.len() {
if bit == 0 {
out[dst] |= a[i];
} else {
out[dst] |= a[i] >> bit;
if dst >= 1 {
out[dst - 1] |= a[i] << (64 - bit);
}
}
}
i += 1;
}
}
#[inline]
const fn limbs_shl1_u64(a: &mut [u64]) -> u64 {
let mut carry: u64 = 0;
let mut i = 0;
while i < a.len() {
let new_carry = a[i] >> 63;
a[i] = (a[i] << 1) | carry;
carry = new_carry;
i += 1;
}
carry
}
#[inline]
const fn limbs_fit_one_u64(a: &[u64]) -> bool {
let mut i = 1;
while i < a.len() {
if a[i] != 0 {
return false;
}
i += 1;
}
true
}
pub(crate) const fn limbs_mul_u64(a: &[u64], b: &[u64], out: &mut [u64]) {
let mut i = 0;
while i < a.len() {
if a[i] != 0 {
let mut carry: u64 = 0;
let mut j = 0;
while j < b.len() {
if b[j] != 0 || carry != 0 {
let prod = (a[i] as u128) * (b[j] as u128);
let prod_lo = prod as u64;
let prod_hi = (prod >> 64) as u64;
let idx = i + j;
let (s1, c1) = out[idx].overflowing_add(prod_lo);
let (s2, c2) = s1.overflowing_add(carry);
out[idx] = s2;
carry = prod_hi + (c1 as u64) + (c2 as u64);
}
j += 1;
}
let mut idx = i + b.len();
while carry != 0 && idx < out.len() {
let (s, c) = out[idx].overflowing_add(carry);
out[idx] = s;
carry = c as u64;
idx += 1;
}
}
i += 1;
}
}
pub(crate) const fn limbs_divmod_u64(
num: &[u64],
den: &[u64],
quot: &mut [u64],
rem: &mut [u64],
) {
let mut z = 0;
while z < quot.len() {
quot[z] = 0;
z += 1;
}
z = 0;
while z < rem.len() {
rem[z] = 0;
z += 1;
}
let den_one_limb = limbs_fit_one_u64(den);
if den_one_limb && limbs_fit_one_u64(num) {
if !quot.is_empty() {
quot[0] = num[0] / den[0];
}
if !rem.is_empty() {
rem[0] = num[0] % den[0];
}
return;
}
if den_one_limb {
let d = den[0];
let mut r: u64 = 0;
let mut top = num.len();
while top > 0 && num[top - 1] == 0 {
top -= 1;
}
let mut i = top;
while i > 0 {
i -= 1;
let acc = ((r as u128) << 64) | (num[i] as u128);
let q = (acc / (d as u128)) as u64;
r = (acc % (d as u128)) as u64;
if i < quot.len() {
quot[i] = q;
}
}
if !rem.is_empty() {
rem[0] = r;
}
return;
}
let bits = limbs_bit_len_u64(num);
let mut i = bits;
while i > 0 {
i -= 1;
limbs_shl1_u64(rem);
let bit = (num[(i / 64) as usize] >> (i % 64)) & 1;
rem[0] |= bit;
limbs_shl1_u64(quot);
if limbs_cmp_u64(rem, den) >= 0 {
limbs_sub_assign_u64(rem, den);
quot[0] |= 1;
}
}
}
const SCRATCH_LIMBS_U64: usize = 288;
pub(crate) fn limbs_mul_fast_u64(a: &[u64], b: &[u64], out: &mut [u64]) {
limbs_mul_u64(a, b, out);
}
#[derive(Clone, Copy)]
pub(crate) struct MG2by1U64 {
d: u64,
v: u64,
}
impl MG2by1U64 {
#[inline]
pub(crate) const fn new(d: u64) -> Self {
debug_assert!(d >> 63 == 1, "MG2by1U64::new: divisor must be normalised");
let num = ((!d as u128) << 64) | (u64::MAX as u128);
let v = (num / (d as u128)) as u64;
Self { d, v }
}
#[inline]
pub(crate) const fn div_rem(&self, u1: u64, u0: u64) -> (u64, u64) {
debug_assert!(u1 < self.d, "MG2by1U64::div_rem: high word must be < divisor");
let q128 = (self.v as u128).wrapping_mul(u1 as u128)
.wrapping_add(((u1 as u128) << 64) | (u0 as u128));
let mut q1 = (q128 >> 64) as u64;
let q0 = q128 as u64;
q1 = q1.wrapping_add(1);
let mut r = u0.wrapping_sub(q1.wrapping_mul(self.d));
if r > q0 {
q1 = q1.wrapping_sub(1);
r = r.wrapping_add(self.d);
}
if r >= self.d {
q1 = q1.wrapping_add(1);
r = r.wrapping_sub(self.d);
}
(q1, r)
}
}
#[derive(Clone, Copy)]
pub(crate) struct MG3by2U64 {
d1: u64,
d0: u64,
dinv: u64,
}
impl MG3by2U64 {
#[inline]
pub(crate) const fn new(d1: u64, d0: u64) -> Self {
debug_assert!(d1 >> 63 == 1, "MG3by2U64::new: top divisor limb must be normalised");
let num = ((!d1 as u128) << 64) | (u64::MAX as u128);
let mut v = (num / (d1 as u128)) as u64;
let mut p = d1.wrapping_mul(v).wrapping_add(d0);
if p < d0 {
v = v.wrapping_sub(1);
let mask = if p >= d1 { u64::MAX } else { 0 };
p = p.wrapping_sub(d1);
v = v.wrapping_add(mask);
p = p.wrapping_sub(mask & d1);
}
let prod = (d0 as u128) * (v as u128);
let t1 = (prod >> 64) as u64;
let t0 = prod as u64;
let (new_p, carry) = p.overflowing_add(t1);
let _p_final = new_p;
if carry {
v = v.wrapping_sub(1);
if new_p >= d1 && (new_p > d1 || t0 >= d0) {
v = v.wrapping_sub(1);
}
}
Self { d1, d0, dinv: v }
}
#[inline]
pub(crate) const fn div_rem(&self, n2: u64, n1: u64, n0: u64) -> (u64, u64, u64) {
debug_assert!(
n2 < self.d1 || (n2 == self.d1 && n1 < self.d0),
"MG3by2U64::div_rem: numerator high pair must be < divisor"
);
let prod = (n2 as u128).wrapping_mul(self.dinv as u128)
.wrapping_add(((n2 as u128) << 64) | (n1 as u128));
let mut q = (prod >> 64) as u64;
let q_lo = prod as u64;
let mut r1 = n1.wrapping_sub(q.wrapping_mul(self.d1));
let r256 = (((r1 as u128) << 64) | (n0 as u128))
.wrapping_sub(((self.d1 as u128) << 64) | (self.d0 as u128));
r1 = (r256 >> 64) as u64;
let mut r0 = r256 as u64;
let t = (self.d0 as u128).wrapping_mul(q as u128);
let r256 = (((r1 as u128) << 64) | (r0 as u128)).wrapping_sub(t);
r1 = (r256 >> 64) as u64;
r0 = r256 as u64;
q = q.wrapping_add(1);
let mask = if r1 >= q_lo { u64::MAX } else { 0 };
q = q.wrapping_add(mask); let add = ((mask & self.d1) as u128) << 64 | ((mask & self.d0) as u128);
let r256 = (((r1 as u128) << 64) | (r0 as u128)).wrapping_add(add);
r1 = (r256 >> 64) as u64;
r0 = r256 as u64;
if r1 > self.d1 || (r1 == self.d1 && r0 >= self.d0) {
q = q.wrapping_add(1);
let r256 = (((r1 as u128) << 64) | (r0 as u128))
.wrapping_sub(((self.d1 as u128) << 64) | (self.d0 as u128));
r1 = (r256 >> 64) as u64;
r0 = r256 as u64;
}
(q, r1, r0)
}
}
pub(crate) fn limbs_divmod_dispatch_u64(
num: &[u64],
den: &[u64],
quot: &mut [u64],
rem: &mut [u64],
) {
const BZ_THRESHOLD_U64: usize = 16;
let mut n = den.len();
while n > 0 && den[n - 1] == 0 {
n -= 1;
}
assert!(n > 0, "limbs_divmod_dispatch_u64: divide by zero");
let mut top = num.len();
while top > 0 && num[top - 1] == 0 {
top -= 1;
}
if n == 1 {
limbs_divmod_u64(num, den, quot, rem);
return;
}
if n >= BZ_THRESHOLD_U64 && top >= 2 * n {
limbs_divmod_bz_u64(num, den, quot, rem);
} else {
limbs_divmod_knuth_u64(num, den, quot, rem);
}
}
pub(crate) fn limbs_divmod_knuth_u64(
num: &[u64],
den: &[u64],
quot: &mut [u64],
rem: &mut [u64],
) {
for q in quot.iter_mut() {
*q = 0;
}
for r in rem.iter_mut() {
*r = 0;
}
let mut n = den.len();
while n > 0 && den[n - 1] == 0 {
n -= 1;
}
assert!(n > 0, "limbs_divmod_knuth_u64: divide by zero");
let mut top = num.len();
while top > 0 && num[top - 1] == 0 {
top -= 1;
}
if top < n {
let copy_n = num.len().min(rem.len());
let mut i = 0;
while i < copy_n {
rem[i] = num[i];
i += 1;
}
return;
}
let shift = den[n - 1].leading_zeros();
let mut u = [0u64; SCRATCH_LIMBS_U64];
let mut v = [0u64; SCRATCH_LIMBS_U64];
debug_assert!(top < SCRATCH_LIMBS_U64 && n <= SCRATCH_LIMBS_U64);
if shift == 0 {
u[..top].copy_from_slice(&num[..top]);
u[top] = 0;
v[..n].copy_from_slice(&den[..n]);
} else {
let mut carry: u64 = 0;
for i in 0..top {
let val = num[i];
u[i] = (val << shift) | carry;
carry = val >> (64 - shift);
}
u[top] = carry;
carry = 0;
for i in 0..n {
let val = den[i];
v[i] = (val << shift) | carry;
carry = val >> (64 - shift);
}
}
let m_plus_n = if u[top] != 0 { top + 1 } else { top };
debug_assert!(m_plus_n >= n);
let m = m_plus_n - n;
let mg_top = MG2by1U64::new(v[n - 1]);
let mut j_plus_one = m + 1;
while j_plus_one > 0 {
j_plus_one -= 1;
let j = j_plus_one;
let u_top = u[j + n];
let u_next = u[j + n - 1];
let v_top = v[n - 1];
let (mut q_hat, mut r_hat) = if u_top >= v_top {
let q = u64::MAX;
let (r, of) = u_next.overflowing_add(v_top);
if of || u_top > v_top {
(q, u64::MAX)
} else {
(q, r)
}
} else {
mg_top.div_rem(u_top, u_next)
};
if n >= 2 {
let v_below = v[n - 2];
loop {
let prod = (q_hat as u128) * (v_below as u128);
let hi = (prod >> 64) as u64;
let lo = prod as u64;
let rhs_lo = u[j + n - 2];
let rhs_hi = r_hat;
if hi < rhs_hi || (hi == rhs_hi && lo <= rhs_lo) {
break;
}
q_hat = q_hat.wrapping_sub(1);
let (new_r, of) = r_hat.overflowing_add(v_top);
if of {
break;
}
r_hat = new_r;
}
}
let mut mul_carry: u64 = 0;
let mut borrow: u64 = 0;
for i in 0..n {
let prod = (q_hat as u128) * (v[i] as u128);
let prod_lo = prod as u64;
let prod_hi = (prod >> 64) as u64;
let (s_prod, c1) = prod_lo.overflowing_add(mul_carry);
let new_mul_carry = prod_hi + (c1 as u64);
let (s1, b1) = u[j + i].overflowing_sub(s_prod);
let (s2, b2) = s1.overflowing_sub(borrow);
u[j + i] = s2;
borrow = (b1 as u64) + (b2 as u64);
mul_carry = new_mul_carry;
}
let (s1, b1) = u[j + n].overflowing_sub(mul_carry);
let (s2, b2) = s1.overflowing_sub(borrow);
u[j + n] = s2;
let final_borrow = (b1 as u64) + (b2 as u64);
if final_borrow != 0 {
q_hat = q_hat.wrapping_sub(1);
let mut carry: u64 = 0;
for i in 0..n {
let (s1, c1) = u[j + i].overflowing_add(v[i]);
let (s2, c2) = s1.overflowing_add(carry);
u[j + i] = s2;
carry = (c1 as u64) + (c2 as u64);
}
u[j + n] = u[j + n].wrapping_add(carry);
}
if j < quot.len() {
quot[j] = q_hat;
}
}
if shift == 0 {
let copy_n = n.min(rem.len());
rem[..copy_n].copy_from_slice(&u[..copy_n]);
} else {
for i in 0..n {
if i < rem.len() {
let lo = u[i] >> shift;
let hi_into_lo = if i + 1 < n {
u[i + 1] << (64 - shift)
} else {
0
};
rem[i] = lo | hi_into_lo;
}
}
}
}
pub(crate) fn limbs_divmod_bz_u64(
num: &[u64],
den: &[u64],
quot: &mut [u64],
rem: &mut [u64],
) {
const BZ_THRESHOLD_U64: usize = 16;
let mut n = den.len();
while n > 0 && den[n - 1] == 0 {
n -= 1;
}
assert!(n > 0, "limbs_divmod_bz_u64: divide by zero");
let mut top = num.len();
while top > 0 && num[top - 1] == 0 {
top -= 1;
}
if n < BZ_THRESHOLD_U64 || top < 2 * n {
limbs_divmod_knuth_u64(num, den, quot, rem);
return;
}
for q in quot.iter_mut() {
*q = 0;
}
for r in rem.iter_mut() {
*r = 0;
}
let chunks = top.div_ceil(n);
let mut carry = [0u64; SCRATCH_LIMBS_U64];
let mut buf = [0u64; SCRATCH_LIMBS_U64];
let mut q_chunk = [0u64; SCRATCH_LIMBS_U64];
let mut r_chunk = [0u64; SCRATCH_LIMBS_U64];
let mut idx = chunks;
while idx > 0 {
idx -= 1;
let lo = idx * n;
let hi = ((idx + 1) * n).min(top);
buf.fill(0);
let chunk_len = hi - lo;
buf[..chunk_len].copy_from_slice(&num[lo..lo + chunk_len]);
buf[chunk_len..chunk_len + n].copy_from_slice(&carry[..n]);
let buf_len = chunk_len + n;
limbs_divmod_knuth_u64(
&buf[..buf_len],
&den[..n],
&mut q_chunk[..buf_len],
&mut r_chunk[..n],
);
let store_end = (lo + n).min(quot.len());
let store_len = store_end.saturating_sub(lo);
quot[lo..lo + store_len].copy_from_slice(&q_chunk[..store_len]);
carry[..n].copy_from_slice(&r_chunk[..n]);
}
let rem_n = n.min(rem.len());
rem[..rem_n].copy_from_slice(&carry[..rem_n]);
}
pub(crate) fn limbs_isqrt_u64(n: &[u64], out: &mut [u64]) {
for o in out.iter_mut() {
*o = 0;
}
let bits = limbs_bit_len_u64(n);
if bits == 0 {
return;
}
if bits <= 1 {
out[0] = 1;
return;
}
let work = n.len() + 1;
debug_assert!(work <= SCRATCH_LIMBS_U64, "isqrt scratch overflow");
let mut x = [0u64; SCRATCH_LIMBS_U64];
let e = bits.div_ceil(2);
x[(e / 64) as usize] |= 1u64 << (e % 64);
loop {
let mut q = [0u64; SCRATCH_LIMBS_U64];
let mut r = [0u64; SCRATCH_LIMBS_U64];
limbs_divmod_dispatch_u64(n, &x[..work], &mut q[..work], &mut r[..work]);
limbs_add_assign_u64(&mut q[..work], &x[..work]);
let mut y = [0u64; SCRATCH_LIMBS_U64];
limbs_shr_u64(&q[..work], 1, &mut y[..work]);
if limbs_cmp_u64(&y[..work], &x[..work]) >= 0 {
break;
}
x = y;
}
let copy_len = if out.len() < work { out.len() } else { work };
out[..copy_len].copy_from_slice(&x[..copy_len]);
}
fn limbs_div_small_u64(limbs: &mut [u64], radix: u64) -> u64 {
let mut rem: u64 = 0;
for limb in limbs.iter_mut().rev() {
let acc = ((rem as u128) << 64) | (*limb as u128);
*limb = (acc / (radix as u128)) as u64;
rem = (acc % (radix as u128)) as u64;
}
rem
}
pub(crate) fn limbs_fmt_into_u64<'a>(
limbs: &[u64],
radix: u64,
lower: bool,
buf: &'a mut [u8],
) -> &'a str {
let digits: &[u8] = if lower {
b"0123456789abcdef"
} else {
b"0123456789ABCDEF"
};
if limbs_is_zero_u64(limbs) {
let last = buf.len() - 1;
buf[last] = b'0';
return core::str::from_utf8(&buf[last..]).unwrap();
}
let mut work = [0u64; SCRATCH_LIMBS_U64];
work[..limbs.len()].copy_from_slice(limbs);
let wl = limbs.len();
let mut pos = buf.len();
while !limbs_is_zero_u64(&work[..wl]) {
let r = limbs_div_small_u64(&mut work[..wl], radix);
pos -= 1;
buf[pos] = digits[r as usize];
}
core::str::from_utf8(&buf[pos..]).unwrap()
}
#[inline]
pub(crate) const fn scmp_u64(a_neg: bool, a: &[u64], b_neg: bool, b: &[u64]) -> i32 {
match (a_neg, b_neg) {
(true, false) => -1,
(false, true) => 1,
_ => limbs_cmp_u64(a, b),
}
}
mod macros;
use macros::decl_wide_int;
#[inline]
pub(crate) const fn scmp(a_neg: bool, a: &[u128], b_neg: bool, b: &[u128]) -> i32 {
match (a_neg, b_neg) {
(true, false) => -1,
(false, true) => 1,
_ => limbs_cmp(a, b),
}
}
pub(crate) trait WideInt: Copy {
fn to_mag_sign(self) -> ([u64; 288], bool);
fn from_mag_sign(mag: &[u64], negative: bool) -> Self;
}
macro_rules! impl_wideint_signed_prim {
($($t:ty),*) => {$(
impl WideInt for $t {
#[inline]
fn to_mag_sign(self) -> ([u64; 288], bool) {
let mut out = [0u64; 288];
let mag = self.unsigned_abs() as u128;
out[0] = mag as u64;
out[1] = (mag >> 64) as u64;
(out, self < 0)
}
#[inline]
fn from_mag_sign(mag: &[u64], negative: bool) -> $t {
let lo = mag.first().copied().unwrap_or(0) as u128;
let hi = mag.get(1).copied().unwrap_or(0) as u128;
let combined = lo | (hi << 64);
let m = combined as $t;
if negative { m.wrapping_neg() } else { m }
}
}
)*};
}
impl_wideint_signed_prim!(i8, i16, i32, i64, i128);
impl WideInt for u128 {
#[inline]
fn to_mag_sign(self) -> ([u64; 288], bool) {
let mut out = [0u64; 288];
out[0] = self as u64;
out[1] = (self >> 64) as u64;
(out, false)
}
#[inline]
fn from_mag_sign(mag: &[u64], _negative: bool) -> u128 {
let lo = mag.first().copied().unwrap_or(0) as u128;
let hi = mag.get(1).copied().unwrap_or(0) as u128;
lo | (hi << 64)
}
}
#[inline]
pub(crate) fn wide_cast<S: WideInt, T: WideInt>(src: S) -> T {
let (mag, negative) = src.to_mag_sign();
T::from_mag_sign(&mag, negative)
}
decl_wide_int!(Uint192, Int192, 3, 6);
decl_wide_int!(Uint256, Int256, 4, 8);
decl_wide_int!(Uint384, Int384, 6, 12);
decl_wide_int!(Uint512, Int512, 8, 16);
decl_wide_int!(Uint768, Int768, 12, 24);
decl_wide_int!(Uint1024, Int1024, 16, 32);
decl_wide_int!(Uint1536, Int1536, 24, 48);
decl_wide_int!(Uint2048, Int2048, 32, 64);
decl_wide_int!(Uint3072, Int3072, 48, 96);
decl_wide_int!(Uint4096, Int4096, 64, 128);
decl_wide_int!(Uint6144, Int6144, 96, 192);
decl_wide_int!(Uint8192, Int8192, 128, 256);
decl_wide_int!(Uint12288, Int12288, 192, 384);
decl_wide_int!(Uint16384, Int16384, 256, 512);
#[cfg(any(feature = "d56", feature = "wide"))]
pub(crate) use self::{Int192 as I192, Uint192 as U192};
#[cfg(any(feature = "d56", feature = "d76", feature = "wide"))]
pub(crate) use self::Int384 as I384;
#[cfg(any(feature = "d114", feature = "wide"))]
pub(crate) use self::Uint384 as U384;
#[cfg(any(feature = "d76", feature = "wide"))]
pub(crate) use self::{Int256 as I256, Uint256 as U256};
#[cfg(any(feature = "d76", feature = "d114", feature = "d153", feature = "wide"))]
pub(crate) use self::Int512 as I512;
#[cfg(any(feature = "d153", feature = "wide"))]
pub(crate) use self::Uint512 as U512;
#[cfg(any(feature = "d114", feature = "d153", feature = "d230", feature = "wide"))]
pub(crate) use self::Int768 as I768;
#[cfg(any(feature = "d230", feature = "wide"))]
pub(crate) use self::Uint768 as U768;
#[cfg(any(feature = "d153", feature = "d230", feature = "d307", feature = "wide", feature = "x-wide"))]
pub(crate) use self::Int1024 as I1024;
#[cfg(any(feature = "d230", feature = "d307", feature = "d461", feature = "wide", feature = "x-wide"))]
pub(crate) use self::Int1536 as I1536;
#[cfg(any(feature = "d461", feature = "x-wide"))]
pub(crate) use self::Uint1536 as U1536;
#[cfg(any(feature = "d307", feature = "d461", feature = "d615", feature = "wide", feature = "x-wide"))]
pub(crate) use self::{Int2048 as I2048, Uint1024 as U1024};
#[cfg(any(feature = "d615", feature = "x-wide"))]
pub(crate) use self::Uint2048 as U2048;
#[cfg(any(feature = "d461", feature = "d615", feature = "d923", feature = "x-wide", feature = "xx-wide"))]
pub(crate) use self::Int3072 as I3072;
#[cfg(any(feature = "d923", feature = "xx-wide"))]
pub(crate) use self::Uint3072 as U3072;
#[cfg(any(feature = "d615", feature = "d923", feature = "d1231", feature = "x-wide", feature = "xx-wide"))]
pub(crate) use self::Int4096 as I4096;
#[cfg(any(feature = "d1231", feature = "xx-wide"))]
pub(crate) use self::Uint4096 as U4096;
#[cfg(any(feature = "d923", feature = "d1231", feature = "xx-wide"))]
pub(crate) use self::Int6144 as I6144;
#[cfg(any(feature = "d1231", feature = "xx-wide"))]
pub(crate) use self::Int8192 as I8192;
#[cfg(any(feature = "d923", feature = "xx-wide"))]
#[allow(unused_imports)]
pub(crate) use self::Int12288 as I12288;
#[cfg(any(feature = "d1231", feature = "xx-wide"))]
#[allow(unused_imports)]
pub(crate) use self::Int16384 as I16384;
#[cfg(test)]
mod karatsuba_tests {
use super::*;
#[test]
fn karatsuba_matches_schoolbook_at_n16() {
let a: [u128; 16] = core::array::from_fn(|i| (i as u128) * 0xdead_beef + 1);
let b: [u128; 16] = core::array::from_fn(|i| 0xcafe_babe ^ ((i as u128) << 5));
let mut s = [0u128; 32];
let mut k = [0u128; 32];
limbs_mul(&a, &b, &mut s);
limbs_mul_karatsuba(&a, &b, &mut k);
assert_eq!(s, k);
}
#[test]
fn karatsuba_matches_schoolbook_at_n32() {
let a: [u128; 32] = core::array::from_fn(|i| (i as u128).wrapping_mul(0x1234_5678_9abc));
let b: [u128; 32] = core::array::from_fn(|i| (i as u128 + 1).wrapping_mul(0xfedc_ba98));
let mut s = [0u128; 64];
let mut k = [0u128; 64];
limbs_mul(&a, &b, &mut s);
limbs_mul_karatsuba(&a, &b, &mut k);
assert_eq!(s, k);
}
#[test]
fn karatsuba_handles_zero_inputs() {
let a = [0u128; 16];
let b: [u128; 16] = core::array::from_fn(|i| (i as u128) + 1);
let mut k = [0u128; 32];
limbs_mul_karatsuba(&a, &b, &mut k);
for o in &k {
assert_eq!(*o, 0);
}
}
}
#[cfg(test)]
mod hint_tests {
use super::*;
#[test]
fn signed_add_sub_neg() {
let a = Int256::from_i128(5);
let b = Int256::from_i128(3);
assert_eq!(a.wrapping_add(b), Int256::from_i128(8));
assert_eq!(a.wrapping_sub(b), Int256::from_i128(2));
assert_eq!(b.wrapping_sub(a), Int256::from_i128(-2));
assert_eq!(a.negate(), Int256::from_i128(-5));
assert_eq!(Int256::ZERO.negate(), Int256::ZERO);
}
#[test]
fn signed_mul_div_rem() {
let six = Int512::from_i128(6);
let two = Int512::from_i128(2);
let three = Int512::from_i128(3);
assert_eq!(six.wrapping_mul(three), Int512::from_i128(18));
assert_eq!(six.wrapping_div(two), three);
assert_eq!(Int512::from_i128(7).wrapping_rem(three), Int512::from_i128(1));
assert_eq!(Int512::from_i128(-7).wrapping_rem(three), Int512::from_i128(-1));
assert_eq!(six.negate().wrapping_mul(three), Int512::from_i128(-18));
}
#[test]
fn checked_overflow() {
assert_eq!(Int256::MAX.checked_add(Int256::ONE), None);
assert_eq!(Int256::MIN.checked_sub(Int256::ONE), None);
assert_eq!(Int256::MIN.checked_neg(), None);
assert_eq!(
Int256::from_i128(2).checked_add(Int256::from_i128(3)),
Some(Int256::from_i128(5))
);
}
#[test]
fn from_str_and_pow() {
let ten = Int1024::from_str_radix("10", 10).unwrap();
assert_eq!(ten, Int1024::from_i128(10));
assert_eq!(ten.pow(3), Int1024::from_i128(1000));
let big = Int1024::from_str_radix("10", 10).unwrap().pow(40);
let from_str = Int1024::from_str_radix(
"10000000000000000000000000000000000000000",
10,
)
.unwrap();
assert_eq!(big, from_str);
assert_eq!(Int256::from_str_radix("-42", 10).unwrap(), Int256::from_i128(-42));
}
#[test]
fn ordering_and_resize() {
assert!(Int256::from_i128(-1) < Int256::ZERO);
assert!(Int256::MIN < Int256::MAX);
let v = Int256::from_i128(-123_456_789);
let wide: Int1024 = v.resize();
let back: Int256 = wide.resize();
assert_eq!(back, v);
assert_eq!(wide, Int1024::from_i128(-123_456_789));
}
#[test]
fn isqrt_and_f64() {
assert_eq!(Int512::from_i128(144).isqrt(), Int512::from_i128(12));
assert_eq!(Int256::from_i128(1_000_000).as_f64(), 1_000_000.0);
assert_eq!(Int256::from_f64(-2_500.0), Int256::from_i128(-2500));
}
#[test]
fn uint256_is_zero_and_bit_helpers() {
let zero = Uint256::ZERO;
let one = Uint256::from_str_radix("1", 10).unwrap();
let two = Uint256::from_str_radix("2", 10).unwrap();
assert!(zero.is_zero());
assert!(!one.is_zero());
assert!(one.is_power_of_two());
assert!(two.is_power_of_two());
let three = Uint256::from_str_radix("3", 10).unwrap();
assert!(!three.is_power_of_two());
assert_eq!(zero.next_power_of_two(), one);
assert_eq!(one.next_power_of_two(), one);
let four = Uint256::from_str_radix("4", 10).unwrap();
assert_eq!(three.next_power_of_two(), four);
assert_eq!(zero.count_ones(), 0);
assert_eq!(one.count_ones(), 1);
assert_eq!(zero.leading_zeros(), Uint256::BITS);
assert_eq!(one.leading_zeros(), Uint256::BITS - 1);
}
#[test]
fn uint256_parse_arithmetic_and_pow() {
assert!(Uint256::from_str_radix("10", 2).is_err());
assert!(Uint256::from_str_radix("1a", 10).is_err());
let two = Uint256::from_str_radix("2", 10).unwrap();
let three = Uint256::from_str_radix("3", 10).unwrap();
let six = Uint256::from_str_radix("6", 10).unwrap();
let seven = Uint256::from_str_radix("7", 10).unwrap();
assert_eq!(three - two, Uint256::from_str_radix("1", 10).unwrap());
assert_eq!(six / two, three);
assert_eq!(seven % three, Uint256::from_str_radix("1", 10).unwrap());
let five = Uint256::from_str_radix("5", 10).unwrap(); let four = Uint256::from_str_radix("4", 10).unwrap(); let one = Uint256::from_str_radix("1", 10).unwrap(); assert_eq!(five & four, four); assert_eq!(five | one, five); assert_eq!(five ^ four, one); let p10 = two.pow(10);
assert_eq!(p10, Uint256::from_str_radix("1024", 10).unwrap());
let signed = three.cast_signed();
assert_eq!(signed, Int256::from_i128(3));
}
#[test]
fn signed_bit_and_trailing_zeros() {
let v = Int256::from_i128(0b1100);
assert!(v.bit(2));
assert!(v.bit(3));
assert!(!v.bit(0));
assert!(!v.bit(1));
assert!(!v.bit(1000));
let n = Int256::from_i128(-1);
assert!(n.bit(1000));
assert_eq!(Int256::from_i128(8).trailing_zeros(), 3);
assert_eq!(Int256::ZERO.trailing_zeros(), Int256::BITS);
}
}
#[cfg(test)]
mod slice_tests {
use super::*;
#[test]
fn mul_and_divmod_round_trip() {
let a = [123u128, 7, 0, 0];
let b = [456u128, 0, 0, 0];
let mut prod = [0u128; 8];
limbs_mul(&a, &b, &mut prod);
let mut q = [0u128; 8];
let mut r = [0u128; 8];
limbs_divmod(&prod, &b, &mut q, &mut r);
assert_eq!(&q[..4], &a, "quotient");
assert!(limbs_is_zero(&r), "remainder");
}
#[test]
fn shifts() {
let a = [1u128, 0];
let mut out = [0u128; 2];
limbs_shl(&a, 130, &mut out);
assert_eq!(out, [0, 4]);
let mut back = [0u128; 2];
limbs_shr(&out, 130, &mut back);
assert_eq!(back, [1, 0]);
}
#[test]
fn isqrt_basic() {
let n = [0u128, 0, 1, 0];
let mut out = [0u128; 4];
limbs_isqrt(&n, &mut out);
assert_eq!(out, [0, 1, 0, 0]);
let n = [144u128, 0];
let mut out = [0u128; 2];
limbs_isqrt(&n, &mut out);
assert_eq!(out, [12, 0]);
let n = [2u128, 0];
let mut out = [0u128; 2];
limbs_isqrt(&n, &mut out);
assert_eq!(out, [1, 0]);
}
#[test]
fn add_sub_carry() {
let mut a = [u128::MAX, 0];
let carry = limbs_add_assign(&mut a, &[1, 0]);
assert!(!carry);
assert_eq!(a, [0, 1]);
let borrow = limbs_sub_assign(&mut a, &[1, 0]);
assert!(!borrow);
assert_eq!(a, [u128::MAX, 0]);
}
#[test]
fn div_2_by_1_basics() {
assert_eq!(div_2_by_1(0, 1, 1), (1, 0));
assert_eq!(div_2_by_1(0, 5, 2), (2, 1));
assert_eq!(div_2_by_1(3, 0, 4), (3 << 126, 0));
let d = u128::MAX - 7;
let (q, r) = div_2_by_1(d - 1, u128::MAX, d);
let (mul_hi, mul_lo) = mul_128(q, d);
let (sum_lo, c) = mul_lo.overflowing_add(r);
let sum_hi = mul_hi + c as u128;
assert_eq!(sum_hi, d - 1);
assert_eq!(sum_lo, u128::MAX);
assert!(r < d);
}
fn pack(limbs: &[u128]) -> alloc::vec::Vec<u64> {
let mut out = alloc::vec![0u64; 2 * limbs.len()];
for (i, &l) in limbs.iter().enumerate() {
out[2 * i] = l as u64;
out[2 * i + 1] = (l >> 64) as u64;
}
out
}
fn unpack(words: &[u64]) -> alloc::vec::Vec<u128> {
assert!(words.len() % 2 == 0);
let mut out = alloc::vec![0u128; words.len() / 2];
for i in 0..out.len() {
out[i] = (words[2 * i] as u128) | ((words[2 * i + 1] as u128) << 64);
}
out
}
fn corpus() -> alloc::vec::Vec<alloc::vec::Vec<u128>> {
alloc::vec![
alloc::vec![0u128, 0, 0, 0],
alloc::vec![1u128, 0, 0, 0],
alloc::vec![u128::MAX, 0, 0, 0],
alloc::vec![u128::MAX, u128::MAX, 0, 0],
alloc::vec![u128::MAX, u128::MAX, u128::MAX, u128::MAX],
alloc::vec![123u128, 456, 0, 0],
alloc::vec![
0x1234_5678_9abc_def0_fedc_ba98_7654_3210_u128,
0xa5a5_a5a5_5a5a_5a5a_3c3c_3c3c_c3c3_c3c3,
0,
0,
],
]
}
#[test]
fn limbs_mul_u64_matches_u128() {
for a in corpus() {
for b in corpus() {
let mut out128 = alloc::vec![0u128; a.len() + b.len()];
limbs_mul(&a, &b, &mut out128);
let a64 = pack(&a);
let b64 = pack(&b);
let mut out64 = alloc::vec![0u64; a64.len() + b64.len()];
limbs_mul_u64(&a64, &b64, &mut out64);
assert_eq!(unpack(&out64), out128, "limbs_mul mismatch");
}
}
}
#[test]
fn limbs_divmod_u64_matches_u128() {
for num in corpus() {
for den in corpus() {
if den.iter().all(|&x| x == 0) {
continue;
}
let mut q128 = alloc::vec![0u128; num.len()];
let mut r128 = alloc::vec![0u128; num.len()];
limbs_divmod(&num, &den, &mut q128, &mut r128);
let n64 = pack(&num);
let d64 = pack(&den);
let mut q64 = alloc::vec![0u64; n64.len()];
let mut r64 = alloc::vec![0u64; n64.len()];
limbs_divmod_u64(&n64, &d64, &mut q64, &mut r64);
assert_eq!(unpack(&q64), q128, "divmod q mismatch");
assert_eq!(unpack(&r64), r128, "divmod r mismatch");
}
}
}
#[test]
fn limbs_divmod_knuth_u64_matches_u128() {
for num in corpus() {
for den in corpus() {
if den.iter().all(|&x| x == 0) {
continue;
}
let mut q128 = alloc::vec![0u128; num.len()];
let mut r128 = alloc::vec![0u128; num.len()];
limbs_divmod_knuth(&num, &den, &mut q128, &mut r128);
let n64 = pack(&num);
let d64 = pack(&den);
let mut q64 = alloc::vec![0u64; n64.len()];
let mut r64 = alloc::vec![0u64; n64.len()];
limbs_divmod_knuth_u64(&n64, &d64, &mut q64, &mut r64);
assert_eq!(unpack(&q64), q128, "knuth q mismatch");
assert_eq!(unpack(&r64), r128, "knuth r mismatch");
}
}
}
#[test]
fn mg3by2_u64_matches_reference() {
let cases: &[(u64, u64, u64, u64, u64)] = &[
(0, 0, 1, 1u64 << 63, 0),
(0, 1, 0, 1u64 << 63, 0),
((1u64 << 63) - 1, u64::MAX, u64::MAX, 1u64 << 63, 1),
(u64::MAX - 1, u64::MAX, u64::MAX, u64::MAX, u64::MAX),
(0, 0, 1, u64::MAX, 1),
(0xc0ffee, 0xdead_beef, 0xface_b00c, (1u64 << 63) | 0xc0ffee_u64, 0xdead_beef_face_b00c),
(0, 1, 2, (1u64 << 63) | 1, 2),
];
for &(n2, n1, n0, d1, d0) in cases {
assert!(d1 >> 63 == 1, "d1 not normalised: {d1:#x}");
assert!(
n2 < d1 || (n2 == d1 && n1 < d0),
"test precondition (n2, n1) < (d1, d0) violated"
);
let mg = MG3by2U64::new(d1, d0);
let (q, r1, r0) = mg.div_rem(n2, n1, n0);
let num = alloc::vec![n0, n1, n2];
let den = alloc::vec![d0, d1];
let mut q_ref = alloc::vec![0u64; 3];
let mut r_ref = alloc::vec![0u64; 3];
limbs_divmod_u64(&num, &den, &mut q_ref, &mut r_ref);
assert_eq!(q_ref[0], q, "MG3by2 q mismatch for n=({n2:#x},{n1:#x},{n0:#x}) d=({d1:#x},{d0:#x})");
assert_eq!(q_ref[1], 0, "MG3by2 q higher limb non-zero — precondition violated");
assert_eq!(q_ref[2], 0, "MG3by2 q higher limb non-zero — precondition violated");
assert_eq!(r_ref[0], r0, "MG3by2 r0 mismatch");
assert_eq!(r_ref[1], r1, "MG3by2 r1 mismatch");
}
}
#[test]
fn mg2by1_u64_matches_reference() {
let cases: &[(u64, u64, u64)] = &[
(0, 1, 1u64 << 63),
(0, u64::MAX, 1u64 << 63),
((1u64 << 63) - 1, u64::MAX, 1u64 << 63),
(0, 1, u64::MAX),
(u64::MAX - 1, u64::MAX, u64::MAX),
(12345, 67890, (1u64 << 63) | 0xdead_beef_u64),
(u64::MAX - 1, 0, u64::MAX),
];
for &(u1, u0, d) in cases {
assert!(d >> 63 == 1);
assert!(u1 < d);
let mg = MG2by1U64::new(d);
let (q, r) = mg.div_rem(u1, u0);
let num = ((u1 as u128) << 64) | (u0 as u128);
let exp_q = (num / (d as u128)) as u64;
let exp_r = (num % (d as u128)) as u64;
assert_eq!((q, r), (exp_q, exp_r), "MG u64 mismatch for {u1:#x}, {u0:#x}, d={d:#x}");
}
}
#[test]
fn mg2by1_matches_div_2_by_1() {
let cases: &[(u128, u128, u128)] = &[
(0, 1, 1u128 << 127),
(0, u128::MAX, 1u128 << 127),
((1u128 << 127) - 1, u128::MAX, 1u128 << 127),
(0, 1, u128::MAX),
(u128::MAX - 1, u128::MAX, u128::MAX),
(u128::MAX - 1, u128::MAX, u128::MAX),
(12345, 67890, (1u128 << 127) | 0xdead_beefu128),
(u128::MAX - 1, 0, u128::MAX),
(0x1234_5678_9abc_def0_u128 ^ 0xa5a5, 0xfedc_ba98_7654_3210_u128, (1u128 << 127) | 0xc0ffee_u128),
];
for &(u1, u0, d) in cases {
assert!(d >> 127 == 1, "test divisor not normalised: {d:#x}");
assert!(u1 < d, "test precondition u1 < d violated: {u1:#x} >= {d:#x}");
let (q_ref, r_ref) = div_2_by_1(u1, u0, d);
let mg = MG2by1::new(d);
let (q_mg, r_mg) = mg.div_rem(u1, u0);
assert_eq!(
(q_mg, r_mg),
(q_ref, r_ref),
"MG2by1 disagrees with div_2_by_1 for (u1={u1:#x}, u0={u0:#x}, d={d:#x})"
);
}
}
#[test]
fn knuth_matches_canonical_divmod() {
let cases: &[(&[u128], &[u128])] = &[
(&[42], &[7]),
(&[u128::MAX, 0], &[2]),
(&[1, 1, 0, 0], &[3]),
(&[u128::MAX, u128::MAX, 1, 0], &[5, 9]),
(&[u128::MAX, u128::MAX, u128::MAX, 0], &[1, 2, 3]),
(&[100, 0, 0], &[200, 0, 1]),
(
&[0, 0, u128::MAX, u128::MAX],
&[1, 2, u128::MAX],
),
];
for (num, den) in cases {
let mut q_canon = [0u128; 8];
let mut r_canon = [0u128; 8];
limbs_divmod(num, den, &mut q_canon, &mut r_canon);
let mut q_knuth = [0u128; 8];
let mut r_knuth = [0u128; 8];
limbs_divmod_knuth(num, den, &mut q_knuth, &mut r_knuth);
assert_eq!(q_canon, q_knuth, "quotient mismatch on {:?} / {:?}", num, den);
assert_eq!(r_canon, r_knuth, "remainder mismatch on {:?} / {:?}", num, den);
}
}
#[test]
fn bz_matches_canonical_divmod() {
let mut num = [0u128; 16];
for (i, slot) in num.iter_mut().enumerate() {
*slot = (i as u128)
.wrapping_mul(0x9E37_79B9_7F4A_7C15)
.wrapping_add(i as u128);
}
let mut den = [0u128; 10];
for (i, slot) in den.iter_mut().enumerate() {
*slot = ((i + 1) as u128).wrapping_mul(0xBF58_476D_1CE4_E5B9);
}
let mut q_canon = [0u128; 16];
let mut r_canon = [0u128; 16];
limbs_divmod(&num, &den, &mut q_canon, &mut r_canon);
let mut q_bz = [0u128; 16];
let mut r_bz = [0u128; 16];
limbs_divmod_bz(&num, &den, &mut q_bz, &mut r_bz);
assert_eq!(q_canon, q_bz, "BZ quotient mismatch");
assert_eq!(r_canon, r_bz, "BZ remainder mismatch");
}
#[cfg(feature = "alloc")]
#[test]
fn fast_mul_dispatches_to_karatsuba_at_threshold() {
let a: [u128; 16] = core::array::from_fn(|i| (i as u128).wrapping_mul(0xABCD) + 1);
let b: [u128; 16] = core::array::from_fn(|i| (i as u128).wrapping_mul(0xBEEF) + 7);
let mut fast = [0u128; 32];
let mut school = [0u128; 32];
limbs_mul_fast(&a, &b, &mut fast);
limbs_mul(&a, &b, &mut school);
assert_eq!(fast, school, "fast (Karatsuba) and schoolbook disagree");
}
#[cfg(feature = "alloc")]
#[test]
fn fast_mul_falls_through_to_schoolbook_below_threshold() {
let a: [u128; 8] = core::array::from_fn(|i| (i as u128).wrapping_mul(0x1234) + 1);
let b: [u128; 8] = core::array::from_fn(|i| (i as u128).wrapping_mul(0x5678) + 3);
let mut fast = [0u128; 16];
let mut school = [0u128; 16];
limbs_mul_fast(&a, &b, &mut fast);
limbs_mul(&a, &b, &mut school);
assert_eq!(fast, school);
}
#[cfg(feature = "alloc")]
#[test]
fn karatsuba_safety_fallback_below_threshold() {
let a: [u128; 4] = [123, 456, 789, 0];
let b: [u128; 4] = [987, 654, 321, 0];
let mut karatsuba_out = [0u128; 8];
let mut school_out = [0u128; 8];
limbs_mul_karatsuba(&a, &b, &mut karatsuba_out);
limbs_mul(&a, &b, &mut school_out);
assert_eq!(karatsuba_out, school_out);
}
#[test]
fn isqrt_one_short_circuit() {
let n = [1u128, 0];
let mut out = [0u128; 2];
limbs_isqrt(&n, &mut out);
assert_eq!(out, [1, 0]);
}
#[test]
fn isqrt_zero_short_circuit() {
let n = [0u128, 0];
let mut out = [0u128; 2];
limbs_isqrt(&n, &mut out);
assert_eq!(out, [0, 0]);
}
#[test]
fn wide_cast_into_u128_returns_first_limb() {
let src = Int256::from_i128(123_456_789);
let dst: u128 = wide_cast(src);
assert_eq!(dst, 123_456_789);
let dst: u128 = wide_cast(Int256::ZERO);
assert_eq!(dst, 0);
}
#[test]
fn knuth_q_hat_cap_branch_matches_canonical() {
let num: [u128; 4] = [0, 0, u128::MAX, u128::MAX >> 1];
let den: [u128; 3] = [1, 2, u128::MAX >> 1];
let mut q_canon = [0u128; 4];
let mut r_canon = [0u128; 4];
limbs_divmod(&num, &den, &mut q_canon, &mut r_canon);
let mut q_knuth = [0u128; 4];
let mut r_knuth = [0u128; 4];
limbs_divmod_knuth(&num, &den, &mut q_knuth, &mut r_knuth);
assert_eq!(q_canon, q_knuth);
assert_eq!(r_canon, r_knuth);
}
#[test]
fn bz_strips_numerator_trailing_zeros() {
let mut num = [0u128; 16];
for slot in &mut num[..8] {
*slot = 0xCAFE_F00D;
}
let mut den = [0u128; 10];
den[0] = 7;
let mut q_canon = [0u128; 16];
let mut r_canon = [0u128; 16];
limbs_divmod(&num, &den, &mut q_canon, &mut r_canon);
let mut q_bz = [0u128; 16];
let mut r_bz = [0u128; 16];
limbs_divmod_bz(&num, &den, &mut q_bz, &mut r_bz);
assert_eq!(q_canon, q_bz);
assert_eq!(r_canon, r_bz);
}
}