use crate::wide_int::{limbs_divmod_dispatch, limbs_mul, limbs_sub_assign};
#[derive(Clone)]
pub struct NewtonReciprocal {
pub r: Vec<u128>,
pub k_limbs: usize,
pub pow_scale: Vec<u128>,
}
impl NewtonReciprocal {
pub fn precompute(scale: u32, width_limbs: usize) -> Self {
let pow_limbs = (scale as usize / 38 + 2).max(1);
let mut pow_scale = vec![0u128; pow_limbs];
pow_scale[0] = 1u128;
for _ in 0..scale {
let mut carry: u128 = 0;
for limb in pow_scale.iter_mut() {
let (lo, hi) = mul_u128_by_u64(*limb, 10);
let (sum_lo, c1) = lo.overflowing_add(carry);
*limb = sum_lo;
carry = hi + u128::from(c1);
}
debug_assert_eq!(carry, 0, "pow_scale buffer too small at scale={scale}");
}
let k_limbs = width_limbs + pow_limbs;
let mut num = vec![0u128; k_limbs + 1];
num[k_limbs] = 1u128;
let mut r = vec![0u128; k_limbs + 1];
let mut rem = vec![0u128; pow_limbs + 1];
limbs_divmod_dispatch(&num, &pow_scale, &mut r, &mut rem);
Self { r, k_limbs, pow_scale }
}
}
#[inline]
fn mul_u128_by_u64(a: u128, b: u64) -> (u128, u128) {
let a_lo = a as u64 as u128;
let a_hi = a >> 64;
let b = b as u128;
let lo_full = a_lo * b;
let hi_full = a_hi * b;
let lo_full_hi = lo_full >> 64;
let mid = hi_full + lo_full_hi;
let lo = (lo_full as u64 as u128) | ((mid as u64 as u128) << 64);
let hi = mid >> 64;
(lo, hi)
}
pub fn div_newton(
n: &[u128],
table: &NewtonReciprocal,
quot: &mut [u128],
) -> Vec<u128> {
let prod_len = n.len() + table.r.len();
let mut prod = vec![0u128; prod_len];
limbs_mul(n, &table.r, &mut prod);
let lo = table.k_limbs.min(prod.len());
let q_slice = &prod[lo..];
for (dst, src) in quot.iter_mut().zip(q_slice.iter()) {
*dst = *src;
}
for dst in quot.iter_mut().skip(q_slice.len()) {
*dst = 0;
}
let prod2_len = quot.len() + table.pow_scale.len();
let mut prod2 = vec![0u128; prod2_len];
limbs_mul(quot, &table.pow_scale, &mut prod2);
let mut rem = vec![0u128; n.len() + 1];
for (dst, src) in rem.iter_mut().zip(n.iter()) {
*dst = *src;
}
let sub_len = prod2.len().min(rem.len());
let _ = limbs_sub_assign(&mut rem[..sub_len], &prod2[..sub_len]);
loop {
let cmp = cmp_limbs(&rem, &table.pow_scale);
if cmp == core::cmp::Ordering::Less {
break;
}
let sub_len = rem.len().min(table.pow_scale.len());
let _ = limbs_sub_assign(&mut rem[..sub_len], &table.pow_scale[..sub_len]);
let mut carry: u128 = 1;
for limb in quot.iter_mut() {
let (s, c) = limb.overflowing_add(carry);
*limb = s;
if !c {
carry = 0;
break;
}
}
let _ = carry;
}
rem
}
fn cmp_limbs(a: &[u128], b: &[u128]) -> core::cmp::Ordering {
let mut a_top = a.len();
while a_top > 0 && a[a_top - 1] == 0 {
a_top -= 1;
}
let mut b_top = b.len();
while b_top > 0 && b[b_top - 1] == 0 {
b_top -= 1;
}
if a_top != b_top {
return a_top.cmp(&b_top);
}
let mut i = a_top;
while i > 0 {
i -= 1;
match a[i].cmp(&b[i]) {
core::cmp::Ordering::Equal => continue,
ord => return ord,
}
}
core::cmp::Ordering::Equal
}
pub(crate) fn div_wide_pow10_newton_with<W: crate::wide_int::WideInt>(
n: W,
scale: u32,
mode: crate::support::rounding::RoundingMode,
table: &NewtonReciprocal,
) -> W {
use crate::support::rounding;
let mut mag = [0u128; 64];
let neg = n.mag_into_u128(&mut mag);
let mut top = mag.len();
while top > 0 && mag[top - 1] == 0 {
top -= 1;
}
let n_slice = &mag[..top.max(1)];
let mut quot = vec![0u128; mag.len()];
let rem = div_newton(n_slice, table, &mut quot);
let rem_is_zero = rem.iter().all(|&x| x == 0);
if !rem_is_zero {
let mut half = table.pow_scale.clone();
let mut i = half.len();
let mut carry_in: u128 = 0;
while i > 0 {
i -= 1;
let next_carry = half[i] & 1;
half[i] = (carry_in << 127) | (half[i] >> 1);
carry_in = next_carry;
}
let cmp_r = cmp_limbs(&rem, &half);
let q_is_odd = (quot[0] & 1) != 0;
if rounding::should_bump(mode, cmp_r, q_is_odd, !neg) {
let mut carry: u128 = 1;
for limb in quot.iter_mut() {
let (s, c) = limb.overflowing_add(carry);
*limb = s;
if !c {
carry = 0;
break;
}
}
let _ = carry;
}
}
let mut out = [0u128; 64];
for (dst, src) in out.iter_mut().zip(quot.iter()) {
*dst = *src;
}
W::from_mag_sign_u128(&out, neg)
}
#[inline]
const fn newton_wins(width_bits: u32, scale: u32) -> bool {
if scale <= 38 {
return false;
}
match width_bits {
2048 if scale >= 200 => true,
3072 if scale >= 200 => true,
4096 if scale >= 400 => true,
_ => false,
}
}
#[cfg(feature = "std")]
mod cache {
use super::NewtonReciprocal;
use ::std::thread_local;
thread_local! {
static C_2048: ::core::cell::RefCell<alloc::vec::Vec<(u32, NewtonReciprocal)>> = const {
::core::cell::RefCell::new(alloc::vec::Vec::new())
};
static C_3072: ::core::cell::RefCell<alloc::vec::Vec<(u32, NewtonReciprocal)>> = const {
::core::cell::RefCell::new(alloc::vec::Vec::new())
};
static C_4096: ::core::cell::RefCell<alloc::vec::Vec<(u32, NewtonReciprocal)>> = const {
::core::cell::RefCell::new(alloc::vec::Vec::new())
};
}
pub(super) fn with_table<R>(
width_bits: u32,
scale: u32,
width_limbs: usize,
f: impl FnOnce(&NewtonReciprocal) -> R,
) -> R {
let slot = match width_bits {
2048 => &C_2048,
3072 => &C_3072,
4096 => &C_4096,
_ => unreachable!("with_table called on un-cached width {width_bits}"),
};
let needs_insert = slot.with(|c| {
let cache = c.borrow();
!cache.iter().any(|(s, _)| *s == scale)
});
if needs_insert {
let table = NewtonReciprocal::precompute(scale, width_limbs);
slot.with(|c| {
let mut cache = c.borrow_mut();
if !cache.iter().any(|(s, _)| *s == scale) {
cache.push((scale, table));
}
});
}
slot.with(|c| {
let cache = c.borrow();
let entry = cache
.iter()
.find(|(s, _)| *s == scale)
.expect("cache invariant: entry inserted above");
f(&entry.1)
})
}
}
#[inline]
pub(crate) fn dispatch_wide_pow10_with<W: crate::wide_int::WideStorage, const N: usize>(
n: W,
scale: u32,
mode: crate::support::rounding::RoundingMode,
) -> W {
debug_assert_eq!(N, W::U128_LIMBS, "magnitude buffer must match W's u128-limb width");
let bits = <W as crate::wide_int::WideStorage>::BITS;
if !newton_wins(bits, scale) {
return crate::algos::mg_divide::div_wide_pow10_chain_with::<W, N>(n, scale, mode);
}
#[cfg(feature = "std")]
{
let width_limbs = (bits as usize) / 128;
return cache::with_table(bits, scale, width_limbs, |table| {
div_wide_pow10_newton_with::<W>(n, scale, mode, table)
});
}
#[cfg(not(feature = "std"))]
{
crate::algos::mg_divide::div_wide_pow10_chain_with::<W, N>(n, scale, mode)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::algos::mg_divide::div_wide_pow10_chain_with;
use crate::support::rounding::RoundingMode;
use crate::wide_int::{I1024, I2048, I4096};
#[test]
fn newton_matches_mg_chain_d307_s150() {
let scale = 150u32;
let width_limbs = 8;
let table = NewtonReciprocal::precompute(scale, width_limbs);
let mut limbs = [0u128; 64];
limbs[6] = 1u128 << 32;
limbs[0] = 42;
let n = <I1024 as crate::wide_int::WideInt>::from_mag_sign_u128(&limbs, false);
let got = div_wide_pow10_newton_with(n, scale, RoundingMode::HalfToEven, &table);
let want = div_wide_pow10_chain_with::<I1024, { <I1024 as crate::wide_int::WideInt>::U128_LIMBS }>(
n,
scale,
RoundingMode::HalfToEven,
);
assert_eq!(got, want, "Newton differs from MG chain at D307 s=150");
}
#[test]
fn newton_matches_mg_chain_d616_s308() {
let scale = 308u32;
let width_limbs = 16;
let table = NewtonReciprocal::precompute(scale, width_limbs);
let mut limbs = [0u128; 64];
limbs[14] = 1u128 << 16;
limbs[3] = 0xdeadbeef;
let n = <I2048 as crate::wide_int::WideInt>::from_mag_sign_u128(&limbs, false);
let got = div_wide_pow10_newton_with(n, scale, RoundingMode::HalfToEven, &table);
let want = div_wide_pow10_chain_with::<I2048, { <I2048 as crate::wide_int::WideInt>::U128_LIMBS }>(
n,
scale,
RoundingMode::HalfToEven,
);
assert_eq!(got, want, "Newton differs from MG chain at D616 s=308");
}
#[test]
fn newton_matches_mg_chain_d1232_s615() {
let scale = 615u32;
let width_limbs = 32;
let table = NewtonReciprocal::precompute(scale, width_limbs);
let mut limbs = [0u128; 64];
limbs[30] = 1u128 << 8;
limbs[5] = 0xcafef00d;
let n = <I4096 as crate::wide_int::WideInt>::from_mag_sign_u128(&limbs, false);
let got = div_wide_pow10_newton_with(n, scale, RoundingMode::HalfToEven, &table);
let want = div_wide_pow10_chain_with::<I4096, { <I4096 as crate::wide_int::WideInt>::U128_LIMBS }>(
n,
scale,
RoundingMode::HalfToEven,
);
assert_eq!(got, want, "Newton differs from MG chain at D1232 s=615");
}
}