use crate::int::algos::isqrt::isqrt_newton::isqrt_newton;
use crate::int::algos::mul::mul_schoolbook::mul_schoolbook;
use crate::int::algos::support::limbs::{add_assign, bit_len, cmp, shl, shr, sub_assign};
use crate::int::policy::div_rem::dispatch as div_rem_dispatch;
use crate::int::types::compute_limbs::MAX_DOUBLE_LIMBS;
const SCRATCH_LIMBS: usize = MAX_DOUBLE_LIMBS;
const BASE_LIMBS: usize = 2;
pub(crate) fn isqrt_karatsuba(n: &[u64], out: &mut [u64]) {
for o in out.iter_mut() {
*o = 0;
}
let bits = bit_len(n);
if bits == 0 {
return;
}
if bits <= 1 {
out[0] = 1;
return;
}
let sig = sig_len(n);
if sig <= BASE_LIMBS {
isqrt_newton(&n[..sig], out);
return;
}
let w = next_pow2_limbs(sig);
debug_assert!(w <= SCRATCH_LIMBS, "isqrt_karatsuba window exceeds scratch");
let sh = (w as u32) * 64 - bits; let sh_even = sh & !1u32;
let mut nn = [0u64; SCRATCH_LIMBS];
shl(&n[..sig], sh_even, &mut nn[..w]);
let mut s = [0u64; SCRATCH_LIMBS];
let mut r = [0u64; SCRATCH_LIMBS];
sqrtrem(&nn[..w], &mut s, &mut r);
let s_len = sig_len(&s[..SCRATCH_LIMBS]);
let mut s_out = [0u64; SCRATCH_LIMBS];
shr(&s[..s_len], sh_even / 2, &mut s_out[..s_len]);
let copy_len = out.len().min(s_len);
out[..copy_len].copy_from_slice(&s_out[..copy_len]);
}
#[inline]
fn next_pow2_limbs(x: usize) -> usize {
let mut w = 4usize;
while w < x {
w <<= 1;
}
w
}
#[inline]
fn sig_len(a: &[u64]) -> usize {
let mut i = a.len();
while i > 0 {
if a[i - 1] != 0 {
return i;
}
i -= 1;
}
1
}
fn sqrtrem(n: &[u64], s: &mut [u64], r: &mut [u64]) {
for v in s.iter_mut() {
*v = 0;
}
for v in r.iter_mut() {
*v = 0;
}
let w = n.len();
if w <= BASE_LIMBS {
isqrt_newton(n, &mut s[..w]);
let s_len = sig_len(&s[..w]);
let mut sq = [0u64; SCRATCH_LIMBS];
let sq_len = (2 * s_len).min(SCRATCH_LIMBS);
mul_schoolbook(&s[..s_len], &s[..s_len], &mut sq[..sq_len]);
r[..w].copy_from_slice(n);
sub_assign(&mut r[..w], &sq[..sq_len.min(w)]);
return;
}
let h = w / 4;
let a0 = &n[0..h];
let a1 = &n[h..2 * h];
let high = &n[2 * h..w];
let mut sp = [0u64; SCRATCH_LIMBS];
let mut rp = [0u64; SCRATCH_LIMBS];
sqrtrem(high, &mut sp, &mut rp);
let sp_len = sig_len(&sp[..SCRATCH_LIMBS]);
let rp_len = sig_len(&rp[..SCRATCH_LIMBS]);
let mut num = [0u64; SCRATCH_LIMBS];
for (i, &v) in rp[..rp_len].iter().enumerate() {
if h + i < SCRATCH_LIMBS {
num[h + i] = v;
}
}
add_assign(&mut num, a1);
let num_len = sig_len(&num[..SCRATCH_LIMBS]);
let mut den = [0u64; SCRATCH_LIMBS];
shl(&sp[..sp_len], 1, &mut den[..sp_len + 1]);
let den_len = sig_len(&den[..SCRATCH_LIMBS]);
let mut q = [0u64; SCRATCH_LIMBS];
let mut u = [0u64; SCRATCH_LIMBS];
let qrlen = num_len.max(den_len);
div_rem_dispatch(
&num[..num_len],
&den[..den_len],
&mut q[..qrlen],
&mut u[..qrlen],
);
let q_len = sig_len(&q[..qrlen]);
let u_len = sig_len(&u[..qrlen]);
for (i, &v) in sp[..sp_len].iter().enumerate() {
if h + i < SCRATCH_LIMBS {
s[h + i] = v;
}
}
add_assign(s, &q[..q_len]);
let mut rr = [0u64; SCRATCH_LIMBS];
for (i, &v) in u[..u_len].iter().enumerate() {
if h + i < SCRATCH_LIMBS {
rr[h + i] = v;
}
}
add_assign(&mut rr, a0);
let mut qsq = [0u64; SCRATCH_LIMBS];
let qsq_len = (2 * q_len).min(SCRATCH_LIMBS);
mul_schoolbook(&q[..q_len], &q[..q_len], &mut qsq[..qsq_len]);
let one = [1u64];
if cmp(&rr[..SCRATCH_LIMBS], &qsq[..qsq_len]) >= 0 {
sub_assign(&mut rr, &qsq[..qsq_len]);
r[..SCRATCH_LIMBS].copy_from_slice(&rr[..SCRATCH_LIMBS]);
} else {
let mut deficit = [0u64; SCRATCH_LIMBS];
deficit[..qsq_len].copy_from_slice(&qsq[..qsq_len]);
sub_assign(&mut deficit, &rr[..SCRATCH_LIMBS]);
let mut guard = 0usize;
loop {
guard += 1;
debug_assert!(
guard <= 8,
"isqrt_karatsuba correction exceeded bound (guard={guard}); \
normalization broken — deficit={:?} s={:?}",
&deficit[..SCRATCH_LIMBS.min(8)],
&s[..SCRATCH_LIMBS.min(8)],
);
let mut tm = [0u64; SCRATCH_LIMBS];
shl(s, 1, &mut tm);
sub_assign(&mut tm, &one);
if cmp(&deficit[..SCRATCH_LIMBS], &tm[..SCRATCH_LIMBS]) <= 0 {
let mut rfinal = [0u64; SCRATCH_LIMBS];
rfinal.copy_from_slice(&tm);
sub_assign(&mut rfinal, &deficit[..SCRATCH_LIMBS]);
sub_assign(s, &one);
r[..SCRATCH_LIMBS].copy_from_slice(&rfinal[..SCRATCH_LIMBS]);
break;
}
sub_assign(&mut deficit, &tm[..SCRATCH_LIMBS]);
sub_assign(s, &one);
}
}
}
#[cfg(test)]
mod tests {
use super::isqrt_karatsuba;
use crate::int::algos::isqrt::isqrt_newton::isqrt_newton;
fn kara(n: &[u64], limbs: usize) -> Vec<u64> {
let mut out = vec![0u64; limbs];
isqrt_karatsuba(n, &mut out);
out
}
fn newton(n: &[u64], limbs: usize) -> Vec<u64> {
let mut out = vec![0u64; limbs];
isqrt_newton(n, &mut out);
out
}
fn kara_u64(n: u64) -> u64 {
kara(&[n], 1)[0]
}
fn newton_u64(n: u64) -> u64 {
newton(&[n], 1)[0]
}
fn kara_u128(n: u128) -> u128 {
let v = kara(&[n as u64, (n >> 64) as u64], 2);
(v[0] as u128) | ((v[1] as u128) << 64)
}
fn newton_u128(n: u128) -> u128 {
let v = newton(&[n as u64, (n >> 64) as u64], 2);
(v[0] as u128) | ((v[1] as u128) << 64)
}
#[test]
fn kara_known_values_u64() {
let cases: &[(u64, u64)] = &[
(0, 0),
(1, 1),
(2, 1),
(3, 1),
(4, 2),
(8, 2),
(9, 3),
(15, 3),
(16, 4),
(24, 4),
(25, 5),
(99, 9),
(100, 10),
(101, 10),
(1u64 << 62, 1u64 << 31),
(u64::MAX, 4_294_967_295),
];
for &(n, expected) in cases {
assert_eq!(kara_u64(n), expected, "isqrt_karatsuba({n})");
}
}
#[test]
fn kara_matches_newton_u64_dense() {
for n in 0u64..=8192 {
assert_eq!(kara_u64(n), newton_u64(n), "dense mismatch n={n}");
}
}
#[test]
fn kara_matches_newton_u64_perfect_squares_and_edges() {
let mut k: u64 = 1;
while let Some(sq) = k.checked_mul(k) {
for &n in &[sq - 1, sq, sq + 1] {
assert_eq!(kara_u64(n), newton_u64(n), "edge mismatch n={n} (k={k})");
}
k += 1;
if k > 4_294_967_295 {
break;
}
if k > 8192 {
k += 1_000_003;
}
}
for &n in &[u64::MAX, u64::MAX - 1, 1u64 << 32, 1u64 << 63, (1u64 << 31) * (1u64 << 31)] {
assert_eq!(kara_u64(n), newton_u64(n), "boundary mismatch n={n}");
}
}
#[test]
fn kara_matches_newton_u128() {
for n in 0u128..=1024 {
assert_eq!(kara_u128(n), newton_u128(n), "u128 dense mismatch n={n}");
}
for k in [
2u128, 3, 5, 10, 100, 1_000, 1_000_000, 1_000_000_000,
1_000_000_000_000u128, 4_294_967_296u128, 18_446_744_073_709_551_616u128,
] {
let Some(sq) = k.checked_mul(k) else { continue };
for &n in &[sq - 1, sq, sq + 1] {
assert_eq!(kara_u128(n), newton_u128(n), "u128 sq-edge mismatch n={n}");
}
}
for &n in &[
u128::MAX,
u128::MAX - 1,
1u128 << 64,
1u128 << 100,
1u128 << 126,
(1u128 << 64) + 1,
] {
assert_eq!(kara_u128(n), newton_u128(n), "u128 boundary mismatch n={n}");
}
}
#[cfg(feature = "wide")]
#[test]
fn kara_matches_newton_wide_widths() {
let mut state: u64 = 0xD1B5_4A32_D192_ED03;
let mut next = || {
state ^= state << 13;
state ^= state >> 7;
state ^= state << 17;
state
};
for &limbs in &[3usize, 4, 5, 6, 8, 16, 24] {
for _ in 0..40 {
let mut n = vec![0u64; limbs];
let top = 1 + (next() as usize % limbs);
for l in n.iter_mut().take(top) {
*l = next();
}
if n[top - 1] == 0 {
n[top - 1] = 1;
}
assert_eq!(
kara(&n, limbs),
newton(&n, limbs),
"wide mismatch limbs={limbs} n={n:?}"
);
}
for _ in 0..10 {
let mut b = vec![0u64; limbs];
let bt = 1 + (next() as usize % limbs.div_ceil(2).max(1));
for l in b.iter_mut().take(bt) {
*l = next();
}
if b[bt - 1] == 0 {
b[bt - 1] = 1;
}
let mut sq = vec![0u64; limbs * 2 + 1];
crate::int::algos::mul::mul_schoolbook::mul_schoolbook(&b, &b, &mut sq);
let mut n = vec![0u64; limbs];
n.copy_from_slice(&sq[..limbs]);
assert_eq!(
kara(&n, limbs),
newton(&n, limbs),
"wide square mismatch limbs={limbs}"
);
}
}
}
#[cfg(feature = "xx-wide")]
#[test]
fn kara_matches_newton_widest_widths() {
let mut state: u64 = 0x2545_F491_4F6C_DD1D;
let mut next = || {
state ^= state << 13;
state ^= state >> 7;
state ^= state << 17;
state
};
for &limbs in &[32usize, 48, 64] {
for _ in 0..20 {
let mut n = vec![0u64; limbs];
let top = 1 + (next() as usize % limbs);
for l in n.iter_mut().take(top) {
*l = next();
}
if n[top - 1] == 0 {
n[top - 1] = 1;
}
assert_eq!(
kara(&n, limbs),
newton(&n, limbs),
"widest mismatch limbs={limbs} n={n:?}"
);
}
for _ in 0..6 {
let mut b = vec![0u64; limbs];
let bt = 1 + (next() as usize % limbs.div_ceil(2).max(1));
for l in b.iter_mut().take(bt) {
*l = next();
}
if b[bt - 1] == 0 {
b[bt - 1] = 1;
}
let mut sq = vec![0u64; limbs * 2 + 1];
crate::int::algos::mul::mul_schoolbook::mul_schoolbook(&b, &b, &mut sq);
let mut n = vec![0u64; limbs];
n.copy_from_slice(&sq[..limbs]);
assert_eq!(
kara(&n, limbs),
newton(&n, limbs),
"widest square mismatch limbs={limbs}"
);
}
}
}
}