1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
//! Core Knuth Algorithm D quotient loop with MG10 reciprocal-based digit estimation.
use super::KnuthD;
impl KnuthD {
/// Core Knuth Algorithm D loop with MG10 reciprocal-based quotient
/// estimation.
///
/// Processes quotient digits from most-significant to least-significant.
/// Each iteration either uses [`div_3x2`](Self::div_3x2) for fast
/// reciprocal estimation or falls back to full n-limb
/// [`submul`](Self::submul) when `(u2, u1) >= (d1, d0)`. The remainder
/// is left in `u[0..n]` (still normalized).
pub(super) fn knuth_loop(&self, u: &mut [u64], m: usize, q: &mut [u64]) {
for j in (0..=m).rev() {
let u2 = u[j + self.n];
let u1 = u[j + self.n - 1];
let u0 = u[j + self.n - 2];
let u21 = ((u2 as u128) << 64) | (u1 as u128);
let mut qj;
if u21 >= self.d {
// Overflow: (u2, u1) >= (d1, d0), quotient digit is u64::MAX.
// Full n-limb multiply-subtract.
qj = u64::MAX;
let borrow = self.submul(&mut u[j..], qj);
let top = u[j + self.n];
u[j + self.n] = top.wrapping_sub(borrow);
if borrow > top {
qj -= 1;
self.add_back(&mut u[j..]);
}
} else {
// Normal: div_3x2 reciprocal estimation
let (q_est, r) = self.div_3x2(u2, u1, u0);
qj = q_est;
// Subtract qj * vn[0..n-2] from u[j..j+n-2]
let mut borrow: u64 = 0;
for i in 0..self.n - 2 {
let prod = (qj as u128) * (self.vn[i] as u128) + (borrow as u128);
let (diff, b) = u[j + i].overflowing_sub(prod as u64);
u[j + i] = diff;
borrow = (prod >> 64) as u64 + b as u64;
}
// Subtract borrow from the 128-bit remainder
let (r_adj, underflow) = r.overflowing_sub(borrow as u128);
u[j + self.n - 2] = r_adj as u64;
u[j + self.n - 1] = (r_adj >> 64) as u64;
u[j + self.n] = 0;
if underflow {
qj -= 1;
self.add_back(&mut u[j..]);
}
}
q[j] = qj;
}
}
}
#[cfg(test)]
mod ai_tests {
use crate::u256::U256;
use crate::u512::U512;
use super::*;
/// Full pipeline: normalize, knuth_loop, unnormalize gives correct remainder.
#[test]
fn full_pipeline() {
// 2^64 mod (2^64 + 1) = 2^64 - (2^64+1) * 0 = 2^64, but 2^64 < 2^64+1
// so remainder is 2^64
let divisor = U256::from_be_limbs([0, 0, 1, 1]);
let dividend = U512::from_be_limbs([0, 0, 0, 0, 0, 0, 1, 0]);
let n = divisor.sig_limbs();
let v_le = divisor.to_le_limbs();
let kd = KnuthD::new(&v_le, n);
let u_le = dividend.to_le_limbs();
let m = 8 - kd.n;
let mut un = [0u64; 9];
kd.normalize_dividend(&u_le, &mut un);
let mut q_arr = [0u64; 8];
kd.knuth_loop(&mut un[..m + kd.n + 1], m, &mut q_arr[..m + 1]);
let rem_le = kd.unnormalize(&un);
let result = U256(rem_le);
assert_eq!(result, U256::from_be_limbs([0, 0, 1, 0]));
}
}