use crate::int::types::compute_limbs::{ComputeLimbs, Limb, Limbs};
/// Schoolbook multiply-accumulate over `u64` limbs: adds the product `a * b` into `out`.
///
/// Limb `k` carries weight `2^(64·k)` (the partial product `a[i] * b[j]` lands at
/// `out[i + j]`). `out` is *not* cleared first, so the caller decides whether this is a
/// plain multiply (pass zeros) or a fused multiply-add.
///
/// A carry that would run past the end of `out` is silently dropped.
///
/// # Panics
///
/// Panics on out-of-bounds indexing if `out` is too short to hold a limb position
/// `i + j` that is actually visited.
pub(crate) const fn mul_schoolbook(a: &[u64], b: &[u64], out: &mut [u64]) {
    let mut row = 0;
    while row < a.len() {
        let multiplier = a[row];
        // A zero limb of `a` contributes nothing; skip the whole row.
        if multiplier == 0 {
            row += 1;
            continue;
        }

        let mut carry: u64 = 0;
        let mut col = 0;
        while col < b.len() {
            let factor = b[col];
            // Only touch `out` when there is something to add: a non-zero product
            // or a carry still in flight from the previous column.
            if factor != 0 || carry != 0 {
                let wide = (multiplier as u128) * (factor as u128);
                let pos = row + col;
                let (partial, overflow_lo) = out[pos].overflowing_add(wide as u64);
                let (total, overflow_carry) = partial.overflowing_add(carry);
                out[pos] = total;
                // High half of the product is at most 2^64 - 2, and the two carry
                // bits cannot both push it past u64::MAX, so this sum never wraps.
                carry = ((wide >> 64) as u64) + (overflow_lo as u64) + (overflow_carry as u64);
            }
            col += 1;
        }

        // Ripple whatever carry is left into the limbs above this row.
        let mut pos = row + b.len();
        while carry != 0 && pos < out.len() {
            let (sum, overflowed) = out[pos].overflowing_add(carry);
            out[pos] = sum;
            carry = overflowed as u64;
            pos += 1;
        }

        row += 1;
    }
}
/// Fixed-width schoolbook multiply-accumulate: adds the product of two `L`-limb
/// operands into the `D`-limb accumulator `out`.
///
/// Limb `k` carries weight `2^(64·k)`. `out` is not cleared first; existing contents
/// are added to. A carry that would leave limb `D - 1` is dropped.
///
/// `D >= 2 * L` is checked with `debug_assert!` only.
#[inline]
pub(crate) const fn mul_schoolbook_fixed<const L: usize, const D: usize>(
    a: &[u64; L],
    b: &[u64; L],
    out: &mut [u64; D],
) {
    debug_assert!(D >= 2 * L, "mul_schoolbook_fixed: D must be ≥ 2·L");

    let mut row = 0;
    while row < L {
        let multiplier = a[row] as u128;
        // Rows for zero limbs of `a` add nothing and are skipped entirely.
        if multiplier != 0 {
            // The carry is kept widened; it always fits in 64 bits because
            // (2^64-1)^2 + 2·(2^64-1) = 2^128 - 1.
            let mut carry: u128 = 0;

            let mut col = 0;
            while col < L {
                let pos = row + col;
                let wide = multiplier * (b[col] as u128) + (out[pos] as u128) + carry;
                out[pos] = wide as u64;
                carry = wide >> 64;
                col += 1;
            }

            // Push the row's final carry upward until it dies out or `out` ends.
            let mut pos = row + L;
            while carry != 0 && pos < D {
                let wide = (out[pos] as u128) + carry;
                out[pos] = wide as u64;
                carry = wide >> 64;
                pos += 1;
            }
        }
        row += 1;
    }
}
/// Multiplies the `L`-limb value `a` by the single limb `n`, adding the low `L`
/// limbs of `out` into the result as it goes.
///
/// On return `out[..L]` holds the low limbs of `a * n + out[..L]` and `out[L]` holds
/// the final carry. Note that `out[L]` is *overwritten*, not accumulated: whatever it
/// contained on entry is discarded.
///
/// `LP1 == L + 1` is checked with `debug_assert!` only.
#[inline(always)]
pub(crate) const fn mul_schoolbook_into<const L: usize, const LP1: usize>(
    a: &[u64; L],
    n: u64,
    out: &mut [u64; LP1],
) {
    debug_assert!(LP1 == L + 1, "mul_schoolbook_into: LP1 must equal L + 1");

    let scalar = n as u128;
    // Kept in 128 bits between iterations; the value itself never exceeds 64 bits.
    let mut carry: u128 = 0;

    let mut pos = 0;
    while pos < L {
        let wide = (a[pos] as u128) * scalar + (out[pos] as u128) + carry;
        out[pos] = wide as u64;
        carry = wide >> 64;
        pos += 1;
    }

    // The top limb receives the carry out of the last column.
    out[L] = carry as u64;
}
/// Truncated schoolbook multiply-accumulate: adds the low `N` limbs of `a * b`
/// into `out`.
///
/// Limb `k` carries weight `2^(64·k)`. Only partial products landing below limb `N`
/// are computed, and the carry out of limb `N - 1` is discarded, so the result is
/// reduced modulo `2^(64·N)`. `out` is not cleared first.
#[inline]
pub(crate) const fn mul_low_fixed<const N: usize>(a: &[u64; N], b: &[u64; N], out: &mut [u64; N]) {
    let mut row = 0;
    while row < N {
        let multiplier = a[row] as u128;
        if multiplier == 0 {
            // Nothing to add for this row.
            row += 1;
            continue;
        }

        // Columns at or beyond `N - row` would land outside the low half.
        let span = N - row;
        let mut carry: u128 = 0;
        let mut col = 0;
        while col < span {
            let pos = row + col;
            let wide = multiplier * (b[col] as u128) + (out[pos] as u128) + carry;
            out[pos] = wide as u64;
            carry = wide >> 64;
            col += 1;
        }
        // Any carry left here belongs to limb `N` and is intentionally dropped.

        row += 1;
    }
}
/// Truncated product of `a` and `b`, computed in limbs of type `L` instead of raw `u64`s.
///
/// Both operands are packed into `L::packed_len(N)` limbs via `L::pack`, multiplied
/// schoolbook-style into a zero-initialised accumulator of the same packed length
/// (partial products and carries that would land beyond it are dropped), and the
/// accumulator is written to `out` via `L::unpack`.
///
/// NOTE(review): the exact packing layout and the semantics of `widening_mul` /
/// `add_carries` are defined by the `Limb` trait, which is not visible here.
#[inline]
pub(crate) fn mul_low_limb<const N: usize, L: Limb>(a: &[u64; N], b: &[u64; N], out: &mut [u64; N]) {
    let packed = L::packed_len(N);

    let mut lhs = [L::ZERO; N];
    let mut rhs = [L::ZERO; N];
    L::pack(a, &mut lhs[..packed]);
    L::pack(b, &mut rhs[..packed]);

    // For wide operands, ignore the run of zero limbs at the top of `b` so the
    // inner loop does not iterate over them. Narrow operands skip the scan.
    let rhs_used = if N >= 16 {
        rhs[..packed]
            .iter()
            .rposition(|limb| *limb != L::ZERO)
            .map_or(0, |top| top + 1)
    } else {
        packed
    };

    let mut acc = [L::ZERO; N];
    for row in 0..packed {
        let multiplier = lhs[row];
        if multiplier == L::ZERO {
            continue;
        }

        // Stop at whichever comes first: the truncation boundary or the end of
        // the significant part of `b`.
        let cols = (packed - row).min(rhs_used);
        let mut carry = L::ZERO;
        for col in 0..cols {
            let (lo, hi) = multiplier.widening_mul(rhs[col]);
            let pos = row + col;
            let (partial, carry_lo) = acc[pos].overflowing_add(lo);
            let (total, carry_in) = partial.overflowing_add(carry);
            acc[pos] = total;
            carry = hi.add_carries(carry_lo, carry_in);
        }

        // Ripple the leftover carry through the remaining limbs below the
        // truncation boundary.
        let mut pos = row + cols;
        while pos < packed && carry != L::ZERO {
            let (sum, overflowed) = acc[pos].overflowing_add(carry);
            acc[pos] = sum;
            carry = L::ZERO.add_carries(false, overflowed);
            pos += 1;
        }
    }

    L::unpack(&acc[..packed], out);
}
/// Full (double-width) product of `a` and `b`, computed in limbs of type `L`.
///
/// Both operands are packed into `h = L::packed_len(N)` limbs via `L::pack`, multiplied
/// schoolbook-style into a scratch buffer obtained from `L::double::<Limbs<N>>()`, and
/// the first `2 * h` limbs of that buffer are written to `out[..2 * N]` via `L::unpack`.
///
/// NOTE(review): this accumulates into the scratch buffer, so it presumably relies on
/// `L::double` returning zeroed storage of at least `2 * h` limbs — confirm against
/// the `Limb` / `ComputeLimbs` definitions.
///
/// # Panics
///
/// Panics if `out.len() < 2 * N` (slice indexing).
#[inline]
pub(crate) fn mul_full_limb<const N: usize, L: Limb>(a: &[u64; N], b: &[u64; N], out: &mut [u64])
where
    Limbs<N>: ComputeLimbs,
{
    let packed = L::packed_len(N);
    let product_len = 2 * packed;

    let mut lhs = [L::ZERO; N];
    let mut rhs = [L::ZERO; N];
    L::pack(a, &mut lhs[..packed]);
    L::pack(b, &mut rhs[..packed]);

    let mut acc_buf = L::double::<Limbs<N>>();
    let acc = acc_buf.as_mut();

    for row in 0..packed {
        let multiplier = lhs[row];
        if multiplier == L::ZERO {
            // Zero limbs of `a` contribute nothing.
            continue;
        }

        let mut carry = L::ZERO;
        for col in 0..packed {
            let (lo, hi) = multiplier.widening_mul(rhs[col]);
            let pos = row + col;
            let (partial, carry_lo) = acc[pos].overflowing_add(lo);
            let (total, carry_in) = partial.overflowing_add(carry);
            acc[pos] = total;
            carry = hi.add_carries(carry_lo, carry_in);
        }

        // Propagate the row's final carry into the limbs above it.
        let mut pos = row + packed;
        while carry != L::ZERO && pos < product_len {
            let (sum, overflowed) = acc[pos].overflowing_add(carry);
            acc[pos] = sum;
            carry = if overflowed { L::ONE } else { L::ZERO };
            pos += 1;
        }
    }

    L::unpack(&acc[..product_len], &mut out[..2 * N]);
}