#[derive(Debug, Clone, Copy)]
pub struct KahanAccumulatorF64 {
sum: f64,
compensation: f64,
count: u64,
}
impl KahanAccumulatorF64 {
#[inline]
pub fn new() -> Self {
KahanAccumulatorF64 {
sum: 0.0,
compensation: 0.0,
count: 0,
}
}
#[inline]
pub fn add(&mut self, value: f64) {
if value == 0.0 {
self.count += 1;
return;
}
let y = value - self.compensation;
let t = self.sum + y;
self.compensation = (t - self.sum) - y;
self.sum = t;
self.count += 1;
}
#[inline]
pub fn add_slice(&mut self, values: &[f64]) {
for &v in values {
self.add(v);
}
}
#[inline]
pub fn finalize(&self) -> f64 {
self.sum
}
#[inline]
pub fn count(&self) -> u64 {
self.count
}
#[inline]
pub fn compensation_bits(&self) -> u64 {
self.compensation.to_bits()
}
#[inline]
pub fn from_components(sum: f64, compensation: f64, count: u64) -> Self {
Self {
sum,
compensation,
count,
}
}
}
impl Default for KahanAccumulatorF64 {
fn default() -> Self {
Self::new()
}
}
#[derive(Debug, Clone, Copy)]
pub struct KahanAccumulatorF32 {
sum: f32,
compensation: f32,
count: u64,
}
impl KahanAccumulatorF32 {
#[inline]
pub fn new() -> Self {
KahanAccumulatorF32 {
sum: 0.0,
compensation: 0.0,
count: 0,
}
}
#[inline]
pub fn add(&mut self, value: f32) {
if value == 0.0 {
self.count += 1;
return;
}
let y = value - self.compensation;
let t = self.sum + y;
self.compensation = (t - self.sum) - y;
self.sum = t;
self.count += 1;
}
#[inline]
pub fn add_slice(&mut self, values: &[f32]) {
for &v in values {
self.add(v);
}
}
#[inline]
pub fn finalize(&self) -> f32 {
self.sum
}
#[inline]
pub fn count(&self) -> u64 {
self.count
}
}
impl Default for KahanAccumulatorF32 {
fn default() -> Self {
Self::new()
}
}
#[derive(Debug, Clone, Copy)]
pub struct KahanAccumulatorF64x4 {
sum: [f64; 4],
compensation: [f64; 4],
count: u64,
}
impl KahanAccumulatorF64x4 {
#[inline]
pub fn new() -> Self {
Self {
sum: [0.0; 4],
compensation: [0.0; 4],
count: 0,
}
}
#[inline]
pub fn add_lanes(&mut self, values: [f64; 4]) {
for i in 0..4 {
let v = values[i];
if v == 0.0 {
continue;
}
let y = v - self.compensation[i];
let t = self.sum[i] + y;
self.compensation[i] = (t - self.sum[i]) - y;
self.sum[i] = t;
}
self.count += 4;
}
#[inline]
pub fn add_slice(&mut self, values: &[f64]) {
let mut iter = values.chunks_exact(4);
for chunk in &mut iter {
let lanes = [chunk[0], chunk[1], chunk[2], chunk[3]];
self.add_lanes(lanes);
}
for &v in iter.remainder() {
if v == 0.0 {
self.count += 1;
continue;
}
let y = v - self.compensation[0];
let t = self.sum[0] + y;
self.compensation[0] = (t - self.sum[0]) - y;
self.sum[0] = t;
self.count += 1;
}
}
#[inline]
pub fn finalize(&self) -> f64 {
let mut acc = KahanAccumulatorF64::new();
for i in 0..4 {
acc.add(self.sum[i]);
acc.add(self.compensation[i]);
}
acc.finalize()
}
#[inline]
pub fn count(&self) -> u64 {
self.count
}
}
impl Default for KahanAccumulatorF64x4 {
fn default() -> Self {
Self::new()
}
}
#[derive(Debug, Clone, Copy)]
pub struct KahanAccumulatorF64x8 {
sum: [f64; 8],
compensation: [f64; 8],
count: u64,
}
impl KahanAccumulatorF64x8 {
#[inline]
pub fn new() -> Self {
Self {
sum: [0.0; 8],
compensation: [0.0; 8],
count: 0,
}
}
#[inline]
pub fn add_lanes(&mut self, values: [f64; 8]) {
for i in 0..8 {
let v = values[i];
if v == 0.0 {
continue;
}
let y = v - self.compensation[i];
let t = self.sum[i] + y;
self.compensation[i] = (t - self.sum[i]) - y;
self.sum[i] = t;
}
self.count += 8;
}
#[inline]
pub fn add_slice(&mut self, values: &[f64]) {
let mut iter = values.chunks_exact(8);
for chunk in &mut iter {
let lanes = [
chunk[0], chunk[1], chunk[2], chunk[3], chunk[4], chunk[5], chunk[6], chunk[7],
];
self.add_lanes(lanes);
}
for &v in iter.remainder() {
if v == 0.0 {
self.count += 1;
continue;
}
let y = v - self.compensation[0];
let t = self.sum[0] + y;
self.compensation[0] = (t - self.sum[0]) - y;
self.sum[0] = t;
self.count += 1;
}
}
#[inline]
pub fn finalize(&self) -> f64 {
let mut acc = KahanAccumulatorF64::new();
for i in 0..8 {
acc.add(self.sum[i]);
acc.add(self.compensation[i]);
}
acc.finalize()
}
#[inline]
pub fn count(&self) -> u64 {
self.count
}
}
impl Default for KahanAccumulatorF64x8 {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_kahan_accumulator_simple() {
let mut acc = KahanAccumulatorF64::new();
for i in 1..=10 {
acc.add(i as f64);
}
assert_eq!(acc.finalize(), 55.0);
assert_eq!(acc.count(), 10);
}
#[test]
fn test_kahan_accumulator_stability() {
let mut acc = KahanAccumulatorF64::new();
for _ in 0..10000 {
acc.add(0.0001);
}
assert!((acc.finalize() - 1.0).abs() < 1e-10);
}
#[test]
fn test_kahan_accumulator_f32() {
let mut acc = KahanAccumulatorF32::new();
for _ in 0..10000 {
acc.add(0.0001f32);
}
assert!((acc.finalize() - 1.0).abs() < 1e-4);
}
#[test]
fn test_kahan_matches_existing_function() {
let values: Vec<f64> = (1..=1000).map(|i| i as f64 * 0.001).collect();
let mut acc = KahanAccumulatorF64::new();
acc.add_slice(&values);
let func_result = crate::kahan_sum_f64(&values);
assert_eq!(acc.finalize().to_bits(), func_result.to_bits());
}
#[test]
fn test_kahan_x4_simple_sum() {
let mut acc = KahanAccumulatorF64x4::new();
let values: Vec<f64> = (1..=20).map(|i| i as f64).collect();
acc.add_slice(&values);
assert_eq!(acc.finalize(), 210.0);
assert_eq!(acc.count(), 20);
}
#[test]
fn test_kahan_x4_stability() {
let mut acc = KahanAccumulatorF64x4::new();
let values: Vec<f64> = (0..10_000).map(|_| 0.0001).collect();
acc.add_slice(&values);
assert!((acc.finalize() - 1.0).abs() < 1e-10);
assert_eq!(acc.count(), 10_000);
}
#[test]
fn test_kahan_x8_stability() {
let mut acc = KahanAccumulatorF64x8::new();
let values: Vec<f64> = (0..10_000).map(|_| 0.0001).collect();
acc.add_slice(&values);
assert!((acc.finalize() - 1.0).abs() < 1e-10);
assert_eq!(acc.count(), 10_000);
}
#[test]
fn test_kahan_x4_tail_handling() {
let mut acc1 = KahanAccumulatorF64x4::new();
let mut acc2 = KahanAccumulatorF64x4::new();
let values: Vec<f64> = (0..23).map(|i| (i as f64) * 0.137).collect();
acc1.add_slice(&values);
acc2.add_slice(&values);
assert_eq!(
acc1.finalize().to_bits(),
acc2.finalize().to_bits(),
"two identical runs must agree bit-for-bit",
);
assert_eq!(acc1.count(), 23);
}
#[test]
fn test_kahan_x4_byte_stable_across_runs() {
let values: Vec<f64> = (0..100)
.map(|i| ((i as f64) * 0.137).sin() * 1e7)
.collect();
let runs: Vec<u64> = (0..5)
.map(|_| {
let mut acc = KahanAccumulatorF64x4::new();
acc.add_slice(&values);
acc.finalize().to_bits()
})
.collect();
let first = runs[0];
for (i, bits) in runs.iter().enumerate() {
assert_eq!(*bits, first, "run {i} bits diverged");
}
}
#[test]
fn test_kahan_x4_zero_value_short_circuit() {
let values_with_zeros: Vec<f64> =
[1.0, 0.0, 2.0, 0.0, 3.0, 0.0, 4.0, 0.0].to_vec();
let values_no_zeros: Vec<f64> = [1.0, 2.0, 3.0, 4.0].to_vec();
let mut a = KahanAccumulatorF64x4::new();
let mut b = KahanAccumulatorF64x4::new();
a.add_slice(&values_with_zeros);
b.add_slice(&values_no_zeros);
assert_eq!(a.count(), 8);
assert_eq!(b.count(), 4);
assert_eq!(a.finalize(), 10.0);
assert_eq!(b.finalize(), 10.0);
}
#[test]
fn test_kahan_x4_add_lanes_matches_add_slice_at_alignment() {
let values: [f64; 12] = [
1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0,
];
let mut slice_acc = KahanAccumulatorF64x4::new();
slice_acc.add_slice(&values);
let mut lanes_acc = KahanAccumulatorF64x4::new();
lanes_acc.add_lanes([values[0], values[1], values[2], values[3]]);
lanes_acc.add_lanes([values[4], values[5], values[6], values[7]]);
lanes_acc.add_lanes([values[8], values[9], values[10], values[11]]);
assert_eq!(
slice_acc.finalize().to_bits(),
lanes_acc.finalize().to_bits(),
"add_slice and add_lanes must produce byte-identical results at alignment"
);
assert_eq!(slice_acc.count(), lanes_acc.count());
}
#[test]
fn test_kahan_x8_byte_stable_across_runs() {
let values: Vec<f64> = (0..200)
.map(|i| ((i as f64) * 0.057).cos() * 1e6)
.collect();
let runs: Vec<u64> = (0..5)
.map(|_| {
let mut acc = KahanAccumulatorF64x8::new();
acc.add_slice(&values);
acc.finalize().to_bits()
})
.collect();
let first = runs[0];
for (i, bits) in runs.iter().enumerate() {
assert_eq!(*bits, first, "x8 run {i} bits diverged");
}
}
#[test]
fn test_kahan_x4_empty_slice_is_zero() {
let mut acc = KahanAccumulatorF64x4::new();
acc.add_slice(&[]);
assert_eq!(acc.finalize().to_bits(), 0.0_f64.to_bits());
assert_eq!(acc.count(), 0);
}
}