use core::borrow::Borrow;
use num_traits::Float;
#[inline]
pub fn twosum<T: Float>(a: T, b: T) -> (T, T) {
let sum = a + b;
let b_rounded = sum - a;
let a_rounded = sum - b_rounded;
let a_roundoff = a - a_rounded;
let b_roundoff = b - b_rounded;
let err = a_roundoff + b_roundoff;
(sum, err)
}
#[inline]
#[must_use]
pub fn sum<I, T>(values: I) -> (T, T)
where
I: IntoIterator,
I::Item: Borrow<T>,
T: Float,
{
let mut values = values.into_iter();
let Some(value) = values.next() else {
return (T::zero(), T::zero());
};
let mut acc = TwoSum::<T>::new(*value.borrow());
for value in values {
acc.add(*value.borrow());
}
acc.finish()
}
#[derive(Clone, Copy, Debug)]
pub struct TwoSum<T: Float, const NLANES: usize = 8> {
values: [T; NLANES],
residuals: [T; NLANES],
staged: [T; NLANES],
staged_len: usize,
}
impl<T: Float, const NLANES: usize> TwoSum<T, NLANES> {
#[inline]
#[must_use]
pub fn new(value: T) -> Self {
Self::with_residual(value, T::zero())
}
#[inline]
#[must_use]
pub fn with_residual(value: T, residual: T) -> Self {
assert!(NLANES > 0, "TwoSum requires at least one lane");
let mut values = [T::zero(); NLANES];
let mut residuals = [T::zero(); NLANES];
values[0] = value;
residuals[0] = residual;
Self {
values,
residuals,
staged: [T::zero(); NLANES],
staged_len: 0,
}
}
#[inline]
pub fn add(&mut self, value: T) {
self.staged[self.staged_len] = value;
self.staged_len += 1;
if self.staged_len == NLANES {
self.flush_staged();
}
}
#[inline]
#[must_use]
pub fn finish(mut self) -> (T, T) {
self.flush_tail();
for lane in 1..NLANES {
let value = self.values[lane];
let residual = self.residuals[lane];
self.consume_lane(0, value);
self.consume_lane(0, residual);
}
(self.values[0], self.residuals[0])
}
#[inline]
fn flush_staged(&mut self) {
debug_assert_eq!(self.staged_len, NLANES);
for lane in 0..NLANES {
self.consume_lane(lane, self.staged[lane]);
}
self.staged_len = 0;
}
#[inline]
fn flush_tail(&mut self) {
for lane in 0..self.staged_len {
self.consume_lane(lane, self.staged[lane]);
}
self.staged_len = 0;
}
#[inline]
fn consume_lane(&mut self, lane: usize, value: T) {
let (sum, err) = twosum(self.values[lane], value);
self.values[lane] = sum;
self.residuals[lane] = self.residuals[lane] + err;
}
}
#[cfg(test)]
mod test {
use super::{TwoSum, sum, twosum};
use num_traits::Float;
fn assert_twosum_residual<T: Float + core::fmt::Debug>() {
let one = T::one();
let quarter = T::from(0.25).unwrap();
let tiny = T::epsilon() * quarter;
let (sum, err) = twosum(tiny, one);
assert_eq!(sum, one);
assert_eq!(err, tiny);
}
#[test]
fn twosum_returns_zero_error_for_exact_sum_f32() {
let (sum, err) = twosum(1.0f32, 0.5f32);
assert_eq!(sum, 1.5);
assert_eq!(err, 0.0);
}
#[test]
fn twosum_returns_zero_error_for_exact_sum_f64() {
let (sum, err) = twosum(1.0f64, 0.5f64);
assert_eq!(sum, 1.5);
assert_eq!(err, 0.0);
}
#[test]
fn twosum_captures_rounded_off_bits_without_ordering_f32() {
assert_twosum_residual::<f32>();
}
#[test]
fn twosum_captures_rounded_off_bits_without_ordering_f64() {
assert_twosum_residual::<f64>();
}
#[test]
fn accumulator_preserves_initial_residual() {
let residual = f64::EPSILON * 0.25;
let (sum, err) = TwoSum::<f64, 4>::with_residual(1.0, residual).finish();
assert_eq!(sum, 1.0);
assert_eq!(err, residual);
}
#[test]
fn top_level_sum_accepts_owned_items() {
let tiny = f64::EPSILON * 0.25;
let (sum, err) = sum([1.0f64, tiny]);
assert_eq!(sum, 1.0);
assert_eq!(err, tiny);
}
#[test]
fn top_level_sum_accepts_borrowed_items() {
let values = [1.0f64, 2.0, 3.0];
let (sum, err) = sum::<_, f64>(values.iter());
assert_eq!(sum, 6.0);
assert_eq!(err, 0.0);
}
#[test]
fn top_level_sum_handles_empty_input() {
let (sum, err) = sum(core::iter::empty::<f64>());
assert_eq!(sum, 0.0);
assert_eq!(err, 0.0);
}
#[test]
fn accumulator_flushes_partial_tail_on_finish() {
let tiny = f64::EPSILON * 0.25;
let mut acc = TwoSum::<f64, 4>::new(1.0);
acc.add(tiny);
acc.add(tiny);
let (sum, err) = acc.finish();
assert_eq!(sum, 1.0);
assert_eq!(err, tiny + tiny);
}
#[test]
fn accumulator_reduces_full_and_partial_lanes_into_lane_zero() {
let tiny = f64::EPSILON * 0.25;
let mut acc = TwoSum::<f64, 4>::with_residual(1.0, tiny);
for _ in 0..5 {
acc.add(tiny);
}
let (sum, err) = acc.finish();
assert_eq!(sum, 1.0);
assert_eq!(err, tiny * 6.0);
}
#[test]
fn single_lane_accumulator_matches_scalar_updates() {
let tiny = f32::EPSILON * 0.25;
let mut acc = TwoSum::<f32, 1>::new(1.0);
acc.add(tiny);
acc.add(tiny);
let (sum, err) = acc.finish();
assert_eq!(sum, 1.0);
assert_eq!(err, tiny + tiny);
}
}