compensated_summation/lib.rs
/*! This crate implements summation algorithms that significantly reduce the numerical error in the total obtained by adding a sequence of finite-precision floating-point numbers, compared to the obvious approach.

# Algorithms

#### Exact addition

[`two_sum()`] and [`fast_two_sum()`] compute the rounded sum of two floating-point numbers and the associated numerical error.

Both functions return a tuple `(s, t)` where `s` is the floating-point sum rounded to nearest and `t` is the floating-point error.
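
When a small term is added to a much larger one, the rounded sum can lose it entirely, while the error term recovers it exactly. A minimal illustration with [`two_sum()`]:

```
use compensated_summation::two_sum;
let (s, t) = two_sum(1e100, 1.0);
assert_eq!(s, 1e100); // the rounded sum swallows the small addend...
assert_eq!(t, 1.0); // ...which is returned exactly as the error term
```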

#### Compensated summation

[`KahanBabuska`] and [`KahanBabuskaNeumaier`] compute compensated sums using the [Kahan-Babuška](https://en.wikipedia.org/wiki/Kahan_summation_algorithm#The_algorithm) and [Kahan-Babuška-Neumaier](https://en.wikipedia.org/wiki/Kahan_summation_algorithm#Further_enhancements) algorithms respectively.

Both types are generic over a parameter `T: num_traits::float::Float`, which is usually [`f32`] or [`f64`] and can typically be inferred.

They support addition and subtraction (also with assignment) of `T` and `&T`.
The estimated total sum (of type `T`) can be retrieved with the `total()` method.
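
For instance, values and references can be mixed, and the non-assigning operators consume and return the accumulator. A minimal illustration:

```
use compensated_summation::KahanBabuska;
let x = 1.5_f64;
let sum = KahanBabuska::new() + x + &x - 1.0 - &1.0;
assert_eq!(sum.total(), 1.0);
```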

Both types also implement [`std::iter::Sum`], which means that iterators of floating-point numbers can be conveniently summed.

# Examples

An empty accumulator for the Kahan-Babuška-Neumaier algorithm can be created with [`KahanBabuskaNeumaier::new()`];
floating-point numbers can be added to it and also subtracted from it;
finally, the estimated total sum can be retrieved with the [`KahanBabuskaNeumaier::total()`] method.

```
use compensated_summation::KahanBabuskaNeumaier;
let mut sum = KahanBabuskaNeumaier::new();
sum += 0.1;
sum += 0.2;
sum -= 0.3;
assert_eq!(sum.total(), f64::EPSILON / 8.0);
```

An iterator of floating-point numbers can be conveniently summed via its [`Iterator::sum()`] method, specifying the desired algorithm.

```
use compensated_summation::KahanBabuska;
let iter = [0.1; 10].iter();
assert_eq!(iter.sum::<KahanBabuska<_>>().total(), 1.0);
```

# Caution!

A word of caution: floating-point arithmetic is subtler than one may naively think!

For example, the *exact sum* of the double precision ([`f64`]) floating-point numbers
`0.1`, `0.2` and `-0.3` is **not zero**. This is not because of rounding errors in the
arithmetic operations (I'm speaking of their exact mathematical sum, not of `0.1 + 0.2 - 0.3`), but
because `0.1`, `0.2` and `-0.3` do not represent what you may think. Mathematically, they are not
$1/10$, $2/10=1/5$ and $-3/10$ respectively, because these are not
[dyadic rational numbers](https://en.wikipedia.org/wiki/Dyadic_rational)
and hence cannot be represented exactly as [`f64`] floating-point numbers. Instead, `0.1`, `0.2` and `-0.3` are
$3602879701896397/36028797018963968$, $3602879701896397/18014398509481984$ and
$-5404319552844595/18014398509481984$ respectively, whose exact sum is $1/36028797018963968=2^{-55}$, which is equal to `f64::EPSILON / 8.0`.
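
This can be checked with exact integer arithmetic on the numerators, using $2^{55}=36028797018963968$ as the common denominator:

```
// Numerators over the common denominator 2^55:
// 0.1 -> 3602879701896397, 0.2 -> 2 * 3602879701896397, -0.3 -> -2 * 5404319552844595
let numerator: i64 = 3602879701896397 + 2 * 3602879701896397 - 2 * 5404319552844595;
assert_eq!(numerator, 1);
// Hence the exact sum is 2^(-55), that is, `f64::EPSILON / 8.0`.
assert_eq!(f64::EPSILON / 8.0, 1.0 / 36028797018963968.0);
```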

With this in mind, one can realize that in this case Kahan-Babuška-Neumaier computes the correct sum

```
use compensated_summation::*;
assert_eq!([0.1, 0.2, -0.3].iter().sum::<KahanBabuskaNeumaier<f64>>().total(), f64::EPSILON / 8.0);
```

whereas both the naive summation and Kahan-Babuška do not

```
use compensated_summation::*;
assert_eq!([0.1, 0.2, -0.3].iter().sum::<f64>(), f64::EPSILON / 4.0);
assert_eq!([0.1, 0.2, -0.3].iter().sum::<KahanBabuska<f64>>().total(), 0.0);
```

Don't be fooled by the Kahan-Babuška result being `0.0`: from the `f64` perspective it is incorrect.
*/

#![cfg_attr(docsrs, feature(doc_auto_cfg))]
#![allow(uncommon_codepoints, mixed_script_confusables)]

use num_traits::float::Float;
use std::iter::Sum;
use std::ops::{Add, AddAssign, Sub, SubAssign};

/// `2Sum` algorithm, see <https://en.wikipedia.org/wiki/2Sum>.
///
/// **Input:** two floating-point numbers $a$ and $b$.
///
/// **Output:** a tuple $(s,t)$ where $s=a\oplus b$ is the floating-point sum [rounded to nearest](https://en.wikipedia.org/wiki/IEEE_754#Roundings_to_nearest) and $t=a+b-(a\oplus b)$ is the floating-point error, so that $a+b=s+t$.
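///
/// # Examples
///
/// The rounded sum of `0.1` and `0.2` is slightly too large; the error term captures the difference exactly:
///
/// ```
/// use compensated_summation::two_sum;
/// let (s, t) = two_sum(0.1, 0.2);
/// assert_eq!(s, 0.30000000000000004);
/// assert_eq!(t, -f64::EPSILON / 8.0); // that is, -2^(-55)
/// ```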
pub fn two_sum<T: Float>(a: T, b: T) -> (T, T) {
    // https://web.archive.org/web/20230122184725/https://ir.cwi.nl/pub/9159/9159D.pdf
    // Equation (4.16) on page 15.
    let s = a + b;
    let aʹ = s - b;
    let bʹ = s - aʹ;
    let δa = a - aʹ;
    let δb = b - bʹ;
    let t = δa + δb;
    (s, t)
}

// Like `two_sum`, but for the difference `a - b`: it returns the rounded
// difference and the associated error, and agrees with `two_sum(a, -b)`
// (see the tests). This is private, for the time being.
fn two_sub<T: Float>(a: T, b: T) -> (T, T) {
    let s = a - b;
    let aʹ = s + b;
    let bʹ = aʹ - s;
    let δa = a - aʹ;
    let δb = b - bʹ;
    let t = δa - δb;
    (s, t)
}

/// `Fast2Sum` algorithm, see <https://en.wikipedia.org/wiki/2Sum>.
///
/// **Input:** two floating-point numbers $a$ and $b$, of which at least one is zero, or which have normalized exponents $e_a\geq e_b$ (such as when $|a|\geq|b|$).
///
/// **Output:** a tuple $(s,t)$ where $s=a\oplus b$ is the floating-point sum [rounded to nearest](https://en.wikipedia.org/wiki/IEEE_754#Roundings_to_nearest) and $t=a+b-(a\oplus b)$ is the floating-point error, so that $a+b=s+t$.
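///
/// # Examples
///
/// A minimal illustration where the precondition holds because `|a| >= |b|`:
///
/// ```
/// use compensated_summation::fast_two_sum;
/// let (s, t) = fast_two_sum(1.0, f64::EPSILON / 2.0);
/// assert_eq!(s, 1.0); // the halfway case rounds to even
/// assert_eq!(t, f64::EPSILON / 2.0);
/// ```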
pub fn fast_two_sum<T: Float>(a: T, b: T) -> (T, T) {
    // https://web.archive.org/web/20220105073915/http://mgnet.org/~douglas/Classes/na-sc/notes/kahan.pdf
    let s = a + b;
    let bʹ = s - a;
    let δb = b - bʹ;
    (s, δb)
}

/// This type is an accumulator for computing a sum with the [Kahan-Babuška algorithm](https://en.wikipedia.org/wiki/Kahan_summation_algorithm#The_algorithm).
///
/// The generic parameter `T` should implement [`num_traits::float::Float`]; it is typically [`f32`] or [`f64`] and can usually be inferred.
///
/// # Examples
///
/// You can create a new empty accumulator with [`KahanBabuska::new()`];
/// then you can add and subtract floating-point numbers;
/// when you are done, you can retrieve the total with the [`KahanBabuska::total()`] method.
///
/// ```
/// # use compensated_summation::KahanBabuska;
/// let mut sum = KahanBabuska::new();
/// sum += 0.1;
/// sum += 0.2;
/// sum -= 0.3;
/// assert_eq!(sum.total(), 0.0);
/// ```
///
/// In addition, [`KahanBabuska`] implements the [`std::iter::Sum`](#impl-Sum<V>-for-KahanBabuska<T>) trait, which means that an iterator of floating-point numbers can be summed either by calling [`KahanBabuska::sum()`] directly
///
/// ```
/// # use compensated_summation::KahanBabuska;
/// use std::iter::Sum; // remember to import the trait
/// let iter = [0.1, 0.2, -0.3].iter();
/// assert_eq!(KahanBabuska::sum(iter).total(), 0.0);
/// ```
///
/// or by using its [`Iterator::sum()`] method
///
/// ```
/// # use compensated_summation::KahanBabuska;
/// let iter = [0.1, 0.2, -0.3].iter();
/// assert_eq!(iter.sum::<KahanBabuska<_>>().total(), 0.0);
/// ```
#[derive(Clone, Debug, PartialEq)]
pub struct KahanBabuska<T> {
    /// Accumulated sum.
    pub sum: T,
    /// Compensation of the error.
    pub comp: T,
}

impl<T: Float> KahanBabuska<T> {
    /// Create a new empty accumulator.
    pub fn new() -> Self {
        Self {
            sum: T::zero(),
            comp: T::zero(),
        }
    }

    /// Get the estimated total sum.
    pub fn total(&self) -> T {
        self.sum + self.comp
    }
}

impl<T: Float> Default for KahanBabuska<T> {
    fn default() -> Self {
        Self::new()
    }
}

impl<T: Float> Add<T> for KahanBabuska<T> {
    type Output = Self;
    fn add(mut self, rhs: T) -> Self::Output {
        self += rhs;
        self
    }
}

impl<T: Float> AddAssign<T> for KahanBabuska<T> {
    fn add_assign(&mut self, rhs: T) {
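        // Kahan-Babuška step: fold the running compensation into the addend,
        // then split `sum + (rhs + comp)` with Fast2Sum, keeping the rounded
        // sum and carrying the new rounding error as the compensation.
        // Fast2Sum assumes the running sum dominates the addend; this is the
        // known weakness of this variant compared to Kahan-Babuška-Neumaier.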
        let (s, c) = fast_two_sum(self.sum, rhs + self.comp);
        self.sum = s;
        self.comp = c;
    }
}

impl<T: Float> Sub<T> for KahanBabuska<T> {
    type Output = Self;
    fn sub(mut self, rhs: T) -> Self::Output {
        self -= rhs;
        self
    }
}

impl<T: Float> SubAssign<T> for KahanBabuska<T> {
    fn sub_assign(&mut self, rhs: T) {
        let (s, c) = fast_two_sum(self.sum, self.comp - rhs);
        self.sum = s;
        self.comp = c;
    }
}

impl<T: Float> Add<&T> for KahanBabuska<T> {
    type Output = Self;
    fn add(self, rhs: &T) -> Self::Output {
        self + *rhs
    }
}

impl<T: Float> AddAssign<&T> for KahanBabuska<T> {
    fn add_assign(&mut self, rhs: &T) {
        *self += *rhs;
    }
}

impl<T: Float> Sub<&T> for KahanBabuska<T> {
    type Output = Self;
    fn sub(self, rhs: &T) -> Self::Output {
        self - *rhs
    }
}

impl<T: Float> SubAssign<&T> for KahanBabuska<T> {
    fn sub_assign(&mut self, rhs: &T) {
        *self -= *rhs;
    }
}

impl<T: Float, V> Sum<V> for KahanBabuska<T>
where
    Self: AddAssign<V>,
{
    fn sum<I>(iter: I) -> Self
    where
        I: Iterator<Item = V>,
    {
        // This could be implemented as
        // iter.fold(KahanBabuska::new(), KahanBabuska::add)
        // however, using a for loop improves codegen (smaller assembly).
        let mut sum = KahanBabuska::new();
        for x in iter {
            sum += x;
        }
        sum
    }
}

/// This type is an accumulator for computing a sum with the [Kahan-Babuška-Neumaier algorithm](https://en.wikipedia.org/wiki/Kahan_summation_algorithm#Further_enhancements).
///
/// The generic parameter `T` should implement [`num_traits::float::Float`]; it is typically [`f32`] or [`f64`] and can usually be inferred.
///
/// # Examples
///
/// You can create a new empty accumulator with [`KahanBabuskaNeumaier::new()`];
/// then you can add and subtract floating-point numbers;
/// when you are done, you can retrieve the total with the [`KahanBabuskaNeumaier::total()`] method.
///
/// ```
/// # use compensated_summation::KahanBabuskaNeumaier;
/// let mut sum = KahanBabuskaNeumaier::new();
/// sum += 0.1;
/// sum += 0.2;
/// sum -= 0.3;
/// assert_eq!(sum.total(), f64::EPSILON / 8.0);
/// ```
///
/// In addition, [`KahanBabuskaNeumaier`] implements the [`std::iter::Sum`](#impl-Sum<V>-for-KahanBabuskaNeumaier<T>) trait, which means that an iterator of floating-point numbers can be summed either by calling [`KahanBabuskaNeumaier::sum()`] directly
///
/// ```
/// # use compensated_summation::KahanBabuskaNeumaier;
/// use std::iter::Sum; // remember to import the trait
/// let iter = [0.1, 0.2, -0.3].iter();
/// assert_eq!(KahanBabuskaNeumaier::sum(iter).total(), f64::EPSILON / 8.0);
/// ```
///
/// or by using its [`Iterator::sum()`] method
///
/// ```
/// # use compensated_summation::KahanBabuskaNeumaier;
/// let iter = [0.1, 0.2, -0.3].iter();
/// assert_eq!(iter.sum::<KahanBabuskaNeumaier<_>>().total(), f64::EPSILON / 8.0);
/// ```
#[derive(Clone, Debug, PartialEq)]
pub struct KahanBabuskaNeumaier<T> {
    /// Accumulated sum.
    pub sum: T,
    /// Compensation of the error.
    pub comp: T,
}

impl<T: Float> KahanBabuskaNeumaier<T> {
    /// Create a new empty accumulator.
    pub fn new() -> Self {
        Self {
            sum: T::zero(),
            comp: T::zero(),
        }
    }

    /// Get the estimated total sum.
    pub fn total(&self) -> T {
        self.sum + self.comp
    }
}

impl<T: Float> Default for KahanBabuskaNeumaier<T> {
    fn default() -> Self {
        Self::new()
    }
}

impl<T: Float> Add<T> for KahanBabuskaNeumaier<T> {
    type Output = Self;
    fn add(mut self, rhs: T) -> Self::Output {
        self += rhs;
        self
    }
}

impl<T: Float> AddAssign<T> for KahanBabuskaNeumaier<T> {
    fn add_assign(&mut self, rhs: T) {
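        // Neumaier variant: compute the exact rounding error of each addition
        // with 2Sum and accumulate it in a separate compensation term, which
        // is folded into the result only by `total()`. Unlike Fast2Sum, 2Sum
        // makes no assumption about the relative magnitudes of the operands.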
        let (s, c) = two_sum(self.sum, rhs);
        self.sum = s;
        self.comp = self.comp + c;
    }
}

impl<T: Float> Sub<T> for KahanBabuskaNeumaier<T> {
    type Output = Self;
    fn sub(mut self, rhs: T) -> Self::Output {
        self -= rhs;
        self
    }
}

impl<T: Float> SubAssign<T> for KahanBabuskaNeumaier<T> {
    fn sub_assign(&mut self, rhs: T) {
        let (s, c) = two_sub(self.sum, rhs);
        self.sum = s;
        self.comp = self.comp + c;
    }
}

impl<T: Float> Add<&T> for KahanBabuskaNeumaier<T> {
    type Output = Self;
    fn add(self, rhs: &T) -> Self::Output {
        self + *rhs
    }
}

impl<T: Float> AddAssign<&T> for KahanBabuskaNeumaier<T> {
    fn add_assign(&mut self, rhs: &T) {
        *self += *rhs;
    }
}

impl<T: Float> Sub<&T> for KahanBabuskaNeumaier<T> {
    type Output = Self;
    fn sub(self, rhs: &T) -> Self::Output {
        self - *rhs
    }
}

impl<T: Float> SubAssign<&T> for KahanBabuskaNeumaier<T> {
    fn sub_assign(&mut self, rhs: &T) {
        *self -= *rhs;
    }
}

impl<T: Float, V> Sum<V> for KahanBabuskaNeumaier<T>
where
    Self: AddAssign<V>,
{
    fn sum<I>(iter: I) -> Self
    where
        I: Iterator<Item = V>,
    {
        // This could be implemented as
        // iter.fold(KahanBabuskaNeumaier::new(), KahanBabuskaNeumaier::add)
        // however, using a for loop improves codegen (smaller assembly).
        let mut sum = KahanBabuskaNeumaier::new();
        for x in iter {
            sum += x;
        }
        sum
    }
}

/// Same as [`KahanBabuska`], but with the correct spelling of the second surname.
pub type KahanBabuška<T> = KahanBabuska<T>;

/// Same as [`KahanBabuskaNeumaier`], but with the correct spelling of the second surname.
pub type KahanBabuškaNeumaier<T> = KahanBabuskaNeumaier<T>;

/// This module is for development purposes only!
///
/// It provides additional functions and alternative implementations used in testing and benchmarking.
///
/// **DO NOT RELY ON IT!**
///
/// Initially this module was gated by `#[cfg(test)]`.
/// However, some of these functions turned out to be useful also for benchmarking and
/// for analyzing the generated assembly, and hence needed a way to be exported outside of
/// the crate. This is the reason why this module is now public and gated by
/// `#[cfg(any(test, feature = "dev"))]`.
#[cfg(any(test, feature = "dev"))]
pub mod dev;

#[cfg(test)]
mod tests {
    use crate::*;

    #[test]
    fn test_two_sum() {
        use rand::prelude::*;
        use rand_distr::LogNormal;
        use rand_xoshiro::Xoshiro256PlusPlus;

        let mut rng = Xoshiro256PlusPlus::seed_from_u64(42);
        let dist = LogNormal::new(0.0, 40.0).unwrap();

        for _ in 0..1000 {
            let a = rng.sample(dist);
            let b = rng.sample(dist);
            assert_eq!(two_sum(a, b), dev::abs_two_sum(a, b));
        }
    }

    #[test]
    fn test_two_sub() {
        use rand::prelude::*;
        use rand_distr::LogNormal;
        use rand_xoshiro::Xoshiro256PlusPlus;

        let mut rng = Xoshiro256PlusPlus::seed_from_u64(42);
        let dist = LogNormal::new(0.0, 40.0).unwrap();

        for _ in 0..1000 {
            let a = rng.sample(dist);
            let b = rng.sample(dist);
            assert_eq!(two_sub(a, b), two_sum(a, -b));
        }
    }

    #[test]
    fn kahan_123() {
        let mut k = KahanBabuska::new();
        let mut s = 0.0;
        k += 0.1;
        s += 0.1;
        k += 0.2;
        s += 0.2;
        k += -0.3;
        s += -0.3;
        assert_eq!(k.sum, 0.0);
        assert_eq!(k.comp, 0.0);
        assert_eq!(s, f64::EPSILON / 4.);
    }

    #[test]
    fn kahan_tenth() {
        let mut k = KahanBabuska::new();
        let mut s = 0.0;
        for _ in 0..10 {
            k += 0.1;
            s += 0.1;
        }
        k += -1.0;
        s += -1.0;
        assert_eq!(k.sum, 0.0);
        assert_eq!(k.comp, 0.0);
        assert_eq!(s, -f64::EPSILON / 2.);
    }

    #[test]
    fn kahan_123_iter() {
        assert_eq!(
            [0.1, 0.2, -0.3].iter().sum::<KahanBabuska<f64>>().total(),
            0.0
        );
        assert_eq!(
            [0.1, 0.2, -0.3]
                .iter()
                .cloned()
                .sum::<KahanBabuska<f64>>()
                .total(),
            0.0
        );
    }

    #[test]
    fn kahan_tenth_iter() {
        assert_eq!([0.1; 10].iter().sum::<KahanBabuska<f64>>().total(), 1.0);
        assert_eq!(
            [0.1; 10].iter().cloned().sum::<KahanBabuska<f64>>().total(),
            1.0
        );
    }

    #[test]
    fn kahan_large() {
        assert_eq!(
            [1.0, 1e100, 1.0, -1e100]
                .iter()
                .sum::<KahanBabuska<f64>>()
                .total(),
            0.0
        );
    }

    #[test]
    fn neumaier_large() {
        assert_eq!(
            [1.0, 1e100, 1.0, -1e100]
                .iter()
                .sum::<KahanBabuskaNeumaier<f64>>()
                .total(),
            2.0
        );
    }

    #[test]
    fn test_correctness() {
        use rand::prelude::*;
        use rand_distr::LogNormal;
        use rand_xoshiro::Xoshiro256PlusPlus;

        for seed in 0..100 {
            let rng = Xoshiro256PlusPlus::seed_from_u64(seed);
            let values: Vec<f64> = rng
                .sample_iter(LogNormal::new(0.0, 40.0).unwrap())
                .take(1_000)
                .collect();

            assert_eq!(
                values.iter().sum::<KahanBabuska<_>>().total(),
                dev::kahan_babuska_sum(values.iter().cloned())
            );

            assert_eq!(
                values.iter().sum::<KahanBabuskaNeumaier<_>>().total(),
                dev::kahan_babuska_neumaier_sum(values.iter().cloned())
            );
            assert_eq!(
                values.iter().sum::<KahanBabuskaNeumaier<_>>().total(),
                dev::kahan_babuska_neumaier_abs_sum(values.iter().cloned())
            );
            assert_eq!(
                values.iter().sum::<KahanBabuskaNeumaier<_>>().total(),
                dev::kahan_babuska_neumaier_abs_two_sum(values.iter().cloned())
            );
        }
    }
}