xprec/lib.rs
1//! Fast compensated (extended-precision) arithmetic
2//!
3//! Extends the precision of basic numerical types such as `f64` by
4//! compensating for the error. For convenience, we rovide the `Df64` type,
5//! which implements all basic arithmetic traits, but does so in quadruple
6//! (double-double) precision. This is usually faster than emulated quad
7//! precision and multi-precision by several orders of magnitude.
8//
9// Copyright (C) 2023-2025 Markus Wallerberger and others
10// SPDX-License-Identifier: MIT
11
12use core::f64;
13use std::ops::{Add, Sub};
14use num_traits::Zero;
15
16/// Type for compensated arithmetic.
17#[derive(PartialEq, PartialOrd, Clone, Copy, Debug)]
18pub struct Compensated<H, L>
19{
20 hi: H,
21 lo: L
22}
23
24impl<H, L> Compensated<H, L> {
25 /// Get the high part of the compensated value
26 pub fn hi(self) -> H {
27 self.hi
28 }
29
30 /// Get the low part of the compensated value
31 pub fn lo(self) -> L {
32 self.lo
33 }
34}
35
36/// Compensated f64 (emulated quad precision) type.
37///
38/// Emulates quadruple precision with a pair of doubles. This roughly doubles
39/// the mantissa bits (and thus squares the precision of double). The range
40/// is almost the same as double, with a larger area of denormalized numbers.
41/// This is also called double-double arithmetic, compensated arithmetic, or
42/// Dekker arithmetic.
43///
44/// The rough cost in floating point operations (fl) and relative error as
45/// multiples of u² = 1.32e-32 (round-off error or half the machine epsilon) is
46/// as follows:
47///
48/// | (op) | f64 f64 | error | Df64 f64 | error | Df64 Df64 | error |
49/// |------------|--------:|------:|---------:|------:|----------:|------:|
50/// | add_fast | 3 fl | 0u² | 7 fl | 2u² | 17 fl | 3u² |
51/// | + - | 6 fl | 0u² | 10 fl | 2u² | 20 fl | 3u² |
52/// | * | 2 fl | 0u² | 6 fl | 2u² | 9 fl | 4u² |
53/// | / | 3* fl | 1u² | 7* fl | 3u² | 28* fl | 6u² |
54/// | reciprocal | 3* fl | 1u² | | | 19* fl | 2.3u² |
55/// | sqrt | 4* fl | 2u² | | | 8* fl | 4u² |
56///
57/// The error bounds are mostly tight analytical bounds (except for
58/// divisions).[^1] An asterisk indicates the need for one or two double
59/// divisions, which are about an order of magnitude more expensive than
60/// regular flops on a modern CPU.
61///
62/// The table can be distilled into two rules of thumb: double-double
63/// arithmetic roughly doubles the number of significant digits at the cost of
64/// a roughly 15x slowdown compared to double arithmetic.
65///
66/// [^1]: M. Joldes, et al., ACM Trans. Math. Softw. 44, 1-27 (2018) and
67/// J.-M. Muller and L. Rideau, ACM Trans. Math. Softw. 48, 1, 9 (2022).
68/// The flop count has been reduced by 3 for divisons/reciprocals.
69/// In the case of double-double division, the bound is 10u² but largest
70/// observed error is 6u². In double by double division, we expect u². We
71/// report the largest observed error.
72pub type Df64 = Compensated<f64, f64>;
73
74impl Df64 {
75
76 /// Construct new compensated result with zero compensation.
77 #[inline(always)]
78 pub const fn new(x: f64) -> Df64 {
79 return Df64 { hi: x, lo: 0.0 };
80 }
81
82 /// Construct new compensated result for given compensation
83 ///
84 /// **Safety**: you must ensure that `lo` is a valid compensation term for
85 /// `hi`, i.e., for any finite `hi` it must hold that `hi + lo == hi`.
86 #[inline(always)]
87 pub const unsafe fn new_full(hi: f64, lo: f64) -> Df64 {
88 debug_assert!(hi + lo == hi || !hi.is_finite());
89 return Df64 { hi: hi, lo: lo };
90 }
91
92}
93
94/// Arithmetic with compensated errors.
95///
96/// This trait marks a type as a compensated arithmetic type. It maintains the
97/// result of an arithmetic operation plus a compensate, which is the
98/// difference of the operation inside `T` and the exact result (or at least
99/// more accurate result.)
100///
101/// The following operations are required:
102///
103/// | (op) | method | Also known as | Exact? |
104/// |------------|-----------------------------|---------------|--------|
105/// | `a + b` | `::compensated_sum(a, b)` | 2sum(a, b) | yes |
106/// | `a - b` | `::compensated_diff(a, b)` | 2diff(a, b) | yes |
107/// | `a * b` | `::compensated_prod(a, b)` | 2prod(a, b) | yes |
108/// | `a / b` | `::compensated_ratio(a, b)` | | no |
109/// | `a.sqrt()` | `::compensated_sqrt(a, b)` | | no |
110///
111/// For sum and difference, there are `unsafe` versions, which may be faster
112/// because they may assume that the arguments are ordered by magnitude, i.e,
113/// `a.abs() >= b.abs()`. This constraint can be slightly relaxed.[^1]
114///
115/// | (op) | unsafe method | Also known as | Exact? |
116/// |---------|---------------------------------|-------------------|--------|
117/// | `a + b` | `::compensated_fast_sum(a, b)` | fast2sum(a, b) | yes* |
118/// | `a - b` | `::compensated_fast_diff(a, b)` | fast2diff(a, b) | yes* |
119///
120/// **Warning**: Compensated arithmetic is not guaranteed to conform to IEEE
121/// rules when it comes to infinities. One usually gets NaN in this case.
122///
123/// [^1]: J.-M. Muller and L. Rideau, ACM Trans. Math. Softw. 48, 1, 9 (2022).
124pub trait CompensatedArithmetic<T> : From<T> + Into<T>
125{
126 /// type of the compensate.
127 type Compensate;
128
129 /// Return the compensate (lo part)
130 ///
131 /// Return the compensate, i.e., the difference of the current value and
132 /// its `T` approximation, `self.into<T>()`.
133 fn compensate(self: &Self) -> Self::Compensate;
134
135 /// Add `a` and `b` while compensating exactly for the error.
136 ///
137 /// Adds two values in extended precision, where the result can be
138 /// represented exactly. On floating point numbers, this is known as
139 /// "2sum" or compensated summation.
140 fn compensated_sum(a: T, b: T) -> Self;
141
142 /// Subtract `b` from `a` while compensating exactly for the error.
143 ///
144 /// Subtracts two values in extended precision, where the result can be
145 /// represented exactly.
146 fn compensated_diff(a: T, b: T) -> Self;
147
148 /// Multiply `a` with `b` while compensating exactly for the error.
149 ///
150 /// Multiplies two values in extended precision, where the result can be
151 /// represented exactly.
152 fn compensated_prod(a: T, b: T) -> Self;
153
154 /// Divides `a` by `b` while compensating approximately for the error.
155 ///
156 /// Divides two values in extended precision.
157 fn compensated_ratio(a: T, b: T) -> Self;
158
159 /// Compensated square root operation
160 ///
161 /// Takes the square root and maintains correction term.
162 fn compensated_sqrt(a: T) -> Self;
163
164 /// Add `large` and `small` exactly, assuming `large.abs() >= small.abs()`.
165 ///
166 /// Adds a small value `small` to a large value `large` in extended
167 /// precision, where the result can be represented exactly if `a` has
168 /// larger magnitude than `b`. (For double-double arithmetic, this
169 /// condition can be slightly relaxed.). On floating point numbers, this is
170 /// known as Kahan summation or "fast2sum".
171 ///
172 /// **Safety**: you must make sure that large is indeed the larger number.
173 unsafe fn compensated_fast_sum(large: T, small: T) -> Self {
174 return Self::compensated_sum(large, small);
175 }
176
177 /// Subtract `small` from `large` exactly, assuming `large.abs() >= small.abs()`.
178 ///
179 /// Subtracts a small value `small` from a large value `large` in extended
180 /// precision, where the result can be represented exactly if `a` has
181 /// larger magnitude than `b`. (For double-double arithmetic, this
182 /// condition can be slightly relaxed.).
183 ///
184 /// **Safety**: you must make sure that large is indeed the larger number.
185 unsafe fn compensated_fast_diff(large: T, small: T) -> Self {
186 return Self::compensated_diff(large, small);
187 }
188}
189/// Addition under the assumption of ordered arguments.
190pub trait AddFast<T = Self> : Add<T> {
191 /// Add `small` to `self`, assuming `small.abs() <= self.abs()`.
192 ///
193 /// Add a small value `small` to `self`, assuming that `small` is
194 /// smaller in magnitude than `self`. Under some specific circumstances,
195 /// this may lead to more efficient code.
196 ///
197 /// **Safety**: you must make sure that `small` is indeed the smaller number.
198 unsafe fn add_fast(self, small: T) -> Self::Output;
199}
200
201/// Subtraction under the assumption of ordered arguments.
202pub trait SubFast<T = Self> : Sub<T>{
203 /// Subtract `small` from `self`, assuming `small.abs() <= self.abs()`.
204 ///
205 /// Subtract a small value `small` from `self`, assuming that `small` is
206 /// smaller in magnitude than `self`. Under some specific circumstances,
207 /// this may lead to more efficient code.
208 ///
209 /// **Safety**: you must make sure that `small` is indeed the smaller number.
210 unsafe fn sub_fast(self, small: T) -> Self::Output;
211}
212
213
214// Public modules
215pub mod arith;
216pub mod checks;
217pub mod gauss;
218
219// Private modules
220mod circular;
221mod consts;
222mod convert;
223mod exp;
224mod funcs;
225mod hyperbolic;
226mod roots;
227mod round;
228mod traits;
229mod utils;
230
231#[cfg(test)]
232mod test_utils;