xprec/
lib.rs

1//! Fast compensated (extended-precision) arithmetic
2//!
3//! Extends the precision of basic numerical types such as `f64` by
4//! compensating for the error. For convenience, we rovide the `Df64` type,
5//! which implements  all basic arithmetic traits, but does so in quadruple
6//! (double-double) precision. This is usually faster than emulated quad
7//! precision and multi-precision by several orders of magnitude.
8//
9// Copyright (C) 2023-2025 Markus Wallerberger and others
10// SPDX-License-Identifier: MIT
11
12use core::f64;
13use std::ops::{Add, Sub};
14use num_traits::Zero;
15
16/// Type for compensated arithmetic.
17#[derive(PartialEq, PartialOrd, Clone, Copy, Debug)]
18pub struct Compensated<H, L>
19{
20    hi: H,
21    lo: L
22}
23
24impl<H, L> Compensated<H, L> {
25    /// Get the high part of the compensated value
26    pub fn hi(self) -> H {
27        self.hi
28    }
29    
30    /// Get the low part of the compensated value
31    pub fn lo(self) -> L {
32        self.lo
33    }
34}
35
36/// Compensated f64 (emulated quad precision) type.
37///
38/// Emulates quadruple precision with a pair of doubles.  This roughly doubles
39/// the mantissa bits (and thus squares the precision of double).  The range
40/// is almost the same as double, with a larger area of denormalized numbers.
41/// This is also called double-double arithmetic, compensated arithmetic, or
42/// Dekker arithmetic.
43///
44/// The rough cost in floating point operations (fl) and relative error as
45/// multiples of u² = 1.32e-32 (round-off error or half the machine epsilon) is
46/// as follows:
47///
48///   | (op)       | f64 f64 | error | Df64 f64 | error | Df64 Df64 | error |
49///   |------------|--------:|------:|---------:|------:|----------:|------:|
50///   | add_fast   |    3 fl |   0u² |     7 fl |   2u² |     17 fl |   3u² |
51///   | + -        |    6 fl |   0u² |    10 fl |   2u² |     20 fl |   3u² |
52///   | *          |    2 fl |   0u² |     6 fl |   2u² |      9 fl |   4u² |
53///   | /          |   3* fl |   1u² |    7* fl |   3u² |    28* fl |   6u² |
54///   | reciprocal |   3* fl |   1u² |          |       |    19* fl | 2.3u² |
55///   | sqrt       |   4* fl |   2u² |          |       |     8* fl |   4u² |
56///
57/// The error bounds are mostly tight analytical bounds (except for
58/// divisions).[^1]  An asterisk indicates the need for one or two double
59/// divisions, which are about an order of magnitude more expensive than
60/// regular flops on a modern CPU.
61///
62/// The table can be distilled into two rules of thumb: double-double
63/// arithmetic roughly doubles the number of significant digits at the cost of
64/// a roughly 15x slowdown compared to double arithmetic.
65///
66/// [^1]: M. Joldes, et al., ACM Trans. Math. Softw. 44, 1-27 (2018) and
67///      J.-M. Muller and L. Rideau, ACM Trans. Math. Softw. 48, 1, 9 (2022).
68///      The flop count has been reduced by 3 for divisons/reciprocals.
69///      In the case of double-double division, the bound is 10u² but largest
70///      observed error is 6u². In double by double division, we expect u². We
71///      report the largest observed error.
72pub type Df64 = Compensated<f64, f64>;
73
74impl Df64 {
75
76    /// Construct new compensated result with zero compensation.
77    #[inline(always)]
78    pub const fn new(x: f64) -> Df64 {
79        return Df64 { hi: x, lo: 0.0 };
80    }
81
82    /// Construct new compensated result for given compensation
83    ///
84    /// **Safety**: you must ensure that `lo` is a valid compensation term for
85    /// `hi`, i.e., for any finite `hi` it must hold that `hi + lo == hi`.
86    #[inline(always)]
87    pub const unsafe fn new_full(hi: f64, lo: f64) -> Df64 {
88        debug_assert!(hi + lo == hi || !hi.is_finite());
89        return Df64 { hi: hi, lo: lo };
90    }
91
92}
93
94/// Arithmetic with compensated errors.
95///
96/// This trait marks a type as a compensated arithmetic type. It maintains the
97/// result of an arithmetic operation plus a compensate, which is the
98/// difference of the operation inside `T` and the exact result (or at least
99/// more accurate result.)
100///
101/// The following operations are required:
102///
103///  | (op)       | method                      | Also known as | Exact? |
104///  |------------|-----------------------------|---------------|--------|
105///  | `a + b`    | `::compensated_sum(a, b)`   | 2sum(a, b)    | yes    |
106///  | `a - b`    | `::compensated_diff(a, b)`  | 2diff(a, b)   | yes    |
107///  | `a * b`    | `::compensated_prod(a, b)`  | 2prod(a, b)   | yes    |
108///  | `a / b`    | `::compensated_ratio(a, b)` |               | no     |
109///  | `a.sqrt()` | `::compensated_sqrt(a, b)`  |               | no     |
110///
111/// For sum and difference, there are `unsafe` versions, which may be faster
112/// because they may assume that the arguments are ordered by magnitude, i.e,
113/// `a.abs() >= b.abs()`. This constraint can be slightly relaxed.[^1]
114///
115///  | (op)    | unsafe method                   | Also known as     | Exact? |
116///  |---------|---------------------------------|-------------------|--------|
117///  | `a + b` | `::compensated_fast_sum(a, b)`  | fast2sum(a, b)    | yes*   |
118///  | `a - b` | `::compensated_fast_diff(a, b)` | fast2diff(a, b)   | yes*   |
119///
120/// **Warning**: Compensated arithmetic is not guaranteed to conform to IEEE
121/// rules when it comes to infinities. One usually gets NaN in this case.
122///
123/// [^1]: J.-M. Muller and L. Rideau, ACM Trans. Math. Softw. 48, 1, 9 (2022).
124pub trait CompensatedArithmetic<T> : From<T> + Into<T>
125{
126    /// type of the compensate.
127    type Compensate;
128
129    /// Return the compensate (lo part)
130    ///
131    /// Return the compensate, i.e., the difference of the current value and
132    /// its `T` approximation, `self.into<T>()`.
133    fn compensate(self: &Self) -> Self::Compensate;
134
135    /// Add `a` and `b` while compensating exactly for the error.
136    ///
137    /// Adds two values in extended precision, where the result can be
138    /// represented exactly. On floating point numbers, this is known as
139    /// "2sum" or compensated summation.
140    fn compensated_sum(a: T, b: T) -> Self;
141
142    /// Subtract `b` from `a` while compensating exactly for the error.
143    ///
144    /// Subtracts two values in extended precision, where the result can be
145    /// represented exactly.
146    fn compensated_diff(a: T, b: T) -> Self;
147
148    /// Multiply `a` with `b` while compensating exactly for the error.
149    ///
150    /// Multiplies two values in extended precision, where the result can be
151    /// represented exactly.
152    fn compensated_prod(a: T, b: T) -> Self;
153
154    /// Divides `a` by `b` while compensating approximately for the error.
155    ///
156    /// Divides two values in extended precision.
157    fn compensated_ratio(a: T, b: T) -> Self;
158
159    /// Compensated square root operation
160    ///
161    /// Takes the square root and maintains correction term.
162    fn compensated_sqrt(a: T) -> Self;
163
164    /// Add `large` and `small` exactly, assuming `large.abs() >= small.abs()`.
165    ///
166    /// Adds a small value `small` to a large value `large` in extended
167    /// precision, where the result can be represented exactly if `a` has
168    /// larger magnitude than `b`. (For double-double arithmetic, this
169    /// condition can be slightly relaxed.). On floating point numbers, this is
170    /// known as Kahan summation or "fast2sum".
171    ///
172    /// **Safety**: you must make sure that large is indeed the larger number.
173    unsafe fn compensated_fast_sum(large: T, small: T) -> Self {
174        return Self::compensated_sum(large, small);
175    }
176
177    /// Subtract `small` from `large` exactly, assuming `large.abs() >= small.abs()`.
178    ///
179    /// Subtracts a small value `small` from a large value `large` in extended
180    /// precision, where the result can be represented exactly if `a` has
181    /// larger magnitude than `b`. (For double-double arithmetic, this
182    /// condition can be slightly relaxed.).
183    ///
184    /// **Safety**: you must make sure that large is indeed the larger number.
185    unsafe fn compensated_fast_diff(large: T, small: T) -> Self {
186        return Self::compensated_diff(large, small);
187    }
188}
189/// Addition under the assumption of ordered arguments.
190pub trait AddFast<T = Self> : Add<T> {
191    /// Add `small` to `self`, assuming `small.abs() <= self.abs()`.
192    ///
193    /// Add a small value `small` to `self`, assuming that `small` is
194    /// smaller in magnitude than `self`. Under some specific circumstances,
195    /// this may lead to more efficient code.
196    ///
197    /// **Safety**: you must make sure that `small` is indeed the smaller number.
198    unsafe fn add_fast(self, small: T) -> Self::Output;
199}
200
201/// Subtraction under the assumption of ordered arguments.
202pub trait SubFast<T = Self> : Sub<T>{
203    /// Subtract `small` from `self`, assuming `small.abs() <= self.abs()`.
204    ///
205    /// Subtract a small value `small` from `self`, assuming that `small` is
206    /// smaller in magnitude than `self`. Under some specific circumstances,
207    /// this may lead to more efficient code.
208    ///
209    /// **Safety**: you must make sure that `small` is indeed the smaller number.
210    unsafe fn sub_fast(self, small: T) -> Self::Output;
211}
212
213
214// Public modules
215pub mod arith;
216pub mod checks;
217pub mod gauss;
218
219// Private modules
220mod circular;
221mod consts;
222mod convert;
223mod exp;
224mod funcs;
225mod hyperbolic;
226mod roots;
227mod round;
228mod traits;
229mod utils;
230
231#[cfg(test)]
232mod test_utils;
xprec/lib.rs

xprec/
lib.rs