qd/
lib.rs

1#![cfg_attr(not(test), no_std)]
2
3#[cfg(feature = "std")]
4extern crate std;
5
6use core::cmp::Ordering;
7use core::ops::*;
8
9use bytemuck::{Pod, Zeroable};
10
11mod const_fma;
12mod const_imp;
13
14#[cfg(not(feature = "std"))]
15macro_rules! pick {
16	($libm: expr, $std: expr) => {
17		$libm
18	};
19}
20
21#[cfg(feature = "std")]
22macro_rules! pick {
23	($libm: expr, $std: expr) => {
24		$std
25	};
26}
27
28mod imp {
29	use super::*;
30
31	pub use super::const_imp::*;
32
33	pub fn fma(a: f64, b: f64, c: f64) -> f64 {
34		pick!(libm::fma, f64::mul_add)(a, b, c)
35	}
36
37	pub fn sqrt(a: f64) -> f64 {
38		pick!(libm::sqrt, f64::sqrt)(a)
39	}
40
41	pub fn two_prod(a: f64, b: f64) -> Quad {
42		let p = a * b;
43		let e = fma(a, b, -p);
44		Quad(p, e)
45	}
46
47	pub fn mul(a: Quad, b: Quad) -> Quad {
48		let Quad(p0, e1) = two_prod(a.0, b.0);
49		let e1 = fma(a.0, b.1, e1);
50		let e1 = fma(a.1, b.0, e1);
51
52		const_imp::quick_two_sum(p0, e1)
53	}
54}
55
56pub mod simd;
57
58/// extended precision floating point type.
59///
60/// math operations assume that `self.1.abs() < self.0.abs() * ulp / 2.0`
61#[derive(Copy, Clone, Debug, PartialEq, PartialOrd)]
62#[repr(C)]
63pub struct Quad<T = f64>(pub T, pub T);
64
65impl Quad {
66	pub const DIGITS: u32 = 31;
67	pub const EPSILON: Self = Self(f64::EPSILON * f64::EPSILON, 0.0);
68	pub const FRAC_1_LN_10: Self = Self(0.4342944819032518, 1.098319650216765e-17);
69	pub const FRAC_1_LN_2: Self = Self(1.4426950408889634, 2.0355273740931033e-17);
70	pub const INFINITY: Self = Self(f64::INFINITY, 0.0);
71	pub const LN_10: Self = Self(2.302585092994046, -2.1707562233822494e-16);
72	pub const LN_2: Self = Self(0.6931471805599453, 2.3190468138462996e-17);
73	pub const MANTISSA_DIGITS: u32 = 105;
74	pub const MAX: Self = Self(f64::MAX, f64::MAX * f64::EPSILON / 2.0);
75	pub const MAX_EXP: i32 = f64::MAX_EXP;
76	pub const MIN: Self = Self(-Self::MAX.0, -Self::MAX.1);
77	pub const MIN_EXP: i32 = f64::MIN_EXP;
78	pub const MIN_POSITIVE: Self = Self(f64::MIN_POSITIVE, 0.0);
79	pub const NAN: Self = Self(f64::NAN, f64::NAN);
80	pub const NEG_INFINITY: Self = Self(f64::NEG_INFINITY, 0.0);
81	pub const ONE: Self = Self(1.0, 0.0);
82	pub const PI: Self = Self(3.141592653589793, 1.2246467991473532e-16);
83	pub const RADIX: u32 = 2;
84	pub const ZERO: Self = Self(0.0, 0.0);
85
86	#[inline(always)]
87	pub const fn from_f64(value: f64) -> Self {
88		Quad(value, 0.0)
89	}
90
91	pub const fn add_estimate(self, rhs: Self) -> Self {
92		const_imp::add_estimate(self, rhs)
93	}
94
95	pub const fn add_accurate(self, rhs: Self) -> Self {
96		const_imp::add_accurate(self, rhs)
97	}
98
99	pub const fn sub_estimate(self, rhs: Self) -> Self {
100		const_imp::add_estimate(self, rhs.neg())
101	}
102
103	pub const fn sub_accurate(self, rhs: Self) -> Self {
104		const_imp::add_accurate(self, rhs.neg())
105	}
106
107	pub const fn neg(self) -> Self {
108		Quad(-self.0, -self.1)
109	}
110
111	pub const fn abs(self) -> Self {
112		const_imp::abs(self)
113	}
114
115	pub fn mul(self, rhs: Self) -> Self {
116		imp::mul(self, rhs)
117	}
118
119	pub const fn const_mul(self, rhs: Self) -> Self {
120		const_imp::mul(self, rhs)
121	}
122
123	pub const fn const_div(self, rhs: Self) -> Self {
124		let mut quotient = Quad(self.0 / rhs.0, 0.0);
125		let mut r = self.sub_accurate(rhs.const_mul(quotient));
126		quotient.1 = r.0 / rhs.0;
127		r = r.sub_accurate(rhs.const_mul(Quad(quotient.1, 0.0)));
128
129		let update = r.0 / rhs.0;
130
131		quotient = const_imp::quick_two_sum(quotient.0, quotient.1);
132		quotient = quotient.add_accurate(Quad(update, 0.0));
133		quotient
134	}
135
136	pub const fn const_recip(self) -> Self {
137		Self::ONE.const_div(self)
138	}
139
140	pub fn recip(self) -> Self {
141		Self::ONE.div(self)
142	}
143
144	pub fn div(self, rhs: Self) -> Self {
145		self.__div__(rhs)
146	}
147
148	fn __mul__(self, rhs: Self) -> Self {
149		imp::mul(self, rhs)
150	}
151
152	fn __div__(self, rhs: Self) -> Self {
153		let mut quotient = Quad(self.0 / rhs.0, 0.0);
154		let mut r = self.sub_accurate(rhs.mul(quotient));
155		quotient.1 = r.0 / rhs.0;
156		r = r.sub_accurate(rhs.mul(Quad(quotient.1, 0.0)));
157
158		let update = r.0 / rhs.0;
159
160		quotient = imp::quick_two_sum(quotient.0, quotient.1);
161		quotient = quotient.add_accurate(Quad(update, 0.0));
162		quotient
163	}
164
165	pub const fn eq(self, rhs: Self) -> bool {
166		self.0 == rhs.0 && self.1 == rhs.1
167	}
168
169	pub const fn ne(self, rhs: Self) -> bool {
170		self.0 != rhs.0 || self.1 != rhs.1
171	}
172
173	pub const fn is_nan(self) -> bool {
174		self.0.is_nan() || self.1.is_nan()
175	}
176
177	pub const fn is_finite(self) -> bool {
178		self.0.is_finite() && self.1.is_finite()
179	}
180
181	pub const fn partial_cmp(self, rhs: Self) -> Option<Ordering> {
182		if self.is_nan() || rhs.is_nan() {
183			return None;
184		}
185
186		if self.0 < rhs.0 {
187			Some(Ordering::Less)
188		} else if self.0 > rhs.0 {
189			Some(Ordering::Greater)
190		} else {
191			if self.1 < rhs.1 {
192				Some(Ordering::Less)
193			} else if self.1 > rhs.1 {
194				Some(Ordering::Greater)
195			} else {
196				Some(Ordering::Equal)
197			}
198		}
199	}
200
201	pub const fn lt(self, rhs: Self) -> bool {
202		self.0 < rhs.0 || (self.0 == rhs.0 && self.1 < rhs.1)
203	}
204
205	pub const fn le(self, rhs: Self) -> bool {
206		self.0 <= rhs.0 || (self.0 == rhs.0 && self.1 <= rhs.1)
207	}
208
209	pub const fn gt(self, rhs: Self) -> bool {
210		rhs.lt(self)
211	}
212
213	pub const fn ge(self, rhs: Self) -> bool {
214		rhs.le(self)
215	}
216
217	pub const fn const_sqrt(self) -> Self {
218		if self.0 == 0.0 {
219			return Self::ZERO;
220		}
221
222		let mut iterate;
223		{
224			let inv_sqrt = 1.0 / const_imp::sqrt(self.0);
225			let left = self.0 * inv_sqrt;
226			let right = self.sub_accurate(const_imp::two_prod(left, left)).0 * (inv_sqrt / 2.0);
227			iterate = const_imp::two_sum(left, right);
228		}
229		{
230			iterate = Self::ONE.const_div(iterate);
231			let left = self.0 * iterate.0;
232			let right = self.sub_accurate(const_imp::two_prod(left, left)).0 * (iterate.0 / 2.0);
233			iterate = const_imp::two_sum(left, right);
234		}
235		iterate
236	}
237
238	pub fn sqrt(self) -> Self {
239		if self.0 == 0.0 {
240			return Self::ZERO;
241		}
242
243		let mut iterate;
244		{
245			let inv_sqrt = 1.0 / imp::sqrt(self.0);
246			let left = self.0 * inv_sqrt;
247			let right = self.sub_accurate(imp::two_prod(left, left)).0 * (inv_sqrt / 2.0);
248			iterate = imp::two_sum(left, right);
249		}
250		{
251			iterate = Self::ONE.div(iterate);
252			let left = self.0 * iterate.0;
253			let right = self.sub_accurate(imp::two_prod(left, left)).0 * (iterate.0 / 2.0);
254			iterate = imp::two_sum(left, right);
255		}
256		iterate
257	}
258
259	pub fn exp(self) -> Self {
260		let value = self;
261		let exp_max = 709.0;
262		if value.0 <= -exp_max {
263			return Self(0.0, 0.0);
264		}
265		if value.0 >= exp_max {
266			return Self::INFINITY;
267		}
268		if value.0 == 0.0 {
269			return Self(1.0, 0.0);
270		}
271
272		let shift = pick!(libm::floor, f64::floor)(value.0 / Self::LN_2.0 + 0.5);
273
274		let num_squares = 9;
275		let num_terms = 9;
276
277		let scale = (1u32 << num_squares) as f64;
278		let inv_scale = scale.recip();
279
280		let r = (value - Self::LN_2 * Self(shift, 0.0)) * Self::from_f64(inv_scale);
281
282		let mut r_power = r * r;
283		let mut iterate = r + r_power * Self::from_f64(0.5);
284
285		r_power = r_power * r;
286
287		let mut coefficient = Self(6.0, 0.0).recip();
288		let mut term = coefficient * r_power;
289
290		iterate = iterate + term;
291		let tolerance = Self::EPSILON.0 * inv_scale;
292
293		for j in 4..num_terms {
294			r_power = r_power * r;
295			coefficient = coefficient / Self(j as f64, 0.0);
296			term = coefficient * r_power;
297			iterate = iterate + term;
298
299			if const_imp::fabs(term.0) <= tolerance {
300				break;
301			}
302		}
303
304		for _ in 0..num_squares {
305			iterate = iterate * iterate + iterate * Self::from_f64(2.0);
306		}
307
308		iterate = iterate + Self(1.0, 0.0);
309		let shift = pick!(libm::pow, f64::powi)(2.0f64, shift as _);
310
311		iterate * Self::from_f64(shift)
312	}
313
314	pub fn ln(self) -> Self {
315		let value = self;
316		if value.0 < 0.0 {
317			return Self::NAN;
318		}
319		if value.0 == 0.0 {
320			return Self::NEG_INFINITY;
321		}
322
323		let mut x = Self(pick!(libm::log, f64::ln)(self.0), 0.0);
324
325		x = x + value * (-x).exp();
326		x = x - Self(1.0, 0.0);
327
328		x
329	}
330
331	pub fn log2(self) -> Self {
332		self.ln() * Self::FRAC_1_LN_2
333	}
334
335	pub fn log10(self) -> Self {
336		self.ln() * Self::FRAC_1_LN_10
337	}
338
339	pub fn trunc(self) -> Self {
340		let trunc = pick!(libm::trunc, f64::trunc);
341		let trunc = Self(trunc(self.0), trunc(self.1));
342
343		if trunc.0 == self.0 { trunc } else { Quad(trunc.0, 0.0) }
344	}
345}
346
347macro_rules! impl_op {
348	({$(
349		impl $op:ident for $ty:ty {
350			fn $op_fn:ident($self:ident, $rhs:ident: Self) -> Self::Output $body:block
351		}
352	)*}) => {$(
353		impl $op for $ty {
354			type Output = $ty;
355			#[inline(always)]
356			fn $op_fn($self, $rhs: Self) -> Self::Output $body
357		}
358		impl $op<&$ty> for $ty {
359			type Output = $ty;
360			#[inline(always)]
361			fn $op_fn($self, $rhs: &$ty) -> Self::Output { let $rhs = *$rhs; $body}
362		}
363		impl $op<$ty> for &$ty {
364			type Output = $ty;
365			#[inline(always)]
366			fn $op_fn($self, $rhs: $ty) -> Self::Output $body
367		}
368		impl $op<&$ty> for &$ty {
369			type Output = $ty;
370			#[inline(always)]
371			fn $op_fn($self, $rhs: &$ty) -> Self::Output { let $rhs = *$rhs; $body}
372		}
373	)*};
374}
375
376macro_rules! impl_assign_op {
377	({$(
378		impl $op:ident for $ty:ty {
379			fn $op_fn:ident(&mut $self:ident, $rhs:ident: Self) $body:block
380		}
381	)*}) => {$(
382		impl $op for $ty {
383			#[inline(always)]
384			fn $op_fn(&mut $self, $rhs: Self) $body
385		}
386		impl $op<&$ty> for $ty {
387			#[inline(always)]
388			fn $op_fn(&mut $self, $rhs: &$ty) { let $rhs = *$rhs; $body}
389		}
390	)*};
391}
392
393impl Neg for Quad {
394	type Output = Quad;
395
396	#[inline(always)]
397	fn neg(self) -> Self::Output {
398		Quad(-self.0, -self.1)
399	}
400}
401impl Neg for &Quad {
402	type Output = Quad;
403
404	#[inline(always)]
405	fn neg(self) -> Self::Output {
406		Quad(-self.0, -self.1)
407	}
408}
409
410impl_op!({
411	impl Add for Quad {
412		fn add(self, rhs: Self) -> Self::Output {
413			self.add_estimate(rhs)
414		}
415	}
416	impl Sub for Quad {
417		fn sub(self, rhs: Self) -> Self::Output {
418			self.sub_estimate(rhs)
419		}
420	}
421	impl Mul for Quad {
422		fn mul(self, rhs: Self) -> Self::Output {
423			self.__mul__(rhs)
424		}
425	}
426	impl Div for Quad {
427		fn div(self, rhs: Self) -> Self::Output {
428			self.__div__(rhs)
429		}
430	}
431	impl Rem for Quad {
432		fn rem(self, rhs: Self) -> Self::Output {
433			self - (self / rhs).trunc() * rhs
434		}
435	}
436});
437
438impl_assign_op!({
439	impl AddAssign for Quad {
440		fn add_assign(&mut self, rhs: Self) {
441			*self = *self + rhs
442		}
443	}
444	impl SubAssign for Quad {
445		fn sub_assign(&mut self, rhs: Self) {
446			*self = *self - rhs
447		}
448	}
449	impl MulAssign for Quad {
450		fn mul_assign(&mut self, rhs: Self) {
451			*self = *self * rhs
452		}
453	}
454	impl DivAssign for Quad {
455		fn div_assign(&mut self, rhs: Self) {
456			*self = *self / rhs
457		}
458	}
459	impl RemAssign for Quad {
460		fn rem_assign(&mut self, rhs: Self) {
461			*self = *self % rhs
462		}
463	}
464});
465
466impl From<f64> for Quad {
467	#[inline(always)]
468	fn from(value: f64) -> Self {
469		Quad(value, 0.0)
470	}
471}
472
473impl num_traits::Zero for Quad {
474	fn zero() -> Self {
475		Self::ZERO
476	}
477
478	fn is_zero(&self) -> bool {
479		*self == Self::ZERO
480	}
481}
482
483impl num_traits::One for Quad {
484	fn one() -> Self {
485		Self::ONE
486	}
487
488	fn is_one(&self) -> bool {
489		*self == Self::ONE
490	}
491}
492impl num_traits::Num for Quad {
493	type FromStrRadixErr = num_traits::ParseFloatError;
494
495	fn from_str_radix(str: &str, radix: u32) -> Result<Self, Self::FromStrRadixErr> {
496		let _ = str;
497		let _ = radix;
498
499		Err(num_traits::ParseFloatError {
500			kind: num_traits::FloatErrorKind::Invalid,
501		})
502	}
503}
504
505#[cfg(test)]
506mod tests {
507	use super::*;
508
509	#[test]
510	fn test_sqrt() {
511		let x = Quad(3.5, 0.0);
512
513		let y = x.sqrt();
514		assert!(y * y - x < Quad::from(1e-30));
515
516		const {
517			let x = Quad(2.0, 0.0);
518			let y = x.const_sqrt();
519			assert!((y.const_mul(y).sub_accurate(x).abs()).lt(Quad::from_f64(1e-30)));
520		};
521	}
522
523	#[test]
524	fn test_rem() {
525		let x = Quad::from(36.0) / Quad::from(10.0);
526		assert!((x % Quad::from(0.5) - Quad::from(10.0).recip()).abs() < Quad::from(1e-30));
527	}
528}
529
530unsafe impl<T: Zeroable> Zeroable for Quad<T> {}
531unsafe impl<T: Pod> Pod for Quad<T> {}
qd/lib.rs

qd/
lib.rs