fast_fp/
lib.rs

1#![doc = include_str!("../README.md")]
2#![cfg_attr(docsrs, feature(doc_cfg))]
3
4use core::{
5    cmp, fmt,
6    iter::{Product, Sum},
7    num::FpCategory,
8    ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Rem, RemAssign, Sub, SubAssign},
9};
10
// Generates inherent methods on `$wrapper` that forward to the same-named method of the
// primitive `$prim`. The receiver and every argument are first converted with `freeze_raw()`,
// the primitive method is called on those raw values, and the result is re-wrapped with
// `<$wrapper>::new`.
//
// Two signature shapes are accepted (a single invocation must use one shape throughout):
//   * by value:     `fn name(self, arg: Self, ...) -> Self;`
//   * by reference: `fn name(&self, arg: &Self, ...) -> Self;`
//
// Attributes (including doc comments) and the visibility written on each signature are copied
// onto the generated method, which is additionally marked `#[inline]`.
macro_rules! forward_freeze_self {
    // Receiver and arguments taken by value.
    ($wrapper:ident, $prim:ident
     $(
         $(#[$meta:meta])*
         $vis:vis fn $method:ident (self $(, $param:ident : Self)* ) -> Self ;
     )*) => {
        $(
            $(#[$meta])*
            #[inline]
            $vis fn $method(self $(, $param: Self)*) -> Self {
                let raw = <$prim>::$method(self.freeze_raw() $(, $param.freeze_raw())*);
                <$wrapper>::new(raw)
            }
        )*
    };

    // Receiver and arguments taken by shared reference.
    ($wrapper:ident, $prim:ident
     $(
         $(#[$meta:meta])*
         $vis:vis fn $method:ident (&self $(, $param:ident : &Self)* ) -> Self ;
     )*) => {
        $(
            $(#[$meta])*
            #[inline]
            $vis fn $method(&self $(, $param: &Self)*) -> Self {
                let raw = <$prim>::$method(self.freeze_raw() $(, $param.freeze_raw())*);
                <$wrapper>::new(raw)
            }
        )*
    };
}
40
41mod math;
42mod nalgebra;
43mod num_traits;
44
45mod poison;
46use poison::MaybePoison;
47
48// The big challenge with fast-math in general is avoiding UB, and to a lesser extent unspecified
49// values. LLVM's fast operations document "poison" behavior when given invalid inputs; poison
50// values have a relatively consistent behavior (stuff like transitivity), defined cases for UB,
51// and importantly can be limited in scope by freezing to a fixed value.
52//
53// This library manages these poison values to prevent UB. On the rust side, potentially-poison
54// values are stored in a `MaybePoison` type, similar to the std's `MaybeUninit`. This helps ensure
55// that the values would not trigger UB based on rust's semantics (for example, avoiding questions
56// of whether all bit patterns of a primitive are valid). On the C side, operations are split into
57// two groups: poison "safe" and poison "unsafe". Poison safe operations are ones which can accept
58// any input value without triggering any UB. The operation may produce a poison value, for example
59// `1.f / 0.f` with finite-math-only enabled, but not UB. Poison unsafe operations are ones which
60// could trigger UB for some input value(s). These two definitions follow LLVM's documentation on
61// poison, which explains poison can be relaxed to any value for a type, including `undef`.
62// Therefore, if poison is passed to an operation it could be relaxed to any value; if some value
63// could trigger UB, then so can poison.
64//
65// Poison safe operations are called with input values normally. They don't produce UB, so it's
66// safe to call no matter the input. The operation is assumed to potentially produce poison itself,
67// so the output is always wrapped in a `MaybePoison`.
68//
69// Poison unsafe operations must take certain precautions. First, any input arguments that are
70// `MaybePoison` are frozen using LLVM's `freeze` instruction. This produces a value with an
71// unspecified, but fixed, value which now won't be relaxed any further. Additionally, these
72// operations are compiled without any flags that potentially introduce poison, regardless of
73// enabled crate features. This ensures that the operation internally should not produce any poison
74// regardless of input value. These two steps together preclude any poison values, which should
75// prevent UB (assuming the operation was safe to call in the first place).
76//
// All operations in Rust are considered poison unsafe, and therefore must always freeze the value
// before using it. Freezing produces a regular f32/f64 with an unspecified but fixed value.
79//
80// Prior art and references
81//
82// https://github.com/rust-lang/rust/issues/21690
83// Task for general purpose fast-math in rust lang. Discussions about the right approach
84// and generalizability, including whether it should be type-based or annotation based. fast_fp
85// uses types because it's the only option available in user space, and gets good optimizations
86// useful in practice
87//
88// https://docs.rs/fast-floats/0.2.0/fast_floats/index.html
89// A crate that wraps fast intrinsics in types. Intrinsics only apply to basic ops, and they didn't
90// address poison propagation, leaving constructors unsafe
91//
92// https://llvm.org/docs/LangRef.html#fast-math-flags
93// LLVM's documentation on fast-math
94//
95// https://llvm.org/docs/LangRef.html#poisonvalues
96// LLVM's documentation on poison
97//
98// https://github.com/rust-lang/unsafe-code-guidelines/issues/71
99// notes on the validity of primitive bit patterns
100
/// The error returned by the checked constructors of [`FF32`] and [`FF64`]
///
/// It is produced when the value handed to the constructor is infinite or NaN. The type carries
/// no data; the private field only prevents construction outside of this crate.
// `Eq` is derived alongside `PartialEq`: with no data inside, equality is trivially total.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct InvalidValueError {
    _priv: (),
}
106
107impl fmt::Display for InvalidValueError {
108    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
109        f.write_str("value may not be infinite or NaN")
110    }
111}
112
113impl std::error::Error for InvalidValueError {}
114
/// A wrapper over `f32` which enables some fast-math optimizations.
///
/// Values are created with [`FF32::new`], [`FF32::new_checked`], the [`ff32`] shorthand or
/// `From<f32>`, and are converted back to a plain `f32` with `f32::from`.
///
/// The wrapped value **MUST NOT** be infinite or NaN, and operations on it must not produce
/// infinite or NaN results. The output of any such operation is unspecified.
// The value is held in a `MaybePoison` so that a possibly-poison result is only exposed as a
// plain `f32` after it has been frozen. `repr(transparent)` gives this type the same layout as
// its single `MaybePoison<f32>` field.
// TODO how best to document unspecified values, including witnessing possibly varying values
#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct FF32(MaybePoison<f32>);
120
121/// Create a new `FF32` instance from the given float value.
122///
123/// This is syntax sugar for constructing the `FF32` type, and equivalent to `FF32::new(f)`
124///
125/// The given value **MUST NOT** be infinite or NaN, and any operations involving this value must
126/// not produce infinite or NaN results. The output of any such operation is unspecified.
127#[inline(always)]
128pub fn ff32(f: f32) -> FF32 {
129    // TODO maybe a feature flag to make this checked -> panic?
130    FF32::new(f)
131}
132
/// A wrapper over `f64` which enables some fast-math optimizations.
///
/// Values are created with [`FF64::new`], [`FF64::new_checked`], the [`ff64`] shorthand or
/// `From<f64>`, and are converted back to a plain `f64` with `f64::from`.
///
/// The wrapped value **MUST NOT** be infinite or NaN, and operations on it must not produce
/// infinite or NaN results. The output of any such operation is unspecified.
// The value is held in a `MaybePoison` so that a possibly-poison result is only exposed as a
// plain `f64` after it has been frozen. `repr(transparent)` gives this type the same layout as
// its single `MaybePoison<f64>` field.
// TODO how best to document unspecified values, including witnessing possibly varying values
#[derive(Clone, Copy)]
#[repr(transparent)]
pub struct FF64(MaybePoison<f64>);
138
139/// Create a new `FF64` instance from the given float value.
140///
141/// This is syntax sugar for constructing the `FF64` type, and equivalent to `FF64::new(f)`
142///
143/// The given value **MUST NOT** be infinite or NaN, and any operations involving this value must
144/// not produce infinite or NaN results. The output of any such operation is unspecified.
145#[inline(always)]
146pub fn ff64(f: f64) -> FF64 {
147    // TODO maybe a feature flag to make this checked -> panic?
148    FF64::new(f)
149}
150
// Implements compound-assignment operator traits for `$fast_ty` in terms of the matching binary
// operator function `$op` (looked up as `<$fast_ty>::$op`).
//
// For every `(trait, method, op)` triple, four impls are produced, one per right-hand-side type:
// `$fast_ty`, `&$fast_ty`, `$base_ty` and `&$base_ty`. Each one computes `op(*self, rhs)` and
// stores the result back into `*self`.
macro_rules! impl_assign_ops {
    // Internal rule: a single impl for one right-hand-side type.
    (@single $fast_ty:ident, $rhs_ty:ty, $op_trait:ident, $op_fn:ident, $op:ident) => {
        impl $op_trait<$rhs_ty> for $fast_ty {
            #[inline(always)]
            fn $op_fn(&mut self, rhs: $rhs_ty) {
                *self = <$fast_ty>::$op(*self, rhs)
            }
        }
    };

    // Public entry point: fan each operator out over the four right-hand-side types.
    ($fast_ty:ident, $base_ty:ident: $($op_trait:ident, $op_fn:ident, $op:ident,)*) => {
        $(
            impl_assign_ops! { @single $fast_ty, $fast_ty, $op_trait, $op_fn, $op }
            impl_assign_ops! { @single $fast_ty, &$fast_ty, $op_trait, $op_fn, $op }
            impl_assign_ops! { @single $fast_ty, $base_ty, $op_trait, $op_fn, $op }
            impl_assign_ops! { @single $fast_ty, &$base_ty, $op_trait, $op_fn, $op }
        )*
    };
}
184
// Implements iterator-reduction traits (such as `Sum` and `Product`) for `$fast_ty`.
//
// For every `(trait, method, op, identity)` tuple, four impls are produced so that iterators
// yielding `$fast_ty`, `&$fast_ty`, `$base_ty` or `&$base_ty` can all be reduced into a
// `$fast_ty`. Each reduction is a left fold that starts from `$identity` and combines the
// running value with the next item through the `$op` method.
macro_rules! impl_reduce_ops {
    ($fast_ty:ident, $base_ty: ident: $($op_trait:ident, $op_fn:ident, $op:ident, $identity:expr,)*) => {
        $(
            // items: wrapper by value
            impl $op_trait<$fast_ty> for $fast_ty {
                #[inline]
                fn $op_fn<I: Iterator<Item = $fast_ty>>(iter: I) -> Self {
                    iter.fold($identity, |running, item| running.$op(item))
                }
            }

            // items: wrapper by reference
            impl<'a> $op_trait<&'a $fast_ty> for $fast_ty {
                #[inline]
                fn $op_fn<I: Iterator<Item = &'a $fast_ty>>(iter: I) -> Self {
                    iter.fold($identity, |running, item| running.$op(item))
                }
            }

            // items: primitive by value
            impl $op_trait<$base_ty> for $fast_ty {
                #[inline]
                fn $op_fn<I: Iterator<Item = $base_ty>>(iter: I) -> Self {
                    iter.fold($identity, |running, item| running.$op(item))
                }
            }

            // items: primitive by reference
            impl<'a> $op_trait<&'a $base_ty> for $fast_ty {
                #[inline]
                fn $op_fn<I: Iterator<Item = &'a $base_ty>>(iter: I) -> Self {
                    iter.fold($identity, |running, item| running.$op(item))
                }
            }
        )*
    }
}
226
// Implements each listed formatting trait for `$fast_ty` by freezing the value and delegating
// to the same trait's implementation on the primitive `$base_ty`, so all formatter flags are
// handled exactly as they are for the primitive.
macro_rules! impl_fmt {
    ($fast_ty:ident, $base_ty:ident, $($fmt_trait:path,)*) => {
        $(
            impl $fmt_trait for $fast_ty {
                fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                    // formatting inspects the value, so it must be frozen first
                    let frozen = self.freeze_raw();
                    <$base_ty as $fmt_trait>::fmt(&frozen, f)
                }
            }
        )*
    }
}
238
// Generates the API shared by both fast-float wrappers for one `(wrapper, primitive)` pair:
// constants and constructors, inspection helpers, a few derived math methods, formatting,
// compound assignment, `Sum`/`Product`, comparisons, and conversions to and from the primitive.
macro_rules! impls {
    ($fast_ty:ident, $base_ty: ident) => {
        impl $fast_ty {
            // Identities used by the reductions and by `signum`/`recip` below.
            const ONE: Self = Self::new(1.0);
            const ZERO: Self = Self::new(0.0);

            /// The smallest finite value
            pub const MIN: $fast_ty = Self::new(<$base_ty>::MIN);

            /// The smallest positive value
            pub const MIN_POSITIVE: $fast_ty = Self::new(<$base_ty>::MIN_POSITIVE);

            /// The largest finite value
            pub const MAX: $fast_ty = Self::new(<$base_ty>::MAX);

            #[doc = "Create a new `"]
            #[doc = stringify!($fast_ty)]
            #[doc = "` instance from the given float value."]
            ///
            /// The given value **MUST NOT** be infinite or NaN, and any operations involving this value must
            /// not produce infinite or NaN results. The output of any such operation is unspecified.
            #[inline(always)]
            pub const fn new(f: $base_ty) -> Self {
                Self(MaybePoison::new(f))
            }

            #[doc = "Create a new `"]
            #[doc = stringify!($fast_ty)]
            #[doc = "` instance from the given float value, returning an error if the value is infinite or NaN."]
            ///
            /// Note that this check is **not sufficient** to avoid all unspecified outputs, because an
            /// operation could otherwise produce an invalid value with valid inputs (for example
            /// `ff32(1.0) / ff32(0.0)` is unspecified). Nevertheless, this check can be useful for
            /// limited best-effort validation.
            #[inline(always)]
            pub fn new_checked(f: $base_ty) -> Result<Self, InvalidValueError> {
                // `is_finite` rejects NaN as well as both infinities
                if !f.is_finite() {
                    return Err(InvalidValueError { _priv: () });
                }

                Ok(Self::new(f))
            }

            // Freezes the possibly-poison payload into a plain primitive.
            #[inline(always)]
            fn freeze_raw(self) -> $base_ty {
                self.0.freeze()
            }

            forward_freeze_self! {
                $fast_ty, $base_ty
                /// Forwards to the primitive's `div_euclid` on the frozen operands.
                pub fn div_euclid(self, rhs: Self) -> Self;
                /// Forwards to the primitive's `rem_euclid` on the frozen operands.
                pub fn rem_euclid(self, rhs: Self) -> Self;
                /// Forwards to the primitive's `to_degrees` on the frozen value.
                pub fn to_degrees(self) -> Self;
                /// Forwards to the primitive's `to_radians` on the frozen value.
                pub fn to_radians(self) -> Self;
            }

            /// Returns the floating point category of the frozen value.
            ///
            /// `Nan` and `Infinite` are never returned; a value which would classify as either
            /// is reported as `Normal` instead.
            #[inline]
            pub fn classify(self) -> FpCategory {
                // NaN and infinity should not be presented as possibilities to users, even if
                // freeze ends up producing it. Results are unspecified, so Normal is just as valid
                // as any other answer
                let category = self.freeze_raw().classify();
                if matches!(category, FpCategory::Nan | FpCategory::Infinite) {
                    FpCategory::Normal
                } else {
                    category
                }
            }

            /// Returns whether the frozen value has a negative sign, as reported by the
            /// primitive's `is_sign_negative`.
            #[inline]
            pub fn is_sign_negative(self) -> bool {
                // must freeze to keep poison out of bool branching
                let frozen = self.freeze_raw();
                frozen.is_sign_negative()
            }

            /// Returns whether the frozen value has a positive sign, as reported by the
            /// primitive's `is_sign_positive`.
            #[inline]
            pub fn is_sign_positive(self) -> bool {
                // must freeze to keep poison out of bool branching
                let frozen = self.freeze_raw();
                frozen.is_sign_positive()
            }

            /// Returns `true` if [`classify`](Self::classify) reports `Normal`.
            #[inline]
            pub fn is_normal(self) -> bool {
                matches!(self.classify(), FpCategory::Normal)
            }

            /// Returns `true` if [`classify`](Self::classify) reports `Subnormal`.
            #[inline]
            pub fn is_subnormal(self) -> bool {
                matches!(self.classify(), FpCategory::Subnormal)
            }

            /// Computes the square root of `self * self + other * other`.
            #[inline]
            pub fn hypot(self, other: Self) -> Self {
                let sum_of_squares = self * self + other * other;
                sum_of_squares.sqrt()
            }

            /// Returns one with the sign of `self` copied onto it.
            #[inline]
            pub fn signum(self) -> Self {
                Self::ONE.copysign(self)
            }

            /// Computes the reciprocal, `1 / self`.
            #[inline]
            pub fn recip(self) -> Self {
                Self::ONE / self
            }

            /// Computes the fractional part, `self - self.trunc()`.
            #[inline]
            pub fn fract(self) -> Self {
                let whole = self.trunc();
                self - whole
            }

            /// Computes the logarithm of `self` in the given `base`, as `self.ln() / base.ln()`.
            #[inline]
            pub fn log(self, base: Self) -> Self {
                let numerator = self.ln();
                let denominator = base.ln();
                numerator / denominator
            }

            /// Computes `self * mul + add` using the wrapper's own multiply and add operators.
            #[inline]
            pub fn mul_add(self, mul: Self, add: Self) -> Self {
                let product = self * mul;
                product + add
            }

            /// Returns `(self.sin(), self.cos())`.
            #[inline]
            pub fn sin_cos(self) -> (Self, Self) {
                let sine = self.sin();
                let cosine = self.cos();
                (sine, cosine)
            }
        }

        impl_fmt! {
            $fast_ty, $base_ty,
            fmt::Debug, fmt::Display, fmt::LowerExp, fmt::UpperExp,
        }

        impl_assign_ops! {
            $fast_ty, $base_ty:
            AddAssign, add_assign, add,
            SubAssign, sub_assign, sub,
            MulAssign, mul_assign, mul,
            DivAssign, div_assign, div,
            RemAssign, rem_assign, rem,
        }

        impl_reduce_ops! {
            $fast_ty, $base_ty:
            Sum, sum, add, Self::ZERO,
            Product, product, mul, Self::ONE,
        }

        // Branching on poison values is UB, so every operation that yields a bool freezes its
        // operands first. This covers PartialEq and PartialOrd. Freezing has a nontrivial
        // performance cost, so non-bool methods (such as min/max/clamp) should be preferred
        // when applicable.
        //
        // Only copies of the values are frozen; the originals may still be poison and could
        // yield different concrete values on a later freeze. The values are therefore not
        // guaranteed to be Eq/Ord consistent. Logical consistency is the user's responsibility
        // (by keeping values non inf/nan); the lib only ensures safety.

        impl PartialEq<$fast_ty> for $fast_ty {
            #[inline]
            fn eq(&self, other: &$fast_ty) -> bool {
                self.freeze_raw() == other.freeze_raw()
            }
        }

        impl PartialEq<$base_ty> for $fast_ty {
            #[inline]
            fn eq(&self, other: &$base_ty) -> bool {
                self.freeze_raw() == *other
            }
        }

        impl PartialEq<$fast_ty> for $base_ty {
            #[inline]
            fn eq(&self, other: &$fast_ty) -> bool {
                *self == other.freeze_raw()
            }
        }

        impl PartialOrd<$fast_ty> for $fast_ty {
            #[inline(always)]
            fn partial_cmp(&self, other: &$fast_ty) -> Option<cmp::Ordering> {
                let lhs = self.freeze_raw();
                let rhs = other.freeze_raw();
                lhs.partial_cmp(&rhs)
            }

            #[inline(always)]
            fn lt(&self, other: &$fast_ty) -> bool {
                let (lhs, rhs) = (self.freeze_raw(), other.freeze_raw());
                lhs < rhs
            }

            #[inline(always)]
            fn le(&self, other: &$fast_ty) -> bool {
                let (lhs, rhs) = (self.freeze_raw(), other.freeze_raw());
                lhs <= rhs
            }

            #[inline(always)]
            fn gt(&self, other: &$fast_ty) -> bool {
                let (lhs, rhs) = (self.freeze_raw(), other.freeze_raw());
                lhs > rhs
            }

            #[inline(always)]
            fn ge(&self, other: &$fast_ty) -> bool {
                let (lhs, rhs) = (self.freeze_raw(), other.freeze_raw());
                lhs >= rhs
            }
        }

        impl From<$fast_ty> for $base_ty {
            #[inline(always)]
            fn from(fast: $fast_ty) -> Self {
                // base primitives are no longer in our API control, so we must stop poison
                // propagation by freezing
                fast.freeze_raw()
            }
        }

        impl From<$base_ty> for $fast_ty {
            #[inline(always)]
            fn from(raw: $base_ty) -> Self {
                Self::new(raw)
            }
        }
    };
}
469
470impls! { FF32, f32 }
471impls! { FF64, f64 }