Skip to main content

malachite_float/
lib.rs

1// Copyright © 2026 Mikhail Hogrefe
2//
3// This file is part of Malachite.
4//
5// Malachite is free software: you can redistribute it and/or modify it under the terms of the GNU
6// Lesser General Public License (LGPL) as published by the Free Software Foundation; either version
7// 3 of the License, or (at your option) any later version. See <https://www.gnu.org/licenses/>.
8
9//! This crate defines [`Float`]s, which are arbitrary-precision floating-point numbers.
10//!
11//! [`Float`]s are currently experimental. They are missing many important functions. However, the
12//! functions that are currently implemented are thoroughly tested and documented, with the
13//! exception of string conversion functions. The current string conversions are incomplete and
14//! will be changed in the future to match MPFR's behavior.
15//!
16//! # Demos and benchmarks
17//! This crate comes with a `bin` target that can be used for running demos and benchmarks.
18//! - Almost all of the public functions in this crate have an associated demo. Running a demo
19//!   shows you a function's behavior on a large number of inputs. TODO
20//! - You can use a similar command to run benchmarks. TODO
21//!
22//! The list of available demos and benchmarks is not documented anywhere; you must find them by
23//! browsing through
24//! [`bin_util/demo_and_bench`](https://github.com/mhogrefe/malachite/tree/master/malachite-float/src/bin_util/demo_and_bench).
25//!
26//! # Features
27//! - `32_bit_limbs`: Sets the type of [`Limb`](malachite_nz#limbs) to [`u32`] instead of the
28//!   default, [`u64`].
29//! - `test_build`: A large proportion of the code in this crate is only used for testing. For a
30//!   typical user, building this code would result in an unnecessarily long compilation time and
31//!   an unnecessarily large binary. My solution is to only build this code when the `test_build`
32//!   feature is enabled. If you want to run unit tests, you must enable `test_build`. However,
33//!   doctests don't require it, since they only test the public interface.
34//! - `bin_build`: This feature is used to build the code for demos and benchmarks, which also
35//!   takes a long time to build. Enabling this feature also enables `test_build`.
36
37#![allow(
38    unstable_name_collisions,
39    clippy::assertions_on_constants,
40    clippy::cognitive_complexity,
41    clippy::many_single_char_names,
42    clippy::range_plus_one,
43    clippy::suspicious_arithmetic_impl,
44    clippy::suspicious_op_assign_impl,
45    clippy::too_many_arguments,
46    clippy::type_complexity,
47    clippy::upper_case_acronyms,
48    clippy::multiple_bound_locations
49)]
50#![warn(
51    clippy::cast_lossless,
52    clippy::explicit_into_iter_loop,
53    clippy::explicit_iter_loop,
54    clippy::filter_map_next,
55    clippy::large_digit_groups,
56    clippy::manual_filter_map,
57    clippy::manual_find_map,
58    clippy::map_flatten,
59    clippy::map_unwrap_or,
60    clippy::match_same_arms,
61    clippy::missing_const_for_fn,
62    clippy::mut_mut,
63    clippy::needless_borrow,
64    clippy::needless_continue,
65    clippy::needless_pass_by_value,
66    clippy::print_stdout,
67    clippy::redundant_closure_for_method_calls,
68    clippy::single_match_else,
69    clippy::trait_duplication_in_bounds,
70    clippy::type_repetition_in_bounds,
71    clippy::uninlined_format_args,
72    clippy::unused_self,
73    clippy::if_not_else,
74    clippy::manual_assert,
75    clippy::range_plus_one,
76    clippy::redundant_else,
77    clippy::semicolon_if_nothing_returned,
78    clippy::cloned_instead_of_copied,
79    clippy::flat_map_option,
80    clippy::unnecessary_wraps,
81    clippy::unnested_or_patterns,
82    clippy::use_self,
83    clippy::trivially_copy_pass_by_ref
84)]
85#![cfg_attr(
86    not(any(feature = "test_build", feature = "random", feature = "std")),
87    no_std
88)]
89
90extern crate alloc;
91
92#[macro_use]
93extern crate malachite_base;
94
95#[cfg(feature = "test_build")]
96extern crate itertools;
97
98#[cfg(feature = "test_build")]
99use crate::InnerFloat::Finite;
100use core::cmp::Ordering::{self, *};
101use core::ops::Deref;
102#[cfg(feature = "test_build")]
103use malachite_base::num::arithmetic::traits::DivisibleByPowerOf2;
104use malachite_base::num::arithmetic::traits::IsPowerOf2;
105use malachite_base::num::basic::floats::PrimitiveFloat;
106use malachite_base::num::basic::integers::PrimitiveInt;
107use malachite_base::num::basic::traits::{Infinity, NegativeInfinity};
108use malachite_base::num::conversion::traits::{ExactFrom, RoundingFrom, SciMantissaAndExponent};
109#[cfg(feature = "test_build")]
110use malachite_base::num::logic::traits::SignificantBits;
111use malachite_base::rounding_modes::RoundingMode::*;
112use malachite_nz::natural::Natural;
113use malachite_nz::platform::Limb;
114use malachite_q::Rational;
115
/// A floating-point number.
///
/// `Float`s are currently experimental. They are missing many important functions. However, the
/// functions that are currently implemented are thoroughly tested and documented, with the
/// exception of string conversion functions. The current string conversions are incomplete and will
/// be changed in the future to match MPFR's behavior.
///
/// `Float`s are similar to the primitive floats defined by the IEEE 754 standard. They include NaN,
/// $\infty$ and $-\infty$, and positive and negative zero. There is only one NaN; there is no
/// concept of a NaN payload.
///
/// All the finite `Float`s are dyadic rationals (rational numbers whose denominator is a power of
/// 2). A finite `Float` consists of several fields:
/// - a sign, which denotes whether the `Float` is positive or negative;
/// - a significand, which is a [`Natural`] number whose value is equal to the `Float`'s absolute
///   value multiplied by a power of 2;
/// - an exponent, which is one more than the floor of the base-2 logarithm of the `Float`'s
///   absolute value;
/// - and finally, a precision, which is greater than zero and indicates the number of significant
///   bits. It is common to think of a `Float` as an approximation of some real number, and the
///   precision indicates how good the approximation is intended to be.
///
/// `Float`s inherit some odd behavior from the IEEE 754 standard regarding comparison. A `NaN` is
/// not equal to any `Float`, including itself. Positive and negative zero compare as equal, despite
/// being two distinct values. Additionally, (and this is not IEEE 754's fault), `Float`s with
/// different precisions compare as equal if they represent the same numeric value.
///
/// In many cases, the above behavior is unsatisfactory, so the [`ComparableFloat`] and
/// [`ComparableFloatRef`] wrappers are provided. See their documentation for a description of
/// their comparison behavior.
///
/// In documentation, we will use the '$=$' sign to mean that two `Float`s are identical, writing
/// things like $-\text{NaN}=\text{NaN}$ and $-(0.0) = -0.0$.
///
/// The `Float` type is designed to be very similar to the `mpfr_t` type in
/// [MPFR](https://www.mpfr.org/mpfr-current/mpfr.html#Nomenclature-and-Types), and all Malachite
/// functions produce exactly the same result as their counterparts in MPFR, unless otherwise noted.
///
/// Here are the structural differences between `Float` and `mpfr_t`:
/// - `Float` can only represent a single `NaN` value, with no sign or payload.
/// - Only finite, nonzero `Float`s have a significand, precision, and exponent. For other `Float`s,
///   these concepts are undefined. In particular, unlike `mpfr_t` zeros, `Float` zeros do not have
///   a precision.
/// - The types of `mpfr_t` components are configuration- and platform-dependent. The types of
///   `Float` components are platform-independent, although the `Limb` type is
///   configuration-dependent: it is `u64` by default, but may be changed to `u32` by enabling the
///   `32_bit_limbs` feature. The type of the exponent is always `i32` and the type of the
///   precision is always `u64`. The `Limb` type only has a visible effect on the functions that
///   extract the raw significand. All other functions have the same interface when compiled with
///   either `Limb` type.
///
/// `Float`s whose precision is 64 bits or less can be represented without any memory allocation.
/// (Unless Malachite is compiled with `32_bit_limbs`, in which case the limit is 32).
#[derive(Clone)]
pub struct Float(pub(crate) InnerFloat);
171
// We want to limit the visibility of the `NaN`, `Zero`, `Infinity`, and `Finite` constructors to
// within this crate. To do this, we wrap the `InnerFloat` enum in a struct that gets compiled away.
//
// The variants mirror the kinds of value listed in the `Float` documentation: the single NaN, the
// two infinities, the two zeros, and the finite nonzero values.
#[derive(Clone)]
pub(crate) enum InnerFloat {
    // The single NaN value; it has no sign and no payload.
    NaN,
    // Positive or negative infinity, told apart by `sign`.
    Infinity {
        // NOTE(review): presumably `true` means positive -- confirm against the constructors.
        sign: bool,
    },
    // Positive or negative zero, told apart by `sign`. Zeros carry no precision.
    Zero {
        // NOTE(review): presumably `true` means positive -- confirm against the constructors.
        sign: bool,
    },
    // A finite, nonzero value. `Float::is_valid` spells out the invariants among these fields.
    Finite {
        // Whether the value is positive or negative.
        // NOTE(review): presumably `true` means positive -- confirm against the constructors.
        sign: bool,
        // Raw exponent: one more than the floor of the base-2 logarithm of the absolute value.
        // `is_valid` requires it to lie between `Float::MIN_EXPONENT` and `Float::MAX_EXPONENT`.
        exponent: i32,
        // Number of significant bits; `is_valid` requires it to be nonzero.
        precision: u64,
        // The absolute value scaled by a power of 2. `is_valid` requires its bit length to be a
        // nonzero multiple of the limb width, with fewer than `Limb::WIDTH` bits below the
        // `precision` most-significant ones, all of which must be zero.
        significand: Natural,
    },
}
190
191#[inline]
192pub(crate) fn significand_bits(significand: &Natural) -> u64 {
193    significand.limb_count() << Limb::LOG_WIDTH
194}
195
196impl Float {
197    /// The maximum raw exponent of any [`Float`], equal to $2^{30}-1$, or $1,073,741,823$. This is
198    /// one more than the maximum scientific exponent. If we write a [`Float`] as $\pm m2^e$, with
199    /// $1\leq m<2$ and $e$ an integer, we must have $e\leq 2^{30}-2$. If the result of a
200    /// calculation would produce a [`Float`] with an exponent larger than this, then $\pm\infty$,
201    /// the maximum finite float of the specified precision, or the minimum finite float of the
202    /// specified pecision is returned instead, depending on the rounding mode.
203    pub const MAX_EXPONENT: i32 = 0x3fff_ffff;
204    /// The minimum raw exponent of any [`Float`], equal to $-(2^{30}-1)$, or $-1,073,741,823$. This
205    /// is one more than the minimum scientific exponent. If we write a [`Float`] as $\pm m2^e$,
206    /// with $1\leq m<2$ and $e$ an integer, we must have $e\geq -2^{30}$. If the result of a
207    /// calculation would produce a [`Float`] with an exponent smaller than this, then $\pm0.0$, the
208    /// minimum positive finite [`Float`], or the maximum negative finite [`Float`] is returned
209    /// instead, depending on the rounding mode.
210    pub const MIN_EXPONENT: i32 = -Self::MAX_EXPONENT;
211
212    #[cfg(feature = "test_build")]
213    pub fn is_valid(&self) -> bool {
214        match self {
215            Self(Finite {
216                precision,
217                significand,
218                exponent,
219                ..
220            }) => {
221                if *precision == 0
222                    || !significand.is_valid()
223                    || *exponent > Self::MAX_EXPONENT
224                    || *exponent < Self::MIN_EXPONENT
225                {
226                    return false;
227                }
228                let bits = significand.significant_bits();
229                bits != 0
230                    && bits.divisible_by_power_of_2(Limb::LOG_WIDTH)
231                    && *precision <= bits
232                    && bits - precision < Limb::WIDTH
233                    && significand.divisible_by_power_of_2(bits - precision)
234            }
235            _ => true,
236        }
237    }
238}
239
/// `ComparableFloat` is a wrapper around a [`Float`], taking the [`Float`] by value.
///
/// `ComparableFloat` has different comparison behavior than [`Float`]. See the [`Float`]
/// documentation for its comparison behavior, which is largely derived from the IEEE 754
/// specification; the `ComparableFloat` behavior, on the other hand, is more mathematically
/// well-behaved, and respects the principle that equality should be the finest equivalence
/// relation: that is, that two equal objects should not be different in any way.
///
/// To be more specific: when a [`Float`] is wrapped in a `ComparableFloat`,
/// - `NaN` is not equal to any other [`Float`], but equal to itself;
/// - Positive and negative zero are not equal to each other;
/// - Ordering is total. Negative zero is ordered to be smaller than positive zero, and `NaN` is
///   arbitrarily ordered to be between the two zeros;
/// - Two [`Float`]s with different precisions but representing the same value are unequal, and the
///   one with the greater precision is ordered to be larger;
/// - The hashing function is compatible with equality.
///
/// The analogous wrapper for primitive floats is
/// [`NiceFloat`](malachite_base::num::float::NiceFloat). However,
/// [`NiceFloat`](malachite_base::num::float::NiceFloat) also facilitates better string conversion,
/// something that isn't necessary for [`Float`]s.
///
/// `ComparableFloat` owns its float. This is useful in many cases, for example if you want to use
/// [`Float`]s as keys in a hash map. In other situations, it is better to use
/// [`ComparableFloatRef`], which only has a reference to its float.
#[derive(Clone)]
pub struct ComparableFloat(pub Float);
267
/// `ComparableFloatRef` is a wrapper around a [`Float`], taking the [`Float`] by reference.
///
/// See the [`ComparableFloat`] documentation for details.
#[derive(Clone)]
pub struct ComparableFloatRef<'a>(pub &'a Float);
273
274impl ComparableFloat {
275    pub const fn as_ref(&self) -> ComparableFloatRef<'_> {
276        ComparableFloatRef(&self.0)
277    }
278}
279
280impl Deref for ComparableFloat {
281    type Target = Float;
282
283    /// Allows a [`ComparableFloat`] to dereference to a [`Float`].
284    ///
285    /// ```
286    /// use malachite_base::num::basic::traits::One;
287    /// use malachite_float::{ComparableFloat, Float};
288    ///
289    /// let x = ComparableFloat(Float::ONE);
290    /// assert_eq!(*x, Float::ONE);
291    /// ```
292    fn deref(&self) -> &Float {
293        &self.0
294    }
295}
296
297impl Deref for ComparableFloatRef<'_> {
298    type Target = Float;
299
300    /// Allows a [`ComparableFloatRef`] to dereference to a [`Float`].
301    ///
302    /// ```
303    /// use malachite_base::num::basic::traits::One;
304    /// use malachite_float::{ComparableFloatRef, Float};
305    ///
306    /// let x = Float::ONE;
307    /// let y = ComparableFloatRef(&x);
308    /// assert_eq!(*y, Float::ONE);
309    /// ```
310    fn deref(&self) -> &Float {
311        self.0
312    }
313}
314
315#[allow(clippy::type_repetition_in_bounds)]
316#[doc(hidden)]
317pub fn emulate_float_to_float_fn<T: PrimitiveFloat, F: Fn(Float, u64) -> (Float, Ordering)>(
318    f: F,
319    x: T,
320) -> T
321where
322    Float: From<T> + PartialOrd<T>,
323    for<'a> T: ExactFrom<&'a Float> + RoundingFrom<&'a Float>,
324{
325    let x = Float::from(x);
326    let (mut result, o) = f(x.clone(), T::MANTISSA_WIDTH + 1);
327    if !result.is_normal() {
328        return T::exact_from(&result);
329    }
330    let e = i64::from(<&Float as SciMantissaAndExponent<Float, i32, _>>::sci_exponent(&result));
331    if e < T::MIN_NORMAL_EXPONENT {
332        if e < T::MIN_EXPONENT {
333            let rm =
334                if e == T::MIN_EXPONENT - 1 && result.significand_ref().unwrap().is_power_of_2() {
335                    let down = if result > T::ZERO { Less } else { Greater };
336                    if o == down { Up } else { Down }
337                } else {
338                    Nearest
339                };
340            return T::rounding_from(&result, rm).0;
341        }
342        result = f(x, T::max_precision_for_sci_exponent(e)).0;
343    }
344    if result > T::MAX_FINITE {
345        T::INFINITY
346    } else if result < -T::MAX_FINITE {
347        T::NEGATIVE_INFINITY
348    } else {
349        T::exact_from(&result)
350    }
351}
352
353#[allow(clippy::type_repetition_in_bounds)]
354#[doc(hidden)]
355pub fn emulate_float_float_to_float_fn<
356    T: PrimitiveFloat,
357    F: Fn(Float, Float, u64) -> (Float, Ordering),
358>(
359    f: F,
360    x: T,
361    y: T,
362) -> T
363where
364    Float: From<T> + PartialOrd<T>,
365    for<'a> T: ExactFrom<&'a Float> + RoundingFrom<&'a Float>,
366{
367    let x = Float::from(x);
368    let y = Float::from(y);
369    let (mut result, o) = f(x.clone(), y.clone(), T::MANTISSA_WIDTH + 1);
370    if !result.is_normal() {
371        return T::exact_from(&result);
372    }
373    let e = i64::from(<&Float as SciMantissaAndExponent<Float, i32, _>>::sci_exponent(&result));
374    if e < T::MIN_NORMAL_EXPONENT {
375        if e < T::MIN_EXPONENT {
376            let rm =
377                if e == T::MIN_EXPONENT - 1 && result.significand_ref().unwrap().is_power_of_2() {
378                    let down = if result > T::ZERO { Less } else { Greater };
379                    if o == down { Up } else { Down }
380                } else {
381                    Nearest
382                };
383            return T::rounding_from(&result, rm).0;
384        }
385        result = f(x, y, T::max_precision_for_sci_exponent(e)).0;
386    }
387    if result > T::MAX_FINITE {
388        T::INFINITY
389    } else if result < -T::MAX_FINITE {
390        T::NEGATIVE_INFINITY
391    } else {
392        T::exact_from(&result)
393    }
394}
395
396#[allow(clippy::type_repetition_in_bounds)]
397#[doc(hidden)]
398pub fn emulate_rational_to_float_fn<T: PrimitiveFloat, F: Fn(&Rational, u64) -> (Float, Ordering)>(
399    f: F,
400    x: &Rational,
401) -> T
402where
403    Float: PartialOrd<T>,
404    for<'a> T: ExactFrom<&'a Float> + RoundingFrom<&'a Float>,
405{
406    let (mut result, o) = f(x, T::MANTISSA_WIDTH + 1);
407    if !result.is_normal() {
408        return T::exact_from(&result);
409    }
410    let e = i64::from(<&Float as SciMantissaAndExponent<Float, i32, _>>::sci_exponent(&result));
411    if e < T::MIN_NORMAL_EXPONENT {
412        if e < T::MIN_EXPONENT {
413            let rm =
414                if e == T::MIN_EXPONENT - 1 && result.significand_ref().unwrap().is_power_of_2() {
415                    let down = if result > T::ZERO { Less } else { Greater };
416                    if o == down { Up } else { Down }
417                } else {
418                    Nearest
419                };
420            return T::rounding_from(&result, rm).0;
421        }
422        result = f(x, T::max_precision_for_sci_exponent(e)).0;
423    }
424    if result > T::MAX_FINITE {
425        T::INFINITY
426    } else if result < -T::MAX_FINITE {
427        T::NEGATIVE_INFINITY
428    } else {
429        T::exact_from(&result)
430    }
431}
432
433#[allow(clippy::type_repetition_in_bounds)]
434#[doc(hidden)]
435pub fn emulate_rational_rational_to_float_fn<
436    T: PrimitiveFloat,
437    F: Fn(&Rational, &Rational, u64) -> (Float, Ordering),
438>(
439    f: F,
440    x: &Rational,
441    y: &Rational,
442) -> T
443where
444    Float: PartialOrd<T>,
445    for<'a> T: ExactFrom<&'a Float> + RoundingFrom<&'a Float>,
446{
447    let (mut result, o) = f(x, y, T::MANTISSA_WIDTH + 1);
448    if !result.is_normal() {
449        return T::exact_from(&result);
450    }
451    let e = i64::from(<&Float as SciMantissaAndExponent<Float, i32, _>>::sci_exponent(&result));
452    if e < T::MIN_NORMAL_EXPONENT {
453        if e < T::MIN_EXPONENT {
454            let rm =
455                if e == T::MIN_EXPONENT - 1 && result.significand_ref().unwrap().is_power_of_2() {
456                    let down = if result > T::ZERO { Less } else { Greater };
457                    if o == down { Up } else { Down }
458                } else {
459                    Nearest
460                };
461            return T::rounding_from(&result, rm).0;
462        }
463        result = f(x, y, T::max_precision_for_sci_exponent(e)).0;
464    }
465    if result > T::MAX_FINITE {
466        T::INFINITY
467    } else if result < -T::MAX_FINITE {
468        T::NEGATIVE_INFINITY
469    } else {
470        T::exact_from(&result)
471    }
472}
473
474/// Given the `(Float, Ordering)` result of an operation, determines whether an overflow occurred.
475///
476/// We're defining an overflow to occur whenever the actual result is outside the representable
477/// finite range, and is rounded to either infinity or to the maximum or minimum representable
478/// finite value. An overflow can present itself in four ways:
479/// - The result is $\infty$ and the `Ordering` is `Greater`
480/// - The result is $-\infty$ and the `Ordering` is `Less`
481/// - The result is the largest finite value (of any `Float` with its precision) and the `Ordering`
482///   is `Less`
483/// - The result is the smallest (most negative) finite value (of any `Float` with its precision)
484///   and the `Ordering` is `Greater`
485///
486/// # Worst-case complexity
487/// $T(n) = O(n)$
488///
489/// $M(n) = O(1)$
490///
491/// where $T$ is time, $M$ is additional memory, and $n$ is `self.significant_bits()`.
492///
493/// # Examples
494/// ```
495/// use malachite_base::num::basic::traits::{Infinity, NegativeInfinity, One};
496/// use malachite_float::{test_overflow, Float};
497/// use std::cmp::Ordering::*;
498///
499/// assert!(test_overflow(&Float::INFINITY, Greater));
500/// assert!(test_overflow(&Float::NEGATIVE_INFINITY, Less));
501/// assert!(test_overflow(&Float::max_finite_value_with_prec(10), Less));
502/// assert!(test_overflow(
503///     &-Float::max_finite_value_with_prec(10),
504///     Greater
505/// ));
506///
507/// assert!(!test_overflow(&Float::INFINITY, Equal));
508/// assert!(!test_overflow(&Float::ONE, Less));
509/// ```
510pub fn test_overflow(result: &Float, o: Ordering) -> bool {
511    if o == Equal {
512        return false;
513    }
514    *result == Float::INFINITY && o == Greater
515        || *result == Float::NEGATIVE_INFINITY && o == Less
516        || *result > 0u32 && result.abs_is_max_finite_value_with_prec() && o == Less
517        || *result < 0u32 && result.abs_is_max_finite_value_with_prec() && o == Greater
518}
519
520/// Given the `(Float, Ordering)` result of an operation, determines whether an underflow occurred.
521///
522/// We're defining an underflow to occur whenever the actual result is outside the representable
523/// finite range, and is rounded to zero, to the minimum positive value, or to the maximum negative
524/// value. An underflow can present itself in four ways:
525/// - The result is $0.0$ or $-0.0$ and the `Ordering` is `Less`
526/// - The result is $0.0$ or $-0.0$ and the `Ordering` is `Greater`
527/// - The result is the smallest positive value and the `Ordering` is `Greater`
528/// - The result is the largest (least negative) negative value and the `Ordering` is `Less`
529///
530/// # Worst-case complexity
531/// $T(n) = O(n)$
532///
533/// $M(n) = O(1)$
534///
535/// where $T$ is time, $M$ is additional memory, and $n$ is `self.significant_bits()`.
536///
537/// # Examples
538/// ```
539/// use malachite_base::num::basic::traits::{One, Zero};
540/// use malachite_float::{test_underflow, Float};
541/// use std::cmp::Ordering::*;
542///
543/// assert!(test_underflow(&Float::ZERO, Less));
544/// assert!(test_underflow(&Float::ZERO, Greater));
545/// assert!(test_underflow(&Float::min_positive_value_prec(10), Greater));
546/// assert!(test_underflow(&-Float::min_positive_value_prec(10), Less));
547///
548/// assert!(!test_underflow(&Float::ZERO, Equal));
549/// assert!(!test_underflow(&Float::ONE, Less));
550/// ```
551pub fn test_underflow(result: &Float, o: Ordering) -> bool {
552    if o == Equal {
553        return false;
554    }
555    *result == 0u32
556        || *result > 0u32 && result.abs_is_min_positive_value() && o == Greater
557        || *result < 0u32 && result.abs_is_min_positive_value() && o == Less
558}
559
/// Traits for arithmetic.
pub mod arithmetic;
#[macro_use]
/// Basic traits for working with [`Float`]s.
pub mod basic;
/// Traits for comparing [`Float`]s for equality or order.
pub mod comparison;
/// Functions that produce [`Float`] approximations of mathematical constants, using a given
/// precision and rounding mode.
pub mod constants;
/// Traits for converting to and from [`Float`]s, including converting [`Float`]s to and from
/// strings.
pub mod conversion;
/// Iterators that generate [`Float`]s without repetition.
pub mod exhaustive;
#[cfg(feature = "random")]
/// Iterators that generate [`Float`]s randomly.
pub mod random;

#[cfg(feature = "test_build")]
/// Code that is only compiled when the `test_build` feature is enabled.
pub mod test_util;