malachite_float/lib.rs
1// Copyright © 2026 Mikhail Hogrefe
2//
3// This file is part of Malachite.
4//
5// Malachite is free software: you can redistribute it and/or modify it under the terms of the GNU
6// Lesser General Public License (LGPL) as published by the Free Software Foundation; either version
7// 3 of the License, or (at your option) any later version. See <https://www.gnu.org/licenses/>.
8
9//! This crate defines [`Float`]s, which are arbitrary-precision floating-point numbers.
10//!
11//! [`Float`]s are currently experimental. They are missing many important functions. However, the
12//! functions that are currently implemented are thoroughly tested and documented, with the
13//! exception of string conversion functions. The current string conversions are incomplete and
14//! will be changed in the future to match MPFR's behavior.
15//!
16//! # Demos and benchmarks
17//! This crate comes with a `bin` target that can be used for running demos and benchmarks.
18//! - Almost all of the public functions in this crate have an associated demo. Running a demo
19//! shows you a function's behavior on a large number of inputs. TODO
20//! - You can use a similar command to run benchmarks. TODO
21//!
22//! The list of available demos and benchmarks is not documented anywhere; you must find them by
23//! browsing through
24//! [`bin_util/demo_and_bench`](https://github.com/mhogrefe/malachite/tree/master/malachite-float/src/bin_util/demo_and_bench).
25//!
26//! # Features
27//! - `32_bit_limbs`: Sets the type of [`Limb`](malachite_nz#limbs) to [`u32`] instead of the
28//! default, [`u64`].
29//! - `test_build`: A large proportion of the code in this crate is only used for testing. For a
30//! typical user, building this code would result in an unnecessarily long compilation time and
31//! an unnecessarily large binary. My solution is to only build this code when the `test_build`
32//! feature is enabled. If you want to run unit tests, you must enable `test_build`. However,
33//! doctests don't require it, since they only test the public interface.
34//! - `bin_build`: This feature is used to build the code for demos and benchmarks, which also
35//! takes a long time to build. Enabling this feature also enables `test_build`.
36
37#![allow(
38 unstable_name_collisions,
39 clippy::assertions_on_constants,
40 clippy::cognitive_complexity,
41 clippy::many_single_char_names,
42 clippy::range_plus_one,
43 clippy::suspicious_arithmetic_impl,
44 clippy::suspicious_op_assign_impl,
45 clippy::too_many_arguments,
46 clippy::type_complexity,
47 clippy::upper_case_acronyms,
48 clippy::multiple_bound_locations
49)]
50#![warn(
51 clippy::cast_lossless,
52 clippy::explicit_into_iter_loop,
53 clippy::explicit_iter_loop,
54 clippy::filter_map_next,
55 clippy::large_digit_groups,
56 clippy::manual_filter_map,
57 clippy::manual_find_map,
58 clippy::map_flatten,
59 clippy::map_unwrap_or,
60 clippy::match_same_arms,
61 clippy::missing_const_for_fn,
62 clippy::mut_mut,
63 clippy::needless_borrow,
64 clippy::needless_continue,
65 clippy::needless_pass_by_value,
66 clippy::print_stdout,
67 clippy::redundant_closure_for_method_calls,
68 clippy::single_match_else,
69 clippy::trait_duplication_in_bounds,
70 clippy::type_repetition_in_bounds,
71 clippy::uninlined_format_args,
72 clippy::unused_self,
73 clippy::if_not_else,
74 clippy::manual_assert,
75 clippy::range_plus_one,
76 clippy::redundant_else,
77 clippy::semicolon_if_nothing_returned,
78 clippy::cloned_instead_of_copied,
79 clippy::flat_map_option,
80 clippy::unnecessary_wraps,
81 clippy::unnested_or_patterns,
82 clippy::use_self,
83 clippy::trivially_copy_pass_by_ref
84)]
85#![cfg_attr(
86 not(any(feature = "test_build", feature = "random", feature = "std")),
87 no_std
88)]
89
90extern crate alloc;
91
92#[macro_use]
93extern crate malachite_base;
94
95#[cfg(feature = "test_build")]
96extern crate itertools;
97
98#[cfg(feature = "test_build")]
99use crate::InnerFloat::Finite;
100use core::cmp::Ordering::{self, *};
101use core::ops::Deref;
102#[cfg(feature = "test_build")]
103use malachite_base::num::arithmetic::traits::DivisibleByPowerOf2;
104use malachite_base::num::arithmetic::traits::IsPowerOf2;
105use malachite_base::num::basic::floats::PrimitiveFloat;
106use malachite_base::num::basic::integers::PrimitiveInt;
107use malachite_base::num::basic::traits::{Infinity, NegativeInfinity};
108use malachite_base::num::conversion::traits::{ExactFrom, RoundingFrom, SciMantissaAndExponent};
109#[cfg(feature = "test_build")]
110use malachite_base::num::logic::traits::SignificantBits;
111use malachite_base::rounding_modes::RoundingMode::*;
112use malachite_nz::natural::Natural;
113use malachite_nz::platform::Limb;
114use malachite_q::Rational;
115
/// A floating-point number.
///
/// `Float`s are currently experimental. They are missing many important functions. However, the
/// functions that are currently implemented are thoroughly tested and documented, with the
/// exception of string conversion functions. The current string conversions are incomplete and will
/// be changed in the future to match MPFR's behavior.
///
/// `Float`s are similar to the primitive floats defined by the IEEE 754 standard. They include NaN,
/// $\infty$ and $-\infty$, and positive and negative zero. There is only one NaN; there is no
/// concept of a NaN payload.
///
/// All the finite `Float`s are dyadic rationals (rational numbers whose denominator is a power of
/// 2). A finite `Float` consists of several fields:
/// - a sign, which denotes whether the `Float` is positive or negative;
/// - a significand, which is a [`Natural`] number whose value is equal to the `Float`'s absolute
///   value multiplied by a power of 2;
/// - an exponent, which is one more than the floor of the base-2 logarithm of the `Float`'s
///   absolute value;
/// - and finally, a precision, which is greater than zero and indicates the number of significant
///   bits. It is common to think of a `Float` as an approximation of some real number, and the
///   precision indicates how good the approximation is intended to be.
///
/// `Float`s inherit some odd behavior from the IEEE 754 standard regarding comparison. A `NaN` is
/// not equal to any `Float`, including itself. Positive and negative zero compare as equal, despite
/// being two distinct values. Additionally, (and this is not IEEE 754's fault), `Float`s with
/// different precisions compare as equal if they represent the same numeric value.
///
/// In many cases, the above behavior is unsatisfactory, so the [`ComparableFloat`] and
/// [`ComparableFloatRef`] wrappers are provided. See their documentation for a description of
/// their comparison behavior.
///
/// In documentation, we will use the '$=$' sign to mean that two `Float`s are identical, writing
/// things like $-\text{NaN}=\text{NaN}$ and $-(0.0) = -0.0$.
///
/// The `Float` type is designed to be very similar to the `mpfr_t` type in
/// [MPFR](https://www.mpfr.org/mpfr-current/mpfr.html#Nomenclature-and-Types), and all Malachite
/// functions produce exactly the same result as their counterparts in MPFR, unless otherwise noted.
///
/// Here are the structural differences between `Float` and `mpfr_t`:
/// - `Float` can only represent a single `NaN` value, with no sign or payload.
/// - Only finite, nonzero `Float`s have a significand, precision, and exponent. For other `Float`s,
///   these concepts are undefined. In particular, unlike `mpfr_t` zeros, `Float` zeros do not have
///   a precision.
/// - The types of `mpfr_t` components are configuration- and platform-dependent. The types of
///   `Float` components are platform-independent, although the `Limb` type is
///   configuration-dependent: it is `u64` by default, but may be changed to `u32` by enabling the
///   `32_bit_limbs` feature. The type of the exponent is always `i32` and the type of the
///   precision is always `u64`. The `Limb` type only has a visible effect on the functions that
///   extract the raw significand. All other functions have the same interface when compiled with
///   either `Limb` type.
///
/// `Float`s whose precision is 64 bits or less can be represented without any memory allocation.
/// (Unless Malachite is compiled with `32_bit_limbs`, in which case the limit is 32).
#[derive(Clone)]
pub struct Float(pub(crate) InnerFloat);
171
// We want to limit the visibility of the `NaN`, `Zero`, `Infinity`, and `Finite` constructors to
// within this crate. To do this, we wrap the `InnerFloat` enum in a struct that gets compiled away.
#[derive(Clone)]
pub(crate) enum InnerFloat {
    // The single NaN value; it carries no sign and no payload.
    NaN,
    // One of the two infinities; `sign` selects which one.
    // NOTE(review): which value of `sign` denotes "positive" is not visible in this file; confirm
    // against the constructors before relying on it.
    Infinity {
        sign: bool,
    },
    // One of the two zeros; `sign` selects which one (same polarity caveat as for `Infinity`).
    Zero {
        sign: bool,
    },
    // A finite, nonzero value. The invariants tying these fields together are the ones checked by
    // `Float::is_valid`.
    Finite {
        // Distinguishes positive from negative values.
        sign: bool,
        // Raw exponent; `Float::is_valid` requires it to lie in
        // `Float::MIN_EXPONENT..=Float::MAX_EXPONENT`.
        exponent: i32,
        // Number of significant bits; `Float::is_valid` requires it to be nonzero.
        precision: u64,
        // The significand. `Float::is_valid` requires its bit length to be at least `precision`
        // and every bit below the top `precision` bits to be zero.
        significand: Natural,
    },
}
190
191#[inline]
192pub(crate) fn significand_bits(significand: &Natural) -> u64 {
193 significand.limb_count() << Limb::LOG_WIDTH
194}
195
196impl Float {
197 /// The maximum raw exponent of any [`Float`], equal to $2^{30}-1$, or $1,073,741,823$. This is
198 /// one more than the maximum scientific exponent. If we write a [`Float`] as $\pm m2^e$, with
199 /// $1\leq m<2$ and $e$ an integer, we must have $e\leq 2^{30}-2$. If the result of a
200 /// calculation would produce a [`Float`] with an exponent larger than this, then $\pm\infty$,
201 /// the maximum finite float of the specified precision, or the minimum finite float of the
202 /// specified pecision is returned instead, depending on the rounding mode.
203 pub const MAX_EXPONENT: i32 = 0x3fff_ffff;
204 /// The minimum raw exponent of any [`Float`], equal to $-(2^{30}-1)$, or $-1,073,741,823$. This
205 /// is one more than the minimum scientific exponent. If we write a [`Float`] as $\pm m2^e$,
206 /// with $1\leq m<2$ and $e$ an integer, we must have $e\geq -2^{30}$. If the result of a
207 /// calculation would produce a [`Float`] with an exponent smaller than this, then $\pm0.0$, the
208 /// minimum positive finite [`Float`], or the maximum negative finite [`Float`] is returned
209 /// instead, depending on the rounding mode.
210 pub const MIN_EXPONENT: i32 = -Self::MAX_EXPONENT;
211
212 #[cfg(feature = "test_build")]
213 pub fn is_valid(&self) -> bool {
214 match self {
215 Self(Finite {
216 precision,
217 significand,
218 exponent,
219 ..
220 }) => {
221 if *precision == 0
222 || !significand.is_valid()
223 || *exponent > Self::MAX_EXPONENT
224 || *exponent < Self::MIN_EXPONENT
225 {
226 return false;
227 }
228 let bits = significand.significant_bits();
229 bits != 0
230 && bits.divisible_by_power_of_2(Limb::LOG_WIDTH)
231 && *precision <= bits
232 && bits - precision < Limb::WIDTH
233 && significand.divisible_by_power_of_2(bits - precision)
234 }
235 _ => true,
236 }
237 }
238}
239
/// `ComparableFloat` is a wrapper around a [`Float`], taking the [`Float`] by value.
///
/// `ComparableFloat` has different comparison behavior than [`Float`]. See the [`Float`]
/// documentation for its comparison behavior, which is largely derived from the IEEE 754
/// specification; the `ComparableFloat` behavior, on the other hand, is more mathematically
/// well-behaved, and respects the principle that equality should be the finest equivalence
/// relation: that is, that two equal objects should not be different in any way.
///
/// To be more specific: when a [`Float`] is wrapped in a `ComparableFloat`,
/// - `NaN` is not equal to any other [`Float`], but equal to itself;
/// - Positive and negative zero are not equal to each other;
/// - Ordering is total. Negative zero is ordered to be smaller than positive zero, and `NaN` is
///   arbitrarily ordered to be between the two zeros;
/// - Two [`Float`]s with different precisions but representing the same value are unequal, and the
///   one with the greater precision is ordered to be larger;
/// - The hashing function is compatible with equality.
///
/// The analogous wrapper for primitive floats is
/// [`NiceFloat`](malachite_base::num::float::NiceFloat). However,
/// [`NiceFloat`](malachite_base::num::float::NiceFloat) also facilitates better string conversion,
/// something that isn't necessary for [`Float`]s.
///
/// `ComparableFloat` owns its float. This is useful in many cases, for example if you want to use
/// [`Float`]s as keys in a hash map. In other situations, it is better to use
/// [`ComparableFloatRef`], which only has a reference to its float.
#[derive(Clone)]
pub struct ComparableFloat(pub Float);
267
/// `ComparableFloatRef` is a wrapper around a [`Float`], taking the [`Float`] by reference.
///
/// See the [`ComparableFloat`] documentation for details.
#[derive(Clone)]
pub struct ComparableFloatRef<'a>(pub &'a Float);
273
274impl ComparableFloat {
275 pub const fn as_ref(&self) -> ComparableFloatRef<'_> {
276 ComparableFloatRef(&self.0)
277 }
278}
279
280impl Deref for ComparableFloat {
281 type Target = Float;
282
283 /// Allows a [`ComparableFloat`] to dereference to a [`Float`].
284 ///
285 /// ```
286 /// use malachite_base::num::basic::traits::One;
287 /// use malachite_float::{ComparableFloat, Float};
288 ///
289 /// let x = ComparableFloat(Float::ONE);
290 /// assert_eq!(*x, Float::ONE);
291 /// ```
292 fn deref(&self) -> &Float {
293 &self.0
294 }
295}
296
297impl Deref for ComparableFloatRef<'_> {
298 type Target = Float;
299
300 /// Allows a [`ComparableFloatRef`] to dereference to a [`Float`].
301 ///
302 /// ```
303 /// use malachite_base::num::basic::traits::One;
304 /// use malachite_float::{ComparableFloatRef, Float};
305 ///
306 /// let x = Float::ONE;
307 /// let y = ComparableFloatRef(&x);
308 /// assert_eq!(*y, Float::ONE);
309 /// ```
310 fn deref(&self) -> &Float {
311 self.0
312 }
313}
314
315#[allow(clippy::type_repetition_in_bounds)]
316#[doc(hidden)]
317pub fn emulate_float_to_float_fn<T: PrimitiveFloat, F: Fn(Float, u64) -> (Float, Ordering)>(
318 f: F,
319 x: T,
320) -> T
321where
322 Float: From<T> + PartialOrd<T>,
323 for<'a> T: ExactFrom<&'a Float> + RoundingFrom<&'a Float>,
324{
325 let x = Float::from(x);
326 let (mut result, o) = f(x.clone(), T::MANTISSA_WIDTH + 1);
327 if !result.is_normal() {
328 return T::exact_from(&result);
329 }
330 let e = i64::from(<&Float as SciMantissaAndExponent<Float, i32, _>>::sci_exponent(&result));
331 if e < T::MIN_NORMAL_EXPONENT {
332 if e < T::MIN_EXPONENT {
333 let rm =
334 if e == T::MIN_EXPONENT - 1 && result.significand_ref().unwrap().is_power_of_2() {
335 let down = if result > T::ZERO { Less } else { Greater };
336 if o == down { Up } else { Down }
337 } else {
338 Nearest
339 };
340 return T::rounding_from(&result, rm).0;
341 }
342 result = f(x, T::max_precision_for_sci_exponent(e)).0;
343 }
344 if result > T::MAX_FINITE {
345 T::INFINITY
346 } else if result < -T::MAX_FINITE {
347 T::NEGATIVE_INFINITY
348 } else {
349 T::exact_from(&result)
350 }
351}
352
353#[allow(clippy::type_repetition_in_bounds)]
354#[doc(hidden)]
355pub fn emulate_float_float_to_float_fn<
356 T: PrimitiveFloat,
357 F: Fn(Float, Float, u64) -> (Float, Ordering),
358>(
359 f: F,
360 x: T,
361 y: T,
362) -> T
363where
364 Float: From<T> + PartialOrd<T>,
365 for<'a> T: ExactFrom<&'a Float> + RoundingFrom<&'a Float>,
366{
367 let x = Float::from(x);
368 let y = Float::from(y);
369 let (mut result, o) = f(x.clone(), y.clone(), T::MANTISSA_WIDTH + 1);
370 if !result.is_normal() {
371 return T::exact_from(&result);
372 }
373 let e = i64::from(<&Float as SciMantissaAndExponent<Float, i32, _>>::sci_exponent(&result));
374 if e < T::MIN_NORMAL_EXPONENT {
375 if e < T::MIN_EXPONENT {
376 let rm =
377 if e == T::MIN_EXPONENT - 1 && result.significand_ref().unwrap().is_power_of_2() {
378 let down = if result > T::ZERO { Less } else { Greater };
379 if o == down { Up } else { Down }
380 } else {
381 Nearest
382 };
383 return T::rounding_from(&result, rm).0;
384 }
385 result = f(x, y, T::max_precision_for_sci_exponent(e)).0;
386 }
387 if result > T::MAX_FINITE {
388 T::INFINITY
389 } else if result < -T::MAX_FINITE {
390 T::NEGATIVE_INFINITY
391 } else {
392 T::exact_from(&result)
393 }
394}
395
396#[allow(clippy::type_repetition_in_bounds)]
397#[doc(hidden)]
398pub fn emulate_rational_to_float_fn<T: PrimitiveFloat, F: Fn(&Rational, u64) -> (Float, Ordering)>(
399 f: F,
400 x: &Rational,
401) -> T
402where
403 Float: PartialOrd<T>,
404 for<'a> T: ExactFrom<&'a Float> + RoundingFrom<&'a Float>,
405{
406 let (mut result, o) = f(x, T::MANTISSA_WIDTH + 1);
407 if !result.is_normal() {
408 return T::exact_from(&result);
409 }
410 let e = i64::from(<&Float as SciMantissaAndExponent<Float, i32, _>>::sci_exponent(&result));
411 if e < T::MIN_NORMAL_EXPONENT {
412 if e < T::MIN_EXPONENT {
413 let rm =
414 if e == T::MIN_EXPONENT - 1 && result.significand_ref().unwrap().is_power_of_2() {
415 let down = if result > T::ZERO { Less } else { Greater };
416 if o == down { Up } else { Down }
417 } else {
418 Nearest
419 };
420 return T::rounding_from(&result, rm).0;
421 }
422 result = f(x, T::max_precision_for_sci_exponent(e)).0;
423 }
424 if result > T::MAX_FINITE {
425 T::INFINITY
426 } else if result < -T::MAX_FINITE {
427 T::NEGATIVE_INFINITY
428 } else {
429 T::exact_from(&result)
430 }
431}
432
433#[allow(clippy::type_repetition_in_bounds)]
434#[doc(hidden)]
435pub fn emulate_rational_rational_to_float_fn<
436 T: PrimitiveFloat,
437 F: Fn(&Rational, &Rational, u64) -> (Float, Ordering),
438>(
439 f: F,
440 x: &Rational,
441 y: &Rational,
442) -> T
443where
444 Float: PartialOrd<T>,
445 for<'a> T: ExactFrom<&'a Float> + RoundingFrom<&'a Float>,
446{
447 let (mut result, o) = f(x, y, T::MANTISSA_WIDTH + 1);
448 if !result.is_normal() {
449 return T::exact_from(&result);
450 }
451 let e = i64::from(<&Float as SciMantissaAndExponent<Float, i32, _>>::sci_exponent(&result));
452 if e < T::MIN_NORMAL_EXPONENT {
453 if e < T::MIN_EXPONENT {
454 let rm =
455 if e == T::MIN_EXPONENT - 1 && result.significand_ref().unwrap().is_power_of_2() {
456 let down = if result > T::ZERO { Less } else { Greater };
457 if o == down { Up } else { Down }
458 } else {
459 Nearest
460 };
461 return T::rounding_from(&result, rm).0;
462 }
463 result = f(x, y, T::max_precision_for_sci_exponent(e)).0;
464 }
465 if result > T::MAX_FINITE {
466 T::INFINITY
467 } else if result < -T::MAX_FINITE {
468 T::NEGATIVE_INFINITY
469 } else {
470 T::exact_from(&result)
471 }
472}
473
474/// Given the `(Float, Ordering)` result of an operation, determines whether an overflow occurred.
475///
476/// We're defining an overflow to occur whenever the actual result is outside the representable
477/// finite range, and is rounded to either infinity or to the maximum or minimum representable
478/// finite value. An overflow can present itself in four ways:
479/// - The result is $\infty$ and the `Ordering` is `Greater`
480/// - The result is $-\infty$ and the `Ordering` is `Less`
481/// - The result is the largest finite value (of any `Float` with its precision) and the `Ordering`
482/// is `Less`
483/// - The result is the smallest (most negative) finite value (of any `Float` with its precision)
484/// and the `Ordering` is `Greater`
485///
486/// # Worst-case complexity
487/// $T(n) = O(n)$
488///
489/// $M(n) = O(1)$
490///
491/// where $T$ is time, $M$ is additional memory, and $n$ is `self.significant_bits()`.
492///
493/// # Examples
494/// ```
495/// use malachite_base::num::basic::traits::{Infinity, NegativeInfinity, One};
496/// use malachite_float::{test_overflow, Float};
497/// use std::cmp::Ordering::*;
498///
499/// assert!(test_overflow(&Float::INFINITY, Greater));
500/// assert!(test_overflow(&Float::NEGATIVE_INFINITY, Less));
501/// assert!(test_overflow(&Float::max_finite_value_with_prec(10), Less));
502/// assert!(test_overflow(
503/// &-Float::max_finite_value_with_prec(10),
504/// Greater
505/// ));
506///
507/// assert!(!test_overflow(&Float::INFINITY, Equal));
508/// assert!(!test_overflow(&Float::ONE, Less));
509/// ```
510pub fn test_overflow(result: &Float, o: Ordering) -> bool {
511 if o == Equal {
512 return false;
513 }
514 *result == Float::INFINITY && o == Greater
515 || *result == Float::NEGATIVE_INFINITY && o == Less
516 || *result > 0u32 && result.abs_is_max_finite_value_with_prec() && o == Less
517 || *result < 0u32 && result.abs_is_max_finite_value_with_prec() && o == Greater
518}
519
520/// Given the `(Float, Ordering)` result of an operation, determines whether an underflow occurred.
521///
522/// We're defining an underflow to occur whenever the actual result is outside the representable
523/// finite range, and is rounded to zero, to the minimum positive value, or to the maximum negative
524/// value. An underflow can present itself in four ways:
525/// - The result is $0.0$ or $-0.0$ and the `Ordering` is `Less`
526/// - The result is $0.0$ or $-0.0$ and the `Ordering` is `Greater`
527/// - The result is the smallest positive value and the `Ordering` is `Greater`
528/// - The result is the largest (least negative) negative value and the `Ordering` is `Less`
529///
530/// # Worst-case complexity
531/// $T(n) = O(n)$
532///
533/// $M(n) = O(1)$
534///
535/// where $T$ is time, $M$ is additional memory, and $n$ is `self.significant_bits()`.
536///
537/// # Examples
538/// ```
539/// use malachite_base::num::basic::traits::{One, Zero};
540/// use malachite_float::{test_underflow, Float};
541/// use std::cmp::Ordering::*;
542///
543/// assert!(test_underflow(&Float::ZERO, Less));
544/// assert!(test_underflow(&Float::ZERO, Greater));
545/// assert!(test_underflow(&Float::min_positive_value_prec(10), Greater));
546/// assert!(test_underflow(&-Float::min_positive_value_prec(10), Less));
547///
548/// assert!(!test_underflow(&Float::ZERO, Equal));
549/// assert!(!test_underflow(&Float::ONE, Less));
550/// ```
551pub fn test_underflow(result: &Float, o: Ordering) -> bool {
552 if o == Equal {
553 return false;
554 }
555 *result == 0u32
556 || *result > 0u32 && result.abs_is_min_positive_value() && o == Greater
557 || *result < 0u32 && result.abs_is_min_positive_value() && o == Less
558}
559
/// Traits for arithmetic.
pub mod arithmetic;
#[macro_use]
/// Basic traits for working with [`Float`]s.
pub mod basic;
/// Traits for comparing [`Float`]s for equality or order.
pub mod comparison;
/// Functions that produce [`Float`] approximations of mathematical constants, using a given
/// precision and rounding mode.
pub mod constants;
/// Traits for converting to and from [`Float`]s, including converting [`Float`]s to and from
/// strings.
pub mod conversion;
/// Iterators that generate [`Float`]s without repetition.
pub mod exhaustive;
#[cfg(feature = "random")]
/// Iterators that generate [`Float`]s randomly.
pub mod random;

#[cfg(feature = "test_build")]
/// Code that is only compiled when the `test_build` feature is enabled.
pub mod test_util;