Skip to main content

f256/
lib.rs

1// ---------------------------------------------------------------------------
2// Copyright:   (c) 2021 ff. Michael Amrhein (michael@adrhinum.de)
3// License:     This program is part of a larger application. For license
4//              details please read the file LICENSE.TXT provided together
5//              with the application.
6// ---------------------------------------------------------------------------
7// $Source: src/lib.rs $
8// $Revision: 2026-02-24T11:48:31+01:00 $
9
10#![doc = include_str!("../README.md")]
11#![cfg_attr(not(feature = "std"), no_std)]
12// activate some rustc lints
13#![deny(non_ascii_idents)]
14#![deny(unsafe_code)]
15#![warn(missing_debug_implementations)]
16#![warn(missing_docs)]
17#![warn(trivial_casts, trivial_numeric_casts)]
18// TODO: switch to #![warn(unused)]
19#![allow(unused)]
20#![allow(dead_code)]
21// activate some clippy lints
22#![warn(clippy::cast_possible_truncation)]
23#![warn(clippy::cast_possible_wrap)]
24#![warn(clippy::cast_precision_loss)]
25#![warn(clippy::cast_sign_loss)]
26#![warn(clippy::cognitive_complexity)]
27#![warn(clippy::enum_glob_use)]
28#![warn(clippy::equatable_if_let)]
29#![warn(clippy::fallible_impl_from)]
30#![warn(clippy::if_not_else)]
31#![warn(clippy::if_then_some_else_none)]
32#![warn(clippy::implicit_clone)]
33#![warn(clippy::integer_division)]
34#![warn(clippy::large_const_arrays)]
35#![warn(clippy::manual_assert)]
36#![warn(clippy::match_same_arms)]
37#![warn(clippy::mismatching_type_param_order)]
38#![warn(clippy::missing_const_for_fn)]
39#![warn(clippy::missing_errors_doc)]
40#![warn(clippy::missing_panics_doc)]
41#![warn(clippy::multiple_crate_versions)]
42#![warn(clippy::must_use_candidate)]
43#![warn(clippy::needless_lifetimes)]
44#![warn(clippy::needless_pass_by_value)]
45#![warn(clippy::print_stderr)]
46#![warn(clippy::print_stdout)]
47#![warn(clippy::semicolon_if_nothing_returned)]
48#![warn(clippy::str_to_string)]
49#![warn(clippy::undocumented_unsafe_blocks)]
50#![warn(clippy::unicode_not_nfc)]
51#![warn(clippy::unimplemented)]
52#![warn(clippy::unseparated_literal_suffix)]
53#![warn(clippy::unused_self)]
54#![warn(clippy::unwrap_in_result)]
55#![warn(clippy::use_self)]
56#![warn(clippy::used_underscore_binding)]
57#![warn(clippy::wildcard_imports)]
58
59extern crate alloc;
60extern crate core;
61
62use core::{cmp::Ordering, convert::Into, num::FpCategory, ops::Neg};
63
64use crate::big_uint::{BigUInt, DivRem, HiLo, Parity, U1024, U128, U256, U512};
65
66mod big_uint;
67mod binops;
68pub mod consts;
69mod conv;
70mod fused_ops;
71mod math;
72#[cfg(feature = "num-traits")]
73mod num_traits;
74
75/// Precision level in relation to single precision float (f32) = 8
76pub(crate) const PREC_LEVEL: u32 = 8;
77/// Total number of bits = 256
78pub(crate) const TOTAL_BITS: u32 = 1_u32 << PREC_LEVEL;
79/// Number of exponent bits = 19
80pub(crate) const EXP_BITS: u32 = 4 * PREC_LEVEL - 13;
81/// Number of significand bits p = 237
82pub(crate) const SIGNIFICAND_BITS: u32 = TOTAL_BITS - EXP_BITS;
83/// Number of fraction bits = p - 1 = 236
84pub(crate) const FRACTION_BITS: u32 = SIGNIFICAND_BITS - 1;
85/// Maximum value of biased base 2 exponent = 0x7ffff = 524287
86pub(crate) const EXP_MAX: u32 = (1_u32 << EXP_BITS) - 1;
87/// Base 2 exponent bias = 0x3ffff = 262143
88pub(crate) const EXP_BIAS: u32 = EXP_MAX >> 1;
89/// Maximum value of base 2 exponent = 0x3ffff = 262143
90#[allow(clippy::cast_possible_wrap)]
91pub(crate) const EMAX: i32 = (EXP_MAX >> 1) as i32;
92/// Minimum value of base 2 exponent = -262142
93pub(crate) const EMIN: i32 = 1 - EMAX;
94/// Number of bits in hi u128
95pub(crate) const HI_TOTAL_BITS: u32 = TOTAL_BITS >> 1;
96/// Number of bits to shift right for sign = 127
97pub(crate) const HI_SIGN_SHIFT: u32 = HI_TOTAL_BITS - 1;
98/// Number of fraction bits in hi u128 = 108
99pub(crate) const HI_FRACTION_BITS: u32 = FRACTION_BITS - HI_TOTAL_BITS;
100/// Fraction bias in hi u128 = 2¹⁰⁸ = 0x1000000000000000000000000000
101pub(crate) const HI_FRACTION_BIAS: u128 = 1_u128 << HI_FRACTION_BITS;
102/// Fraction mask in hi u128 = 0xfffffffffffffffffffffffffff
103pub(crate) const HI_FRACTION_MASK: u128 = HI_FRACTION_BIAS - 1;
104/// Exponent mask in hi u128 = 0x7ffff000000000000000000000000000
105pub(crate) const HI_EXP_MASK: u128 = (EXP_MAX as u128) << HI_FRACTION_BITS;
106/// Sign mask in hi u128 = 0x80000000000000000000000000000000
107pub(crate) const HI_SIGN_MASK: u128 = 1_u128 << HI_SIGN_SHIFT;
108/// Abs mask in hi u128 = 0x7fffffffffffffffffffffffffffffff
109pub(crate) const HI_ABS_MASK: u128 = !HI_SIGN_MASK;
110/// Value of hi u128 for NaN = 0x7ffff800000000000000000000000000
111pub(crate) const NAN_HI: u128 =
112    HI_EXP_MASK | (1_u128 << (HI_FRACTION_BITS - 1));
113/// Value of hi u128 for Inf = 0x7ffff000000000000000000000000000
114pub(crate) const INF_HI: u128 = HI_EXP_MASK;
115/// Value of hi u128 for -Inf = 0xfffff000000000000000000000000000
116pub(crate) const NEG_INF_HI: u128 = HI_SIGN_MASK | HI_EXP_MASK;
117/// Value of hi u128 for epsilon = 0x3ff13000000000000000000000000000
118pub(crate) const EPSILON_HI: u128 =
119    ((EXP_BIAS - FRACTION_BITS) as u128) << HI_FRACTION_BITS;
120/// Value of hi u128 for MAX = 0x7fffefffffffffffffffffffffffffff
121pub(crate) const MAX_HI: u128 =
122    (1_u128 << (u128::BITS - 1)) - (1_u128 << HI_FRACTION_BITS) - 1;
123/// Binary exponent for integral values
124#[allow(clippy::cast_possible_wrap)]
125pub(crate) const INT_EXP: i32 = -(FRACTION_BITS as i32);
126/// Value of hi u128 of the smallest f256 value with no fractional part (2²³⁶)
127pub(crate) const MIN_NO_FRACT_HI: u128 =
128    ((EXP_BIAS + FRACTION_BITS) as u128) << HI_FRACTION_BITS;
129/// Minimum possible subnormal power of 10 exponent =
130/// ⌊(Eₘᵢₙ + 1 - p) × log₁₀(2)⌋.
131pub(crate) const MIN_GT_ZERO_10_EXP: i32 = -78984;
132/// Constant 5, used in tests.
133pub(crate) const FIVE: f256 = f256::from_u64(5);
134
135/// A 256-bit floating point type (specifically, the “binary256” type defined
136/// in IEEE 754-2008).
137///
138/// For details see [above](index.html).
139#[allow(non_camel_case_types)]
140#[derive(Clone, Copy, Default)]
141pub struct f256 {
142    pub(crate) bits: U256,
143}
144
145/// Some f256 constants (only used to hide the internals in the doc)
146const EPSILON: f256 = f256 {
147    bits: U256::new(EPSILON_HI, 0),
148};
149const MAX: f256 = f256 {
150    bits: U256::new(MAX_HI, u128::MAX),
151};
152const MIN: f256 = MAX.negated();
153const MIN_POSITIVE: f256 = f256 {
154    bits: U256::new(HI_FRACTION_BIAS, 0),
155};
156const MIN_GT_ZERO: f256 = f256 { bits: U256::ONE };
157const NAN: f256 = f256 {
158    bits: U256::new(NAN_HI, 0),
159};
160const INFINITY: f256 = f256 {
161    bits: U256::new(INF_HI, 0),
162};
163const NEG_INFINITY: f256 = f256 {
164    bits: U256::new(NEG_INF_HI, 0),
165};
166const ZERO: f256 = f256 { bits: U256::ZERO };
167const NEG_ZERO: f256 = ZERO.negated();
168pub(crate) const ONE_HALF: f256 = f256 {
169    bits: U256::new((((EXP_BIAS - 1) as u128) << HI_FRACTION_BITS), 0),
170};
171const ONE: f256 = f256 {
172    bits: U256::new(((EXP_BIAS as u128) << HI_FRACTION_BITS), 0),
173};
174const NEG_ONE: f256 = ONE.negated();
175const TWO: f256 = f256 {
176    bits: U256::new((((1 + EXP_BIAS) as u128) << HI_FRACTION_BITS), 0),
177};
178const TEN: f256 = f256::from_u64(10);
179
180#[allow(clippy::multiple_inherent_impl)]
181impl f256 {
182    /// The radix or base of the internal representation of `f256`.
183    pub const RADIX: u32 = 2;
184
185    /// Number of significant digits in base 2: 237.
186    pub const MANTISSA_DIGITS: u32 = SIGNIFICAND_BITS;
187
188    /// Number of significant digits in base 2: 237.
189    pub const SIGNIFICANT_DIGITS: u32 = SIGNIFICAND_BITS;
190
191    /// Approximate number of significant digits in base 10: ⌊log₁₀(2²³⁷)⌋.
192    pub const DIGITS: u32 = 71;
193
194    /// The difference between `1.0` and the next larger representable number:
195    /// 2⁻²³⁶ ≈ 9.055679 × 10⁻⁷².
196    pub const EPSILON: Self = EPSILON;
197
198    /// Largest finite `f256` value:  2²⁶²¹⁴⁴ − 2²⁶¹⁹⁰⁷ ≈ 1.6113 × 10⁷⁸⁹¹³.
199    pub const MAX: Self = MAX;
200
201    /// Smallest finite `f256` value: 2²⁶¹⁹⁰⁷ - 2²⁶²¹⁴⁴ ≈ -1.6113 × 10⁷⁸⁹¹³.
202    pub const MIN: Self = MIN;
203
204    /// Smallest positive normal `f256` value: 2⁻²⁶²¹⁴² ≈ 2.4824 × 10⁻⁷⁸⁹¹³.
205    pub const MIN_POSITIVE: Self = MIN_POSITIVE;
206
207    /// Smallest positive subnormal `f256` value: 2⁻²⁶²³⁷⁸ ≈ 2.248 × 10⁻⁷⁸⁹⁸⁴.
208    pub const MIN_GT_ZERO: Self = MIN_GT_ZERO;
209
210    /// Maximum possible power of 2 exponent: Eₘₐₓ + 1 = 2¹⁸ = 262144.
211    pub const MAX_EXP: i32 = EMAX + 1;
212
213    /// One greater than the minimum possible normal power of 2 exponent:
214    /// Eₘᵢₙ + 1 = -262141.
215    pub const MIN_EXP: i32 = EMIN + 1;
216
217    /// Maximum possible power of 10 exponent: ⌊(Eₘₐₓ + 1) × log₁₀(2)⌋.
218    pub const MAX_10_EXP: i32 = 78913;
219
220    /// Minimum possible normal power of 10 exponent:
221    /// ⌊(Eₘᵢₙ + 1) × log₁₀(2)⌋.
222    pub const MIN_10_EXP: i32 = -78912;
223
224    /// Not a Number (NaN).
225    ///
226    /// Note that IEEE-745 doesn't define just a single NaN value; a plethora
227    /// of bit patterns are considered to be NaN. Furthermore, the
228    /// standard makes a difference between a "signaling" and a "quiet"
229    /// NaN, and allows inspecting its "payload" (the unspecified bits in
230    /// the bit pattern). This implementation does not make such a
231    /// difference and uses exactly one bit pattern for NaN.
232    pub const NAN: Self = NAN;
233
234    /// Infinity (∞).
235    pub const INFINITY: Self = INFINITY;
236
237    /// Negative infinity (−∞).
238    pub const NEG_INFINITY: Self = NEG_INFINITY;
239
240    /// Additive identity (0.0).
241    pub const ZERO: Self = ZERO;
242
243    /// Negative additive identity (-0.0).
244    pub const NEG_ZERO: Self = NEG_ZERO;
245
246    /// Multiplicative identity (1.0).
247    pub const ONE: Self = ONE;
248
249    /// Multiplicative negator (-1.0).
250    pub const NEG_ONE: Self = NEG_ONE;
251
252    /// Equivalent of binary base (2.0).
253    pub const TWO: Self = TWO;
254
255    /// Equivalent of decimal base (10.0).
256    pub const TEN: Self = TEN;
257
258    /// Raw assembly from sign, exponent and significand.
259    #[allow(clippy::cast_possible_wrap)]
260    #[allow(clippy::cast_sign_loss)]
261    #[inline]
262    pub(crate) const fn new(
263        sign: u32,
264        exponent: i32,
265        significand: U256,
266    ) -> Self {
267        debug_assert!(sign == 0 || sign == 1);
268        debug_assert!(exponent >= EMIN - 1 && exponent <= EMAX);
269        debug_assert!(!significand.is_zero());
270        debug_assert!((significand.hi.0 >> HI_FRACTION_BITS) <= 1_u128);
271        let biased_exp = (exponent + EXP_BIAS as i32) as u128;
272        Self {
273            bits: U256::new(
274                (significand.hi.0 & HI_FRACTION_MASK)
275                    | (biased_exp << HI_FRACTION_BITS)
276                    | ((sign as u128) << HI_SIGN_SHIFT),
277                significand.lo.0,
278            ),
279        }
280    }
281
282    /// Construct a finite, non-zero `f256` value f from sign s, quantum
283    /// exponent t and integral significand c,
284    ///
285    /// where
286    ///
287    /// * p = 237
288    /// * Eₘₐₓ = 262143
289    /// * Eₘᵢₙ = 1 - Eₘₐₓ = -262142
290    /// * s ∈ {0, 1}
291    /// * Eₘᵢₙ - p + 1 <= t <= Eₘₐₓ - p + 1
292    /// * 0 <= c < 2ᵖ
293    ///
294    /// so that f = (-1)ˢ × 2ᵗ × c.
295    #[allow(clippy::cast_possible_wrap)]
296    #[allow(clippy::cast_sign_loss)]
297    pub(crate) fn encode(s: u32, mut t: i32, mut c: U256) -> Self {
298        debug_assert!(!c.is_zero());
299        // We have an integer based representation `(-1)ˢ × 2ᵗ × c` and need
300        // to transform it into a fraction based representation
301        // `(-1)ˢ × 2ᵉ × (1 + m × 2¹⁻ᵖ)`,
302        // where `Eₘᵢₙ <= e <= Eₘₐₓ` and `0 < m < 2ᵖ⁻¹`, or
303        // `(-1)ˢ × 2ᵉ × m × 2¹⁻ᵖ`,
304        // where `e = Eₘᵢₙ - 1` and `0 < m < 2ᵖ⁻¹`.
305
306        // 1. Compensate radix shift
307        t += FRACTION_BITS as i32;
308        // 2. Normalize significand
309        let nlz = c.leading_zeros();
310        // The position of the most significant bit is `256 - nlz - 1`. We
311        // need to shift it to the position of the hidden bit, which
312        // is `256 - EXP_BITS - 1`. So we have to shift by |nlz -
313        // EXP_BITS|.
314        match nlz.cmp(&EXP_BITS) {
315            Ordering::Greater => {
316                // Shift left.
317                let shift = (nlz - EXP_BITS);
318                if t >= EMIN + shift as i32 {
319                    c <<= shift;
320                    t -= shift as i32;
321                } else {
322                    // Number is subnormal
323                    c <<= (t - EMIN) as u32;
324                    t = EMIN - 1;
325                }
326            }
327            Ordering::Less => {
328                // Shift right and round.
329                let mut shift = (EXP_BITS - nlz);
330                t += shift as i32;
331                c = c.rounding_div_pow2(shift);
332                // Rounding may have caused significand to overflow.
333                if (c.hi.0 >> (HI_FRACTION_BITS + 1)) != 0 {
334                    t += 1;
335                    c >>= 1;
336                }
337            }
338            _ => {}
339        }
340        debug_assert!(
341            (EMIN - 1..=EMAX).contains(&t),
342            "Exponent limits exceeded: {t}"
343        );
344        // 3. Assemble struct f256
345        Self::new(s, t, c)
346    }
347
348    // Returns
349    // 2ⁿ for EMIN - FRACTION_BITS <= n <= EMAX
350    //  0 for n < EMIN - FRACTION_BITS
351    //  ∞ for n > EMAX
352    #[allow(clippy::cast_possible_wrap)]
353    #[allow(clippy::cast_sign_loss)]
354    pub(crate) const fn power_of_two(n: i32) -> Self {
355        const LOW_LIM: i32 = EMIN - FRACTION_BITS as i32;
356        match n {
357            // normal range
358            EMIN..=EMAX => Self {
359                bits: U256::new(
360                    ((n + EXP_BIAS as i32) as u128) << HI_FRACTION_BITS,
361                    0_u128,
362                ),
363            },
364            // sub-normal range
365            LOW_LIM..EMIN => Self {
366                bits: U256::power_of_two((n - LOW_LIM) as u32),
367            },
368            ..LOW_LIM => Self::ZERO,
369            _ => Self::INFINITY,
370        }
371    }
372
373    /// Only public for testing!!!
374    #[doc(hidden)]
375    #[must_use]
376    pub fn from_sign_exp_signif(s: u32, t: i32, c: (u128, u128)) -> Self {
377        debug_assert!(s == 0 || s == 1);
378        let c = U256::new(c.0, c.1);
379        if c.is_zero() {
380            if t == 0 {
381                return [Self::ZERO, Self::NEG_ZERO][s as usize];
382            }
383            if t == EMAX + 1 {
384                return [Self::INFINITY, Self::NEG_INFINITY][s as usize];
385            }
386        }
387        Self::encode(s, t, c)
388    }
389
390    /// Returns the sign bit of `self`: 0 = positive, 1 = negative.
391    #[inline]
392    pub(crate) const fn sign(&self) -> u32 {
393        (self.bits.hi.0 >> HI_SIGN_SHIFT) as u32
394    }
395
396    /// Returns the biased binary exponent of `self`.
397    #[inline]
398    pub(crate) const fn biased_exponent(&self) -> u32 {
399        ((self.bits.hi.0 & HI_EXP_MASK) >> HI_FRACTION_BITS) as u32
400    }
401
402    /// Returns the quantum exponent of `self`.
403    /// Pre-condition: `self` is finite!
404    #[inline]
405    #[allow(clippy::cast_possible_wrap)]
406    pub(crate) const fn quantum_exponent(&self) -> i32 {
407        debug_assert!(
408            self.is_finite(),
409            "Attempt to extract quantum exponent from Infinity or NaN."
410        );
411        const TOTAL_BIAS: i32 = EXP_BIAS as i32 + FRACTION_BITS as i32;
412        let mut exp = self.biased_exponent() as i32;
413        exp += (exp == 0) as i32; // Adjust exp for subnormals.
414        (!self.eq_zero() as i32) * (exp - TOTAL_BIAS)
415    }
416
417    /// Returns the exponent of `self`.
418    /// Pre-condition: `self` is finite!
419    #[inline]
420    #[allow(clippy::cast_possible_wrap)]
421    pub(crate) const fn exponent(&self) -> i32 {
422        debug_assert!(
423            self.is_finite(),
424            "Attempt to extract exponent from Infinity or NaN."
425        );
426        let mut exp = self.biased_exponent() as i32;
427        exp += (exp == 0) as i32; // Adjust exp for subnormals.
428        (!self.eq_zero() as i32) * (exp - EXP_BIAS as i32)
429    }
430
431    /// Returns the fraction of `self`.
432    #[inline]
433    pub(crate) const fn fraction(&self) -> U256 {
434        U256::new(self.bits.hi.0 & HI_FRACTION_MASK, self.bits.lo.0)
435    }
436
437    /// Returns the integral significand of `self`.
438    /// Pre-condition: `self` is finite!
439    #[inline]
440    pub(crate) const fn integral_significand(&self) -> U256 {
441        debug_assert!(
442            self.is_finite(),
443            "Attempt to extract integral significand from Infinity or NaN."
444        );
445        let hidden_one =
446            ((self.biased_exponent() != 0) as u128) << HI_FRACTION_BITS;
447        U256::new(
448            (self.bits.hi.0 & HI_FRACTION_MASK) | hidden_one,
449            self.bits.lo.0,
450        )
451    }
452
453    /// Returns the significand of `self`.
454    /// Pre-condition: `self` is finite!
455    pub(crate) fn significand(&self) -> Self {
456        debug_assert!(
457            self.is_finite(),
458            "Attempt to extract significand from Infinity or NaN."
459        );
460        if self.eq_zero() {
461            return *self;
462        }
463        let mut biased_exp = EXP_BIAS;
464        let mut bits =
465            U256::new((self.bits.hi.0 & HI_FRACTION_MASK), self.bits.lo.0);
466        if self.biased_exponent() == 0 {
467            // self is subnormal
468            let shift = (bits.leading_zeros() - EXP_BITS);
469            bits = bits.shift_left(shift);
470            biased_exp -= shift + 1;
471        }
472        bits.hi.0 += (biased_exp as u128) << HI_FRACTION_BITS;
473        Self { bits }
474    }
475
476    /// Extract sign s, quantum exponent t and integral significand c from
477    /// a finite `f256` f,
478    ///
479    /// where
480    ///
481    /// * p = 237
482    /// * Eₘₐₓ = 262143
483    /// * Eₘᵢₙ = 1 - Eₘₐₓ = -262142
484    /// * s ∈ {0, 1}
485    /// * Eₘᵢₙ - p + 1 <= t <= Eₘₐₓ - p + 1
486    /// * 0 <= c < 2ᵖ
487    ///
488    /// so that (-1)ˢ × 2ᵗ × c = f.
489    #[allow(clippy::cast_possible_wrap)]
490    pub(crate) fn decode(&self) -> (u32, i32, U256) {
491        debug_assert!(
492            self.is_finite(),
493            "Attempt to extract sign, exponent and significand from Infinity \
494             or NaN."
495        );
496        // We have a fraction based representation
497        // `(-1)ˢ × 2ᵉ × (1 + m × 2¹⁻ᵖ)`, where `Eₘᵢₙ <= e <= Eₘₐₓ` and
498        // `0 < m < 2ᵖ⁻¹`
499        // or
500        // `(-1)ˢ × 2ᵉ × m × 2¹⁻ᵖ`, where `e = Eₘᵢₙ - 1` and
501        // `0 < m < 2ᵖ⁻¹`
502        // and need to transform it into an integer based representation
503        // `(-1)ˢ × 2ᵗ × c`.
504        let (s, mut t, mut c) = split_f256_enc(self);
505        if !c.is_zero() {
506            let ntz = c.trailing_zeros();
507            c >>= ntz;
508            t += ntz as i32;
509        }
510        (s, t, c)
511    }
512
513    /// Only public for testing!!!
514    #[doc(hidden)]
515    #[must_use]
516    pub fn as_sign_exp_signif(&self) -> (u32, i32, (u128, u128)) {
517        let (s, t, c) = self.decode();
518        (s, t, (c.hi.0, c.lo.0))
519    }
520
521    /// Returns `true` if this value is `NaN`.
522    #[must_use]
523    #[inline]
524    pub const fn is_nan(self) -> bool {
525        ((self.bits.hi.0 & HI_ABS_MASK) | (self.bits.lo.0 != 0) as u128)
526            > HI_EXP_MASK
527    }
528
529    /// Returns `true` if this value is positive infinity or negative
530    /// infinity, and `false` otherwise.
531    #[must_use]
532    #[inline]
533    pub const fn is_infinite(self) -> bool {
534        (self.bits.hi.0 & HI_ABS_MASK) == HI_EXP_MASK && self.bits.lo.0 == 0
535    }
536
537    /// Returns `true` if this number is neither infinite nor NaN.
538    #[must_use]
539    #[inline]
540    pub const fn is_finite(self) -> bool {
541        (self.bits.hi.0 & HI_EXP_MASK) != HI_EXP_MASK
542    }
543
544    /// Returns `true` if the number is subnormal.
545    #[must_use]
546    #[inline]
547    pub const fn is_subnormal(self) -> bool {
548        (self.bits.hi.0 & HI_EXP_MASK) == 0 && !self.eq_zero()
549    }
550
551    /// Returns `true` if the number is neither zero, infinite, subnormal, or
552    /// NaN.
553    #[must_use]
554    #[inline]
555    pub const fn is_normal(self) -> bool {
556        self.biased_exponent().wrapping_sub(1) < EXP_MAX - 1
557    }
558
559    /// Returns the floating point category of the number. If only one
560    /// property is going to be tested, it is generally faster to use the
561    /// specific predicate instead.
562    #[inline]
563    #[must_use]
564    #[allow(clippy::match_overlapping_arm)]
565    pub const fn classify(&self) -> FpCategory {
566        let abs_bits_sticky = abs_bits_sticky(&abs_bits(self));
567        match abs_bits_sticky {
568            0 => FpCategory::Zero,
569            INF_HI => FpCategory::Infinite,
570            NAN_HI => FpCategory::Nan,
571            ..=HI_FRACTION_MASK => FpCategory::Subnormal,
572            _ => FpCategory::Normal,
573        }
574    }
575
576    /// Returns `true` if `self` is equal to `+0.0` or `-0.0`.
577    #[must_use]
578    #[inline]
579    pub const fn eq_zero(self) -> bool {
580        (self.bits.hi.0 << 1) == 0 && self.bits.lo.0 == 0
581    }
582
583    /// Returns `true` if `self` is either not a number, infinite or equal to
584    /// zero.
585    #[must_use]
586    #[inline]
587    pub const fn is_special(self) -> bool {
588        (self.bits.hi.0 & HI_ABS_MASK | (self.bits.lo.0 != 0) as u128)
589            .wrapping_sub(1)
590            >= MAX_HI
591    }
592
593    /// Returns `true` if `self` has a positive sign, including `+0.0`,
594    /// positive infinity and NaN.
595    #[must_use]
596    #[inline]
597    pub const fn is_sign_positive(self) -> bool {
598        self.bits.hi.0 < HI_SIGN_MASK
599    }
600
601    /// Returns `true` if `self` has a negative sign, including `-0.0` and
602    /// negative infinity.
603    #[must_use]
604    #[inline]
605    pub const fn is_sign_negative(self) -> bool {
606        self.bits.hi.0 >= HI_SIGN_MASK
607    }
608
609    /// Returns `true` if `self` represents an integer, i. e. `self` ∈ ℤ.
610    #[must_use]
611    #[inline]
612    pub(crate) fn is_integer(self) -> bool {
613        is_int(&abs_bits(&self))
614    }
615
616    /// Returns parity of `self` if `self` represents an integer
617    #[allow(clippy::cast_possible_wrap)]
618    pub(crate) fn parity(&self) -> Option<Parity> {
619        if self.is_special() {
620            return [None, Some(Parity::Even)][self.eq_zero() as usize];
621        }
622        let (_, exp, signif) = split_f256_enc(self);
623        let ntz = signif.trailing_zeros();
624        const LIM: i32 = FRACTION_BITS as i32;
625        match exp + ntz as i32 {
626            // `self` is not an int
627            ..=-1 => None,
628            // self < 2²³⁶ => check last int bit
629            0..=LIM => Some((signif >> exp.unsigned_abs()).parity()),
630            // `self` >= 2²³⁶ => allways even
631            _ => Some(Parity::Even),
632        }
633    }
634
635    /// Returns the unit in the last place of `self`.
636    ///
637    /// ULP denotes the magnitude of the last significand digit of `self`.
638    #[must_use]
639    pub fn ulp(&self) -> Self {
640        let abs_bits_self = abs_bits(self);
641        let mut exp_bits = exp_bits(&abs_bits_self);
642        if exp_bits < EXP_MAX {
643            // `self` is finite.
644            let mut bits = U256::new(HI_FRACTION_BIAS, 0_u128);
645            let sh = FRACTION_BITS
646                .saturating_sub(exp_bits - norm_bit(&abs_bits_self));
647            exp_bits = exp_bits.saturating_sub(FRACTION_BITS + 1);
648            bits >>= sh;
649            bits.hi.0 += (exp_bits as u128) << HI_FRACTION_BITS;
650            Self { bits }
651        } else {
652            // `self` is infinite or nan.
653            NAN
654        }
655    }
656
657    /// Returns true, if |self - other| <= self.ulp()
658    #[must_use]
659    #[inline]
660    pub(crate) fn almost_eq(&self, other: &Self) -> bool {
661        (self - other).abs() <= self.ulp()
662    }
663
664    /// Returns true, if |self - other| <= self.ulp() * 2ⁿ
665    /// Only public for testing!!!
666    #[doc(hidden)]
667    #[must_use]
668    #[inline]
669    pub fn diff_within_n_bits(&self, other: &Self, n: u32) -> bool {
670        (self - other).abs() <= self.ulp().mul_pow2(n)
671    }
672
673    /// Returns the reciprocal (multiplicative inverse) of `self`.
674    ///
675    /// # Examples
676    ///
677    /// ```
678    /// # use f256::f256;
679    /// let f = f256::from(16);
680    /// let r = f256::from(0.0625);
681    /// assert_eq!(f.recip(), r);
682    ///
683    /// assert_eq!(f256::INFINITY.recip(), f256::ZERO);
684    /// assert_eq!(f256::NEG_ZERO.recip(), f256::NEG_INFINITY);
685    /// assert!(f256::NAN.recip().is_nan());
686    /// ```
687    #[must_use]
688    #[inline]
689    pub fn recip(self) -> Self {
690        Self::ONE / self
691    }
692
693    /// Converts radians to degrees.
694    ///
695    /// Returns self * (180 / π)
696    #[must_use]
697    #[inline]
698    #[allow(clippy::cast_possible_wrap)]
699    pub fn to_degrees(self) -> Self {
700        // 1 rad = 180 / π ≅ M / 2²⁵⁰
701        const M: U256 = U256::new(
702            304636616676435425756912514760952666071,
703            69798147688063442975655060594812004816,
704        );
705        const SH: i32 = 250_i32;
706        let signif = self.integral_significand();
707        let exp = self.quantum_exponent();
708        let (lo, hi) = M.widening_mul(&signif);
709        let mut t = U512::from_hi_lo(hi, lo);
710        let sh = signif.msb() + 256 - SIGNIFICAND_BITS;
711        t = t.rounding_div_pow2(sh);
712        Self::encode(self.sign(), exp - SH + sh as i32, t.lo)
713    }
714
715    /// Converts degrees to radians.
716    ///
717    /// Returns self * (π / 180)
718    #[must_use]
719    #[inline]
720    #[allow(clippy::cast_possible_wrap)]
721    pub fn to_radians(self) -> Self {
722        // π / 180 ≅ M / 2²⁶¹
723        const M: U256 = U256::new(
724            190049526055994088508387621895443694809,
725            953738875812114979603059177117484306,
726        );
727        const SH: i32 = 261_i32;
728        let signif = self.integral_significand();
729        let exp = self.quantum_exponent();
730        let (lo, hi) = M.widening_mul(&signif);
731        let mut t = U512::from_hi_lo(hi, lo);
732        let sh = signif.msb() + 256 - SIGNIFICAND_BITS;
733        t = t.rounding_div_pow2(sh);
734        Self::encode(self.sign(), exp - SH + sh as i32, t.lo)
735    }
736
737    /// Returns the maximum of the two numbers, ignoring NaN.
738    ///
739    /// If one of the arguments is NaN, then the other argument is returned.
740    /// This follows the IEEE-754 2008 semantics for maxNum, except for
741    /// handling of signaling NaNs; this function handles all NaNs the
742    /// same way and avoids maxNum's problems with associativity.
743    #[must_use]
744    #[inline]
745    pub fn max(self, other: Self) -> Self {
746        if other > self || self.is_nan() {
747            return other;
748        }
749        self
750    }
751
752    /// Returns the minimum of the two numbers, ignoring NaN.
753    ///
754    /// If one of the arguments is NaN, then the other argument is returned.
755    /// This follows the IEEE-754 2008 semantics for minNum, except for
756    /// handling of signaling NaNs; this function handles all NaNs the
757    /// same way and avoids minNum's problems with associativity.
758    #[must_use]
759    #[inline]
760    pub fn min(self, other: Self) -> Self {
761        if other < self || self.is_nan() {
762            return other;
763        }
764        self
765    }
766
767    /// Raw transmutation to `(u128, u128)` ((self.bits.hi.0, self.bits.lo.0),
768    /// each in native endian order).
769    #[inline]
770    #[must_use]
771    pub const fn to_bits(&self) -> (u128, u128) {
772        (self.bits.hi.0, self.bits.lo.0)
773    }
774
775    /// Raw transmutation from `(u128, u128)` ((self.bits.hi.0,
776    /// self.bits.lo.0), each in native endian order).
777    #[inline]
778    #[must_use]
779    pub const fn from_bits(bits: (u128, u128)) -> Self {
780        Self {
781            bits: U256::new(bits.0, bits.1),
782        }
783    }
784
785    /// Return the memory representation of this floating point number as a
786    /// byte array in big-endian (network) byte order.
787    #[must_use]
788    #[inline]
789    #[allow(unsafe_code)]
790    pub const fn to_be_bytes(self) -> [u8; 32] {
791        let bytes =
792            [self.bits.hi.0.to_be_bytes(), self.bits.lo.0.to_be_bytes()];
793        // SAFETY: safe because size of [[u8; 16]; 2] == size of [u8; 32]
794        unsafe { core::mem::transmute(bytes) }
795    }
796
797    /// Return the memory representation of this floating point number as a
798    /// byte array in little-endian byte order.
799    #[must_use]
800    #[inline]
801    #[allow(unsafe_code)]
802    pub const fn to_le_bytes(self) -> [u8; 32] {
803        let bytes =
804            [self.bits.lo.0.to_le_bytes(), self.bits.hi.0.to_le_bytes()];
805        // SAFETY: safe because size of [[u8; 16]; 2] == size of [u8; 32]
806        unsafe { core::mem::transmute(bytes) }
807    }
808
809    /// Return the memory representation of this floating point number as a
810    /// byte array in native byte order.
811    #[must_use]
812    #[inline]
813    #[allow(unsafe_code)]
814    pub const fn to_ne_bytes(self) -> [u8; 32] {
815        let bits = self.to_bits();
816        // SAFETY: safe because size of (u128, u128) == size of [u8; 32]
817        unsafe { core::mem::transmute(bits) }
818    }
819
820    /// Create a floating point value from its representation as a byte array
821    /// in big endian.
822    #[must_use]
823    #[inline]
824    #[allow(unsafe_code)]
825    pub const fn from_be_bytes(bytes: [u8; 32]) -> Self {
826        // SAFETY: safe because size of [[u8; 16]; 2] == size of [u8; 32]
827        let bits: [[u8; 16]; 2] = unsafe { core::mem::transmute(bytes) };
828        Self {
829            bits: U256::new(
830                u128::from_be_bytes(bits[0]),
831                u128::from_be_bytes(bits[1]),
832            ),
833        }
834    }
835
836    /// Create a floating point value from its representation as a byte array
837    /// in little endian.
838    #[must_use]
839    #[inline]
840    #[allow(unsafe_code)]
841    pub const fn from_le_bytes(bytes: [u8; 32]) -> Self {
842        // SAFETY: safe because size of [[u8; 16]; 2] == size of [u8; 32]
843        let bits: [[u8; 16]; 2] = unsafe { core::mem::transmute(bytes) };
844        Self {
845            bits: U256::new(
846                u128::from_le_bytes(bits[1]),
847                u128::from_le_bytes(bits[0]),
848            ),
849        }
850    }
851
852    /// Create a floating point value from its representation as a byte array
853    /// in native endian.
854    #[must_use]
855    #[inline]
856    #[allow(unsafe_code)]
857    pub const fn from_ne_bytes(bytes: [u8; 32]) -> Self {
858        // SAFETY: safe because size of (u128, u128) == size of [u8; 32]
859        let bits: (u128, u128) = unsafe { core::mem::transmute(bytes) };
860        Self::from_bits(bits)
861    }
862
863    /// Return the ordering between `self` and `other`.
864    ///
865    /// Unlike the standard partial comparison between floating point numbers,
866    /// this comparison always produces an ordering in accordance to
867    /// the `totalOrder` predicate as defined in the IEEE 754 (2008 revision)
868    /// floating point standard. The values are ordered in the following
869    /// sequence:
870    ///
871    /// - negative NaN
872    /// - negative infinity
873    /// - negative numbers
874    /// - negative subnormal numbers
875    /// - negative zero
876    /// - positive zero
877    /// - positive subnormal numbers
878    /// - positive numbers
879    /// - positive infinity
880    /// - positive NaN.
881    ///
882    /// The ordering established by this function does not always agree with
883    /// the [`PartialOrd`] and [`PartialEq`] implementations of `f256`.
884    /// For example, they consider negative and positive zero equal, while
885    /// `total_cmp` doesn't.
886    #[must_use]
887    pub fn total_cmp(&self, other: &Self) -> Ordering {
888        // The internal representation of `f256` values gives - besides their
889        // sign - a total ordering following the intended mathematical
890        // ordering.
891        match (self.sign(), other.sign()) {
892            (0, 0) => self.bits.cmp(&other.bits),
893            (1, 0) => Ordering::Less,
894            (0, 1) => Ordering::Greater,
895            _ => other.bits.cmp(&self.bits),
896        }
897    }
898
899    /// Restrict a value to a certain interval unless it is NaN.
900    ///
901    /// Returns `max` if `self` is greater than `max`, and `min` if `self` is
902    /// less than `min`. Otherwise this returns `self`.
903    ///
904    /// Note that this function returns NaN if the initial value was NaN as
905    /// well.
906    ///
907    /// # Panics
908    ///
909    /// Panics if `min > max`, `min` is NaN, or `max` is NaN.
910    ///
911    /// # Examples
912    ///
913    /// ```
914    /// # use f256::f256;
915    /// let min = f256::EPSILON;
916    /// let max = f256::TWO;
917    /// let f = f256::ONE;
918    /// assert_eq!(f.clamp(min, max), f);
919    /// assert_eq!((-f).clamp(min, max), min);
920    ///
921    /// assert_eq!(f256::INFINITY.clamp(f256::MIN, f256::MAX), f256::MAX);
922    /// assert!(f256::NAN.clamp(f256::NEG_INFINITY, f256::INFINITY).is_nan());
923    //// ```
924    #[must_use]
925    #[inline]
926    pub fn clamp(self, min: Self, max: Self) -> Self {
927        assert!(min <= max);
928        if self < min {
929            min
930        } else if self > max {
931            max
932        } else {
933            self
934        }
935    }
936
937    /// Computes the absolute value of `self`.
938    ///
939    /// # Examples
940    ///
941    /// ```
942    /// # use f256::f256;
943    /// let f = f256::from(138);
944    /// assert_eq!(f.abs(), f);
945    /// assert_eq!((-f).abs(), f);
946    ///
947    /// assert_eq!(f256::MIN.abs(), f256::MAX);
948    /// assert_eq!(f256::NEG_INFINITY.abs(), f256::INFINITY);
949    /// assert!(f256::NAN.abs().is_nan());
950    //// ```
951    #[inline(always)]
952    #[must_use]
953    pub const fn abs(&self) -> Self {
954        Self {
955            bits: U256::new(self.bits.hi.0 & HI_ABS_MASK, self.bits.lo.0),
956        }
957    }
958
959    /// Returns a number composed of the magnitude of `self` and the sign of
960    /// `other`.
961    ///
962    /// Equal to `self` if the sign of `self` and `other` are the same,
963    /// otherwise equal to `-self`.
964    /// If `self` is a NaN, then a NaN with the same payload as `self` and
965    /// the sign bit of `other` is returned.
966    /// If `other` is a NaN, then this operation will still carry over its
967    /// sign into the result. Note that IEEE 754 doesn't assign any meaning to
968    /// the sign bit in case of a NaN, and as Rust doesn't guarantee that the
969    /// bit pattern of NaNs are conserved over arithmetic operations, the
970    /// result of `copysign` with `other` being a NaN might produce an
971    /// unexpected or non-portable result.
972    #[must_use = "Method does not mutate the original value."]
973    #[inline]
974    pub const fn copysign(self, other: Self) -> Self {
975        Self {
976            bits: U256::new(
977                (self.bits.hi.0 & HI_ABS_MASK)
978                    | (other.bits.hi.0 & HI_SIGN_MASK),
979                self.bits.lo.0,
980            ),
981        }
982    }
983
984    /// Returns a number that represents the sign of `self`.
985    ///
986    /// - `1.0` if the number is positive, `+0.0` or `INFINITY`
987    /// - `-1.0` if the number is negative, `-0.0` or `NEG_INFINITY`
988    /// - NaN if the number is NaN
989    #[must_use = "method does not mutate the original value"]
990    #[inline]
991    pub const fn signum(self) -> Self {
992        if self.is_nan() {
993            Self::NAN
994        } else {
995            Self::ONE.copysign(self)
996        }
997    }
998
999    // Returns
1000    // * self, if self is an integral value,
1001    // * value returned by lt1, if 0 < self < 1,
1002    // * return value of gt1 otherwise.
1003    #[must_use]
1004    fn as_integer(
1005        &self,
1006        lt1: fn(u32, U256) -> Self,
1007        gt1: fn(u32, U256) -> Self,
1008    ) -> Self {
1009        let mut abs_bits = abs_bits(self);
1010        if is_int(&abs_bits) || abs_bits.is_special() {
1011            *self
1012        } else {
1013            let sign = self.sign();
1014            if abs_bits.hi.0 < ONE.bits.hi.0 {
1015                // 0 < |self| < 1
1016                lt1(sign, abs_bits)
1017            } else {
1018                // 1 < |self| < 2²³⁶
1019                gt1(sign, abs_bits)
1020            }
1021        }
1022    }
1023
1024    /// Returns the integer part of `self`. This means that non-integer
1025    /// numbers are always truncated towards zero.
1026    ///
1027    /// # Examples
1028    ///
1029    /// ```
1030    /// # use f256::f256;
1031    /// let f = f256::from(177);
1032    /// let g = f256::from(177.503_f64);
1033    /// let h = -g;
1034    ///
1035    /// assert_eq!(f.trunc(), f);
1036    /// assert_eq!(g.trunc(), f);
1037    /// assert_eq!(h.trunc(), -f);
1038    //// ```
1039    #[must_use]
1040    pub fn trunc(&self) -> Self {
1041        self.as_integer(
1042            |sign, abs_bits| Self::ZERO,
1043            |sign, abs_bits| {
1044                let n_fract_bits =
1045                    FRACTION_BITS - (exp_bits(&abs_bits) - EXP_BIAS);
1046                let mut int_bits = (abs_bits >> n_fract_bits) << n_fract_bits;
1047                int_bits.hi.0 |= (sign as u128) << HI_SIGN_SHIFT;
1048                Self { bits: int_bits }
1049            },
1050        )
1051    }
1052
1053    /// Returns the fractional part of `self`.
1054    ///
1055    /// # Examples
1056    ///
1057    /// ```
1058    /// # use f256::f256;
1059    /// let f = f256::from(177);
1060    /// let g = f256::from(177.503_f64);
1061    /// let h = -g;
1062    ///
1063    /// assert_eq!(f.fract(), f256::ZERO);
1064    /// assert_eq!(g.fract(), g - f);
1065    /// assert_eq!(h.fract(), f - g);
1066    //// ```
1067    #[inline]
1068    #[must_use]
1069    pub fn fract(&self) -> Self {
1070        self - self.trunc()
1071    }
1072
1073    /// Returns the integer and the fractional part of `self`.
1074    #[inline]
1075    #[must_use]
1076    pub fn split(&self) -> (Self, Self) {
1077        let int_part = self.trunc();
1078        (int_part, self - int_part)
1079    }
1080
1081    /// Returns the smallest integer greater than or equal to `self`.
1082    ///
1083    /// # Examples
1084    ///
1085    /// ```
1086    /// # use f256::f256;
1087    /// let f = f256::from(28);
1088    /// let g = f256::from(27.04_f64);
1089    /// let h = -g;
1090    ///
1091    /// assert_eq!(f.ceil(), f);
1092    /// assert_eq!(g.ceil(), f);
1093    /// assert_eq!(h.ceil(), f256::ONE - f);
1094    //// ```
1095    #[must_use]
1096    pub fn ceil(&self) -> Self {
1097        self.as_integer(
1098            |sign, abs_bits| [Self::ONE, Self::ZERO][sign as usize],
1099            |sign, abs_bits| {
1100                let n_fract_bits =
1101                    FRACTION_BITS - (exp_bits(&abs_bits) - EXP_BIAS);
1102                let mut int_bits = abs_bits >> n_fract_bits;
1103                int_bits += &U256::new(0, (sign == 0) as u128);
1104                int_bits <<= n_fract_bits;
1105                int_bits.hi.0 |= (sign as u128) << HI_SIGN_SHIFT;
1106                Self { bits: int_bits }
1107            },
1108        )
1109    }
1110
1111    /// Returns the largest integer less than or equal to `self`.
1112    ///
1113    /// # Examples
1114    ///
1115    /// ```
1116    /// # use f256::f256;
1117    /// let f = f256::from(57);
1118    /// let g = f256::from(57.009_f64);
1119    /// let h = -g;
1120    ///
1121    /// assert_eq!(f.floor(), f);
1122    /// assert_eq!(g.floor(), f);
1123    /// assert_eq!(h.floor(), -f - f256::ONE);
1124    //// ```
1125    #[must_use]
1126    pub fn floor(&self) -> Self {
1127        self.as_integer(
1128            |sign, abs_bits| [Self::ZERO, Self::NEG_ONE][sign as usize],
1129            |sign, abs_bits| {
1130                let n_fract_bits =
1131                    FRACTION_BITS - (exp_bits(&abs_bits) - EXP_BIAS);
1132                let mut int_bits = abs_bits >> n_fract_bits;
1133                int_bits += &U256::new(0, sign as u128);
1134                int_bits <<= n_fract_bits;
1135                int_bits.hi.0 |= (sign as u128) << HI_SIGN_SHIFT;
1136                Self { bits: int_bits }
1137            },
1138        )
1139    }
1140
1141    /// Returns the nearest integer to `self`. Rounds half-way cases away from
1142    /// zero.
1143    ///
1144    /// # Examples
1145    ///
1146    /// ```
1147    /// # use f256::f256;
1148    /// let f = f256::from(27);
1149    /// let g = f256::from(26.704_f64);
1150    /// let h = f256::from(26.5_f64);
1151    /// assert_eq!(f.round(), f);
1152    /// assert_eq!(g.round(), f);
1153    /// assert_eq!(h.round(), f);
1154    //// ```
1155    #[must_use]
1156    pub fn round(&self) -> Self {
1157        self.as_integer(
1158            |sign, abs_bits| {
1159                if abs_bits.hi.0 < ONE_HALF.bits.hi.0 {
1160                    // 0 < |self| < ½
1161                    Self::ZERO
1162                } else {
1163                    // ½ <= |self| < 1
1164                    [Self::ONE, Self::NEG_ONE][sign as usize]
1165                }
1166            },
1167            |sign, abs_bits| {
1168                let n_fract_bits =
1169                    FRACTION_BITS - (exp_bits(&abs_bits) - EXP_BIAS);
1170                let tie = U256::ONE << (n_fract_bits - 1);
1171                let rem = abs_bits.rem_pow2(n_fract_bits);
1172                let mut int_bits = abs_bits >> n_fract_bits;
1173                if rem >= tie {
1174                    int_bits.incr();
1175                }
1176                int_bits <<= n_fract_bits;
1177                int_bits.hi.0 |= (sign as u128) << HI_SIGN_SHIFT;
1178                Self { bits: int_bits }
1179            },
1180        )
1181    }
1182
1183    /// Returns the nearest integer to `self`. Rounds half-way cases to the
1184    /// nearest even.
1185    ///
1186    /// # Examples
1187    ///
1188    /// ```
1189    /// # use f256::f256;
1190    /// let f = f256::from(28);
1191    /// let g = f256::from(27.704_f64);
1192    /// let h = f256::from(27.5_f64);
1193    /// let h = f256::from(28.5_f64);
1194    /// assert_eq!(f.round_tie_even(), f);
1195    /// assert_eq!(g.round_tie_even(), f);
1196    /// assert_eq!(h.round_tie_even(), f);
1197    //// ```
1198    #[must_use]
1199    pub fn round_tie_even(&self) -> Self {
1200        self.as_integer(
1201            |sign, abs_bits| {
1202                if abs_bits.hi.0 <= ONE_HALF.bits.hi.0 {
1203                    // 0 < |self| <= ½
1204                    Self::ZERO
1205                } else {
1206                    // ½ < |self| < 1
1207                    [Self::ONE, Self::NEG_ONE][sign as usize]
1208                }
1209            },
1210            |sign, abs_bits| {
1211                let n_fract_bits =
1212                    FRACTION_BITS - (exp_bits(&abs_bits) - EXP_BIAS);
1213                let mut int_bits = abs_bits.rounding_div_pow2(n_fract_bits);
1214                int_bits <<= n_fract_bits;
1215                int_bits.hi.0 |= (sign as u128) << HI_SIGN_SHIFT;
1216                Self { bits: int_bits }
1217            },
1218        )
1219    }
1220
1221    /// Returns the additive inverse of `self`.
1222    #[inline(always)]
1223    // TODO: Inline in impl Neg when trait fns can be const.
1224    pub(crate) const fn negated(&self) -> Self {
1225        Self {
1226            bits: U256::new(self.bits.hi.0 ^ HI_SIGN_MASK, self.bits.lo.0),
1227        }
1228    }
1229
1230    /// Calculates the midpoint (average) between `self` and `rhs`.
1231    ///
1232    /// This returns NaN when *either* argument is NaN or if a combination of
1233    /// +inf and -inf is provided as arguments.
1234    #[doc(alias = "average")]
1235    #[must_use]
1236    pub fn midpoint(self, other: Self) -> Self {
1237        const LO: f256 = f256::new(
1238            0,
1239            exp(&MIN_POSITIVE.bits) + 1,
1240            signif(&MIN_POSITIVE.bits),
1241        );
1242        const HI: f256 = f256::new(0, exp(&MAX.bits) - 1, signif(&MAX.bits));
1243        let (a, b) = (self, other);
1244        let abs_a = a.abs();
1245        let abs_b = b.abs();
1246        if abs_a <= HI && abs_b <= HI {
1247            // Overflow is impossible
1248            (a + b).div2()
1249        } else if abs_a < LO {
1250            // No need to halve `a` (would underflow)
1251            b.div2()
1252        } else if abs_b < LO {
1253            // No need to halve `b` (would underflow)
1254            a.div2()
1255        } else {
1256            // Safe to halve `a` and `b`
1257            (a.div2()) + (b.div2())
1258        }
1259    }
1260
1261    /// Compute the distance between the origin and a point (`x`, `y`) on the
1262    /// Euclidean plane. Equivalently, compute the length of the hypotenuse of
1263    /// a right-angle triangle with other sides having length `x.abs()` and
1264    /// `y.abs()`.
1265    #[inline(always)]
1266    #[must_use]
1267    pub fn hypot(self, other: Self) -> Self {
1268        fused_ops::hypot::hypot(&self, &other)
1269    }
1270
1271    /// Returns the least number greater than `self`.
1272    ///
1273    /// Maps the input value as follows:
1274    ///  - Self::NAN => Self::NAN
1275    ///  - Self::NEG_INFINITY ==> Self::MIN
1276    ///  - -Self::MIN_GT_ZERO => Self::NEG_ZERO
1277    ///  - Self::ZERO or Self::NEG_ZERO => Self::MIN_GT_ZERO
1278    ///  - Self::MAX or Self::INFINITY => Self::INFINITY
1279    ///  - otherwise => unique least value greater than `self`
1280    ///
1281    /// The identity `x.next_up() == -(-x).next_down()` holds for all
1282    /// non-NaN `x`. When `x` is finite `x == x.next_down().next_up()` also
1283    /// holds.
1284    #[doc(alias = "nextUp")]
1285    #[must_use]
1286    pub fn next_up(self) -> Self {
1287        if self.is_nan() || self == Self::INFINITY {
1288            return self;
1289        }
1290        let abs_bits = abs_bits(&self);
1291        if abs_bits.is_zero() {
1292            return Self::MIN_GT_ZERO;
1293        }
1294        let mut res = self;
1295        if res.bits.hi.0 == abs_bits.hi.0 {
1296            res.bits.incr();
1297        } else {
1298            res.bits.decr();
1299        }
1300        res
1301    }
1302
1303    /// Returns the greatest number less than `self`.
1304    ///
1305    /// Maps the input value as follows:
1306    ///  - Self::NAN => Self::NAN
1307    ///  - Self::INFINITY ==> Self::MAX
1308    ///  - Self::MIN_GT_ZERO => Self::ZERO
1309    ///  - Self::ZERO or Self::NEG_ZERO => -Self::MIN_GT_ZERO
1310    ///  - Self::MIN or Self::NEG_INFINITY => Self::NEG_INFINITY
1311    ///  - otherwise => unique greatest value less than `self`
1312    ///
1313    /// The identity `x.next_down() == -(-x).next_up()` holds for all
1314    /// non-NaN `x`. When `x` is finite `x == x.next_down().next_up()` also
1315    /// holds.
1316    #[doc(alias = "nextDown")]
1317    #[must_use]
1318    pub fn next_down(self) -> Self {
1319        if self.is_nan() || self == Self::NEG_INFINITY {
1320            return self;
1321        }
1322        let abs_bits = abs_bits(&self);
1323        if abs_bits.is_zero() {
1324            return -Self::MIN_GT_ZERO;
1325        }
1326        let mut res = self;
1327        if res.bits.hi.0 == abs_bits.hi.0 {
1328            res.bits.decr();
1329        } else {
1330            res.bits.incr();
1331        }
1332        res
1333    }
1334
1335    /// Returns 2 * `self`
1336    #[inline(always)]
1337    #[must_use]
1338    pub fn mul2(&self) -> Self {
1339        self.mul_pow2(1)
1340    }
1341
1342    /// Returns `self` * 2ⁿ
1343    #[must_use]
1344    #[allow(clippy::cast_possible_wrap)]
1345    pub fn mul_pow2(&self, n: u32) -> Self {
1346        let abs_bits = abs_bits(self);
1347        if abs_bits.is_special() {
1348            // self is either NaN, infinite or equal 0
1349            return *self;
1350        }
1351        // self is finite and non-zero.
1352        let exp_bits = exp_bits(&abs_bits);
1353        if exp_bits.saturating_add(n) >= EXP_MAX {
1354            return [Self::INFINITY, Self::NEG_INFINITY][self.sign() as usize];
1355        }
1356        if exp_bits == 0 {
1357            // self is subnornal
1358            const EXP: i32 = 1 - (EXP_BIAS as i32 + FRACTION_BITS as i32);
1359            let fraction = fraction(&abs_bits);
1360            let exp = EXP + n as i32;
1361            return Self::from_sign_exp_signif(
1362                self.sign(),
1363                exp,
1364                (fraction.hi.0, fraction.lo.0),
1365            );
1366        }
1367        // self is normal.
1368        Self {
1369            bits: U256::new(
1370                self.bits.hi.0 + ((n as u128) << HI_FRACTION_BITS),
1371                self.bits.lo.0,
1372            ),
1373        }
1374    }
1375
1376    /// Fused multiply-add.
1377    ///
1378    /// Computes `(self * f) + a` with only one rounding error, yielding a
1379    /// more accurate result than a non-fused multiply-add.
1380    #[inline(always)]
1381    #[must_use]
1382    pub fn mul_add(self, f: Self, a: Self) -> Self {
1383        fused_ops::fma::fma(&self, &f, &a)
1384    }
1385
1386    /// Fused sum of squares.
1387    ///
1388    /// Computes `(self * self) + (other * other)` with only one rounding
1389    /// error, yielding a more accurate result than a non-fused sum of
1390    /// squares.
1391    #[inline(always)]
1392    #[must_use]
1393    pub fn sum_of_squares(self, other: Self) -> Self {
1394        fused_ops::sos::sos(&self, &other)
1395    }
1396
1397    /// Computes `self * self` .
1398    #[inline(always)]
1399    #[must_use]
1400    pub fn square(self) -> Self {
1401        self * self
1402    }
1403
1404    /// Fused square-add.
1405    ///
1406    /// Computes `(self * self) + a` with only one rounding error, yielding a
1407    /// more accurate result than a non-fused square-add.
1408    #[inline(always)]
1409    #[must_use]
1410    pub fn square_add(self, a: Self) -> Self {
1411        fused_ops::fma::fma(&self, &self, &a)
1412    }
1413
1414    /// Returns `self` / 2 (rounded tie to even)
1415    #[inline(always)]
1416    #[must_use]
1417    pub fn div2(&self) -> Self {
1418        self.div_pow2(1)
1419    }
1420
1421    /// Returns `self` / 2ⁿ (rounded tie to even)
1422    #[must_use]
1423    #[allow(clippy::cast_possible_wrap)]
1424    pub fn div_pow2(&self, n: u32) -> Self {
1425        let abs_bits = abs_bits(self);
1426        if abs_bits.is_special() {
1427            // self is either NaN, infinite or equal 0
1428            return *self;
1429        }
1430        // self is finite and non-zero.
1431        let exp_bits = exp_bits(&abs_bits);
1432        if exp_bits <= n {
1433            // result is subnornal or underflows to zero
1434            const EXP: i32 = 1 - (EXP_BIAS as i32 + FRACTION_BITS as i32);
1435            let shr = n - exp_bits + norm_bit(&abs_bits);
1436            if shr > self.bits.msb() {
1437                return [Self::ZERO, Self::NEG_ZERO][self.sign() as usize];
1438            }
1439            let signif = signif(&abs_bits).rounding_div_pow2(shr);
1440            if signif.is_zero() {
1441                return [Self::ZERO, Self::NEG_ZERO][self.sign() as usize];
1442            }
1443            return Self::from_sign_exp_signif(
1444                self.sign(),
1445                EXP,
1446                (signif.hi.0, signif.lo.0),
1447            );
1448        }
1449        // self is normal.
1450        Self {
1451            bits: U256::new(
1452                self.bits.hi.0 - ((n as u128) << HI_FRACTION_BITS),
1453                self.bits.lo.0,
1454            ),
1455        }
1456    }
1457
1458    /// Calculates Euclidean division, the matching method for rem_euclid.
1459    ///
1460    /// This computes the integer n such that
1461    /// self = n * rhs + self.rem_euclid(rhs).
1462    /// In other words, the result is self / rhs rounded to the integer n
1463    /// such that self >= n * rhs.
1464    #[inline]
1465    #[must_use]
1466    pub fn div_euclid(self, rhs: Self) -> Self {
1467        (self / rhs).floor()
1468    }
1469
1470    /// Calculates the least nonnegative remainder of self (mod rhs).
1471    ///
1472    /// In particular, the return value r satisfies 0.0 <= r < rhs.abs() in
1473    /// most cases. However, due to a floating point round-off error it can
1474    /// result in r == rhs.abs(), violating the mathematical definition, if
1475    /// self is much smaller than rhs.abs() in magnitude and self < 0.0.
1476    /// This result is not an element of the function's codomain, but it is
1477    /// the closest floating point number in the real numbers and thus
1478    /// fulfills the property
1479    /// self == self.div_euclid(rhs) * rhs + self.rem_euclid(rhs)
1480    /// approximately.
1481    #[inline]
1482    #[must_use]
1483    pub fn rem_euclid(self, rhs: Self) -> Self {
1484        self.div_euclid(rhs).mul_add(-rhs, self)
1485    }
1486
1487    /// Computes the floating-point sum s := a ⊕ b and the floating-point
1488    /// error t := a + b − ( a ⊕ b ) so that s + t = a + b, where ⊕ denotes
1489    /// the addition rounded to nearest.
1490    ///
1491    /// Note:
1492    /// When a ⊕ b is infinite or not a number, NaN is returned as t.
1493    #[inline(always)]
1494    #[must_use]
1495    pub fn rn2sum(self, rhs: Self) -> (Self, Self) {
1496        sum(&self, &rhs)
1497    }
1498
1499    /// Computes the floating-point sum s := a ⊕ b and the floating-point
1500    /// error t := a + b − ( a ⊕ b ) so that s + t = a + b, where ⊕ denotes
1501    /// the addition rounded to nearest.
1502    ///
1503    /// Note:
1504    /// When a ⊕ b is infinite or not a number, NaN is returned as t.
1505    ///
1506    /// Pre-condition: |a| >= |b|
1507    #[inline(always)]
1508    #[must_use]
1509    pub fn frn2sum(self, rhs: Self) -> (Self, Self) {
1510        fast_sum(&self, &rhs)
1511    }
1512
1513    /// Computes the floating-point product p := a ⨂ b and the floating-point
1514    /// error t := a × b − ( a ⊗ b ) so that p + t = a × b, where ⨂ denotes
1515    /// the multiplication rounded to nearest.
1516    ///
1517    /// Note:
1518    /// When a ⊕ b is infinite or not a number, NaN is returned as t.
1519    ///
1520    /// Pre-condition: |a ⨂ b| > 0 => exp(a) + exp(b) >= Eₘᵢₙ + 236
1521    #[inline(always)]
1522    #[must_use]
1523    pub fn rn2mul(self, rhs: Self) -> (Self, Self) {
1524        fast_mul(&self, &rhs)
1525    }
1526}
1527
1528impl Neg for f256 {
1529    type Output = Self;
1530
1531    #[inline(always)]
1532    fn neg(self) -> Self::Output {
1533        self.negated()
1534    }
1535}
1536
1537impl Neg for &f256 {
1538    type Output = <f256 as Neg>::Output;
1539
1540    #[inline(always)]
1541    fn neg(self) -> Self::Output {
1542        self.negated()
1543    }
1544}
1545
1546// Some helper functions working on the binary encoded representation of an
1547// f256 value
1548
1549/// Returns the high bits of f reduced to its sign bit.
1550#[inline(always)]
1551pub(crate) const fn sign_bits_hi(f: &f256) -> u128 {
1552    f.bits.hi.0 & HI_SIGN_MASK
1553}
1554
1555/// Returns the representation of f.abs().
1556#[inline(always)]
1557pub(crate) const fn abs_bits(f: &f256) -> U256 {
1558    U256::new(f.bits.hi.0 & HI_ABS_MASK, f.bits.lo.0)
1559}
1560
1561/// Returns the high bits of `abs_bits` or'ed with 1 if the lower bits of
1562/// `abs_bits` != 0.
1563#[inline(always)]
1564pub(crate) const fn abs_bits_sticky(abs_bits: &U256) -> u128 {
1565    abs_bits.hi.0 | (abs_bits.lo.0 != 0) as u128
1566}
1567
1568/// Returns true if `abs_bits` represent an integer
1569#[allow(clippy::cast_possible_wrap)]
1570pub(crate) fn is_int(abs_bits: &U256) -> bool {
1571    *abs_bits == U256::ZERO ||
1572        // |self| >= 2²³⁶
1573        abs_bits.hi.0 >= MIN_NO_FRACT_HI ||
1574            // all fractional bits = 0
1575            exp(abs_bits) >=
1576                FRACTION_BITS.saturating_sub(abs_bits.trailing_zeros()) as i32
1577}
1578
1579pub(crate) trait BinEncSpecial {
1580    /// Returns true if self represents |Inf|, NaN or |0|.
1581    fn is_special(&self) -> bool;
1582}
1583
1584impl BinEncSpecial for u128 {
1585    #[inline(always)]
1586    fn is_special(&self) -> bool {
1587        self.wrapping_sub(1) >= MAX_HI
1588    }
1589}
1590
1591impl BinEncSpecial for U256 {
1592    #[inline(always)]
1593    fn is_special(&self) -> bool {
1594        abs_bits_sticky(self).is_special()
1595    }
1596}
1597
1598pub(crate) trait BinEncAnySpecial {
1599    /// Returns true if any element of self represents |Inf|, NaN or |0|.
1600    fn any_special(&self) -> bool;
1601
1602    /// Returns true if any element of self represents a subnormal.
1603    fn any_subnormal(&self) -> bool;
1604
1605    /// Returns true if any element of self represents a non-normal.
1606    fn any_non_normal(&self) -> bool;
1607}
1608
1609impl BinEncAnySpecial for (u128, u128) {
1610    #[inline(always)]
1611    fn any_special(&self) -> bool {
1612        self.0.wrapping_sub(1) >= MAX_HI || self.1.wrapping_sub(1) >= MAX_HI
1613    }
1614
1615    #[inline(always)]
1616    fn any_subnormal(&self) -> bool {
1617        self.0 <= HI_FRACTION_MASK || self.1 <= HI_FRACTION_MASK
1618    }
1619
1620    #[inline(always)]
1621    fn any_non_normal(&self) -> bool {
1622        self.any_special() || self.any_subnormal()
1623    }
1624}
1625
1626impl BinEncAnySpecial for (u128, u128, u128) {
1627    #[inline(always)]
1628    fn any_special(&self) -> bool {
1629        self.0.wrapping_sub(1) >= MAX_HI
1630            || self.1.wrapping_sub(1) >= MAX_HI
1631            || self.2.wrapping_sub(1) >= MAX_HI
1632    }
1633
1634    #[inline(always)]
1635    fn any_subnormal(&self) -> bool {
1636        self.0 <= HI_FRACTION_MASK
1637            || self.1 <= HI_FRACTION_MASK
1638            || self.2 <= HI_FRACTION_MASK
1639    }
1640
1641    #[inline(always)]
1642    fn any_non_normal(&self) -> bool {
1643        self.any_special() || self.any_subnormal()
1644    }
1645}
1646
1647/// Returns 0 if `abs_bits` represents a subnormal f256 or ZERO, 1 otherwise.
1648#[inline(always)]
1649pub(crate) const fn norm_bit(abs_bits: &U256) -> u32 {
1650    (abs_bits.hi.0 >= HI_FRACTION_BIAS) as u32
1651}
1652
1653/// Returns the biased exponent from `abs_bits`.
1654#[inline(always)]
1655pub(crate) const fn exp_bits(abs_bits: &U256) -> u32 {
1656    (abs_bits.hi.0 >> HI_FRACTION_BITS) as u32
1657}
1658
1659/// Returns the unbiased exponent from `abs_bits`.
1660#[inline(always)]
1661#[allow(clippy::cast_possible_wrap)]
1662pub(crate) const fn exp(abs_bits: &U256) -> i32 {
1663    debug_assert!(!abs_bits.is_zero());
1664    let mut exp = (abs_bits.hi.0 >> HI_FRACTION_BITS) as i32;
1665    exp + (exp == 0) as i32 - EXP_BIAS as i32
1666}
1667
1668/// Returns the fraction from `abs_bits`.
1669#[inline(always)]
1670pub(crate) const fn fraction(abs_bits: &U256) -> U256 {
1671    U256::new(abs_bits.hi.0 & HI_FRACTION_MASK, abs_bits.lo.0)
1672}
1673
1674/// Returns the integral significand from `abs_bits`.
1675#[inline(always)]
1676pub(crate) const fn signif(abs_bits: &U256) -> U256 {
1677    U256::new(
1678        (((abs_bits.hi.0 >= HI_FRACTION_BIAS) as u128) << HI_FRACTION_BITS)
1679            | (abs_bits.hi.0 & HI_FRACTION_MASK),
1680        abs_bits.lo.0,
1681    )
1682}
1683
1684/// Returns the normalized integral significand and the corresponding shift
1685/// from `abs_bits`.
1686#[inline(always)]
1687pub(crate) fn norm_signif(abs_bits: &U256) -> (U256, u32) {
1688    debug_assert!(!abs_bits.is_zero());
1689    let signif = signif(abs_bits);
1690    let shift = FRACTION_BITS - signif.msb();
1691    (signif.shift_left(shift), shift)
1692}
1693
1694/// Returns the normalized integral significand and the corresponding exponent
1695/// from `abs_bits`.
1696#[inline(always)]
1697#[allow(clippy::cast_possible_wrap)]
1698pub(crate) fn norm_signif_exp(abs_bits: &U256) -> (U256, i32) {
1699    let (signif, shift) = norm_signif(abs_bits);
1700    let exp = exp(abs_bits) - shift as i32;
1701    (signif, exp)
1702}
1703
1704/// Returns the left adjusted integral significand and the corresponding
1705/// shift from `abs_bits`.
1706#[inline(always)]
1707pub(crate) fn left_adj_signif(abs_bits: &U256) -> (U256, u32) {
1708    debug_assert!(!abs_bits.is_zero());
1709    let signif = signif(abs_bits);
1710    let shift = signif.leading_zeros();
1711    (signif.shift_left(shift), shift)
1712}
1713
1714/// Extract sign, quantum exponent and integral significand from f
1715#[allow(clippy::cast_possible_wrap)]
1716pub(crate) const fn split_f256_enc(f: &f256) -> (u32, i32, U256) {
1717    const TOTAL_BIAS: i32 = EXP_BIAS as i32 + FRACTION_BITS as i32;
1718    let sign = f.sign();
1719    let abs_bits = abs_bits(f);
1720    let exp_bits = exp_bits(&abs_bits);
1721    let fraction = fraction(&abs_bits);
1722    match (exp_bits, fraction) {
1723        (0, U256::ZERO) => (sign, 0, U256::ZERO),
1724        (0, _) => (sign, 1 - TOTAL_BIAS, fraction),
1725        (EXP_MAX, _) => (sign, exp_bits as i32, fraction),
1726        _ => (
1727            sign,
1728            exp_bits as i32 - TOTAL_BIAS,
1729            U256::new(fraction.hi.0 | HI_FRACTION_BIAS, fraction.lo.0),
1730        ),
1731    }
1732}
1733
1734/// Computes the rounded sum of two f256 values and the remainder.
1735///
1736/// Pre-condition: a >= b
1737#[inline]
1738pub(crate) fn fast_sum(a: &f256, b: &f256) -> (f256, f256) {
1739    debug_assert!(a.abs() >= b.abs());
1740    let s = a + b;
1741    if s.is_finite() {
1742        (s, b - (s - a))
1743    } else {
1744        (s, f256::NAN)
1745    }
1746}
1747
1748/// Computes the rounded sum of two f256 values and the remainder.
1749pub(crate) fn sum(a: &f256, b: &f256) -> (f256, f256) {
1750    let s = a + b;
1751    if s.is_finite() {
1752        let ta = a - (s - b);
1753        let tb = b - (s - a);
1754        let r = ta + tb;
1755        (s, r)
1756    } else {
1757        (s, f256::NAN)
1758    }
1759}
1760
1761/// Computes the rounded product of two f256 values and the remainder.
1762#[inline]
1763pub(crate) fn fast_mul(a: &f256, b: &f256) -> (f256, f256) {
1764    // debug_assert!(a.biased_exponent() + b.biased_exponent() >= EXP_BIAS);
1765    let p = a * b;
1766    if p.is_finite() {
1767        let r = a.mul_add(*b, -p);
1768        (p, r)
1769    } else {
1770        (p, f256::NAN)
1771    }
1772}
1773
1774#[cfg(test)]
1775mod repr_tests {
1776    use super::*;
1777
1778    #[test]
1779    fn test_zero() {
1780        let z = f256::ZERO;
1781        assert_eq!(z.sign(), 0);
1782        assert_eq!(z.quantum_exponent(), 0);
1783        assert_eq!(z.integral_significand(), U256::default());
1784        assert_eq!(z.decode(), (0, 0, U256::default()));
1785        assert_eq!(z.exponent(), 0);
1786        assert_eq!(z.significand(), f256::ZERO);
1787        assert!(z.is_integer());
1788        assert_eq!(z.signum(), f256::ONE);
1789    }
1790
1791    #[test]
1792    fn test_neg_zero() {
1793        let z = f256::NEG_ZERO;
1794        assert_eq!(z.sign(), 1);
1795        assert_eq!(z.quantum_exponent(), 0);
1796        assert_eq!(z.integral_significand(), U256::default());
1797        assert_eq!(z.decode(), (1, 0, U256::default()));
1798        assert_eq!(z.exponent(), 0);
1799        assert_eq!(z.significand(), f256::ZERO);
1800        assert!(z.is_integer());
1801        assert_eq!(z.signum(), f256::NEG_ONE);
1802    }
1803
1804    #[test]
1805    fn test_one() {
1806        let i = f256::ONE;
1807        assert_eq!(i.sign(), 0);
1808        assert_eq!(i.signum(), f256::ONE);
1809        assert_eq!(i.biased_exponent(), EXP_BIAS);
1810        assert_eq!(i.quantum_exponent(), INT_EXP);
1811        assert_eq!(
1812            i.integral_significand(),
1813            U256::new(1_u128 << HI_FRACTION_BITS, 0)
1814        );
1815        assert_eq!(i.decode(), (0, 0, U256::ONE));
1816        assert_eq!(i.exponent(), 0);
1817        assert_eq!(i.significand(), f256::ONE);
1818        assert!(i.is_integer());
1819    }
1820
1821    #[test]
1822    fn test_neg_one() {
1823        let i = f256::NEG_ONE;
1824        assert_eq!(i.sign(), 1);
1825        assert_eq!(i.signum(), f256::NEG_ONE);
1826        assert_eq!(i.biased_exponent(), EXP_BIAS);
1827        assert_eq!(i.quantum_exponent(), INT_EXP);
1828        assert_eq!(
1829            i.integral_significand(),
1830            U256::new(1_u128 << HI_FRACTION_BITS, 0)
1831        );
1832        assert_eq!(i.decode(), (1, 0, U256::ONE));
1833        assert_eq!(i.exponent(), 0);
1834        assert_eq!(i.significand(), f256::ONE);
1835        assert!(i.is_integer());
1836    }
1837
1838    #[test]
1839    fn test_normal_integral() {
1840        let i = f256::TWO;
1841        assert_eq!(i.sign(), 0);
1842        assert_eq!(i.signum(), f256::ONE);
1843        assert_eq!(i.biased_exponent(), EXP_BIAS + 1);
1844        assert_eq!(i.quantum_exponent(), INT_EXP + 1);
1845        assert_eq!(
1846            i.integral_significand(),
1847            U256::new(1_u128 << HI_FRACTION_BITS, 0)
1848        );
1849        assert_eq!(i.decode(), (0, 1, U256::ONE));
1850        assert_eq!(i.exponent(), 1);
1851        assert_eq!(i.significand(), f256::ONE);
1852        assert!(i.is_integer());
1853    }
1854
1855    #[test]
1856    fn test_normal_float() {
1857        let f = f256::from(-3.5_f64);
1858        assert_eq!(f.sign(), 1);
1859        assert_eq!(f.signum(), f256::NEG_ONE);
1860        assert_eq!(f.quantum_exponent(), -235);
1861        assert_eq!(
1862            f.integral_significand(),
1863            U256::new(567907468902246771870523036008448, 0)
1864        );
1865        assert_eq!(f.decode(), (1, -1, U256::new(0_u128, 7_u128)));
1866        assert_eq!(f.exponent(), 1);
1867        assert_eq!(f.significand(), f.abs() / f256::TWO);
1868        assert!(!f.is_integer());
1869    }
1870
1871    #[test]
1872    #[allow(clippy::cast_possible_wrap)]
1873    fn test_subnormal() {
1874        let f = f256::MIN_GT_ZERO;
1875        assert_eq!(f.sign(), 0);
1876        assert_eq!(f.signum(), f256::ONE);
1877        assert_eq!(f.quantum_exponent(), EMIN - FRACTION_BITS as i32);
1878        assert_eq!(f.integral_significand(), U256::ONE);
1879        assert_eq!(f.decode(), (0, EMIN - FRACTION_BITS as i32, U256::ONE));
1880        assert_eq!(f.exponent(), EMIN);
1881        assert_eq!(
1882            f.significand(),
1883            f256::from_sign_exp_signif(0, -(FRACTION_BITS as i32), (0, 1))
1884        );
1885        let f = f256::from_sign_exp_signif(
1886            1,
1887            EMIN - FRACTION_BITS as i32 + 17,
1888            (7, 29),
1889        );
1890        assert!(f.is_subnormal());
1891        assert_eq!(f.exponent(), EMIN);
1892        assert_eq!(
1893            f.significand(),
1894            f256::from_sign_exp_signif(0, 17 - (FRACTION_BITS as i32), (7, 29))
1895        );
1896    }
1897}
1898
1899#[cfg(test)]
1900mod encode_decode_tests {
1901    use super::*;
1902
1903    #[test]
1904    fn test_normal() {
1905        let sign = 1_u32;
1906        let exponent = -23_i32;
1907        let significand = U256::new(39, 10000730744);
1908        let f = f256::encode(sign, exponent, significand);
1909        let (s, t, c) = f.decode();
1910        let g = f256::encode(s, t, c);
1911        assert_eq!(f, g);
1912    }
1913
1914    #[test]
1915    fn test_subnormal() {
1916        let sign = 0_u32;
1917        let exponent = EMIN - 235_i32;
1918        let significand = U256::new(u128::MAX >> (EXP_BITS + 2), 0);
1919        let f = f256::encode(sign, exponent, significand);
1920        assert!(f.is_subnormal());
1921        let (s, t, c) = f.decode();
1922        let g = f256::encode(s, t, c);
1923        assert_eq!(f, g);
1924        let f = f256::MIN_GT_ZERO;
1925        let (s, t, c) = f.decode();
1926        let g = f256::encode(s, t, c);
1927        assert_eq!(f, g);
1928    }
1929}
1930
1931#[cfg(test)]
1932mod raw_bits_tests {
1933    use super::*;
1934
1935    #[test]
1936    fn test_to_from_bits() {
1937        let f = f256::TEN;
1938        let bits = f.to_bits();
1939        assert_eq!(bits.0, f.bits.hi.0);
1940        assert_eq!(bits.1, f.bits.lo.0);
1941        let g = f256::from_bits(bits);
1942        assert_eq!(f, g);
1943    }
1944
1945    #[test]
1946    fn test_to_from_ne_bytes() {
1947        let f = f256::TEN;
1948        let bytes = f.to_ne_bytes();
1949        let g = f256::from_ne_bytes(bytes);
1950        assert_eq!(f, g);
1951    }
1952
1953    #[test]
1954    fn test_to_from_be_bytes() {
1955        let f = f256::TEN;
1956        let bytes = f.to_be_bytes();
1957        let g = f256::from_be_bytes(bytes);
1958        assert_eq!(f, g);
1959    }
1960
1961    #[test]
1962    fn test_to_from_le_bytes() {
1963        let f = f256::TEN;
1964        let bytes = f.to_le_bytes();
1965        let g = f256::from_le_bytes(bytes);
1966        assert_eq!(f, g);
1967    }
1968}
1969
1970#[cfg(test)]
1971mod total_ordering_tests {
1972    use super::*;
1973
1974    #[test]
1975    fn test_total_ordering() {
1976        let values = [
1977            -f256::NAN,
1978            f256::NEG_INFINITY,
1979            f256::MIN,
1980            f256::NEG_ONE,
1981            -f256::MIN_POSITIVE,
1982            -f256::MIN_GT_ZERO,
1983            f256::NEG_ZERO,
1984            f256::ZERO,
1985            f256::MIN_GT_ZERO,
1986            f256::MIN_POSITIVE,
1987            f256::ONE,
1988            f256::MAX,
1989            f256::INFINITY,
1990            f256::NAN,
1991        ];
1992        for idx in 0..values.len() - 1 {
1993            let (a, b) = (values[idx], values[idx + 1]);
1994            assert!(a.total_cmp(&b).is_lt(), "{:?} !< {:?}", a.bits, b.bits);
1995        }
1996    }
1997}
1998
1999#[cfg(test)]
2000mod copysign_tests {
2001    use super::*;
2002
2003    #[test]
2004    fn test_non_nan() {
2005        assert_eq!(f256::TWO.copysign(f256::NEG_INFINITY), -f256::TWO);
2006        assert_eq!(f256::NEG_ONE.copysign(f256::TEN), f256::ONE);
2007        assert_eq!(f256::TEN.copysign(f256::TWO), f256::TEN);
2008        assert_eq!(f256::TEN.negated().copysign(-f256::TWO), -f256::TEN);
2009    }
2010
2011    #[test]
2012    fn test_nan() {
2013        let neg_nan = f256::NAN.copysign(f256::NEG_ONE);
2014        assert!(neg_nan.is_nan());
2015        assert!(neg_nan.is_sign_negative());
2016        assert!(f256::NAN.copysign(f256::NAN).is_nan());
2017        assert_eq!(f256::NEG_ONE.copysign(f256::NAN), f256::ONE);
2018        assert_eq!(f256::ONE.copysign(neg_nan), f256::NEG_ONE);
2019    }
2020}
2021
2022#[cfg(test)]
2023mod split_tests {
2024    use super::*;
2025
2026    #[test]
2027    fn test_normal() {
2028        let f = f256::from(17);
2029        let g = f256::from(17.625_f64);
2030        let h = -g;
2031        let (fi, ff) = f.split();
2032        assert_eq!(fi, f);
2033        assert_eq!(fi.quantum_exponent(), f.quantum_exponent());
2034        assert_eq!(ff, f256::ZERO);
2035        let (gi, gf) = g.split();
2036        assert_eq!(gi, f);
2037        assert_eq!(gi.quantum_exponent(), g.quantum_exponent());
2038        assert_eq!(gf, g - f);
2039        assert_eq!(h.split(), (-f, (f - g)));
2040    }
2041
2042    #[test]
2043    fn test_lt_1() {
2044        let f = f256::from(0.99999_f64);
2045        let (fi, ff) = f.split();
2046        assert_eq!(fi, f256::ZERO);
2047        assert_eq!(fi.quantum_exponent(), 0);
2048        assert_eq!(ff, f);
2049        assert_eq!(ff.quantum_exponent(), f.quantum_exponent());
2050    }
2051
2052    #[test]
2053    fn test_subnormal() {
2054        let f = f256::MIN_GT_ZERO;
2055        assert_eq!(f.split(), (f256::ZERO, f256::MIN_GT_ZERO));
2056    }
2057}
2058
2059#[cfg(test)]
2060mod ulp_tests {
2061    use super::*;
2062
2063    #[test]
2064    fn test_special() {
2065        assert_eq!(f256::ZERO.ulp(), MIN_GT_ZERO);
2066        assert!(f256::INFINITY.ulp().is_nan());
2067        assert!(f256::NEG_INFINITY.ulp().is_nan());
2068        assert!(f256::NAN.ulp().is_nan());
2069    }
2070
2071    #[test]
2072    #[allow(clippy::cast_possible_wrap)]
2073    fn test_normal() {
2074        assert_eq!(f256::ONE.ulp(), f256::EPSILON);
2075        assert_eq!(f256::TWO.ulp(), f256::TWO * f256::EPSILON);
2076        assert_eq!(f256::from(3).ulp(), f256::TWO * f256::EPSILON);
2077        assert_eq!(f256::TEN.ulp(), f256::from(8) * f256::EPSILON);
2078        assert_eq!(f256::MIN_POSITIVE.ulp(), f256::MIN_GT_ZERO);
2079        assert_eq!(
2080            f256::MIN.ulp(),
2081            f256::from_sign_exp_signif(0, EMAX - FRACTION_BITS as i32, (0, 1),)
2082        );
2083    }
2084
2085    #[test]
2086    fn test_subnormal() {
2087        let f = f256::MIN_POSITIVE - f256::MIN_GT_ZERO;
2088        assert_eq!(f.ulp(), f256::MIN_GT_ZERO);
2089        assert_eq!(f256::MIN_GT_ZERO.ulp(), f256::MIN_GT_ZERO);
2090    }
2091}
2092
2093#[cfg(test)]
2094mod next_up_down_tests {
2095    use super::*;
2096
2097    #[test]
2098    fn test_special() {
2099        assert!(f256::NAN.next_up().is_nan());
2100        assert!(f256::NAN.next_down().is_nan());
2101        assert_eq!(f256::NEG_ZERO.next_up(), MIN_GT_ZERO);
2102        assert_eq!(f256::NEG_ZERO.next_down(), -MIN_GT_ZERO);
2103        assert_eq!(f256::ZERO.next_up(), MIN_GT_ZERO);
2104        assert_eq!(f256::ZERO.next_down(), -MIN_GT_ZERO);
2105        assert_eq!(f256::INFINITY.next_up(), f256::INFINITY);
2106        assert_eq!(f256::INFINITY.next_down(), f256::MAX);
2107        assert_eq!(f256::NEG_INFINITY.next_up(), f256::MIN);
2108        assert_eq!(f256::NEG_INFINITY.next_down(), f256::NEG_INFINITY);
2109    }
2110
2111    #[test]
2112    fn test_overflow() {
2113        assert_eq!(f256::MAX.next_up(), f256::INFINITY);
2114    }
2115
2116    #[test]
2117    fn test_underflow() {
2118        assert_eq!(f256::MIN.next_down(), f256::NEG_INFINITY);
2119    }
2120
2121    #[test]
2122    fn test_normal() {
2123        assert_eq!(f256::ONE.next_up(), f256::ONE + f256::ONE.ulp());
2124        assert_eq!(f256::ONE.next_down(), f256::ONE - f256::EPSILON.div2());
2125        let f = f256::MAX.div_pow2(3);
2126        assert_eq!(f.next_up(), f + f.ulp());
2127        assert_eq!(f.next_down(), f - f.ulp());
2128        let f = f256::MIN / f256::TEN;
2129        assert_eq!(f.next_up().next_down(), f);
2130        assert_eq!(f.next_down().next_up(), f);
2131    }
2132
2133    #[test]
2134    fn test_subnormal() {
2135        assert_eq!((-f256::MIN_GT_ZERO).next_up(), f256::NEG_ZERO);
2136        assert_eq!(f256::MIN_GT_ZERO.next_down(), f256::ZERO);
2137        let f = f256::EPSILON.div_pow2(3);
2138        assert_eq!(f.next_up(), f + f.ulp());
2139        assert_eq!(f.next_down(), f - f.ulp().div2());
2140        let f = f256::MIN_GT_ZERO * f256::TEN;
2141        assert_eq!(f.next_up().next_down(), f);
2142        assert_eq!(f.next_down().next_up(), f);
2143    }
2144}
2145
2146#[cfg(test)]
2147mod parity_tests {
2148    use super::*;
2149
2150    #[test]
2151    fn test_parity() {
2152        assert_eq!(f256::ZERO.parity(), Some(Parity::Even));
2153        assert_eq!(f256::NEG_ZERO.parity(), Some(Parity::Even));
2154        assert_eq!(f256::TEN.parity(), Some(Parity::Even));
2155        assert_eq!(f256::NEG_ONE.parity(), Some(Parity::Odd));
2156        assert_eq!(f256::MAX.parity(), Some(Parity::Even));
2157        assert_eq!(f256::MIN.parity(), Some(Parity::Even));
2158        let f = f256::TWO.mul_pow2(FRACTION_BITS) - f256::ONE;
2159        assert_eq!(f.parity(), Some(Parity::Odd));
2160        assert!(f256::MIN_GT_ZERO.parity().is_none());
2161        assert!(f256::MIN_POSITIVE.parity().is_none());
2162        assert!(f256::NAN.parity().is_none());
2163        assert!(f256::NEG_INFINITY.parity().is_none());
2164        assert!(f256::INFINITY.parity().is_none());
2165        assert!(ONE_HALF.parity().is_none());
2166        assert!(EPSILON.parity().is_none());
2167    }
2168}
2169
2170#[cfg(test)]
2171mod midpoint_tests {
2172    use super::*;
2173
2174    #[test]
2175    #[allow(clippy::cognitive_complexity)]
2176    fn test_midpoint() {
2177        assert!(f256::NAN.midpoint(f256::ONE).is_nan());
2178        assert!(f256::ONE.midpoint(f256::NAN).is_nan());
2179        assert!(f256::INFINITY.midpoint(f256::NEG_INFINITY).is_nan());
2180        assert!(f256::NEG_INFINITY.midpoint(f256::INFINITY).is_nan());
2181        assert_eq!(f256::MAX.midpoint(f256::MAX), f256::MAX);
2182        assert_eq!(f256::MIN.midpoint(f256::MIN), f256::MIN);
2183        assert_eq!(
2184            f256::MIN_POSITIVE.midpoint(f256::MIN_POSITIVE),
2185            f256::MIN_POSITIVE
2186        );
2187        let max_half = f256::MAX.div2();
2188        assert_eq!(max_half.midpoint(max_half), max_half);
2189        assert_eq!(f256::MAX.midpoint(f256::NEG_ZERO), max_half);
2190        assert_eq!(f256::MAX.midpoint(f256::NEG_ONE), max_half);
2191        assert_eq!(f256::ONE.midpoint(f256::MIN_POSITIVE), ONE_HALF);
2192        assert_eq!(f256::MIN_POSITIVE.midpoint(f256::ONE), ONE_HALF);
2193        assert_eq!(
2194            f256::MIN_POSITIVE.midpoint(-f256::MIN_POSITIVE),
2195            f256::ZERO
2196        );
2197        let f = max_half.next_up();
2198        assert_eq!(f.midpoint(f256::MIN_POSITIVE), f.div2());
2199        assert_eq!(f256::MIN_POSITIVE.midpoint(f), f.div2());
2200    }
2201}
2202
2203#[cfg(test)]
2204mod mul_pow2_tests {
2205    use super::*;
2206
2207    #[test]
2208    fn test_special() {
2209        assert_eq!(f256::ZERO.mul_pow2(4), f256::ZERO);
2210        assert_eq!(f256::INFINITY.mul_pow2(1), f256::INFINITY);
2211        assert_eq!(f256::NEG_INFINITY.mul_pow2(1), f256::NEG_INFINITY);
2212        assert!(f256::NAN.mul_pow2(38).is_nan());
2213    }
2214
2215    #[test]
2216    fn test_normal() {
2217        let f = f256::TEN.mul_pow2(4);
2218        assert_eq!(f, f256::from(160));
2219        let g = f256::from(0.0793).mul_pow2(3);
2220        assert_eq!(g, f256::from(0.6344));
2221    }
2222
2223    #[test]
2224    fn test_overflow() {
2225        let f = f256::MAX.mul_pow2(1);
2226        assert_eq!(f, f256::INFINITY);
2227        let f = f256::MIN.mul_pow2(5);
2228        assert_eq!(f, f256::NEG_INFINITY);
2229    }
2230
2231    #[test]
2232    fn test_subnormal() {
2233        let f = f256::MIN_POSITIVE - f256::MIN_GT_ZERO;
2234        assert!(f.is_subnormal());
2235        let g = f.mul_pow2(1);
2236        assert!(g.is_normal());
2237        assert_eq!(g, f * f256::TWO);
2238        let f = f256::MIN_GT_ZERO;
2239        let g = f.mul_pow2(5);
2240        assert!(g.is_subnormal());
2241        assert_eq!(g, f * f256::from(32));
2242    }
2243
2244    #[test]
2245    fn test_subnormal_overflow() {
2246        let f = f256::MIN_GT_ZERO;
2247        let g = f.mul_pow2(u32::MAX);
2248        assert_eq!(g, f256::INFINITY);
2249    }
2250}
2251
2252#[cfg(test)]
2253mod div_pow2_tests {
2254    use super::*;
2255
2256    #[test]
2257    fn test_special() {
2258        assert_eq!(f256::ZERO.div_pow2(4), f256::ZERO);
2259        assert_eq!(f256::INFINITY.div_pow2(1), f256::INFINITY);
2260        assert_eq!(f256::NEG_INFINITY.div_pow2(1), f256::NEG_INFINITY);
2261        assert!(f256::NAN.div_pow2(38).is_nan());
2262    }
2263
2264    #[test]
2265    fn test_normal() {
2266        let f = f256::TEN;
2267        assert_eq!(f.div_pow2(3), f256::from(1.25));
2268        let g = f256::from(0.0793);
2269        assert_eq!(g.div_pow2(5), g / f256::from(32));
2270        let h = f256::MIN_POSITIVE.negated();
2271        assert_eq!(h.div_pow2(65), h / f256::from(36893488147419103232_u128));
2272        let f = f256::MIN_POSITIVE.div2();
2273        assert_eq!(f, f256::MIN_POSITIVE / f256::TWO);
2274        let f = f256::MIN_POSITIVE.div_pow2(236);
2275        assert_eq!(f, f256::MIN_GT_ZERO);
2276    }
2277
2278    #[test]
2279    fn test_underflow() {
2280        let f = f256::MIN_GT_ZERO.div_pow2(1);
2281        assert_eq!(f, f256::ZERO);
2282        let f = f256::MIN_POSITIVE.negated().div_pow2(SIGNIFICAND_BITS);
2283        assert_eq!(f, f256::NEG_ZERO);
2284    }
2285
2286    #[test]
2287    fn test_subnormal() {
2288        let f = f256::MIN_POSITIVE - f256::MIN_GT_ZERO;
2289        assert!(f.is_subnormal());
2290        let g = f.div_pow2(7);
2291        assert!(g.is_subnormal());
2292        assert_eq!(g, f / f256::from(128));
2293    }
2294}
2295
2296#[cfg(test)]
2297mod rn2sum_tests {
2298    use super::*;
2299    use crate::consts::{FRAC_PI_4, PI};
2300
2301    #[test]
2302    #[allow(clippy::cognitive_complexity)]
2303    fn test_special() {
2304        let tests = [
2305            (f256::ZERO, f256::ZERO, f256::ZERO, f256::ZERO),
2306            (f256::TEN, f256::ZERO, f256::TEN, f256::ZERO),
2307            (f256::INFINITY, f256::ZERO, f256::INFINITY, f256::NAN),
2308            (f256::INFINITY, f256::TWO, f256::INFINITY, f256::NAN),
2309            (f256::INFINITY, f256::INFINITY, f256::INFINITY, f256::NAN),
2310            (
2311                f256::NEG_INFINITY,
2312                f256::ZERO,
2313                f256::NEG_INFINITY,
2314                f256::NAN,
2315            ),
2316            (f256::NEG_INFINITY, f256::TEN, f256::NEG_INFINITY, f256::NAN),
2317            (
2318                f256::NEG_INFINITY,
2319                f256::NEG_INFINITY,
2320                f256::NEG_INFINITY,
2321                f256::NAN,
2322            ),
2323            (f256::INFINITY, f256::NEG_INFINITY, f256::NAN, f256::NAN),
2324        ];
2325        for (a, b, s, t) in tests {
2326            if s.is_finite() {
2327                assert_eq!(a.rn2sum(b), (s, t));
2328                assert_eq!(b.rn2sum(a), (s, t));
2329                if a.abs() >= b.abs() {
2330                    assert_eq!(a.frn2sum(b), (s, t));
2331                }
2332            } else if s.is_nan() {
2333                let (s, t) = a.rn2sum(b);
2334                assert!(s.is_nan());
2335                assert!(t.is_nan());
2336                let (s, t) = b.rn2sum(a);
2337                assert!(s.is_nan());
2338                assert!(t.is_nan());
2339                if a.abs() >= b.abs() {
2340                    let (s, t) = a.frn2sum(b);
2341                    assert!(s.is_nan());
2342                    assert!(t.is_nan());
2343                }
2344            } else {
2345                let (u, t) = a.rn2sum(b);
2346                assert_eq! {u, s};
2347                assert!(t.is_nan());
2348                let (s, t) = b.rn2sum(a);
2349                assert_eq! {u, s};
2350                assert!(t.is_nan());
2351                if a.abs() >= b.abs() {
2352                    let (u, t) = a.frn2sum(b);
2353                    assert_eq! {u, s};
2354                    assert!(t.is_nan());
2355                }
2356            }
2357        }
2358    }
2359
2360    #[test]
2361    fn test_overflow() {
2362        let f = f256::MAX;
2363        let (s, t) = f.rn2sum(f.ulp());
2364        assert_eq!(s, f256::INFINITY);
2365        assert!(t.is_nan());
2366        let (s, t) = f.frn2sum(f.ulp());
2367        assert_eq!(s, f256::INFINITY);
2368        assert!(t.is_nan());
2369    }
2370
2371    #[test]
2372    fn test_normal_with_error() {
2373        let x = PI;
2374        let y = FRAC_PI_4;
2375        let (s, t) = x.rn2sum(y);
2376        assert_eq!(s, x + y);
2377        assert_eq!(t, x.ulp().div2());
2378        let (s, t) = x.frn2sum(y);
2379        assert_eq!(s, x + y);
2380        assert_eq!(t, x.ulp().div2());
2381    }
2382}
2383
2384#[cfg(test)]
2385mod rn2mul_tests {
2386    use super::*;
2387    use crate::consts::PI;
2388
2389    #[test]
2390    fn test_special() {
2391        let tests = [
2392            (f256::ZERO, f256::ZERO, f256::ZERO, f256::ZERO),
2393            (f256::TEN, f256::ZERO, f256::ZERO, f256::ZERO),
2394            (f256::INFINITY, f256::ZERO, f256::NAN, f256::NAN),
2395            (f256::INFINITY, f256::TWO, f256::INFINITY, f256::NAN),
2396            (f256::INFINITY, f256::INFINITY, f256::INFINITY, f256::NAN),
2397            (f256::NEG_INFINITY, f256::ZERO, f256::NAN, f256::NAN),
2398            (f256::NEG_INFINITY, f256::TEN, f256::NEG_INFINITY, f256::NAN),
2399            (
2400                f256::NEG_INFINITY,
2401                f256::NEG_INFINITY,
2402                f256::INFINITY,
2403                f256::NAN,
2404            ),
2405            (
2406                f256::INFINITY,
2407                f256::NEG_INFINITY,
2408                f256::NEG_INFINITY,
2409                f256::NAN,
2410            ),
2411        ];
2412        for (a, b, s, t) in tests {
2413            if s.is_finite() {
2414                assert_eq!(a.rn2mul(b), (s, t));
2415                assert_eq!(b.rn2mul(a), (s, t));
2416            } else if s.is_nan() {
2417                let (s, t) = a.rn2mul(b);
2418                assert!(s.is_nan());
2419                assert!(t.is_nan());
2420                let (s, t) = b.rn2mul(a);
2421                assert!(s.is_nan());
2422                assert!(t.is_nan());
2423            } else {
2424                let (u, t) = a.rn2mul(b);
2425                assert_eq! {u, s};
2426                assert!(t.is_nan());
2427                let (s, t) = b.rn2mul(a);
2428                assert_eq! {u, s};
2429                assert!(t.is_nan());
2430            }
2431        }
2432    }
2433
2434    #[test]
2435    fn test_overflow() {
2436        let a = f256::MAX;
2437        let b = f256::ONE.next_up();
2438        let (s, t) = a.rn2mul(b);
2439        assert_eq!(s, f256::INFINITY);
2440        assert!(t.is_nan());
2441        let f = f256::MAX.sqrt().next_up();
2442        let (s, t) = f.rn2mul(f);
2443        assert_eq!(s, f256::INFINITY);
2444        assert!(t.is_nan());
2445    }
2446
2447    #[test]
2448    fn test_normal_with_error() {
2449        let x = PI;
2450        let y = f256::ONE - f256::ONE / f256::TWO.square();
2451        let (s, t) = x.rn2mul(y);
2452        assert_eq!(s, x * y);
2453        assert_eq!(t, -x.ulp().div2());
2454    }
2455}