Skip to main content

cbor_core/
float.rs

1//! Floating-point handling for CBOR::Core.
2//!
3//! CBOR distinguishes three floating-point widths (f16/f32/f64) and CBOR::Core
4//! requires each value to be encoded in its _shortest_ exact form. This module
5//! provides [`Float`], a value type that stores the raw bits at the chosen
6//! width, along with the IEEE 754 conversion helpers needed to pick that
7//! shortest form while preserving NaN payloads and the sign of zero.
8
9use crate::{
10    DataType, Error, Result,
11    codec::{Argument, Head, Major},
12    view::ValueView,
13};
14
15// IEEE 754 half-precision conversion functions.
16//
17// These are implemented by direct bit manipulation rather than the `as`
18// operator so that NaN payloads survive intact and the functions remain
19// usable in `const` contexts.
20
// Expand an IEEE 754 binary16 bit pattern into the `f64` with the same value.
//
// Pure bit manipulation: the sign of zero is kept, infinities stay
// infinities, and a NaN keeps its payload (the 10 significand bits land in
// the top 10 bits of the f64 significand).
const fn f16_to_f64(bits: u16) -> f64 {
    // Difference between the f64 and f16 exponent biases.
    const EXP_REBIAS: u64 = 1023 - 15;
    // Distance between the f16 and f64 significand widths.
    const SIG_SHIFT: u32 = 52 - 10;

    let half = bits as u64;
    let sign64 = (half & 0x8000) << 48;
    let exponent = (half >> 10) & 0x1f;
    let fraction = half & 0x03ff;

    let magnitude = match (exponent, fraction) {
        // Signed zero.
        (0, 0) => 0,
        // Subnormal: every f16 subnormal is a normal f64, so normalize it.
        (0, _) => {
            // Number of positions the leading one sits below bit 9.
            let lz = fraction.leading_zeros() - (64 - 10);
            // Shift the leading one out (it becomes the implicit bit).
            let normalized = (fraction << (lz + 1)) & 0x03ff;
            ((EXP_REBIAS - lz as u64) << 52) | (normalized << SIG_SHIFT)
        }
        // Infinity or NaN: all-ones exponent, significand copied as is.
        (0x1f, _) => 0x7ff0_0000_0000_0000 | (fraction << SIG_SHIFT),
        // Normal number: re-bias the exponent.
        _ => ((exponent + EXP_REBIAS) << 52) | (fraction << SIG_SHIFT),
    };

    f64::from_bits(sign64 | magnitude)
}
46
// Expand an IEEE 754 binary16 bit pattern into the `f32` with the same value.
//
// Pure bit manipulation: the sign of zero is kept, infinities stay
// infinities, and a NaN keeps its payload (the 10 significand bits land in
// the top 10 bits of the f32 significand).
const fn f16_to_f32(bits: u16) -> f32 {
    // Difference between the f32 and f16 exponent biases.
    const EXP_REBIAS: u32 = 127 - 15;
    // Distance between the f16 and f32 significand widths.
    const SIG_SHIFT: u32 = 23 - 10;

    let half = bits as u32;
    let sign32 = (half & 0x8000) << 16;
    let exponent = (half >> 10) & 0x1f;
    let fraction = half & 0x03ff;

    let magnitude = match (exponent, fraction) {
        // Signed zero.
        (0, 0) => 0,
        // Subnormal: every f16 subnormal is a normal f32, so normalize it.
        (0, _) => {
            // Number of positions the leading one sits below bit 9.
            let lz = fraction.leading_zeros() - (32 - 10);
            // Shift the leading one out (it becomes the implicit bit).
            let normalized = (fraction << (lz + 1)) & 0x03ff;
            ((EXP_REBIAS - lz) << 23) | (normalized << SIG_SHIFT)
        }
        // Infinity or NaN: all-ones exponent, significand copied as is.
        (0x1f, _) => 0x7f80_0000 | (fraction << SIG_SHIFT),
        // Normal number: re-bias the exponent.
        _ => ((exponent + EXP_REBIAS) << 23) | (fraction << SIG_SHIFT),
    };

    f32::from_bits(sign32 | magnitude)
}
72
// Narrow an f64 value to f16 bits using round-to-nearest-even.
//
// Handles subnormals, overflow to infinity, and the normal-to-subnormal
// boundary explicitly. NaN payloads are truncated to the top 10 significand
// bits (and forced non-zero) so the result remains a NaN.
//
// Finite inputs too small for the f16 subnormal range round to a signed
// zero; inputs at or above the midpoint between the largest finite f16 and
// the next power of two round to a signed infinity.
const fn f64_to_f16(value: f64) -> u16 {
    let bits = value.to_bits();
    let sign_bit = ((bits >> 48) & 0x8000) as u16; // 1 bit, already in f16 position
    let exp = ((bits >> 52) & 0x7ff) as i32; // 11 bits, biased by 1023
    let sig = bits & 0x000f_ffff_ffff_ffff; // 52 bits

    match exp {
        // Zero or f64 subnormal: far below the smallest f16 subnormal.
        0 => return sign_bit,

        // Infinity or NaN.
        0x7ff => {
            if sig == 0 {
                return sign_bit | 0x7c00;
            }
            let sig16 = (sig >> 42) as u16;
            // Keep the significand non-zero so a NaN never turns into infinity.
            // (`Ord::max` is not usable in a `const fn`.)
            return sign_bit | 0x7c00 | if sig16 == 0 { 1 } else { sig16 };
        }

        _ => (),
    }

    // Re-bias the exponent from f64 (1023) to f16 (15).
    let exp16 = exp - 1008;

    // Too large for a finite f16.
    if exp16 >= 0x1f {
        return sign_bit | 0x7c00;
    }

    // Result is an f16 subnormal (or rounds up to the smallest normal).
    if exp16 <= 0 {
        // Significand with the implicit leading one made explicit (53 bits).
        let full_sig = sig | 0x0010_0000_0000_0000;
        let shift = (1 - exp16) as u64 + 42;

        // `full_sig` is below 2^53. Once `shift` exceeds 53 the rounding
        // midpoint 2^(shift - 1) is at least 2^53, so the value is strictly
        // less than half of the smallest subnormal and rounds to zero. This
        // also keeps every shift below within the 64-bit range.
        if shift > 53 {
            return sign_bit;
        }

        let shifted = full_sig >> shift;
        let remainder = full_sig & ((1_u64 << shift) - 1);
        let halfway = 1_u64 << (shift - 1);
        let round_up = remainder > halfway || (remainder == halfway && (shifted & 1) != 0);
        // A carry out of the 10-bit field yields 0x0400, the smallest normal.
        let sig16 = (shifted as u16) + round_up as u16;
        return sign_bit | sig16;
    }

    // Normal f16 result: keep the top 10 significand bits, round on the rest.
    let sig10 = (sig >> 42) as u16;
    let remainder = sig & 0x3ff_ffff_ffff;
    let halfway = 0x200_0000_0000_u64;
    let round_up = remainder > halfway || (remainder == halfway && (sig10 & 1) != 0);
    let sig16 = sig10 + round_up as u16;

    if sig16 >= 0x0400 {
        // Significand overflowed: bump the exponent (this may produce infinity).
        sign_bit | (((exp16 as u16) + 1) << 10)
    } else {
        sign_bit | ((exp16 as u16) << 10) | sig16
    }
}
137
// Build the f64 NaN that carries the same sign and payload as an f32 NaN.
//
// A hardware `f32 as f64` cast may canonicalize NaN payloads on some
// platforms, so the f64 pattern is assembled by hand instead: the sign bit
// moves from bit 31 to bit 63, the exponent field is set to all ones, and
// the 23 f32 significand bits become the top 23 bits of the f64 significand.
// The f32 exponent field is ignored; callers pass NaN bit patterns.
const fn f32_nan_to_f64(bits: u32) -> f64 {
    let wide = bits as u64;
    let sign = (wide >> 31) << 63;
    let fraction = (wide & 0x007f_ffff) << 29;
    f64::from_bits(0x7ff0_0000_0000_0000 | sign | fraction)
}
149
/// Raw bits of a float at its chosen storage width (f16, f32, or f64).
///
/// `Inner` is crate-private so that `Float` can treat "shortest form" as an
/// invariant: [`Inner::new`] reduces a value to the narrowest variant that
/// preserves it in full (payload included).
///
/// The derived ordering compares the variant first (`F16 < F32 < F64`) and
/// the raw bits second; it is not a numeric ordering of the float values.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum Inner {
    /// IEEE 754 binary16 bit pattern.
    F16(u16),
    /// IEEE 754 binary32 bit pattern.
    F32(u32),
    /// IEEE 754 binary64 bit pattern.
    F64(u64),
}
161
162impl Inner {
163    // Select the shortest IEEE 754 form that preserves `x` bit-exactly.
164    //
165    // For finite values, round-trip checks decide whether f16 or f32 is
166    // lossless. For non-finite values (Infinity / NaN) the significand is
167    // inspected directly: f16 is used when the bottom 42 significand bits
168    // are zero, f32 when the bottom 29 are zero, otherwise f64.
169    const fn new(x: f64) -> Self {
170        if x.is_finite() {
171            let bits16 = f64_to_f16(x);
172
173            if f16_to_f64(bits16).to_bits() == x.to_bits() {
174                Inner::F16(bits16)
175            } else if ((x as f32) as f64).to_bits() == x.to_bits() {
176                Inner::F32((x as f32).to_bits())
177            } else {
178                Inner::F64(x.to_bits())
179            }
180        } else {
181            let bits64 = x.to_bits();
182            let sign_bit = bits64 & 0x8000_0000_0000_0000;
183
184            if (bits64 & 0x3ff_ffff_ffff) == 0 {
185                let bits = (bits64 >> 42) & 0x7fff | (sign_bit >> 48);
186                Self::F16(bits as u16)
187            } else if (bits64 & 0x1fff_ffff) == 0 {
188                let bits = (bits64 >> 29) & 0x7fff_ffff | (sign_bit >> 32);
189                Self::F32(bits as u32)
190            } else {
191                Self::F64(bits64)
192            }
193        }
194    }
195}
196
/// A floating-point value stored in its shortest CBOR encoding form.
///
/// Internally the raw bits are stored as f16, f32, or f64: whichever is the
/// shortest form that preserves the value exactly (including NaN payloads
/// and the sign of zero). CBOR::Core's deterministic encoding rules require
/// this "shortest form" selection, so a `Float` mirrors the bytes that will
/// be written on the wire.
///
/// # Equality and ordering
///
/// Two `Float` values are equal iff they encode to the same CBOR bytes.
/// This differs from IEEE 754 equality in two ways:
///
/// * `Float(+0.0) != Float(-0.0)` because they encode to different CBOR bytes.
/// * Two NaNs compare equal if and only if they have identical payloads and
///   sign, since that determines the encoding.
///
/// The derived `PartialOrd`/`Ord` compare the storage width first and the
/// raw bits second; they do not order values numerically.
///
/// # Construction
///
/// * [`Float::new`] for floats and integers.
/// * [`Float::from_f64`] / [`Float::from_f32`] for use in `const` contexts.
/// * [`Float::with_payload`] for non-finite values with a given payload.
///
/// # Examples
///
/// ```
/// use cbor_core::Float;
///
/// // Shortest-form storage: 1.0 fits in f16.
/// assert_eq!(Float::new(1.0_f64).data_type(), cbor_core::DataType::Float16);
///
/// // Non-finite round-trip via payload.
/// let nan = Float::with_payload(1);
/// assert!(nan.to_f64().is_nan());
/// assert_eq!(nan.to_payload(), Ok(1));
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Float(pub(crate) Inner);
232
233impl ValueView for Float {
234    fn head(&self) -> Head {
235        match self.0 {
236            Inner::F16(bits) => Head::new(Major::SimpleOrFloat, Argument::U16(bits)),
237            Inner::F32(bits) => Head::new(Major::SimpleOrFloat, Argument::U32(bits)),
238            Inner::F64(bits) => Head::new(Major::SimpleOrFloat, Argument::U64(bits)),
239        }
240    }
241
242    fn payload(&self) -> crate::view::Payload<'_> {
243        crate::view::Payload::None
244    }
245}
246
247impl Float {
248    /// Create a floating-point value in shortest CBOR form.
249    ///
250    /// Equivalent to `Float::from(value)`. The constructor chooses the
251    /// narrowest CBOR::Core deterministic encoding width that represents
252    /// `value` exactly.
253    ///
254    /// Accepted input types: `f32`, `f64`, `u8`, `u16`, `u32`, `i8`, `i16`, `i32`,
255    /// `bool` (`false` becomes `0.0`, `true` becomes `1.0`).
256    ///
257    /// 64-bit integers are intentionally rejected because they are not
258    /// losslessly representable as `f64` in general.
259    ///
260    /// # Examples
261    ///
262    /// ```
263    /// use cbor_core::{DataType, Float};
264    ///
265    /// assert_eq!(Float::new(0.0_f64).data_type(), DataType::Float16);
266    /// assert_eq!(Float::new(true).to_f64(), 1.0);
267    /// ```
268    #[must_use]
269    pub fn new(value: impl Into<Self>) -> Self {
270        value.into()
271    }
272
273    /// Create a non-finite floating-point value from a payload.
274    ///
275    /// The payload is a 53-bit integer, laid out as described in section
276    /// 2.3.4.2 of `draft-rundgren-cbor-core-25`. Bit 52 becomes the sign bit
277    /// of the resulting float, while bits 51-0 form the significand in
278    /// _reversed_ order.
279    ///
280    /// Bit reversal keeps a given bit position invariant
281    /// across the f16, f32, and f64 encodings: bit 0 of the payload is
282    /// always the most-significant significand bit. The result is stored in
283    /// the shortest CBOR form that preserves the payload.
284    ///
285    /// | Payload               | CBOR encoding         | Diagnostic notation       |
286    /// |----------------------:|-----------------------|---------------------------|
287    /// | `0`                   | [0xf9, 0x7c 0x00]     | `Infinity`                |
288    /// | `0x01`                | [0xf9, 0x7e 0x00]     | `NaN`                     |
289    /// | `0x10_0000_0000_0000` | [0xf9, 0xfc 0x00]     | `-Infinity`               |
290    ///
291    /// The maximum allowed payload is `0x1f_ffff_ffff_ffff` (53 bits).
292    ///
293    /// # Panics
294    ///
295    /// Panics if `payload` exceeds the 53-bit maximum.
296    ///
297    /// # Examples
298    ///
299    /// ```
300    /// use cbor_core::Float;
301    ///
302    /// assert!(Float::with_payload(0).to_f64().is_infinite());
303    /// assert!(Float::with_payload(1).to_f64().is_nan());
304    /// assert_eq!(Float::with_payload(2).to_payload(), Ok(2));
305    /// ```
306    #[must_use]
307    pub const fn with_payload(payload: u64) -> Self {
308        let sign_bit = payload & 0x10_0000_0000_0000; // payload width 53 bits, sign_bit = MSB
309        let lower52 = payload ^ sign_bit; // lower 52 bits
310
311        if lower52 <= 0x3ff {
312            let sig = ((lower52 as u16) << 6).reverse_bits();
313            let sign_bit = (sign_bit >> 37) as u16;
314            Self(Inner::F16(sign_bit | 0x7c00 | sig))
315        } else if lower52 <= 0x7f_ffff {
316            let sig = ((lower52 as u32) << 9).reverse_bits();
317            let sign_bit = (sign_bit >> 21) as u32;
318            Self(Inner::F32(sign_bit | 0x7f80_0000 | sig))
319        } else if lower52 <= 0x0f_ffff_ffff_ffff {
320            let sig = (lower52 << 12).reverse_bits();
321            let sign_bit = sign_bit << 11;
322            Self(Inner::F64(sign_bit | 0x7ff0_0000_0000_0000 | sig))
323        } else {
324            panic!("payload exceeds maximum allowed value")
325        }
326    }
327
328    /// Create a `Float` from an `f64`, usable in `const` context.
329    ///
330    /// `const` counterpart of `Float::from(value)` / [`Float::new`]. The
331    /// value is reduced to the shortest CBOR form (f16, f32, or f64) that
332    /// preserves it bit-exactly, including NaN payloads and the sign of
333    /// zero.
334    ///
335    /// ```
336    /// use cbor_core::Float;
337    ///
338    /// const F: Float = Float::from_f64(1.0);
339    /// assert_eq!(F.to_f64(), 1.0);
340    /// ```
341    #[must_use]
342    pub const fn from_f64(value: f64) -> Self {
343        Self(Inner::new(value))
344    }
345
346    /// Create a `Float` from an `f32`, usable in `const` context.
347    ///
348    /// `const` counterpart of `Float::from(value)` / [`Float::new`] for
349    /// f32 inputs. NaN payloads are widened without hardware
350    /// canonicalization; the result is then stored in the shortest CBOR
351    /// form that preserves the value.
352    ///
353    /// ```
354    /// use cbor_core::Float;
355    ///
356    /// const F: Float = Float::from_f32(1.0);
357    /// assert_eq!(F.to_f32(), Ok(1.0));
358    /// ```
359    #[must_use]
360    pub const fn from_f32(value: f32) -> Self {
361        if value.is_nan() {
362            // NaN-safe: bit manipulation to avoid hardware canonicalization
363            Self(Inner::new(f32_nan_to_f64(value.to_bits())))
364        } else {
365            Self(Inner::new(value as f64))
366        }
367    }
368
369    /// Return the [`DataType`] indicating the storage width (f16, f32, or f64).
370    ///
371    /// ```
372    /// use cbor_core::{Float, DataType};
373    ///
374    /// assert_eq!(Float::new(1.5).data_type(), DataType::Float16);
375    /// assert_eq!(Float::new(1.00048828125).data_type(), DataType::Float32);
376    /// assert_eq!(Float::new(1.1).data_type(), DataType::Float64);
377    /// ```
378    #[must_use]
379    pub const fn data_type(&self) -> DataType {
380        match self.0 {
381            Inner::F16(_) => DataType::Float16,
382            Inner::F32(_) => DataType::Float32,
383            Inner::F64(_) => DataType::Float64,
384        }
385    }
386
387    #[must_use]
388    pub(crate) const fn from_bits_u16(bits: u16) -> Self {
389        Self(Inner::F16(bits))
390    }
391
392    pub(crate) const fn from_bits_u32(bits: u32) -> Result<Self> {
393        let float = Self(Inner::F32(bits));
394        if matches!(Inner::new(float.to_f64()), Inner::F32(_)) {
395            Ok(float)
396        } else {
397            Err(Error::NonDeterministic)
398        }
399    }
400
401    pub(crate) const fn from_bits_u64(bits: u64) -> Result<Self> {
402        let float = Self(Inner::F64(bits));
403        if matches!(Inner::new(float.to_f64()), Inner::F64(_)) {
404            Ok(float)
405        } else {
406            Err(Error::NonDeterministic)
407        }
408    }
409
410    /// Widen to `f64`, preserving the exact bit pattern.
411    ///
412    /// Finite values widen losslessly. For NaN values the payload bits are
413    /// copied verbatim (without hardware canonicalization).
414    #[must_use]
415    pub const fn to_f64(self) -> f64 {
416        match self.0 {
417            Inner::F16(bits) => f16_to_f64(bits),
418            Inner::F32(bits) => {
419                let f = f32::from_bits(bits);
420                if f.is_nan() { f32_nan_to_f64(bits) } else { f as f64 }
421            }
422            Inner::F64(bits) => f64::from_bits(bits),
423        }
424    }
425
426    /// Narrow to `f32` when the value fits exactly.
427    ///
428    /// Returns `Err(Error::Precision)` when the underlying storage is f64,
429    /// since f64 values cannot in general be narrowed without loss. f16 and
430    /// f32 values convert losslessly; NaN payloads are preserved.
431    pub const fn to_f32(self) -> Result<f32> {
432        match self.0 {
433            Inner::F16(bits) => Ok(f16_to_f32(bits)),
434            Inner::F32(bits) => Ok(f32::from_bits(bits)),
435            Inner::F64(_) => Err(Error::Precision),
436        }
437    }
438
439    /// Retrieve the 53-bit payload of a non-finite value.
440    ///
441    /// Returns [`Err(Error::InvalidValue)`](Error::InvalidValue) for finite
442    /// floats. For non-finite values, the payload is reconstructed from the
443    /// underlying f16/f32/f64 bits by the inverse of [`Float::with_payload`].
444    ///
445    /// ```
446    /// use cbor_core::{Float, Error};
447    ///
448    /// for payload in [0, 1, 2, 0x400, 0x1fffffffffffff] {
449    ///     assert_eq!(Float::with_payload(payload).to_payload(), Ok(payload));
450    /// }
451    ///
452    /// assert_eq!(Float::new(1.0).to_payload(), Err(Error::InvalidValue));
453    /// ```
454    pub const fn to_payload(self) -> Result<u64> {
455        if self.is_finite() {
456            Err(Error::InvalidValue)
457        } else {
458            let sign_bit;
459            let sig;
460
461            match self.0 {
462                Inner::F16(bits) => {
463                    sign_bit = ((bits & 0x8000) as u64) << 37;
464                    sig = (bits.reverse_bits() >> 6) as u64;
465                }
466                Inner::F32(bits) => {
467                    sign_bit = ((bits & 0x8000_0000) as u64) << 21;
468                    sig = (bits.reverse_bits() >> 9) as u64;
469                }
470                Inner::F64(bits) => {
471                    sign_bit = (bits & 0x8000_0000_0000_0000) >> 11;
472                    sig = bits.reverse_bits() >> 12;
473                }
474            }
475
476            Ok(sign_bit | sig)
477        }
478    }
479
480    /// Return `true` if this is a finite floating-point value.
481    ///
482    /// A value is non-finite when its exponent field is all ones (that is,
483    /// `Infinity`, `-Infinity`, or any NaN).
484    ///
485    /// Non-finite values have a payload.
486    #[must_use]
487    pub const fn is_finite(self) -> bool {
488        match self.0 {
489            Inner::F16(bits) => bits & 0x7c00 != 0x7c00,
490            Inner::F32(bits) => bits & 0x7f80_0000 != 0x7f80_0000,
491            Inner::F64(bits) => bits & 0x7ff0_0000_0000_0000 != 0x7ff0_0000_0000_0000,
492        }
493    }
494}
495
496// --- From floating-point types ---
497
498impl From<f64> for Float {
499    fn from(value: f64) -> Self {
500        Self::from_f64(value)
501    }
502}
503
504impl From<f32> for Float {
505    fn from(value: f32) -> Self {
506        Self::from_f32(value)
507    }
508}
509
510// --- From integer types (lossless conversion to f64, like std) ---
511
512macro_rules! try_from {
513    ($type:ty) => {
514        impl From<$type> for Float {
515            fn from(value: $type) -> Self {
516                Self::from(value as f64)
517            }
518        }
519    };
520}
521
522try_from!(u8);
523try_from!(u16);
524try_from!(u32);
525
526try_from!(i8);
527try_from!(i16);
528try_from!(i32);
529
530impl From<bool> for Float {
531    fn from(value: bool) -> Self {
532        Self(if value { Inner::new(1.0) } else { Inner::new(0.0) })
533    }
534}
535
// Unit tests for the bit-level f16 <-> f32/f64 conversion helpers.
//
// f16 layout reminder: 1 sign bit, 5 exponent bits (bias 15), 10 significand
// bits. For example 0x3c00 is 1.0, 0x7c00 is +Infinity, 0x0001 is the
// smallest positive subnormal (2^-24) and 0x0400 the smallest positive
// normal (2^-14).
#[cfg(test)]
mod tests {
    use super::*;

    // An f16 is NaN when its exponent is all ones and its significand is
    // non-zero, i.e. when the magnitude bits exceed those of infinity.
    fn f16_is_nan(bits: u16) -> bool {
        (bits & 0x7fff) > 0x7c00
    }

    // =====================================================================
    // f16 → f64 conversion
    // =====================================================================

    #[test]
    fn to_f64_zero() {
        assert_eq!(f16_to_f64(0x0000), 0.0);
        assert!(f16_to_f64(0x0000).is_sign_positive());
    }

    #[test]
    fn to_f64_neg_zero() {
        // Compare bits: `-0.0 == 0.0` under IEEE 754 equality.
        let v = f16_to_f64(0x8000);
        assert_eq!(v.to_bits(), (-0.0_f64).to_bits());
    }

    #[test]
    fn to_f64_one() {
        assert_eq!(f16_to_f64(0x3c00), 1.0);
    }

    #[test]
    fn to_f64_neg_one() {
        assert_eq!(f16_to_f64(0xbc00), -1.0);
    }

    #[test]
    fn to_f64_max_normal() {
        assert_eq!(f16_to_f64(0x7bff), 65504.0);
    }

    #[test]
    fn to_f64_min_positive_normal() {
        // 2^-14
        assert_eq!(f16_to_f64(0x0400), 0.00006103515625);
    }

    #[test]
    fn to_f64_min_positive_subnormal() {
        // 2^-24
        assert_eq!(f16_to_f64(0x0001), 5.960464477539063e-8);
    }

    #[test]
    fn to_f64_max_subnormal() {
        // 1023 * 2^-24
        assert_eq!(f16_to_f64(0x03ff), 0.00006097555160522461);
    }

    #[test]
    fn to_f64_infinity() {
        assert_eq!(f16_to_f64(0x7c00), f64::INFINITY);
    }

    #[test]
    fn to_f64_neg_infinity() {
        assert_eq!(f16_to_f64(0xfc00), f64::NEG_INFINITY);
    }

    #[test]
    fn to_f64_nan() {
        assert!(f16_to_f64(0x7e00).is_nan());
    }

    #[test]
    fn to_f64_nan_preserves_payload() {
        // f16 sig bit 0 → f64 sig bit shifted left by 42
        let bits = f16_to_f64(0x7c01).to_bits();
        assert_eq!(bits, 0x7ff0_0400_0000_0000);
    }

    #[test]
    fn to_f64_two() {
        assert_eq!(f16_to_f64(0x4000), 2.0);
    }

    #[test]
    fn to_f64_one_point_five() {
        assert_eq!(f16_to_f64(0x3e00), 1.5);
    }

    // =====================================================================
    // f16 → f32 conversion
    // =====================================================================

    #[test]
    fn to_f32_zero() {
        assert_eq!(f16_to_f32(0x0000), 0.0_f32);
        assert!(f16_to_f32(0x0000).is_sign_positive());
    }

    #[test]
    fn to_f32_neg_zero() {
        assert_eq!(f16_to_f32(0x8000).to_bits(), (-0.0_f32).to_bits());
    }

    #[test]
    fn to_f32_one() {
        assert_eq!(f16_to_f32(0x3c00), 1.0_f32);
    }

    #[test]
    fn to_f32_neg_one() {
        assert_eq!(f16_to_f32(0xbc00), -1.0_f32);
    }

    #[test]
    fn to_f32_two() {
        assert_eq!(f16_to_f32(0x4000), 2.0_f32);
    }

    #[test]
    fn to_f32_one_point_five() {
        assert_eq!(f16_to_f32(0x3e00), 1.5_f32);
    }

    #[test]
    fn to_f32_max_normal() {
        assert_eq!(f16_to_f32(0x7bff), 65504.0_f32);
    }

    #[test]
    fn to_f32_min_positive_normal() {
        assert_eq!(f16_to_f32(0x0400), 0.000061035156_f32);
    }

    #[test]
    fn to_f32_min_positive_subnormal() {
        assert_eq!(f16_to_f32(0x0001), 5.9604645e-8_f32);
    }

    #[test]
    fn to_f32_max_subnormal() {
        assert_eq!(f16_to_f32(0x03ff), 0.00006097555_f32);
    }

    #[test]
    fn to_f32_infinity() {
        assert_eq!(f16_to_f32(0x7c00), f32::INFINITY);
    }

    #[test]
    fn to_f32_neg_infinity() {
        assert_eq!(f16_to_f32(0xfc00), f32::NEG_INFINITY);
    }

    #[test]
    fn to_f32_nan() {
        assert!(f16_to_f32(0x7e00).is_nan());
    }

    #[test]
    fn to_f32_nan_preserves_payload() {
        let bits = f16_to_f32(0x7c01).to_bits();
        // f16 sig bit 0 → f32 sig bit shifted left by 13
        assert_eq!(bits, 0x7f80_2000);
    }

    #[test]
    fn to_f32_agrees_with_f16_to_f64() {
        // Every non-NaN f16 → f32 must equal f16 → f64 cast to f32.
        // NaNs are skipped because the `as f32` cast used as the reference
        // is not guaranteed to keep their payload.
        for bits in 0..=0x7fff_u16 {
            if f16_is_nan(bits) {
                continue;
            }
            let via_f32 = f16_to_f32(bits);
            let via_f64 = f16_to_f64(bits) as f32;
            assert_eq!(via_f32.to_bits(), via_f64.to_bits(), "mismatch for bits 0x{bits:04x}");

            // Same check with the sign bit set.
            let neg = bits | 0x8000;
            let via_f32n = f16_to_f32(neg);
            let via_f64n = f16_to_f64(neg) as f32;
            assert_eq!(via_f32n.to_bits(), via_f64n.to_bits(), "mismatch for bits 0x{neg:04x}");
        }
    }

    // =====================================================================
    // f64 → f16 conversion (round-to-nearest-even)
    // =====================================================================

    #[test]
    fn from_f64_zero() {
        assert_eq!(f64_to_f16(0.0), 0x0000);
    }

    #[test]
    fn from_f64_neg_zero() {
        assert_eq!(f64_to_f16(-0.0), 0x8000);
    }

    #[test]
    fn from_f64_one() {
        assert_eq!(f64_to_f16(1.0), 0x3c00);
    }

    #[test]
    fn from_f64_neg_one() {
        assert_eq!(f64_to_f16(-1.0), 0xbc00);
    }

    #[test]
    fn from_f64_max_normal() {
        assert_eq!(f64_to_f16(65504.0), 0x7bff);
    }

    #[test]
    fn from_f64_overflow_to_infinity() {
        // 65520 is the midpoint between 65504 (max finite f16) and 65536.
        assert_eq!(f64_to_f16(65520.0), 0x7c00);
    }

    #[test]
    fn from_f64_infinity() {
        assert_eq!(f64_to_f16(f64::INFINITY), 0x7c00);
    }

    #[test]
    fn from_f64_neg_infinity() {
        assert_eq!(f64_to_f16(f64::NEG_INFINITY), 0xfc00);
    }

    #[test]
    fn from_f64_nan() {
        assert!(f16_is_nan(f64_to_f16(f64::NAN)));
    }

    #[test]
    fn from_f64_min_positive_subnormal() {
        assert_eq!(f64_to_f16(5.960464477539063e-8), 0x0001);
    }

    #[test]
    fn from_f64_min_positive_normal() {
        assert_eq!(f64_to_f16(0.00006103515625), 0x0400);
    }

    // =====================================================================
    // Round-to-nearest-even: critical boundary tests
    // =====================================================================
    //
    // In the f64 patterns below, bit 42 of the significand is the lowest bit
    // that survives in f16 and bit 41 is exactly half an f16 ULP.

    #[test]
    fn rounding_exactly_halfway_rounds_to_even_down() {
        // 1.0 plus half an f16 ULP: tie, kept significand 0 is even → stays.
        let halfway = f64::from_bits(0x3FF0_0200_0000_0000);
        assert_eq!(f64_to_f16(halfway), 0x3c00);
    }

    #[test]
    fn rounding_exactly_halfway_rounds_to_even_up() {
        // Kept significand 1 plus half an ULP: tie, 1 is odd → rounds up to 2.
        let halfway = f64::from_bits(0x3FF0_0600_0000_0000);
        assert_eq!(f64_to_f16(halfway), 0x3c02);
    }

    #[test]
    fn rounding_just_below_halfway_rounds_down() {
        let below = f64::from_bits(0x3FF0_01FF_FFFF_FFFF);
        assert_eq!(f64_to_f16(below), 0x3c00);
    }

    #[test]
    fn rounding_just_above_halfway_rounds_up() {
        let above = f64::from_bits(0x3FF0_0200_0000_0001);
        assert_eq!(f64_to_f16(above), 0x3c01);
    }

    #[test]
    fn rounding_subnormal_halfway_rounds_to_even() {
        // 1.5 × min subnormal: tie between 1 and 2 → even is 2.
        let val = 1.5 * 5.960464477539063e-8;
        assert_eq!(f64_to_f16(val), 0x0002);
    }

    #[test]
    fn rounding_subnormal_halfway_even_down() {
        // 2.5 × min subnormal: tie between 2 and 3 → even is 2.
        let val = 2.5 * 5.960464477539063e-8;
        assert_eq!(f64_to_f16(val), 0x0002);
    }

    #[test]
    fn rounding_normal_to_subnormal_boundary() {
        let min_normal = 0.00006103515625_f64;
        assert_eq!(f64_to_f16(min_normal), 0x0400);

        // One f64 ULP below the smallest normal rounds back up to it.
        let below = f64::from_bits(min_normal.to_bits() - 1);
        assert_eq!(f64_to_f16(below), 0x0400);
    }

    #[test]
    fn rounding_overflow_at_max() {
        // Below the midpoint 65520 the result stays finite; at the midpoint
        // the tie rounds to even, which carries into infinity.
        assert_eq!(f64_to_f16(65504.0), 0x7bff);
        assert_eq!(f64_to_f16(65519.99), 0x7bff);
        assert_eq!(f64_to_f16(65520.0), 0x7c00);
    }

    #[test]
    fn rounding_tiny_to_zero() {
        assert_eq!(f64_to_f16(1e-30), 0x0000);
        assert_eq!(f64_to_f16(-1e-30), 0x8000);
    }

    #[test]
    fn rounding_tiny_to_min_subnormal() {
        // Exactly half the min subnormal: tie between 0 and 1 → even is 0.
        let half_min: f64 = 0.5 * 5.960464477539063e-8;
        assert_eq!(f64_to_f16(half_min), 0x0000);

        // One f64 ULP above the tie rounds up to the min subnormal.
        let above = f64::from_bits(half_min.to_bits() + 1);
        assert_eq!(f64_to_f16(above), 0x0001);
    }

    // =====================================================================
    // Roundtrip: f64 → f16 → f64
    // =====================================================================

    #[test]
    fn roundtrip_all_exact_f16_values() {
        // Exhaustive over every non-NaN f16 bit pattern, both signs.
        for bits in 0..=0x7fff_u16 {
            if f16_is_nan(bits) {
                continue;
            }
            let f = f16_to_f64(bits);
            let h2 = f64_to_f16(f);
            assert_eq!(bits, h2, "roundtrip failed for bits 0x{bits:04x}");

            // Also check negative
            let neg_bits = bits | 0x8000;
            let fn_ = f16_to_f64(neg_bits);
            let hn2 = f64_to_f16(fn_);
            assert_eq!(neg_bits, hn2, "roundtrip failed for bits 0x{neg_bits:04x}");
        }
    }
}