1use crate::{
10 DataType, Error, Result,
11 codec::{Argument, Head, Major},
12 view::ValueView,
13};
14
/// Widens an IEEE 754 binary16 bit pattern to an `f64`.
///
/// Every binary16 value is representable in binary64, so the conversion is
/// exact. Subnormal inputs are renormalised, and the significand bits of
/// infinities and NaNs are copied into the top ten significand bits of the
/// result.
const fn f16_to_f64(bits: u16) -> f64 {
    let raw = bits as u64;
    let sign_field = (raw >> 15) << 63;
    let exponent = (raw >> 10) & 0x1f;
    let mantissa = raw & 0x03ff;

    // 1008 = 1023 - 15, the difference between the two exponent biases.
    let magnitude = match (exponent, mantissa) {
        // Signed zero.
        (0, 0) => 0,
        // Subnormal: shift the leading one out of the 10-bit field (it becomes
        // the implicit bit) and lower the exponent by the distance shifted.
        (0, _) => {
            let lead = mantissa.leading_zeros() as u64 - (64 - 10);
            let fraction = (mantissa << (lead + 1)) & 0x03ff;
            ((1008 - lead) << 52) | (fraction << 42)
        }
        // Infinity or NaN: all-ones exponent, significand carried over.
        (0x1f, _) => 0x7ff0_0000_0000_0000 | (mantissa << 42),
        // Normal number: rebias the exponent.
        _ => ((exponent + 1008) << 52) | (mantissa << 42),
    };

    f64::from_bits(sign_field | magnitude)
}
46
/// Widens an IEEE 754 binary16 bit pattern to an `f32`.
///
/// Every binary16 value is representable in binary32, so the conversion is
/// exact. Subnormal inputs are renormalised, and the significand bits of
/// infinities and NaNs are copied into the top ten significand bits of the
/// result.
const fn f16_to_f32(bits: u16) -> f32 {
    let raw = bits as u32;
    let sign_field = (raw >> 15) << 31;
    let exponent = (raw >> 10) & 0x1f;
    let mantissa = raw & 0x03ff;

    // 112 = 127 - 15, the difference between the two exponent biases.
    let magnitude = match (exponent, mantissa) {
        // Signed zero.
        (0, 0) => 0,
        // Subnormal: shift the leading one out of the 10-bit field (it becomes
        // the implicit bit) and lower the exponent by the distance shifted.
        (0, _) => {
            let lead = mantissa.leading_zeros() - (32 - 10);
            let fraction = (mantissa << (lead + 1)) & 0x03ff;
            ((112 - lead) << 23) | (fraction << 13)
        }
        // Infinity or NaN: all-ones exponent, significand carried over.
        (0x1f, _) => 0x7f80_0000 | (mantissa << 13),
        // Normal number: rebias the exponent.
        _ => ((exponent + 112) << 23) | (mantissa << 13),
    };

    f32::from_bits(sign_field | magnitude)
}
72
/// Converts an `f64` to the nearest IEEE 754 binary16 bit pattern.
///
/// Rounding is round-to-nearest, ties-to-even:
///
/// * Zeros and `f64` subnormals become a zero with the same sign.
/// * Magnitudes too large for binary16 (including those that round up past
///   the largest finite value) become an infinity with the same sign.
/// * Magnitudes below the binary16 normal range are rounded to a binary16
///   subnormal, or to signed zero when they are at most half of the smallest
///   subnormal.
/// * Infinities stay infinities. NaNs keep the top ten bits of their
///   significand; when those are all zero the lowest significand bit is set so
///   the result is still a NaN rather than an infinity.
const fn f64_to_f16(value: f64) -> u16 {
    let bits = value.to_bits();
    let sign_bit = ((bits >> 48) & 0x8000) as u16;
    let exp = ((bits >> 52) & 0x7ff) as i32;
    let sig = bits & 0x000f_ffff_ffff_ffff;

    match exp {
        // Zero, or an f64 subnormal, which is far below anything binary16 can hold.
        0 => return sign_bit,

        0x7ff => {
            if sig == 0 {
                return sign_bit | 0x7c00;
            }
            let sig16 = (sig >> 42) as u16;
            // A payload that lives only in the discarded low bits must not
            // collapse into the infinity encoding.
            let nan_sig = if sig16 == 0 { 1 } else { sig16 };
            return sign_bit | 0x7c00 | nan_sig;
        }

        _ => (),
    }

    // Rebias the exponent from binary64 (1023) to binary16 (15).
    let exp16 = exp - 1008;

    if exp16 >= 0x1f {
        return sign_bit | 0x7c00;
    }

    if exp16 <= 0 {
        // Subnormal (or underflowing) result: make the implicit bit explicit
        // and shift the 53-bit significand down to units of 2^-24.
        let full_sig = sig | 0x0010_0000_0000_0000;
        let shift = (1 - exp16) as u64 + 42;

        if shift >= 64 {
            // `full_sig` is below 2^53, so a shift of 64 or more means the
            // value is less than 2^-11 of the smallest subnormal. It always
            // rounds to zero; returning early also keeps the shifts below in
            // range.
            return sign_bit;
        }

        let shifted = full_sig >> shift;
        let remainder = full_sig & ((1_u64 << shift) - 1);
        let halfway = 1_u64 << (shift - 1);
        let round_up = remainder > halfway || (remainder == halfway && (shifted & 1) != 0);
        // A carry out of the 10-bit field lands in the exponent field and
        // yields the smallest normal number, which is the correct encoding.
        return sign_bit | ((shifted as u16) + round_up as u16);
    }

    // Normal result: keep the top 10 significand bits and round on the
    // discarded 42 bits.
    let sig10 = (sig >> 42) as u16;
    let remainder = sig & 0x3ff_ffff_ffff;
    let halfway = 0x200_0000_0000_u64;
    let round_up = remainder > halfway || (remainder == halfway && (sig10 & 1) != 0);
    let sig16 = sig10 + round_up as u16;

    if sig16 >= 0x0400 {
        // The significand overflowed: bump the exponent. If that reaches 0x1f
        // the result is exactly the infinity encoding.
        sign_bit | (((exp16 as u16) + 1) << 10)
    } else {
        sign_bit | ((exp16 as u16) << 10) | sig16
    }
}
137
/// Widens the sign and 23-bit significand of an `f32` bit pattern into an
/// `f64` whose exponent field is all ones.
///
/// The input exponent is ignored, so the result is always an infinity or a
/// NaN. The significand is placed in the top 23 significand bits of the
/// result, which keeps a NaN payload intact.
const fn f32_nan_to_f64(bits: u32) -> f64 {
    let wide = bits as u64;
    let sign = (wide >> 31) << 63;
    let fraction = (wide & 0x007f_ffff) << 29;
    f64::from_bits(0x7ff0_0000_0000_0000 | sign | fraction)
}
149
/// Raw storage for a float: the IEEE 754 bit pattern at one of three widths.
///
/// The derived comparisons work on the variant and the stored bits, not on
/// the numeric value. The variant order is significant for the derived
/// ordering and must not be rearranged.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum Inner {
    /// binary16 bit pattern.
    F16(u16),
    /// binary32 bit pattern.
    F32(u32),
    /// binary64 bit pattern.
    F64(u64),
}
161
162impl Inner {
163 const fn new(x: f64) -> Self {
170 if x.is_finite() {
171 let bits16 = f64_to_f16(x);
172
173 if f16_to_f64(bits16).to_bits() == x.to_bits() {
174 Inner::F16(bits16)
175 } else if ((x as f32) as f64).to_bits() == x.to_bits() {
176 Inner::F32((x as f32).to_bits())
177 } else {
178 Inner::F64(x.to_bits())
179 }
180 } else {
181 let bits64 = x.to_bits();
182 let sign_bit = bits64 & 0x8000_0000_0000_0000;
183
184 if (bits64 & 0x3ff_ffff_ffff) == 0 {
185 let bits = (bits64 >> 42) & 0x7fff | (sign_bit >> 48);
186 Self::F16(bits as u16)
187 } else if (bits64 & 0x1fff_ffff) == 0 {
188 let bits = (bits64 >> 29) & 0x7fff_ffff | (sign_bit >> 32);
189 Self::F32(bits as u32)
190 } else {
191 Self::F64(bits64)
192 }
193 }
194 }
195}
196
197#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
231pub struct Float(pub(crate) Inner);
232
233impl ValueView for Float {
234 fn head(&self) -> Head {
235 match self.0 {
236 Inner::F16(bits) => Head::new(Major::SimpleOrFloat, Argument::U16(bits)),
237 Inner::F32(bits) => Head::new(Major::SimpleOrFloat, Argument::U32(bits)),
238 Inner::F64(bits) => Head::new(Major::SimpleOrFloat, Argument::U64(bits)),
239 }
240 }
241
242 fn payload(&self) -> crate::view::Payload<'_> {
243 crate::view::Payload::None
244 }
245}
246
247impl Float {
248 #[must_use]
269 pub fn new(value: impl Into<Self>) -> Self {
270 value.into()
271 }
272
273 #[must_use]
307 pub const fn with_payload(payload: u64) -> Self {
308 let sign_bit = payload & 0x10_0000_0000_0000; let lower52 = payload ^ sign_bit; if lower52 <= 0x3ff {
312 let sig = ((lower52 as u16) << 6).reverse_bits();
313 let sign_bit = (sign_bit >> 37) as u16;
314 Self(Inner::F16(sign_bit | 0x7c00 | sig))
315 } else if lower52 <= 0x7f_ffff {
316 let sig = ((lower52 as u32) << 9).reverse_bits();
317 let sign_bit = (sign_bit >> 21) as u32;
318 Self(Inner::F32(sign_bit | 0x7f80_0000 | sig))
319 } else if lower52 <= 0x0f_ffff_ffff_ffff {
320 let sig = (lower52 << 12).reverse_bits();
321 let sign_bit = sign_bit << 11;
322 Self(Inner::F64(sign_bit | 0x7ff0_0000_0000_0000 | sig))
323 } else {
324 panic!("payload exceeds maximum allowed value")
325 }
326 }
327
328 #[must_use]
342 pub const fn from_f64(value: f64) -> Self {
343 Self(Inner::new(value))
344 }
345
346 #[must_use]
360 pub const fn from_f32(value: f32) -> Self {
361 if value.is_nan() {
362 Self(Inner::new(f32_nan_to_f64(value.to_bits())))
364 } else {
365 Self(Inner::new(value as f64))
366 }
367 }
368
369 #[must_use]
379 pub const fn data_type(&self) -> DataType {
380 match self.0 {
381 Inner::F16(_) => DataType::Float16,
382 Inner::F32(_) => DataType::Float32,
383 Inner::F64(_) => DataType::Float64,
384 }
385 }
386
387 #[must_use]
388 pub(crate) const fn from_bits_u16(bits: u16) -> Self {
389 Self(Inner::F16(bits))
390 }
391
392 pub(crate) const fn from_bits_u32(bits: u32) -> Result<Self> {
393 let float = Self(Inner::F32(bits));
394 if matches!(Inner::new(float.to_f64()), Inner::F32(_)) {
395 Ok(float)
396 } else {
397 Err(Error::NonDeterministic)
398 }
399 }
400
401 pub(crate) const fn from_bits_u64(bits: u64) -> Result<Self> {
402 let float = Self(Inner::F64(bits));
403 if matches!(Inner::new(float.to_f64()), Inner::F64(_)) {
404 Ok(float)
405 } else {
406 Err(Error::NonDeterministic)
407 }
408 }
409
410 #[must_use]
415 pub const fn to_f64(self) -> f64 {
416 match self.0 {
417 Inner::F16(bits) => f16_to_f64(bits),
418 Inner::F32(bits) => {
419 let f = f32::from_bits(bits);
420 if f.is_nan() { f32_nan_to_f64(bits) } else { f as f64 }
421 }
422 Inner::F64(bits) => f64::from_bits(bits),
423 }
424 }
425
426 pub const fn to_f32(self) -> Result<f32> {
432 match self.0 {
433 Inner::F16(bits) => Ok(f16_to_f32(bits)),
434 Inner::F32(bits) => Ok(f32::from_bits(bits)),
435 Inner::F64(_) => Err(Error::Precision),
436 }
437 }
438
439 pub const fn to_payload(self) -> Result<u64> {
455 if self.is_finite() {
456 Err(Error::InvalidValue)
457 } else {
458 let sign_bit;
459 let sig;
460
461 match self.0 {
462 Inner::F16(bits) => {
463 sign_bit = ((bits & 0x8000) as u64) << 37;
464 sig = (bits.reverse_bits() >> 6) as u64;
465 }
466 Inner::F32(bits) => {
467 sign_bit = ((bits & 0x8000_0000) as u64) << 21;
468 sig = (bits.reverse_bits() >> 9) as u64;
469 }
470 Inner::F64(bits) => {
471 sign_bit = (bits & 0x8000_0000_0000_0000) >> 11;
472 sig = bits.reverse_bits() >> 12;
473 }
474 }
475
476 Ok(sign_bit | sig)
477 }
478 }
479
480 #[must_use]
487 pub const fn is_finite(self) -> bool {
488 match self.0 {
489 Inner::F16(bits) => bits & 0x7c00 != 0x7c00,
490 Inner::F32(bits) => bits & 0x7f80_0000 != 0x7f80_0000,
491 Inner::F64(bits) => bits & 0x7ff0_0000_0000_0000 != 0x7ff0_0000_0000_0000,
492 }
493 }
494}
495
496impl From<f64> for Float {
499 fn from(value: f64) -> Self {
500 Self::from_f64(value)
501 }
502}
503
504impl From<f32> for Float {
505 fn from(value: f32) -> Self {
506 Self::from_f32(value)
507 }
508}
509
510macro_rules! try_from {
513 ($type:ty) => {
514 impl From<$type> for Float {
515 fn from(value: $type) -> Self {
516 Self::from(value as f64)
517 }
518 }
519 };
520}
521
522try_from!(u8);
523try_from!(u16);
524try_from!(u32);
525
526try_from!(i8);
527try_from!(i16);
528try_from!(i32);
529
530impl From<bool> for Float {
531 fn from(value: bool) -> Self {
532 Self(if value { Inner::new(1.0) } else { Inner::new(0.0) })
533 }
534}
535
#[cfg(test)]
mod tests {
    use super::*;

    /// Smallest positive binary16 subnormal (bit pattern `0x0001`).
    const MIN_SUBNORMAL: f64 = 5.960464477539063e-8;
    /// Smallest positive binary16 normal (bit pattern `0x0400`).
    const MIN_NORMAL: f64 = 0.00006103515625;

    /// Returns `true` when `bits` is a binary16 NaN: all-ones exponent with a
    /// non-zero significand, either sign.
    fn f16_is_nan(bits: u16) -> bool {
        let magnitude = bits & 0x7fff;
        magnitude > 0x7c00
    }

    // ---------------------------------------------------------------------
    // f16_to_f64
    // ---------------------------------------------------------------------

    #[test]
    fn to_f64_zero() {
        let zero = f16_to_f64(0x0000);
        assert_eq!(zero, 0.0);
        assert!(zero.is_sign_positive());
    }

    #[test]
    fn to_f64_neg_zero() {
        let expected = (-0.0_f64).to_bits();
        assert_eq!(f16_to_f64(0x8000).to_bits(), expected);
    }

    #[test]
    fn to_f64_one() {
        let widened = f16_to_f64(0x3c00);
        assert_eq!(widened, 1.0);
    }

    #[test]
    fn to_f64_neg_one() {
        let widened = f16_to_f64(0xbc00);
        assert_eq!(widened, -1.0);
    }

    #[test]
    fn to_f64_max_normal() {
        let widened = f16_to_f64(0x7bff);
        assert_eq!(widened, 65504.0);
    }

    #[test]
    fn to_f64_min_positive_normal() {
        let widened = f16_to_f64(0x0400);
        assert_eq!(widened, MIN_NORMAL);
    }

    #[test]
    fn to_f64_min_positive_subnormal() {
        let widened = f16_to_f64(0x0001);
        assert_eq!(widened, MIN_SUBNORMAL);
    }

    #[test]
    fn to_f64_max_subnormal() {
        let widened = f16_to_f64(0x03ff);
        assert_eq!(widened, 0.00006097555160522461);
    }

    #[test]
    fn to_f64_infinity() {
        let widened = f16_to_f64(0x7c00);
        assert_eq!(widened, f64::INFINITY);
    }

    #[test]
    fn to_f64_neg_infinity() {
        let widened = f16_to_f64(0xfc00);
        assert_eq!(widened, f64::NEG_INFINITY);
    }

    #[test]
    fn to_f64_nan() {
        let widened = f16_to_f64(0x7e00);
        assert!(widened.is_nan());
    }

    #[test]
    fn to_f64_nan_preserves_payload() {
        // The lowest binary16 significand bit lands on bit 42 of the f64.
        let widened = f16_to_f64(0x7c01);
        assert_eq!(widened.to_bits(), 0x7ff0_0400_0000_0000);
    }

    #[test]
    fn to_f64_two() {
        let widened = f16_to_f64(0x4000);
        assert_eq!(widened, 2.0);
    }

    #[test]
    fn to_f64_one_point_five() {
        let widened = f16_to_f64(0x3e00);
        assert_eq!(widened, 1.5);
    }

    // ---------------------------------------------------------------------
    // f16_to_f32
    // ---------------------------------------------------------------------

    #[test]
    fn to_f32_zero() {
        let zero = f16_to_f32(0x0000);
        assert_eq!(zero, 0.0_f32);
        assert!(zero.is_sign_positive());
    }

    #[test]
    fn to_f32_neg_zero() {
        let expected = (-0.0_f32).to_bits();
        assert_eq!(f16_to_f32(0x8000).to_bits(), expected);
    }

    #[test]
    fn to_f32_one() {
        let widened = f16_to_f32(0x3c00);
        assert_eq!(widened, 1.0_f32);
    }

    #[test]
    fn to_f32_neg_one() {
        let widened = f16_to_f32(0xbc00);
        assert_eq!(widened, -1.0_f32);
    }

    #[test]
    fn to_f32_two() {
        let widened = f16_to_f32(0x4000);
        assert_eq!(widened, 2.0_f32);
    }

    #[test]
    fn to_f32_one_point_five() {
        let widened = f16_to_f32(0x3e00);
        assert_eq!(widened, 1.5_f32);
    }

    #[test]
    fn to_f32_max_normal() {
        let widened = f16_to_f32(0x7bff);
        assert_eq!(widened, 65504.0_f32);
    }

    #[test]
    fn to_f32_min_positive_normal() {
        let widened = f16_to_f32(0x0400);
        assert_eq!(widened, 0.000061035156_f32);
    }

    #[test]
    fn to_f32_min_positive_subnormal() {
        let widened = f16_to_f32(0x0001);
        assert_eq!(widened, 5.9604645e-8_f32);
    }

    #[test]
    fn to_f32_max_subnormal() {
        let widened = f16_to_f32(0x03ff);
        assert_eq!(widened, 0.00006097555_f32);
    }

    #[test]
    fn to_f32_infinity() {
        let widened = f16_to_f32(0x7c00);
        assert_eq!(widened, f32::INFINITY);
    }

    #[test]
    fn to_f32_neg_infinity() {
        let widened = f16_to_f32(0xfc00);
        assert_eq!(widened, f32::NEG_INFINITY);
    }

    #[test]
    fn to_f32_nan() {
        let widened = f16_to_f32(0x7e00);
        assert!(widened.is_nan());
    }

    #[test]
    fn to_f32_nan_preserves_payload() {
        // The lowest binary16 significand bit lands on bit 13 of the f32.
        let widened = f16_to_f32(0x7c01);
        assert_eq!(widened.to_bits(), 0x7f80_2000);
    }

    /// Every non-NaN binary16 pattern, both signs, must widen to the same
    /// `f32` whether converted directly or via `f64`.
    #[test]
    fn to_f32_agrees_with_f16_to_f64() {
        for magnitude in (0..=0x7fff_u16).filter(|&m| !f16_is_nan(m)) {
            for bits in [magnitude, magnitude | 0x8000] {
                let direct = f16_to_f32(bits);
                let narrowed = f16_to_f64(bits) as f32;
                assert_eq!(
                    direct.to_bits(),
                    narrowed.to_bits(),
                    "mismatch for bits 0x{bits:04x}"
                );
            }
        }
    }

    // ---------------------------------------------------------------------
    // f64_to_f16: exact values
    // ---------------------------------------------------------------------

    #[test]
    fn from_f64_zero() {
        let half = f64_to_f16(0.0);
        assert_eq!(half, 0x0000);
    }

    #[test]
    fn from_f64_neg_zero() {
        let half = f64_to_f16(-0.0);
        assert_eq!(half, 0x8000);
    }

    #[test]
    fn from_f64_one() {
        let half = f64_to_f16(1.0);
        assert_eq!(half, 0x3c00);
    }

    #[test]
    fn from_f64_neg_one() {
        let half = f64_to_f16(-1.0);
        assert_eq!(half, 0xbc00);
    }

    #[test]
    fn from_f64_max_normal() {
        let half = f64_to_f16(65504.0);
        assert_eq!(half, 0x7bff);
    }

    #[test]
    fn from_f64_overflow_to_infinity() {
        let half = f64_to_f16(65520.0);
        assert_eq!(half, 0x7c00);
    }

    #[test]
    fn from_f64_infinity() {
        let half = f64_to_f16(f64::INFINITY);
        assert_eq!(half, 0x7c00);
    }

    #[test]
    fn from_f64_neg_infinity() {
        let half = f64_to_f16(f64::NEG_INFINITY);
        assert_eq!(half, 0xfc00);
    }

    #[test]
    fn from_f64_nan() {
        let half = f64_to_f16(f64::NAN);
        assert!(f16_is_nan(half));
    }

    #[test]
    fn from_f64_min_positive_subnormal() {
        let half = f64_to_f16(MIN_SUBNORMAL);
        assert_eq!(half, 0x0001);
    }

    #[test]
    fn from_f64_min_positive_normal() {
        let half = f64_to_f16(MIN_NORMAL);
        assert_eq!(half, 0x0400);
    }

    // ---------------------------------------------------------------------
    // f64_to_f16: rounding
    // ---------------------------------------------------------------------

    #[test]
    fn rounding_exactly_halfway_rounds_to_even_down() {
        // Halfway between 0x3c00 and 0x3c01; 0x3c00 is even.
        let tie = f64::from_bits(0x3FF0_0200_0000_0000);
        assert_eq!(f64_to_f16(tie), 0x3c00);
    }

    #[test]
    fn rounding_exactly_halfway_rounds_to_even_up() {
        // Halfway between 0x3c01 and 0x3c02; 0x3c02 is even.
        let tie = f64::from_bits(0x3FF0_0600_0000_0000);
        assert_eq!(f64_to_f16(tie), 0x3c02);
    }

    #[test]
    fn rounding_just_below_halfway_rounds_down() {
        let input = f64::from_bits(0x3FF0_01FF_FFFF_FFFF);
        assert_eq!(f64_to_f16(input), 0x3c00);
    }

    #[test]
    fn rounding_just_above_halfway_rounds_up() {
        let input = f64::from_bits(0x3FF0_0200_0000_0001);
        assert_eq!(f64_to_f16(input), 0x3c01);
    }

    #[test]
    fn rounding_subnormal_halfway_rounds_to_even() {
        // 1.5 subnormal units: tie between 1 and 2, and 2 is even.
        let input = 1.5 * MIN_SUBNORMAL;
        assert_eq!(f64_to_f16(input), 0x0002);
    }

    #[test]
    fn rounding_subnormal_halfway_even_down() {
        // 2.5 subnormal units: tie between 2 and 3, and 2 is even.
        let input = 2.5 * MIN_SUBNORMAL;
        assert_eq!(f64_to_f16(input), 0x0002);
    }

    #[test]
    fn rounding_normal_to_subnormal_boundary() {
        assert_eq!(f64_to_f16(MIN_NORMAL), 0x0400);

        // One f64 ulp below the smallest normal still rounds up to it.
        let just_under = f64::from_bits(MIN_NORMAL.to_bits() - 1);
        assert_eq!(f64_to_f16(just_under), 0x0400);
    }

    #[test]
    fn rounding_overflow_at_max() {
        let cases = [(65504.0, 0x7bff_u16), (65519.99, 0x7bff), (65520.0, 0x7c00)];
        for (input, expected) in cases {
            assert_eq!(f64_to_f16(input), expected);
        }
    }

    #[test]
    fn rounding_tiny_to_zero() {
        let tiny = 1e-30;
        assert_eq!(f64_to_f16(tiny), 0x0000);
        assert_eq!(f64_to_f16(-tiny), 0x8000);
    }

    #[test]
    fn rounding_tiny_to_min_subnormal() {
        // Exactly half a subnormal unit is a tie with zero, which is even.
        let half_unit: f64 = 0.5 * MIN_SUBNORMAL;
        assert_eq!(f64_to_f16(half_unit), 0x0000);

        // Anything above the tie rounds up to the smallest subnormal.
        let just_over = f64::from_bits(half_unit.to_bits() + 1);
        assert_eq!(f64_to_f16(just_over), 0x0001);
    }

    // ---------------------------------------------------------------------
    // Round trips
    // ---------------------------------------------------------------------

    /// Every non-NaN binary16 pattern, both signs, must survive a trip
    /// through `f64` unchanged.
    #[test]
    fn roundtrip_all_exact_f16_values() {
        for magnitude in (0..=0x7fff_u16).filter(|&m| !f16_is_nan(m)) {
            for bits in [magnitude, magnitude | 0x8000] {
                let widened = f16_to_f64(bits);
                let back = f64_to_f16(widened);
                assert_eq!(bits, back, "roundtrip failed for bits 0x{bits:04x}");
            }
        }
    }
}