1#![doc = include_str!("../README.md")]
10#![warn(missing_docs)]
11
12pub mod detail;
13pub mod example;
14
15use core::cmp::Ordering;
16use core::f64::consts::LOG10_2;
17use core::ops::Neg;
18
/// How a minifloat format encodes NaN (and whether it has infinities)
///
/// The variant chosen for a type decides the bit patterns of its `NAN` and
/// `HUGE` constants and whether `INFINITY`/`NEG_INFINITY` are defined.
#[allow(clippy::upper_case_acronyms)]
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum NanStyle {
    /// IEEE 754 layout: the all-ones exponent encodes infinities (zero
    /// significand) and NaNs (non-zero significand)
    IEEE,

    /// Finite-only layout: no infinities; NaN is the magnitude with every
    /// exponent and significand bit set, in either sign
    FN,

    /// Finite-only layout with unsigned zero: no infinities and no negative
    /// zero; the single NaN is the pattern with only the sign bit set
    FNUZ,
}
50
/// Base-2 logarithm correction for the largest significand at a given precision
///
/// For `p >= 1`, entry `p` is `log2(1 - 2^-p)`.  Entry 0 is the special value
/// `-2.0`.  The table is indexed by a precision in bits when estimating the
/// base-10 exponent of the largest finite value of a format.
#[allow(clippy::excessive_precision)]
const LOG2_SIGNIFICAND: [f64; 16] = [
    -2.0,
    -1.0,
    -0.415_037_499_278_843_813,
    -0.192_645_077_942_395_881,
    -0.093_109_404_391_481_465_1,
    -0.045_803_689_613_124_788_6,
    -0.022_720_076_500_083_528_9,
    -0.011_315_313_227_834_146_1,
    -0.005_646_563_141_142_062_72,
    -0.002_820_519_062_378_662_63,
    -0.001_409_570_254_671_353_63,
    -0.000_704_612_976_589_372_706,
    -0.000_352_263_471_629_021_385,
    -0.000_176_120_984_274_024_062,
    -0.000_088_057_804_580_026_383_4,
    -0.000_044_028_230_441_777_211_5,
];
70
71pub trait Minifloat: Copy + PartialEq + PartialOrd + Neg<Output = Self> {
82 type Bits;
84
85 const S: bool = true;
87
88 const E: u32;
90
91 const M: u32;
93
94 const B: i32 = (1 << (Self::E - 1)) - 1;
96
97 const N: NanStyle = NanStyle::IEEE;
99
100 const BITWIDTH: u32 = Self::S as u32 + Self::E + Self::M;
102
103 const RADIX: u32 = 2;
105
106 const MANTISSA_DIGITS: u32 = Self::M + 1;
110
111 const MAX_EXP: i32 = (1 << Self::E)
115 - Self::B
116 - match Self::N {
117 NanStyle::IEEE => 1,
118 NanStyle::FN => (Self::M == 0) as i32,
119 NanStyle::FNUZ => 0,
120 };
121
122 const MIN_EXP: i32 = 2 - Self::B;
131
132 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
136 const DIGITS: u32 = (Self::M as f64 * crate::LOG10_2) as u32;
137
138 #[allow(clippy::cast_possible_truncation)]
142 const MAX_10_EXP: i32 = {
143 let exponent = (1 << Self::E) - Self::B - matches!(Self::N, NanStyle::IEEE) as i32;
144 let precision = Self::M + !matches!(Self::N, NanStyle::FN) as u32;
145 let log2_max = exponent as f64 + crate::LOG2_SIGNIFICAND[precision as usize];
146 (log2_max * crate::LOG10_2) as i32
147 };
148
149 #[allow(clippy::cast_possible_truncation)]
153 const MIN_10_EXP: i32 = ((Self::MIN_EXP - 1) as f64 * crate::LOG10_2) as i32;
154
155 const NAN: Self;
157
158 const HUGE: Self;
164
165 const MAX: Self;
167
168 const TINY: Self;
170
171 const MIN_POSITIVE: Self;
175
176 const EPSILON: Self;
182
183 const MIN: Self;
187
188 #[must_use]
190 fn from_bits(v: Self::Bits) -> Self;
191
192 #[must_use]
194 fn to_bits(self) -> Self::Bits;
195
196 #[must_use]
204 fn total_cmp(&self, other: &Self) -> Ordering;
205
206 #[must_use]
208 fn is_nan(self) -> bool;
209
210 #[must_use]
212 fn is_infinite(self) -> bool;
213
214 #[must_use]
216 fn is_finite(self) -> bool;
217
218 #[must_use]
222 fn is_subnormal(self) -> bool {
223 matches!(self.classify(), core::num::FpCategory::Subnormal)
224 }
225
226 #[must_use]
230 fn is_normal(self) -> bool {
231 matches!(self.classify(), core::num::FpCategory::Normal)
232 }
233
234 #[must_use]
239 fn classify(self) -> core::num::FpCategory;
240
241 #[must_use]
243 fn abs(self) -> Self;
244
245 #[must_use]
247 fn is_sign_positive(self) -> bool;
248
249 #[must_use]
251 fn is_sign_negative(self) -> bool;
252}
253
/// Internal helper: add `INFINITY` and `NEG_INFINITY` to a type, but only
/// when its NaN style is `IEEE`; every other style expands to nothing.
///
/// The target type is expected to be a tuple struct around its bits with
/// associated constants `HUGE`, `E` and `M`.
#[doc(hidden)]
#[macro_export]
macro_rules! __conditionally_define_infinities {
    (impl $ty:ident, IEEE) => {
        impl $ty {
            /// Positive infinity (+∞)
            pub const INFINITY: Self = Self::HUGE;

            /// Negative infinity (−∞)
            pub const NEG_INFINITY: Self = Self((1 << (Self::E + Self::M)) | Self::HUGE.0);
        }
    };
    (impl $ty:ident, $style:ident) => {};
}
269
/// Internal helper: implement the size-specific trait (`Most8` for `u8`
/// storage, `Most16` for `u16` storage) by forwarding every item to the
/// type's inherent constants and methods.
///
/// The target type is expected to be a tuple struct around its bits that
/// already has the inherent items used below, including `total_cmp_key`.
#[doc(hidden)]
#[macro_export]
macro_rules! __select_sized_trait {
    // Shared expansion; the public arms below only pick the trait and bit type.
    (@impl $sized:ident, $bits:ty, $name:ident, $m:expr) => {
        impl $crate::$sized<$m> for $name {
            const E: u32 = Self::E;
            const B: i32 = Self::B;
            const N: $crate::NanStyle = Self::N;

            const NAN: Self = Self::NAN;
            const HUGE: Self = Self::HUGE;
            const MAX: Self = Self::MAX;
            const TINY: Self = Self::TINY;
            const MIN_POSITIVE: Self = Self::MIN_POSITIVE;
            const EPSILON: Self = Self::EPSILON;
            const MIN: Self = Self::MIN;

            fn from_bits(v: $bits) -> Self {
                Self::from_bits(v)
            }

            fn to_bits(self) -> $bits {
                self.to_bits()
            }

            fn total_cmp(&self, other: &Self) -> core::cmp::Ordering {
                let lhs = Self::total_cmp_key(self.0);
                let rhs = Self::total_cmp_key(other.0);
                lhs.cmp(&rhs)
            }
        }
    };
    (u8, $name:ident, $e:expr, $m:expr) => {
        $crate::__select_sized_trait!(@impl Most8, u8, $name, $m);
    };
    (u16, $name:ident, $e:expr, $m:expr) => {
        $crate::__select_sized_trait!(@impl Most16, u16, $name, $m);
    };
}
332
/// Define a minifloat type as a tuple struct around an unsigned integer
///
/// Accepted forms, where `E` is the exponent width, `M` the stored
/// significand width, `B` the exponent bias and `N` a [`NanStyle`] variant
/// name:
///
/// ```text
/// minifloat!(VIS struct NAME(BITS): E, M);        // bias 2^(E-1)-1, IEEE
/// minifloat!(VIS struct NAME(BITS): E, M, B);     // custom bias, IEEE
/// minifloat!(VIS struct NAME(BITS): E, M, N);     // bias 2^(E-1)-1, style N
/// minifloat!(VIS struct NAME(BITS): E, M, B, N);  // everything explicit
/// ```
///
/// The expansion statically asserts that the format fits in 16 bits, that
/// `E >= 2`, that IEEE-style formats have `M > 0`, and that `MAX_EXP >= 1`
/// and `MIN_EXP <= 1`.
#[macro_export]
macro_rules! minifloat {
    ($vis:vis struct $name:ident($bits:tt): $e:expr, $m:expr, $b:expr, $n:ident) => {
        #[allow(non_camel_case_types)]
        #[doc = concat!("A minifloat with bit-layout S1E", $e, "M", $m)]
        #[derive(Debug, Clone, Copy, Default)]
        $vis struct $name($bits);

        impl $name {
            /// Exponent bit-width
            pub const E: u32 = $e;

            /// Explicit significand (mantissa) bit-width
            pub const M: u32 = $m;

            /// Exponent bias
            pub const B: i32 = $b;

            /// NaN encoding style
            pub const N: $crate::NanStyle = $crate::NanStyle::$n;

            /// Total bit-width: one sign bit plus exponent and significand
            pub const BITWIDTH: u32 = 1 + Self::E + Self::M;

            /// The radix of the internal representation
            pub const RADIX: u32 = 2;

            /// Number of significant binary digits, including the implicit
            /// leading bit
            pub const MANTISSA_DIGITS: u32 = $m + 1;

            /// Upper exponent bound, following the convention of
            /// [`f32::MAX_EXP`]
            ///
            /// The NaN style decides how many exponent codes are unavailable
            /// to finite values.
            pub const MAX_EXP: i32 = (1 << Self::E)
                - Self::B
                - match Self::N {
                    $crate::NanStyle::IEEE => 1,
                    $crate::NanStyle::FN => (Self::M == 0) as i32,
                    $crate::NanStyle::FNUZ => 0,
                };

            /// Lower exponent bound for normal values, following the
            /// convention of [`f32::MIN_EXP`]
            pub const MIN_EXP: i32 = 2 - Self::B;

            /// The canonical NaN
            ///
            /// IEEE: all exponent bits plus the top significand bit.
            /// FN: every exponent and significand bit.  FNUZ: only the sign
            /// bit.
            pub const NAN: Self = Self(match Self::N {
                $crate::NanStyle::IEEE => ((1 << (Self::E + 1)) - 1) << (Self::M - 1),
                $crate::NanStyle::FN => (1 << (Self::E + Self::M)) - 1,
                $crate::NanStyle::FNUZ => 1 << (Self::E + Self::M),
            });

            /// The value of largest magnitude
            ///
            /// This is positive infinity for the IEEE style and the largest
            /// finite value otherwise.
            pub const HUGE: Self = Self(match Self::N {
                $crate::NanStyle::IEEE => ((1 << Self::E) - 1) << Self::M,
                $crate::NanStyle::FN => (1 << (Self::E + Self::M)) - 2,
                $crate::NanStyle::FNUZ => (1 << (Self::E + Self::M)) - 1,
            });

            /// The largest finite value
            pub const MAX: Self = Self(Self::HUGE.0 - matches!(Self::N, $crate::NanStyle::IEEE) as $bits);

            /// The smallest positive value (bit pattern 1)
            pub const TINY: Self = Self(1);

            /// The smallest positive normal value (exponent field 1, zero
            /// significand)
            pub const MIN_POSITIVE: Self = Self(1 << Self::M);

            /// The value `2^-M`
            ///
            /// It is encoded as a normal number when the biased exponent
            /// `B - M` is at least 1 and as a subnormal otherwise.
            #[allow(clippy::cast_possible_wrap)]
            pub const EPSILON: Self = Self(match Self::B - Self::M as i32 {
                #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
                s @ 1.. => (s as $bits) << Self::M,
                s => 1 << (Self::M as i32 - 1 + s),
            });

            /// The most negative finite value: [`Self::MAX`] with the sign
            /// bit set
            pub const MIN: Self = Self(Self::MAX.0 | 1 << (Self::E + Self::M));

            /// Mask selecting every bit below the sign bit
            const ABS_MASK: $bits = (1 << (Self::E + Self::M)) - 1;

            #[doc = concat!("Raw transmutation from [`", stringify!($bits), "`]")]
            ///
            /// Bits above [`Self::BITWIDTH`] are discarded.
            #[must_use]
            pub const fn from_bits(v: $bits) -> Self {
                Self($bits::MAX >> ($bits::BITS - Self::BITWIDTH) & v)
            }

            #[doc = concat!("Raw transmutation to [`", stringify!($bits), "`]")]
            #[must_use]
            pub const fn to_bits(self) -> $bits {
                self.0
            }

            /// Check if the value is NaN
            #[must_use]
            pub const fn is_nan(self) -> bool {
                match Self::N {
                    #[allow(clippy::bad_bit_mask)]
                    $crate::NanStyle::IEEE => self.0 & Self::ABS_MASK > Self::HUGE.0,
                    $crate::NanStyle::FN => self.0 & Self::ABS_MASK == Self::NAN.0 & Self::ABS_MASK,
                    $crate::NanStyle::FNUZ => self.0 == Self::NAN.0,
                }
            }

            /// Check if the value is positive or negative infinity
            ///
            /// Always false unless the NaN style is IEEE.
            #[must_use]
            pub const fn is_infinite(self) -> bool {
                matches!(Self::N, $crate::NanStyle::IEEE) && self.0 & Self::ABS_MASK == Self::HUGE.0
            }

            /// Check if the value is neither infinite nor NaN
            #[must_use]
            pub const fn is_finite(self) -> bool {
                match Self::N {
                    $crate::NanStyle::IEEE => self.0 & Self::ABS_MASK < Self::HUGE.0,
                    _ => !self.is_nan(),
                }
            }

            /// Check if the value is subnormal, i.e. has a zero exponent
            /// field and a non-zero significand
            #[must_use]
            pub const fn is_subnormal(self) -> bool {
                matches!(self.classify(), core::num::FpCategory::Subnormal)
            }

            /// Check if the value is normal, i.e. not zero, subnormal,
            /// infinite or NaN
            #[must_use]
            pub const fn is_normal(self) -> bool {
                matches!(self.classify(), core::num::FpCategory::Normal)
            }

            /// Classify the value into a floating-point category
            ///
            /// NaN and infinity are checked first; the remaining values are
            /// split by their exponent and significand fields.
            #[must_use]
            pub const fn classify(self) -> core::num::FpCategory {
                if self.is_nan() {
                    core::num::FpCategory::Nan
                } else if self.is_infinite() {
                    core::num::FpCategory::Infinite
                } else {
                    let exp_mask = ((1 << Self::E) - 1) << Self::M;
                    let man_mask = (1 << Self::M) - 1;

                    match (self.0 & exp_mask, self.0 & man_mask) {
                        (0, 0) => core::num::FpCategory::Zero,
                        (0, _) => core::num::FpCategory::Subnormal,
                        (_, _) => core::num::FpCategory::Normal,
                    }
                }
            }

            /// Compute the absolute value
            ///
            /// The FNUZ NaN is returned unchanged, because clearing its sign
            /// bit would turn it into zero.
            #[must_use]
            pub const fn abs(self) -> Self {
                if matches!(Self::N, $crate::NanStyle::FNUZ) && self.0 == Self::NAN.0 {
                    return Self::NAN;
                }
                Self::from_bits(self.to_bits() & Self::ABS_MASK)
            }

            /// Check if the sign bit is clear
            #[must_use]
            pub const fn is_sign_positive(self) -> bool {
                self.0 >> (Self::E + Self::M) & 1 == 0
            }

            /// Check if the sign bit is set
            #[must_use]
            pub const fn is_sign_negative(self) -> bool {
                self.0 >> (Self::E + Self::M) & 1 == 1
            }

            /// Map raw bits to an unsigned key whose integer order is the
            /// total order of the encoded values
            ///
            /// Non-negative encodings get their sign bit set; negative
            /// encodings get every bit flipped, which reverses their order.
            const fn total_cmp_key(x: $bits) -> $bits {
                let sign = 1 << (Self::E + Self::M);
                let mask = ((x & sign) >> (Self::E + Self::M)) * (sign - 1);
                x ^ (sign | mask)
            }

            /// Convert from [`f32`]
            ///
            /// NaN maps to [`Self::NAN`] combined with the sign of `x`.
            /// Magnitudes beyond the representable range saturate at
            /// [`Self::HUGE`].  Results below the normal range are rounded
            /// to a whole number of subnormal steps, ties to even.  For the
            /// FNUZ style a negative input that rounds to zero gives the
            /// unsigned zero.
            #[must_use]
            #[allow(clippy::cast_possible_wrap)]
            pub fn from_f32(x: f32) -> Self {
                if x.is_nan() {
                    let sign_bit = <$bits>::from(x.is_sign_negative()) << (Self::E + Self::M);
                    return Self::from_bits(Self::NAN.0 | sign_bit);
                }

                let bits = $crate::detail::round_f32_to_precision::<$m>(x).to_bits();
                let sign_bit = ((bits >> 31) as $bits) << (Self::E + Self::M);
                // Exponent rebias between the two formats, aligned to the exponent field.
                let diff = (Self::MIN_EXP - f32::MIN_EXP) << Self::M;
                // Drop the sign and keep the exponent plus the top M significand bits.
                let magnitude = bits << 1 >> (f32::MANTISSA_DIGITS - Self::M);
                let magnitude = magnitude as i32 - diff;

                if magnitude < 1 << Self::M {
                    // Below the normal range: count subnormal steps directly.
                    let ticks = f64::from(x.abs()) * $crate::detail::exp2i(Self::MANTISSA_DIGITS as i32 - Self::MIN_EXP);
                    #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
                    let ticks = ticks.round_ties_even() as $bits;
                    return Self::from_bits(
                        (<$bits>::from(Self::N != $crate::NanStyle::FNUZ || ticks != 0) * sign_bit) | ticks,
                    );
                }

                #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
                Self::from_bits(magnitude.min(i32::from(Self::HUGE.to_bits())) as $bits | sign_bit)
            }

            /// Convert from [`f64`]
            ///
            /// NaN maps to [`Self::NAN`] combined with the sign of `x`.
            /// Magnitudes beyond the representable range saturate at
            /// [`Self::HUGE`].  Results below the normal range are rounded
            /// to a whole number of subnormal steps, ties to even.  For the
            /// FNUZ style a negative input that rounds to zero gives the
            /// unsigned zero.
            #[must_use]
            #[allow(clippy::cast_possible_wrap)]
            pub fn from_f64(x: f64) -> Self {
                if x.is_nan() {
                    let sign_bit = <$bits>::from(x.is_sign_negative()) << (Self::E + Self::M);
                    return Self::from_bits(Self::NAN.to_bits() | sign_bit);
                }

                let bits = $crate::detail::round_f64_to_precision::<$m>(x).to_bits();
                let sign_bit = ((bits >> 63) as $bits) << (Self::E + Self::M);
                // Exponent rebias between the two formats, aligned to the exponent field.
                let diff = i64::from(Self::MIN_EXP - f64::MIN_EXP) << Self::M;
                // Drop the sign and keep the exponent plus the top M significand bits.
                let magnitude = bits << 1 >> (f64::MANTISSA_DIGITS - Self::M);
                let magnitude = magnitude as i64 - diff;

                if magnitude < 1 << Self::M {
                    // Below the normal range: count subnormal steps directly.
                    let ticks = x.abs() * $crate::detail::exp2i(Self::MANTISSA_DIGITS as i32 - Self::MIN_EXP);
                    #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
                    let ticks = ticks.round_ties_even() as $bits;
                    return Self::from_bits(
                        (<$bits>::from(Self::N != $crate::NanStyle::FNUZ || ticks != 0) * sign_bit) | ticks,
                    );
                }

                #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
                Self::from_bits(magnitude.min(i64::from(Self::HUGE.to_bits())) as $bits | sign_bit)
            }

            /// Widen to [`f32`] by moving bit fields
            ///
            /// Only used by [`Self::to_f32`] after it has checked that
            /// [`f32`] covers this format's precision and exponent range.
            fn fast_to_f32(self) -> f32 {
                let sign = if self.is_sign_negative() { -1.0 } else { 1.0 };
                let magnitude = self.to_bits() & Self::ABS_MASK;

                if self.is_nan() {
                    return f32::NAN.copysign(sign);
                }
                if self.is_infinite() {
                    return f32::INFINITY * sign;
                }
                if magnitude < 1 << Self::M {
                    // Zero or subnormal: scale the integer significand.
                    #[allow(clippy::cast_possible_wrap)]
                    let shift = Self::MIN_EXP - Self::MANTISSA_DIGITS as i32;
                    #[allow(clippy::cast_possible_truncation)]
                    return ($crate::detail::exp2i(shift) * f64::from(sign) * f64::from(magnitude)) as f32;
                }
                let shift = f32::MANTISSA_DIGITS - Self::MANTISSA_DIGITS;
                #[allow(clippy::cast_sign_loss)]
                let diff = (Self::MIN_EXP - f32::MIN_EXP) as u32;
                let diff = diff << (f32::MANTISSA_DIGITS - 1);
                let sign = u32::from(self.is_sign_negative()) << 31;
                f32::from_bits(((u32::from(magnitude) << shift) + diff) | sign)
            }

            /// Widen to [`f64`] by moving bit fields
            ///
            /// Only used by [`Self::to_f64`] after it has checked that
            /// [`f64`] covers this format's precision and exponent range.
            fn fast_to_f64(self) -> f64 {
                let sign = if self.is_sign_negative() { -1.0 } else { 1.0 };
                let magnitude = self.to_bits() & Self::ABS_MASK;

                if self.is_nan() {
                    return f64::NAN.copysign(sign);
                }
                if self.is_infinite() {
                    return f64::INFINITY * sign;
                }
                if magnitude < 1 << Self::M {
                    // Zero or subnormal: scale the integer significand.
                    #[allow(clippy::cast_possible_wrap)]
                    let shift = Self::MIN_EXP - Self::MANTISSA_DIGITS as i32;
                    return $crate::detail::exp2i(shift) * sign * f64::from(magnitude);
                }
                let shift = f64::MANTISSA_DIGITS - Self::MANTISSA_DIGITS;
                #[allow(clippy::cast_sign_loss)]
                let diff = (Self::MIN_EXP - f64::MIN_EXP) as u64;
                let diff = diff << (f64::MANTISSA_DIGITS - 1);
                let sign = u64::from(self.is_sign_negative()) << 63;
                f64::from_bits(((u64::from(magnitude) << shift) + diff) | sign)
            }

            /// Convert to [`f64`] when the exponent range of this format may
            /// exceed that of [`f64`]
            ///
            /// Values too large for [`f64`] become infinities; values below
            /// its normal range are produced by scaling the significand.
            fn as_f64(self) -> f64 {
                // Use the declared bias: a custom bias differs from the IEEE-like default.
                let bias = Self::B;
                let sign = if self.is_sign_negative() { -1.0 } else { 1.0 };
                let magnitude = self.abs().to_bits();

                if self.is_nan() {
                    return f64::NAN.copysign(sign);
                }
                if self.is_infinite() {
                    return f64::INFINITY * sign;
                }
                // Unbiased exponent of at least f64::MAX_EXP: too large for f64.
                if i32::from(magnitude) >= (f64::MAX_EXP + bias) << Self::M {
                    return f64::INFINITY * sign;
                }
                if magnitude < 1 << Self::M {
                    // Zero or subnormal: scale the integer significand.
                    #[allow(clippy::cast_possible_wrap)]
                    let shift = Self::MIN_EXP - Self::MANTISSA_DIGITS as i32;
                    return $crate::detail::exp2i(shift) * sign * f64::from(magnitude);
                }
                if i32::from(magnitude >> Self::M) < f64::MIN_EXP + bias {
                    // Near or below the bottom of f64's normal range: scale instead of
                    // assembling bit fields.
                    let significand = (magnitude & ((1 << Self::M) - 1)) | 1 << Self::M;
                    let exponent = i32::from(magnitude >> Self::M) - bias;
                    #[allow(clippy::cast_possible_wrap)]
                    return $crate::detail::exp2i(exponent - Self::M as i32) * sign * f64::from(significand);
                }
                let shift = f64::MANTISSA_DIGITS - Self::MANTISSA_DIGITS;
                // The rebias can be negative here, so the cast wraps and the sum
                // below is taken modulo 2^64.
                #[allow(clippy::cast_sign_loss)]
                let diff = (Self::MIN_EXP - f64::MIN_EXP) as u64;
                let diff = diff << (f64::MANTISSA_DIGITS - 1);
                let sign = u64::from(self.is_sign_negative()) << 63;
                f64::from_bits((u64::from(magnitude) << shift).wrapping_add(diff) | sign)
            }

            /// Convert to [`f64`]
            ///
            /// The bit-field path is used when [`f64`] covers the precision
            /// and exponent range of this format; otherwise the general path
            /// handles overflow and underflow.
            #[must_use]
            pub fn to_f64(self) -> f64 {
                let lossless = f64::MANTISSA_DIGITS >= Self::MANTISSA_DIGITS
                    && f64::MAX_EXP >= Self::MAX_EXP
                    && f64::MIN_EXP <= Self::MIN_EXP;

                if lossless {
                    self.fast_to_f64()
                } else {
                    self.as_f64()
                }
            }

            /// Convert to [`f32`]
            ///
            /// The bit-field path is used when [`f32`] covers the precision
            /// and exponent range of this format; otherwise the value goes
            /// through [`Self::to_f64`] and is narrowed with an `as` cast.
            #[must_use]
            pub fn to_f32(self) -> f32 {
                let lossless = f32::MANTISSA_DIGITS >= Self::MANTISSA_DIGITS
                    && f32::MAX_EXP >= Self::MAX_EXP
                    && f32::MIN_EXP <= Self::MIN_EXP;

                if lossless {
                    return self.fast_to_f32();
                }
                #[allow(clippy::cast_possible_truncation)]
                return self.to_f64() as f32;
            }
        }

        // Compile-time validation of the format parameters.
        const _: () = assert!($name::BITWIDTH <= 16);
        const _: () = assert!($name::E >= 2);
        const _: () = assert!($name::M > 0 || !matches!($name::N, $crate::NanStyle::IEEE));
        const _: () = assert!($name::MAX_EXP >= 1);
        const _: () = assert!($name::MIN_EXP <= 1);

        // NaN never equals anything; outside FNUZ, +0 and -0 compare equal.
        impl PartialEq for $name {
            fn eq(&self, other: &Self) -> bool {
                let eq = self.0 == other.0 && !self.is_nan();
                eq || !matches!(Self::N, $crate::NanStyle::FNUZ) && (self.0 | other.0) & Self::ABS_MASK == 0
            }
        }

        // Unordered when either side is NaN; otherwise raw bits are compared and
        // the result is flipped when a sign bit is involved.
        impl PartialOrd for $name {
            fn partial_cmp(&self, other: &Self) -> Option<core::cmp::Ordering> {
                if self.is_nan() || other.is_nan() {
                    return None;
                }
                if self == other {
                    return Some(core::cmp::Ordering::Equal);
                }

                let sign = (self.0 | other.0) >> (Self::E + Self::M) & 1 == 1;

                Some(if (self.0 > other.0) ^ sign {
                    core::cmp::Ordering::Greater
                } else {
                    core::cmp::Ordering::Less
                })
            }
        }

        // Negation flips the sign bit, except for the FNUZ zero and NaN, whose
        // encodings must stay as they are.
        impl core::ops::Neg for $name {
            type Output = Self;

            fn neg(self) -> Self::Output {
                let flag = matches!(Self::N, $crate::NanStyle::FNUZ) && self.0 & Self::ABS_MASK == 0;
                let switch = <$bits>::from(!flag) << (Self::E + Self::M);
                Self(self.0 ^ switch)
            }
        }

        // The generic trait simply forwards to the inherent items above.
        impl $crate::Minifloat for $name {
            type Bits = $bits;
            const E: u32 = $e;
            const M: u32 = $m;
            const B: i32 = $b;
            const N: $crate::NanStyle = $crate::NanStyle::$n;

            const NAN: Self = Self::NAN;
            const HUGE: Self = Self::HUGE;
            const MAX: Self = Self::MAX;
            const TINY: Self = Self::TINY;
            const MIN_POSITIVE: Self = Self::MIN_POSITIVE;
            const EPSILON: Self = Self::EPSILON;
            const MIN: Self = Self::MIN;

            fn from_bits(v: Self::Bits) -> Self {
                Self::from_bits(v)
            }

            fn to_bits(self) -> Self::Bits {
                self.to_bits()
            }

            fn total_cmp(&self, other: &Self) -> core::cmp::Ordering {
                Self::total_cmp_key(self.0).cmp(&Self::total_cmp_key(other.0))
            }

            fn is_nan(self) -> bool {
                self.is_nan()
            }

            fn is_infinite(self) -> bool {
                self.is_infinite()
            }

            fn is_finite(self) -> bool {
                self.is_finite()
            }

            fn classify(self) -> core::num::FpCategory {
                self.classify()
            }

            fn abs(self) -> Self {
                self.abs()
            }

            fn is_sign_positive(self) -> bool {
                self.is_sign_positive()
            }

            fn is_sign_negative(self) -> bool {
                self.is_sign_negative()
            }
        }

        $crate::__conditionally_define_infinities!(impl $name, $n);
    };
    ($vis:vis struct $name:ident($bits:tt): $e:expr, $m:expr, $n:ident) => {
        $crate::minifloat!($vis struct $name($bits): $e, $m, (1 << ($e - 1)) - 1, $n);
    };
    ($vis:vis struct $name:ident($bits:tt): $e:expr, $m:expr, $b:expr) => {
        $crate::minifloat!($vis struct $name($bits): $e, $m, $b, IEEE);
    };
    ($vis:vis struct $name:ident($bits:tt): $e:expr, $m:expr) => {
        $crate::minifloat!($vis struct $name($bits): $e, $m, (1 << ($e - 1)) - 1, IEEE);
    };
}
860
// 1 sign, 5 exponent and 10 significand bits with the default bias and IEEE
// NaN style: the layout of IEEE 754 binary16 (half precision).
minifloat!(pub struct F16(u16): 5, 10);
// 1 sign, 8 exponent and 7 significand bits with the default bias and IEEE
// NaN style: the layout of bfloat16.
minifloat!(pub struct BF16(u16): 8, 7);