unsigned_float/
lib.rs

1//! Unsigned floating-point formats for values that can never be negative.
2//!
3//! This crate provides compact unsigned float newtypes with IEEE-like exponent
4//! and mantissa fields, but no sign bit. The missing sign bit can be spent on
5//! precision or range, removes negative zero, and makes total ordering a raw
6//! unsigned integer comparison.
7//!
8//! The ergonomic aliases [`Uf8`], [`Uf16`], and [`Uf32`] point at the default
9//! concrete layouts [`Uf8E4M4`], [`Uf16E5M11`], and [`Uf32E8M24`]. Alternate
10//! layouts such as [`Uf8E5M3`] and [`Uf16E6M10`] are exported as distinct types
11//! so their range and precision tradeoffs stay explicit.
12//! With the `f128` feature enabled, [`Uf64`] is also available and promotes
13//! through nightly primitive `f128`.
14//!
15//! # Conversions
16//!
17//! Explicit constructors such as [`Uf8::from_f32`] encode the input into the
18//! target format. Negative native values become NaN, and overflow becomes
19//! infinity.
20//!
21//! Use [`TryFrom`] when invalid or unrepresentable inputs should be rejected:
22//!
23//! ```
24//! use unsigned_float::{ConversionError, Uf16};
25//!
26//! assert_eq!(Uf16::try_from(42_u32), Ok(Uf16::from_f32(42.0)));
27//! assert_eq!(Uf16::try_from(-1_i32), Err(ConversionError::Negative));
28//! ```
29//!
30#![no_std]
31#![cfg_attr(feature = "f16", feature(f16))]
32#![cfg_attr(feature = "f128", feature(f128))]
33
34#[cfg(test)]
35extern crate std;
36
37mod convert;
38mod dispatch;
39mod uf16;
40mod uf32;
41#[cfg(feature = "f128")]
42mod uf64;
43mod uf8;
44
45pub use convert::ConversionError;
46pub use uf8::{Uf8, Uf8E4M4, Uf8E5M3};
47pub use uf16::{Uf16, Uf16E5M11, Uf16E6M10};
48pub use uf32::{Uf32, Uf32E8M24};
49#[cfg(feature = "f128")]
50pub use uf64::{Uf64, Uf64E11M52};
51
52#[cfg(test)]
53mod tests {
54    #[cfg(feature = "f128")]
55    use super::Uf64;
56    use super::{ConversionError, Uf8, Uf8E5M3, Uf16, Uf16E6M10, Uf32};
57
58    #[test]
59    fn canonical_one_bits_match_the_layouts() {
60        assert_eq!(Uf8::ONE.to_bits(), 0x70);
61        assert_eq!(Uf8E5M3::ONE.to_bits(), 0x78);
62        assert_eq!(Uf16::ONE.to_bits(), 0x7800);
63        assert_eq!(Uf16E6M10::ONE.to_bits(), 0x7c00);
64        assert_eq!(Uf32::ONE.to_bits(), 0x7f00_0000);
65        #[cfg(feature = "f128")]
66        assert_eq!(Uf64::ONE.to_bits(), 0x3ff0_0000_0000_0000);
67    }
68
69    #[test]
70    fn uf8_finite_values_round_trip_through_f32() {
71        for bits in u8::MIN..=u8::MAX {
72            let value = Uf8::from_bits(bits);
73
74            if value.is_nan() {
75                continue;
76            }
77
78            assert_eq!(Uf8::from_f32(value.to_f32()).to_bits(), bits);
79        }
80    }
81
82    #[test]
83    fn uf8_e5m3_finite_values_round_trip_through_f32() {
84        for bits in u8::MIN..=u8::MAX {
85            let value = Uf8E5M3::from_bits(bits);
86
87            if value.is_nan() {
88                continue;
89            }
90
91            assert_eq!(Uf8E5M3::from_f32(value.to_f32()).to_bits(), bits);
92        }
93    }
94
95    #[test]
96    fn conversions_handle_special_values() {
97        assert!(Uf8::from_f32(f32::NAN).is_nan());
98        assert!(Uf8E5M3::from_f32(f32::NAN).is_nan());
99        assert!(Uf16::from_f32(f32::NEG_INFINITY).is_nan());
100        assert!(Uf16E6M10::from_f32(f32::NEG_INFINITY).is_nan());
101        assert!(Uf32::from_f64(-1.0).is_nan());
102        #[cfg(feature = "f128")]
103        assert!(Uf64::from_f64(-1.0).is_nan());
104
105        assert!(Uf8::from_f32(f32::INFINITY).is_infinite());
106        assert!(Uf8E5M3::from_f32(f32::INFINITY).is_infinite());
107        assert!(Uf16::from_f32(f32::INFINITY).is_infinite());
108        assert!(Uf16E6M10::from_f32(f32::INFINITY).is_infinite());
109        assert!(Uf32::from_f64(f64::INFINITY).is_infinite());
110        #[cfg(feature = "f128")]
111        assert!(Uf64::from_f64(f64::INFINITY).is_infinite());
112    }
113
114    #[test]
115    fn try_from_f64_rejects_invalid_or_unrepresentable_values() {
116        assert_eq!(Uf8::try_from(-1.0_f64), Err(ConversionError::Negative));
117        assert_eq!(Uf16::try_from(f64::NAN), Err(ConversionError::Nan));
118        assert_eq!(
119            Uf32::try_from(f64::INFINITY),
120            Err(ConversionError::Infinite)
121        );
122
123        assert_eq!(Uf8::try_from(1.0e20_f64), Err(ConversionError::Overflow));
124        assert_eq!(Uf16::try_from(1.0e20_f64), Err(ConversionError::Overflow));
125        assert_eq!(Uf8::try_from(1.0e-20_f64), Err(ConversionError::Underflow));
126
127        assert_eq!(Uf8::try_from(2.0_f64), Ok(Uf8::from_f32(2.0)));
128        assert_eq!(Uf8E5M3::try_from(2.0_f64), Ok(Uf8E5M3::from_f32(2.0)));
129        assert_eq!(Uf16::try_from(2.0_f64), Ok(Uf16::from_f32(2.0)));
130        assert_eq!(Uf16E6M10::try_from(2.0_f64), Ok(Uf16E6M10::from_f32(2.0)));
131        assert_eq!(Uf32::try_from(2.0_f64), Ok(Uf32::from_f64(2.0)));
132        #[cfg(feature = "f128")]
133        assert_eq!(Uf64::try_from_f64(2.0_f64), Ok(Uf64::from_f64(2.0)));
134    }
135
136    #[test]
137    fn try_from_integer_types() {
138        assert_eq!(Uf8::try_from(2_u8), Ok(Uf8::from_f32(2.0)));
139        assert_eq!(Uf8E5M3::try_from(2_u8), Ok(Uf8E5M3::from_f32(2.0)));
140        assert_eq!(Uf16::try_from(1024_u32), Ok(Uf16::from_f32(1024.0)));
141        assert_eq!(
142            Uf16E6M10::try_from(1024_u32),
143            Ok(Uf16E6M10::from_f32(1024.0))
144        );
145        assert_eq!(Uf32::try_from(1024_u64), Ok(Uf32::from_f64(1024.0)));
146        #[cfg(feature = "f128")]
147        assert_eq!(Uf64::try_from(1024_u64), Ok(Uf64::from_f64(1024.0)));
148
149        assert_eq!(Uf8::try_from(-1_i8), Err(ConversionError::Negative));
150        assert_eq!(Uf8::try_from(u128::MAX), Err(ConversionError::Overflow));
151    }
152
153    #[cfg(feature = "f16")]
154    #[test]
155    fn f16_conversions_are_available_when_enabled() {
156        let native = 2.0_f16;
157
158        assert_eq!(Uf8::from_f16(native).to_f16(), native);
159        assert_eq!(Uf8E5M3::from_f16(native).to_f16(), native);
160        assert_eq!(Uf16::from_f16(native).to_f16(), native);
161        assert_eq!(Uf16E6M10::from_f16(native).to_f16(), native);
162        assert_eq!(Uf32::from_f16(native).to_f16(), native);
163        #[cfg(feature = "f128")]
164        assert_eq!(Uf64::from_f16(native).to_f16(), native);
165
166        assert_eq!(Uf8::from(native), Uf8::from_f16(native));
167        assert_eq!(Uf8E5M3::from(native), Uf8E5M3::from_f16(native));
168        assert_eq!(Uf16::from(native), Uf16::from_f16(native));
169        assert_eq!(Uf16E6M10::from(native), Uf16E6M10::from_f16(native));
170        assert_eq!(Uf32::from(native), Uf32::from_f16(native));
171        #[cfg(feature = "f128")]
172        assert_eq!(Uf64::from(native), Uf64::from_f16(native));
173
174        let _: f16 = Uf8::from_f16(native).into();
175        let _: f16 = Uf8E5M3::from_f16(native).into();
176        let _: f16 = Uf16::from_f16(native).into();
177        let _: f16 = Uf16E6M10::from_f16(native).into();
178        let _: f16 = Uf32::from_f16(native).into();
179        #[cfg(feature = "f128")]
180        let _: f16 = Uf64::from_f16(native).into();
181    }
182
183    #[test]
184    fn subnormal_values_decode_correctly() {
185        assert_eq!(Uf8::MIN_POSITIVE.to_f32(), 2.0_f32.powi(-10));
186        assert_eq!(Uf8E5M3::MIN_POSITIVE.to_f32(), 2.0_f32.powi(-17));
187        assert_eq!(Uf16::MIN_POSITIVE.to_f32(), 2.0_f32.powi(-25));
188        assert_eq!(Uf16E6M10::MIN_POSITIVE.to_f32(), 2.0_f32.powi(-40));
189        assert_eq!(Uf32::MIN_POSITIVE.to_f64(), 2.0_f64.powi(-150));
190        #[cfg(feature = "f128")]
191        assert_eq!(
192            Uf64::MIN_POSITIVE.to_f64(),
193            f64::MIN_POSITIVE / 2.0_f64.powi(52)
194        );
195    }
196
197    #[test]
198    fn arithmetic_promotes_computes_and_demotes() {
199        assert_eq!((Uf8::from_f32(1.0) + Uf8::from_f32(1.0)).to_f32(), 2.0);
200        assert_eq!(
201            (Uf8E5M3::from_f32(1.0) + Uf8E5M3::from_f32(1.0)).to_f32(),
202            2.0
203        );
204        assert_eq!((Uf16::from_f32(3.0) * Uf16::from_f32(0.5)).to_f32(), 1.5);
205        assert_eq!(
206            (Uf16E6M10::from_f32(3.0) * Uf16E6M10::from_f32(0.5)).to_f32(),
207            1.5
208        );
209        assert_eq!((Uf32::from_f64(9.0) / Uf32::from_f64(3.0)).to_f64(), 3.0);
210        #[cfg(feature = "f128")]
211        assert_eq!((Uf64::from_f64(9.0) / Uf64::from_f64(3.0)).to_f64(), 3.0);
212    }
213
214    #[cfg(any(not(feature = "f16"), feature = "soft-float"))]
215    #[test]
216    fn uf8_lut_matches_promoted_arithmetic() {
217        for a_bits in u8::MIN..=u8::MAX {
218            for b_bits in u8::MIN..=u8::MAX {
219                let a = Uf8::from_bits(a_bits);
220                let b = Uf8::from_bits(b_bits);
221                let a_f32 = a.to_f32();
222                let b_f32 = b.to_f32();
223
224                assert_eq!((a + b).to_bits(), Uf8::from_f32(a_f32 + b_f32).to_bits());
225                assert_eq!((a - b).to_bits(), Uf8::from_f32(a_f32 - b_f32).to_bits());
226                assert_eq!((a * b).to_bits(), Uf8::from_f32(a_f32 * b_f32).to_bits());
227                assert_eq!((a / b).to_bits(), Uf8::from_f32(a_f32 / b_f32).to_bits());
228            }
229        }
230    }
231
232    #[cfg(any(not(feature = "f16"), feature = "soft-float"))]
233    #[test]
234    fn uf8_e5m3_lut_matches_promoted_arithmetic() {
235        for a_bits in u8::MIN..=u8::MAX {
236            for b_bits in u8::MIN..=u8::MAX {
237                let a = Uf8E5M3::from_bits(a_bits);
238                let b = Uf8E5M3::from_bits(b_bits);
239                let a_f32 = a.to_f32();
240                let b_f32 = b.to_f32();
241
242                assert_eq!(
243                    (a + b).to_bits(),
244                    Uf8E5M3::from_f32(a_f32 + b_f32).to_bits()
245                );
246                assert_eq!(
247                    (a - b).to_bits(),
248                    Uf8E5M3::from_f32(a_f32 - b_f32).to_bits()
249                );
250                assert_eq!(
251                    (a * b).to_bits(),
252                    Uf8E5M3::from_f32(a_f32 * b_f32).to_bits()
253                );
254                assert_eq!(
255                    (a / b).to_bits(),
256                    Uf8E5M3::from_f32(a_f32 / b_f32).to_bits()
257                );
258            }
259        }
260    }
261
262    #[test]
263    fn negative_subtraction_result_is_nan() {
264        assert!((Uf8::from_f32(1.0) - Uf8::from_f32(2.0)).is_nan());
265        assert!((Uf8E5M3::from_f32(1.0) - Uf8E5M3::from_f32(2.0)).is_nan());
266        assert!((Uf16::from_f32(1.0) - Uf16::from_f32(2.0)).is_nan());
267        assert!((Uf16E6M10::from_f32(1.0) - Uf16E6M10::from_f32(2.0)).is_nan());
268        assert!((Uf32::from_f64(1.0) - Uf32::from_f64(2.0)).is_nan());
269        #[cfg(feature = "f128")]
270        assert!((Uf64::from_f64(1.0) - Uf64::from_f64(2.0)).is_nan());
271    }
272
273    #[test]
274    fn raw_bits_define_total_ordering() {
275        assert!(Uf8::ZERO < Uf8::MIN_POSITIVE);
276        assert!(Uf8::MAX < Uf8::INFINITY);
277        assert!(Uf8::INFINITY < Uf8::NAN);
278
279        assert!(Uf8E5M3::ZERO < Uf8E5M3::MIN_POSITIVE);
280        assert!(Uf8E5M3::MAX < Uf8E5M3::INFINITY);
281        assert!(Uf8E5M3::INFINITY < Uf8E5M3::NAN);
282
283        assert!(Uf16::ZERO < Uf16::MIN_POSITIVE);
284        assert!(Uf16::MAX < Uf16::INFINITY);
285        assert!(Uf16::INFINITY < Uf16::NAN);
286
287        assert!(Uf16E6M10::ZERO < Uf16E6M10::MIN_POSITIVE);
288        assert!(Uf16E6M10::MAX < Uf16E6M10::INFINITY);
289        assert!(Uf16E6M10::INFINITY < Uf16E6M10::NAN);
290
291        assert!(Uf32::ZERO < Uf32::MIN_POSITIVE);
292        assert!(Uf32::MAX < Uf32::INFINITY);
293        assert!(Uf32::INFINITY < Uf32::NAN);
294
295        #[cfg(feature = "f128")]
296        {
297            assert!(Uf64::ZERO < Uf64::MIN_POSITIVE);
298            assert!(Uf64::MAX < Uf64::INFINITY);
299            assert!(Uf64::INFINITY < Uf64::NAN);
300        }
301    }
302
303    #[test]
304    fn round_to_nearest_even_when_encoding() {
305        assert_eq!(Uf8::from_f32(1.0 + 1.0 / 32.0).to_bits(), 0x70);
306        assert_eq!(Uf8::from_f32(1.0 + 3.0 / 32.0).to_bits(), 0x72);
307
308        assert_eq!(
309            Uf32::from_f64(1.0 + 2.0_f64.powi(-25)).to_bits(),
310            Uf32::ONE.to_bits()
311        );
312        assert_eq!(
313            Uf32::from_f64(1.0 + 3.0 * 2.0_f64.powi(-25)).to_bits(),
314            Uf32::ONE.to_bits() + 2
315        );
316    }
317}
unsigned_float/lib.rs

unsigned_float/
lib.rs