Skip to main content

unsigned_float/
lib.rs

1//! Unsigned floating-point formats for values that can never be negative.
2//!
3//! This crate provides compact unsigned float newtypes with IEEE-like exponent
4//! and mantissa fields, but no sign bit. The missing sign bit can be spent on
5//! precision or range, removes negative zero, and makes total ordering a raw
6//! unsigned integer comparison.
7//!
8//! The ergonomic aliases [`Uf8`], [`Uf16`], and [`Uf32`] point at the default
9//! concrete layouts [`Uf8E4M4`], [`Uf16E5M11`], and [`Uf32E8M24`]. Alternate
10//! layouts such as [`Uf8E5M3`] and [`Uf16E6M10`] are exported as distinct types
11//! so their range and precision tradeoffs stay explicit.
12//! With the `f128` feature enabled, [`Uf64`] is also available and promotes
13//! through nightly primitive `f128`.
14//!
15//! # Conversions
16//!
17//! Explicit constructors such as [`Uf8::from_f32`] encode the input into the
18//! target format. Negative native values become NaN, and overflow becomes
19//! infinity.
20//!
21//! Use [`TryFrom`] when invalid or unrepresentable inputs should be rejected:
22//!
23//! ```
24//! use unsigned_float::{ConversionError, Uf16};
25//!
26//! assert_eq!(Uf16::try_from(42_u32), Ok(Uf16::from_f32(42.0)));
27//! assert_eq!(Uf16::try_from(-1_i32), Err(ConversionError::Negative));
28//! ```
29//!
30//! # Exponents
31//!
32//! Use [`PowUf`] to raise native floats to unsigned-float exponents:
33//!
34//! ```
35//! use unsigned_float::{PowUf, Uf16};
36//!
37//! let root = 9.0_f32.powuf(Uf16::from_f32(0.5));
38//! assert_eq!(root, 3.0);
39//! ```
40//!
41//! `PowUf` uses exact kernels for common exponent shapes such as zero, one,
42//! one-half, and small integers, then falls back to `libm` for the general
43//! fractional case.
44//!
45#![no_std]
46#![cfg_attr(feature = "f16", feature(f16))]
47#![cfg_attr(feature = "f128", feature(f128))]
48
49#[cfg(test)]
50extern crate std;
51
52mod convert;
53mod dispatch;
54mod pow;
55mod uf16;
56mod uf32;
57#[cfg(feature = "f128")]
58mod uf64;
59mod uf8;
60
61pub use convert::ConversionError;
62pub use pow::PowUf;
63pub use uf8::{Uf8, Uf8E4M4, Uf8E5M3};
64pub use uf16::{Uf16, Uf16E5M11, Uf16E6M10};
65pub use uf32::{Uf32, Uf32E8M24};
66#[cfg(feature = "f128")]
67pub use uf64::{Uf64, Uf64E11M52};
68
69#[cfg(test)]
70mod tests {
71    #[cfg(feature = "f128")]
72    use super::Uf64;
73    use super::{ConversionError, PowUf, Uf8, Uf8E5M3, Uf16, Uf16E6M10, Uf32};
74
75    #[test]
76    fn canonical_one_bits_match_the_layouts() {
77        assert_eq!(Uf8::ONE.to_bits(), 0x70);
78        assert_eq!(Uf8E5M3::ONE.to_bits(), 0x78);
79        assert_eq!(Uf16::ONE.to_bits(), 0x7800);
80        assert_eq!(Uf16E6M10::ONE.to_bits(), 0x7c00);
81        assert_eq!(Uf32::ONE.to_bits(), 0x7f00_0000);
82        #[cfg(feature = "f128")]
83        assert_eq!(Uf64::ONE.to_bits(), 0x3ff0_0000_0000_0000);
84    }
85
86    #[test]
87    fn uf8_finite_values_round_trip_through_f32() {
88        for bits in u8::MIN..=u8::MAX {
89            let value = Uf8::from_bits(bits);
90
91            if value.is_nan() {
92                continue;
93            }
94
95            assert_eq!(Uf8::from_f32(value.to_f32()).to_bits(), bits);
96        }
97    }
98
99    #[test]
100    fn uf8_e5m3_finite_values_round_trip_through_f32() {
101        for bits in u8::MIN..=u8::MAX {
102            let value = Uf8E5M3::from_bits(bits);
103
104            if value.is_nan() {
105                continue;
106            }
107
108            assert_eq!(Uf8E5M3::from_f32(value.to_f32()).to_bits(), bits);
109        }
110    }
111
112    #[test]
113    fn conversions_handle_special_values() {
114        assert!(Uf8::from_f32(f32::NAN).is_nan());
115        assert!(Uf8E5M3::from_f32(f32::NAN).is_nan());
116        assert!(Uf16::from_f32(f32::NEG_INFINITY).is_nan());
117        assert!(Uf16E6M10::from_f32(f32::NEG_INFINITY).is_nan());
118        assert!(Uf32::from_f64(-1.0).is_nan());
119        #[cfg(feature = "f128")]
120        assert!(Uf64::from_f64(-1.0).is_nan());
121
122        assert!(Uf8::from_f32(f32::INFINITY).is_infinite());
123        assert!(Uf8E5M3::from_f32(f32::INFINITY).is_infinite());
124        assert!(Uf16::from_f32(f32::INFINITY).is_infinite());
125        assert!(Uf16E6M10::from_f32(f32::INFINITY).is_infinite());
126        assert!(Uf32::from_f64(f64::INFINITY).is_infinite());
127        #[cfg(feature = "f128")]
128        assert!(Uf64::from_f64(f64::INFINITY).is_infinite());
129    }
130
131    #[test]
132    fn try_from_f64_rejects_invalid_or_unrepresentable_values() {
133        assert_eq!(Uf8::try_from(-1.0_f64), Err(ConversionError::Negative));
134        assert_eq!(Uf16::try_from(f64::NAN), Err(ConversionError::Nan));
135        assert_eq!(
136            Uf32::try_from(f64::INFINITY),
137            Err(ConversionError::Infinite)
138        );
139
140        assert_eq!(Uf8::try_from(1.0e20_f64), Err(ConversionError::Overflow));
141        assert_eq!(Uf16::try_from(1.0e20_f64), Err(ConversionError::Overflow));
142        assert_eq!(Uf8::try_from(1.0e-20_f64), Err(ConversionError::Underflow));
143
144        assert_eq!(Uf8::try_from(2.0_f64), Ok(Uf8::from_f32(2.0)));
145        assert_eq!(Uf8E5M3::try_from(2.0_f64), Ok(Uf8E5M3::from_f32(2.0)));
146        assert_eq!(Uf16::try_from(2.0_f64), Ok(Uf16::from_f32(2.0)));
147        assert_eq!(Uf16E6M10::try_from(2.0_f64), Ok(Uf16E6M10::from_f32(2.0)));
148        assert_eq!(Uf32::try_from(2.0_f64), Ok(Uf32::from_f64(2.0)));
149        #[cfg(feature = "f128")]
150        assert_eq!(Uf64::try_from_f64(2.0_f64), Ok(Uf64::from_f64(2.0)));
151    }
152
153    #[test]
154    fn try_from_integer_types() {
155        assert_eq!(Uf8::try_from(2_u8), Ok(Uf8::from_f32(2.0)));
156        assert_eq!(Uf8E5M3::try_from(2_u8), Ok(Uf8E5M3::from_f32(2.0)));
157        assert_eq!(Uf16::try_from(1024_u32), Ok(Uf16::from_f32(1024.0)));
158        assert_eq!(
159            Uf16E6M10::try_from(1024_u32),
160            Ok(Uf16E6M10::from_f32(1024.0))
161        );
162        assert_eq!(Uf32::try_from(1024_u64), Ok(Uf32::from_f64(1024.0)));
163        #[cfg(feature = "f128")]
164        assert_eq!(Uf64::try_from(1024_u64), Ok(Uf64::from_f64(1024.0)));
165
166        assert_eq!(Uf8::try_from(-1_i8), Err(ConversionError::Negative));
167        assert_eq!(Uf8::try_from(u128::MAX), Err(ConversionError::Overflow));
168    }
169
170    #[cfg(feature = "f16")]
171    #[test]
172    fn f16_conversions_are_available_when_enabled() {
173        let native = 2.0_f16;
174
175        assert_eq!(Uf8::from_f16(native).to_f16(), native);
176        assert_eq!(Uf8E5M3::from_f16(native).to_f16(), native);
177        assert_eq!(Uf16::from_f16(native).to_f16(), native);
178        assert_eq!(Uf16E6M10::from_f16(native).to_f16(), native);
179        assert_eq!(Uf32::from_f16(native).to_f16(), native);
180        #[cfg(feature = "f128")]
181        assert_eq!(Uf64::from_f16(native).to_f16(), native);
182
183        assert_eq!(Uf8::from(native), Uf8::from_f16(native));
184        assert_eq!(Uf8E5M3::from(native), Uf8E5M3::from_f16(native));
185        assert_eq!(Uf16::from(native), Uf16::from_f16(native));
186        assert_eq!(Uf16E6M10::from(native), Uf16E6M10::from_f16(native));
187        assert_eq!(Uf32::from(native), Uf32::from_f16(native));
188        #[cfg(feature = "f128")]
189        assert_eq!(Uf64::from(native), Uf64::from_f16(native));
190
191        let _: f16 = Uf8::from_f16(native).into();
192        let _: f16 = Uf8E5M3::from_f16(native).into();
193        let _: f16 = Uf16::from_f16(native).into();
194        let _: f16 = Uf16E6M10::from_f16(native).into();
195        let _: f16 = Uf32::from_f16(native).into();
196        #[cfg(feature = "f128")]
197        let _: f16 = Uf64::from_f16(native).into();
198    }
199
200    #[test]
201    fn subnormal_values_decode_correctly() {
202        assert_eq!(Uf8::MIN_POSITIVE.to_f32(), 2.0_f32.powi(-10));
203        assert_eq!(Uf8E5M3::MIN_POSITIVE.to_f32(), 2.0_f32.powi(-17));
204        assert_eq!(Uf16::MIN_POSITIVE.to_f32(), 2.0_f32.powi(-25));
205        assert_eq!(Uf16E6M10::MIN_POSITIVE.to_f32(), 2.0_f32.powi(-40));
206        assert_eq!(Uf32::MIN_POSITIVE.to_f64(), 2.0_f64.powi(-150));
207        #[cfg(feature = "f128")]
208        assert_eq!(
209            Uf64::MIN_POSITIVE.to_f64(),
210            f64::MIN_POSITIVE / 2.0_f64.powi(52)
211        );
212    }
213
214    #[test]
215    fn arithmetic_promotes_computes_and_demotes() {
216        assert_eq!((Uf8::from_f32(1.0) + Uf8::from_f32(1.0)).to_f32(), 2.0);
217        assert_eq!(
218            (Uf8E5M3::from_f32(1.0) + Uf8E5M3::from_f32(1.0)).to_f32(),
219            2.0
220        );
221        assert_eq!((Uf16::from_f32(3.0) * Uf16::from_f32(0.5)).to_f32(), 1.5);
222        assert_eq!(
223            (Uf16E6M10::from_f32(3.0) * Uf16E6M10::from_f32(0.5)).to_f32(),
224            1.5
225        );
226        assert_eq!((Uf32::from_f64(9.0) / Uf32::from_f64(3.0)).to_f64(), 3.0);
227        #[cfg(feature = "f128")]
228        assert_eq!((Uf64::from_f64(9.0) / Uf64::from_f64(3.0)).to_f64(), 3.0);
229    }
230
231    #[test]
232    fn native_float_bases_can_use_unsigned_float_exponents() {
233        assert_eq!(9.0_f32.powuf(Uf8::from_f32(0.5)), 3.0);
234        assert_eq!(9.0_f32.powuf(Uf8E5M3::from_f32(0.5)), 3.0);
235        assert_eq!(9.0_f32.powuf(Uf16::from_f32(0.5)), 3.0);
236        assert_eq!(9.0_f64.powuf(Uf16E6M10::from_f32(0.5)), 3.0);
237        assert_eq!(9.0_f64.powuf(Uf32::from_f64(0.5)), 3.0);
238        assert_eq!(2.0_f32.powuf(Uf16::from_f32(8.0)), 256.0);
239        assert_eq!((-2.0_f32).powuf(Uf8::from_f32(3.0)), -8.0);
240        assert_eq!(f32::NAN.powuf(Uf8::ZERO), 1.0);
241        assert!((16.0_f64.powuf(Uf32::from_f64(1.25)) - 32.0).abs() < 1.0e-12);
242
243        #[cfg(feature = "f128")]
244        {
245            assert_eq!(9.0_f64.powuf(Uf64::from_f64(0.5)), 3.0);
246            assert_eq!(2.0_f64.powuf(Uf64::from_f64(10.0)), 1024.0);
247        }
248    }
249
250    #[cfg(any(not(feature = "f16"), feature = "soft-float"))]
251    #[test]
252    fn uf8_lut_matches_promoted_arithmetic() {
253        for a_bits in u8::MIN..=u8::MAX {
254            for b_bits in u8::MIN..=u8::MAX {
255                let a = Uf8::from_bits(a_bits);
256                let b = Uf8::from_bits(b_bits);
257                let a_f32 = a.to_f32();
258                let b_f32 = b.to_f32();
259
260                assert_eq!((a + b).to_bits(), Uf8::from_f32(a_f32 + b_f32).to_bits());
261                assert_eq!((a - b).to_bits(), Uf8::from_f32(a_f32 - b_f32).to_bits());
262                assert_eq!((a * b).to_bits(), Uf8::from_f32(a_f32 * b_f32).to_bits());
263                assert_eq!((a / b).to_bits(), Uf8::from_f32(a_f32 / b_f32).to_bits());
264            }
265        }
266    }
267
268    #[cfg(any(not(feature = "f16"), feature = "soft-float"))]
269    #[test]
270    fn uf8_e5m3_lut_matches_promoted_arithmetic() {
271        for a_bits in u8::MIN..=u8::MAX {
272            for b_bits in u8::MIN..=u8::MAX {
273                let a = Uf8E5M3::from_bits(a_bits);
274                let b = Uf8E5M3::from_bits(b_bits);
275                let a_f32 = a.to_f32();
276                let b_f32 = b.to_f32();
277
278                assert_eq!(
279                    (a + b).to_bits(),
280                    Uf8E5M3::from_f32(a_f32 + b_f32).to_bits()
281                );
282                assert_eq!(
283                    (a - b).to_bits(),
284                    Uf8E5M3::from_f32(a_f32 - b_f32).to_bits()
285                );
286                assert_eq!(
287                    (a * b).to_bits(),
288                    Uf8E5M3::from_f32(a_f32 * b_f32).to_bits()
289                );
290                assert_eq!(
291                    (a / b).to_bits(),
292                    Uf8E5M3::from_f32(a_f32 / b_f32).to_bits()
293                );
294            }
295        }
296    }
297
298    #[test]
299    fn negative_subtraction_result_is_nan() {
300        assert!((Uf8::from_f32(1.0) - Uf8::from_f32(2.0)).is_nan());
301        assert!((Uf8E5M3::from_f32(1.0) - Uf8E5M3::from_f32(2.0)).is_nan());
302        assert!((Uf16::from_f32(1.0) - Uf16::from_f32(2.0)).is_nan());
303        assert!((Uf16E6M10::from_f32(1.0) - Uf16E6M10::from_f32(2.0)).is_nan());
304        assert!((Uf32::from_f64(1.0) - Uf32::from_f64(2.0)).is_nan());
305        #[cfg(feature = "f128")]
306        assert!((Uf64::from_f64(1.0) - Uf64::from_f64(2.0)).is_nan());
307    }
308
309    #[test]
310    fn raw_bits_define_total_ordering() {
311        assert!(Uf8::ZERO < Uf8::MIN_POSITIVE);
312        assert!(Uf8::MAX < Uf8::INFINITY);
313        assert!(Uf8::INFINITY < Uf8::NAN);
314
315        assert!(Uf8E5M3::ZERO < Uf8E5M3::MIN_POSITIVE);
316        assert!(Uf8E5M3::MAX < Uf8E5M3::INFINITY);
317        assert!(Uf8E5M3::INFINITY < Uf8E5M3::NAN);
318
319        assert!(Uf16::ZERO < Uf16::MIN_POSITIVE);
320        assert!(Uf16::MAX < Uf16::INFINITY);
321        assert!(Uf16::INFINITY < Uf16::NAN);
322
323        assert!(Uf16E6M10::ZERO < Uf16E6M10::MIN_POSITIVE);
324        assert!(Uf16E6M10::MAX < Uf16E6M10::INFINITY);
325        assert!(Uf16E6M10::INFINITY < Uf16E6M10::NAN);
326
327        assert!(Uf32::ZERO < Uf32::MIN_POSITIVE);
328        assert!(Uf32::MAX < Uf32::INFINITY);
329        assert!(Uf32::INFINITY < Uf32::NAN);
330
331        #[cfg(feature = "f128")]
332        {
333            assert!(Uf64::ZERO < Uf64::MIN_POSITIVE);
334            assert!(Uf64::MAX < Uf64::INFINITY);
335            assert!(Uf64::INFINITY < Uf64::NAN);
336        }
337    }
338
339    #[test]
340    fn round_to_nearest_even_when_encoding() {
341        assert_eq!(Uf8::from_f32(1.0 + 1.0 / 32.0).to_bits(), 0x70);
342        assert_eq!(Uf8::from_f32(1.0 + 3.0 / 32.0).to_bits(), 0x72);
343
344        assert_eq!(
345            Uf32::from_f64(1.0 + 2.0_f64.powi(-25)).to_bits(),
346            Uf32::ONE.to_bits()
347        );
348        assert_eq!(
349            Uf32::from_f64(1.0 + 3.0 * 2.0_f64.powi(-25)).to_bits(),
350            Uf32::ONE.to_bits() + 2
351        );
352    }
353}