unsigned_float/
lib.rs

1//! Unsigned floating-point formats for values that can never be negative.
2//!
3//! This crate provides compact unsigned float newtypes with IEEE-like exponent
4//! and mantissa fields, but no sign bit. The missing sign bit can be spent on
5//! precision or range, removes negative zero, and makes total ordering a raw
6//! unsigned integer comparison.
7//!
8//! The ergonomic aliases [`Uf8`], [`Uf16`], and [`Uf32`] point at the default
9//! concrete layouts [`Uf8E4M4`], [`Uf16E5M11`], and [`Uf32E8M24`]. Alternate
10//! layouts such as [`Uf8E5M3`] and [`Uf16E6M10`] are exported as distinct types
11//! so their range and precision tradeoffs stay explicit.
12//! With the `f128` feature enabled, [`Uf64`] is also available and promotes
13//! through nightly primitive `f128`.
14//!
15//! # Conversions
16//!
17//! Explicit constructors such as [`Uf8::from_f32`] encode the input into the
18//! target format. Negative native values become NaN, and overflow becomes
19//! infinity.
20//!
21//! Use [`TryFrom`] when invalid or unrepresentable inputs should be rejected:
22//!
23//! ```
24//! use unsigned_float::{ConversionError, Uf16};
25//!
26//! assert_eq!(Uf16::try_from(42_u32), Ok(Uf16::from_f32(42.0)));
27//! assert_eq!(Uf16::try_from(-1_i32), Err(ConversionError::Negative));
28//! ```
29//!
30//! # Exponents
31//!
32//! Use [`PowUf`] to raise native floats to unsigned-float exponents:
33//!
34//! ```
35//! use unsigned_float::{PowUf, Uf16};
36//!
37//! let root = 9.0_f32.powuf(Uf16::from_f32(0.5));
38//! assert_eq!(root, 3.0);
39//! ```
40//!
41//! `PowUf` uses exact kernels for common exponent shapes such as zero, one,
42//! one-half, and small integers, then falls back to `libm` for the general
43//! fractional case. Same-layout and cross-layout UF8 exponentiation uses
44//! generated lookup tables and returns the UF8 layout of the base.
45//!
46//! [`Pow1mUf`] evaluates `(1 - u)^a` directly. This keeps the complement
47//! operation explicit and lets UF8 use generated lookup tables without
48//! materializing `1 - u` as a separately rounded value.
49//!
50#![no_std]
51#![cfg_attr(feature = "f16", feature(f16))]
52#![cfg_attr(feature = "f128", feature(f128))]
53
54#[cfg(test)]
55extern crate std;
56
57mod convert;
58mod dispatch;
59mod pow;
60mod uf16;
61mod uf32;
62#[cfg(feature = "f128")]
63mod uf64;
64mod uf8;
65
66pub use convert::ConversionError;
67pub use pow::{Pow1mUf, PowUf};
68pub use uf8::{Uf8, Uf8E4M4, Uf8E5M3};
69pub use uf16::{Uf16, Uf16E5M11, Uf16E6M10};
70pub use uf32::{Uf32, Uf32E8M24};
71#[cfg(feature = "f128")]
72pub use uf64::{Uf64, Uf64E11M52};
73
74#[cfg(test)]
75mod tests {
76    #[cfg(feature = "f128")]
77    use super::Uf64;
78    use super::{ConversionError, Pow1mUf, PowUf, Uf8, Uf8E5M3, Uf16, Uf16E6M10, Uf32};
79
80    #[test]
81    fn canonical_one_bits_match_the_layouts() {
82        assert_eq!(Uf8::ONE.to_bits(), 0x70);
83        assert_eq!(Uf8E5M3::ONE.to_bits(), 0x78);
84        assert_eq!(Uf16::ONE.to_bits(), 0x7800);
85        assert_eq!(Uf16E6M10::ONE.to_bits(), 0x7c00);
86        assert_eq!(Uf32::ONE.to_bits(), 0x7f00_0000);
87        #[cfg(feature = "f128")]
88        assert_eq!(Uf64::ONE.to_bits(), 0x3ff0_0000_0000_0000);
89    }
90
91    #[test]
92    fn uf8_finite_values_round_trip_through_f32() {
93        for bits in u8::MIN..=u8::MAX {
94            let value = Uf8::from_bits(bits);
95
96            if value.is_nan() {
97                continue;
98            }
99
100            assert_eq!(Uf8::from_f32(value.to_f32()).to_bits(), bits);
101        }
102    }
103
104    #[test]
105    fn uf8_e5m3_finite_values_round_trip_through_f32() {
106        for bits in u8::MIN..=u8::MAX {
107            let value = Uf8E5M3::from_bits(bits);
108
109            if value.is_nan() {
110                continue;
111            }
112
113            assert_eq!(Uf8E5M3::from_f32(value.to_f32()).to_bits(), bits);
114        }
115    }
116
117    #[test]
118    fn conversions_handle_special_values() {
119        assert!(Uf8::from_f32(f32::NAN).is_nan());
120        assert!(Uf8E5M3::from_f32(f32::NAN).is_nan());
121        assert!(Uf16::from_f32(f32::NEG_INFINITY).is_nan());
122        assert!(Uf16E6M10::from_f32(f32::NEG_INFINITY).is_nan());
123        assert!(Uf32::from_f64(-1.0).is_nan());
124        #[cfg(feature = "f128")]
125        assert!(Uf64::from_f64(-1.0).is_nan());
126
127        assert!(Uf8::from_f32(f32::INFINITY).is_infinite());
128        assert!(Uf8E5M3::from_f32(f32::INFINITY).is_infinite());
129        assert!(Uf16::from_f32(f32::INFINITY).is_infinite());
130        assert!(Uf16E6M10::from_f32(f32::INFINITY).is_infinite());
131        assert!(Uf32::from_f64(f64::INFINITY).is_infinite());
132        #[cfg(feature = "f128")]
133        assert!(Uf64::from_f64(f64::INFINITY).is_infinite());
134    }
135
136    #[test]
137    fn try_from_f64_rejects_invalid_or_unrepresentable_values() {
138        assert_eq!(Uf8::try_from(-1.0_f64), Err(ConversionError::Negative));
139        assert_eq!(Uf16::try_from(f64::NAN), Err(ConversionError::Nan));
140        assert_eq!(
141            Uf32::try_from(f64::INFINITY),
142            Err(ConversionError::Infinite)
143        );
144
145        assert_eq!(Uf8::try_from(1.0e20_f64), Err(ConversionError::Overflow));
146        assert_eq!(Uf16::try_from(1.0e20_f64), Err(ConversionError::Overflow));
147        assert_eq!(Uf8::try_from(1.0e-20_f64), Err(ConversionError::Underflow));
148
149        assert_eq!(Uf8::try_from(2.0_f64), Ok(Uf8::from_f32(2.0)));
150        assert_eq!(Uf8E5M3::try_from(2.0_f64), Ok(Uf8E5M3::from_f32(2.0)));
151        assert_eq!(Uf16::try_from(2.0_f64), Ok(Uf16::from_f32(2.0)));
152        assert_eq!(Uf16E6M10::try_from(2.0_f64), Ok(Uf16E6M10::from_f32(2.0)));
153        assert_eq!(Uf32::try_from(2.0_f64), Ok(Uf32::from_f64(2.0)));
154        #[cfg(feature = "f128")]
155        assert_eq!(Uf64::try_from_f64(2.0_f64), Ok(Uf64::from_f64(2.0)));
156    }
157
158    #[test]
159    fn try_from_integer_types() {
160        assert_eq!(Uf8::try_from(2_u8), Ok(Uf8::from_f32(2.0)));
161        assert_eq!(Uf8E5M3::try_from(2_u8), Ok(Uf8E5M3::from_f32(2.0)));
162        assert_eq!(Uf16::try_from(1024_u32), Ok(Uf16::from_f32(1024.0)));
163        assert_eq!(
164            Uf16E6M10::try_from(1024_u32),
165            Ok(Uf16E6M10::from_f32(1024.0))
166        );
167        assert_eq!(Uf32::try_from(1024_u64), Ok(Uf32::from_f64(1024.0)));
168        #[cfg(feature = "f128")]
169        assert_eq!(Uf64::try_from(1024_u64), Ok(Uf64::from_f64(1024.0)));
170
171        assert_eq!(Uf8::try_from(-1_i8), Err(ConversionError::Negative));
172        assert_eq!(Uf8::try_from(u128::MAX), Err(ConversionError::Overflow));
173    }
174
175    #[cfg(feature = "f16")]
176    #[test]
177    fn f16_conversions_are_available_when_enabled() {
178        let native = 2.0_f16;
179
180        assert_eq!(Uf8::from_f16(native).to_f16(), native);
181        assert_eq!(Uf8E5M3::from_f16(native).to_f16(), native);
182        assert_eq!(Uf16::from_f16(native).to_f16(), native);
183        assert_eq!(Uf16E6M10::from_f16(native).to_f16(), native);
184        assert_eq!(Uf32::from_f16(native).to_f16(), native);
185        #[cfg(feature = "f128")]
186        assert_eq!(Uf64::from_f16(native).to_f16(), native);
187
188        assert_eq!(Uf8::from(native), Uf8::from_f16(native));
189        assert_eq!(Uf8E5M3::from(native), Uf8E5M3::from_f16(native));
190        assert_eq!(Uf16::from(native), Uf16::from_f16(native));
191        assert_eq!(Uf16E6M10::from(native), Uf16E6M10::from_f16(native));
192        assert_eq!(Uf32::from(native), Uf32::from_f16(native));
193        #[cfg(feature = "f128")]
194        assert_eq!(Uf64::from(native), Uf64::from_f16(native));
195
196        let _: f16 = Uf8::from_f16(native).into();
197        let _: f16 = Uf8E5M3::from_f16(native).into();
198        let _: f16 = Uf16::from_f16(native).into();
199        let _: f16 = Uf16E6M10::from_f16(native).into();
200        let _: f16 = Uf32::from_f16(native).into();
201        #[cfg(feature = "f128")]
202        let _: f16 = Uf64::from_f16(native).into();
203    }
204
205    #[test]
206    fn subnormal_values_decode_correctly() {
207        assert_eq!(Uf8::MIN_POSITIVE.to_f32(), 2.0_f32.powi(-10));
208        assert_eq!(Uf8E5M3::MIN_POSITIVE.to_f32(), 2.0_f32.powi(-17));
209        assert_eq!(Uf16::MIN_POSITIVE.to_f32(), 2.0_f32.powi(-25));
210        assert_eq!(Uf16E6M10::MIN_POSITIVE.to_f32(), 2.0_f32.powi(-40));
211        assert_eq!(Uf32::MIN_POSITIVE.to_f64(), 2.0_f64.powi(-150));
212        #[cfg(feature = "f128")]
213        assert_eq!(
214            Uf64::MIN_POSITIVE.to_f64(),
215            f64::MIN_POSITIVE / 2.0_f64.powi(52)
216        );
217    }
218
219    #[test]
220    fn arithmetic_promotes_computes_and_demotes() {
221        assert_eq!((Uf8::from_f32(1.0) + Uf8::from_f32(1.0)).to_f32(), 2.0);
222        assert_eq!(
223            (Uf8E5M3::from_f32(1.0) + Uf8E5M3::from_f32(1.0)).to_f32(),
224            2.0
225        );
226        assert_eq!((Uf16::from_f32(3.0) * Uf16::from_f32(0.5)).to_f32(), 1.5);
227        assert_eq!(
228            (Uf16E6M10::from_f32(3.0) * Uf16E6M10::from_f32(0.5)).to_f32(),
229            1.5
230        );
231        assert_eq!((Uf32::from_f64(9.0) / Uf32::from_f64(3.0)).to_f64(), 3.0);
232        #[cfg(feature = "f128")]
233        assert_eq!((Uf64::from_f64(9.0) / Uf64::from_f64(3.0)).to_f64(), 3.0);
234    }
235
236    #[test]
237    fn native_float_bases_can_use_unsigned_float_exponents() {
238        assert_eq!(9.0_f32.powuf(Uf8::from_f32(0.5)), 3.0);
239        assert_eq!(9.0_f32.powuf(Uf8E5M3::from_f32(0.5)), 3.0);
240        assert_eq!(9.0_f32.powuf(Uf16::from_f32(0.5)), 3.0);
241        assert_eq!(9.0_f64.powuf(Uf16E6M10::from_f32(0.5)), 3.0);
242        assert_eq!(9.0_f64.powuf(Uf32::from_f64(0.5)), 3.0);
243        assert_eq!(2.0_f32.powuf(Uf16::from_f32(8.0)), 256.0);
244        assert_eq!((-2.0_f32).powuf(Uf8::from_f32(3.0)), -8.0);
245        assert_eq!(f32::NAN.powuf(Uf8::ZERO), 1.0);
246        assert!((16.0_f64.powuf(Uf32::from_f64(1.25)) - 32.0).abs() < 1.0e-12);
247
248        #[cfg(feature = "f128")]
249        {
250            assert_eq!(9.0_f64.powuf(Uf64::from_f64(0.5)), 3.0);
251            assert_eq!(2.0_f64.powuf(Uf64::from_f64(10.0)), 1024.0);
252        }
253    }
254
255    #[test]
256    fn native_float_bases_can_use_complement_exponents() {
257        assert_eq!(0.75_f32.pow1muf(Uf8::from_f32(0.5)), 0.5);
258        assert_eq!(0.75_f32.pow1muf(Uf16::from_f32(0.5)), 0.5);
259        assert_eq!(0.5_f64.pow1muf(Uf32::from_f64(2.0)), 0.25);
260        assert_eq!(f32::NAN.pow1muf(Uf8::ZERO), 1.0);
261        assert!(
262            (0.25_f64.pow1muf(Uf16E6M10::from_f32(1.25)) - 0.697_953_644_326_574_7).abs() < 1.0e-15
263        );
264
265        #[cfg(feature = "f128")]
266        {
267            assert_eq!(0.5_f64.pow1muf(Uf64::from_f64(2.0)), 0.25);
268        }
269    }
270
271    #[test]
272    fn uf8_pow_lut_matches_promoted_arithmetic() {
273        assert_eq!(Uf8::from_f32(9.0).powuf(Uf8::from_f32(0.5)).to_f32(), 3.0);
274
275        for a_bits in u8::MIN..=u8::MAX {
276            for b_bits in u8::MIN..=u8::MAX {
277                let a = Uf8::from_bits(a_bits);
278                let b = Uf8::from_bits(b_bits);
279
280                assert_eq!(
281                    a.powuf(b).to_bits(),
282                    Uf8::from_f32(a.to_f32().powuf(b)).to_bits()
283                );
284            }
285        }
286    }
287
288    #[test]
289    fn uf8_e5m3_pow_lut_matches_promoted_arithmetic() {
290        assert_eq!(
291            Uf8E5M3::from_f32(9.0)
292                .powuf(Uf8E5M3::from_f32(0.5))
293                .to_f32(),
294            3.0
295        );
296
297        for a_bits in u8::MIN..=u8::MAX {
298            for b_bits in u8::MIN..=u8::MAX {
299                let a = Uf8E5M3::from_bits(a_bits);
300                let b = Uf8E5M3::from_bits(b_bits);
301
302                assert_eq!(
303                    a.powuf(b).to_bits(),
304                    Uf8E5M3::from_f32(a.to_f32().powuf(b)).to_bits()
305                );
306            }
307        }
308    }
309
310    #[test]
311    fn uf8_cross_layout_pow_luts_match_promoted_arithmetic() {
312        assert_eq!(
313            Uf8::from_f32(9.0).powuf(Uf8E5M3::from_f32(0.5)).to_f32(),
314            3.0
315        );
316        assert_eq!(
317            Uf8E5M3::from_f32(9.0).powuf(Uf8::from_f32(0.5)).to_f32(),
318            3.0
319        );
320
321        for a_bits in u8::MIN..=u8::MAX {
322            for b_bits in u8::MIN..=u8::MAX {
323                let e4m4 = Uf8::from_bits(a_bits);
324                let e5m3 = Uf8E5M3::from_bits(b_bits);
325
326                assert_eq!(
327                    e4m4.powuf(e5m3).to_bits(),
328                    Uf8::from_f32(e4m4.to_f32().powuf(e5m3)).to_bits()
329                );
330                assert_eq!(
331                    e5m3.powuf(e4m4).to_bits(),
332                    Uf8E5M3::from_f32(e5m3.to_f32().powuf(e4m4)).to_bits()
333                );
334            }
335        }
336    }
337
338    #[test]
339    fn uf8_pow1m_lut_matches_promoted_arithmetic() {
340        assert_eq!(
341            Uf8::from_f32(0.75).pow1muf(Uf8::from_f32(0.5)).to_f32(),
342            0.5
343        );
344
345        for a_bits in u8::MIN..=u8::MAX {
346            for b_bits in u8::MIN..=u8::MAX {
347                let u = Uf8::from_bits(a_bits);
348                let exponent = Uf8::from_bits(b_bits);
349
350                assert_eq!(
351                    u.pow1muf(exponent).to_bits(),
352                    Uf8::from_f32(u.to_f32().pow1muf(exponent)).to_bits()
353                );
354            }
355        }
356    }
357
358    #[test]
359    fn uf8_e5m3_pow1m_lut_matches_promoted_arithmetic() {
360        assert_eq!(
361            Uf8E5M3::from_f32(0.75)
362                .pow1muf(Uf8E5M3::from_f32(0.5))
363                .to_f32(),
364            0.5
365        );
366
367        for a_bits in u8::MIN..=u8::MAX {
368            for b_bits in u8::MIN..=u8::MAX {
369                let u = Uf8E5M3::from_bits(a_bits);
370                let exponent = Uf8E5M3::from_bits(b_bits);
371
372                assert_eq!(
373                    u.pow1muf(exponent).to_bits(),
374                    Uf8E5M3::from_f32(u.to_f32().pow1muf(exponent)).to_bits()
375                );
376            }
377        }
378    }
379
380    #[test]
381    fn uf8_cross_layout_pow1m_luts_match_promoted_arithmetic() {
382        assert_eq!(
383            Uf8::from_f32(0.75).pow1muf(Uf8E5M3::from_f32(0.5)).to_f32(),
384            0.5
385        );
386        assert_eq!(
387            Uf8E5M3::from_f32(0.75).pow1muf(Uf8::from_f32(0.5)).to_f32(),
388            0.5
389        );
390
391        for a_bits in u8::MIN..=u8::MAX {
392            for b_bits in u8::MIN..=u8::MAX {
393                let e4m4 = Uf8::from_bits(a_bits);
394                let e5m3 = Uf8E5M3::from_bits(b_bits);
395
396                assert_eq!(
397                    e4m4.pow1muf(e5m3).to_bits(),
398                    Uf8::from_f32(e4m4.to_f32().pow1muf(e5m3)).to_bits()
399                );
400                assert_eq!(
401                    e5m3.pow1muf(e4m4).to_bits(),
402                    Uf8E5M3::from_f32(e5m3.to_f32().pow1muf(e4m4)).to_bits()
403                );
404            }
405        }
406    }
407
408    #[cfg(any(not(feature = "f16"), feature = "soft-float"))]
409    #[test]
410    fn uf8_lut_matches_promoted_arithmetic() {
411        for a_bits in u8::MIN..=u8::MAX {
412            for b_bits in u8::MIN..=u8::MAX {
413                let a = Uf8::from_bits(a_bits);
414                let b = Uf8::from_bits(b_bits);
415                let a_f32 = a.to_f32();
416                let b_f32 = b.to_f32();
417
418                assert_eq!((a + b).to_bits(), Uf8::from_f32(a_f32 + b_f32).to_bits());
419                assert_eq!((a - b).to_bits(), Uf8::from_f32(a_f32 - b_f32).to_bits());
420                assert_eq!((a * b).to_bits(), Uf8::from_f32(a_f32 * b_f32).to_bits());
421                assert_eq!((a / b).to_bits(), Uf8::from_f32(a_f32 / b_f32).to_bits());
422            }
423        }
424    }
425
426    #[cfg(any(not(feature = "f16"), feature = "soft-float"))]
427    #[test]
428    fn uf8_e5m3_lut_matches_promoted_arithmetic() {
429        for a_bits in u8::MIN..=u8::MAX {
430            for b_bits in u8::MIN..=u8::MAX {
431                let a = Uf8E5M3::from_bits(a_bits);
432                let b = Uf8E5M3::from_bits(b_bits);
433                let a_f32 = a.to_f32();
434                let b_f32 = b.to_f32();
435
436                assert_eq!(
437                    (a + b).to_bits(),
438                    Uf8E5M3::from_f32(a_f32 + b_f32).to_bits()
439                );
440                assert_eq!(
441                    (a - b).to_bits(),
442                    Uf8E5M3::from_f32(a_f32 - b_f32).to_bits()
443                );
444                assert_eq!(
445                    (a * b).to_bits(),
446                    Uf8E5M3::from_f32(a_f32 * b_f32).to_bits()
447                );
448                assert_eq!(
449                    (a / b).to_bits(),
450                    Uf8E5M3::from_f32(a_f32 / b_f32).to_bits()
451                );
452            }
453        }
454    }
455
456    #[test]
457    fn negative_subtraction_result_is_nan() {
458        assert!((Uf8::from_f32(1.0) - Uf8::from_f32(2.0)).is_nan());
459        assert!((Uf8E5M3::from_f32(1.0) - Uf8E5M3::from_f32(2.0)).is_nan());
460        assert!((Uf16::from_f32(1.0) - Uf16::from_f32(2.0)).is_nan());
461        assert!((Uf16E6M10::from_f32(1.0) - Uf16E6M10::from_f32(2.0)).is_nan());
462        assert!((Uf32::from_f64(1.0) - Uf32::from_f64(2.0)).is_nan());
463        #[cfg(feature = "f128")]
464        assert!((Uf64::from_f64(1.0) - Uf64::from_f64(2.0)).is_nan());
465    }
466
467    #[test]
468    fn raw_bits_define_total_ordering() {
469        assert!(Uf8::ZERO < Uf8::MIN_POSITIVE);
470        assert!(Uf8::MAX < Uf8::INFINITY);
471        assert!(Uf8::INFINITY < Uf8::NAN);
472
473        assert!(Uf8E5M3::ZERO < Uf8E5M3::MIN_POSITIVE);
474        assert!(Uf8E5M3::MAX < Uf8E5M3::INFINITY);
475        assert!(Uf8E5M3::INFINITY < Uf8E5M3::NAN);
476
477        assert!(Uf16::ZERO < Uf16::MIN_POSITIVE);
478        assert!(Uf16::MAX < Uf16::INFINITY);
479        assert!(Uf16::INFINITY < Uf16::NAN);
480
481        assert!(Uf16E6M10::ZERO < Uf16E6M10::MIN_POSITIVE);
482        assert!(Uf16E6M10::MAX < Uf16E6M10::INFINITY);
483        assert!(Uf16E6M10::INFINITY < Uf16E6M10::NAN);
484
485        assert!(Uf32::ZERO < Uf32::MIN_POSITIVE);
486        assert!(Uf32::MAX < Uf32::INFINITY);
487        assert!(Uf32::INFINITY < Uf32::NAN);
488
489        #[cfg(feature = "f128")]
490        {
491            assert!(Uf64::ZERO < Uf64::MIN_POSITIVE);
492            assert!(Uf64::MAX < Uf64::INFINITY);
493            assert!(Uf64::INFINITY < Uf64::NAN);
494        }
495    }
496
497    #[test]
498    fn round_to_nearest_even_when_encoding() {
499        assert_eq!(Uf8::from_f32(1.0 + 1.0 / 32.0).to_bits(), 0x70);
500        assert_eq!(Uf8::from_f32(1.0 + 3.0 / 32.0).to_bits(), 0x72);
501
502        assert_eq!(
503            Uf32::from_f64(1.0 + 2.0_f64.powi(-25)).to_bits(),
504            Uf32::ONE.to_bits()
505        );
506        assert_eq!(
507            Uf32::from_f64(1.0 + 3.0 * 2.0_f64.powi(-25)).to_bits(),
508            Uf32::ONE.to_bits() + 2
509        );
510    }
511}
unsigned_float/lib.rs

unsigned_float/
lib.rs