Skip to main content

unsigned_float/
uf8.rs

1use core::cmp::Ordering;
2use core::fmt;
3use core::ops::{Add, Div, Mul, Sub};
4
5use crate::{ConversionError, dispatch};
6
/// Implements the float-style formatting traits for an unsigned-float wrapper.
///
/// `$ty` is the type to implement for and `$to_float` names a method on it
/// whose return value already implements the formatting traits. Each
/// generated `fmt` converts `self` with that method and forwards to the
/// same trait on the result, passing the caller's `Formatter` through
/// unchanged.
///
/// A module named `fmt` (e.g. `core::fmt`) must be in scope where the macro
/// is invoked, because the generated paths are resolved at the call site.
macro_rules! impl_float_format {
    // Internal rule: stamp out one forwarding impl per listed trait.
    (@forward $ty:ty, $to_float:ident, $($fmt_trait:ident)+) => {
        $(
            impl fmt::$fmt_trait for $ty {
                fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                    fmt::$fmt_trait::fmt(&self.$to_float(), f)
                }
            }
        )+
    };
    // Entry point used by callers: `Display`, `LowerExp` and `UpperExp`.
    ($ty:ty, $to_float:ident) => {
        impl_float_format!(@forward $ty, $to_float, Display LowerExp UpperExp);
    };
}
28
/// An 8-bit unsigned float with 4 exponent bits and 4 mantissa bits.
///
/// The value is stored as its raw bit pattern in a single `u8`
/// (`#[repr(transparent)]`). The derived `PartialEq`, `Eq` and `Hash`
/// therefore compare and hash bit patterns, and `Default` is the all-zero
/// pattern.
#[derive(Clone, Copy, Default, Hash, PartialEq, Eq)]
#[repr(transparent)]
pub struct Uf8E4M4(u8);

/// Default 8-bit unsigned float layout.
pub type Uf8 = Uf8E4M4;
36
37impl Uf8E4M4 {
38    pub const EXPONENT_BITS: u32 = 4;
39    pub const MANTISSA_BITS: u32 = 4;
40    pub const EXPONENT_BIAS: i32 = 7;
41    pub const EXPONENT_MASK: u8 = 0xf0;
42    pub const MANTISSA_MASK: u8 = 0x0f;
43
44    pub const ZERO: Self = Self(0);
45    pub const ONE: Self = Self(0x70);
46    pub const INFINITY: Self = Self(0xf0);
47    pub const NAN: Self = Self(0xf8);
48    pub const MAX: Self = Self(0xef);
49    pub const MIN_POSITIVE: Self = Self(0x01);
50    pub const MIN_NORMAL: Self = Self(0x10);
51
52    pub const fn from_bits(bits: u8) -> Self {
53        Self(bits)
54    }
55
56    pub const fn to_bits(self) -> u8 {
57        self.0
58    }
59
60    pub fn from_f32(value: f32) -> Self {
61        Self(dispatch::f32_to_uf8(value))
62    }
63
64    pub fn to_f32(self) -> f32 {
65        dispatch::uf8_to_f32(self.0)
66    }
67
68    pub fn from_f64(value: f64) -> Self {
69        Self(dispatch::f32_to_uf8(value as f32))
70    }
71
72    pub fn to_f64(self) -> f64 {
73        self.to_f32() as f64
74    }
75
76    pub fn try_from_f64(value: f64) -> Result<Self, ConversionError> {
77        crate::convert::check_finite_non_negative(value)?;
78
79        let encoded = Self::from_f64(value);
80        crate::convert::check_encoded(value, encoded.is_zero(), encoded.is_infinite())?;
81
82        Ok(encoded)
83    }
84
85    #[cfg(feature = "f16")]
86    pub fn from_f16(value: f16) -> Self {
87        Self::from_f32(value as f32)
88    }
89
90    #[cfg(feature = "f16")]
91    pub fn to_f16(self) -> f16 {
92        self.to_f32() as f16
93    }
94
95    pub const fn exponent(self) -> u8 {
96        (self.0 & Self::EXPONENT_MASK) >> Self::MANTISSA_BITS
97    }
98
99    pub const fn mantissa(self) -> u8 {
100        self.0 & Self::MANTISSA_MASK
101    }
102
103    pub const fn is_zero(self) -> bool {
104        self.0 == 0
105    }
106
107    pub const fn is_nan(self) -> bool {
108        self.exponent() == 0x0f && self.mantissa() != 0
109    }
110
111    pub const fn is_infinite(self) -> bool {
112        self.0 == Self::INFINITY.0
113    }
114
115    pub const fn is_finite(self) -> bool {
116        self.exponent() != 0x0f
117    }
118
119    pub const fn is_subnormal(self) -> bool {
120        self.exponent() == 0 && self.mantissa() != 0
121    }
122}
123
124impl From<f32> for Uf8E4M4 {
125    fn from(value: f32) -> Self {
126        Self::from_f32(value)
127    }
128}
129
130#[cfg(feature = "f16")]
131impl From<f16> for Uf8E4M4 {
132    fn from(value: f16) -> Self {
133        Self::from_f16(value)
134    }
135}
136
137impl From<Uf8E4M4> for f32 {
138    fn from(value: Uf8E4M4) -> Self {
139        value.to_f32()
140    }
141}
142
143#[cfg(feature = "f16")]
144impl From<Uf8E4M4> for f16 {
145    fn from(value: Uf8E4M4) -> Self {
146        value.to_f16()
147    }
148}
149
150impl From<Uf8E4M4> for f64 {
151    fn from(value: Uf8E4M4) -> Self {
152        value.to_f64()
153    }
154}
155
156impl Ord for Uf8E4M4 {
157    fn cmp(&self, other: &Self) -> Ordering {
158        self.0.cmp(&other.0)
159    }
160}
161
162impl PartialOrd for Uf8E4M4 {
163    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
164        Some(self.cmp(other))
165    }
166}
167
168impl Add for Uf8E4M4 {
169    type Output = Self;
170
171    fn add(self, rhs: Self) -> Self::Output {
172        Self(dispatch::add_uf8(self.0, rhs.0))
173    }
174}
175
176impl Sub for Uf8E4M4 {
177    type Output = Self;
178
179    fn sub(self, rhs: Self) -> Self::Output {
180        Self(dispatch::sub_uf8(self.0, rhs.0))
181    }
182}
183
184impl Mul for Uf8E4M4 {
185    type Output = Self;
186
187    fn mul(self, rhs: Self) -> Self::Output {
188        Self(dispatch::mul_uf8(self.0, rhs.0))
189    }
190}
191
192impl Div for Uf8E4M4 {
193    type Output = Self;
194
195    fn div(self, rhs: Self) -> Self::Output {
196        Self(dispatch::div_uf8(self.0, rhs.0))
197    }
198}
199
200impl fmt::Debug for Uf8E4M4 {
201    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
202        f.debug_tuple("Uf8E4M4").field(&self.to_f32()).finish()
203    }
204}
205
206impl_float_format!(Uf8E4M4, to_f32);
207
/// An 8-bit unsigned float with 5 exponent bits and 3 mantissa bits.
///
/// The value is stored as its raw bit pattern in a single `u8`
/// (`#[repr(transparent)]`). The derived `PartialEq`, `Eq` and `Hash`
/// therefore compare and hash bit patterns, and `Default` is the all-zero
/// pattern.
#[derive(Clone, Copy, Default, Hash, PartialEq, Eq)]
#[repr(transparent)]
pub struct Uf8E5M3(u8);
212
213impl Uf8E5M3 {
214    pub const EXPONENT_BITS: u32 = 5;
215    pub const MANTISSA_BITS: u32 = 3;
216    pub const EXPONENT_BIAS: i32 = 15;
217    pub const EXPONENT_MASK: u8 = 0xf8;
218    pub const MANTISSA_MASK: u8 = 0x07;
219
220    pub const ZERO: Self = Self(0);
221    pub const ONE: Self = Self(0x78);
222    pub const INFINITY: Self = Self(0xf8);
223    pub const NAN: Self = Self(0xfc);
224    pub const MAX: Self = Self(0xf7);
225    pub const MIN_POSITIVE: Self = Self(0x01);
226    pub const MIN_NORMAL: Self = Self(0x08);
227
228    pub const fn from_bits(bits: u8) -> Self {
229        Self(bits)
230    }
231
232    pub const fn to_bits(self) -> u8 {
233        self.0
234    }
235
236    pub fn from_f32(value: f32) -> Self {
237        Self(dispatch::f32_to_uf8_e5m3(value))
238    }
239
240    pub fn to_f32(self) -> f32 {
241        dispatch::uf8_e5m3_to_f32(self.0)
242    }
243
244    pub fn from_f64(value: f64) -> Self {
245        Self::from_f32(value as f32)
246    }
247
248    pub fn to_f64(self) -> f64 {
249        self.to_f32() as f64
250    }
251
252    pub fn try_from_f64(value: f64) -> Result<Self, ConversionError> {
253        crate::convert::check_finite_non_negative(value)?;
254
255        let encoded = Self::from_f64(value);
256        crate::convert::check_encoded(value, encoded.is_zero(), encoded.is_infinite())?;
257
258        Ok(encoded)
259    }
260
261    #[cfg(feature = "f16")]
262    pub fn from_f16(value: f16) -> Self {
263        Self::from_f32(value as f32)
264    }
265
266    #[cfg(feature = "f16")]
267    pub fn to_f16(self) -> f16 {
268        self.to_f32() as f16
269    }
270
271    pub const fn exponent(self) -> u8 {
272        (self.0 & Self::EXPONENT_MASK) >> Self::MANTISSA_BITS
273    }
274
275    pub const fn mantissa(self) -> u8 {
276        self.0 & Self::MANTISSA_MASK
277    }
278
279    pub const fn is_zero(self) -> bool {
280        self.0 == 0
281    }
282
283    pub const fn is_nan(self) -> bool {
284        self.exponent() == 0x1f && self.mantissa() != 0
285    }
286
287    pub const fn is_infinite(self) -> bool {
288        self.0 == Self::INFINITY.0
289    }
290
291    pub const fn is_finite(self) -> bool {
292        self.exponent() != 0x1f
293    }
294
295    pub const fn is_subnormal(self) -> bool {
296        self.exponent() == 0 && self.mantissa() != 0
297    }
298}
299
300impl From<f32> for Uf8E5M3 {
301    fn from(value: f32) -> Self {
302        Self::from_f32(value)
303    }
304}
305
306#[cfg(feature = "f16")]
307impl From<f16> for Uf8E5M3 {
308    fn from(value: f16) -> Self {
309        Self::from_f16(value)
310    }
311}
312
313impl From<Uf8E5M3> for f32 {
314    fn from(value: Uf8E5M3) -> Self {
315        value.to_f32()
316    }
317}
318
319#[cfg(feature = "f16")]
320impl From<Uf8E5M3> for f16 {
321    fn from(value: Uf8E5M3) -> Self {
322        value.to_f16()
323    }
324}
325
326impl From<Uf8E5M3> for f64 {
327    fn from(value: Uf8E5M3) -> Self {
328        value.to_f64()
329    }
330}
331
332impl Ord for Uf8E5M3 {
333    fn cmp(&self, other: &Self) -> Ordering {
334        self.0.cmp(&other.0)
335    }
336}
337
338impl PartialOrd for Uf8E5M3 {
339    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
340        Some(self.cmp(other))
341    }
342}
343
344impl Add for Uf8E5M3 {
345    type Output = Self;
346
347    fn add(self, rhs: Self) -> Self::Output {
348        Self(dispatch::add_uf8_e5m3(self.0, rhs.0))
349    }
350}
351
352impl Sub for Uf8E5M3 {
353    type Output = Self;
354
355    fn sub(self, rhs: Self) -> Self::Output {
356        Self(dispatch::sub_uf8_e5m3(self.0, rhs.0))
357    }
358}
359
360impl Mul for Uf8E5M3 {
361    type Output = Self;
362
363    fn mul(self, rhs: Self) -> Self::Output {
364        Self(dispatch::mul_uf8_e5m3(self.0, rhs.0))
365    }
366}
367
368impl Div for Uf8E5M3 {
369    type Output = Self;
370
371    fn div(self, rhs: Self) -> Self::Output {
372        Self(dispatch::div_uf8_e5m3(self.0, rhs.0))
373    }
374}
375
376impl fmt::Debug for Uf8E5M3 {
377    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
378        f.debug_tuple("Uf8E5M3").field(&self.to_f32()).finish()
379    }
380}
381
382impl_float_format!(Uf8E5M3, to_f32);