// unsigned_float/uf16.rs

1use core::cmp::Ordering;
2use core::fmt;
3use core::ops::{Add, Div, Mul, Sub};
4
5use crate::{ConversionError, dispatch};
6
/// Implements `fmt::Display`, `fmt::LowerExp` and `fmt::UpperExp` for `$ty`
/// by converting the value with its `$to_float` method and delegating to the
/// resulting float's own implementation of the same trait.
///
/// `fmt` must be in scope at the invocation site.
macro_rules! impl_float_format {
    // Internal rule: forward a single formatting trait to the float value.
    (@forward $fmt_trait:ident, $ty:ty, $to_float:ident) => {
        impl fmt::$fmt_trait for $ty {
            fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
                let as_float = self.$to_float();
                fmt::$fmt_trait::fmt(&as_float, f)
            }
        }
    };
    // Public entry point: generate all three formatting impls.
    ($ty:ty, $to_float:ident) => {
        impl_float_format!(@forward Display, $ty, $to_float);
        impl_float_format!(@forward LowerExp, $ty, $to_float);
        impl_float_format!(@forward UpperExp, $ty, $to_float);
    };
}
28
/// A 16-bit unsigned float with 5 exponent bits and 11 mantissa bits.
///
/// The value is stored as its raw `u16` bit pattern, so the derived equality,
/// hashing and default (`0`) all operate on those bits.
#[repr(transparent)]
#[derive(Copy, Clone, PartialEq, Eq, Hash, Default)]
pub struct Uf16E5M11(u16);

/// Default 16-bit unsigned float layout; an alias for [`Uf16E5M11`].
pub type Uf16 = Uf16E5M11;
36
37impl Uf16E5M11 {
38    pub const EXPONENT_BITS: u32 = 5;
39    pub const MANTISSA_BITS: u32 = 11;
40    pub const EXPONENT_BIAS: i32 = 15;
41    pub const EXPONENT_MASK: u16 = 0xf800;
42    pub const MANTISSA_MASK: u16 = 0x07ff;
43
44    pub const ZERO: Self = Self(0);
45    pub const ONE: Self = Self(0x7800);
46    pub const INFINITY: Self = Self(0xf800);
47    pub const NAN: Self = Self(0xfc00);
48    pub const MAX: Self = Self(0xf7ff);
49    pub const MIN_POSITIVE: Self = Self(0x0001);
50    pub const MIN_NORMAL: Self = Self(0x0800);
51
52    pub const fn from_bits(bits: u16) -> Self {
53        Self(bits)
54    }
55
56    pub const fn to_bits(self) -> u16 {
57        self.0
58    }
59
60    pub fn from_f32(value: f32) -> Self {
61        Self(dispatch::f32_to_uf16(value))
62    }
63
64    pub fn to_f32(self) -> f32 {
65        dispatch::uf16_to_f32(self.0)
66    }
67
68    pub fn from_f64(value: f64) -> Self {
69        Self::from_f32(value as f32)
70    }
71
72    pub fn to_f64(self) -> f64 {
73        self.to_f32() as f64
74    }
75
76    pub fn try_from_f64(value: f64) -> Result<Self, ConversionError> {
77        crate::convert::check_finite_non_negative(value)?;
78
79        let encoded = Self::from_f64(value);
80        crate::convert::check_encoded(value, encoded.is_zero(), encoded.is_infinite())?;
81
82        Ok(encoded)
83    }
84
85    #[cfg(feature = "f16")]
86    pub fn from_f16(value: f16) -> Self {
87        Self::from_f32(value as f32)
88    }
89
90    #[cfg(feature = "f16")]
91    pub fn to_f16(self) -> f16 {
92        self.to_f32() as f16
93    }
94
95    pub const fn exponent(self) -> u16 {
96        (self.0 & Self::EXPONENT_MASK) >> Self::MANTISSA_BITS
97    }
98
99    pub const fn mantissa(self) -> u16 {
100        self.0 & Self::MANTISSA_MASK
101    }
102
103    pub const fn is_zero(self) -> bool {
104        self.0 == 0
105    }
106
107    pub const fn is_nan(self) -> bool {
108        self.exponent() == 0x1f && self.mantissa() != 0
109    }
110
111    pub const fn is_infinite(self) -> bool {
112        self.0 == Self::INFINITY.0
113    }
114
115    pub const fn is_finite(self) -> bool {
116        self.exponent() != 0x1f
117    }
118
119    pub const fn is_subnormal(self) -> bool {
120        self.exponent() == 0 && self.mantissa() != 0
121    }
122}
123
124impl From<f32> for Uf16E5M11 {
125    fn from(value: f32) -> Self {
126        Self::from_f32(value)
127    }
128}
129
/// Encodes an `f16`; identical to calling [`Uf16E5M11::from_f16`].
///
/// Only compiled when the `f16` feature is enabled.
#[cfg(feature = "f16")]
impl From<f16> for Uf16E5M11 {
    fn from(value: f16) -> Self {
        Uf16E5M11::from_f16(value)
    }
}
136
137impl From<Uf16E5M11> for f32 {
138    fn from(value: Uf16E5M11) -> Self {
139        value.to_f32()
140    }
141}
142
/// Decodes to `f16`; identical to calling [`Uf16E5M11::to_f16`].
///
/// Only compiled when the `f16` feature is enabled.
#[cfg(feature = "f16")]
impl From<Uf16E5M11> for f16 {
    fn from(value: Uf16E5M11) -> Self {
        Uf16E5M11::to_f16(value)
    }
}
149
150impl From<Uf16E5M11> for f64 {
151    fn from(value: Uf16E5M11) -> Self {
152        value.to_f64()
153    }
154}
155
156impl Ord for Uf16E5M11 {
157    fn cmp(&self, other: &Self) -> Ordering {
158        self.0.cmp(&other.0)
159    }
160}
161
162impl PartialOrd for Uf16E5M11 {
163    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
164        Some(self.cmp(other))
165    }
166}
167
168impl Add for Uf16E5M11 {
169    type Output = Self;
170
171    fn add(self, rhs: Self) -> Self::Output {
172        Self(dispatch::add_uf16(self.0, rhs.0))
173    }
174}
175
176impl Sub for Uf16E5M11 {
177    type Output = Self;
178
179    fn sub(self, rhs: Self) -> Self::Output {
180        Self(dispatch::sub_uf16(self.0, rhs.0))
181    }
182}
183
184impl Mul for Uf16E5M11 {
185    type Output = Self;
186
187    fn mul(self, rhs: Self) -> Self::Output {
188        Self(dispatch::mul_uf16(self.0, rhs.0))
189    }
190}
191
192impl Div for Uf16E5M11 {
193    type Output = Self;
194
195    fn div(self, rhs: Self) -> Self::Output {
196        Self(dispatch::div_uf16(self.0, rhs.0))
197    }
198}
199
200impl fmt::Debug for Uf16E5M11 {
201    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
202        f.debug_tuple("Uf16E5M11").field(&self.to_f32()).finish()
203    }
204}
205
206impl_float_format!(Uf16E5M11, to_f32);
207
/// A 16-bit unsigned float with 6 exponent bits and 10 mantissa bits.
///
/// The value is stored as its raw `u16` bit pattern, so the derived equality,
/// hashing and default (`0`) all operate on those bits.
#[repr(transparent)]
#[derive(Copy, Clone, PartialEq, Eq, Hash, Default)]
pub struct Uf16E6M10(u16);
212
213impl Uf16E6M10 {
214    pub const EXPONENT_BITS: u32 = 6;
215    pub const MANTISSA_BITS: u32 = 10;
216    pub const EXPONENT_BIAS: i32 = 31;
217    pub const EXPONENT_MASK: u16 = 0xfc00;
218    pub const MANTISSA_MASK: u16 = 0x03ff;
219
220    pub const ZERO: Self = Self(0);
221    pub const ONE: Self = Self(0x7c00);
222    pub const INFINITY: Self = Self(0xfc00);
223    pub const NAN: Self = Self(0xfe00);
224    pub const MAX: Self = Self(0xfbff);
225    pub const MIN_POSITIVE: Self = Self(0x0001);
226    pub const MIN_NORMAL: Self = Self(0x0400);
227
228    pub const fn from_bits(bits: u16) -> Self {
229        Self(bits)
230    }
231
232    pub const fn to_bits(self) -> u16 {
233        self.0
234    }
235
236    pub fn from_f32(value: f32) -> Self {
237        Self(dispatch::f32_to_uf16_e6m10(value))
238    }
239
240    pub fn to_f32(self) -> f32 {
241        dispatch::uf16_e6m10_to_f32(self.0)
242    }
243
244    pub fn from_f64(value: f64) -> Self {
245        Self::from_f32(value as f32)
246    }
247
248    pub fn to_f64(self) -> f64 {
249        self.to_f32() as f64
250    }
251
252    pub fn try_from_f64(value: f64) -> Result<Self, ConversionError> {
253        crate::convert::check_finite_non_negative(value)?;
254
255        let encoded = Self::from_f64(value);
256        crate::convert::check_encoded(value, encoded.is_zero(), encoded.is_infinite())?;
257
258        Ok(encoded)
259    }
260
261    #[cfg(feature = "f16")]
262    pub fn from_f16(value: f16) -> Self {
263        Self::from_f32(value as f32)
264    }
265
266    #[cfg(feature = "f16")]
267    pub fn to_f16(self) -> f16 {
268        self.to_f32() as f16
269    }
270
271    pub const fn exponent(self) -> u16 {
272        (self.0 & Self::EXPONENT_MASK) >> Self::MANTISSA_BITS
273    }
274
275    pub const fn mantissa(self) -> u16 {
276        self.0 & Self::MANTISSA_MASK
277    }
278
279    pub const fn is_zero(self) -> bool {
280        self.0 == 0
281    }
282
283    pub const fn is_nan(self) -> bool {
284        self.exponent() == 0x3f && self.mantissa() != 0
285    }
286
287    pub const fn is_infinite(self) -> bool {
288        self.0 == Self::INFINITY.0
289    }
290
291    pub const fn is_finite(self) -> bool {
292        self.exponent() != 0x3f
293    }
294
295    pub const fn is_subnormal(self) -> bool {
296        self.exponent() == 0 && self.mantissa() != 0
297    }
298}
299
300impl From<f32> for Uf16E6M10 {
301    fn from(value: f32) -> Self {
302        Self::from_f32(value)
303    }
304}
305
/// Encodes an `f16`; identical to calling [`Uf16E6M10::from_f16`].
///
/// Only compiled when the `f16` feature is enabled.
#[cfg(feature = "f16")]
impl From<f16> for Uf16E6M10 {
    fn from(value: f16) -> Self {
        Uf16E6M10::from_f16(value)
    }
}
312
313impl From<Uf16E6M10> for f32 {
314    fn from(value: Uf16E6M10) -> Self {
315        value.to_f32()
316    }
317}
318
/// Decodes to `f16`; identical to calling [`Uf16E6M10::to_f16`].
///
/// Only compiled when the `f16` feature is enabled.
#[cfg(feature = "f16")]
impl From<Uf16E6M10> for f16 {
    fn from(value: Uf16E6M10) -> Self {
        Uf16E6M10::to_f16(value)
    }
}
325
326impl From<Uf16E6M10> for f64 {
327    fn from(value: Uf16E6M10) -> Self {
328        value.to_f64()
329    }
330}
331
332impl Ord for Uf16E6M10 {
333    fn cmp(&self, other: &Self) -> Ordering {
334        self.0.cmp(&other.0)
335    }
336}
337
338impl PartialOrd for Uf16E6M10 {
339    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
340        Some(self.cmp(other))
341    }
342}
343
344impl Add for Uf16E6M10 {
345    type Output = Self;
346
347    fn add(self, rhs: Self) -> Self::Output {
348        Self(dispatch::add_uf16_e6m10(self.0, rhs.0))
349    }
350}
351
352impl Sub for Uf16E6M10 {
353    type Output = Self;
354
355    fn sub(self, rhs: Self) -> Self::Output {
356        Self(dispatch::sub_uf16_e6m10(self.0, rhs.0))
357    }
358}
359
360impl Mul for Uf16E6M10 {
361    type Output = Self;
362
363    fn mul(self, rhs: Self) -> Self::Output {
364        Self(dispatch::mul_uf16_e6m10(self.0, rhs.0))
365    }
366}
367
368impl Div for Uf16E6M10 {
369    type Output = Self;
370
371    fn div(self, rhs: Self) -> Self::Output {
372        Self(dispatch::div_uf16_e6m10(self.0, rhs.0))
373    }
374}
375
376impl fmt::Debug for Uf16E6M10 {
377    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
378        f.debug_tuple("Uf16E6M10").field(&self.to_f32()).finish()
379    }
380}
381
382impl_float_format!(Uf16E6M10, to_f32);
// unsigned_float/uf16.rs

// unsigned_float/uf16.rs