1use core::cmp::Ordering;
2use core::fmt;
3use core::ops::{Add, Div, Mul, Sub};
4
5use crate::{ConversionError, dispatch};
6
// Implements `Display`, `LowerExp`, and `UpperExp` for `$ty` by converting the
// value with its `$to_float` method and delegating to the formatting impl of
// the returned type, so every formatter flag is handled by that type.
macro_rules! impl_float_format {
    // Internal rule: forward a single `core::fmt` trait to the converted value.
    (@forward $trait_name:ident, $ty:ty, $to_float:ident) => {
        impl fmt::$trait_name for $ty {
            fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
                let widened = self.$to_float();
                fmt::$trait_name::fmt(&widened, formatter)
            }
        }
    };
    // Public entry point: `impl_float_format!(Type, conversion_method);`
    ($ty:ty, $to_float:ident) => {
        impl_float_format!(@forward Display, $ty, $to_float);
        impl_float_format!(@forward LowerExp, $ty, $to_float);
        impl_float_format!(@forward UpperExp, $ty, $to_float);
    };
}
28
/// Unsigned 16-bit floating-point value with a 5-bit exponent and an 11-bit
/// mantissa, stored as its raw `u16` bit pattern.
///
/// Equality and hashing are derived, so they compare the raw bits; the default
/// value is the all-zero pattern.
#[repr(transparent)]
#[derive(Copy, Clone, PartialEq, Eq, Hash, Default)]
pub struct Uf16E5M11(u16);
33
/// Shorthand alias for [`Uf16E5M11`].
pub type Uf16 = Uf16E5M11;
36
37impl Uf16E5M11 {
38 pub const EXPONENT_BITS: u32 = 5;
39 pub const MANTISSA_BITS: u32 = 11;
40 pub const EXPONENT_BIAS: i32 = 15;
41 pub const EXPONENT_MASK: u16 = 0xf800;
42 pub const MANTISSA_MASK: u16 = 0x07ff;
43
44 pub const ZERO: Self = Self(0);
45 pub const ONE: Self = Self(0x7800);
46 pub const INFINITY: Self = Self(0xf800);
47 pub const NAN: Self = Self(0xfc00);
48 pub const MAX: Self = Self(0xf7ff);
49 pub const MIN_POSITIVE: Self = Self(0x0001);
50 pub const MIN_NORMAL: Self = Self(0x0800);
51
52 pub const fn from_bits(bits: u16) -> Self {
53 Self(bits)
54 }
55
56 pub const fn to_bits(self) -> u16 {
57 self.0
58 }
59
60 pub fn from_f32(value: f32) -> Self {
61 Self(dispatch::f32_to_uf16(value))
62 }
63
64 pub fn to_f32(self) -> f32 {
65 dispatch::uf16_to_f32(self.0)
66 }
67
68 pub fn from_f64(value: f64) -> Self {
69 Self::from_f32(value as f32)
70 }
71
72 pub fn to_f64(self) -> f64 {
73 self.to_f32() as f64
74 }
75
76 pub fn try_from_f64(value: f64) -> Result<Self, ConversionError> {
77 crate::convert::check_finite_non_negative(value)?;
78
79 let encoded = Self::from_f64(value);
80 crate::convert::check_encoded(value, encoded.is_zero(), encoded.is_infinite())?;
81
82 Ok(encoded)
83 }
84
85 #[cfg(feature = "f16")]
86 pub fn from_f16(value: f16) -> Self {
87 Self::from_f32(value as f32)
88 }
89
90 #[cfg(feature = "f16")]
91 pub fn to_f16(self) -> f16 {
92 self.to_f32() as f16
93 }
94
95 pub const fn exponent(self) -> u16 {
96 (self.0 & Self::EXPONENT_MASK) >> Self::MANTISSA_BITS
97 }
98
99 pub const fn mantissa(self) -> u16 {
100 self.0 & Self::MANTISSA_MASK
101 }
102
103 pub const fn is_zero(self) -> bool {
104 self.0 == 0
105 }
106
107 pub const fn is_nan(self) -> bool {
108 self.exponent() == 0x1f && self.mantissa() != 0
109 }
110
111 pub const fn is_infinite(self) -> bool {
112 self.0 == Self::INFINITY.0
113 }
114
115 pub const fn is_finite(self) -> bool {
116 self.exponent() != 0x1f
117 }
118
119 pub const fn is_subnormal(self) -> bool {
120 self.exponent() == 0 && self.mantissa() != 0
121 }
122}
123
124impl From<f32> for Uf16E5M11 {
125 fn from(value: f32) -> Self {
126 Self::from_f32(value)
127 }
128}
129
/// Converts through [`Uf16E5M11::from_f16`]; only built with the `f16` feature.
#[cfg(feature = "f16")]
impl From<f16> for Uf16E5M11 {
    fn from(value: f16) -> Self {
        Uf16E5M11::from_f16(value)
    }
}
136
137impl From<Uf16E5M11> for f32 {
138 fn from(value: Uf16E5M11) -> Self {
139 value.to_f32()
140 }
141}
142
/// Converts through [`Uf16E5M11::to_f16`]; only built with the `f16` feature.
#[cfg(feature = "f16")]
impl From<Uf16E5M11> for f16 {
    fn from(value: Uf16E5M11) -> Self {
        Uf16E5M11::to_f16(value)
    }
}
149
150impl From<Uf16E5M11> for f64 {
151 fn from(value: Uf16E5M11) -> Self {
152 value.to_f64()
153 }
154}
155
156impl Ord for Uf16E5M11 {
157 fn cmp(&self, other: &Self) -> Ordering {
158 self.0.cmp(&other.0)
159 }
160}
161
162impl PartialOrd for Uf16E5M11 {
163 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
164 Some(self.cmp(other))
165 }
166}
167
168impl Add for Uf16E5M11 {
169 type Output = Self;
170
171 fn add(self, rhs: Self) -> Self::Output {
172 Self(dispatch::add_uf16(self.0, rhs.0))
173 }
174}
175
176impl Sub for Uf16E5M11 {
177 type Output = Self;
178
179 fn sub(self, rhs: Self) -> Self::Output {
180 Self(dispatch::sub_uf16(self.0, rhs.0))
181 }
182}
183
184impl Mul for Uf16E5M11 {
185 type Output = Self;
186
187 fn mul(self, rhs: Self) -> Self::Output {
188 Self(dispatch::mul_uf16(self.0, rhs.0))
189 }
190}
191
192impl Div for Uf16E5M11 {
193 type Output = Self;
194
195 fn div(self, rhs: Self) -> Self::Output {
196 Self(dispatch::div_uf16(self.0, rhs.0))
197 }
198}
199
200impl fmt::Debug for Uf16E5M11 {
201 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
202 f.debug_tuple("Uf16E5M11").field(&self.to_f32()).finish()
203 }
204}
205
// Generate `Display`, `LowerExp`, and `UpperExp` by formatting the `to_f32` value.
impl_float_format!(Uf16E5M11, to_f32);
207
/// Unsigned 16-bit floating-point value with a 6-bit exponent and a 10-bit
/// mantissa, stored as its raw `u16` bit pattern.
///
/// Equality and hashing are derived, so they compare the raw bits; the default
/// value is the all-zero pattern.
#[repr(transparent)]
#[derive(Copy, Clone, PartialEq, Eq, Hash, Default)]
pub struct Uf16E6M10(u16);
212
213impl Uf16E6M10 {
214 pub const EXPONENT_BITS: u32 = 6;
215 pub const MANTISSA_BITS: u32 = 10;
216 pub const EXPONENT_BIAS: i32 = 31;
217 pub const EXPONENT_MASK: u16 = 0xfc00;
218 pub const MANTISSA_MASK: u16 = 0x03ff;
219
220 pub const ZERO: Self = Self(0);
221 pub const ONE: Self = Self(0x7c00);
222 pub const INFINITY: Self = Self(0xfc00);
223 pub const NAN: Self = Self(0xfe00);
224 pub const MAX: Self = Self(0xfbff);
225 pub const MIN_POSITIVE: Self = Self(0x0001);
226 pub const MIN_NORMAL: Self = Self(0x0400);
227
228 pub const fn from_bits(bits: u16) -> Self {
229 Self(bits)
230 }
231
232 pub const fn to_bits(self) -> u16 {
233 self.0
234 }
235
236 pub fn from_f32(value: f32) -> Self {
237 Self(dispatch::f32_to_uf16_e6m10(value))
238 }
239
240 pub fn to_f32(self) -> f32 {
241 dispatch::uf16_e6m10_to_f32(self.0)
242 }
243
244 pub fn from_f64(value: f64) -> Self {
245 Self::from_f32(value as f32)
246 }
247
248 pub fn to_f64(self) -> f64 {
249 self.to_f32() as f64
250 }
251
252 pub fn try_from_f64(value: f64) -> Result<Self, ConversionError> {
253 crate::convert::check_finite_non_negative(value)?;
254
255 let encoded = Self::from_f64(value);
256 crate::convert::check_encoded(value, encoded.is_zero(), encoded.is_infinite())?;
257
258 Ok(encoded)
259 }
260
261 #[cfg(feature = "f16")]
262 pub fn from_f16(value: f16) -> Self {
263 Self::from_f32(value as f32)
264 }
265
266 #[cfg(feature = "f16")]
267 pub fn to_f16(self) -> f16 {
268 self.to_f32() as f16
269 }
270
271 pub const fn exponent(self) -> u16 {
272 (self.0 & Self::EXPONENT_MASK) >> Self::MANTISSA_BITS
273 }
274
275 pub const fn mantissa(self) -> u16 {
276 self.0 & Self::MANTISSA_MASK
277 }
278
279 pub const fn is_zero(self) -> bool {
280 self.0 == 0
281 }
282
283 pub const fn is_nan(self) -> bool {
284 self.exponent() == 0x3f && self.mantissa() != 0
285 }
286
287 pub const fn is_infinite(self) -> bool {
288 self.0 == Self::INFINITY.0
289 }
290
291 pub const fn is_finite(self) -> bool {
292 self.exponent() != 0x3f
293 }
294
295 pub const fn is_subnormal(self) -> bool {
296 self.exponent() == 0 && self.mantissa() != 0
297 }
298}
299
300impl From<f32> for Uf16E6M10 {
301 fn from(value: f32) -> Self {
302 Self::from_f32(value)
303 }
304}
305
/// Converts through [`Uf16E6M10::from_f16`]; only built with the `f16` feature.
#[cfg(feature = "f16")]
impl From<f16> for Uf16E6M10 {
    fn from(value: f16) -> Self {
        Uf16E6M10::from_f16(value)
    }
}
312
313impl From<Uf16E6M10> for f32 {
314 fn from(value: Uf16E6M10) -> Self {
315 value.to_f32()
316 }
317}
318
/// Converts through [`Uf16E6M10::to_f16`]; only built with the `f16` feature.
#[cfg(feature = "f16")]
impl From<Uf16E6M10> for f16 {
    fn from(value: Uf16E6M10) -> Self {
        Uf16E6M10::to_f16(value)
    }
}
325
326impl From<Uf16E6M10> for f64 {
327 fn from(value: Uf16E6M10) -> Self {
328 value.to_f64()
329 }
330}
331
332impl Ord for Uf16E6M10 {
333 fn cmp(&self, other: &Self) -> Ordering {
334 self.0.cmp(&other.0)
335 }
336}
337
338impl PartialOrd for Uf16E6M10 {
339 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
340 Some(self.cmp(other))
341 }
342}
343
344impl Add for Uf16E6M10 {
345 type Output = Self;
346
347 fn add(self, rhs: Self) -> Self::Output {
348 Self(dispatch::add_uf16_e6m10(self.0, rhs.0))
349 }
350}
351
352impl Sub for Uf16E6M10 {
353 type Output = Self;
354
355 fn sub(self, rhs: Self) -> Self::Output {
356 Self(dispatch::sub_uf16_e6m10(self.0, rhs.0))
357 }
358}
359
360impl Mul for Uf16E6M10 {
361 type Output = Self;
362
363 fn mul(self, rhs: Self) -> Self::Output {
364 Self(dispatch::mul_uf16_e6m10(self.0, rhs.0))
365 }
366}
367
368impl Div for Uf16E6M10 {
369 type Output = Self;
370
371 fn div(self, rhs: Self) -> Self::Output {
372 Self(dispatch::div_uf16_e6m10(self.0, rhs.0))
373 }
374}
375
376impl fmt::Debug for Uf16E6M10 {
377 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
378 f.debug_tuple("Uf16E6M10").field(&self.to_f32()).finish()
379 }
380}
381
// Generate `Display`, `LowerExp`, and `UpperExp` by formatting the `to_f32` value.
impl_float_format!(Uf16E6M10, to_f32);