1use std::cmp::Ordering;
26use std::fmt;
27use std::hash::{Hash, Hasher};
28use std::ops::{Add, Div, Mul, Neg, Sub};
29
/// An IEEE 754 binary16 ("half precision") value stored as its raw bit pattern.
///
/// Bit layout, most significant first: 1 sign bit, 5 exponent bits (bias 15)
/// and 10 fraction bits. Arithmetic is performed by widening both operands to
/// `f32`, operating there, and rounding the result back to 16 bits.
// The lowercase type name is deliberate, hence the lint opt-out.
#[allow(non_camel_case_types)]
#[derive(Clone, Copy, Default)]
#[repr(transparent)]
pub struct float16(u16);

/// Bit 15: the sign.
const SIGN_MASK: u16 = 0x8000;
/// Bits 14..=10: the biased exponent field.
const EXP_MASK: u16 = 0x7C00;
/// Bits 9..=0: the fraction field.
const MANTISSA_MASK: u16 = 0x03FF;
/// Position of the lowest exponent bit.
const EXP_SHIFT: u32 = 10;
/// All-ones exponent field value, used by infinities and NaNs.
const MAX_EXP: i32 = 31;

/// Bit pattern of positive infinity.
const INFINITY_BITS: u16 = 0x7C00;
/// Bit pattern of negative infinity.
const NEG_INFINITY_BITS: u16 = 0xFC00;
/// Bit pattern of the default quiet NaN (top fraction bit set).
const QUIET_NAN_BITS: u16 = 0x7E00;

impl float16 {
    /// Positive zero (`0x0000`).
    pub const ZERO: Self = Self(0x0000);

    /// Negative zero (`0x8000`).
    pub const NEG_ZERO: Self = Self(0x8000);

    /// Positive infinity.
    pub const INFINITY: Self = Self(INFINITY_BITS);

    /// Negative infinity.
    pub const NEG_INFINITY: Self = Self(NEG_INFINITY_BITS);

    /// A quiet NaN.
    pub const NAN: Self = Self(QUIET_NAN_BITS);

    /// Largest finite value: all-ones fraction under the largest non-special exponent.
    pub const MAX: Self = Self(0x7BFF);

    /// Smallest positive normal value (exponent field 1, fraction 0).
    pub const MIN_POSITIVE: Self = Self(0x0400);

    /// Smallest positive subnormal value (exponent field 0, fraction 1).
    pub const MIN_POSITIVE_SUBNORMAL: Self = Self(0x0001);

    /// Reinterprets a raw 16-bit pattern as a `float16` without any conversion.
    #[inline(always)]
    pub const fn from_bits(bits: u16) -> Self {
        Self(bits)
    }

    /// Returns the raw 16-bit pattern of this value.
    #[inline(always)]
    pub const fn to_bits(self) -> u16 {
        self.0
    }

    /// Shifts `value` right by `shift` bits, rounding the discarded bits to the
    /// nearest result with exact ties going to the even result.
    ///
    /// `shift` must be in `1..=31`.
    #[inline]
    fn shift_right_round_even(value: u32, shift: u32) -> u32 {
        let kept = value >> shift;
        let half = 1u32 << (shift - 1);
        let discarded = value & ((half << 1) - 1);
        let round_up = discarded > half || (discarded == half && (kept & 1) == 1);
        kept + u32::from(round_up)
    }

    /// Converts an `f32` to the nearest `float16`, rounding ties to even.
    ///
    /// * Infinities map to the infinity of the same sign.
    /// * NaNs stay NaN: the top ten payload bits are kept and the quiet bit is forced on.
    /// * Magnitudes too large for a finite half become infinity.
    /// * Magnitudes below the normal range become subnormals or a signed zero.
    pub fn from_f32(value: f32) -> Self {
        let raw = value.to_bits();
        // Sign bit, already moved down to its position in the 16-bit layout.
        let sign16 = ((raw >> 16) as u16) & SIGN_MASK;
        let biased_exp = ((raw >> 23) & 0xFF) as i32;
        let frac = raw & 0x007F_FFFF;

        if biased_exp == 0xFF {
            if frac == 0 {
                return Self(sign16 | INFINITY_BITS);
            }
            // Forcing the quiet bit guarantees the result is still a NaN even
            // when every surviving payload bit is zero.
            let payload = ((frac >> 13) as u16) & MANTISSA_MASK;
            return Self(sign16 | INFINITY_BITS | 0x0200 | payload);
        }

        if biased_exp == 0 && frac == 0 {
            return Self(sign16);
        }

        // Re-bias the exponent from f32 (127) to binary16 (15).
        let exp16 = biased_exp - 127 + 15;

        if exp16 >= MAX_EXP {
            return Self(sign16 | INFINITY_BITS);
        }

        if exp16 <= 0 {
            if exp16 < -10 {
                // Below half of the smallest subnormal: rounds to a signed zero.
                return Self(sign16);
            }
            // Subnormal result: make the implicit leading one explicit and shift
            // it into the fraction. A round-up to 0x0400 lands exactly on the
            // smallest normal encoding, so no special case is needed.
            let significand = frac | (1u32 << 23);
            let shift = (14 - exp16) as u32;
            let rounded = Self::shift_right_round_even(significand, shift);
            return Self(sign16 | rounded as u16);
        }

        // Normal result. `rounded` is at most 0x0400; adding it (rather than
        // OR-ing) lets a fraction overflow carry into the exponent field, and a
        // carry out of exponent 30 yields exactly the infinity encoding.
        let rounded = Self::shift_right_round_even(frac, 13);
        let magnitude = ((exp16 as u16) << EXP_SHIFT) + rounded as u16;
        Self(sign16 | magnitude)
    }

    /// Widens this value to `f32`.
    ///
    /// Zeros and infinities keep their sign, NaNs keep sign and payload
    /// (shifted into the top of the `f32` fraction), and subnormals are
    /// renormalised.
    pub fn to_f32(self) -> f32 {
        let sign32 = u32::from(self.0 & SIGN_MASK) << 16;
        let exp_field = i32::from((self.0 & EXP_MASK) >> EXP_SHIFT);
        let frac = u32::from(self.0 & MANTISSA_MASK);

        let magnitude = match (exp_field, frac) {
            // Signed zero.
            (0, 0) => 0,
            // Subnormal: shift the highest set fraction bit up to bit 10 (the
            // implicit-one position), drop it, and lower the exponent by the
            // same number of steps starting from 2^-14.
            (0, _) => {
                let shift = frac.leading_zeros() - 21;
                let normalized = (frac << shift) & u32::from(MANTISSA_MASK);
                ((113 - shift) << 23) | (normalized << 13)
            }
            // Infinity when the fraction is zero, NaN otherwise.
            (MAX_EXP, _) => 0x7F80_0000 | (frac << 13),
            // Normal number: re-bias the exponent and widen the fraction.
            _ => (((exp_field - 15 + 127) as u32) << 23) | (frac << 13),
        };

        f32::from_bits(sign32 | magnitude)
    }

    /// Returns `true` if this value is a NaN (all-ones exponent, non-zero fraction).
    #[inline]
    pub fn is_nan(self) -> bool {
        (self.0 & !SIGN_MASK) > EXP_MASK
    }

    /// Returns `true` if this value is positive or negative infinity.
    #[inline]
    pub fn is_infinite(self) -> bool {
        (self.0 & !SIGN_MASK) == EXP_MASK
    }

    /// Returns `true` if this value is neither infinite nor NaN.
    #[inline]
    pub fn is_finite(self) -> bool {
        (self.0 & !SIGN_MASK) < EXP_MASK
    }

    /// Returns `true` if this value is a normal number, i.e. not zero,
    /// subnormal, infinite or NaN.
    #[inline]
    pub fn is_normal(self) -> bool {
        !matches!(self.0 & EXP_MASK, 0 | EXP_MASK)
    }

    /// Returns `true` if this value is subnormal (zero exponent, non-zero fraction).
    #[inline]
    pub fn is_subnormal(self) -> bool {
        let magnitude = self.0 & !SIGN_MASK;
        (1..=MANTISSA_MASK).contains(&magnitude)
    }

    /// Returns `true` for both positive and negative zero.
    #[inline]
    pub fn is_zero(self) -> bool {
        // Shifting left by one discards the sign bit.
        (self.0 << 1) == 0
    }

    /// Returns `true` if the sign bit is set (including `-0.0` and negative NaNs).
    #[inline]
    pub fn is_sign_negative(self) -> bool {
        (self.0 >> 15) == 1
    }

    /// Returns `true` if the sign bit is clear (including `+0.0` and positive NaNs).
    #[inline]
    pub fn is_sign_positive(self) -> bool {
        (self.0 >> 15) == 0
    }

    /// Numeric (IEEE 754) equality: NaN equals nothing, and `+0.0 == -0.0`.
    #[inline]
    pub fn eq_value(self, other: Self) -> bool {
        if self.is_nan() || other.is_nan() {
            return false;
        }
        self.0 == other.0 || (self.is_zero() && other.is_zero())
    }

    /// Numeric partial ordering, as defined by `f32::partial_cmp` on the
    /// widened values; `None` whenever either operand is NaN.
    #[inline]
    pub fn partial_cmp_value(self, other: Self) -> Option<Ordering> {
        let (lhs, rhs) = (self.to_f32(), other.to_f32());
        lhs.partial_cmp(&rhs)
    }

    /// Total ordering, as defined by `f32::total_cmp` on the widened values.
    #[inline]
    pub fn total_cmp(self, other: Self) -> Ordering {
        let (lhs, rhs) = (self.to_f32(), other.to_f32());
        lhs.total_cmp(&rhs)
    }

    /// Adds two values in `f32` and rounds the sum back to `float16`.
    #[inline]
    // Deliberately an inherent method with the same name as the operator trait method.
    #[allow(clippy::should_implement_trait)]
    pub fn add(self, rhs: Self) -> Self {
        let sum = self.to_f32() + rhs.to_f32();
        Self::from_f32(sum)
    }

    /// Subtracts `rhs` in `f32` and rounds the difference back to `float16`.
    #[inline]
    // Deliberately an inherent method with the same name as the operator trait method.
    #[allow(clippy::should_implement_trait)]
    pub fn sub(self, rhs: Self) -> Self {
        let difference = self.to_f32() - rhs.to_f32();
        Self::from_f32(difference)
    }

    /// Multiplies two values in `f32` and rounds the product back to `float16`.
    #[inline]
    // Deliberately an inherent method with the same name as the operator trait method.
    #[allow(clippy::should_implement_trait)]
    pub fn mul(self, rhs: Self) -> Self {
        let product = self.to_f32() * rhs.to_f32();
        Self::from_f32(product)
    }

    /// Divides by `rhs` in `f32` and rounds the quotient back to `float16`.
    #[inline]
    // Deliberately an inherent method with the same name as the operator trait method.
    #[allow(clippy::should_implement_trait)]
    pub fn div(self, rhs: Self) -> Self {
        let quotient = self.to_f32() / rhs.to_f32();
        Self::from_f32(quotient)
    }

    /// Flips the sign bit; this also applies to zeros, infinities and NaNs.
    #[inline]
    // Deliberately an inherent method with the same name as the operator trait method.
    #[allow(clippy::should_implement_trait)]
    pub fn neg(self) -> Self {
        Self::from_bits(self.to_bits() ^ SIGN_MASK)
    }

    /// Clears the sign bit, returning the magnitude.
    #[inline]
    pub fn abs(self) -> Self {
        Self::from_bits(self.to_bits() & !SIGN_MASK)
    }
}
392
393impl PartialEq for float16 {
397 #[inline]
398 fn eq(&self, other: &Self) -> bool {
399 self.0 == other.0
400 }
401}
402
// `PartialEq` for `float16` compares raw bits, so equality is reflexive even
// for NaN patterns and the `Eq` marker is sound.
impl Eq for float16 {}
404
405impl Hash for float16 {
406 #[inline]
407 fn hash<H: Hasher>(&self, state: &mut H) {
408 self.0.hash(state);
409 }
410}
411
412impl PartialOrd for float16 {
414 #[inline]
415 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
416 self.to_f32().partial_cmp(&other.to_f32())
417 }
418}
419
420impl Add for float16 {
422 type Output = Self;
423 #[inline]
424 fn add(self, rhs: Self) -> Self {
425 Self::add(self, rhs)
426 }
427}
428
429impl Sub for float16 {
430 type Output = Self;
431 #[inline]
432 fn sub(self, rhs: Self) -> Self {
433 Self::sub(self, rhs)
434 }
435}
436
437impl Mul for float16 {
438 type Output = Self;
439 #[inline]
440 fn mul(self, rhs: Self) -> Self {
441 Self::mul(self, rhs)
442 }
443}
444
445impl Div for float16 {
446 type Output = Self;
447 #[inline]
448 fn div(self, rhs: Self) -> Self {
449 Self::div(self, rhs)
450 }
451}
452
453impl Neg for float16 {
454 type Output = Self;
455 #[inline]
456 fn neg(self) -> Self {
457 Self::neg(self)
458 }
459}
460
461impl fmt::Display for float16 {
463 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
464 write!(f, "{}", self.to_f32())
465 }
466}
467
468impl fmt::Debug for float16 {
469 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
470 write!(f, "float16({})", self.to_f32())
471 }
472}
473
#[cfg(test)]
mod tests {
    use super::*;

    /// Both zeros have the expected encodings and classification.
    #[test]
    fn test_zero() {
        assert_eq!(float16::ZERO.to_bits(), 0x0000);
        assert!(float16::ZERO.is_zero());
        assert!(!float16::ZERO.is_sign_negative());

        assert_eq!(float16::NEG_ZERO.to_bits(), 0x8000);
        assert!(float16::NEG_ZERO.is_zero());
        assert!(float16::NEG_ZERO.is_sign_negative());
    }

    /// Both infinities have the expected encodings and classification.
    #[test]
    fn test_infinity() {
        assert_eq!(float16::INFINITY.to_bits(), 0x7C00);
        assert!(float16::INFINITY.is_infinite());
        assert!(!float16::INFINITY.is_nan());

        assert_eq!(float16::NEG_INFINITY.to_bits(), 0xFC00);
        assert!(float16::NEG_INFINITY.is_infinite());
        assert!(float16::NEG_INFINITY.is_sign_negative());
    }

    /// The NaN constant is classified as NaN only.
    #[test]
    fn test_nan() {
        assert!(float16::NAN.is_nan());
        assert!(!float16::NAN.is_infinite());
        assert!(!float16::NAN.is_finite());
    }

    /// Infinities, zeros and NaNs survive conversion in both directions.
    #[test]
    fn test_special_values_conversion() {
        assert_eq!(float16::from_f32(f32::INFINITY), float16::INFINITY);
        assert_eq!(float16::from_f32(f32::NEG_INFINITY), float16::NEG_INFINITY);
        assert_eq!(float16::INFINITY.to_f32(), f32::INFINITY);
        assert_eq!(float16::NEG_INFINITY.to_f32(), f32::NEG_INFINITY);

        assert_eq!(float16::from_f32(0.0), float16::ZERO);
        assert_eq!(float16::from_f32(-0.0), float16::NEG_ZERO);
        assert_eq!(float16::ZERO.to_f32(), 0.0);
        // `-0.0 == 0.0` for f32, so check the sign bit explicitly.
        assert!(float16::NEG_ZERO.to_f32().is_sign_negative());

        assert!(float16::from_f32(f32::NAN).is_nan());
        assert!(float16::NAN.to_f32().is_nan());
        // The quiet NaN constant round-trips bit-exactly.
        assert_eq!(
            float16::from_f32(float16::NAN.to_f32()).to_bits(),
            float16::NAN.to_bits()
        );
    }

    /// The extreme finite constants convert to and from the expected `f32` values.
    #[test]
    fn test_max_min_values() {
        let max_f32 = 65504.0f32;
        assert_eq!(float16::from_f32(max_f32), float16::MAX);
        assert_eq!(float16::MAX.to_f32(), max_f32);

        let min_normal = 2.0f32.powi(-14);
        assert_eq!(float16::from_f32(min_normal), float16::MIN_POSITIVE);
        assert_eq!(float16::MIN_POSITIVE.to_f32(), min_normal);

        let min_subnormal = 2.0f32.powi(-24);
        let h = float16::from_f32(min_subnormal);
        assert_eq!(h, float16::MIN_POSITIVE_SUBNORMAL);
        assert!(h.is_subnormal());
        assert_eq!(float16::MIN_POSITIVE_SUBNORMAL.to_f32(), min_subnormal);
    }

    /// Every half-precision bit pattern survives a trip through `f32`.
    #[test]
    fn test_roundtrip_all_bit_patterns() {
        for bits in 0..=u16::MAX {
            let h = float16::from_bits(bits);
            // NaNs come back with the quiet bit (0x0200) forced on.
            let expected = if h.is_nan() { bits | 0x0200 } else { bits };
            assert_eq!(
                float16::from_f32(h.to_f32()).to_bits(),
                expected,
                "bits = {:#06x}",
                bits
            );
        }
    }

    /// Values beyond the finite range saturate to the matching infinity.
    #[test]
    fn test_overflow() {
        let too_large = 70000.0f32;
        assert_eq!(float16::from_f32(too_large), float16::INFINITY);
        assert_eq!(float16::from_f32(-too_large), float16::NEG_INFINITY);

        // 65520 is the exact midpoint between MAX and the next (unrepresentable)
        // step; ties-to-even sends it to infinity, anything below stays at MAX.
        assert_eq!(float16::from_f32(65519.0), float16::MAX);
        assert_eq!(float16::from_f32(65520.0), float16::INFINITY);
    }

    /// Values below the subnormal range flush to a correctly signed zero.
    #[test]
    fn test_underflow() {
        let very_small = 2.0f32.powi(-30);
        let h = float16::from_f32(very_small);
        assert!(h.is_zero() || h.is_subnormal());
        assert_eq!(h, float16::ZERO);
        assert_eq!(float16::from_f32(-very_small), float16::NEG_ZERO);

        // 2^-25 (f32 bits 0x3300_0000) is exactly half of the smallest
        // subnormal: the tie goes to the even neighbour, zero.
        assert_eq!(float16::from_f32(f32::from_bits(0x3300_0000)), float16::ZERO);
        // One f32 ULP above the tie rounds up to the smallest subnormal.
        assert_eq!(
            float16::from_f32(f32::from_bits(0x3300_0001)),
            float16::MIN_POSITIVE_SUBNORMAL
        );
        // f32 subnormals are far below the half-precision range.
        assert_eq!(float16::from_f32(f32::from_bits(0x0000_0001)), float16::ZERO);
    }

    /// Exactly representable values convert unchanged; others round to
    /// nearest with ties to even.
    #[test]
    fn test_rounding() {
        let one = float16::from_f32(1.0);
        assert_eq!(one.to_f32(), 1.0);

        let one_half = float16::from_f32(1.5);
        assert_eq!(one_half.to_f32(), 1.5);

        // 1 + 2^-11: tie between 0x3C00 (even) and 0x3C01 -> rounds down.
        assert_eq!(float16::from_f32(f32::from_bits(0x3F80_1000)).to_bits(), 0x3C00);
        // 1 + 3 * 2^-11: tie between 0x3C01 and 0x3C02 (even) -> rounds up.
        assert_eq!(float16::from_f32(f32::from_bits(0x3F80_3000)).to_bits(), 0x3C02);
        // One f32 ULP either side of the first tie.
        assert_eq!(float16::from_f32(f32::from_bits(0x3F80_0FFF)).to_bits(), 0x3C00);
        assert_eq!(float16::from_f32(f32::from_bits(0x3F80_1001)).to_bits(), 0x3C01);
        // Rounding up an all-ones fraction carries into the exponent (-> 2.0).
        assert_eq!(float16::from_f32(f32::from_bits(0x3FFF_F000)).to_bits(), 0x4000);
    }

    /// Operators and sign helpers produce exact results for exact inputs.
    #[test]
    fn test_arithmetic() {
        let a = float16::from_f32(1.5);
        let b = float16::from_f32(2.5);

        assert_eq!((a + b).to_f32(), 4.0);
        assert_eq!((b - a).to_f32(), 1.0);
        assert_eq!((a * b).to_f32(), 3.75);
        assert_eq!((float16::from_f32(3.0) / a).to_f32(), 2.0);
        assert_eq!((-a).to_f32(), -1.5);
        assert_eq!(a.abs().to_f32(), 1.5);
        assert_eq!((-a).abs().to_f32(), 1.5);

        // Results outside the finite range saturate to infinity.
        assert_eq!(float16::MAX + float16::MAX, float16::INFINITY);
    }

    /// Bitwise `==`, numeric `eq_value`, and the ordering helpers.
    #[test]
    fn test_comparison() {
        let a = float16::from_f32(1.0);
        let b = float16::from_f32(2.0);
        let nan = float16::NAN;

        assert_eq!(a, a);
        assert_ne!(a, b);

        assert!(a.eq_value(a));
        assert!(!a.eq_value(b));
        assert!(!nan.eq_value(nan));
        assert!(float16::ZERO.eq_value(float16::NEG_ZERO));

        assert_eq!(a.partial_cmp_value(b), Some(Ordering::Less));
        assert_eq!(b.partial_cmp_value(a), Some(Ordering::Greater));
        assert_eq!(a.partial_cmp_value(a), Some(Ordering::Equal));
        assert_eq!(nan.partial_cmp_value(a), None);

        // Comparison operators go through `PartialOrd`.
        assert!(a < b);
        assert!(b > a);
        assert!(!(nan < a));
        assert!(!(nan >= a));

        // Total order distinguishes the zeros and places positive NaN last.
        assert_eq!(float16::NEG_ZERO.total_cmp(float16::ZERO), Ordering::Less);
        assert_eq!(a.total_cmp(b), Ordering::Less);
        assert_eq!(nan.total_cmp(float16::INFINITY), Ordering::Greater);
    }

    /// Classification predicates on a normal and a subnormal value.
    #[test]
    fn test_classification() {
        assert!(float16::from_f32(1.0).is_normal());
        assert!(float16::from_f32(1.0).is_finite());
        assert!(!float16::from_f32(1.0).is_zero());
        assert!(!float16::from_f32(1.0).is_subnormal());
        assert!(float16::from_f32(1.0).is_sign_positive());

        assert!(float16::MIN_POSITIVE_SUBNORMAL.is_subnormal());
        assert!(!float16::MIN_POSITIVE_SUBNORMAL.is_normal());
        assert!(float16::MIN_POSITIVE_SUBNORMAL.is_finite());
    }

    /// `Display` prints the widened value; `Debug` wraps it in `float16(..)`.
    #[test]
    fn test_formatting() {
        let h = float16::from_f32(1.5);
        assert_eq!(format!("{}", h), "1.5");
        assert_eq!(format!("{:?}", h), "float16(1.5)");
    }
}