use std::cmp::Ordering;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::ops::{Add, Div, Mul, Neg, Sub};
/// A 16-bit floating-point number stored as its raw bit pattern.
///
/// The layout is 1 sign bit, 5 exponent bits (bias 15) and 10 mantissa bits.
/// Arithmetic is performed by widening both operands to `f32`, computing
/// there, and narrowing the result with round-to-nearest, ties-to-even.
#[repr(transparent)]
#[derive(Copy, Clone, Default)]
#[allow(non_camel_case_types)]
pub struct float16(u16);

/// Bit 15: the sign bit.
const SIGN_MASK: u16 = 0x8000;
/// Bits 10..=14: the biased exponent field.
const EXP_MASK: u16 = 0x7C00;
/// Bits 0..=9: the mantissa (fraction) field.
const MANTISSA_MASK: u16 = 0x03FF;
/// Bit position of the lowest exponent bit.
const EXP_SHIFT: u32 = 10;
/// Exponent field value with all five bits set; marks infinities and NaNs.
const MAX_EXP: i32 = 31;
/// Bit pattern of positive infinity (all-ones exponent, zero mantissa).
const INFINITY_BITS: u16 = 0x7C00;
/// Bit pattern of negative infinity.
const NEG_INFINITY_BITS: u16 = 0xFC00;
/// Bit pattern of the canonical quiet NaN (top mantissa bit set).
const QUIET_NAN_BITS: u16 = 0x7E00;

impl float16 {
    /// Positive zero.
    pub const ZERO: Self = Self::from_bits(0x0000);
    /// Negative zero.
    pub const NEG_ZERO: Self = Self::from_bits(0x8000);
    /// Positive infinity.
    pub const INFINITY: Self = Self::from_bits(INFINITY_BITS);
    /// Negative infinity.
    pub const NEG_INFINITY: Self = Self::from_bits(NEG_INFINITY_BITS);
    /// A quiet NaN with positive sign and no extra payload.
    pub const NAN: Self = Self::from_bits(QUIET_NAN_BITS);
    /// Largest finite value (65504).
    pub const MAX: Self = Self::from_bits(0x7BFF);
    /// Smallest positive normal value (2^-14).
    pub const MIN_POSITIVE: Self = Self::from_bits(0x0400);
    /// Smallest positive subnormal value (2^-24).
    pub const MIN_POSITIVE_SUBNORMAL: Self = Self::from_bits(0x0001);

    /// Builds a value directly from its raw 16-bit representation.
    #[inline(always)]
    pub const fn from_bits(bits: u16) -> Self {
        float16(bits)
    }

    /// Returns the raw 16-bit representation.
    #[inline(always)]
    pub const fn to_bits(self) -> u16 {
        let float16(raw) = self;
        raw
    }

    /// Exponent and mantissa bits with the sign bit cleared.
    #[inline]
    const fn magnitude(self) -> u16 {
        self.0 & !SIGN_MASK
    }

    /// Shifts `value` right by `shift` bits, rounding to nearest with ties
    /// going to the even result.
    ///
    /// `shift` must be at least 1; callers in this file pass 13..=24.
    #[inline]
    fn shr_round_ties_even(value: u32, shift: u32) -> u32 {
        let half = 1u32 << (shift - 1);
        let quotient = value >> shift;
        // Everything that the shift discards.
        let remainder = value & ((half << 1) - 1);
        match remainder.cmp(&half) {
            Ordering::Greater => quotient + 1,
            // Exactly half-way: bump only when that makes the result even.
            Ordering::Equal => quotient + (quotient & 1),
            Ordering::Less => quotient,
        }
    }

    /// Narrows an `f32` to half precision using round-to-nearest, ties-to-even.
    ///
    /// * Infinities map to the infinity of the same sign.
    /// * NaNs stay NaN: the sign and the top ten payload bits are kept and the
    ///   quiet bit is forced on.
    /// * Magnitudes too large for a finite half become infinity.
    /// * Magnitudes too small for the smallest subnormal become signed zero.
    pub fn from_f32(value: f32) -> Self {
        let raw = value.to_bits();
        let sign16 = ((raw >> 16) as u16) & SIGN_MASK;
        let biased32 = ((raw >> 23) & 0xFF) as i32;
        let frac32 = raw & 0x007F_FFFF;

        if biased32 == 0xFF {
            if frac32 == 0 {
                return Self(sign16 | INFINITY_BITS);
            }
            let payload = ((frac32 >> 13) as u16) & MANTISSA_MASK;
            return Self(sign16 | QUIET_NAN_BITS | payload);
        }

        // Exponent re-expressed with the half-precision bias.
        let rebiased = biased32 - 127 + 15;
        let magnitude = if rebiased >= MAX_EXP {
            u32::from(INFINITY_BITS)
        } else if rebiased > 0 {
            // Normal result. Adding (rather than OR-ing) the rounded mantissa
            // lets a rounding carry spill into the exponent field, which also
            // yields infinity when the exponent was already the largest
            // finite one.
            ((rebiased as u32) << EXP_SHIFT) + Self::shr_round_ties_even(frac32, 13)
        } else if rebiased >= -10 {
            // Subnormal result: restore the implicit leading one and shift it
            // into place. A carry out of the mantissa produces the smallest
            // normal number, which is the correct encoding.
            let significand = frac32 | (1u32 << 23);
            Self::shr_round_ties_even(significand, (14 - rebiased) as u32)
        } else {
            // Below half of the smallest subnormal (this also covers f32
            // zeros and f32 subnormals): flush to signed zero.
            0
        };
        Self(sign16 | magnitude as u16)
    }

    /// Widens to `f32`.
    ///
    /// Subnormal inputs are renormalised, NaN inputs keep their sign and
    /// payload bits, and zeros keep their sign.
    pub fn to_f32(self) -> f32 {
        let sign32 = u32::from(self.0 & SIGN_MASK) << 16;
        let biased = i32::from((self.0 & EXP_MASK) >> EXP_SHIFT);
        let frac = u32::from(self.0 & MANTISSA_MASK);

        let magnitude = match (biased, frac) {
            (0, 0) => 0,
            (0, _) => {
                // Number of left shifts needed to move the highest set
                // mantissa bit up to the implicit-one position (bit 10).
                let lift = frac.leading_zeros() - 21;
                let exp32 = 127 - 14 - lift;
                let frac32 = ((frac << lift) & u32::from(MANTISSA_MASK)) << 13;
                (exp32 << 23) | frac32
            }
            // Infinity when `frac` is zero, NaN with payload otherwise.
            (MAX_EXP, _) => 0x7F80_0000 | (frac << 13),
            _ => (((biased - 15 + 127) as u32) << 23) | (frac << 13),
        };
        f32::from_bits(sign32 | magnitude)
    }

    /// Returns `true` for any NaN bit pattern.
    #[inline]
    pub fn is_nan(self) -> bool {
        self.magnitude() > EXP_MASK
    }

    /// Returns `true` for positive or negative infinity.
    #[inline]
    pub fn is_infinite(self) -> bool {
        self.magnitude() == EXP_MASK
    }

    /// Returns `true` when the value is neither infinite nor NaN.
    #[inline]
    pub fn is_finite(self) -> bool {
        self.magnitude() < EXP_MASK
    }

    /// Returns `true` when the value is neither zero, subnormal, infinite nor NaN.
    #[inline]
    pub fn is_normal(self) -> bool {
        let field = self.0 & EXP_MASK;
        !(field == 0 || field == EXP_MASK)
    }

    /// Returns `true` for non-zero values whose exponent field is zero.
    #[inline]
    pub fn is_subnormal(self) -> bool {
        let mag = self.magnitude();
        mag != 0 && mag <= MANTISSA_MASK
    }

    /// Returns `true` for positive and negative zero.
    #[inline]
    pub fn is_zero(self) -> bool {
        self.magnitude() == 0
    }

    /// Returns `true` when the sign bit is set (including `-0.0` and negative NaNs).
    #[inline]
    pub fn is_sign_negative(self) -> bool {
        self.0 >= SIGN_MASK
    }

    /// Returns `true` when the sign bit is clear (including `+0.0` and positive NaNs).
    #[inline]
    pub fn is_sign_positive(self) -> bool {
        self.0 < SIGN_MASK
    }

    /// Numeric equality: NaN never equals anything and `+0.0 == -0.0`.
    #[inline]
    pub fn eq_value(self, other: Self) -> bool {
        let neither_nan = !self.is_nan() && !other.is_nan();
        let same_bits = self.0 == other.0;
        let both_zero = self.is_zero() && other.is_zero();
        neither_nan && (same_bits || both_zero)
    }

    /// Numeric partial ordering; `None` when either operand is NaN.
    #[inline]
    pub fn partial_cmp_value(self, other: Self) -> Option<Ordering> {
        let (lhs, rhs) = (self.to_f32(), other.to_f32());
        lhs.partial_cmp(&rhs)
    }

    /// Total ordering obtained by applying `f32::total_cmp` to the widened values.
    #[inline]
    pub fn total_cmp(self, other: Self) -> Ordering {
        let (lhs, rhs) = (self.to_f32(), other.to_f32());
        lhs.total_cmp(&rhs)
    }

    /// Sum of `self` and `rhs`, computed in `f32` and rounded back.
    #[inline]
    #[allow(clippy::should_implement_trait)]
    pub fn add(self, rhs: Self) -> Self {
        let wide = self.to_f32() + rhs.to_f32();
        Self::from_f32(wide)
    }

    /// Difference `self - rhs`, computed in `f32` and rounded back.
    #[inline]
    #[allow(clippy::should_implement_trait)]
    pub fn sub(self, rhs: Self) -> Self {
        let wide = self.to_f32() - rhs.to_f32();
        Self::from_f32(wide)
    }

    /// Product of `self` and `rhs`, computed in `f32` and rounded back.
    #[inline]
    #[allow(clippy::should_implement_trait)]
    pub fn mul(self, rhs: Self) -> Self {
        let wide = self.to_f32() * rhs.to_f32();
        Self::from_f32(wide)
    }

    /// Quotient `self / rhs`, computed in `f32` and rounded back.
    #[inline]
    #[allow(clippy::should_implement_trait)]
    pub fn div(self, rhs: Self) -> Self {
        let wide = self.to_f32() / rhs.to_f32();
        Self::from_f32(wide)
    }

    /// Flips the sign bit; every other bit (including NaN payloads) is untouched.
    #[inline]
    #[allow(clippy::should_implement_trait)]
    pub fn neg(self) -> Self {
        Self::from_bits(self.to_bits() ^ SIGN_MASK)
    }

    /// Clears the sign bit; every other bit is untouched.
    #[inline]
    pub fn abs(self) -> Self {
        Self::from_bits(self.magnitude())
    }
}
/// Bitwise equality: two values are equal exactly when their raw bit patterns
/// match.
///
/// Consequently a NaN compares equal to itself and `ZERO != NEG_ZERO`. Use
/// `eq_value` for numeric comparison.
impl PartialEq for float16 {
    #[inline]
    fn eq(&self, rhs: &Self) -> bool {
        self.to_bits() == rhs.to_bits()
    }
}
// Equality compares raw bits (see `PartialEq` above), so it is reflexive even
// for NaN bit patterns, which is what `Eq` requires.
impl Eq for float16 {}
/// Hashes the raw bit pattern, matching the bitwise `PartialEq`/`Eq` impls.
impl Hash for float16 {
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        let raw: u16 = self.to_bits();
        raw.hash(state);
    }
}
/// Numeric ordering of the widened `f32` values; `None` when either side is NaN.
///
/// NOTE(review): `PartialEq` for this type compares raw bits, so the two
/// traits disagree on `ZERO` vs `NEG_ZERO` (unequal, yet ordered as `Equal`)
/// and on NaN (equal to itself, yet unordered). Confirm that callers relying
/// on the `PartialOrd`/`PartialEq` consistency contract are not affected.
impl PartialOrd for float16 {
    #[inline]
    fn partial_cmp(&self, rhs: &Self) -> Option<Ordering> {
        let (left, right) = (self.to_f32(), rhs.to_f32());
        left.partial_cmp(&right)
    }
}
impl Add for float16 {
type Output = Self;
#[inline]
fn add(self, rhs: Self) -> Self {
Self::add(self, rhs)
}
}
impl Sub for float16 {
type Output = Self;
#[inline]
fn sub(self, rhs: Self) -> Self {
Self::sub(self, rhs)
}
}
impl Mul for float16 {
type Output = Self;
#[inline]
fn mul(self, rhs: Self) -> Self {
Self::mul(self, rhs)
}
}
impl Div for float16 {
type Output = Self;
#[inline]
fn div(self, rhs: Self) -> Self {
Self::div(self, rhs)
}
}
impl Neg for float16 {
type Output = Self;
#[inline]
fn neg(self) -> Self {
Self::neg(self)
}
}
/// Formats the value exactly as its widened `f32` would be formatted.
///
/// The caller's formatter is handed to `f32`'s `Display` impl directly, so
/// width, precision, fill/alignment and sign flags (for example `{:8.3}` or
/// `{:+}`) are honoured. A plain `{}` produces the same text as before.
impl fmt::Display for float16 {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `write!(f, "{}", x)` would start a fresh, flag-less format spec and
        // silently drop whatever the caller asked for.
        fmt::Display::fmt(&self.to_f32(), f)
    }
}
/// Renders as `float16(<value>)`, where `<value>` is the `Display` form of the
/// widened `f32`.
impl fmt::Debug for float16 {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let widened: f32 = self.to_f32();
        f.write_fmt(format_args!("float16({})", widened))
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Both signed zeros carry the expected bits and classify as zero.
    #[test]
    fn test_zero() {
        let (pos, neg) = (float16::ZERO, float16::NEG_ZERO);

        assert_eq!(pos.to_bits(), 0x0000);
        assert!(pos.is_zero());
        assert!(!pos.is_sign_negative());

        assert_eq!(neg.to_bits(), 0x8000);
        assert!(neg.is_zero());
        assert!(neg.is_sign_negative());
    }

    /// Infinity constants have the expected bits and classification.
    #[test]
    fn test_infinity() {
        let (pos, neg) = (float16::INFINITY, float16::NEG_INFINITY);

        assert_eq!(pos.to_bits(), 0x7C00);
        assert!(pos.is_infinite());
        assert!(!pos.is_nan());

        assert_eq!(neg.to_bits(), 0xFC00);
        assert!(neg.is_infinite());
        assert!(neg.is_sign_negative());
    }

    /// The NaN constant is NaN and neither infinite nor finite.
    #[test]
    fn test_nan() {
        let nan = float16::NAN;
        assert!(nan.is_nan());
        assert!(!nan.is_infinite());
        assert!(!nan.is_finite());
    }

    /// Infinities, zeros and NaN survive conversion in both directions.
    #[test]
    fn test_special_values_conversion() {
        // f32 -> f16 and back for the infinities.
        assert_eq!(float16::from_f32(f32::INFINITY), float16::INFINITY);
        assert_eq!(float16::from_f32(f32::NEG_INFINITY), float16::NEG_INFINITY);
        assert_eq!(float16::INFINITY.to_f32(), f32::INFINITY);
        assert_eq!(float16::NEG_INFINITY.to_f32(), f32::NEG_INFINITY);

        // Signed zeros.
        assert_eq!(float16::from_f32(0.0), float16::ZERO);
        assert_eq!(float16::from_f32(-0.0), float16::NEG_ZERO);
        assert_eq!(float16::ZERO.to_f32(), 0.0);

        // NaN in either direction stays NaN.
        assert!(float16::from_f32(f32::NAN).is_nan());
        assert!(float16::NAN.to_f32().is_nan());
    }

    /// The extreme finite constants convert exactly.
    #[test]
    fn test_max_min_values() {
        let largest = 65504.0f32;
        assert_eq!(float16::from_f32(largest), float16::MAX);
        assert_eq!(float16::MAX.to_f32(), largest);

        let smallest_normal = 2.0f32.powi(-14);
        assert_eq!(float16::from_f32(smallest_normal), float16::MIN_POSITIVE);
        assert_eq!(float16::MIN_POSITIVE.to_f32(), smallest_normal);

        let smallest_subnormal = 2.0f32.powi(-24);
        let narrowed = float16::from_f32(smallest_subnormal);
        assert_eq!(narrowed, float16::MIN_POSITIVE_SUBNORMAL);
        assert!(narrowed.is_subnormal());
    }

    /// Magnitudes beyond the finite range saturate to infinity.
    #[test]
    fn test_overflow() {
        let huge = 70000.0f32;
        assert_eq!(float16::from_f32(huge), float16::INFINITY);
        assert_eq!(float16::from_f32(-huge), float16::NEG_INFINITY);
    }

    /// Magnitudes far below the subnormal range collapse towards zero.
    #[test]
    fn test_underflow() {
        let tiny = 2.0f32.powi(-30);
        let narrowed = float16::from_f32(tiny);
        assert!(narrowed.is_zero() || narrowed.is_subnormal());
    }

    /// Exactly representable values round-trip unchanged.
    #[test]
    fn test_rounding() {
        let unit = float16::from_f32(1.0);
        assert_eq!(unit.to_f32(), 1.0);

        let three_halves = float16::from_f32(1.5);
        assert_eq!(three_halves.to_f32(), 1.5);
    }

    /// Operator overloads and `abs` on exactly representable operands.
    #[test]
    fn test_arithmetic() {
        let x = float16::from_f32(1.5);
        let y = float16::from_f32(2.5);

        assert_eq!((x + y).to_f32(), 4.0);
        assert_eq!((y - x).to_f32(), 1.0);
        assert_eq!((x * y).to_f32(), 3.75);
        assert_eq!((-x).to_f32(), -1.5);

        assert_eq!(x.abs().to_f32(), 1.5);
        assert_eq!((-x).abs().to_f32(), 1.5);
    }

    /// Bitwise `==` versus the numeric comparison helpers.
    #[test]
    fn test_comparison() {
        let lo = float16::from_f32(1.0);
        let hi = float16::from_f32(2.0);
        let nan = float16::NAN;

        // Trait equality.
        assert_eq!(lo, lo);
        assert_ne!(lo, hi);

        // Numeric equality.
        assert!(lo.eq_value(lo));
        assert!(!lo.eq_value(hi));
        assert!(!nan.eq_value(nan));
        assert!(float16::ZERO.eq_value(float16::NEG_ZERO));

        // Numeric ordering.
        assert_eq!(lo.partial_cmp_value(hi), Some(Ordering::Less));
        assert_eq!(hi.partial_cmp_value(lo), Some(Ordering::Greater));
        assert_eq!(lo.partial_cmp_value(lo), Some(Ordering::Equal));
        assert_eq!(nan.partial_cmp_value(lo), None);
    }

    /// Classification predicates on a normal and a subnormal value.
    #[test]
    fn test_classification() {
        let unit = float16::from_f32(1.0);
        assert!(unit.is_normal());
        assert!(unit.is_finite());
        assert!(!unit.is_zero());
        assert!(!unit.is_subnormal());

        let denormal = float16::MIN_POSITIVE_SUBNORMAL;
        assert!(denormal.is_subnormal());
        assert!(!denormal.is_normal());
    }
}