use core::cmp::Ordering;
use core::fmt::{Debug, Display};
use core::ops::{
Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Rem, RemAssign, Sub, SubAssign,
};
/// Half-precision floating point value stored as its raw 16-bit pattern.
///
/// The methods in this file interpret the bits as: sign in bit 15
/// (`0x8000`), a 5-bit exponent in bits 10..=14 and a 10-bit mantissa in
/// bits 0..=9. The derived `Default` yields the all-zero pattern, i.e.
/// positive zero.
#[derive(Clone, Copy, Default)]
#[repr(transparent)]
pub struct F16(u16);
/// Constants, bit-level accessors and classification helpers.
impl F16 {
    /// Positive zero.
    pub const ZERO: Self = Self(0x0000);
    /// The value `1.0`.
    pub const ONE: Self = Self(0x3C00);
    /// The value `-1.0`.
    pub const NEG_ONE: Self = Self(0xBC00);
    /// The value `2.0`.
    pub const TWO: Self = Self(0x4000);
    /// The value `0.5`.
    pub const HALF: Self = Self(0x3800);
    /// Nearest half-precision value to pi (3.140625).
    pub const PI: Self = Self(0x4248);
    /// Nearest half-precision value to 2*pi (6.28125).
    pub const TWO_PI: Self = Self(0x4648);
    /// Nearest half-precision value to pi/2.
    pub const FRAC_PI_2: Self = Self(0x3E48);
    /// Nearest half-precision value to pi/4.
    pub const FRAC_PI_4: Self = Self(0x3A48);
    /// Nearest half-precision value to Euler's number (2.71875).
    pub const E: Self = Self(0x4170);
    /// Nearest half-precision value to ln(2).
    pub const LN_2: Self = Self(0x398C);
    /// Nearest half-precision value to 1/sqrt(2) (0.70703125).
    ///
    /// Encoded as exponent 14, mantissa 0x1A8: `0.5 * (1 + 424/1024)`.
    pub const FRAC_1_SQRT_2: Self = Self(0x39A8);
    /// Positive infinity.
    pub const INFINITY: Self = Self(0x7C00);
    /// Negative infinity.
    pub const NEG_INFINITY: Self = Self(0xFC00);
    /// A quiet NaN with the sign bit clear.
    pub const NAN: Self = Self(0x7E00);
    /// Difference between `1.0` and the next larger value (2^-10).
    pub const EPSILON: Self = Self(0x1400);
    /// Smallest positive normal value (2^-14).
    pub const MIN_POSITIVE: Self = Self(0x0400);
    /// Largest finite value (65504).
    pub const MAX: Self = Self(0x7BFF);
    /// Most negative finite value (-65504).
    pub const MIN: Self = Self(0xFBFF);

    /// Builds a value directly from its raw bit pattern.
    #[inline]
    #[must_use]
    pub const fn from_bits(bits: u16) -> Self {
        Self(bits)
    }

    /// Returns the raw bit pattern.
    #[inline]
    #[must_use]
    pub const fn to_bits(self) -> u16 {
        self.0
    }

    /// `true` when the exponent is all ones and the mantissa is non-zero.
    #[inline]
    #[must_use]
    pub const fn is_nan(self) -> bool {
        (self.0 & 0x7FFF) > 0x7C00
    }

    /// `true` for positive or negative infinity.
    #[inline]
    #[must_use]
    pub const fn is_infinite(self) -> bool {
        (self.0 & 0x7FFF) == 0x7C00
    }

    /// `true` when the value is neither infinite nor NaN.
    #[inline]
    #[must_use]
    pub const fn is_finite(self) -> bool {
        (self.0 & 0x7C00) != 0x7C00
    }

    /// `true` for both `+0.0` and `-0.0`.
    #[inline]
    #[must_use]
    pub const fn is_zero(self) -> bool {
        (self.0 & 0x7FFF) == 0
    }

    /// `true` when the sign bit is set (including `-0.0` and negative NaNs).
    #[inline]
    #[must_use]
    pub const fn is_sign_negative(self) -> bool {
        (self.0 & 0x8000) != 0
    }

    /// `true` when the sign bit is clear (including `+0.0` and positive NaNs).
    #[inline]
    #[must_use]
    pub const fn is_sign_positive(self) -> bool {
        (self.0 & 0x8000) == 0
    }

    /// `true` when the value is neither zero, subnormal, infinite nor NaN.
    #[inline]
    #[must_use]
    pub const fn is_normal(self) -> bool {
        let exp = (self.0 >> 10) & 0x1F;
        exp != 0 && exp != 0x1F
    }

    /// Clears the sign bit.
    #[inline]
    #[must_use]
    pub const fn abs(self) -> Self {
        Self(self.0 & 0x7FFF)
    }

    /// Flips the sign bit.
    #[inline]
    #[must_use]
    pub const fn neg(self) -> Self {
        Self(self.0 ^ 0x8000)
    }

    /// Returns the magnitude of `self` combined with the sign bit of `sign`.
    #[inline]
    #[must_use]
    pub const fn copysign(self, sign: Self) -> Self {
        Self((self.0 & 0x7FFF) | (sign.0 & 0x8000))
    }

    /// Returns the sign of the value as `-1.0`, `0.0` or `1.0`.
    ///
    /// NaN maps to NaN. Note that, unlike `f32::signum`, both zeros map to
    /// positive zero rather than to `1.0` / `-1.0`.
    #[inline]
    #[must_use]
    pub fn signum(self) -> Self {
        if self.is_nan() {
            Self::NAN
        } else if self.is_zero() {
            Self::ZERO
        } else if self.is_sign_negative() {
            Self::NEG_ONE
        } else {
            Self::ONE
        }
    }

    /// Returns `1.0 / self`.
    #[inline]
    #[must_use]
    pub fn recip(self) -> Self {
        Self::ONE / self
    }

    /// Classifies the value from its exponent and mantissa fields.
    #[inline]
    #[must_use]
    pub fn classify(self) -> core::num::FpCategory {
        use core::num::FpCategory;

        let exp = (self.0 >> 10) & 0x1F;
        let mantissa = self.0 & 0x03FF;
        match (exp, mantissa) {
            (0x1F, 0) => FpCategory::Infinite,
            (0x1F, _) => FpCategory::Nan,
            (0, 0) => FpCategory::Zero,
            (0, _) => FpCategory::Subnormal,
            _ => FpCategory::Normal,
        }
    }
}
/// Conversions between half precision and `f32`.
impl F16 {
    /// Converts an `f32` to half precision, rounding to nearest (ties to even).
    ///
    /// * NaN maps to [`F16::NAN`] (sign and payload are not preserved).
    /// * Infinities and values too large for half precision map to the
    ///   infinity of the same sign.
    /// * Values below the normal range become subnormals; values smaller
    ///   than half of the smallest subnormal become a signed zero.
    #[inline]
    #[must_use]
    pub fn from_f32(value: f32) -> Self {
        let bits = value.to_bits();
        let sign = (bits >> 16) & 0x8000;
        let exponent = ((bits >> 23) & 0xFF) as i32;
        let mantissa = bits & 0x007F_FFFF;

        if exponent == 255 {
            if mantissa != 0 {
                return Self::NAN;
            }
            return Self((sign | 0x7C00) as u16);
        }
        // f32 zeros and f32 subnormals (< 2^-126) are far below the smallest
        // half-precision subnormal (2^-24), so they collapse to a signed zero.
        if exponent == 0 {
            return Self(sign as u16);
        }

        // Re-bias the exponent from f32 (127) to half precision (15).
        let new_exp = exponent - 127 + 15;
        if new_exp >= 31 {
            return Self((sign | 0x7C00) as u16);
        }

        if new_exp <= 0 {
            // Anything below 2^-25 rounds to zero.
            if new_exp < -10 {
                return Self(sign as u16);
            }
            // Subnormal result: make the implicit leading one explicit and
            // shift the 24-bit significand down to the 10-bit subnormal field.
            let full = mantissa | 0x0080_0000;
            let shift = (14 - new_exp) as u32; // 14..=24
            let truncated = full >> shift;
            let remainder = full & ((1u32 << shift) - 1);
            let halfway = 1u32 << (shift - 1);
            let round_up = remainder > halfway || (remainder == halfway && (truncated & 1) == 1);
            // A carry out of the mantissa correctly yields the smallest normal.
            return Self((sign | (truncated + u32::from(round_up))) as u16);
        }

        // Normal result: keep the top 10 mantissa bits and round on the 13
        // discarded ones. A carry propagates into the exponent, which also
        // turns values just above `MAX` into infinity.
        let base = ((new_exp as u32) << 10) | (mantissa >> 13);
        let remainder = mantissa & 0x1FFF;
        let round_up = remainder > 0x1000 || (remainder == 0x1000 && (base & 1) == 1);
        Self((sign | (base + u32::from(round_up))) as u16)
    }

    /// Converts to `f32`. Every finite half-precision value, including
    /// subnormals, is represented exactly.
    ///
    /// NaN inputs produce a quiet `f32` NaN carrying the original sign; the
    /// payload is not preserved.
    #[inline]
    #[must_use]
    pub fn to_f32(self) -> f32 {
        let sign = u32::from(self.0 & 0x8000) << 16;
        let exponent = (self.0 >> 10) & 0x1F;
        let mantissa = u32::from(self.0 & 0x03FF);

        if exponent == 0 {
            if mantissa == 0 {
                return f32::from_bits(sign);
            }
            // Subnormal: value = mantissa * 2^-24. Shift the mantissa left
            // until its leading one reaches bit 10 (the implicit-one slot);
            // `mantissa` has at most 10 significant bits, so 1 <= shift <= 10.
            let shift = mantissa.leading_zeros() - 21;
            let normalized = mantissa << shift;
            // After `shift` steps the value is 1.m * 2^(-14 - shift).
            let f32_exp = 127 - 14 - shift;
            let new_mant = (normalized & 0x03FF) << 13;
            return f32::from_bits(sign | (f32_exp << 23) | new_mant);
        }

        if exponent == 31 {
            let inf_or_nan = if mantissa == 0 {
                0x7F80_0000
            } else {
                0x7FC0_0000
            };
            return f32::from_bits(sign | inf_or_nan);
        }

        let new_exp = (i32::from(exponent) - 15 + 127) as u32;
        let new_mant = mantissa << 13;
        f32::from_bits(sign | (new_exp << 23) | new_mant)
    }
}
/// Conversions between half precision and `f64`, routed through `f32`.
impl F16 {
    /// Converts an `f64` by first narrowing it with an `as f32` cast and then
    /// handing the result to [`F16::from_f32`].
    #[inline]
    #[must_use]
    pub fn from_f64(value: f64) -> Self {
        let single = value as f32;
        Self::from_f32(single)
    }

    /// Widens to `f64` by way of [`F16::to_f32`].
    #[inline]
    #[must_use]
    pub fn to_f64(self) -> f64 {
        let single = self.to_f32();
        f64::from(single)
    }
}
/// Mathematical functions.
///
/// Every function widens its operands with [`F16::to_f32`], evaluates the
/// corresponding `f32` operation and narrows the result with
/// [`F16::from_f32`].
impl F16 {
    /// Square root.
    #[inline]
    #[must_use]
    pub fn sqrt(self) -> Self {
        Self::from_f32(self.to_f32().sqrt())
    }

    /// Sine (radians).
    #[inline]
    #[must_use]
    pub fn sin(self) -> Self {
        Self::from_f32(self.to_f32().sin())
    }

    /// Cosine (radians).
    #[inline]
    #[must_use]
    pub fn cos(self) -> Self {
        Self::from_f32(self.to_f32().cos())
    }

    /// Sine and cosine, returned as `(sin, cos)`.
    #[inline]
    #[must_use]
    pub fn sin_cos(self) -> (Self, Self) {
        let (s, c) = self.to_f32().sin_cos();
        (Self::from_f32(s), Self::from_f32(c))
    }

    /// Tangent (radians).
    #[inline]
    #[must_use]
    pub fn tan(self) -> Self {
        Self::from_f32(self.to_f32().tan())
    }

    /// Arcsine.
    #[inline]
    #[must_use]
    pub fn asin(self) -> Self {
        Self::from_f32(self.to_f32().asin())
    }

    /// Arccosine.
    #[inline]
    #[must_use]
    pub fn acos(self) -> Self {
        Self::from_f32(self.to_f32().acos())
    }

    /// Arctangent.
    #[inline]
    #[must_use]
    pub fn atan(self) -> Self {
        Self::from_f32(self.to_f32().atan())
    }

    /// Four-quadrant arctangent of `self` (y) and `other` (x).
    #[inline]
    #[must_use]
    pub fn atan2(self, other: Self) -> Self {
        Self::from_f32(self.to_f32().atan2(other.to_f32()))
    }

    /// `e^self`.
    #[inline]
    #[must_use]
    pub fn exp(self) -> Self {
        Self::from_f32(self.to_f32().exp())
    }

    /// `e^self - 1`.
    #[inline]
    #[must_use]
    pub fn exp_m1(self) -> Self {
        Self::from_f32(self.to_f32().exp_m1())
    }

    /// `2^self`.
    #[inline]
    #[must_use]
    pub fn exp2(self) -> Self {
        Self::from_f32(self.to_f32().exp2())
    }

    /// Natural logarithm.
    #[inline]
    #[must_use]
    pub fn ln(self) -> Self {
        Self::from_f32(self.to_f32().ln())
    }

    /// `ln(1 + self)`.
    #[inline]
    #[must_use]
    pub fn ln_1p(self) -> Self {
        Self::from_f32(self.to_f32().ln_1p())
    }

    /// Logarithm with respect to an arbitrary `base`.
    #[inline]
    #[must_use]
    pub fn log(self, base: Self) -> Self {
        Self::from_f32(self.to_f32().log(base.to_f32()))
    }

    /// Base-2 logarithm.
    #[inline]
    #[must_use]
    pub fn log2(self) -> Self {
        Self::from_f32(self.to_f32().log2())
    }

    /// Base-10 logarithm.
    #[inline]
    #[must_use]
    pub fn log10(self) -> Self {
        Self::from_f32(self.to_f32().log10())
    }

    /// `self` raised to a floating point power.
    #[inline]
    #[must_use]
    pub fn powf(self, exp: Self) -> Self {
        Self::from_f32(self.to_f32().powf(exp.to_f32()))
    }

    /// `self` raised to an integer power.
    #[inline]
    #[must_use]
    pub fn powi(self, n: i32) -> Self {
        Self::from_f32(self.to_f32().powi(n))
    }

    /// Cube root.
    #[inline]
    #[must_use]
    pub fn cbrt(self) -> Self {
        Self::from_f32(self.to_f32().cbrt())
    }

    /// `sqrt(self^2 + other^2)`.
    #[inline]
    #[must_use]
    pub fn hypot(self, other: Self) -> Self {
        Self::from_f32(self.to_f32().hypot(other.to_f32()))
    }

    /// `self * a + b`, evaluated as a fused operation in `f32` and then
    /// narrowed once.
    #[inline]
    #[must_use]
    pub fn mul_add(self, a: Self, b: Self) -> Self {
        Self::from_f32(self.to_f32().mul_add(a.to_f32(), b.to_f32()))
    }

    /// Largest integer less than or equal to `self`.
    #[inline]
    #[must_use]
    pub fn floor(self) -> Self {
        Self::from_f32(self.to_f32().floor())
    }

    /// Smallest integer greater than or equal to `self`.
    #[inline]
    #[must_use]
    pub fn ceil(self) -> Self {
        Self::from_f32(self.to_f32().ceil())
    }

    /// Nearest integer, with halfway cases rounded away from zero.
    #[inline]
    #[must_use]
    pub fn round(self) -> Self {
        Self::from_f32(self.to_f32().round())
    }

    /// Integer part, rounding toward zero.
    #[inline]
    #[must_use]
    pub fn trunc(self) -> Self {
        Self::from_f32(self.to_f32().trunc())
    }

    /// Fractional part.
    #[inline]
    #[must_use]
    pub fn fract(self) -> Self {
        Self::from_f32(self.to_f32().fract())
    }

    /// Smaller of two values, following `f32::min` for NaN handling.
    #[inline]
    #[must_use]
    pub fn min(self, other: Self) -> Self {
        Self::from_f32(self.to_f32().min(other.to_f32()))
    }

    /// Larger of two values, following `f32::max` for NaN handling.
    #[inline]
    #[must_use]
    pub fn max(self, other: Self) -> Self {
        Self::from_f32(self.to_f32().max(other.to_f32()))
    }

    /// Positive difference: `self - other` when `self > other`, otherwise
    /// zero. If either operand is NaN the result is NaN.
    ///
    /// This is the contract of `num_traits::Float::abs_sub` (C `fdim`); it
    /// is *not* the absolute value of the difference.
    #[inline]
    #[must_use]
    pub fn abs_sub(self, other: Self) -> Self {
        let (lhs, rhs) = (self.to_f32(), other.to_f32());
        if lhs <= rhs {
            Self::ZERO
        } else {
            // Also reached when a comparison with NaN is false, in which
            // case the subtraction propagates the NaN.
            Self::from_f32(lhs - rhs)
        }
    }

    /// Hyperbolic sine.
    #[inline]
    #[must_use]
    pub fn sinh(self) -> Self {
        Self::from_f32(self.to_f32().sinh())
    }

    /// Hyperbolic cosine.
    #[inline]
    #[must_use]
    pub fn cosh(self) -> Self {
        Self::from_f32(self.to_f32().cosh())
    }

    /// Hyperbolic tangent.
    #[inline]
    #[must_use]
    pub fn tanh(self) -> Self {
        Self::from_f32(self.to_f32().tanh())
    }

    /// Inverse hyperbolic sine.
    #[inline]
    #[must_use]
    pub fn asinh(self) -> Self {
        Self::from_f32(self.to_f32().asinh())
    }

    /// Inverse hyperbolic cosine.
    #[inline]
    #[must_use]
    pub fn acosh(self) -> Self {
        Self::from_f32(self.to_f32().acosh())
    }

    /// Inverse hyperbolic tangent.
    #[inline]
    #[must_use]
    pub fn atanh(self) -> Self {
        Self::from_f32(self.to_f32().atanh())
    }

    /// Converts radians to degrees.
    #[inline]
    #[must_use]
    pub fn to_degrees(self) -> Self {
        Self::from_f32(self.to_f32().to_degrees())
    }

    /// Converts degrees to radians.
    #[inline]
    #[must_use]
    pub fn to_radians(self) -> Self {
        Self::from_f32(self.to_f32().to_radians())
    }
}
/// Addition: both operands are widened to `f32`, summed, and the sum is
/// narrowed back with `from_f32`.
impl Add for F16 {
    type Output = Self;

    #[inline]
    fn add(self, rhs: Self) -> Self::Output {
        let lhs = self.to_f32();
        let rhs = rhs.to_f32();
        Self::from_f32(lhs + rhs)
    }
}
/// In-place addition, defined in terms of the by-value `+` operator.
impl AddAssign for F16 {
    #[inline]
    fn add_assign(&mut self, rhs: Self) {
        let sum = *self + rhs;
        *self = sum;
    }
}
/// Subtraction: both operands are widened to `f32`, subtracted, and the
/// difference is narrowed back with `from_f32`.
impl Sub for F16 {
    type Output = Self;

    #[inline]
    fn sub(self, rhs: Self) -> Self::Output {
        let lhs = self.to_f32();
        let rhs = rhs.to_f32();
        Self::from_f32(lhs - rhs)
    }
}
/// In-place subtraction, defined in terms of the by-value `-` operator.
impl SubAssign for F16 {
    #[inline]
    fn sub_assign(&mut self, rhs: Self) {
        let difference = *self - rhs;
        *self = difference;
    }
}
/// Multiplication: both operands are widened to `f32`, multiplied, and the
/// product is narrowed back with `from_f32`.
impl Mul for F16 {
    type Output = Self;

    #[inline]
    fn mul(self, rhs: Self) -> Self::Output {
        let lhs = self.to_f32();
        let rhs = rhs.to_f32();
        Self::from_f32(lhs * rhs)
    }
}
/// In-place multiplication, defined in terms of the by-value `*` operator.
impl MulAssign for F16 {
    #[inline]
    fn mul_assign(&mut self, rhs: Self) {
        let product = *self * rhs;
        *self = product;
    }
}
/// Division: both operands are widened to `f32`, divided, and the quotient
/// is narrowed back with `from_f32`.
impl Div for F16 {
    type Output = Self;

    #[inline]
    fn div(self, rhs: Self) -> Self::Output {
        let lhs = self.to_f32();
        let rhs = rhs.to_f32();
        Self::from_f32(lhs / rhs)
    }
}
/// In-place division, defined in terms of the by-value `/` operator.
impl DivAssign for F16 {
    #[inline]
    fn div_assign(&mut self, rhs: Self) {
        let quotient = *self / rhs;
        *self = quotient;
    }
}
/// Remainder: both operands are widened to `f32`, the `f32` `%` operator is
/// applied, and the result is narrowed back with `from_f32`.
impl Rem for F16 {
    type Output = Self;

    #[inline]
    fn rem(self, rhs: Self) -> Self::Output {
        let lhs = self.to_f32();
        let rhs = rhs.to_f32();
        Self::from_f32(lhs % rhs)
    }
}
/// In-place remainder, defined in terms of the by-value `%` operator.
impl RemAssign for F16 {
    #[inline]
    fn rem_assign(&mut self, rhs: Self) {
        let remainder = *self % rhs;
        *self = remainder;
    }
}
impl Neg for F16 {
type Output = Self;
#[inline]
fn neg(self) -> Self::Output {
Self::neg(self)
}
}
/// Addition with a borrowed right-hand side; copies the operand and defers
/// to the by-value `+`.
impl Add<&F16> for F16 {
    type Output = Self;

    #[inline]
    fn add(self, rhs: &F16) -> Self {
        let rhs: F16 = *rhs;
        self + rhs
    }
}
/// Subtraction with a borrowed right-hand side; copies the operand and
/// defers to the by-value `-`.
impl Sub<&F16> for F16 {
    type Output = Self;

    #[inline]
    fn sub(self, rhs: &F16) -> Self {
        let rhs: F16 = *rhs;
        self - rhs
    }
}
/// Multiplication with a borrowed right-hand side; copies the operand and
/// defers to the by-value `*`.
impl Mul<&F16> for F16 {
    type Output = Self;

    #[inline]
    fn mul(self, rhs: &F16) -> Self {
        let rhs: F16 = *rhs;
        self * rhs
    }
}
/// Division with a borrowed right-hand side; copies the operand and defers
/// to the by-value `/`.
impl Div<&F16> for F16 {
    type Output = Self;

    #[inline]
    fn div(self, rhs: &F16) -> Self {
        let rhs: F16 = *rhs;
        self / rhs
    }
}
/// Remainder with a borrowed right-hand side; copies the operand and defers
/// to the by-value `%`.
impl Rem<&F16> for F16 {
    type Output = Self;

    #[inline]
    fn rem(self, rhs: &F16) -> Self {
        let rhs: F16 = *rhs;
        self % rhs
    }
}
/// In-place addition with a borrowed right-hand side.
impl AddAssign<&F16> for F16 {
    #[inline]
    fn add_assign(&mut self, rhs: &F16) {
        let rhs: F16 = *rhs;
        *self = *self + rhs;
    }
}
/// In-place subtraction with a borrowed right-hand side.
impl SubAssign<&F16> for F16 {
    #[inline]
    fn sub_assign(&mut self, rhs: &F16) {
        let rhs: F16 = *rhs;
        *self = *self - rhs;
    }
}
/// In-place multiplication with a borrowed right-hand side.
impl MulAssign<&F16> for F16 {
    #[inline]
    fn mul_assign(&mut self, rhs: &F16) {
        let rhs: F16 = *rhs;
        *self = *self * rhs;
    }
}
/// In-place division with a borrowed right-hand side.
impl DivAssign<&F16> for F16 {
    #[inline]
    fn div_assign(&mut self, rhs: &F16) {
        let rhs: F16 = *rhs;
        *self = *self / rhs;
    }
}
/// In-place remainder with a borrowed right-hand side.
impl RemAssign<&F16> for F16 {
    #[inline]
    fn rem_assign(&mut self, rhs: &F16) {
        let rhs: F16 = *rhs;
        *self = *self % rhs;
    }
}
/// Floating point equality on the raw bits: NaN is unequal to everything
/// (itself included), `+0.0` equals `-0.0`, and all other values compare by
/// bit pattern.
impl PartialEq for F16 {
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        // Magnitudes with the sign bit stripped.
        let lhs_mag = self.0 & 0x7FFF;
        let rhs_mag = other.0 & 0x7FFF;

        // Anything above the infinity pattern is a NaN.
        let any_nan = lhs_mag > 0x7C00 || rhs_mag > 0x7C00;
        let both_zero = (lhs_mag | rhs_mag) == 0;

        !any_nan && (both_zero || self.0 == other.0)
    }
}
/// Partial ordering: comparisons involving NaN yield `None`; all other
/// values are ordered by comparing their `f32` equivalents.
impl PartialOrd for F16 {
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        match (self.is_nan(), other.is_nan()) {
            (false, false) => {
                let lhs = self.to_f32();
                let rhs = other.to_f32();
                lhs.partial_cmp(&rhs)
            }
            _ => None,
        }
    }
}
/// Debug output of the form `F16(<value>)`, where the value is the `f32`
/// equivalent printed with its own `Debug` formatting.
impl Debug for F16 {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let widened = self.to_f32();
        f.write_fmt(format_args!("F16({:?})", widened))
    }
}
/// Displays the value exactly as its `f32` equivalent would be displayed.
///
/// The call is forwarded to `f32`'s `Display` implementation with the
/// caller's formatter, so width, precision, fill and sign flags (for
/// example `{:.2}` or `{:>8}`) are honoured instead of being discarded.
impl Display for F16 {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        Display::fmt(&self.to_f32(), f)
    }
}
impl From<f32> for F16 {
#[inline]
fn from(value: f32) -> Self {
Self::from_f32(value)
}
}
impl From<f64> for F16 {
#[inline]
fn from(value: f64) -> Self {
Self::from_f64(value)
}
}
impl From<F16> for f32 {
#[inline]
fn from(value: F16) -> Self {
value.to_f32()
}
}
impl From<F16> for f64 {
#[inline]
fn from(value: F16) -> Self {
value.to_f64()
}
}
/// Additive identity for `num_traits`.
impl num_traits::Zero for F16 {
    /// Returns positive zero.
    #[inline]
    fn zero() -> Self {
        F16::ZERO
    }

    /// `true` for both positive and negative zero (the sign bit is ignored).
    #[inline]
    fn is_zero(&self) -> bool {
        let magnitude = self.0 & 0x7FFF;
        magnitude == 0
    }
}
impl num_traits::One for F16 {
#[inline]
fn one() -> Self {
Self::ONE
}
}
/// String parsing for `num_traits`, delegated to the `f32` implementation.
impl num_traits::Num for F16 {
    type FromStrRadixErr = num_traits::ParseFloatError;

    /// Parses `str` in the given `radix` as an `f32` and narrows the result
    /// to half precision. Parse errors from the `f32` parser are returned
    /// unchanged.
    fn from_str_radix(str: &str, radix: u32) -> Result<Self, Self::FromStrRadixErr> {
        let parsed = <f32 as num_traits::Num>::from_str_radix(str, radix)?;
        Ok(Self::from_f32(parsed))
    }
}
/// Generic numeric casts into half precision.
impl num_traits::NumCast for F16 {
    /// Converts `n` to `f32` via `ToPrimitive::to_f32` and narrows the
    /// result; yields `None` when `n` has no `f32` representation.
    fn from<T: num_traits::ToPrimitive>(n: T) -> Option<Self> {
        let single = n.to_f32()?;
        Some(Self::from_f32(single))
    }
}
/// Conversions from half precision to primitive numeric types.
///
/// Integer conversions truncate toward zero and return `None` when the value
/// has no integer representation, as the `ToPrimitive` contract requires.
impl num_traits::ToPrimitive for F16 {
    /// Truncates toward zero. Returns `None` for NaN and the infinities;
    /// every finite half-precision value fits in an `i64`.
    fn to_i64(&self) -> Option<i64> {
        let value = F16::to_f32(*self);
        value.is_finite().then(|| value as i64)
    }

    /// Truncates toward zero. Returns `None` for NaN, the infinities and
    /// values less than or equal to `-1.0`; values in `(-1.0, 0.0)` truncate
    /// to zero.
    fn to_u64(&self) -> Option<u64> {
        let value = F16::to_f32(*self);
        (value.is_finite() && value > -1.0).then(|| value as u64)
    }

    /// Widening to `f32` always succeeds.
    fn to_f32(&self) -> Option<f32> {
        Some(F16::to_f32(*self))
    }

    /// Widening to `f64` always succeeds.
    fn to_f64(&self) -> Option<f64> {
        Some(F16::to_f64(*self))
    }
}
impl num_traits::Float for F16 {
fn nan() -> Self {
Self::NAN
}
fn infinity() -> Self {
Self::INFINITY
}
fn neg_infinity() -> Self {
Self::NEG_INFINITY
}
fn neg_zero() -> Self {
Self(0x8000)
}
fn min_value() -> Self {
Self::MIN
}
fn min_positive_value() -> Self {
Self::MIN_POSITIVE
}
fn max_value() -> Self {
Self::MAX
}
fn is_nan(self) -> bool {
F16::is_nan(self)
}
fn is_infinite(self) -> bool {
F16::is_infinite(self)
}
fn is_finite(self) -> bool {
F16::is_finite(self)
}
fn is_normal(self) -> bool {
F16::is_normal(self)
}
fn classify(self) -> core::num::FpCategory {
F16::classify(self)
}
fn floor(self) -> Self {
F16::floor(self)
}
fn ceil(self) -> Self {
F16::ceil(self)
}
fn round(self) -> Self {
F16::round(self)
}
fn trunc(self) -> Self {
F16::trunc(self)
}
fn fract(self) -> Self {
F16::fract(self)
}
fn abs(self) -> Self {
F16::abs(self)
}
fn signum(self) -> Self {
F16::signum(self)
}
fn is_sign_positive(self) -> bool {
F16::is_sign_positive(self)
}
fn is_sign_negative(self) -> bool {
F16::is_sign_negative(self)
}
fn mul_add(self, a: Self, b: Self) -> Self {
F16::mul_add(self, a, b)
}
fn recip(self) -> Self {
F16::recip(self)
}
fn powi(self, n: i32) -> Self {
F16::powi(self, n)
}
fn powf(self, n: Self) -> Self {
F16::powf(self, n)
}
fn sqrt(self) -> Self {
F16::sqrt(self)
}
fn exp(self) -> Self {
F16::exp(self)
}
fn exp2(self) -> Self {
F16::exp2(self)
}
fn ln(self) -> Self {
F16::ln(self)
}
fn log(self, base: Self) -> Self {
F16::log(self, base)
}
fn log2(self) -> Self {
F16::log2(self)
}
fn log10(self) -> Self {
F16::log10(self)
}
fn max(self, other: Self) -> Self {
F16::max(self, other)
}
fn min(self, other: Self) -> Self {
F16::min(self, other)
}
fn abs_sub(self, other: Self) -> Self {
F16::abs_sub(self, other)
}
fn cbrt(self) -> Self {
F16::cbrt(self)
}
fn hypot(self, other: Self) -> Self {
F16::hypot(self, other)
}
fn sin(self) -> Self {
F16::sin(self)
}
fn cos(self) -> Self {
F16::cos(self)
}
fn tan(self) -> Self {
F16::tan(self)
}
fn asin(self) -> Self {
F16::asin(self)
}
fn acos(self) -> Self {
F16::acos(self)
}
fn atan(self) -> Self {
F16::atan(self)
}
fn atan2(self, other: Self) -> Self {
F16::atan2(self, other)
}
fn sin_cos(self) -> (Self, Self) {
F16::sin_cos(self)
}
fn exp_m1(self) -> Self {
F16::exp_m1(self)
}
fn ln_1p(self) -> Self {
F16::ln_1p(self)
}
fn sinh(self) -> Self {
F16::sinh(self)
}
fn cosh(self) -> Self {
F16::cosh(self)
}
fn tanh(self) -> Self {
F16::tanh(self)
}
fn asinh(self) -> Self {
F16::asinh(self)
}
fn acosh(self) -> Self {
F16::acosh(self)
}
fn atanh(self) -> Self {
F16::atanh(self)
}
fn integer_decode(self) -> (u64, i16, i8) {
F16::to_f32(self).integer_decode()
}
fn epsilon() -> Self {
Self::EPSILON
}
fn to_degrees(self) -> Self {
F16::to_degrees(self)
}
fn to_radians(self) -> Self {
F16::to_radians(self)
}
fn copysign(self, sign: Self) -> Self {
F16::copysign(self, sign)
}
}
/// Mathematical constants for `num_traits`.
///
/// Constants with a precomputed inherent counterpart reuse it. The others
/// are either narrowed from the `f32` constant or derived from the
/// half-precision `PI` by a single division.
impl num_traits::FloatConst for F16 {
    fn E() -> Self {
        Self::E
    }

    fn FRAC_1_PI() -> Self {
        Self::ONE / Self::PI
    }

    /// Narrowed directly from the `f32` constant so the result is the
    /// half-precision value closest to 1/sqrt(2) (bit pattern `0x39A8`).
    fn FRAC_1_SQRT_2() -> Self {
        Self::from_f32(core::f32::consts::FRAC_1_SQRT_2)
    }

    fn FRAC_2_PI() -> Self {
        Self::TWO / Self::PI
    }

    /// Narrowed directly from the `f32` constant. Computing
    /// `TWO / sqrt(PI)` in half precision compounds two rounding steps and
    /// ends up one unit in the last place away from the nearest value.
    fn FRAC_2_SQRT_PI() -> Self {
        Self::from_f32(core::f32::consts::FRAC_2_SQRT_PI)
    }

    fn FRAC_PI_2() -> Self {
        Self::FRAC_PI_2
    }

    fn FRAC_PI_3() -> Self {
        Self::PI / Self::from_f32(3.0)
    }

    fn FRAC_PI_4() -> Self {
        Self::FRAC_PI_4
    }

    fn FRAC_PI_6() -> Self {
        Self::PI / Self::from_f32(6.0)
    }

    fn FRAC_PI_8() -> Self {
        Self::PI / Self::from_f32(8.0)
    }

    fn LN_10() -> Self {
        Self::from_f32(core::f32::consts::LN_10)
    }

    fn LN_2() -> Self {
        Self::LN_2
    }

    fn LOG10_E() -> Self {
        Self::from_f32(core::f32::consts::LOG10_E)
    }

    fn LOG2_E() -> Self {
        Self::from_f32(core::f32::consts::LOG2_E)
    }

    fn PI() -> Self {
        Self::PI
    }

    fn SQRT_2() -> Self {
        Self::from_f32(core::f32::consts::SQRT_2)
    }

    fn TAU() -> Self {
        Self::TWO_PI
    }
}
/// Implementation of the parent module's `Float` trait, forwarding to the
/// inherent constants and methods of `F16`.
impl super::Float for F16 {
    const ZERO: Self = F16::ZERO;
    const ONE: Self = F16::ONE;
    const TWO: Self = F16::TWO;
    const PI: Self = F16::PI;
    const TWO_PI: Self = F16::TWO_PI;

    #[inline]
    fn sin(self) -> Self {
        Self::sin(self)
    }

    #[inline]
    fn cos(self) -> Self {
        Self::cos(self)
    }

    #[inline]
    fn sin_cos(self) -> (Self, Self) {
        Self::sin_cos(self)
    }

    #[inline]
    fn sqrt(self) -> Self {
        Self::sqrt(self)
    }

    #[inline]
    fn abs(self) -> Self {
        Self::abs(self)
    }

    /// Converts through `f32` with an `as` cast before narrowing.
    #[inline]
    fn from_usize(n: usize) -> Self {
        let single = n as f32;
        F16::from_f32(single)
    }

    /// Converts through `f32` with an `as` cast before narrowing.
    #[inline]
    fn from_isize(n: isize) -> Self {
        let single = n as f32;
        F16::from_f32(single)
    }

    /// Forwards to the inherent `F16::from_f64`.
    #[inline]
    fn from_f64(n: f64) -> Self {
        F16::from_f64(n)
    }
}
// Unit tests for `F16`. Floating point results are compared with absolute
// or relative tolerances rather than bit-exact equality.
#[cfg(test)]
mod tests {
use super::*;
// Sanity checks for the NaN / infinity predicates on the named constants.
#[test]
fn test_f16_constants() {
assert!(!F16::ZERO.is_nan());
assert!(!F16::ONE.is_nan());
assert!(F16::NAN.is_nan());
assert!(F16::INFINITY.is_infinite());
assert!(!F16::ONE.is_infinite());
}
// Exactly representable inputs must map onto the matching constants; pi only
// needs to be close.
#[test]
fn test_f16_from_f32() {
assert_eq!(F16::from_f32(0.0), F16::ZERO);
assert_eq!(F16::from_f32(1.0), F16::ONE);
assert_eq!(F16::from_f32(-1.0), F16::NEG_ONE);
let pi = F16::from_f32(core::f32::consts::PI);
assert!((pi.to_f32() - core::f32::consts::PI).abs() < 0.01);
}
// Widening the basic constants yields the exact f32 values.
#[test]
fn test_f16_to_f32() {
assert_eq!(F16::ZERO.to_f32(), 0.0);
assert_eq!(F16::ONE.to_f32(), 1.0);
assert_eq!(F16::NEG_ONE.to_f32(), -1.0);
}
// The four basic operators on small values.
#[test]
fn test_f16_arithmetic() {
let a = F16::from_f32(2.0);
let b = F16::from_f32(3.0);
assert!(((a + b).to_f32() - 5.0).abs() < 0.01);
assert!(((a - b).to_f32() - (-1.0)).abs() < 0.01);
assert!(((a * b).to_f32() - 6.0).abs() < 0.01);
assert!(((b / a).to_f32() - 1.5).abs() < 0.01);
}
// Unary minus.
#[test]
fn test_f16_negation() {
let a = F16::from_f32(2.5);
assert!(((-a).to_f32() - (-2.5)).abs() < 0.01);
}
// Ordering and equality, including +0.0 == -0.0.
#[test]
fn test_f16_comparison() {
let a = F16::from_f32(1.0);
let b = F16::from_f32(2.0);
assert!(a < b);
assert!(b > a);
assert!(a == F16::ONE);
assert!(F16::ZERO == F16::from_f32(-0.0)); }
// NaN never compares equal, not even to itself.
#[test]
fn test_f16_nan() {
assert!(F16::NAN.is_nan());
assert!(F16::NAN != F16::NAN); assert!(!(F16::NAN == F16::NAN));
}
// Both infinities are infinite, are not NaN, and +inf exceeds MAX.
#[test]
fn test_f16_infinity() {
assert!(F16::INFINITY.is_infinite());
assert!(F16::NEG_INFINITY.is_infinite());
assert!(!F16::INFINITY.is_nan());
assert!(F16::INFINITY > F16::MAX);
}
// sin/cos at 0 and at pi/2; cos(pi/2) uses a looser bound than the other
// checks.
#[test]
fn test_f16_trig() {
let x = F16::from_f32(0.0);
assert!((x.sin().to_f32() - 0.0).abs() < 0.01);
assert!((x.cos().to_f32() - 1.0).abs() < 0.01);
let pi_2 = F16::from_f32(core::f32::consts::FRAC_PI_2);
assert!((pi_2.sin().to_f32() - 1.0).abs() < 0.01);
assert!(pi_2.cos().to_f32().abs() < 0.1);
}
// Exercises the parent module's `Float` trait; fully qualified calls pick the
// trait methods over the inherent ones of the same name.
#[test]
fn test_f16_float_trait() {
use super::super::Float;
let a = F16::from_usize(10);
assert!((a.to_f32() - 10.0).abs() < 0.1);
let b = <F16 as Float>::from_f64(2.5);
assert!((b.to_f32() - 2.5).abs() < 0.01);
let c = F16::from_f32(4.0);
assert!((<F16 as Float>::sqrt(c).to_f32() - 2.0).abs() < 0.01);
}
// abs() on a negative and a positive input.
#[test]
fn test_f16_abs() {
let neg = F16::from_f32(-5.0);
let pos = F16::from_f32(5.0);
assert!((neg.abs().to_f32() - 5.0).abs() < 0.01);
assert!((pos.abs().to_f32() - 5.0).abs() < 0.01);
}
// f32 -> F16 -> f32 must stay within 1% relative error for non-zero inputs,
// and zero must come back as zero.
#[test]
fn test_f16_round_trip() {
let values = [0.0_f32, 1.0, -1.0, 0.5, 100.0, 0.001, 65504.0];
for &v in &values {
let f16_val = F16::from_f32(v);
let back = f16_val.to_f32();
if v.abs() > 0.0 {
assert!(
(back - v).abs() / v.abs() < 0.01,
"Round-trip failed for {v}"
);
} else {
assert!(back == 0.0);
}
}
}
// Smoke test for the `num_traits` Zero / One / Float constructors.
#[test]
fn test_f16_num_traits() {
use num_traits::{Float, One, Zero};
assert!(F16::zero().is_zero());
assert!(F16::one() == F16::ONE);
assert!(F16::infinity().is_infinite());
assert!(F16::nan().is_nan());
}
}