use crate::F128;
use core::{
cmp::Ordering,
hash::{Hash, Hasher},
num::FpCategory,
ops::Neg,
};
use half::{bf16, f16};
const PREC: u32 = 113;
const EXP_BITS: u32 = u128::BITS - PREC;
const EXP_BIAS: u32 = (1 << (EXP_BITS - 1)) - 1;
const SIGN_MASK: u128 = 1 << (u128::BITS - 1);
const EXP_MASK: u128 = ((1 << EXP_BITS) - 1) << (PREC - 1);
const MANT_MASK: u128 = (1 << (PREC - 1)) - 1;
pub(crate) mod private {
#[derive(Clone, Copy, Default, Debug)]
pub struct F128 {
pub(crate) bits: u128,
}
}
impl F128 {
pub const ZERO: F128 = F128::from_bits(0);
pub const NEG_ZERO: F128 = F128::from_bits(SIGN_MASK);
pub const ONE: F128 = F128::from_bits((EXP_BIAS as u128) << (PREC - 1));
pub const NEG_ONE: F128 = F128::from_bits(SIGN_MASK | F128::ONE.to_bits());
pub const MIN_POSITIVE_SUB: F128 = F128::from_bits(1);
pub const MIN_POSITIVE: F128 = F128::from_bits(MANT_MASK + 1);
pub const MAX: F128 = F128::from_bits(EXP_MASK - 1);
pub const MIN: F128 = F128::from_bits(SIGN_MASK | F128::MAX.to_bits());
pub const INFINITY: F128 = F128::from_bits(EXP_MASK);
pub const NEG_INFINITY: F128 = F128::from_bits(SIGN_MASK | EXP_MASK);
pub const NAN: F128 = F128::from_bits(EXP_MASK | (1u128 << (PREC - 2)));
pub const RADIX: u32 = 2;
pub const MANTISSA_DIGITS: u32 = PREC;
pub const DIGITS: u32 = 33;
pub const EPSILON: F128 = F128::from_bits(((EXP_BIAS - (PREC - 1)) as u128) << (PREC - 1));
pub const MIN_EXP: i32 = 3 - F128::MAX_EXP;
pub const MAX_EXP: i32 = EXP_BIAS as i32 + 1;
pub const MIN_10_EXP: i32 = -4931;
pub const MAX_10_EXP: i32 = 4932;
#[inline]
pub const fn from_bits(bits: u128) -> F128 {
F128 { bits }
}
#[inline]
pub const fn to_bits(self) -> u128 {
self.bits
}
#[inline]
pub const fn from_be_bytes(bytes: [u8; 16]) -> F128 {
F128::from_bits(u128::from_be_bytes(bytes))
}
#[inline]
pub const fn from_le_bytes(bytes: [u8; 16]) -> F128 {
F128::from_bits(u128::from_le_bytes(bytes))
}
#[inline]
pub const fn from_ne_bytes(bytes: [u8; 16]) -> F128 {
F128::from_bits(u128::from_ne_bytes(bytes))
}
#[inline]
pub const fn to_be_bytes(self) -> [u8; 16] {
self.to_bits().to_be_bytes()
}
#[inline]
pub const fn to_le_bytes(self) -> [u8; 16] {
self.to_bits().to_le_bytes()
}
#[inline]
pub const fn to_ne_bytes(self) -> [u8; 16] {
self.to_bits().to_ne_bytes()
}
#[inline]
pub const fn is_nan(self) -> bool {
(self.to_bits() & !SIGN_MASK) > EXP_MASK
}
#[inline]
pub const fn is_infinite(self) -> bool {
(self.to_bits() & !SIGN_MASK) == EXP_MASK
}
#[inline]
pub const fn is_finite(self) -> bool {
(self.to_bits() & EXP_MASK) != EXP_MASK
}
#[inline]
pub const fn is_zero(self) -> bool {
(self.to_bits() & !SIGN_MASK) == 0
}
#[inline]
pub const fn is_subnormal(self) -> bool {
let abs = self.to_bits() & !SIGN_MASK;
0 < abs && abs < F128::MIN_POSITIVE.to_bits()
}
#[inline]
pub const fn is_normal(self) -> bool {
let abs = self.to_bits() & !SIGN_MASK;
F128::MIN_POSITIVE.to_bits() <= abs && abs <= F128::MAX.to_bits()
}
#[inline]
pub const fn classify(self) -> FpCategory {
let exp = self.to_bits() & EXP_MASK;
let mant = self.to_bits() & MANT_MASK;
if exp == 0 {
if mant == 0 {
FpCategory::Zero
} else {
FpCategory::Subnormal
}
} else if exp == EXP_MASK {
if mant == 0 {
FpCategory::Infinite
} else {
FpCategory::Nan
}
} else {
FpCategory::Normal
}
}
#[inline]
pub const fn abs(self) -> F128 {
F128::from_bits(self.to_bits() & !SIGN_MASK)
}
#[inline]
pub const fn signum(self) -> F128 {
if self.is_nan() {
self
} else if self.is_sign_positive() {
F128::ONE
} else {
F128::NEG_ONE
}
}
#[inline]
pub const fn copysign(self, sign: F128) -> F128 {
F128::from_bits((self.to_bits() & !SIGN_MASK) | (sign.to_bits() & SIGN_MASK))
}
#[inline]
pub const fn is_sign_positive(self) -> bool {
(self.to_bits() & SIGN_MASK) == 0
}
#[inline]
pub const fn is_sign_negative(self) -> bool {
(self.to_bits() & SIGN_MASK) != 0
}
#[inline]
pub const fn max(self, other: F128) -> F128 {
if self.is_nan() || matches!(partial_cmp(&self, &other), Some(Ordering::Less)) {
other
} else {
self
}
}
#[inline]
pub const fn min(self, other: F128) -> F128 {
if self.is_nan() || matches!(partial_cmp(&self, &other), Some(Ordering::Greater)) {
other
} else {
self
}
}
#[inline]
#[track_caller]
pub const fn clamp(mut self, min: F128, max: F128) -> F128 {
match partial_cmp(&min, &max) {
Some(Ordering::Less) | Some(Ordering::Equal) => {}
_ => panic!("need min <= max"),
}
if matches!(partial_cmp(&self, &min), Some(Ordering::Less)) {
self = min;
}
if matches!(partial_cmp(&self, &max), Some(Ordering::Greater)) {
self = max;
}
self
}
#[inline]
pub const fn total_cmp(&self, other: &F128) -> Ordering {
let a = self.to_bits();
let b = other.to_bits();
match (self.is_sign_negative(), other.is_sign_negative()) {
(false, false) => cmp_bits(a, b),
(true, true) => cmp_bits(b, a),
(false, true) => Ordering::Greater,
(true, false) => Ordering::Less,
}
}
}
const fn cmp_bits(a: u128, b: u128) -> Ordering {
if a < b {
Ordering::Less
} else if a == b {
Ordering::Equal
} else {
Ordering::Greater
}
}
impl PartialEq for F128 {
#[inline]
fn eq(&self, other: &F128) -> bool {
if self.is_nan() || other.is_nan() {
return false;
}
let a = self.to_bits();
let b = other.to_bits();
if ((a | b) & !SIGN_MASK) == 0 {
return true;
}
a == b
}
}
impl PartialOrd for F128 {
#[inline]
fn partial_cmp(&self, other: &F128) -> Option<Ordering> {
partial_cmp(self, other)
}
}
#[inline]
const fn partial_cmp(a: &F128, b: &F128) -> Option<Ordering> {
if a.is_nan() || b.is_nan() {
return None;
}
let a_bits = a.to_bits();
let b_bits = b.to_bits();
if ((a_bits | b_bits) & !SIGN_MASK) == 0 {
return Some(Ordering::Equal);
}
match (a.is_sign_negative(), b.is_sign_negative()) {
(false, false) => Some(cmp_bits(a_bits, b_bits)),
(true, true) => Some(cmp_bits(b_bits, a_bits)),
(false, true) => Some(Ordering::Greater),
(true, false) => Some(Ordering::Less),
}
}
impl Hash for F128 {
#[inline]
fn hash<H>(&self, state: &mut H)
where
H: Hasher,
{
let mut bits = self.to_bits();
if bits == F128::NEG_ZERO.to_bits() {
bits = 0;
}
bits.hash(state);
}
}
impl Neg for F128 {
type Output = F128;
#[inline]
fn neg(self) -> F128 {
F128::from_bits(self.to_bits() ^ SIGN_MASK)
}
}
macro_rules! from_float {
($f:ident, $u:ident) => {
impl From<$f> for F128 {
fn from(src: $f) -> F128 {
const PREC_S: u32 = $f::MANTISSA_DIGITS;
const EXP_BITS_S: u32 = $u::BITS - PREC_S;
const EXP_BIAS_S: u32 = (1 << (EXP_BITS_S - 1)) - 1;
const SIGN_MASK_S: $u = 1 << ($u::BITS - 1);
const EXP_MASK_S: $u = ((1 << EXP_BITS_S) - 1) << (PREC_S - 1);
const MANT_MASK_S: $u = (1 << (PREC_S - 1)) - 1;
let b = src.to_bits();
let sign_bit_s = b & SIGN_MASK_S;
let exp_bits_s = b & EXP_MASK_S;
let mant_bits_s = b & MANT_MASK_S;
let sign_bit = u128::from(sign_bit_s) << (u128::BITS - $u::BITS);
if exp_bits_s == EXP_MASK_S {
if mant_bits_s == 0 {
return F128::from_bits(sign_bit | EXP_MASK);
}
let mant_bits =
(u128::from(mant_bits_s) << (PREC - PREC_S)) | (1 << (PREC - 2));
return F128::from_bits(sign_bit | EXP_MASK | mant_bits);
}
if exp_bits_s == 0 {
if mant_bits_s == 0 {
return F128::from_bits(sign_bit);
}
let lz = mant_bits_s.leading_zeros();
let mant_bits = (u128::from(mant_bits_s) << (PREC - $u::BITS + lz)) & MANT_MASK;
let unbiased_exp =
$f::MIN_EXP - PREC_S as i32 - 1 + $u::BITS as i32 - lz as i32;
let exp_bits = ((unbiased_exp + EXP_BIAS as i32) as u128) << (PREC - 1);
return F128::from_bits(sign_bit | exp_bits | mant_bits);
}
let mant_bits = u128::from(mant_bits_s) << (PREC - PREC_S);
let dbias = (EXP_BIAS - EXP_BIAS_S) as u128;
let exp_bits = (u128::from(exp_bits_s >> (PREC_S - 1)) + dbias) << (PREC - 1);
F128::from_bits(sign_bit | exp_bits | mant_bits)
}
}
};
}
from_float! { f64, u64 }
from_float! { f32, u32 }
from_float! { f16, u16 }
from_float! { bf16, u16 }
pub mod consts {
use crate::F128;
pub const PI: F128 = F128::from_bits(0x4000_921F_B544_42D1_8469_898C_C517_01B8);
pub const TAU: F128 = F128::from_bits(0x4001_921F_B544_42D1_8469_898C_C517_01B8);
pub const FRAC_PI_2: F128 = F128::from_bits(0x3FFF_921F_B544_42D1_8469_898C_C517_01B8);
pub const FRAC_PI_3: F128 = F128::from_bits(0x3FFF_0C15_2382_D736_5846_5BB3_2E0F_567B);
pub const FRAC_PI_4: F128 = F128::from_bits(0x3FFE_921F_B544_42D1_8469_898C_C517_01B8);
pub const FRAC_PI_6: F128 = F128::from_bits(0x3FFE_0C15_2382_D736_5846_5BB3_2E0F_567B);
pub const FRAC_PI_8: F128 = F128::from_bits(0x3FFD_921F_B544_42D1_8469_898C_C517_01B8);
pub const FRAC_1_PI: F128 = F128::from_bits(0x3FFD_45F3_06DC_9C88_2A53_F84E_AFA3_EA6A);
pub const FRAC_2_PI: F128 = F128::from_bits(0x3FFE_45F3_06DC_9C88_2A53_F84E_AFA3_EA6A);
pub const FRAC_2_SQRT_PI: F128 = F128::from_bits(0x3FFF_20DD_7504_29B6_D11A_E3A9_14FE_D7FE);
pub const SQRT_2: F128 = F128::from_bits(0x3FFF_6A09_E667_F3BC_C908_B2FB_1366_EA95);
pub const FRAC_1_SQRT_2: F128 = F128::from_bits(0x3FFE_6A09_E667_F3BC_C908_B2FB_1366_EA95);
pub const E: F128 = F128::from_bits(0x4000_5BF0_A8B1_4576_9535_5FB8_AC40_4E7A);
pub const LOG2_10: F128 = F128::from_bits(0x4000_A934_F097_9A37_15FC_9257_EDFE_9B60);
pub const LOG2_E: F128 = F128::from_bits(0x3FFF_7154_7652_B82F_E177_7D0F_FDA0_D23A);
pub const LOG10_2: F128 = F128::from_bits(0x3FFD_3441_3509_F79F_EF31_1F12_B358_16F9);
pub const LOG10_E: F128 = F128::from_bits(0x3FFD_BCB7_B152_6E50_E32A_6AB7_555F_5A68);
pub const LN_2: F128 = F128::from_bits(0x3FFE_62E4_2FEF_A39E_F357_93C7_6730_07E6);
pub const LN_10: F128 = F128::from_bits(0x4000_26BB_1BBB_5551_582D_D4AD_AC57_05A6);
}
#[cfg(test)]
mod tests {
use crate::{traits::FromFixed, F128};
use half::{bf16, f16};
struct Params {
mantissa_digits: u32,
min_exp: i32,
max_exp: i32,
digits: u32,
min_10_exp: i32,
max_10_exp: i32,
}
impl Params {
#[track_caller]
fn check(self) {
let p = f64::from(self.mantissa_digits);
let e_min = f64::from(self.min_exp);
let e_max = f64::from(self.max_exp);
assert_eq!(self.digits, ((p - 1.) * 2f64.log10()).floor() as u32);
assert_eq!(self.min_10_exp, ((e_min - 1.) * 2f64.log10()).ceil() as i32);
assert_eq!(
self.max_10_exp,
((-(-p).exp2()).ln_1p() / 10f64.ln() + e_max * 2f64.log10()).floor() as i32
);
}
}
#[test]
fn decimal_constants_f16() {
let params = Params {
mantissa_digits: f16::MANTISSA_DIGITS,
min_exp: f16::MIN_EXP,
max_exp: f16::MAX_EXP,
digits: f16::DIGITS,
min_10_exp: f16::MIN_10_EXP,
max_10_exp: f16::MAX_10_EXP,
};
params.check();
}
#[test]
fn decimal_constants_bf16() {
let params = Params {
mantissa_digits: bf16::MANTISSA_DIGITS,
min_exp: bf16::MIN_EXP,
max_exp: bf16::MAX_EXP,
digits: bf16::DIGITS,
min_10_exp: bf16::MIN_10_EXP,
max_10_exp: bf16::MAX_10_EXP,
};
params.check();
}
#[test]
fn decimal_constants_f32() {
let params = Params {
mantissa_digits: f32::MANTISSA_DIGITS,
min_exp: f32::MIN_EXP,
max_exp: f32::MAX_EXP,
digits: f32::DIGITS,
min_10_exp: f32::MIN_10_EXP,
max_10_exp: f32::MAX_10_EXP,
};
params.check();
}
#[test]
fn decimal_constants_f64() {
let params = Params {
mantissa_digits: f64::MANTISSA_DIGITS,
min_exp: f64::MIN_EXP,
max_exp: f64::MAX_EXP,
digits: f64::DIGITS,
min_10_exp: f64::MIN_10_EXP,
max_10_exp: f64::MAX_10_EXP,
};
params.check();
}
#[test]
fn decimal_constants_f128() {
let params = Params {
mantissa_digits: F128::MANTISSA_DIGITS,
min_exp: F128::MIN_EXP,
max_exp: F128::MAX_EXP,
digits: F128::DIGITS,
min_10_exp: F128::MIN_10_EXP,
max_10_exp: F128::MAX_10_EXP,
};
params.check();
}
#[test]
fn math_constants() {
use crate::{consts as fix, f128::consts as f128};
assert_eq!(f128::PI, F128::from_fixed(fix::PI));
assert_eq!(f128::TAU, F128::from_fixed(fix::TAU));
assert_eq!(f128::FRAC_PI_2, F128::from_fixed(fix::FRAC_PI_2));
assert_eq!(f128::FRAC_PI_3, F128::from_fixed(fix::FRAC_PI_3));
assert_eq!(f128::FRAC_PI_4, F128::from_fixed(fix::FRAC_PI_4));
assert_eq!(f128::FRAC_PI_6, F128::from_fixed(fix::FRAC_PI_6));
assert_eq!(f128::FRAC_PI_8, F128::from_fixed(fix::FRAC_PI_8));
assert_eq!(f128::FRAC_1_PI, F128::from_fixed(fix::FRAC_1_PI));
assert_eq!(f128::FRAC_2_PI, F128::from_fixed(fix::FRAC_2_PI));
assert_eq!(f128::FRAC_2_SQRT_PI, F128::from_fixed(fix::FRAC_2_SQRT_PI));
assert_eq!(f128::SQRT_2, F128::from_fixed(fix::SQRT_2));
assert_eq!(f128::FRAC_1_SQRT_2, F128::from_fixed(fix::FRAC_1_SQRT_2));
assert_eq!(f128::E, F128::from_fixed(fix::E));
assert_eq!(f128::LOG2_10, F128::from_fixed(fix::LOG2_10));
assert_eq!(f128::LOG2_E, F128::from_fixed(fix::LOG2_E));
assert_eq!(f128::LOG10_2, F128::from_fixed(fix::LOG10_2));
assert_eq!(f128::LOG10_E, F128::from_fixed(fix::LOG10_E));
assert_eq!(f128::LN_2, F128::from_fixed(fix::LN_2));
assert_eq!(f128::LN_10, F128::from_fixed(fix::LN_10));
}
#[test]
fn from_f64() {
assert_eq!(F128::from(1f64), F128::ONE);
assert_eq!(F128::from(-1f64), F128::NEG_ONE);
assert_eq!(F128::from(f64::INFINITY), F128::INFINITY);
assert_eq!(F128::from(f64::NEG_INFINITY), F128::NEG_INFINITY);
assert!(F128::from(f64::NAN).is_nan());
assert_eq!(F128::from(0f64), F128::ZERO);
assert_eq!(F128::from(-0f64), F128::ZERO);
assert!(F128::from(0f64).is_sign_positive());
assert!(F128::from(-0f64).is_sign_negative());
let exp_shift = F128::MANTISSA_DIGITS - 1;
let exp = (F128::MAX_EXP - 1 + f64::MIN_EXP - f64::MANTISSA_DIGITS as i32) as u128;
assert_eq!(
F128::from(f64::from_bits(1)),
F128::from_bits(exp << exp_shift)
);
let mantissa = 3u128 << (F128::MANTISSA_DIGITS - 1 - 3);
let exp = exp + 3;
assert_eq!(
F128::from(f64::from_bits((1 << 63) | 11)),
F128::from_bits((1 << 127) | (exp << exp_shift) | mantissa)
);
}
#[test]
fn from_f32() {
assert_eq!(F128::from(1f32), F128::ONE);
assert_eq!(F128::from(-1f32), F128::NEG_ONE);
assert_eq!(F128::from(f32::INFINITY), F128::INFINITY);
assert_eq!(F128::from(f32::NEG_INFINITY), F128::NEG_INFINITY);
assert!(F128::from(f32::NAN).is_nan());
assert_eq!(F128::from(0f32), F128::ZERO);
assert_eq!(F128::from(-0f32), F128::ZERO);
assert!(F128::from(0f32).is_sign_positive());
assert!(F128::from(-0f32).is_sign_negative());
let exp_shift = F128::MANTISSA_DIGITS - 1;
let exp = (F128::MAX_EXP - 1 + f32::MIN_EXP - f32::MANTISSA_DIGITS as i32) as u128;
assert_eq!(
F128::from(f32::from_bits(1)),
F128::from_bits(exp << exp_shift)
);
let mantissa = 3u128 << (F128::MANTISSA_DIGITS - 1 - 3);
let exp = exp + 3;
assert_eq!(
F128::from(f32::from_bits((1 << 31) | 11)),
F128::from_bits((1 << 127) | (exp << exp_shift) | mantissa)
);
}
#[test]
fn from_f16() {
assert_eq!(F128::from(f16::ONE), F128::ONE);
assert_eq!(F128::from(f16::NEG_ONE), F128::NEG_ONE);
assert_eq!(F128::from(f16::INFINITY), F128::INFINITY);
assert_eq!(F128::from(f16::NEG_INFINITY), F128::NEG_INFINITY);
assert!(F128::from(f16::NAN).is_nan());
assert_eq!(F128::from(f16::ZERO), F128::ZERO);
assert_eq!(F128::from(f16::NEG_ZERO), F128::ZERO);
assert!(F128::from(f16::ZERO).is_sign_positive());
assert!(F128::from(f16::NEG_ZERO).is_sign_negative());
let exp_shift = F128::MANTISSA_DIGITS - 1;
let exp = (F128::MAX_EXP - 1 + f16::MIN_EXP - f16::MANTISSA_DIGITS as i32) as u128;
assert_eq!(
F128::from(f16::from_bits(1)),
F128::from_bits(exp << exp_shift)
);
let mantissa = 3u128 << (F128::MANTISSA_DIGITS - 1 - 3);
let exp = exp + 3;
assert_eq!(
F128::from(f16::from_bits((1 << 15) | 11)),
F128::from_bits((1 << 127) | (exp << exp_shift) | mantissa)
);
}
#[test]
fn from_bf16() {
assert_eq!(F128::from(bf16::ONE), F128::ONE);
assert_eq!(F128::from(bf16::NEG_ONE), F128::NEG_ONE);
assert_eq!(F128::from(bf16::INFINITY), F128::INFINITY);
assert_eq!(F128::from(bf16::NEG_INFINITY), F128::NEG_INFINITY);
assert!(F128::from(bf16::NAN).is_nan());
assert_eq!(F128::from(bf16::ZERO), F128::ZERO);
assert_eq!(F128::from(bf16::NEG_ZERO), F128::ZERO);
assert!(F128::from(bf16::ZERO).is_sign_positive());
assert!(F128::from(bf16::NEG_ZERO).is_sign_negative());
let exp_shift = F128::MANTISSA_DIGITS - 1;
let exp = (F128::MAX_EXP - 1 + bf16::MIN_EXP - bf16::MANTISSA_DIGITS as i32) as u128;
assert_eq!(
F128::from(bf16::from_bits(1)),
F128::from_bits(exp << exp_shift)
);
let mantissa = 3u128 << (F128::MANTISSA_DIGITS - 1 - 3);
let exp = exp + 3;
assert_eq!(
F128::from(bf16::from_bits((1 << 15) | 11)),
F128::from_bits((1 << 127) | (exp << exp_shift) | mantissa)
);
}
}