pub struct F8E4M3(/* private fields */);Expand description
Eight bit floating point type with 4-bit exponent and 3-bit mantissa.
Implementations§
Source§impl F8E4M3
impl F8E4M3
Sourcepub const fn from_bits(bits: u8) -> Self
pub const fn from_bits(bits: u8) -> Self
Construct an 8-bit floating point value from the raw bits.
Sourcepub const fn from_f64(x: f64) -> Self
pub const fn from_f64(x: f64) -> Self
Convert a f64 type into F8E4M3.
This operation is lossy.
- If the 64-bit value is too large to fit in 8-bits, ±∞ will result.
- NaN values are preserved.
- 64-bit subnormal values are too tiny to be represented in 8-bits and result in ±0.
- Exponents that underflow the minimum 8-bit exponent will result in 8-bit subnormals or ±0.
- All other values are truncated and rounded to the nearest representable 8-bit value.
Sourcepub const fn from_f32(x: f32) -> Self
pub const fn from_f32(x: f32) -> Self
Convert a f32 type into F8E4M3.
This operation is lossy.
- If the 32-bit value is too large to fit in 8-bits, ±∞ will result.
- NaN values are preserved.
- 32-bit subnormal values are too tiny to be represented in 8-bits and result in ±0.
- Exponents that underflow the minimum 8-bit exponent will result in 8-bit subnormals or ±0.
- All other values are truncated and rounded to the nearest representable 8-bit value.
Sourcepub fn total_cmp(&self, other: &Self) -> Ordering
pub fn total_cmp(&self, other: &Self) -> Ordering
Returns the ordering between self and other. The values are ordered in the following sequence:
- negative quiet NaN
- negative signaling NaN
- negative infinity
- negative numbers
- negative subnormal numbers
- negative zero
- positive zero
- positive subnormal numbers
- positive numbers
- positive infinity
- positive signaling NaN
- positive quiet NaN.
The ordering established by this function does not always agree with the
PartialOrd and PartialEq implementations. For example,
they consider negative and positive zero equal, while total_cmp
doesn’t.
§Example
let mut v: Vec<F8E4M3> = vec![];
v.push(F8E4M3::ONE);
v.push(F8E4M3::INFINITY);
v.push(F8E4M3::NEG_INFINITY);
v.push(F8E4M3::NAN);
v.push(F8E4M3::MAX_SUBNORMAL);
v.push(-F8E4M3::MAX_SUBNORMAL);
v.push(F8E4M3::ZERO);
v.push(F8E4M3::NEG_ZERO);
v.push(F8E4M3::NEG_ONE);
v.push(F8E4M3::MIN_POSITIVE);
v.sort_by(|a, b| a.total_cmp(&b));
assert!(v
.into_iter()
.zip(
[
F8E4M3::NEG_INFINITY,
F8E4M3::NEG_ONE,
-F8E4M3::MAX_SUBNORMAL,
F8E4M3::NEG_ZERO,
F8E4M3::ZERO,
F8E4M3::MAX_SUBNORMAL,
F8E4M3::MIN_POSITIVE,
F8E4M3::ONE,
F8E4M3::INFINITY,
F8E4M3::NAN
]
.iter()
)
.all(|(a, b)| a.to_bits() == b.to_bits()));Sourcepub const fn is_sign_positive(&self) -> bool
pub const fn is_sign_positive(&self) -> bool
Returns true if and only if self has a positive sign, including +0.0, NaNs with a
positive sign bit and +∞.
Sourcepub const fn is_sign_negative(&self) -> bool
pub const fn is_sign_negative(&self) -> bool
Returns true if and only if self has a negative sign, including −0.0, NaNs with a
negative sign bit and −∞.
Sourcepub const fn is_nan(&self) -> bool
pub const fn is_nan(&self) -> bool
Returns true if this value is NaN and false otherwise.
§Examples
let nan = F8E4M3::NAN;
let f = F8E4M3::from_f32(7.0_f32);
assert!(nan.is_nan());
assert!(!f.is_nan());Sourcepub const fn is_infinite(&self) -> bool
pub const fn is_infinite(&self) -> bool
Returns true if this value is ±∞ and false otherwise.
§Examples
let f = F8E4M3::from_f32(7.0f32);
let inf = F8E4M3::INFINITY;
let neg_inf = F8E4M3::NEG_INFINITY;
let nan = F8E4M3::NAN;
assert!(!f.is_infinite());
assert!(!nan.is_infinite());
assert!(inf.is_infinite());
assert!(neg_inf.is_infinite());Sourcepub const fn is_finite(&self) -> bool
pub const fn is_finite(&self) -> bool
Returns true if this number is neither infinite nor NaN.
§Examples
let f = F8E4M3::from_f32(7.0f32);
let inf = F8E4M3::INFINITY;
let neg_inf = F8E4M3::NEG_INFINITY;
let nan = F8E4M3::NAN;
assert!(f.is_finite());
assert!(!nan.is_finite());
assert!(!inf.is_finite());
assert!(!neg_inf.is_finite());Sourcepub const fn is_normal(&self) -> bool
pub const fn is_normal(&self) -> bool
Returns true if the number is neither zero, infinite, subnormal, or NaN and false otherwise.
§Examples
let min = F8E4M3::MIN_POSITIVE;
let max = F8E4M3::MAX;
let lower_than_min = F8E4M3::from_f32(1.0e-10_f32);
let zero = F8E4M3::from_f32(0.0_f32);
assert!(min.is_normal());
assert!(max.is_normal());
assert!(!zero.is_normal());
assert!(!F8E4M3::NAN.is_normal());
assert!(!F8E4M3::INFINITY.is_normal());
// Values between `0` and `min` are Subnormal.
assert!(!lower_than_min.is_normal());Sourcepub fn min(self, other: Self) -> Self
pub fn min(self, other: Self) -> Self
Returns the minimum of the two numbers.
If one of the arguments is NaN, then the other argument is returned.
§Examples
let x = F8E4M3::from_f32(1.0);
let y = F8E4M3::from_f32(2.0);
assert_eq!(x.min(y), x);Sourcepub fn max(self, other: Self) -> Self
pub fn max(self, other: Self) -> Self
Returns the maximum of the two numbers.
If one of the arguments is NaN, then the other argument is returned.
§Examples
let x = F8E4M3::from_f32(1.0);
let y = F8E4M3::from_f32(2.0);
assert_eq!(x.max(y), y);Sourcepub fn clamp(self, min: Self, max: Self) -> Self
pub fn clamp(self, min: Self, max: Self) -> Self
Restrict a value to a certain interval unless it is NaN.
Returns max if self is greater than max, and min if self is less than min.
Otherwise this returns self.
Note that this function returns NaN if the initial value was NaN as well.
§Panics
Panics if min > max, min is NaN, or max is NaN.
§Examples
assert!(F8E4M3::from_f32(-3.0).clamp(F8E4M3::from_f32(-2.0), F8E4M3::from_f32(1.0)) == F8E4M3::from_f32(-2.0));
assert!(F8E4M3::from_f32(0.0).clamp(F8E4M3::from_f32(-2.0), F8E4M3::from_f32(1.0)) == F8E4M3::from_f32(0.0));
assert!(F8E4M3::from_f32(2.0).clamp(F8E4M3::from_f32(-2.0), F8E4M3::from_f32(1.0)) == F8E4M3::from_f32(1.0));
assert!(F8E4M3::NAN.clamp(F8E4M3::from_f32(-2.0), F8E4M3::from_f32(1.0)).is_nan());Sourcepub const fn copysign(self, sign: Self) -> Self
pub const fn copysign(self, sign: Self) -> Self
Returns a number composed of the magnitude of self and the sign of sign.
Equal to self if the sign of self and sign are the same, otherwise equal to -self.
If self is NaN, then NaN with the sign of sign is returned.
§Examples
let f = F8E4M3::from_f32(3.5);
assert_eq!(f.copysign(F8E4M3::from_f32(0.42)), F8E4M3::from_f32(3.5));
assert_eq!(f.copysign(F8E4M3::from_f32(-0.42)), F8E4M3::from_f32(-3.5));
assert_eq!((-f).copysign(F8E4M3::from_f32(0.42)), F8E4M3::from_f32(3.5));
assert_eq!((-f).copysign(F8E4M3::from_f32(-0.42)), F8E4M3::from_f32(-3.5));
assert!(F8E4M3::NAN.copysign(F8E4M3::from_f32(1.0)).is_nan());Sourcepub const fn signum(self) -> Self
pub const fn signum(self) -> Self
Returns a number that represents the sign of self.
- 1.0 if the number is positive, +0.0 or INFINITY
- -1.0 if the number is negative, -0.0 or NEG_INFINITY
- NAN if the number is NaN
§Examples
let f = F8E4M3::from_f32(3.5_f32);
assert_eq!(f.signum(), F8E4M3::from_f32(1.0));
assert_eq!(F8E4M3::NEG_INFINITY.signum(), F8E4M3::from_f32(-1.0));
assert!(F8E4M3::NAN.signum().is_nan());Sourcepub const fn classify(&self) -> FpCategory
pub const fn classify(&self) -> FpCategory
Returns the floating point category of the number.
If only one property is going to be tested, it is generally faster to use the specific predicate instead.
§Examples
use std::num::FpCategory;
let num = F8E4M3::from_f32(12.4_f32);
let inf = F8E4M3::INFINITY;
assert_eq!(num.classify(), FpCategory::Normal);
assert_eq!(inf.classify(), FpCategory::Infinite);Source§impl F8E4M3
impl F8E4M3
Sourcepub const FRAC_2_SQRT_PI: Self
pub const FRAC_2_SQRT_PI: Self
2/sqrt(π)
Sourcepub const FRAC_1_SQRT_2: Self
pub const FRAC_1_SQRT_2: Self
1/sqrt(2)
Source§impl F8E4M3
impl F8E4M3
Sourcepub const MANTISSA_DIGITS: u32 = 3u32
pub const MANTISSA_DIGITS: u32 = 3u32
Number of mantissa digits
Sourcepub const NEG_INFINITY: Self
pub const NEG_INFINITY: Self
Negative infinity -∞
Sourcepub const MIN_POSITIVE: Self
pub const MIN_POSITIVE: Self
Smallest possible normal value
Sourcepub const MIN_POSITIVE_SUBNORMAL: Self
pub const MIN_POSITIVE_SUBNORMAL: Self
Smallest possible subnormal value
Sourcepub const MAX_SUBNORMAL: Self
pub const MAX_SUBNORMAL: Self
Largest possible subnormal value
Sourcepub const EPSILON: Self
pub const EPSILON: Self
This is the difference between 1.0 and the next larger representable number.
Sourcepub const MIN_EXP: i32 = -5i32
pub const MIN_EXP: i32 = -5i32
One greater than the minimum possible normal power of 2 exponent
Sourcepub const MIN_10_EXP: i32 = -1i32
pub const MIN_10_EXP: i32 = -1i32
Minimum possible normal power of 10 exponent
Sourcepub const MAX_10_EXP: i32 = 2i32
pub const MAX_10_EXP: i32 = 2i32
Maximum possible normal power of 10 exponent
Trait Implementations§
Source§impl AddAssign for F8E4M3
impl AddAssign for F8E4M3
Source§fn add_assign(&mut self, rhs: Self)
fn add_assign(&mut self, rhs: Self)
+= operation. Read moreSource§impl DivAssign for F8E4M3
impl DivAssign for F8E4M3
Source§fn div_assign(&mut self, rhs: Self)
fn div_assign(&mut self, rhs: Self)
/= operation. Read moreSource§impl MulAssign for F8E4M3
impl MulAssign for F8E4M3
Source§fn mul_assign(&mut self, rhs: Self)
fn mul_assign(&mut self, rhs: Self)
*= operation. Read moreSource§impl PartialOrd for F8E4M3
impl PartialOrd for F8E4M3
Source§impl RemAssign for F8E4M3
impl RemAssign for F8E4M3
Source§fn rem_assign(&mut self, rhs: Self)
fn rem_assign(&mut self, rhs: Self)
%= operation. Read moreSource§impl SubAssign for F8E4M3
impl SubAssign for F8E4M3
Source§fn sub_assign(&mut self, rhs: Self)
fn sub_assign(&mut self, rhs: Self)
-= operation. Read more