Struct F8E4M3

Source

pub struct F8E4M3(/* private fields */);

Expand description

Eight bit floating point type with 4-bit exponent and 3-bit mantissa.

Implementations§

Source §

impl F8E4M3

Source

pub const fn from_bits(bits: u8) -> Self

Construct an 8-bit floating point value from the raw bits.

Source

pub const fn to_bits(&self) -> u8

Return the raw bits.

Source

pub const fn from_f64(x: f64) -> Self

Convert a f64 type into F8E4M3.

This operation is lossy.

If the 64-bit value is too large to fit in 8-bits, ±∞ will result.
NaN values are preserved.
64-bit subnormal values are too tiny to be represented in 8-bits and result in ±0.
Exponents that underflow the minimum 8-bit exponent will result in 8-bit subnormals or ±0.
All other values are truncated and rounded to the nearest representable 8-bit value.

Source

pub const fn from_f32(x: f32) -> Self

Convert a f32 type into F8E4M3.

This operation is lossy.

If the 32-bit value is too large to fit in 8-bits, ±∞ will result.
NaN values are preserved.
32-bit subnormal values are too tiny to be represented in 8-bits and result in ±0.
Exponents that underflow the minimum 8-bit exponent will result in 8-bit subnormals or ±0.
All other values are truncated and rounded to the nearest representable 8-bit value.

Source

pub const fn to_f16(&self) -> f16

Convert this F8E4M3 type into a f16 type.

This operation may be lossy.

NaN and zero values are preserved.
Subnormal values are normalized.
Otherwise, the values are mapped to the appropriate 16-bit value.

Source

pub const fn to_f32(&self) -> f32

Convert this F8E4M3 type into a f32 type.

This operation may be lossy.

NaN and zero values are preserved.
Subnormal values are normalized.
Otherwise, the values are mapped to the appropriate 32-bit value.

Source

pub const fn to_f64(&self) -> f64

Convert this F8E4M3 type into a f64 type.

This operation may be lossy.

NaN and zero values are preserved.
Subnormal values are normalized.
Otherwise, the values are mapped to the appropriate 64-bit value.

Source

pub fn total_cmp(&self, other: &Self) -> Ordering

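Returns the ordering between self and other.

Unlike the PartialOrd comparison, this comparison always produces an ordering. The values are ordered in the following sequence, from least to greatest: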

negative quiet NaN
negative signaling NaN
negative infinity
negative numbers
negative subnormal numbers
negative zero
positive zero
positive subnormal numbers
positive numbers
positive infinity
positive signaling NaN
positive quiet NaN.

The ordering established by this function does not always agree with the PartialOrd and PartialEq implementations. For example, they consider negative and positive zero equal, while total_cmp doesn’t.

§Example


let mut v: Vec<F8E4M3> = vec![];
v.push(F8E4M3::ONE);
v.push(F8E4M3::INFINITY);
v.push(F8E4M3::NEG_INFINITY);
v.push(F8E4M3::NAN);
v.push(F8E4M3::MAX_SUBNORMAL);
v.push(-F8E4M3::MAX_SUBNORMAL);
v.push(F8E4M3::ZERO);
v.push(F8E4M3::NEG_ZERO);
v.push(F8E4M3::NEG_ONE);
v.push(F8E4M3::MIN_POSITIVE);

v.sort_by(|a, b| a.total_cmp(&b));

assert!(v
    .into_iter()
    .zip(
        [
            F8E4M3::NEG_INFINITY,
            F8E4M3::NEG_ONE,
            -F8E4M3::MAX_SUBNORMAL,
            F8E4M3::NEG_ZERO,
            F8E4M3::ZERO,
            F8E4M3::MAX_SUBNORMAL,
            F8E4M3::MIN_POSITIVE,
            F8E4M3::ONE,
            F8E4M3::INFINITY,
            F8E4M3::NAN
        ]
        .iter()
    )
    .all(|(a, b)| a.to_bits() == b.to_bits()));

Source

pub const fn is_sign_positive(&self) -> bool

Returns true if and only if self has a positive sign, including +0.0, NaNs with a positive sign bit and +∞.

Source

pub const fn is_sign_negative(&self) -> bool

Returns true if and only if self has a negative sign, including −0.0, NaNs with a negative sign bit and −∞.

Source

pub const fn is_nan(&self) -> bool

Returns true if this value is NaN and false otherwise.

§Examples


let nan = F8E4M3::NAN;
let f = F8E4M3::from_f32(7.0_f32);

assert!(nan.is_nan());
assert!(!f.is_nan());

Source

pub const fn is_infinite(&self) -> bool

Returns true if this value is ±∞ and false otherwise.

§Examples


let f = F8E4M3::from_f32(7.0f32);
let inf = F8E4M3::INFINITY;
let neg_inf = F8E4M3::NEG_INFINITY;
let nan = F8E4M3::NAN;

assert!(!f.is_infinite());
assert!(!nan.is_infinite());

assert!(inf.is_infinite());
assert!(neg_inf.is_infinite());

Source

pub const fn is_finite(&self) -> bool

Returns true if this number is neither infinite nor NaN.

§Examples


let f = F8E4M3::from_f32(7.0f32);
let inf = F8E4M3::INFINITY;
let neg_inf = F8E4M3::NEG_INFINITY;
let nan = F8E4M3::NAN;

assert!(f.is_finite());

assert!(!nan.is_finite());
assert!(!inf.is_finite());
assert!(!neg_inf.is_finite());

Source

pub const fn is_normal(&self) -> bool

Returns true if the number is neither zero, infinite, subnormal, nor NaN, and false otherwise.

§Examples


let min = F8E4M3::MIN_POSITIVE;
let max = F8E4M3::MAX;
let lower_than_min = F8E4M3::from_f32(1.0e-10_f32);
let zero = F8E4M3::from_f32(0.0_f32);

assert!(min.is_normal());
assert!(max.is_normal());

assert!(!zero.is_normal());
assert!(!F8E4M3::NAN.is_normal());
assert!(!F8E4M3::INFINITY.is_normal());
// Values between `0` and `min` are Subnormal.
assert!(!lower_than_min.is_normal());

Source

pub fn min(self, other: Self) -> Self

Returns the minimum of the two numbers.

If one of the arguments is NaN, then the other argument is returned.

§Examples

let x = F8E4M3::from_f32(1.0);
let y = F8E4M3::from_f32(2.0);

assert_eq!(x.min(y), x);

Source

pub fn max(self, other: Self) -> Self

Returns the maximum of the two numbers.

If one of the arguments is NaN, then the other argument is returned.

§Examples

let x = F8E4M3::from_f32(1.0);
let y = F8E4M3::from_f32(2.0);

assert_eq!(x.max(y), y);

Source

pub fn clamp(self, min: Self, max: Self) -> Self

Restrict a value to a certain interval unless it is NaN.

Returns max if self is greater than max, and min if self is less than min. Otherwise this returns self.

Note that this function returns NaN if the initial value was NaN as well.

§Panics

Panics if min > max, min is NaN, or max is NaN.

§Examples

assert!(F8E4M3::from_f32(-3.0).clamp(F8E4M3::from_f32(-2.0), F8E4M3::from_f32(1.0)) == F8E4M3::from_f32(-2.0));
assert!(F8E4M3::from_f32(0.0).clamp(F8E4M3::from_f32(-2.0), F8E4M3::from_f32(1.0)) == F8E4M3::from_f32(0.0));
assert!(F8E4M3::from_f32(2.0).clamp(F8E4M3::from_f32(-2.0), F8E4M3::from_f32(1.0)) == F8E4M3::from_f32(1.0));
assert!(F8E4M3::NAN.clamp(F8E4M3::from_f32(-2.0), F8E4M3::from_f32(1.0)).is_nan());

Source

pub const fn copysign(self, sign: Self) -> Self

Returns a number composed of the magnitude of self and the sign of sign.

Equal to self if the sign of self and sign are the same, otherwise equal to -self. If self is NaN, then NaN with the sign of sign is returned.

§Examples

let f = F8E4M3::from_f32(3.5);

assert_eq!(f.copysign(F8E4M3::from_f32(0.42)), F8E4M3::from_f32(3.5));
assert_eq!(f.copysign(F8E4M3::from_f32(-0.42)), F8E4M3::from_f32(-3.5));
assert_eq!((-f).copysign(F8E4M3::from_f32(0.42)), F8E4M3::from_f32(3.5));
assert_eq!((-f).copysign(F8E4M3::from_f32(-0.42)), F8E4M3::from_f32(-3.5));

assert!(F8E4M3::NAN.copysign(F8E4M3::from_f32(1.0)).is_nan());

Source

pub const fn signum(self) -> Self

Returns a number that represents the sign of self.

1.0 if the number is positive, +0.0 or INFINITY
-1.0 if the number is negative, -0.0 or NEG_INFINITY
NAN if the number is NaN

§Examples


let f = F8E4M3::from_f32(3.5_f32);

assert_eq!(f.signum(), F8E4M3::from_f32(1.0));
assert_eq!(F8E4M3::NEG_INFINITY.signum(), F8E4M3::from_f32(-1.0));

assert!(F8E4M3::NAN.signum().is_nan());

Source

pub const fn classify(&self) -> FpCategory

Returns the floating point category of the number.

If only one property is going to be tested, it is generally faster to use the specific predicate instead.

§Examples

use std::num::FpCategory;

let num = F8E4M3::from_f32(12.4_f32);
let inf = F8E4M3::INFINITY;

assert_eq!(num.classify(), FpCategory::Normal);
assert_eq!(inf.classify(), FpCategory::Infinite);

Source §