[][src]Struct half::bf16

#[repr(transparent)]
pub struct bf16(_);

A 16-bit floating point type implementing the bfloat16 format.

The bfloat16 floating point format is a truncated 16-bit version of the IEEE 754 standard binary32, a.k.a. f32. bf16 retains approximately the same dynamic range as f32 at the cost of a lower precision than f16. While f16 has a precision of 11 bits, bf16 has a precision of only 8 bits.

Like f16, bf16 does not offer arithmetic operations as it is intended for compact storage rather than calculations. Operations should be performed with f32 or higher-precision types and converted to/from bf16 as necessary.

Methods

impl bf16[src]

pub const fn from_bits(bits: u16) -> bf16[src]

Constructs a bf16 value from the raw bits.

pub fn from_f32(value: f32) -> bf16[src]

Constructs a bf16 value from a 32-bit floating point value.

If the 32-bit value is too large to fit, ±∞ will result. NaN values are preserved. Subnormal values that are too tiny to be represented will result in ±0. All other values are truncated and rounded to the nearest representable value.

pub fn from_f64(value: f64) -> bf16[src]

Constructs a bf16 value from a 64-bit floating point value.

If the 64-bit value is too large to fit, ±∞ will result. NaN values are preserved. 64-bit subnormal values are too tiny to be represented and result in ±0. Exponents that underflow the minimum exponent will result in subnormals or ±0. All other values are truncated and rounded to the nearest representable value.

pub const fn to_bits(self) -> u16[src]

Converts a bf16 into the underlying bit representation.

pub fn to_f32(self) -> f32[src]

Converts a bf16 value into an f32 value.

This conversion is lossless as all values can be represented exactly in f32.

pub fn to_f64(self) -> f64[src]

Converts a bf16 value into an f64 value.

This conversion is lossless as all values can be represented exactly in f64.

pub const fn is_nan(self) -> bool[src]

Returns true if this value is NaN and false otherwise.

Examples


let nan = bf16::NAN;
let f = bf16::from_f32(7.0_f32);

assert!(nan.is_nan());
assert!(!f.is_nan());

pub const fn is_infinite(self) -> bool[src]

Returns true if this value is ±∞ and false otherwise.

Examples


let f = bf16::from_f32(7.0f32);
let inf = bf16::INFINITY;
let neg_inf = bf16::NEG_INFINITY;
let nan = bf16::NAN;

assert!(!f.is_infinite());
assert!(!nan.is_infinite());

assert!(inf.is_infinite());
assert!(neg_inf.is_infinite());

pub const fn is_finite(self) -> bool[src]

Returns true if this number is neither infinite nor NaN.

Examples


let f = bf16::from_f32(7.0f32);
let inf = bf16::INFINITY;
let neg_inf = bf16::NEG_INFINITY;
let nan = bf16::NAN;

assert!(f.is_finite());

assert!(!nan.is_finite());
assert!(!inf.is_finite());
assert!(!neg_inf.is_finite());

pub fn is_normal(self) -> bool[src]

Returns true if the number is neither zero, infinite, subnormal, nor NaN.

Examples


let min = bf16::MIN_POSITIVE;
let max = bf16::MAX;
let lower_than_min = bf16::from_f32(1.0e-39_f32);
let zero = bf16::from_f32(0.0_f32);

assert!(min.is_normal());
assert!(max.is_normal());

assert!(!zero.is_normal());
assert!(!bf16::NAN.is_normal());
assert!(!bf16::INFINITY.is_normal());
// Values between 0 and `min` are subnormal.
assert!(!lower_than_min.is_normal());

pub fn classify(self) -> FpCategory[src]

Returns the floating point category of the number.

If only one property is going to be tested, it is generally faster to use the specific predicate instead.

Examples

use std::num::FpCategory;

let num = bf16::from_f32(12.4_f32);
let inf = bf16::INFINITY;

assert_eq!(num.classify(), FpCategory::Normal);
assert_eq!(inf.classify(), FpCategory::Infinite);

pub fn signum(self) -> bf16[src]

Returns a number that represents the sign of self.

  • 1.0 if the number is positive, +0.0 or INFINITY
  • −1.0 if the number is negative, −0.0 or NEG_INFINITY
  • NaN if the number is NaN

Examples


let f = bf16::from_f32(3.5_f32);

assert_eq!(f.signum(), bf16::from_f32(1.0));
assert_eq!(bf16::NEG_INFINITY.signum(), bf16::from_f32(-1.0));

assert!(bf16::NAN.signum().is_nan());

pub const fn is_sign_positive(self) -> bool[src]

Returns true if and only if self has a positive sign, including +0.0, NaNs with a positive sign bit and +∞.

Examples


let nan = bf16::NAN;
let f = bf16::from_f32(7.0_f32);
let g = bf16::from_f32(-7.0_f32);

assert!(f.is_sign_positive());
assert!(!g.is_sign_positive());
// NaN can be either positive or negative
assert!(nan.is_sign_positive() != nan.is_sign_negative());

pub const fn is_sign_negative(self) -> bool[src]

Returns true if and only if self has a negative sign, including −0.0, NaNs with a negative sign bit and −∞.

Examples


let nan = bf16::NAN;
let f = bf16::from_f32(7.0f32);
let g = bf16::from_f32(-7.0f32);

assert!(!f.is_sign_negative());
assert!(g.is_sign_negative());
// NaN can be either positive or negative
assert!(nan.is_sign_positive() != nan.is_sign_negative());

pub const DIGITS: u32[src]

Approximate number of bf16 significant digits in base 10.

pub const EPSILON: bf16[src]

bf16 machine epsilon value.

This is the difference between 1.0 and the next larger representable number.

pub const INFINITY: bf16[src]

bf16 positive infinity (+∞).

pub const MANTISSA_DIGITS: u32[src]

Number of bf16 significant digits in base 2.

pub const MAX: bf16[src]

Largest finite bf16 value.

pub const MAX_10_EXP: i32[src]

Maximum possible bf16 power of 10 exponent.

pub const MAX_EXP: i32[src]

Maximum possible bf16 power of 2 exponent.

pub const MIN: bf16[src]

Smallest finite bf16 value.

pub const MIN_10_EXP: i32[src]

Minimum possible normal bf16 power of 10 exponent.

pub const MIN_EXP: i32[src]

One greater than the minimum possible normal bf16 power of 2 exponent.

pub const MIN_POSITIVE: bf16[src]

Smallest positive normal bf16 value.

pub const NAN: bf16[src]

bf16 Not a Number (NaN).

pub const NEG_INFINITY: bf16[src]

bf16 negative infinity (−∞).

pub const RADIX: u32[src]

The radix or base of the internal representation of bf16.

pub const MIN_POSITIVE_SUBNORMAL: bf16[src]

Minimum positive subnormal bf16 value.

pub const MAX_SUBNORMAL: bf16[src]

Maximum subnormal bf16 value.

pub const ONE: bf16[src]

bf16 1

pub const ZERO: bf16[src]

bf16 0

pub const NEG_ZERO: bf16[src]

bf16 -0

pub const E: bf16[src]

bf16 Euler's number (ℯ).

pub const PI: bf16[src]

bf16 Archimedes' constant (π).

pub const FRAC_1_PI: bf16[src]

bf16 1/π

pub const FRAC_1_SQRT_2: bf16[src]

bf16 1/√2

pub const FRAC_2_PI: bf16[src]

bf16 2/π

pub const FRAC_2_SQRT_PI: bf16[src]

bf16 2/√π

pub const FRAC_PI_2: bf16[src]

bf16 π/2

pub const FRAC_PI_3: bf16[src]

bf16 π/3

pub const FRAC_PI_4: bf16[src]

bf16 π/4

pub const FRAC_PI_6: bf16[src]

bf16 π/6

pub const FRAC_PI_8: bf16[src]

bf16 π/8

pub const LN_10: bf16[src]

bf16 𝗅𝗇 10

pub const LN_2: bf16[src]

bf16 𝗅𝗇 2

pub const LOG10_E: bf16[src]

bf16 𝗅𝗈𝗀₁₀ℯ

pub const LOG2_E: bf16[src]

bf16 𝗅𝗈𝗀₂ℯ

pub const SQRT_2: bf16[src]

bf16 √2

Trait Implementations

impl Clone for bf16[src]

impl Copy for bf16[src]

impl Debug for bf16[src]

impl Default for bf16[src]

impl<'de> Deserialize<'de> for bf16[src]

impl Display for bf16[src]

impl From<bf16> for f32[src]

impl From<bf16> for f64[src]

impl From<i8> for bf16[src]

impl From<u8> for bf16[src]

impl FromStr for bf16[src]

type Err = ParseFloatError

The associated error which can be returned from parsing.

impl LowerExp for bf16[src]

impl PartialEq<bf16> for bf16[src]

impl PartialOrd<bf16> for bf16[src]

impl Serialize for bf16[src]

impl UpperExp for bf16[src]

Auto Trait Implementations

impl RefUnwindSafe for bf16

impl Send for bf16

impl Sync for bf16

impl Unpin for bf16

impl UnwindSafe for bf16

Blanket Implementations

impl<T> Any for T where
    T: 'static + ?Sized
[src]

impl<T> Borrow<T> for T where
    T: ?Sized
[src]

impl<T> BorrowMut<T> for T where
    T: ?Sized
[src]

impl<T> DeserializeOwned for T where
    T: Deserialize<'de>, 
[src]

impl<T> From<T> for T[src]

impl<T, U> Into<U> for T where
    U: From<T>, 
[src]

impl<T> ToOwned for T where
    T: Clone
[src]

type Owned = T

The resulting type after obtaining ownership.

impl<T> ToString for T where
    T: Display + ?Sized
[src]

impl<T, U> TryFrom<U> for T where
    U: Into<T>, 
[src]

type Error = Infallible

The type returned in the event of a conversion error.

impl<T, U> TryInto<U> for T where
    U: TryFrom<T>, 
[src]

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.