Enum pjrt_sys::protos::xla::precision_config::Algorithm

source ·

#[repr(i32)]
pub enum Algorithm {
    AlgUnset = 0,
    AlgDotAnyF8AnyF8F32 = 1,
    AlgDotAnyF8AnyF8F32FastAccum = 2,
    AlgDotF16F16F16 = 3,
    AlgDotF16F16F32 = 4,
    AlgDotBf16Bf16Bf16 = 5,
    AlgDotBf16Bf16F32 = 6,
    AlgDotBf16Bf16F32X3 = 7,
    AlgDotBf16Bf16F32X6 = 8,
    AlgDotTf32Tf32F32 = 9,
    AlgDotTf32Tf32F32X3 = 10,
    AlgDotF32F32F32 = 11,
    AlgDotF64F64F64 = 12,
}

Expand description

The algorithm used to evaluate the instruction.

The naming convention for the dot instruction is ALG_DOT_{A_TYPE}_{B_TYPE}_{ACCUM_TYPE}[_X{NUM_OPS}] where A_TYPE, B_TYPE and ACCUM_TYPE correspond to the types in the “primitive dot operations” (such as TensorCore operations) and NUM_OPS is the number of such operations used per “primitive tile”. When the NUM_OPS field is skipped, it is assumed to be 1. The types mentioned in the name are independent of the storage types.

In general, A_TYPE and B_TYPE are the precisions that the LHS and RHS of the operation are rounded to, and ACCUM_TYPE is the accumulation type. If a backend does not support the given algorithm, an error is raised. The Algorithm enum is intended to eventually replace the Precision enum.

Variants§

§

AlgUnset = 0

If the algorithm is ALG_UNSET, we will decide the algorithm based on the operand_precision values (for now).

§

AlgDotAnyF8AnyF8F32 = 1

The storage type can be any 8-bit floating point type.

§

AlgDotAnyF8AnyF8F32FastAccum = 2

The storage type can be any 8-bit floating point type. Intermediate results will not be periodically promoted to a higher precision. This corresponds to CUBLASLT_MATMUL_DESC_FAST_ACCUM. Triton’s maxNumImpreciseAcc=32 setting may be similar.

§

AlgDotBf16Bf16F32X3 = 7

An algorithm which uses 3 BF16_BF16_F32 matmuls to achieve better precision.

§

AlgDotBf16Bf16F32X6 = 8

An algorithm which uses 6 BF16_BF16_F32 matmuls to achieve better precision (similar to F32).

§

AlgDotTf32Tf32F32 = 9

§

AlgDotTf32Tf32F32X3 = 10

An algorithm which uses 3 TF32_TF32_F32 matmuls to achieve better precision (similar to F32).

§

Enum pjrt_sys::protos::xla::precision_config::Algorithm · Copy item path

Variants§

AlgUnset = 0

AlgDotAnyF8AnyF8F32 = 1

AlgDotAnyF8AnyF8F32FastAccum = 2

AlgDotF16F16F16 = 3

AlgDotF16F16F32 = 4

AlgDotBf16Bf16Bf16 = 5

AlgDotBf16Bf16F32 = 6

AlgDotBf16Bf16F32X3 = 7

AlgDotBf16Bf16F32X6 = 8

AlgDotTf32Tf32F32 = 9

AlgDotTf32Tf32F32X3 = 10

AlgDotF32F32F32 = 11

AlgDotF64F64F64 = 12

Implementations§

impl Algorithm

pub fn is_valid(value: i32) -> bool

pub fn from_i32(value: i32) -> Option<Algorithm>

impl Algorithm

pub fn as_str_name(&self) -> &'static str

pub fn from_str_name(value: &str) -> Option<Self>

Trait Implementations§

impl Clone for Algorithm

fn clone(&self) -> Algorithm

fn clone_from(&mut self, source: &Self)

impl Debug for Algorithm

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl Default for Algorithm

fn default() -> Algorithm

impl From<Algorithm> for i32

fn from(value: Algorithm) -> i32

impl Hash for Algorithm

fn hash<__H: Hasher>(&self, state: &mut __H)

fn hash_slice<H>(data: &[Self], state: &mut H)
where H: Hasher, Self: Sized,

impl Ord for Algorithm

fn cmp(&self, other: &Algorithm) -> Ordering

fn max(self, other: Self) -> Self
where Self: Sized,

fn min(self, other: Self) -> Self
where Self: Sized,

fn clamp(self, min: Self, max: Self) -> Self
where Self: Sized + PartialOrd,

impl PartialEq for Algorithm

fn eq(&self, other: &Algorithm) -> bool

fn ne(&self, other: &Rhs) -> bool

impl PartialOrd for Algorithm

fn partial_cmp(&self, other: &Algorithm) -> Option<Ordering>

fn lt(&self, other: &Rhs) -> bool

fn le(&self, other: &Rhs) -> bool

fn gt(&self, other: &Rhs) -> bool

fn ge(&self, other: &Rhs) -> bool

impl TryFrom<i32> for Algorithm

type Error = UnknownEnumValue

fn try_from(value: i32) -> Result<Algorithm, UnknownEnumValue>

impl Copy for Algorithm

impl Eq for Algorithm

impl StructuralPartialEq for Algorithm

Auto Trait Implementations§

impl Freeze for Algorithm

impl RefUnwindSafe for Algorithm

impl Send for Algorithm

impl Sync for Algorithm

impl Unpin for Algorithm

impl UnwindSafe for Algorithm

Blanket Implementations§

impl<T> Any for T
where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T
where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T
where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T
where T: Clone,

unsafe fn clone_to_uninit(&self, dst: *mut T)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T
where U: From<T>,

fn into(self) -> U

impl<T> ToOwned for T
where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Enum pjrt_sys::protos::xla::precision_config::Algorithm

fn hash<H: Hasher>(&self, state: &mut H)

fn hash_slice<H>(data: &[Self], state: &mut H)
where H: Hasher, Self: Sized,

fn max(self, other: Self) -> Self
where Self: Sized,

fn min(self, other: Self) -> Self
where Self: Sized,

fn clamp(self, min: Self, max: Self) -> Self
where Self: Sized + PartialOrd,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,