Struct MmaDefinition

Source

pub struct MmaDefinition<A: CubeType, B: CubeType, CD: CubeType> { /* private fields */ }

Expand description

Defines a matrix multiplication operation, including the input and output type, and the shape.

Implementations§

Source §

impl<A: CubePrimitive, B: CubePrimitive, CD: CubePrimitive> MmaDefinition<A, B, CD>

Source

pub fn new(m: u32, n: u32, k: u32) -> Self

Create a new matrix definition that is going to be used in the manual matrix-multiply and accumulate function.

You have to declare the shape used for the execution. The shape of the current matrix is determined using the MatrixIdent.

MatrixIdent::A Shape => (M, K)
MatrixIdent::B Shape => (K, N)
MatrixIdent::Accumulator Shape => (M, N)

Not all shapes are supported, and the permitted shapes depend on the element type. Layout for manual MMA is determined by the runtime and must be handled manually. Use [line_layout] to check the correct data layout for each element.

Refer to the NVIDIA documentation.

Source

pub fn new_scaled<S: CubePrimitive>( m: u32, n: u32, k: u32, scale_factor: u32, ) -> Self

Create a new matrix definition that is going to be used in the manual matrix-multiply and accumulate function.

You have to declare the shape used for the execution. The shape of the current matrix is determined using the MatrixIdent.

MatrixIdent::A Shape => (M, K)
MatrixIdent::B Shape => (K, N)
MatrixIdent::Accumulator Shape => (M, N)

Not all shapes are supported, and the permitted shapes depend on the element type. Layout for manual MMA is determined by the runtime and must be handled manually. Use [line_layout] to check the correct data layout for each element.

Refer to the NVIDIA documentation.

Source

pub fn num_elems(&self, ident: MatrixIdent) -> u32

Number of elements in the matrix

Source

pub fn elems_per_lane(&self, ident: MatrixIdent) -> u32

Returns the number of elements handled by each lane. Should be packed into Lines of size line_size with [line_layout].

§Note

“Lane” here refers to the unit relative to a plane, to distinguish it from a unit relative to a cube.

Source

pub fn lines_per_lane(&self, ident: MatrixIdent) -> u32

Returns the number of lines of size line_size with layout line_layout per lane.

§Note

“Lane” here refers to the unit relative to a plane, to distinguish it from a unit relative to a cube.

Source

pub fn line_layout(&self, ident: MatrixIdent) -> MatrixLayout

The layout of each line in this matrix (row major or column major)

Source

pub fn line_size(&self, ident: MatrixIdent) -> u32

Number of elements in each line passed to the execute function

Source

pub fn position_of_nth( &self, lane_id: u32, elem_idx: u32, ident: MatrixIdent, ) -> (u32, u32)

Returns the coordinates of the nth element handled by the lane_id. Each lane contains [elems_per_lane] elements in [line_size] chunks. Returns (row_idx, col_idx).

§Note

“Lane” here refers to the unit relative to a plane, to distinguish it from a unit relative to a cube.

Source

pub fn scales_index(&self, lane_id: u32, ident: MatrixIdent) -> u32

Index of the scales for this thread, along the non-major dimension of the matrix. Each thread loads all scales in the major direction into a single Line.

Source

pub fn scales_count(&self) -> u32

Number of scales in each line (not the line size!). Line size may include padding bytes.

Source

pub fn scales_line_size(&self) -> u32

Line size for the scale factors. May be larger than the total number of scales.

Source

pub fn execute( &self, registers_a: &Sequence<Line<A>>, registers_b: &Sequence<Line<B>>, registers_c: &Sequence<Line<CD>>, ) -> Array<Line<CD>> ⓘ

Execute a low level mma operation with manually managed registers. Register layout and index mapping can be retrieved from the [MatrixDefinition]

Source

pub fn execute_scaled<S: CubePrimitive>( &self, registers_a: &Sequence<Line<A>>, registers_b: &Sequence<Line<B>>, registers_c: &Sequence<Line<CD>>, scales_a: Line<S>, scales_b: Line<S>, ) -> Array<Line<CD>> ⓘ

Execute a low level block scaled mma operation with manually managed registers. Register layout and index mapping can be retrieved from the [MatrixDefinition]

Source

pub fn __expand_new( scope: &mut Scope, m: u32, n: u32, k: u32, ) -> <Self as CubeType>::ExpandType

Source

pub fn __expand_new_scaled<S: CubePrimitive>( scope: &mut Scope, m: u32, n: u32, k: u32, scale_factor: u32, ) -> <Self as CubeType>::ExpandType

Source

pub fn __expand_num_elems( scope: &mut Scope, this: <Self as CubeType>::ExpandType, ident: MatrixIdent, ) -> u32

Source

pub fn __expand_elems_per_lane( scope: &mut Scope, this: <Self as CubeType>::ExpandType, ident: MatrixIdent, ) -> u32

Source

pub fn __expand_lines_per_lane( scope: &mut Scope, this: <Self as CubeType>::ExpandType, ident: MatrixIdent, ) -> u32

Source

pub fn __expand_line_layout( scope: &mut Scope, this: <Self as CubeType>::ExpandType, ident: MatrixIdent, ) -> MatrixLayout

Source

pub fn __expand_line_size( scope: &mut Scope, this: <Self as CubeType>::ExpandType, ident: MatrixIdent, ) -> u32

Source

pub fn __expand_position_of_nth( scope: &mut Scope, this: <Self as CubeType>::ExpandType, lane_id: <u32 as CubeType>::ExpandType, elem_idx: <u32 as CubeType>::ExpandType, ident: MatrixIdent, ) -> <(u32, u32) as CubeType>::ExpandType

Source

pub fn __expand_scales_index( scope: &mut Scope, this: <Self as CubeType>::ExpandType, lane_id: <u32 as CubeType>::ExpandType, ident: MatrixIdent, ) -> <u32 as CubeType>::ExpandType

Source

pub fn __expand_scales_count( scope: &mut Scope, this: <Self as CubeType>::ExpandType, ) -> u32

Source

pub fn __expand_scales_line_size( scope: &mut Scope, this: <Self as CubeType>::ExpandType, ) -> u32

Source

pub fn __expand_execute( scope: &mut Scope, this: <Self as CubeType>::ExpandType, registers_a: <Sequence<Line<A>> as CubeType>::ExpandType, registers_b: <Sequence<Line<B>> as CubeType>::ExpandType, registers_c: <Sequence<Line<CD>> as CubeType>::ExpandType, ) -> <Array<Line<CD>> as CubeType>::ExpandType ⓘ

Source

pub fn __expand_execute_scaled<S: CubePrimitive>( scope: &mut Scope, this: <Self as CubeType>::ExpandType, registers_a: <Sequence<Line<A>> as CubeType>::ExpandType, registers_b: <Sequence<Line<B>> as CubeType>::ExpandType, registers_c: <Sequence<Line<CD>> as CubeType>::ExpandType, scales_a: <Line<S> as CubeType>::ExpandType, scales_b: <Line<S> as CubeType>::ExpandType, ) -> <Array<Line<CD>> as CubeType>::ExpandType ⓘ

Trait Implementations§

Source §

impl<A: Clone + CubeType, B: Clone + CubeType, CD: Clone + CubeType> Clone for MmaDefinition<A, B, CD>

Source §

fn clone(&self) -> MmaDefinition<A, B, CD>

Returns a duplicate of the value. Read more

1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more

Source §

impl<A: CubeType, B: CubeType, CD: CubeType> CubeType for MmaDefinition<A, B, CD>

Source §

type ExpandType = MmaDefinitionExpand<A, B, CD>

Source §

fn into_mut(scope: &mut Scope, expand: Self::ExpandType) -> Self::ExpandType

Wrapper around the init method, necessary for type inference.

Source §

impl<A: Copy + CubeType, B: Copy + CubeType, CD: Copy + CubeType> Copy for MmaDefinition<A, B, CD>

Auto Trait Implementations§

§

impl<A, B, CD> Freeze for MmaDefinition<A, B, CD>

§

impl<A, B, CD> RefUnwindSafe for MmaDefinition<A, B, CD>
where A: RefUnwindSafe, B: RefUnwindSafe, CD: RefUnwindSafe,

§

impl<A, B, CD> Send for MmaDefinition<A, B, CD>
where A: Send, B: Send, CD: Send,

§

impl<A, B, CD> Sync for MmaDefinition<A, B, CD>
where A: Sync, B: Sync, CD: Sync,

§

impl<A, B, CD> Unpin for MmaDefinition<A, B, CD>
where A: Unpin, B: Unpin, CD: Unpin,

§

impl<A, B, CD> UnwindSafe for MmaDefinition<A, B, CD>
where A: UnwindSafe, B: UnwindSafe, CD: UnwindSafe,

Blanket Implementations§

Source §

impl<T> Any for T
where T: 'static + ?Sized,

Source §

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more

Source §

impl<T> Borrow<T> for T
where T: ?Sized,

Source §

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more

Source §

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source §

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more

Source §

impl<T> CloneToUninit for T
where T: Clone,

Source §

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬This is a nightly-only experimental API. (clone_to_uninit)

Performs copy-assignment from self to dest. Read more

Source §

impl<T> From<T> for T

Source §

fn from(t: T) -> T

Returns the argument unchanged.

Source §

impl<T, U> Into<U> for T
where U: From<T>,

Source §

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source §

impl<T> ToOwned for T
where T: Clone,

Source §

type Owned = T

The resulting type after obtaining ownership.

Source §

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more

Source §

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more

Source §

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source §

type Error = Infallible

The type returned in the event of a conversion error.

Source §

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.

Source §

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source §

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.

Source §

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.

Source §

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source §

MmaDefinition

Struct MmaDefinition Copy item path

Implementations§

impl<A: CubePrimitive, B: CubePrimitive, CD: CubePrimitive> MmaDefinition<A, B, CD>

pub fn new(m: u32, n: u32, k: u32) -> Self

pub fn new_scaled<S: CubePrimitive>( m: u32, n: u32, k: u32, scale_factor: u32, ) -> Self

pub fn num_elems(&self, ident: MatrixIdent) -> u32

pub fn elems_per_lane(&self, ident: MatrixIdent) -> u32

§Note

pub fn lines_per_lane(&self, ident: MatrixIdent) -> u32

§Note

pub fn line_layout(&self, ident: MatrixIdent) -> MatrixLayout

pub fn line_size(&self, ident: MatrixIdent) -> u32

pub fn position_of_nth( &self, lane_id: u32, elem_idx: u32, ident: MatrixIdent, ) -> (u32, u32)

§Note

pub fn scales_index(&self, lane_id: u32, ident: MatrixIdent) -> u32

pub fn scales_count(&self) -> u32

pub fn scales_line_size(&self) -> u32

pub fn execute( &self, registers_a: &Sequence<Line<A>>, registers_b: &Sequence<Line<B>>, registers_c: &Sequence<Line<CD>>, ) -> Array<Line<CD>> ⓘ

pub fn execute_scaled<S: CubePrimitive>( &self, registers_a: &Sequence<Line<A>>, registers_b: &Sequence<Line<B>>, registers_c: &Sequence<Line<CD>>, scales_a: Line<S>, scales_b: Line<S>, ) -> Array<Line<CD>> ⓘ

pub fn __expand_new( scope: &mut Scope, m: u32, n: u32, k: u32, ) -> <Self as CubeType>::ExpandType

pub fn __expand_new_scaled<S: CubePrimitive>( scope: &mut Scope, m: u32, n: u32, k: u32, scale_factor: u32, ) -> <Self as CubeType>::ExpandType

pub fn __expand_num_elems( scope: &mut Scope, this: <Self as CubeType>::ExpandType, ident: MatrixIdent, ) -> u32

pub fn __expand_elems_per_lane( scope: &mut Scope, this: <Self as CubeType>::ExpandType, ident: MatrixIdent, ) -> u32

pub fn __expand_lines_per_lane( scope: &mut Scope, this: <Self as CubeType>::ExpandType, ident: MatrixIdent, ) -> u32

pub fn __expand_line_layout( scope: &mut Scope, this: <Self as CubeType>::ExpandType, ident: MatrixIdent, ) -> MatrixLayout

pub fn __expand_line_size( scope: &mut Scope, this: <Self as CubeType>::ExpandType, ident: MatrixIdent, ) -> u32

pub fn __expand_position_of_nth( scope: &mut Scope, this: <Self as CubeType>::ExpandType, lane_id: <u32 as CubeType>::ExpandType, elem_idx: <u32 as CubeType>::ExpandType, ident: MatrixIdent, ) -> <(u32, u32) as CubeType>::ExpandType

pub fn __expand_scales_index( scope: &mut Scope, this: <Self as CubeType>::ExpandType, lane_id: <u32 as CubeType>::ExpandType, ident: MatrixIdent, ) -> <u32 as CubeType>::ExpandType

pub fn __expand_scales_count( scope: &mut Scope, this: <Self as CubeType>::ExpandType, ) -> u32

pub fn __expand_scales_line_size( scope: &mut Scope, this: <Self as CubeType>::ExpandType, ) -> u32

Trait Implementations§

impl<A: Clone + CubeType, B: Clone + CubeType, CD: Clone + CubeType> Clone for MmaDefinition<A, B, CD>

fn clone(&self) -> MmaDefinition<A, B, CD>

fn clone_from(&mut self, source: &Self)

impl<A: CubeType, B: CubeType, CD: CubeType> CubeType for MmaDefinition<A, B, CD>

type ExpandType = MmaDefinitionExpand<A, B, CD>

fn into_mut(scope: &mut Scope, expand: Self::ExpandType) -> Self::ExpandType

impl<A: Copy + CubeType, B: Copy + CubeType, CD: Copy + CubeType> Copy for MmaDefinition<A, B, CD>

Auto Trait Implementations§

impl<A, B, CD> Freeze for MmaDefinition<A, B, CD>

impl<A, B, CD> RefUnwindSafe for MmaDefinition<A, B, CD> where A: RefUnwindSafe, B: RefUnwindSafe, CD: RefUnwindSafe,

impl<A, B, CD> Send for MmaDefinition<A, B, CD> where A: Send, B: Send, CD: Send,

impl<A, B, CD> Sync for MmaDefinition<A, B, CD> where A: Sync, B: Sync, CD: Sync,

impl<A, B, CD> Unpin for MmaDefinition<A, B, CD> where A: Unpin, B: Unpin, CD: Unpin,

impl<A, B, CD> UnwindSafe for MmaDefinition<A, B, CD> where A: UnwindSafe, B: UnwindSafe, CD: UnwindSafe,

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

impl<V, T> VZip<V> for T where V: MultiLane<T>,

fn vzip(self) -> V

Struct MmaDefinition

impl<A, B, CD> RefUnwindSafe for MmaDefinition<A, B, CD>
where A: RefUnwindSafe, B: RefUnwindSafe, CD: RefUnwindSafe,

impl<A, B, CD> Send for MmaDefinition<A, B, CD>
where A: Send, B: Send, CD: Send,

impl<A, B, CD> Sync for MmaDefinition<A, B, CD>
where A: Sync, B: Sync, CD: Sync,

impl<A, B, CD> Unpin for MmaDefinition<A, B, CD>
where A: Unpin, B: Unpin, CD: Unpin,

impl<A, B, CD> UnwindSafe for MmaDefinition<A, B, CD>
where A: UnwindSafe, B: UnwindSafe, CD: UnwindSafe,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> ToOwned for T
where T: Clone,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

impl<V, T> VZip<V> for T
where V: MultiLane<T>,