pub struct AcceleratedMatmul<Acc: TileKind> { /* private fields */ }
Uses one plane to perform a small matmul using accelerated instructions.
Trait Implementations§
Source§impl<L: Numeric, R: Numeric, A: Numeric, AccTile: TileKind> TileMatmul<L, R, A> for AcceleratedMatmul<AccTile> where
CmmaStageReader<AccTile>: CmmaFragmentReader<TileKind = AccTile>,
impl<L: Numeric, R: Numeric, A: Numeric, AccTile: TileKind> TileMatmul<L, R, A> for AcceleratedMatmul<AccTile> where
CmmaStageReader<AccTile>: CmmaFragmentReader<TileKind = AccTile>,
Source§type LhsFragment = Matrix<L>
type LhsFragment = Matrix<L>
Contains Lhs data for computation
Source§type RhsFragment = Matrix<R>
type RhsFragment = Matrix<R>
Contains Rhs data for computation
Source§type AccFragment = Matrix<A>
type AccFragment = Matrix<A>
Contains and accumulates results of the Tile Matmul execution
Source§fn execute(
lhs: &Self::LhsFragment,
rhs: &Self::RhsFragment,
out: &mut Self::AccFragment,
_config: Self::Config,
)
fn execute( lhs: &Self::LhsFragment, rhs: &Self::RhsFragment, out: &mut Self::AccFragment, _config: Self::Config, )
Executes the matrix multiplication of Lhs and Rhs, adding the result to the accumulator
Source§fn allocate_lhs(config: Self::Config) -> Self::LhsFragment
fn allocate_lhs(config: Self::Config) -> Self::LhsFragment
Create the container for Lhs. Read more
Source§fn allocate_rhs(config: Self::Config) -> Self::RhsFragment
fn allocate_rhs(config: Self::Config) -> Self::RhsFragment
Create the container for Rhs. Read more
Source§fn load_lhs<E: Numeric>(
tile: &StridedTile<E>,
lhs: &mut Self::LhsFragment,
_config: Self::Config,
)
fn load_lhs<E: Numeric>( tile: &StridedTile<E>, lhs: &mut Self::LhsFragment, _config: Self::Config, )
Load the container of Lhs from tile data
Source§fn load_rhs<E: Numeric>(
tile: &StridedTile<E>,
rhs: &mut Self::RhsFragment,
_config: Self::Config,
)
fn load_rhs<E: Numeric>( tile: &StridedTile<E>, rhs: &mut Self::RhsFragment, _config: Self::Config, )
Load the container of Rhs from tile data
Source§fn load_acc<E: Numeric>(
tile: &AccTile::Tile<E>,
acc: &mut Self::AccFragment,
config: Self::Config,
)
fn load_acc<E: Numeric>( tile: &AccTile::Tile<E>, acc: &mut Self::AccFragment, config: Self::Config, )
Load the container of Acc from tile data
Source§fn write_results<E: Numeric>(
tile: &mut StridedTile<E, ReadWrite>,
out: &Self::AccFragment,
_config: Self::Config,
)
fn write_results<E: Numeric>( tile: &mut StridedTile<E, ReadWrite>, out: &Self::AccFragment, _config: Self::Config, )
Write the content of the output container to the given slice
Source§fn allocate_acc(config: Self::Config) -> Self::AccFragment
fn allocate_acc(config: Self::Config) -> Self::AccFragment
Allocate the container to receive the execution output. Read more
fn __expand_execute( scope: &mut Scope, lhs: <Self::LhsFragment as CubeType>::ExpandType, rhs: <Self::RhsFragment as CubeType>::ExpandType, out: <Self::AccFragment as CubeType>::ExpandType, _config: Self::Config, ) -> <() as CubeType>::ExpandType
fn __expand_allocate_lhs( scope: &mut Scope, config: Self::Config, ) -> <Self::LhsFragment as CubeType>::ExpandType
fn __expand_allocate_rhs( scope: &mut Scope, config: Self::Config, ) -> <Self::RhsFragment as CubeType>::ExpandType
fn __expand_load_lhs<E: Numeric>( scope: &mut Scope, tile: <StridedTile<E> as CubeType>::ExpandType, lhs: <Self::LhsFragment as CubeType>::ExpandType, _config: Self::Config, ) -> <() as CubeType>::ExpandType
fn __expand_load_rhs<E: Numeric>( scope: &mut Scope, tile: <StridedTile<E> as CubeType>::ExpandType, rhs: <Self::RhsFragment as CubeType>::ExpandType, _config: Self::Config, ) -> <() as CubeType>::ExpandType
fn __expand_load_acc<E: Numeric>( scope: &mut Scope, tile: <AccTile::Tile<E> as CubeType>::ExpandType, acc: <Self::AccFragment as CubeType>::ExpandType, config: Self::Config, ) -> <() as CubeType>::ExpandType
fn __expand_write_results<E: Numeric>( scope: &mut Scope, tile: <StridedTile<E, ReadWrite> as CubeType>::ExpandType, out: <Self::AccFragment as CubeType>::ExpandType, _config: Self::Config, ) -> <() as CubeType>::ExpandType
fn __expand_allocate_acc( scope: &mut Scope, config: Self::Config, ) -> <Self::AccFragment as CubeType>::ExpandType
Source§impl<Tile: TileKind> TileMatmulFamily for AcceleratedMatmul<Tile> where
CmmaStageReader<Tile>: CmmaFragmentReader<TileKind = Tile>,
impl<Tile: TileKind> TileMatmulFamily for AcceleratedMatmul<Tile> where
CmmaStageReader<Tile>: CmmaFragmentReader<TileKind = Tile>,
Source§type Matmul<L: Numeric, R: Numeric, A: Numeric> = AcceleratedMatmul<Tile>
type Matmul<L: Numeric, R: Numeric, A: Numeric> = AcceleratedMatmul<Tile>
The specific TileMatmul implementation associated with this family.
Source§fn requires_accelerator() -> bool
fn requires_accelerator() -> bool
Returns whether this tile matmul requires specialized hardware accelerators (e.g., tensor cores).
Source§fn computation_resources() -> Result<ComputeResources, InvalidConfigError>
fn computation_resources() -> Result<ComputeResources, InvalidConfigError>
Returns the compute resources required to run this tile matmul.
Source§fn setup<Lhs: Numeric, Rhs: Numeric, Acc: Numeric, R: Runtime>(
client: &ComputeClient<R::Server>,
problem: &MatmulProblem,
selection: &MatmulSelection,
matmul_line_sizes: &MatmulLineSizes,
) -> Result<Self::Config, MatmulSetupError>
fn setup<Lhs: Numeric, Rhs: Numeric, Acc: Numeric, R: Runtime>( client: &ComputeClient<R::Server>, problem: &MatmulProblem, selection: &MatmulSelection, matmul_line_sizes: &MatmulLineSizes, ) -> Result<Self::Config, MatmulSetupError>
Constructs the configuration based on the matmul problem, selection, and line sizes. Read more
Source§fn filter_line_sizes(
available_line_sizes: AvailableLineSizes,
) -> AvailableLineSizes
fn filter_line_sizes( available_line_sizes: AvailableLineSizes, ) -> AvailableLineSizes
Filters out line sizes that are incompatible with this matmul family. Read more
Auto Trait Implementations§
impl<Acc> Freeze for AcceleratedMatmul<Acc>
impl<Acc> RefUnwindSafe for AcceleratedMatmul<Acc> where
Acc: RefUnwindSafe,
impl<Acc> Send for AcceleratedMatmul<Acc>
impl<Acc> Sync for AcceleratedMatmul<Acc>
impl<Acc> Unpin for AcceleratedMatmul<Acc> where
Acc: Unpin,
impl<Acc> UnwindSafe for AcceleratedMatmul<Acc> where
Acc: UnwindSafe,
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more