/// Provides matrix multiplication operations at the stage level.
///
/// At the stage level:
/// - Inputs are assumed to be already staged into shared memory.
/// - All main flow planes within a Cube are used to solve the problem.
/// - Dimensions M, N and K are each fixed to an integer, and the matrix
///   multiplication works only for size (M, K) · (K, N) = (M, N). These
///   integers are multiples of the underlying Tile matmul, corresponding to
///   the number of tiles in each dimension.
///
/// # Assumptions
/// - Data given as inputs by stage readers must always be valid. If the actual
///   matrix multiplication should be done on smaller sizes than M, N and K,
///   padding with zeros must be done beforehand.
/// - Enough planes/units are launched to perform the whole computation.
pub trait StageMatmul<MP: MatmulPrecision>:
'static
+ Send
+ Sync {
/// The configuration type associated with this Matmul.
type Config: StageConfig;
/// Contains the matrix multiplication output, which can be shared across the
/// different planes of the cube. The same accumulator is added to over
/// multiple executions of the Stage Matmul.
type Accumulator: CubeType;
/// Type of the `lhs` argument taken by `execute` and `execute_with_listener`.
/// NOTE(review): presumably the stage reader for the left-hand-side input — confirm.
type LhsReader: CubeType;
/// Type of the `rhs` argument taken by `execute` and `execute_with_listener`.
/// NOTE(review): presumably the stage reader for the right-hand-side input — confirm.
type RhsReader: CubeType;
/// Type of the `instruction_lhs` argument of `execute`; first element of the
/// pair returned by `init_tile_inputs`.
type LhsTile: CubeType;
/// Type of the `instruction_rhs` argument of `execute`; second element of the
/// pair returned by `init_tile_inputs`.
type RhsTile: CubeType;
/// How to write to global memory after computation.
type Writer: GlobalWriter<MP::EO>;
// NOTE(review): the next line reads like rustdoc page UI text (a "show methods"
// toggle), not Rust source — confirm before treating this block as compilable.
Show 16 methods
// Required methods
/// Executes the matrix multiplication of Lhs and Rhs, adding the result to the
/// accumulator.
///
/// Equivalent to `execute_with_listener` with `SEL := NoEvent`.
fn execute(
lhs: &Self::LhsReader,
rhs: &Self::RhsReader,
instruction_lhs: &mut Self::LhsTile,
instruction_rhs: &mut Self::RhsTile,
acc: &mut Self::Accumulator,
config: Self::Config,
);
/// Executes the matrix multiplication of Lhs and Rhs, with the addition of an
/// injected event listener (`listener`).
fn execute_with_listener<SEL: StageEventListener<Self::Config>>(
lhs: &Self::LhsReader,
rhs: &Self::RhsReader,
instruction_lhs: &mut Self::LhsTile,
instruction_rhs: &mut Self::RhsTile,
acc: &mut Self::Accumulator,
config: Self::Config,
listener: SEL,
);
/// Inits the inputs of the underlying Tile Matmul, returned as an
/// `(LhsTile, RhsTile)` pair.
fn init_tile_inputs(config: Self::Config) -> (Self::LhsTile, Self::RhsTile);
/// Creates an instance of the accumulator, without data.
fn init_accumulator(config: Self::Config) -> Self::Accumulator;
/// Fills the accumulator with zeros.
fn zero_accumulator(acc: &mut Self::Accumulator, config: Self::Config);
/// Fills the accumulator with data obtained through `loader`.
fn fill_accumulator<L: AccumulatorLoader<MP>>(
loader: &mut L,
acc: &mut Self::Accumulator,
config: Self::Config,
);
/// Inits the writer at the given offsets (`x_offset`, `y_offset`,
/// `batch_offset`) over `tensor`.
fn init_writer(
tensor: VirtualTensor<MP::EO, ReadWrite>,
x_offset: u32,
y_offset: u32,
batch_offset: u32,
) -> Self::Writer;
/// Reads the result of the accumulator and hands it to the stage writer.
///
/// # Quantization
///
/// If some quantization is provided, the read will also requantize the stage
/// in the output and update the scaling of the output tensor. This assumes
/// that `execute` is called with some scaling provided.
fn write_results<G: GlobalConfig>(
acc: &Self::Accumulator,
out: &mut Self::Writer,
stage_config: Self::Config,
global_config: G,
);
// The `__expand_*` methods below mirror the methods above one-to-one: each takes
// an extra `scope: &mut Scope`, receives every `CubeType` argument as its
// `ExpandType`, and returns the `ExpandType` of the original return type.
// Config arguments are passed through with their original types.
// NOTE(review): presumably generated by a macro rather than hand-written — confirm.
/// Expand-form counterpart of `execute`.
fn __expand_execute(
scope: &mut Scope,
lhs: <Self::LhsReader as CubeType>::ExpandType,
rhs: <Self::RhsReader as CubeType>::ExpandType,
instruction_lhs: <Self::LhsTile as CubeType>::ExpandType,
instruction_rhs: <Self::RhsTile as CubeType>::ExpandType,
acc: <Self::Accumulator as CubeType>::ExpandType,
config: Self::Config,
) -> <() as CubeType>::ExpandType;
/// Expand-form counterpart of `execute_with_listener`.
fn __expand_execute_with_listener<SEL: StageEventListener<Self::Config>>(
scope: &mut Scope,
lhs: <Self::LhsReader as CubeType>::ExpandType,
rhs: <Self::RhsReader as CubeType>::ExpandType,
instruction_lhs: <Self::LhsTile as CubeType>::ExpandType,
instruction_rhs: <Self::RhsTile as CubeType>::ExpandType,
acc: <Self::Accumulator as CubeType>::ExpandType,
config: Self::Config,
listener: <SEL as CubeType>::ExpandType,
) -> <() as CubeType>::ExpandType;
/// Expand-form counterpart of `init_tile_inputs`.
fn __expand_init_tile_inputs(
scope: &mut Scope,
config: Self::Config,
) -> <(Self::LhsTile, Self::RhsTile) as CubeType>::ExpandType;
/// Expand-form counterpart of `init_accumulator`.
fn __expand_init_accumulator(
scope: &mut Scope,
config: Self::Config,
) -> <Self::Accumulator as CubeType>::ExpandType;
/// Expand-form counterpart of `zero_accumulator`.
fn __expand_zero_accumulator(
scope: &mut Scope,
acc: <Self::Accumulator as CubeType>::ExpandType,
config: Self::Config,
) -> <() as CubeType>::ExpandType;
/// Expand-form counterpart of `fill_accumulator`.
fn __expand_fill_accumulator<L: AccumulatorLoader<MP>>(
scope: &mut Scope,
loader: <L as CubeType>::ExpandType,
acc: <Self::Accumulator as CubeType>::ExpandType,
config: Self::Config,
) -> <() as CubeType>::ExpandType;
/// Expand-form counterpart of `init_writer`.
fn __expand_init_writer(
scope: &mut Scope,
tensor: <VirtualTensor<MP::EO, ReadWrite> as CubeType>::ExpandType,
x_offset: <u32 as CubeType>::ExpandType,
y_offset: <u32 as CubeType>::ExpandType,
batch_offset: <u32 as CubeType>::ExpandType,
) -> <Self::Writer as CubeType>::ExpandType;
/// Expand-form counterpart of `write_results`.
fn __expand_write_results<G: GlobalConfig>(
scope: &mut Scope,
acc: <Self::Accumulator as CubeType>::ExpandType,
out: <Self::Writer as CubeType>::ExpandType,
stage_config: Self::Config,
global_config: G,
) -> <() as CubeType>::ExpandType;
}
Expand description
Provides matrix multiplication operations at the stage level.
At the stage level,
- Inputs are assumed to be already staged into shared memory.
- All main flow planes within a Cube are used to solve the problem
- Dimensions M, N and K are each fixed to an integer, and the matrix multiplication works only for sizes (M, K) · (K, N) = (M, N). These integers are multiples of the underlying Tile matmul dimensions, corresponding to the number of tiles in each dimension.
Assumptions:
- Data given as inputs by stage readers must always be valid. If the actual matrix multiplication should be done on smaller sizes than M, N and K, padding with zeros must be done beforehand.
- Enough planes/units are launched to perform the whole computation
Required Associated Types§
Sourcetype Config: StageConfig
type Config: StageConfig
The configuration type associated with this Matmul.
Sourcetype Accumulator: CubeType
type Accumulator: CubeType
Contains the matrix multiplication output, which can be shared across the different planes of the cube. The same Accumulator is added to over multiple executions of the Stage Matmul.
Sourcetype Writer: GlobalWriter<MP::EO>
type Writer: GlobalWriter<MP::EO>
How to write to global memory after computation
Required Methods§
Sourcefn execute(
lhs: &Self::LhsReader,
rhs: &Self::RhsReader,
instruction_lhs: &mut Self::LhsTile,
instruction_rhs: &mut Self::RhsTile,
acc: &mut Self::Accumulator,
config: Self::Config,
)
fn execute( lhs: &Self::LhsReader, rhs: &Self::RhsReader, instruction_lhs: &mut Self::LhsTile, instruction_rhs: &mut Self::RhsTile, acc: &mut Self::Accumulator, config: Self::Config, )
Executes the matrix multiplication of Lhs and Rhs, adding the result to the accumulator
Equivalent to execute_with_listener with SEL:=NoEvent
Sourcefn execute_with_listener<SEL: StageEventListener<Self::Config>>(
lhs: &Self::LhsReader,
rhs: &Self::RhsReader,
instruction_lhs: &mut Self::LhsTile,
instruction_rhs: &mut Self::RhsTile,
acc: &mut Self::Accumulator,
config: Self::Config,
listener: SEL,
)
fn execute_with_listener<SEL: StageEventListener<Self::Config>>( lhs: &Self::LhsReader, rhs: &Self::RhsReader, instruction_lhs: &mut Self::LhsTile, instruction_rhs: &mut Self::RhsTile, acc: &mut Self::Accumulator, config: Self::Config, listener: SEL, )
Executes the matrix multiplication of Lhs and Rhs, with the addition of an injected event listener.
Sourcefn init_tile_inputs(config: Self::Config) -> (Self::LhsTile, Self::RhsTile)
fn init_tile_inputs(config: Self::Config) -> (Self::LhsTile, Self::RhsTile)
Inits inputs of the underlying Tile Matmul
Sourcefn init_accumulator(config: Self::Config) -> Self::Accumulator
fn init_accumulator(config: Self::Config) -> Self::Accumulator
Create an instance of the accumulator, without data
Sourcefn zero_accumulator(acc: &mut Self::Accumulator, config: Self::Config)
fn zero_accumulator(acc: &mut Self::Accumulator, config: Self::Config)
Fill the accumulator with zeros
Sourcefn fill_accumulator<L: AccumulatorLoader<MP>>(
loader: &mut L,
acc: &mut Self::Accumulator,
config: Self::Config,
)
fn fill_accumulator<L: AccumulatorLoader<MP>>( loader: &mut L, acc: &mut Self::Accumulator, config: Self::Config, )
Fill the accumulator with data
Sourcefn init_writer(
tensor: VirtualTensor<MP::EO, ReadWrite>,
x_offset: u32,
y_offset: u32,
batch_offset: u32,
) -> Self::Writer
fn init_writer( tensor: VirtualTensor<MP::EO, ReadWrite>, x_offset: u32, y_offset: u32, batch_offset: u32, ) -> Self::Writer
Inits the writer at the given offsets
Sourcefn write_results<G: GlobalConfig>(
acc: &Self::Accumulator,
out: &mut Self::Writer,
stage_config: Self::Config,
global_config: G,
)
fn write_results<G: GlobalConfig>( acc: &Self::Accumulator, out: &mut Self::Writer, stage_config: Self::Config, global_config: G, )
Reads the result of the accumulator and hands it to the stage writer
§Quantization
If some quantization is provided, the read will also requantize the stage in the output and update the scaling of the output tensor. This assumes that `execute` is called with some scaling provided.
fn __expand_execute( scope: &mut Scope, lhs: <Self::LhsReader as CubeType>::ExpandType, rhs: <Self::RhsReader as CubeType>::ExpandType, instruction_lhs: <Self::LhsTile as CubeType>::ExpandType, instruction_rhs: <Self::RhsTile as CubeType>::ExpandType, acc: <Self::Accumulator as CubeType>::ExpandType, config: Self::Config, ) -> <() as CubeType>::ExpandType
fn __expand_execute_with_listener<SEL: StageEventListener<Self::Config>>( scope: &mut Scope, lhs: <Self::LhsReader as CubeType>::ExpandType, rhs: <Self::RhsReader as CubeType>::ExpandType, instruction_lhs: <Self::LhsTile as CubeType>::ExpandType, instruction_rhs: <Self::RhsTile as CubeType>::ExpandType, acc: <Self::Accumulator as CubeType>::ExpandType, config: Self::Config, listener: <SEL as CubeType>::ExpandType, ) -> <() as CubeType>::ExpandType
fn __expand_init_tile_inputs( scope: &mut Scope, config: Self::Config, ) -> <(Self::LhsTile, Self::RhsTile) as CubeType>::ExpandType
fn __expand_init_accumulator( scope: &mut Scope, config: Self::Config, ) -> <Self::Accumulator as CubeType>::ExpandType
fn __expand_zero_accumulator( scope: &mut Scope, acc: <Self::Accumulator as CubeType>::ExpandType, config: Self::Config, ) -> <() as CubeType>::ExpandType
fn __expand_fill_accumulator<L: AccumulatorLoader<MP>>( scope: &mut Scope, loader: <L as CubeType>::ExpandType, acc: <Self::Accumulator as CubeType>::ExpandType, config: Self::Config, ) -> <() as CubeType>::ExpandType
fn __expand_init_writer( scope: &mut Scope, tensor: <VirtualTensor<MP::EO, ReadWrite> as CubeType>::ExpandType, x_offset: <u32 as CubeType>::ExpandType, y_offset: <u32 as CubeType>::ExpandType, batch_offset: <u32 as CubeType>::ExpandType, ) -> <Self::Writer as CubeType>::ExpandType
fn __expand_write_results<G: GlobalConfig>( scope: &mut Scope, acc: <Self::Accumulator as CubeType>::ExpandType, out: <Self::Writer as CubeType>::ExpandType, stage_config: Self::Config, global_config: G, ) -> <() as CubeType>::ExpandType
Dyn Compatibility§
This trait is not dyn compatible.
In older versions of Rust, dyn compatibility was called "object safety", so this trait is not object safe.