Trait StageMatmul

Source

pub trait StageMatmul<MP: MatmulPrecision>:
    'static
    + Send
    + Sync {
    type Config: StageConfig;
    type Accumulator: CubeType;
    type LhsReader: CubeType;
    type RhsReader: CubeType;
    type LhsTile: CubeType;
    type RhsTile: CubeType;
    type Writer: GlobalWriter<MP::EO>;

Show 16 methods    // Required methods
    fn execute(
        lhs: &Self::LhsReader,
        rhs: &Self::RhsReader,
        instruction_lhs: &mut Self::LhsTile,
        instruction_rhs: &mut Self::RhsTile,
        acc: &mut Self::Accumulator,
        config: Self::Config,
    );
    fn execute_with_listener<SEL: StageEventListener<Self::Config>>(
        lhs: &Self::LhsReader,
        rhs: &Self::RhsReader,
        instruction_lhs: &mut Self::LhsTile,
        instruction_rhs: &mut Self::RhsTile,
        acc: &mut Self::Accumulator,
        config: Self::Config,
        listener: SEL,
    );
    fn init_tile_inputs(config: Self::Config) -> (Self::LhsTile, Self::RhsTile);
    fn init_accumulator(config: Self::Config) -> Self::Accumulator;
    fn zero_accumulator(acc: &mut Self::Accumulator, config: Self::Config);
    fn fill_accumulator<L: AccumulatorLoader<MP>>(
        loader: &mut L,
        acc: &mut Self::Accumulator,
        config: Self::Config,
    );
    fn init_writer(
        tensor: VirtualTensor<MP::EO, ReadWrite>,
        x_offset: u32,
        y_offset: u32,
        batch_offset: u32,
    ) -> Self::Writer;
    fn write_results<G: GlobalConfig>(
        acc: &Self::Accumulator,
        out: &mut Self::Writer,
        stage_config: Self::Config,
        global_config: G,
    );
    fn __expand_execute(
        scope: &mut Scope,
        lhs: <Self::LhsReader as CubeType>::ExpandType,
        rhs: <Self::RhsReader as CubeType>::ExpandType,
        instruction_lhs: <Self::LhsTile as CubeType>::ExpandType,
        instruction_rhs: <Self::RhsTile as CubeType>::ExpandType,
        acc: <Self::Accumulator as CubeType>::ExpandType,
        config: Self::Config,
    ) -> <() as CubeType>::ExpandType;
    fn __expand_execute_with_listener<SEL: StageEventListener<Self::Config>>(
        scope: &mut Scope,
        lhs: <Self::LhsReader as CubeType>::ExpandType,
        rhs: <Self::RhsReader as CubeType>::ExpandType,
        instruction_lhs: <Self::LhsTile as CubeType>::ExpandType,
        instruction_rhs: <Self::RhsTile as CubeType>::ExpandType,
        acc: <Self::Accumulator as CubeType>::ExpandType,
        config: Self::Config,
        listener: <SEL as CubeType>::ExpandType,
    ) -> <() as CubeType>::ExpandType;
    fn __expand_init_tile_inputs(
        scope: &mut Scope,
        config: Self::Config,
    ) -> <(Self::LhsTile, Self::RhsTile) as CubeType>::ExpandType;
    fn __expand_init_accumulator(
        scope: &mut Scope,
        config: Self::Config,
    ) -> <Self::Accumulator as CubeType>::ExpandType;
    fn __expand_zero_accumulator(
        scope: &mut Scope,
        acc: <Self::Accumulator as CubeType>::ExpandType,
        config: Self::Config,
    ) -> <() as CubeType>::ExpandType;
    fn __expand_fill_accumulator<L: AccumulatorLoader<MP>>(
        scope: &mut Scope,
        loader: <L as CubeType>::ExpandType,
        acc: <Self::Accumulator as CubeType>::ExpandType,
        config: Self::Config,
    ) -> <() as CubeType>::ExpandType;
    fn __expand_init_writer(
        scope: &mut Scope,
        tensor: <VirtualTensor<MP::EO, ReadWrite> as CubeType>::ExpandType,
        x_offset: <u32 as CubeType>::ExpandType,
        y_offset: <u32 as CubeType>::ExpandType,
        batch_offset: <u32 as CubeType>::ExpandType,
    ) -> <Self::Writer as CubeType>::ExpandType;
    fn __expand_write_results<G: GlobalConfig>(
        scope: &mut Scope,
        acc: <Self::Accumulator as CubeType>::ExpandType,
        out: <Self::Writer as CubeType>::ExpandType,
        stage_config: Self::Config,
        global_config: G,
    ) -> <() as CubeType>::ExpandType;
}

Expand description

Provides matrix multiplication operations at the stage level.

At the stage level:

- Inputs are assumed to be already staged into shared memory.
- All main flow planes within a Cube are used to solve the problem.
- Dimensions M, N and K are each fixed to an integer, and the matrix multiplication works only for size (M, K) · (K, N) = (M, N). These integers are multiples of the underlying Tile matmul, corresponding to the number of tiles in each dimension.

Assumptions:

- Data given as inputs by stage readers must always be valid. If the actual matrix multiplication should be done on smaller sizes than M, N and K, padding with zeros must be done beforehand.
- Enough planes/units are launched to perform the whole computation.

Required Associated Types§

Source

type Config: StageConfig

The configuration type associated with this Matmul.

Source

type Accumulator: CubeType

Contains the matrix multiplication output, which can be shared across the different planes of the cube. The same Accumulator is accumulated into across multiple executions of the Stage Matmul.

Source

type LhsTile: CubeType

Lhs input of the underlying Tile Matmul

Source

type RhsTile: CubeType

Rhs input of the underlying Tile Matmul

Source

type Writer: GlobalWriter<MP::EO>

How to write to global memory after computation

Required Methods§

Source

fn execute( lhs: &Self::LhsReader, rhs: &Self::RhsReader, instruction_lhs: &mut Self::LhsTile, instruction_rhs: &mut Self::RhsTile, acc: &mut Self::Accumulator, config: Self::Config, )

Executes the matrix multiplication of Lhs and Rhs, adding the result to the accumulator

Equivalent to execute_with_listener with SEL:=NoEvent

Source

fn execute_with_listener<SEL: StageEventListener<Self::Config>>( lhs: &Self::LhsReader, rhs: &Self::RhsReader, instruction_lhs: &mut Self::LhsTile, instruction_rhs: &mut Self::RhsTile, acc: &mut Self::Accumulator, config: Self::Config, listener: SEL, )

Executes the matrix multiplication of Lhs and Rhs, with the addition of an injected event listener.

Source

fn init_tile_inputs(config: Self::Config) -> (Self::LhsTile, Self::RhsTile)

Inits inputs of the underlying Tile Matmul

Source

fn init_accumulator(config: Self::Config) -> Self::Accumulator

Create an instance of the accumulator, without data

Source

fn zero_accumulator(acc: &mut Self::Accumulator, config: Self::Config)

Fill the accumulator with zeros

Source

fn fill_accumulator<L: AccumulatorLoader<MP>>( loader: &mut L, acc: &mut Self::Accumulator, config: Self::Config, )

Fill the accumulator with data

Source

fn init_writer( tensor: VirtualTensor<MP::EO, ReadWrite>, x_offset: u32, y_offset: u32, batch_offset: u32, ) -> Self::Writer

Inits the writer at the given offsets

Source

fn write_results<G: GlobalConfig>( acc: &Self::Accumulator, out: &mut Self::Writer, stage_config: Self::Config, global_config: G, )

Reads the result of the accumulator and hands it to the stage writer

§Quantization

If some quantization is provided, the read will also requantize the stage in the output and update the scaling of the output tensor. This assumes that `execute` is called with some scaling provided.

Source

fn __expand_execute( scope: &mut Scope, lhs: <Self::LhsReader as CubeType>::ExpandType, rhs: <Self::RhsReader as CubeType>::ExpandType, instruction_lhs: <Self::LhsTile as CubeType>::ExpandType, instruction_rhs: <Self::RhsTile as CubeType>::ExpandType, acc: <Self::Accumulator as CubeType>::ExpandType, config: Self::Config, ) -> <() as CubeType>::ExpandType

Source

fn __expand_execute_with_listener<SEL: StageEventListener<Self::Config>>( scope: &mut Scope, lhs: <Self::LhsReader as CubeType>::ExpandType, rhs: <Self::RhsReader as CubeType>::ExpandType, instruction_lhs: <Self::LhsTile as CubeType>::ExpandType, instruction_rhs: <Self::RhsTile as CubeType>::ExpandType, acc: <Self::Accumulator as CubeType>::ExpandType, config: Self::Config, listener: <SEL as CubeType>::ExpandType, ) -> <() as CubeType>::ExpandType

Source

fn __expand_init_tile_inputs( scope: &mut Scope, config: Self::Config, ) -> <(Self::LhsTile, Self::RhsTile) as CubeType>::ExpandType

Source

fn __expand_init_accumulator( scope: &mut Scope, config: Self::Config, ) -> <Self::Accumulator as CubeType>::ExpandType

Source

fn __expand_zero_accumulator( scope: &mut Scope, acc: <Self::Accumulator as CubeType>::ExpandType, config: Self::Config, ) -> <() as CubeType>::ExpandType

Source

fn __expand_fill_accumulator<L: AccumulatorLoader<MP>>( scope: &mut Scope, loader: <L as CubeType>::ExpandType, acc: <Self::Accumulator as CubeType>::ExpandType, config: Self::Config, ) -> <() as CubeType>::ExpandType

Source

fn __expand_init_writer( scope: &mut Scope, tensor: <VirtualTensor<MP::EO, ReadWrite> as CubeType>::ExpandType, x_offset: <u32 as CubeType>::ExpandType, y_offset: <u32 as CubeType>::ExpandType, batch_offset: <u32 as CubeType>::ExpandType, ) -> <Self::Writer as CubeType>::ExpandType

Source

fn __expand_write_results<G: GlobalConfig>( scope: &mut Scope, acc: <Self::Accumulator as CubeType>::ExpandType, out: <Self::Writer as CubeType>::ExpandType, stage_config: Self::Config, global_config: G, ) -> <() as CubeType>::ExpandType

Dyn Compatibility§

This trait is not dyn compatible.

In older versions of Rust, dyn compatibility was called "object safety", so this trait is not object safe.

Trait StageMatmul

Required Associated Types§

type Config: StageConfig

type Accumulator: CubeType

type LhsReader: CubeType

type RhsReader: CubeType

type LhsTile: CubeType

type RhsTile: CubeType

type Writer: GlobalWriter<MP::EO>

Required Methods§

fn execute( lhs: &Self::LhsReader, rhs: &Self::RhsReader, instruction_lhs: &mut Self::LhsTile, instruction_rhs: &mut Self::RhsTile, acc: &mut Self::Accumulator, config: Self::Config, )

fn execute_with_listener<SEL: StageEventListener<Self::Config>>( lhs: &Self::LhsReader, rhs: &Self::RhsReader, instruction_lhs: &mut Self::LhsTile, instruction_rhs: &mut Self::RhsTile, acc: &mut Self::Accumulator, config: Self::Config, listener: SEL, )

fn init_tile_inputs(config: Self::Config) -> (Self::LhsTile, Self::RhsTile)

fn init_accumulator(config: Self::Config) -> Self::Accumulator

fn zero_accumulator(acc: &mut Self::Accumulator, config: Self::Config)

fn fill_accumulator<L: AccumulatorLoader<MP>>( loader: &mut L, acc: &mut Self::Accumulator, config: Self::Config, )

fn init_writer( tensor: VirtualTensor<MP::EO, ReadWrite>, x_offset: u32, y_offset: u32, batch_offset: u32, ) -> Self::Writer

fn write_results<G: GlobalConfig>( acc: &Self::Accumulator, out: &mut Self::Writer, stage_config: Self::Config, global_config: G, )

§Quantization

fn __expand_init_tile_inputs( scope: &mut Scope, config: Self::Config, ) -> <(Self::LhsTile, Self::RhsTile) as CubeType>::ExpandType

fn __expand_init_accumulator( scope: &mut Scope, config: Self::Config, ) -> <Self::Accumulator as CubeType>::ExpandType

fn __expand_zero_accumulator( scope: &mut Scope, acc: <Self::Accumulator as CubeType>::ExpandType, config: Self::Config, ) -> <() as CubeType>::ExpandType

fn __expand_fill_accumulator<L: AccumulatorLoader<MP>>( scope: &mut Scope, loader: <L as CubeType>::ExpandType, acc: <Self::Accumulator as CubeType>::ExpandType, config: Self::Config, ) -> <() as CubeType>::ExpandType

fn __expand_init_writer( scope: &mut Scope, tensor: <VirtualTensor<MP::EO, ReadWrite> as CubeType>::ExpandType, x_offset: <u32 as CubeType>::ExpandType, y_offset: <u32 as CubeType>::ExpandType, batch_offset: <u32 as CubeType>::ExpandType, ) -> <Self::Writer as CubeType>::ExpandType

fn __expand_write_results<G: GlobalConfig>( scope: &mut Scope, acc: <Self::Accumulator as CubeType>::ExpandType, out: <Self::Writer as CubeType>::ExpandType, stage_config: Self::Config, global_config: G, ) -> <() as CubeType>::ExpandType

Dyn Compatibility§

Implementors§

Trait StageMatmul