Trait GlobalMatmul

Source

/// Provides matrix multiplication operations at the global level.
///
/// At the global level:
/// - Inputs are views over global memory, meaning access is given to only
///   parts of the global memory inputs at once.
/// - All planes within a Cube are used to solve the problem.
/// - Dimensions M and N are fixed to an integer, but K is arbitrarily large.
///   The matrix multiplication works only for size (M, K) · (K, N) = (M, N).
///   M and N should match the underlying Stage matmul's M and N.
///
/// # Assumptions
///
/// Line sizes of the inputs evenly divide the dimension they are aligned with.
///
/// # Safety
///
/// It is not assumed that the matmul's dimensions match its inputs dimensions
/// perfectly. It is therefore important that Loaders and Writers perform
/// checks to avoid out-of-bounds before loading data.
pub trait GlobalMatmul<MP: MatmulPrecision>:
    'static
    + Send
    + Sync {
    /// Configuration type passed by value to `execute` and to the loader and
    /// accumulator initializers.
    type Config: GlobalConfig;
    /// Loader type for Lhs: produced by `init_lhs_loader`, consumed by `execute`.
    type LhsLoader: CubeType;
    /// Loader type for Rhs: produced by `init_rhs_loader`, consumed by `execute`.
    type RhsLoader: CubeType;
    /// NOTE(review): not referenced by any method signature of this trait;
    /// presumably the loader used to fill the accumulator — confirm.
    type AccumulatorLoader: CubeType;
    /// Output writer type: produced by `init_writer`, consumed by `execute`.
    type Writer: GlobalWriter<MP::EO>;
    /// Accumulator type: produced by `init_accumulator`, mutated by `execute`.
    type Accumulator: CubeType;

    // Required methods

    /// Performs the matrix multiplication over data loaded by the Lhs and Rhs
    /// loaders, over the range given for K, and stores the result using the
    /// output writer.
    ///
    /// To compute the whole range of k values, use `k_range = (0, K)` where K
    /// is the K dimension of Lhs and Rhs.
    fn execute(
        lhs_loader: Self::LhsLoader,
        rhs_loader: Self::RhsLoader,
        writer: Self::Writer,
        acc: &mut Self::Accumulator,
        k_range: (u32, u32),
        config: Self::Config,
    );
    /// Initializes the loader for Lhs, starting at row `m_offset` and column
    /// `k_offset`.
    ///
    /// NOTE(review): `nth_batch`, `batch_offset` and `quantization` are not
    /// described by the existing documentation — confirm their semantics.
    fn init_lhs_loader(
        lhs: VirtualTensor<MP::EI>,
        m_offset: u32,
        k_offset: u32,
        nth_batch: u32,
        batch_offset: u32,
        quantization: CubeOption<Quantization<MP>>,
        config: Self::Config,
    ) -> Self::LhsLoader;
    /// Initializes the loader for Rhs, starting at row `k_offset` and column
    /// `n_offset`.
    ///
    /// NOTE(review): `nth_batch`, `batch_offset` and `quantization` are not
    /// described by the existing documentation — confirm their semantics.
    fn init_rhs_loader(
        rhs: VirtualTensor<MP::EI>,
        k_offset: u32,
        n_offset: u32,
        nth_batch: u32,
        batch_offset: u32,
        quantization: CubeOption<Quantization<MP>>,
        config: Self::Config,
    ) -> Self::RhsLoader;
    /// Initializes the accumulator without data.
    fn init_accumulator(config: Self::Config) -> Self::Accumulator;
    /// Initializes the writer at row `m_offset` and column `n_offset`.
    ///
    /// Unlike the loader initializers, this method takes no `config` argument.
    fn init_writer(
        out: VirtualTensor<MP::EO, ReadWrite>,
        m_offset: u32,
        n_offset: u32,
        nth_batch: u32,
        batch_offset: u32,
    ) -> Self::Writer;
    /// Counterpart of `execute` that takes an explicit `Scope` and the
    /// `CubeType::ExpandType` of each argument (note `acc` is passed as an
    /// expand value rather than `&mut`).
    ///
    /// NOTE(review): presumably macro-generated expansion code not meant to be
    /// called by hand — confirm.
    fn __expand_execute(
        scope: &mut Scope,
        lhs_loader: <Self::LhsLoader as CubeType>::ExpandType,
        rhs_loader: <Self::RhsLoader as CubeType>::ExpandType,
        writer: <Self::Writer as CubeType>::ExpandType,
        acc: <Self::Accumulator as CubeType>::ExpandType,
        k_range: <(u32, u32) as CubeType>::ExpandType,
        config: Self::Config,
    ) -> <() as CubeType>::ExpandType;
    /// Counterpart of `init_lhs_loader` that takes an explicit `Scope` and the
    /// `CubeType::ExpandType` of each argument; `config` is still passed as-is.
    fn __expand_init_lhs_loader(
        scope: &mut Scope,
        lhs: <VirtualTensor<MP::EI> as CubeType>::ExpandType,
        m_offset: <u32 as CubeType>::ExpandType,
        k_offset: <u32 as CubeType>::ExpandType,
        nth_batch: <u32 as CubeType>::ExpandType,
        batch_offset: <u32 as CubeType>::ExpandType,
        quantization: <CubeOption<Quantization<MP>> as CubeType>::ExpandType,
        config: Self::Config,
    ) -> <Self::LhsLoader as CubeType>::ExpandType;
    /// Counterpart of `init_rhs_loader` that takes an explicit `Scope` and the
    /// `CubeType::ExpandType` of each argument; `config` is still passed as-is.
    fn __expand_init_rhs_loader(
        scope: &mut Scope,
        rhs: <VirtualTensor<MP::EI> as CubeType>::ExpandType,
        k_offset: <u32 as CubeType>::ExpandType,
        n_offset: <u32 as CubeType>::ExpandType,
        nth_batch: <u32 as CubeType>::ExpandType,
        batch_offset: <u32 as CubeType>::ExpandType,
        quantization: <CubeOption<Quantization<MP>> as CubeType>::ExpandType,
        config: Self::Config,
    ) -> <Self::RhsLoader as CubeType>::ExpandType;
    /// Counterpart of `init_accumulator` that takes an explicit `Scope` and
    /// returns the accumulator's `CubeType::ExpandType`.
    fn __expand_init_accumulator(
        scope: &mut Scope,
        config: Self::Config,
    ) -> <Self::Accumulator as CubeType>::ExpandType;
    /// Counterpart of `init_writer` that takes an explicit `Scope` and the
    /// `CubeType::ExpandType` of each argument.
    fn __expand_init_writer(
        scope: &mut Scope,
        out: <VirtualTensor<MP::EO, ReadWrite> as CubeType>::ExpandType,
        m_offset: <u32 as CubeType>::ExpandType,
        n_offset: <u32 as CubeType>::ExpandType,
        nth_batch: <u32 as CubeType>::ExpandType,
        batch_offset: <u32 as CubeType>::ExpandType,
    ) -> <Self::Writer as CubeType>::ExpandType;
}

Expand description

Provides matrix multiplication operations at the global level.

At the global level:

Inputs are views over global memory, meaning access is given to only parts of the global memory inputs at once.
All planes within a Cube are used to solve the problem
Dimensions M and N are fixed to an integer, but K is arbitrarily large. The matrix multiplication works only for size (M, K) · (K, N) = (M, N). M and N should match the underlying Stage matmul’s M and N.

§Assumptions

Line sizes of the inputs evenly divide the dimension they are aligned with.

§Safety

It is not assumed that the matmul’s dimensions match its inputs dimensions perfectly. It is therefore important that Loaders and Writers perform checks to avoid out-of-bounds before loading data.

Required Associated Types§

Source

type Accumulator: CubeType

Required Methods§

Source

fn execute( lhs_loader: Self::LhsLoader, rhs_loader: Self::RhsLoader, writer: Self::Writer, acc: &mut Self::Accumulator, k_range: (u32, u32), config: Self::Config, )

Performs the matrix multiplication over data loaded by the Lhs and Rhs loaders, over the range given for K, and stores the result using the output writer.

To compute the whole range of k values, use k_range=(0, K) where K is the K dimension of Lhs and Rhs.

Source

fn init_lhs_loader( lhs: VirtualTensor<MP::EI>, m_offset: u32, k_offset: u32, nth_batch: u32, batch_offset: u32, quantization: CubeOption<Quantization<MP>>, config: Self::Config, ) -> Self::LhsLoader

Initialize the loader for Lhs, starting at row m and column k

Source

fn init_rhs_loader( rhs: VirtualTensor<MP::EI>, k_offset: u32, n_offset: u32, nth_batch: u32, batch_offset: u32, quantization: CubeOption<Quantization<MP>>, config: Self::Config, ) -> Self::RhsLoader

Initialize the loader for Rhs, starting at row k and column n

Source

fn init_accumulator(config: Self::Config) -> Self::Accumulator

Initialize the accumulator without data

Source

fn init_writer( out: VirtualTensor<MP::EO, ReadWrite>, m_offset: u32, n_offset: u32, nth_batch: u32, batch_offset: u32, ) -> Self::Writer

Initialize the writer at row m and column n

Source

fn __expand_execute( scope: &mut Scope, lhs_loader: <Self::LhsLoader as CubeType>::ExpandType, rhs_loader: <Self::RhsLoader as CubeType>::ExpandType, writer: <Self::Writer as CubeType>::ExpandType, acc: <Self::Accumulator as CubeType>::ExpandType, k_range: <(u32, u32) as CubeType>::ExpandType, config: Self::Config, ) -> <() as CubeType>::ExpandType

Source

fn __expand_init_lhs_loader( scope: &mut Scope, lhs: <VirtualTensor<MP::EI> as CubeType>::ExpandType, m_offset: <u32 as CubeType>::ExpandType, k_offset: <u32 as CubeType>::ExpandType, nth_batch: <u32 as CubeType>::ExpandType, batch_offset: <u32 as CubeType>::ExpandType, quantization: <CubeOption<Quantization<MP>> as CubeType>::ExpandType, config: Self::Config, ) -> <Self::LhsLoader as CubeType>::ExpandType

Source

fn __expand_init_rhs_loader( scope: &mut Scope, rhs: <VirtualTensor<MP::EI> as CubeType>::ExpandType, k_offset: <u32 as CubeType>::ExpandType, n_offset: <u32 as CubeType>::ExpandType, nth_batch: <u32 as CubeType>::ExpandType, batch_offset: <u32 as CubeType>::ExpandType, quantization: <CubeOption<Quantization<MP>> as CubeType>::ExpandType, config: Self::Config, ) -> <Self::RhsLoader as CubeType>::ExpandType

Source

fn __expand_init_accumulator( scope: &mut Scope, config: Self::Config, ) -> <Self::Accumulator as CubeType>::ExpandType

Source

fn __expand_init_writer( scope: &mut Scope, out: <VirtualTensor<MP::EO, ReadWrite> as CubeType>::ExpandType, m_offset: <u32 as CubeType>::ExpandType, n_offset: <u32 as CubeType>::ExpandType, nth_batch: <u32 as CubeType>::ExpandType, batch_offset: <u32 as CubeType>::ExpandType, ) -> <Self::Writer as CubeType>::ExpandType

Dyn Compatibility§

This trait is not dyn compatible.

In older versions of Rust, dyn compatibility was called "object safety", so this trait is not object safe.

Implementors§

Source §

impl<MP: MatmulPrecision, SMM, LL, RL> GlobalMatmul<MP> for DoubleBufferingMatmul<MP, SMM, LL, RL>
where SMM: StageMatmul<MP, LhsReader = PartialStageToTileReader<MP::ES, LL::TilingLayout>, RhsReader = PartialStageToTileReader<MP::ES, RL::TilingLayout>>, LL: SyncPartialLoadingStrategy, RL: SyncPartialLoadingStrategy,

Source §

type Config = DoubleBufferingGlobalConfig<<SMM as StageMatmul<MP>>::Config>

Source §

type LhsLoader = SyncPartialLoader<MP, <DoubleBufferingMatmul<MP, SMM, LL, RL> as GlobalMatmul<MP>>::Config, LL>

Source §

type RhsLoader = SyncPartialLoader<MP, <DoubleBufferingMatmul<MP, SMM, LL, RL> as GlobalMatmul<MP>>::Config, RL>

Source §

type AccumulatorLoader = ZeroAccumulatorLoader

Source §

type Writer = <SMM as StageMatmul<MP>>::Writer

Source §

type Accumulator = <SMM as StageMatmul<MP>>::Accumulator

Source §

impl<MP: MatmulPrecision, SMM, RL> GlobalMatmul<MP> for OrderedDoubleBufferingMatmul<MP, SMM, RL>
where SMM: StageMatmul<MP, LhsReader = FullStageToTileReader<MP::ES, <LL as SyncFullLoadingStrategy>::TilingLayout>, RhsReader = PartialStageToTileReader<MP::ES, RL::TilingLayout>>, RL: SyncPartialLoadingStrategy,

Source §

type Config = OrderedDoubleBufferingGlobalConfig<<SMM as StageMatmul<MP>>::Config>

Source §

type LhsLoader = SyncFullLoader<MP, <OrderedDoubleBufferingMatmul<MP, SMM, RL> as GlobalMatmul<MP>>::Config, SyncFullOrderedLoading>

Source §

type RhsLoader = SyncPartialLoader<MP, <OrderedDoubleBufferingMatmul<MP, SMM, RL> as GlobalMatmul<MP>>::Config, RL>

Source §

type AccumulatorLoader = ZeroAccumulatorLoader

Source §

type Writer = <SMM as StageMatmul<MP>>::Writer

Source §

Trait GlobalMatmul

§Assumptions

§Safety

Required Associated Types§

type Config: GlobalConfig

type LhsLoader: CubeType

type RhsLoader: CubeType

type AccumulatorLoader: CubeType

type Writer: GlobalWriter<MP::EO>

type Accumulator: CubeType

Required Methods§

fn execute( lhs_loader: Self::LhsLoader, rhs_loader: Self::RhsLoader, writer: Self::Writer, acc: &mut Self::Accumulator, k_range: (u32, u32), config: Self::Config, )

fn init_lhs_loader( lhs: VirtualTensor<MP::EI>, m_offset: u32, k_offset: u32, nth_batch: u32, batch_offset: u32, quantization: CubeOption<Quantization<MP>>, config: Self::Config, ) -> Self::LhsLoader

fn init_rhs_loader( rhs: VirtualTensor<MP::EI>, k_offset: u32, n_offset: u32, nth_batch: u32, batch_offset: u32, quantization: CubeOption<Quantization<MP>>, config: Self::Config, ) -> Self::RhsLoader

fn init_accumulator(config: Self::Config) -> Self::Accumulator

fn init_writer( out: VirtualTensor<MP::EO, ReadWrite>, m_offset: u32, n_offset: u32, nth_batch: u32, batch_offset: u32, ) -> Self::Writer

fn __expand_init_accumulator( scope: &mut Scope, config: Self::Config, ) -> <Self::Accumulator as CubeType>::ExpandType

Dyn Compatibility§

Implementors§

type Config = DoubleBufferingGlobalConfig<<SMM as StageMatmul<MP>>::Config>

type LhsLoader = SyncPartialLoader<MP, <DoubleBufferingMatmul<MP, SMM, LL, RL> as GlobalMatmul<MP>>::Config, LL>

type RhsLoader = SyncPartialLoader<MP, <DoubleBufferingMatmul<MP, SMM, LL, RL> as GlobalMatmul<MP>>::Config, RL>

type AccumulatorLoader = ZeroAccumulatorLoader

type Writer = <SMM as StageMatmul<MP>>::Writer

type Accumulator = <SMM as StageMatmul<MP>>::Accumulator

type Config = OrderedDoubleBufferingGlobalConfig<<SMM as StageMatmul<MP>>::Config>

type LhsLoader = SyncFullLoader<MP, <OrderedDoubleBufferingMatmul<MP, SMM, RL> as GlobalMatmul<MP>>::Config, SyncFullOrderedLoading>

type RhsLoader = SyncPartialLoader<MP, <OrderedDoubleBufferingMatmul<MP, SMM, RL> as GlobalMatmul<MP>>::Config, RL>

type AccumulatorLoader = ZeroAccumulatorLoader

type Writer = <SMM as StageMatmul<MP>>::Writer

type Accumulator = <SMM as StageMatmul<MP>>::Accumulator

Trait GlobalMatmul