pub enum Tile<N: Numeric, Sc: TileScope, IO: SliceVisibility> {
SharedMemory(SharedTile<N, IO>),
Cmma(CmmaTile<N>),
Mma(MmaTile<N>),
Register(RegisterTile<N>),
PlaneVec(PlaneVecTile<N>),
Interleaved(InterleavedTile<N>),
Unit(UnitTile<N>),
WhiteboxFragment(WhiteboxFragment<N>),
Bounce(BounceTile<N>),
Broadcasted(Value<N>),
None,
_Phantom(ScopeMarker<Sc>),
}
Variants§
Cmma(CmmaTile<N>)
Mma(MmaTile<N>)
Register(RegisterTile<N>)
PlaneVec(PlaneVecTile<N>)
Interleaved(InterleavedTile<N>)
Unit(UnitTile<N>)
Each unit holds a full row-major copy of the tile in registers.
Only valid when `Sc = Unit`.
WhiteboxFragment(WhiteboxFragment<N>)
The tile is fragmented across plane units, with the layout exposed.
Only valid when `Sc = Plane`.
Bounce(BounceTile<N>)
Bundles a cmma fragment, an smem scratch slice, and a `WhiteboxFragment` view.
From the caller’s perspective it is a single tile; the smem round-trip
is internal to ops dispatch. Only valid when `Sc = Plane`.
Broadcasted(Value<N>)
None
_Phantom(ScopeMarker<Sc>)
Implementations§
impl<N: Numeric, Sc: TileScope, IO: SliceVisibility> Tile<N, Sc, IO>
impl<N: Numeric, Sc: TileScope, IO: SliceVisibility> Tile<N, Sc, IO>
pub fn new_Cmma(_0: CmmaTile<N>) -> Self
pub fn __expand_new_Cmma( _: &mut Scope, _0: <CmmaTile<N> as CubeType>::ExpandType, ) -> TileExpand<N, Sc, IO>
pub fn new_Mma(_0: MmaTile<N>) -> Self
pub fn __expand_new_Mma( _: &mut Scope, _0: <MmaTile<N> as CubeType>::ExpandType, ) -> TileExpand<N, Sc, IO>
pub fn new_Register(_0: RegisterTile<N>) -> Self
pub fn __expand_new_Register( _: &mut Scope, _0: <RegisterTile<N> as CubeType>::ExpandType, ) -> TileExpand<N, Sc, IO>
pub fn new_PlaneVec(_0: PlaneVecTile<N>) -> Self
pub fn __expand_new_PlaneVec( _: &mut Scope, _0: <PlaneVecTile<N> as CubeType>::ExpandType, ) -> TileExpand<N, Sc, IO>
pub fn new_Interleaved(_0: InterleavedTile<N>) -> Self
pub fn __expand_new_Interleaved( _: &mut Scope, _0: <InterleavedTile<N> as CubeType>::ExpandType, ) -> TileExpand<N, Sc, IO>
pub fn new_Unit(_0: UnitTile<N>) -> Self
pub fn __expand_new_Unit( _: &mut Scope, _0: <UnitTile<N> as CubeType>::ExpandType, ) -> TileExpand<N, Sc, IO>
pub fn new_WhiteboxFragment(_0: WhiteboxFragment<N>) -> Self
pub fn __expand_new_WhiteboxFragment( _: &mut Scope, _0: <WhiteboxFragment<N> as CubeType>::ExpandType, ) -> TileExpand<N, Sc, IO>
pub fn new_Bounce(_0: BounceTile<N>) -> Self
pub fn __expand_new_Bounce( _: &mut Scope, _0: <BounceTile<N> as CubeType>::ExpandType, ) -> TileExpand<N, Sc, IO>
pub fn new_Broadcasted(_0: Value<N>) -> Self
pub fn __expand_new_Broadcasted( _: &mut Scope, _0: <Value<N> as CubeType>::ExpandType, ) -> TileExpand<N, Sc, IO>
pub fn new_None() -> Self
pub fn __expand_new_None(_: &mut Scope) -> TileExpand<N, Sc, IO>
pub fn new__Phantom(_0: ScopeMarker<Sc>) -> Self
pub fn __expand_new__Phantom( _: &mut Scope, _0: <ScopeMarker<Sc> as CubeType>::ExpandType, ) -> TileExpand<N, Sc, IO>
impl<N: Numeric, Sc: TileScope> Tile<N, Sc, ReadWrite>
impl<N: Numeric, Sc: TileScope> Tile<N, Sc, ReadWrite>
pub fn mma<L: Numeric, R: Numeric>(
&mut self,
lhs: &Tile<L, Sc, ReadWrite>,
rhs: &Tile<R, Sc, ReadWrite>,
)
pub fn mma<L: Numeric, R: Numeric>( &mut self, lhs: &Tile<L, Sc, ReadWrite>, rhs: &Tile<R, Sc, ReadWrite>, )
Executes `lhs · rhs`, accumulating the result into `self`.
pub fn __expand_mma<L: Numeric, R: Numeric>( scope: &mut Scope, this: <Self as CubeType>::ExpandType, lhs: <Tile<L, Sc, ReadWrite> as CubeType>::ExpandType, rhs: <Tile<R, Sc, ReadWrite> as CubeType>::ExpandType, ) -> <() as CubeType>::ExpandType
impl<E: Float> Tile<E, Plane, ReadWrite>
impl<E: Float> Tile<E, Plane, ReadWrite>
pub fn row_max(&self, acc: &mut RowWise<E>, base: &RowWise<E>)
pub fn row_sum(&self, acc: &mut RowWise<E>)
pub fn exp_diff(&mut self, rowwise: &RowWise<E>)
pub fn rowwise_scale(&mut self, scale: &RowWise<E>)
pub fn __expand_row_max( scope: &mut Scope, this: <Self as CubeType>::ExpandType, acc: <RowWise<E> as CubeType>::ExpandType, base: <RowWise<E> as CubeType>::ExpandType, ) -> <() as CubeType>::ExpandType
pub fn __expand_row_sum( scope: &mut Scope, this: <Self as CubeType>::ExpandType, acc: <RowWise<E> as CubeType>::ExpandType, ) -> <() as CubeType>::ExpandType
pub fn __expand_exp_diff( scope: &mut Scope, this: <Self as CubeType>::ExpandType, rowwise: <RowWise<E> as CubeType>::ExpandType, ) -> <() as CubeType>::ExpandType
pub fn __expand_rowwise_scale( scope: &mut Scope, this: <Self as CubeType>::ExpandType, scale: <RowWise<E> as CubeType>::ExpandType, ) -> <() as CubeType>::ExpandType
impl<N: Numeric, Sc: TileScope> Tile<N, Sc, ReadWrite>
impl<N: Numeric, Sc: TileScope> Tile<N, Sc, ReadWrite>
pub fn copy_from<SE: Numeric, SS: Size, L: Numeric, R: Numeric, A: Numeric, SIO: SliceVisibility>(
&mut self,
source: &Tile<SE, Sc, SIO>,
ident: StageIdent,
)
pub fn copy_from<SE: Numeric, SS: Size, L: Numeric, R: Numeric, A: Numeric, SIO: SliceVisibility>( &mut self, source: &Tile<SE, Sc, SIO>, ident: StageIdent, )
Copies data from `source` into `self`.
`SS` is the vector size of the shared memory tile involved in the copy
(whether that’s the source on a load, or the destination on a write).
`L`/`R`/`A` are the matrix-level numeric types needed by the MMA
readers/writers — they are unused on non-MMA paths.
pub fn __expand_copy_from<SE: Numeric, SS: Size, L: Numeric, R: Numeric, A: Numeric, SIO: SliceVisibility>( scope: &mut Scope, this: <Self as CubeType>::ExpandType, source: <Tile<SE, Sc, SIO> as CubeType>::ExpandType, ident: StageIdent, ) -> <() as CubeType>::ExpandType
impl<E: Float> Tile<E, Plane, ReadWrite>
impl<E: Float> Tile<E, Plane, ReadWrite>
pub fn scale_and_mask<M: Mask>(&mut self, scale: E, mask: &M)
pub fn scale_and_mask<M: Mask>(&mut self, scale: E, mask: &M)
Multiplies each element by `scale` and adds `-inf` at masked positions.
`scale` is a scalar; `mask.should_mask((r, c))` is element-wise.
pub fn __expand_scale_and_mask<M: Mask>( scope: &mut Scope, this: <Self as CubeType>::ExpandType, scale: <E as CubeType>::ExpandType, mask: <M as CubeType>::ExpandType, ) -> <() as CubeType>::ExpandType
pub fn __expand_fill_zero( scope: &mut Scope, this: <Self as CubeType>::ExpandType, ) -> <() as CubeType>::ExpandType
impl<N: Numeric, Sc: TileScope> Tile<N, Sc, ReadWrite>
impl<N: Numeric, Sc: TileScope> Tile<N, Sc, ReadWrite>
pub fn load_mask_from_strided_tile<E: Numeric, ES: Size>(
&mut self,
tile: &StridedTile<E, ES>,
)
pub fn load_mask_from_strided_tile<E: Numeric, ES: Size>( &mut self, tile: &StridedTile<E, ES>, )
Loads the data from an external strided tile into the inner storage of a
`Tile::Unit` or `Tile::WhiteboxFragment`. Used to materialize a mask fragment.
pub fn __expand_load_mask_from_strided_tile<E: Numeric, ES: Size>( scope: &mut Scope, this: <Self as CubeType>::ExpandType, tile: <StridedTile<E, ES> as CubeType>::ExpandType, ) -> <() as CubeType>::ExpandType
impl<Acc: Float> Tile<Acc, Plane, ReadWrite>
impl<Acc: Float> Tile<Acc, Plane, ReadWrite>
pub fn softmax<Lhs: Float, M: Mask>(
&mut self,
mask: &M,
softmaxed_tile: &mut Tile<Lhs, Plane, ReadWrite>,
state: &mut (RowWise<Acc>, RowWise<Acc>),
head_dim_factor: Acc,
) -> RowWise<Acc>
pub fn softmax<Lhs: Float, M: Mask>( &mut self, mask: &M, softmaxed_tile: &mut Tile<Lhs, Plane, ReadWrite>, state: &mut (RowWise<Acc>, RowWise<Acc>), head_dim_factor: Acc, ) -> RowWise<Acc>
Online softmax update over a single attention tile, fused with the
precision-cast write into a value-matmul lhs tile. Dispatches on the
score variant — each variant owns the algorithm best suited to its
storage and is polymorphic in the destination: a `Bounce` score can
be written into any compatible softmaxed tile (`Bounce`, fragment, …),
not just another `Bounce`.
Returns the per-row scaling factor `α_i = e^(m_old - m_new)` used by the
caller to rescale running output accumulators.
pub fn scale_mul<SM: Float>(&mut self, scale: &RowWise<SM>)
pub fn scale_mul<SM: Float>(&mut self, scale: &RowWise<SM>)
Multiplies each row of `self` by the corresponding `scale[r]`. The
`Bounce` arm round-trips through smem so the cmma fragment is current
for the next mma; the others operate in place on their native storage.
pub fn scale_div<SM: Float>(&mut self, running_state_l: &RowWise<SM>)
pub fn scale_div<SM: Float>(&mut self, running_state_l: &RowWise<SM>)
Divides each row of `self` by the corresponding `running_state_l[r]`,
guarding against zero (a fully-masked row stays zero).
pub fn write_results<DE: Float, DS: Size>(
&self,
dest: &mut Tile<DE, Plane, ReadWrite>,
)
pub fn write_results<DE: Float, DS: Size>( &self, dest: &mut Tile<DE, Plane, ReadWrite>, )
Copies `self` into `dest` (a stage-side strided/shared tile in the
caller’s downstream write path).