pub struct Barrier<C: CubePrimitive> { /* private fields */ }
Expand description
A mechanism for awaiting asynchronous data transfers. Behaviour is defined by its BarrierLevel.
Implementations§
Source§impl<C: CubePrimitive> Barrier<C>
impl<C: CubePrimitive> Barrier<C>
Sourcepub fn tma_load_2d(
&self,
source: &TensorMap<C>,
destination: &mut SliceMut<Line<C>>,
y: i32,
x: i32,
)
pub fn tma_load_2d( &self, source: &TensorMap<C>, destination: &mut SliceMut<Line<C>>, y: i32, x: i32, )
Copy a tile from a global memory `source` to a shared memory `destination`, with the provided offsets.
pub fn __expand_tma_load_2d( scope: &mut Scope, expand: BarrierExpand<C>, source: ExpandElementTyped<TensorMap<C>>, destination: SliceExpand<Line<C>, ReadWrite>, y: ExpandElementTyped<i32>, x: ExpandElementTyped<i32>, )
Source§impl<C: CubePrimitive> Barrier<C>
impl<C: CubePrimitive> Barrier<C>
Sourcepub fn tma_load_3d(
&self,
source: &TensorMap<C>,
destination: &mut SliceMut<Line<C>>,
z: i32,
y: i32,
x: i32,
)
pub fn tma_load_3d( &self, source: &TensorMap<C>, destination: &mut SliceMut<Line<C>>, z: i32, y: i32, x: i32, )
Copy a tile from a global memory `source` to a shared memory `destination`, with the provided offsets.
pub fn __expand_tma_load_3d( scope: &mut Scope, expand: BarrierExpand<C>, source: ExpandElementTyped<TensorMap<C>>, destination: SliceExpand<Line<C>, ReadWrite>, z: ExpandElementTyped<i32>, y: ExpandElementTyped<i32>, x: ExpandElementTyped<i32>, )
Source§impl<C: CubePrimitive> Barrier<C>
impl<C: CubePrimitive> Barrier<C>
Sourcepub fn tma_load_4d(
&self,
source: &TensorMap<C>,
destination: &mut SliceMut<Line<C>>,
w: i32,
z: i32,
y: i32,
x: i32,
)
pub fn tma_load_4d( &self, source: &TensorMap<C>, destination: &mut SliceMut<Line<C>>, w: i32, z: i32, y: i32, x: i32, )
Copy a tile from a global memory `source` to a shared memory `destination`, with the provided offsets.
pub fn __expand_tma_load_4d( scope: &mut Scope, expand: BarrierExpand<C>, source: ExpandElementTyped<TensorMap<C>>, destination: SliceExpand<Line<C>, ReadWrite>, w: ExpandElementTyped<i32>, z: ExpandElementTyped<i32>, y: ExpandElementTyped<i32>, x: ExpandElementTyped<i32>, )
Source§impl<C: CubePrimitive> Barrier<C>
impl<C: CubePrimitive> Barrier<C>
Sourcepub fn tma_load_5d(
&self,
source: &TensorMap<C>,
destination: &mut SliceMut<Line<C>>,
v: i32,
w: i32,
z: i32,
y: i32,
x: i32,
)
pub fn tma_load_5d( &self, source: &TensorMap<C>, destination: &mut SliceMut<Line<C>>, v: i32, w: i32, z: i32, y: i32, x: i32, )
Copy a tile from a global memory `source` to a shared memory `destination`, with the provided offsets.
pub fn __expand_tma_load_5d( scope: &mut Scope, expand: BarrierExpand<C>, source: ExpandElementTyped<TensorMap<C>>, destination: SliceExpand<Line<C>, ReadWrite>, v: ExpandElementTyped<i32>, w: ExpandElementTyped<i32>, z: ExpandElementTyped<i32>, y: ExpandElementTyped<i32>, x: ExpandElementTyped<i32>, )
Source§impl<C: CubePrimitive> Barrier<C>
impl<C: CubePrimitive> Barrier<C>
Sourcepub fn tma_load_im2col_3d(
&self,
source: &TensorMap<C>,
destination: &mut SliceMut<Line<C>>,
n: i32,
w: i32,
c: i32,
w_offset: u16,
)
pub fn tma_load_im2col_3d( &self, source: &TensorMap<C>, destination: &mut SliceMut<Line<C>>, n: i32, w: i32, c: i32, w_offset: u16, )
Copy a tile from a global memory `source` to a shared memory `destination`, with the provided offsets.
pub fn __expand_tma_load_im2col_3d( scope: &mut Scope, expand: BarrierExpand<C>, source: ExpandElementTyped<TensorMap<C>>, destination: SliceExpand<Line<C>, ReadWrite>, n: ExpandElementTyped<i32>, w: ExpandElementTyped<i32>, c: ExpandElementTyped<i32>, w_offset: ExpandElementTyped<u16>, )
Source§impl<C: CubePrimitive> Barrier<C>
impl<C: CubePrimitive> Barrier<C>
Sourcepub fn tma_load_im2col_4d(
&self,
source: &TensorMap<C>,
destination: &mut SliceMut<Line<C>>,
n: i32,
h: i32,
w: i32,
c: i32,
h_offset: u16,
w_offset: u16,
)
pub fn tma_load_im2col_4d( &self, source: &TensorMap<C>, destination: &mut SliceMut<Line<C>>, n: i32, h: i32, w: i32, c: i32, h_offset: u16, w_offset: u16, )
Copy a tile from a global memory `source` to a shared memory `destination`, with the provided offsets.
pub fn __expand_tma_load_im2col_4d( scope: &mut Scope, expand: BarrierExpand<C>, source: ExpandElementTyped<TensorMap<C>>, destination: SliceExpand<Line<C>, ReadWrite>, n: ExpandElementTyped<i32>, h: ExpandElementTyped<i32>, w: ExpandElementTyped<i32>, c: ExpandElementTyped<i32>, h_offset: ExpandElementTyped<u16>, w_offset: ExpandElementTyped<u16>, )
Source§impl<C: CubePrimitive> Barrier<C>
impl<C: CubePrimitive> Barrier<C>
Sourcepub fn tma_load_im2col_5d(
&self,
source: &TensorMap<C>,
destination: &mut SliceMut<Line<C>>,
n: i32,
d: i32,
h: i32,
w: i32,
c: i32,
d_offset: u16,
h_offset: u16,
w_offset: u16,
)
pub fn tma_load_im2col_5d( &self, source: &TensorMap<C>, destination: &mut SliceMut<Line<C>>, n: i32, d: i32, h: i32, w: i32, c: i32, d_offset: u16, h_offset: u16, w_offset: u16, )
Copy a tile from a global memory `source` to a shared memory `destination`, with the provided offsets.
pub fn __expand_tma_load_im2col_5d( scope: &mut Scope, expand: BarrierExpand<C>, source: ExpandElementTyped<TensorMap<C>>, destination: SliceExpand<Line<C>, ReadWrite>, n: ExpandElementTyped<i32>, d: ExpandElementTyped<i32>, h: ExpandElementTyped<i32>, w: ExpandElementTyped<i32>, c: ExpandElementTyped<i32>, d_offset: ExpandElementTyped<u16>, h_offset: ExpandElementTyped<u16>, w_offset: ExpandElementTyped<u16>, )
Source§impl<C: CubePrimitive> Barrier<C>
impl<C: CubePrimitive> Barrier<C>
Sourcepub fn new(_level: BarrierLevel) -> Self
pub fn new(_level: BarrierLevel) -> Self
Creates a barrier using a user-defined comptime barrier level.
Sourcepub fn new_with_tma_proxy(_level: BarrierLevel) -> Self
pub fn new_with_tma_proxy(_level: BarrierLevel) -> Self
Creates a new barrier for use with TMA instructions. Adds a shared memory proxy barrier to the initialization.
Sourcepub fn memcpy_async(
&self,
_source: &Slice<Line<C>>,
_destination: &mut SliceMut<Line<C>>,
)
pub fn memcpy_async( &self, _source: &Slice<Line<C>>, _destination: &mut SliceMut<Line<C>>, )
Copy the source slice to the destination.
§Safety
This will try to copy the whole source slice, so make sure that the source length is <= the destination length.
Sourcepub fn arrive_tx(&self, _arrival_count: u32, _transaction_count: u32)
pub fn arrive_tx(&self, _arrival_count: u32, _transaction_count: u32)
Arrive at the barrier, decrementing the arrival count. Additionally increments the expected transaction count.
Sourcepub fn arrive_and_wait(&self)
pub fn arrive_and_wait(&self)
Wait until all data is loaded.