pub struct TmaDescriptorBuilder { /* private fields */ }
Typed builder for TMA tensor-map descriptors.
Collects parameters in a convenient Rust API and produces a
TmaEncodeTiledParams struct suitable for passing to the CUDA driver’s
cuTensorMapEncodeTiled entry point.
§Example
use oxicuda_driver::tma::{
CuTensorMapDataType, CuTensorMapSwizzle, TmaDescriptorBuilder,
};
let params = TmaDescriptorBuilder::new_2d(
CuTensorMapDataType::Bfloat16,
512, 1024, // rows × cols
1024 * 2, // row stride in bytes
64, 64, // tile rows × tile cols
)
.with_swizzle(CuTensorMapSwizzle::B128)
.params();
assert_eq!(params.num_dims, 2);
assert_eq!(params.global_dims[0], 1024); // cols (innermost)
assert_eq!(params.global_dims[1], 512); // rows
assert_eq!(params.box_dims[0], 64); // tile cols
assert_eq!(params.box_dims[1], 64); // tile rows
Implementations§
Source§impl TmaDescriptorBuilder
impl TmaDescriptorBuilder
Sourcepub fn new_2d(
data_type: CuTensorMapDataType,
rows: u64,
cols: u64,
row_stride_bytes: u64,
box_rows: u32,
box_cols: u32,
) -> Self
pub fn new_2d( data_type: CuTensorMapDataType, rows: u64, cols: u64, row_stride_bytes: u64, box_rows: u32, box_cols: u32, ) -> Self
Create a 2-D tiled TMA descriptor for a row-major matrix.
§Parameters
- data_type — element type.
- rows — number of rows in the global tensor.
- cols — number of columns in the global tensor.
- row_stride_bytes — byte offset between consecutive rows in global memory (often cols * element_size).
- box_rows — tile height (rows per block in shared memory).
- box_cols — tile width (cols per block in shared memory).
§Panics
Does not panic; invalid parameters will be caught by the driver when
cuTensorMapEncodeTiled is called.
Sourcepub fn new_nd(
data_type: CuTensorMapDataType,
num_dims: u32,
global_dims: [u64; 5],
global_strides: [u64; 4],
box_dims: [u32; 5],
element_strides: [u32; 5],
) -> Self
pub fn new_nd( data_type: CuTensorMapDataType, num_dims: u32, global_dims: [u64; 5], global_strides: [u64; 4], box_dims: [u32; 5], element_strides: [u32; 5], ) -> Self
Create an N-dimensional tiled TMA descriptor (N ≤ 5).
§Parameters
- data_type — element type.
- num_dims — number of tensor dimensions (1–5).
- global_dims — size of each dimension, innermost (col) first.
- global_strides — byte stride for each outer dimension (num_dims - 1 entries).
- box_dims — tile extent per dimension.
- element_strides — stride between elements in each tile dimension.
Sourcepub fn with_swizzle(self, swizzle: CuTensorMapSwizzle) -> Self
pub fn with_swizzle(self, swizzle: CuTensorMapSwizzle) -> Self
Override the swizzle pattern (default: CuTensorMapSwizzle::B128).
Sourcepub fn with_interleave(self, interleave: CuTensorMapInterleave) -> Self
pub fn with_interleave(self, interleave: CuTensorMapInterleave) -> Self
Override the interleave mode (default: CuTensorMapInterleave::None).
Sourcepub fn with_l2_promotion(self, l2_promotion: CuTensorMapL2Promotion) -> Self
pub fn with_l2_promotion(self, l2_promotion: CuTensorMapL2Promotion) -> Self
Override the L2 promotion hint (default: CuTensorMapL2Promotion::L2B128).
Sourcepub fn with_oob_fill(self, oob_fill: CuTensorMapFloatOobFill) -> Self
pub fn with_oob_fill(self, oob_fill: CuTensorMapFloatOobFill) -> Self
Override the out-of-bounds fill mode
(default: CuTensorMapFloatOobFill::None).
Sourcepub fn params(self) -> TmaEncodeTiledParams
pub fn params(self) -> TmaEncodeTiledParams
Finalise the builder and return the flat parameter struct.
Pass the fields of the returned TmaEncodeTiledParams directly to
cuTensorMapEncodeTiled.
Trait Implementations§
Source§impl Clone for TmaDescriptorBuilder
impl Clone for TmaDescriptorBuilder
Source§fn clone(&self) -> TmaDescriptorBuilder
fn clone(&self) -> TmaDescriptorBuilder
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more