pub struct Handle { /* private fields */ }
cuTENSOR library handle.
Implementations
impl Handle
pub fn as_raw(&self) -> cutensorHandle_t
Returns the raw cutensorHandle_t. Use with care.
pub fn resize_plan_cache(&self, num_entries: u32) -> Result<()>
Resize the internal plan cache to num_entries entries. A larger cache holds more plans, which makes repeated invocations faster. The default is 64.
pub fn write_plan_cache_to_file(&self, path: &str) -> Result<()>
Persist the plan cache to disk.
pub fn read_plan_cache_from_file(&self, path: &str) -> Result<()>
Read a previously written plan cache from disk.
pub fn write_kernel_cache_to_file(&self, path: &str) -> Result<()>
Persist the kernel cache (compiled binary kernels) to disk. The kernel cache is separate from the plan cache and survives across planner changes.
pub fn read_kernel_cache_from_file(&self, path: &str) -> Result<()>
Read a previously written kernel cache from disk.
pub fn compute_desc_32f(&self) -> Result<*const c_void>
Fetch cuTENSOR’s pre-defined CUTENSOR_COMPUTE_DESC_32F descriptor. Pass this (or one of the sibling accessors) as compute_desc to any op constructor.
pub fn compute_desc_64f(&self) -> Result<*const c_void>
Fetch CUTENSOR_COMPUTE_DESC_64F — double-precision accumulator.
pub fn compute_desc_16f(&self) -> Result<*const c_void>
Fetch CUTENSOR_COMPUTE_DESC_16F — half-precision accumulator.
pub fn compute_desc_16bf(&self) -> Result<*const c_void>
Fetch CUTENSOR_COMPUTE_DESC_16BF — bf16 accumulator.
pub fn compute_desc_tf32(&self) -> Result<*const c_void>
Fetch CUTENSOR_COMPUTE_DESC_TF32 — TF32 tensor-core accumulator.
pub fn compute_desc_3xtf32(&self) -> Result<*const c_void>
Fetch CUTENSOR_COMPUTE_DESC_3XTF32 — 3xTF32 emulation for f32.
pub fn compute_desc_4x16f(&self) -> Result<*const c_void>
Fetch CUTENSOR_COMPUTE_DESC_4X16F — 4x f16 mixed-precision.
pub fn compute_desc_8xint8(&self) -> Result<*const c_void>
Fetch CUTENSOR_COMPUTE_DESC_8XINT8 — packed int8 tensor cores.
pub fn compute_desc_9x16bf(&self) -> Result<*const c_void>
Fetch CUTENSOR_COMPUTE_DESC_9X16BF — 9x bf16 emulation mode (presumably emulating f32, by analogy with 3xTF32, rather than a stochastic-rounding mode; confirm against the cuTENSOR compute-descriptor documentation).