use crate::error::CudaResult;
use crate::loader::try_driver;
use crate::module::Function;
impl Function {
    /// Queries the driver for the number of blocks of this kernel that can be
    /// active on one multiprocessor at the same time.
    ///
    /// This is a thin wrapper around the driver entry point
    /// `cu_occupancy_max_active_blocks_per_multiprocessor`; the value the
    /// driver writes into its out-parameter is returned unchanged.
    ///
    /// # Parameters
    ///
    /// * `block_size` - block size forwarded to the driver (threads per block
    ///   in the CUDA driver API).
    /// * `dynamic_smem` - dynamic shared memory per block forwarded to the
    ///   driver (bytes in the CUDA driver API).
    ///
    /// # Errors
    ///
    /// Returns an error if `try_driver` cannot provide the driver API, or if
    /// the occupancy call itself reports a failure.
    pub fn max_active_blocks_per_sm(
        &self,
        block_size: i32,
        dynamic_smem: usize,
    ) -> CudaResult<i32> {
        let driver = try_driver()?;

        // Out-parameter: starts at zero and is overwritten by the driver.
        let mut active_blocks = 0_i32;
        crate::cuda_call!((driver.cu_occupancy_max_active_blocks_per_multiprocessor)(
            &mut active_blocks,
            self.raw(),
            block_size,
            dynamic_smem,
        ))?;

        Ok(active_blocks)
    }

    /// Asks the driver to suggest a launch configuration for this kernel.
    ///
    /// This is a thin wrapper around the driver entry point
    /// `cu_occupancy_max_potential_block_size`. No block-size-to-shared-memory
    /// callback is supplied (`None`), so the same `dynamic_smem` value is used
    /// for every candidate block size, and the block-size limit is passed as
    /// `0` (documented by the CUDA driver API as "no limit").
    ///
    /// # Parameters
    ///
    /// * `dynamic_smem` - dynamic shared memory per block forwarded to the
    ///   driver (bytes in the CUDA driver API).
    ///
    /// # Returns
    ///
    /// A `(min_grid_size, block_size)` tuple, in that order, holding the two
    /// values written by the driver.
    ///
    /// # Errors
    ///
    /// Returns an error if `try_driver` cannot provide the driver API, or if
    /// the occupancy call itself reports a failure.
    pub fn optimal_block_size(&self, dynamic_smem: usize) -> CudaResult<(i32, i32)> {
        let driver = try_driver()?;

        // Both out-parameters start at zero and are overwritten by the driver.
        let (mut grid_floor, mut threads_per_block) = (0_i32, 0_i32);
        crate::cuda_call!((driver.cu_occupancy_max_potential_block_size)(
            &mut grid_floor,
            &mut threads_per_block,
            self.raw(),
            // No per-block-size dynamic shared memory callback.
            None,
            dynamic_smem,
            // Block-size limit argument.
            0,
        ))?;

        Ok((grid_floor, threads_per_block))
    }
}