use baracuda_cuda_sys::driver;
use crate::error::{check, Result};
use crate::module::Function;
/// Queries the driver's `cu_occupancy_max_active_blocks_per_multiprocessor`
/// entry point for `func` and returns the block count it reports.
///
/// `block_size` and `dynamic_smem_bytes` are forwarded to the driver unchanged
/// (presumably threads per block and dynamic shared memory per block in bytes,
/// as in CUDA's `cuOccupancyMaxActiveBlocksPerMultiprocessor` — confirm against
/// the driver bindings).
///
/// # Errors
///
/// Propagates a failure to obtain the driver, a failure to resolve the entry
/// point, and any status that [`check`] rejects.
pub fn max_active_blocks_per_multiprocessor(
    func: &Function,
    block_size: i32,
    dynamic_smem_bytes: usize,
) -> Result<i32> {
    let drv = driver()?;
    let occupancy_fn = drv.cu_occupancy_max_active_blocks_per_multiprocessor()?;

    // Out-parameter the driver writes its answer into.
    let mut active_blocks: core::ffi::c_int = 0;

    // SAFETY: `active_blocks` is a live local for the whole call, and the raw
    // function handle is taken from a `Function` that stays borrowed across the
    // call (assumes `Function::as_raw` yields a handle valid while borrowed —
    // confirm against `Function`'s invariants).
    let status = unsafe {
        occupancy_fn(
            &mut active_blocks,
            func.as_raw(),
            block_size,
            dynamic_smem_bytes,
        )
    };
    check(status)?;

    Ok(active_blocks)
}
/// Same query as [`max_active_blocks_per_multiprocessor`], but goes through the
/// driver's `cu_occupancy_max_active_blocks_per_multiprocessor_with_flags`
/// entry point and additionally forwards `flags`.
///
/// `flags` is passed to the driver verbatim; this wrapper does not validate or
/// interpret it (presumably a `CUoccupancy_flags` bit set — confirm against the
/// CUDA driver documentation).
///
/// # Errors
///
/// Propagates a failure to obtain the driver, a failure to resolve the entry
/// point, and any status that [`check`] rejects.
pub fn max_active_blocks_per_multiprocessor_with_flags(
    func: &Function,
    block_size: i32,
    dynamic_smem_bytes: usize,
    flags: u32,
) -> Result<i32> {
    let drv = driver()?;
    let occupancy_fn = drv.cu_occupancy_max_active_blocks_per_multiprocessor_with_flags()?;

    // Out-parameter the driver writes its answer into.
    let mut active_blocks: core::ffi::c_int = 0;

    // SAFETY: `active_blocks` is a live local for the whole call, and the raw
    // function handle is taken from a `Function` that stays borrowed across the
    // call (assumes `Function::as_raw` yields a handle valid while borrowed —
    // confirm against `Function`'s invariants).
    let status = unsafe {
        occupancy_fn(
            &mut active_blocks,
            func.as_raw(),
            block_size,
            dynamic_smem_bytes,
            flags,
        )
    };
    check(status)?;

    Ok(active_blocks)
}
/// Asks the driver's `cu_occupancy_max_potential_block_size` entry point for a
/// launch configuration for `func`.
///
/// Returns `(min_grid_size, block_size)` exactly as written by the driver into
/// its two out-parameters, in that order.
///
/// `dynamic_smem_bytes` and `block_size_limit` are forwarded unchanged. The
/// callback slot of the driver call is always `None`, so this wrapper offers no
/// way to supply a per-block-size shared-memory callback.
/// NOTE(review): in CUDA's `cuOccupancyMaxPotentialBlockSize` a null callback
/// means dynamic shared memory does not vary with block size, and a
/// `block_size_limit` of `0` means "no limit" — confirm this binding matches.
///
/// # Errors
///
/// Propagates a failure to obtain the driver, a failure to resolve the entry
/// point, and any status that [`check`] rejects.
pub fn max_potential_block_size(
    func: &Function,
    dynamic_smem_bytes: usize,
    block_size_limit: i32,
) -> Result<(i32, i32)> {
    let drv = driver()?;
    let suggest_fn = drv.cu_occupancy_max_potential_block_size()?;

    // Both values are filled in by the driver.
    let mut grid_min: core::ffi::c_int = 0;
    let mut threads: core::ffi::c_int = 0;

    // SAFETY: both out-pointers refer to live locals for the whole call, and the
    // raw function handle is taken from a `Function` that stays borrowed across
    // the call (assumes `Function::as_raw` yields a handle valid while borrowed
    // — confirm against `Function`'s invariants).
    let status = unsafe {
        suggest_fn(
            &mut grid_min,
            &mut threads,
            func.as_raw(),
            None, // no block-size -> dynamic-smem callback
            dynamic_smem_bytes,
            block_size_limit,
        )
    };
    check(status)?;

    Ok((grid_min, threads))
}
/// Queries the driver's `cu_occupancy_available_dynamic_smem_per_block` entry
/// point for `func` and returns the byte count it reports.
///
/// `num_blocks` and `block_size` are forwarded to the driver unchanged
/// (presumably resident blocks per multiprocessor and threads per block, as in
/// CUDA's `cuOccupancyAvailableDynamicSMemPerBlock` — confirm against the
/// driver bindings).
///
/// # Errors
///
/// Propagates a failure to obtain the driver, a failure to resolve the entry
/// point, and any status that [`check`] rejects.
pub fn available_dynamic_smem_per_block(
    func: &Function,
    num_blocks: i32,
    block_size: i32,
) -> Result<usize> {
    let drv = driver()?;
    let smem_fn = drv.cu_occupancy_available_dynamic_smem_per_block()?;

    // Out-parameter the driver writes its answer into.
    let mut smem_bytes: usize = 0;

    // SAFETY: `smem_bytes` is a live local for the whole call, and the raw
    // function handle is taken from a `Function` that stays borrowed across the
    // call (assumes `Function::as_raw` yields a handle valid while borrowed —
    // confirm against `Function`'s invariants).
    let status = unsafe { smem_fn(&mut smem_bytes, func.as_raw(), num_blocks, block_size) };
    check(status)?;

    Ok(smem_bytes)
}