pub struct CudaFunction { /* private fields */ }
Expand description
Wrapper around sys::CUfunction. Used by CudaStream::launch_builder to execute kernels.
Implementations§
Source§impl CudaFunction
impl CudaFunction
pub fn occupancy_available_dynamic_smem_per_block( &self, num_blocks: u32, block_size: u32, ) -> Result<usize, DriverError>
pub fn occupancy_max_active_blocks_per_multiprocessor( &self, block_size: u32, dynamic_smem_size: usize, flags: Option<CUoccupancy_flags_enum>, ) -> Result<u32, DriverError>
pub fn occupancy_max_active_clusters( &self, config: LaunchConfig, stream: &CudaStream, ) -> Result<u32, DriverError>
Source
pub fn occupancy_max_potential_block_size(
&self,
block_size_to_dynamic_smem_size: extern "C" fn(block_size: c_int) -> usize,
dynamic_smem_size: usize,
block_size_limit: u32,
flags: Option<CUoccupancy_flags_enum>,
) -> Result<(u32, u32), DriverError>
pub fn occupancy_max_potential_block_size( &self, block_size_to_dynamic_smem_size: extern "C" fn(block_size: c_int) -> usize, dynamic_smem_size: usize, block_size_limit: u32, flags: Option<CUoccupancy_flags_enum>, ) -> Result<(u32, u32), DriverError>
Examples found in repository?
examples/10-function-attributes.rs (line 63)
6fn main() -> Result<(), DriverError> {
7 let ctx = CudaContext::new(0)?;
8
9 println!("Device: {}", ctx.name()?);
10 println!();
11
12 // Load the module with the sin_kernel
13 let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
14 let sin_kernel = module.load_function("sin_kernel")?;
15
16 // Query function attributes
17 println!("=== Function Attributes for 'sin_kernel' ===");
18 println!();
19
20 println!("Resource Usage:");
21 println!(" Registers per thread: {}", sin_kernel.num_regs()?);
22 println!(
23 " Static shared memory: {} bytes",
24 sin_kernel.shared_size_bytes()?
25 );
26 println!(
27 " Constant memory: {} bytes",
28 sin_kernel.const_size_bytes()?
29 );
30 println!(
31 " Local memory per thread: {} bytes",
32 sin_kernel.local_size_bytes()?
33 );
34 println!();
35
36 println!("Limits:");
37 println!(
38 " Max threads per block: {}",
39 sin_kernel.max_threads_per_block()?
40 );
41 println!();
42
43 println!("Compilation Info:");
44 let ptx_ver = sin_kernel.ptx_version()?;
45 let bin_ver = sin_kernel.binary_version()?;
46 println!(
47 " PTX version: {}.{}",
48 ptx_ver / 10,
49 ptx_ver % 10
50 );
51 println!(
52 " Binary version: {}.{}",
53 bin_ver / 10,
54 bin_ver % 10
55 );
56 println!();
57
58 // Use occupancy API to get optimal launch configuration
59 extern "C" fn no_dynamic_smem(_block_size: std::ffi::c_int) -> usize {
60 0
61 }
62 let (min_grid_size, block_size) =
63 sin_kernel.occupancy_max_potential_block_size(no_dynamic_smem, 0, 0, None)?;
64
65 println!("=== Optimal Launch Configuration (sin_kernel) ===");
66 println!(" Suggested block size: {}", block_size);
67 println!(" Min grid size: {}", min_grid_size);
68 println!(" Total threads per grid: {}", min_grid_size * block_size);
69
70 Ok(())
71}
pub fn occupancy_max_potential_cluster_size( &self, config: LaunchConfig, stream: &CudaStream, ) -> Result<u32, DriverError>
Source
pub fn get_attribute(
&self,
attribute: CUfunction_attribute_enum,
) -> Result<i32, DriverError>
pub fn get_attribute( &self, attribute: CUfunction_attribute_enum, ) -> Result<i32, DriverError>
Get the value of a specific attribute of this CudaFunction.
See CUDA docs
Source
pub fn num_regs(&self) -> Result<i32, DriverError>
pub fn num_regs(&self) -> Result<i32, DriverError>
Get the number of registers used per thread.
Examples found in repository?
examples/10-function-attributes.rs (line 21)
6fn main() -> Result<(), DriverError> {
7 let ctx = CudaContext::new(0)?;
8
9 println!("Device: {}", ctx.name()?);
10 println!();
11
12 // Load the module with the sin_kernel
13 let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
14 let sin_kernel = module.load_function("sin_kernel")?;
15
16 // Query function attributes
17 println!("=== Function Attributes for 'sin_kernel' ===");
18 println!();
19
20 println!("Resource Usage:");
21 println!(" Registers per thread: {}", sin_kernel.num_regs()?);
22 println!(
23 " Static shared memory: {} bytes",
24 sin_kernel.shared_size_bytes()?
25 );
26 println!(
27 " Constant memory: {} bytes",
28 sin_kernel.const_size_bytes()?
29 );
30 println!(
31 " Local memory per thread: {} bytes",
32 sin_kernel.local_size_bytes()?
33 );
34 println!();
35
36 println!("Limits:");
37 println!(
38 " Max threads per block: {}",
39 sin_kernel.max_threads_per_block()?
40 );
41 println!();
42
43 println!("Compilation Info:");
44 let ptx_ver = sin_kernel.ptx_version()?;
45 let bin_ver = sin_kernel.binary_version()?;
46 println!(
47 " PTX version: {}.{}",
48 ptx_ver / 10,
49 ptx_ver % 10
50 );
51 println!(
52 " Binary version: {}.{}",
53 bin_ver / 10,
54 bin_ver % 10
55 );
56 println!();
57
58 // Use occupancy API to get optimal launch configuration
59 extern "C" fn no_dynamic_smem(_block_size: std::ffi::c_int) -> usize {
60 0
61 }
62 let (min_grid_size, block_size) =
63 sin_kernel.occupancy_max_potential_block_size(no_dynamic_smem, 0, 0, None)?;
64
65 println!("=== Optimal Launch Configuration (sin_kernel) ===");
66 println!(" Suggested block size: {}", block_size);
67 println!(" Min grid size: {}", min_grid_size);
68 println!(" Total threads per grid: {}", min_grid_size * block_size);
69
70 Ok(())
71}
pub fn shared_size_bytes(&self) -> Result<i32, DriverError>
Get the size of statically-allocated shared memory in bytes.
Examples found in repository?
examples/10-function-attributes.rs (line 24)
6fn main() -> Result<(), DriverError> {
7 let ctx = CudaContext::new(0)?;
8
9 println!("Device: {}", ctx.name()?);
10 println!();
11
12 // Load the module with the sin_kernel
13 let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
14 let sin_kernel = module.load_function("sin_kernel")?;
15
16 // Query function attributes
17 println!("=== Function Attributes for 'sin_kernel' ===");
18 println!();
19
20 println!("Resource Usage:");
21 println!(" Registers per thread: {}", sin_kernel.num_regs()?);
22 println!(
23 " Static shared memory: {} bytes",
24 sin_kernel.shared_size_bytes()?
25 );
26 println!(
27 " Constant memory: {} bytes",
28 sin_kernel.const_size_bytes()?
29 );
30 println!(
31 " Local memory per thread: {} bytes",
32 sin_kernel.local_size_bytes()?
33 );
34 println!();
35
36 println!("Limits:");
37 println!(
38 " Max threads per block: {}",
39 sin_kernel.max_threads_per_block()?
40 );
41 println!();
42
43 println!("Compilation Info:");
44 let ptx_ver = sin_kernel.ptx_version()?;
45 let bin_ver = sin_kernel.binary_version()?;
46 println!(
47 " PTX version: {}.{}",
48 ptx_ver / 10,
49 ptx_ver % 10
50 );
51 println!(
52 " Binary version: {}.{}",
53 bin_ver / 10,
54 bin_ver % 10
55 );
56 println!();
57
58 // Use occupancy API to get optimal launch configuration
59 extern "C" fn no_dynamic_smem(_block_size: std::ffi::c_int) -> usize {
60 0
61 }
62 let (min_grid_size, block_size) =
63 sin_kernel.occupancy_max_potential_block_size(no_dynamic_smem, 0, 0, None)?;
64
65 println!("=== Optimal Launch Configuration (sin_kernel) ===");
66 println!(" Suggested block size: {}", block_size);
67 println!(" Min grid size: {}", min_grid_size);
68 println!(" Total threads per grid: {}", min_grid_size * block_size);
69
70 Ok(())
71}
Source
pub fn const_size_bytes(&self) -> Result<i32, DriverError>
pub fn const_size_bytes(&self) -> Result<i32, DriverError>
Get the size of constant memory in bytes used by this function.
Examples found in repository?
examples/10-function-attributes.rs (line 28)
6fn main() -> Result<(), DriverError> {
7 let ctx = CudaContext::new(0)?;
8
9 println!("Device: {}", ctx.name()?);
10 println!();
11
12 // Load the module with the sin_kernel
13 let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
14 let sin_kernel = module.load_function("sin_kernel")?;
15
16 // Query function attributes
17 println!("=== Function Attributes for 'sin_kernel' ===");
18 println!();
19
20 println!("Resource Usage:");
21 println!(" Registers per thread: {}", sin_kernel.num_regs()?);
22 println!(
23 " Static shared memory: {} bytes",
24 sin_kernel.shared_size_bytes()?
25 );
26 println!(
27 " Constant memory: {} bytes",
28 sin_kernel.const_size_bytes()?
29 );
30 println!(
31 " Local memory per thread: {} bytes",
32 sin_kernel.local_size_bytes()?
33 );
34 println!();
35
36 println!("Limits:");
37 println!(
38 " Max threads per block: {}",
39 sin_kernel.max_threads_per_block()?
40 );
41 println!();
42
43 println!("Compilation Info:");
44 let ptx_ver = sin_kernel.ptx_version()?;
45 let bin_ver = sin_kernel.binary_version()?;
46 println!(
47 " PTX version: {}.{}",
48 ptx_ver / 10,
49 ptx_ver % 10
50 );
51 println!(
52 " Binary version: {}.{}",
53 bin_ver / 10,
54 bin_ver % 10
55 );
56 println!();
57
58 // Use occupancy API to get optimal launch configuration
59 extern "C" fn no_dynamic_smem(_block_size: std::ffi::c_int) -> usize {
60 0
61 }
62 let (min_grid_size, block_size) =
63 sin_kernel.occupancy_max_potential_block_size(no_dynamic_smem, 0, 0, None)?;
64
65 println!("=== Optimal Launch Configuration (sin_kernel) ===");
66 println!(" Suggested block size: {}", block_size);
67 println!(" Min grid size: {}", min_grid_size);
68 println!(" Total threads per grid: {}", min_grid_size * block_size);
69
70 Ok(())
71}
Source
pub fn local_size_bytes(&self) -> Result<i32, DriverError>
pub fn local_size_bytes(&self) -> Result<i32, DriverError>
Get the size of local memory in bytes used per thread.
Examples found in repository?
examples/10-function-attributes.rs (line 32)
6fn main() -> Result<(), DriverError> {
7 let ctx = CudaContext::new(0)?;
8
9 println!("Device: {}", ctx.name()?);
10 println!();
11
12 // Load the module with the sin_kernel
13 let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
14 let sin_kernel = module.load_function("sin_kernel")?;
15
16 // Query function attributes
17 println!("=== Function Attributes for 'sin_kernel' ===");
18 println!();
19
20 println!("Resource Usage:");
21 println!(" Registers per thread: {}", sin_kernel.num_regs()?);
22 println!(
23 " Static shared memory: {} bytes",
24 sin_kernel.shared_size_bytes()?
25 );
26 println!(
27 " Constant memory: {} bytes",
28 sin_kernel.const_size_bytes()?
29 );
30 println!(
31 " Local memory per thread: {} bytes",
32 sin_kernel.local_size_bytes()?
33 );
34 println!();
35
36 println!("Limits:");
37 println!(
38 " Max threads per block: {}",
39 sin_kernel.max_threads_per_block()?
40 );
41 println!();
42
43 println!("Compilation Info:");
44 let ptx_ver = sin_kernel.ptx_version()?;
45 let bin_ver = sin_kernel.binary_version()?;
46 println!(
47 " PTX version: {}.{}",
48 ptx_ver / 10,
49 ptx_ver % 10
50 );
51 println!(
52 " Binary version: {}.{}",
53 bin_ver / 10,
54 bin_ver % 10
55 );
56 println!();
57
58 // Use occupancy API to get optimal launch configuration
59 extern "C" fn no_dynamic_smem(_block_size: std::ffi::c_int) -> usize {
60 0
61 }
62 let (min_grid_size, block_size) =
63 sin_kernel.occupancy_max_potential_block_size(no_dynamic_smem, 0, 0, None)?;
64
65 println!("=== Optimal Launch Configuration (sin_kernel) ===");
66 println!(" Suggested block size: {}", block_size);
67 println!(" Min grid size: {}", min_grid_size);
68 println!(" Total threads per grid: {}", min_grid_size * block_size);
69
70 Ok(())
71}
Source
pub fn max_threads_per_block(&self) -> Result<i32, DriverError>
pub fn max_threads_per_block(&self) -> Result<i32, DriverError>
Get the maximum number of threads per block for this function.
Examples found in repository?
examples/10-function-attributes.rs (line 39)
6fn main() -> Result<(), DriverError> {
7 let ctx = CudaContext::new(0)?;
8
9 println!("Device: {}", ctx.name()?);
10 println!();
11
12 // Load the module with the sin_kernel
13 let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
14 let sin_kernel = module.load_function("sin_kernel")?;
15
16 // Query function attributes
17 println!("=== Function Attributes for 'sin_kernel' ===");
18 println!();
19
20 println!("Resource Usage:");
21 println!(" Registers per thread: {}", sin_kernel.num_regs()?);
22 println!(
23 " Static shared memory: {} bytes",
24 sin_kernel.shared_size_bytes()?
25 );
26 println!(
27 " Constant memory: {} bytes",
28 sin_kernel.const_size_bytes()?
29 );
30 println!(
31 " Local memory per thread: {} bytes",
32 sin_kernel.local_size_bytes()?
33 );
34 println!();
35
36 println!("Limits:");
37 println!(
38 " Max threads per block: {}",
39 sin_kernel.max_threads_per_block()?
40 );
41 println!();
42
43 println!("Compilation Info:");
44 let ptx_ver = sin_kernel.ptx_version()?;
45 let bin_ver = sin_kernel.binary_version()?;
46 println!(
47 " PTX version: {}.{}",
48 ptx_ver / 10,
49 ptx_ver % 10
50 );
51 println!(
52 " Binary version: {}.{}",
53 bin_ver / 10,
54 bin_ver % 10
55 );
56 println!();
57
58 // Use occupancy API to get optimal launch configuration
59 extern "C" fn no_dynamic_smem(_block_size: std::ffi::c_int) -> usize {
60 0
61 }
62 let (min_grid_size, block_size) =
63 sin_kernel.occupancy_max_potential_block_size(no_dynamic_smem, 0, 0, None)?;
64
65 println!("=== Optimal Launch Configuration (sin_kernel) ===");
66 println!(" Suggested block size: {}", block_size);
67 println!(" Min grid size: {}", min_grid_size);
68 println!(" Total threads per grid: {}", min_grid_size * block_size);
69
70 Ok(())
71}
Source
pub fn ptx_version(&self) -> Result<i32, DriverError>
pub fn ptx_version(&self) -> Result<i32, DriverError>
Get the PTX virtual architecture version for which the function was compiled.
Examples found in repository?
examples/10-function-attributes.rs (line 44)
6fn main() -> Result<(), DriverError> {
7 let ctx = CudaContext::new(0)?;
8
9 println!("Device: {}", ctx.name()?);
10 println!();
11
12 // Load the module with the sin_kernel
13 let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
14 let sin_kernel = module.load_function("sin_kernel")?;
15
16 // Query function attributes
17 println!("=== Function Attributes for 'sin_kernel' ===");
18 println!();
19
20 println!("Resource Usage:");
21 println!(" Registers per thread: {}", sin_kernel.num_regs()?);
22 println!(
23 " Static shared memory: {} bytes",
24 sin_kernel.shared_size_bytes()?
25 );
26 println!(
27 " Constant memory: {} bytes",
28 sin_kernel.const_size_bytes()?
29 );
30 println!(
31 " Local memory per thread: {} bytes",
32 sin_kernel.local_size_bytes()?
33 );
34 println!();
35
36 println!("Limits:");
37 println!(
38 " Max threads per block: {}",
39 sin_kernel.max_threads_per_block()?
40 );
41 println!();
42
43 println!("Compilation Info:");
44 let ptx_ver = sin_kernel.ptx_version()?;
45 let bin_ver = sin_kernel.binary_version()?;
46 println!(
47 " PTX version: {}.{}",
48 ptx_ver / 10,
49 ptx_ver % 10
50 );
51 println!(
52 " Binary version: {}.{}",
53 bin_ver / 10,
54 bin_ver % 10
55 );
56 println!();
57
58 // Use occupancy API to get optimal launch configuration
59 extern "C" fn no_dynamic_smem(_block_size: std::ffi::c_int) -> usize {
60 0
61 }
62 let (min_grid_size, block_size) =
63 sin_kernel.occupancy_max_potential_block_size(no_dynamic_smem, 0, 0, None)?;
64
65 println!("=== Optimal Launch Configuration (sin_kernel) ===");
66 println!(" Suggested block size: {}", block_size);
67 println!(" Min grid size: {}", min_grid_size);
68 println!(" Total threads per grid: {}", min_grid_size * block_size);
69
70 Ok(())
71}
Source
pub fn binary_version(&self) -> Result<i32, DriverError>
pub fn binary_version(&self) -> Result<i32, DriverError>
Get the binary architecture version for which the function was compiled.
Examples found in repository?
examples/10-function-attributes.rs (line 45)
6fn main() -> Result<(), DriverError> {
7 let ctx = CudaContext::new(0)?;
8
9 println!("Device: {}", ctx.name()?);
10 println!();
11
12 // Load the module with the sin_kernel
13 let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
14 let sin_kernel = module.load_function("sin_kernel")?;
15
16 // Query function attributes
17 println!("=== Function Attributes for 'sin_kernel' ===");
18 println!();
19
20 println!("Resource Usage:");
21 println!(" Registers per thread: {}", sin_kernel.num_regs()?);
22 println!(
23 " Static shared memory: {} bytes",
24 sin_kernel.shared_size_bytes()?
25 );
26 println!(
27 " Constant memory: {} bytes",
28 sin_kernel.const_size_bytes()?
29 );
30 println!(
31 " Local memory per thread: {} bytes",
32 sin_kernel.local_size_bytes()?
33 );
34 println!();
35
36 println!("Limits:");
37 println!(
38 " Max threads per block: {}",
39 sin_kernel.max_threads_per_block()?
40 );
41 println!();
42
43 println!("Compilation Info:");
44 let ptx_ver = sin_kernel.ptx_version()?;
45 let bin_ver = sin_kernel.binary_version()?;
46 println!(
47 " PTX version: {}.{}",
48 ptx_ver / 10,
49 ptx_ver % 10
50 );
51 println!(
52 " Binary version: {}.{}",
53 bin_ver / 10,
54 bin_ver % 10
55 );
56 println!();
57
58 // Use occupancy API to get optimal launch configuration
59 extern "C" fn no_dynamic_smem(_block_size: std::ffi::c_int) -> usize {
60 0
61 }
62 let (min_grid_size, block_size) =
63 sin_kernel.occupancy_max_potential_block_size(no_dynamic_smem, 0, 0, None)?;
64
65 println!("=== Optimal Launch Configuration (sin_kernel) ===");
66 println!(" Suggested block size: {}", block_size);
67 println!(" Min grid size: {}", min_grid_size);
68 println!(" Total threads per grid: {}", min_grid_size * block_size);
69
70 Ok(())
71}
Source
pub fn set_attribute(
&self,
attribute: CUfunction_attribute_enum,
value: i32,
) -> Result<(), DriverError>
pub fn set_attribute( &self, attribute: CUfunction_attribute_enum, value: i32, ) -> Result<(), DriverError>
Set the value of a specific attribute of this CudaFunction.
Source
pub fn set_function_cache_config(
&self,
attribute: CUfunc_cache_enum,
) -> Result<(), DriverError>
pub fn set_function_cache_config( &self, attribute: CUfunc_cache_enum, ) -> Result<(), DriverError>
Set the cache config of this CudaFunction.
Trait Implementations§
Source§impl Clone for CudaFunction
impl Clone for CudaFunction
Source§fn clone(&self) -> CudaFunction
fn clone(&self) -> CudaFunction
Returns a duplicate of the value. Read more
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more
Source§impl Debug for CudaFunction
impl Debug for CudaFunction
impl Send for CudaFunction
impl Sync for CudaFunction
Auto Trait Implementations§
impl Freeze for CudaFunction
impl RefUnwindSafe for CudaFunction
impl Unpin for CudaFunction
impl UnwindSafe for CudaFunction
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T
where
    T: ?Sized,
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more