CudaFunction

Struct CudaFunction 

Source
pub struct CudaFunction { /* private fields */ }
Expand description

Wrapper around sys::CUfunction. Used by CudaStream::launch_builder to execute kernels.

Implementations§

Source§

impl CudaFunction

Source

pub fn occupancy_available_dynamic_smem_per_block( &self, num_blocks: u32, block_size: u32, ) -> Result<usize, DriverError>

Source

pub fn occupancy_max_active_blocks_per_multiprocessor( &self, block_size: u32, dynamic_smem_size: usize, flags: Option<CUoccupancy_flags_enum>, ) -> Result<u32, DriverError>

Source

pub fn occupancy_max_active_clusters( &self, config: LaunchConfig, stream: &CudaStream, ) -> Result<u32, DriverError>

Source

pub fn occupancy_max_potential_block_size( &self, block_size_to_dynamic_smem_size: extern "C" fn(block_size: c_int) -> usize, dynamic_smem_size: usize, block_size_limit: u32, flags: Option<CUoccupancy_flags_enum>, ) -> Result<(u32, u32), DriverError>

Examples found in repository?
examples/10-function-attributes.rs (line 63)
6 fn main() -> Result<(), DriverError> {
7    let ctx = CudaContext::new(0)?;
8
9    println!("Device: {}", ctx.name()?);
10    println!();
11
12    // Load the module with the sin_kernel
13    let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
14    let sin_kernel = module.load_function("sin_kernel")?;
15
16    // Query function attributes
17    println!("=== Function Attributes for 'sin_kernel' ===");
18    println!();
19
20    println!("Resource Usage:");
21    println!("  Registers per thread:     {}", sin_kernel.num_regs()?);
22    println!(
23        "  Static shared memory:     {} bytes",
24        sin_kernel.shared_size_bytes()?
25    );
26    println!(
27        "  Constant memory:          {} bytes",
28        sin_kernel.const_size_bytes()?
29    );
30    println!(
31        "  Local memory per thread:  {} bytes",
32        sin_kernel.local_size_bytes()?
33    );
34    println!();
35
36    println!("Limits:");
37    println!(
38        "  Max threads per block:    {}",
39        sin_kernel.max_threads_per_block()?
40    );
41    println!();
42
43    println!("Compilation Info:");
44    let ptx_ver = sin_kernel.ptx_version()?;
45    let bin_ver = sin_kernel.binary_version()?;
46    println!(
47        "  PTX version:              {}.{}",
48        ptx_ver / 10,
49        ptx_ver % 10
50    );
51    println!(
52        "  Binary version:           {}.{}",
53        bin_ver / 10,
54        bin_ver % 10
55    );
56    println!();
57
58    // Use occupancy API to get optimal launch configuration
59    extern "C" fn no_dynamic_smem(_block_size: std::ffi::c_int) -> usize {
60        0
61    }
62    let (min_grid_size, block_size) =
63        sin_kernel.occupancy_max_potential_block_size(no_dynamic_smem, 0, 0, None)?;
64
65    println!("=== Optimal Launch Configuration (sin_kernel) ===");
66    println!("  Suggested block size:     {}", block_size);
67    println!("  Min grid size:            {}", min_grid_size);
68    println!("  Total threads per grid:   {}", min_grid_size * block_size);
69
70    Ok(())
71 }
Source

pub fn occupancy_max_potential_cluster_size( &self, config: LaunchConfig, stream: &CudaStream, ) -> Result<u32, DriverError>

Source

pub fn get_attribute( &self, attribute: CUfunction_attribute_enum, ) -> Result<i32, DriverError>

Get the value of a specific attribute of this CudaFunction.

See the CUDA Driver API documentation for cuFuncGetAttribute.

Source

pub fn num_regs(&self) -> Result<i32, DriverError>

Get the number of registers used per thread.

Examples found in repository?
examples/10-function-attributes.rs (line 21)
6 fn main() -> Result<(), DriverError> {
7    let ctx = CudaContext::new(0)?;
8
9    println!("Device: {}", ctx.name()?);
10    println!();
11
12    // Load the module with the sin_kernel
13    let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
14    let sin_kernel = module.load_function("sin_kernel")?;
15
16    // Query function attributes
17    println!("=== Function Attributes for 'sin_kernel' ===");
18    println!();
19
20    println!("Resource Usage:");
21    println!("  Registers per thread:     {}", sin_kernel.num_regs()?);
22    println!(
23        "  Static shared memory:     {} bytes",
24        sin_kernel.shared_size_bytes()?
25    );
26    println!(
27        "  Constant memory:          {} bytes",
28        sin_kernel.const_size_bytes()?
29    );
30    println!(
31        "  Local memory per thread:  {} bytes",
32        sin_kernel.local_size_bytes()?
33    );
34    println!();
35
36    println!("Limits:");
37    println!(
38        "  Max threads per block:    {}",
39        sin_kernel.max_threads_per_block()?
40    );
41    println!();
42
43    println!("Compilation Info:");
44    let ptx_ver = sin_kernel.ptx_version()?;
45    let bin_ver = sin_kernel.binary_version()?;
46    println!(
47        "  PTX version:              {}.{}",
48        ptx_ver / 10,
49        ptx_ver % 10
50    );
51    println!(
52        "  Binary version:           {}.{}",
53        bin_ver / 10,
54        bin_ver % 10
55    );
56    println!();
57
58    // Use occupancy API to get optimal launch configuration
59    extern "C" fn no_dynamic_smem(_block_size: std::ffi::c_int) -> usize {
60        0
61    }
62    let (min_grid_size, block_size) =
63        sin_kernel.occupancy_max_potential_block_size(no_dynamic_smem, 0, 0, None)?;
64
65    println!("=== Optimal Launch Configuration (sin_kernel) ===");
66    println!("  Suggested block size:     {}", block_size);
67    println!("  Min grid size:            {}", min_grid_size);
68    println!("  Total threads per grid:   {}", min_grid_size * block_size);
69
70    Ok(())
71 }
Source

pub fn shared_size_bytes(&self) -> Result<i32, DriverError>

Get the size of statically-allocated shared memory in bytes.

Examples found in repository?
examples/10-function-attributes.rs (line 24)
6 fn main() -> Result<(), DriverError> {
7    let ctx = CudaContext::new(0)?;
8
9    println!("Device: {}", ctx.name()?);
10    println!();
11
12    // Load the module with the sin_kernel
13    let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
14    let sin_kernel = module.load_function("sin_kernel")?;
15
16    // Query function attributes
17    println!("=== Function Attributes for 'sin_kernel' ===");
18    println!();
19
20    println!("Resource Usage:");
21    println!("  Registers per thread:     {}", sin_kernel.num_regs()?);
22    println!(
23        "  Static shared memory:     {} bytes",
24        sin_kernel.shared_size_bytes()?
25    );
26    println!(
27        "  Constant memory:          {} bytes",
28        sin_kernel.const_size_bytes()?
29    );
30    println!(
31        "  Local memory per thread:  {} bytes",
32        sin_kernel.local_size_bytes()?
33    );
34    println!();
35
36    println!("Limits:");
37    println!(
38        "  Max threads per block:    {}",
39        sin_kernel.max_threads_per_block()?
40    );
41    println!();
42
43    println!("Compilation Info:");
44    let ptx_ver = sin_kernel.ptx_version()?;
45    let bin_ver = sin_kernel.binary_version()?;
46    println!(
47        "  PTX version:              {}.{}",
48        ptx_ver / 10,
49        ptx_ver % 10
50    );
51    println!(
52        "  Binary version:           {}.{}",
53        bin_ver / 10,
54        bin_ver % 10
55    );
56    println!();
57
58    // Use occupancy API to get optimal launch configuration
59    extern "C" fn no_dynamic_smem(_block_size: std::ffi::c_int) -> usize {
60        0
61    }
62    let (min_grid_size, block_size) =
63        sin_kernel.occupancy_max_potential_block_size(no_dynamic_smem, 0, 0, None)?;
64
65    println!("=== Optimal Launch Configuration (sin_kernel) ===");
66    println!("  Suggested block size:     {}", block_size);
67    println!("  Min grid size:            {}", min_grid_size);
68    println!("  Total threads per grid:   {}", min_grid_size * block_size);
69
70    Ok(())
71 }
Source

pub fn const_size_bytes(&self) -> Result<i32, DriverError>

Get the size of constant memory in bytes used by this function.

Examples found in repository?
examples/10-function-attributes.rs (line 28)
6 fn main() -> Result<(), DriverError> {
7    let ctx = CudaContext::new(0)?;
8
9    println!("Device: {}", ctx.name()?);
10    println!();
11
12    // Load the module with the sin_kernel
13    let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
14    let sin_kernel = module.load_function("sin_kernel")?;
15
16    // Query function attributes
17    println!("=== Function Attributes for 'sin_kernel' ===");
18    println!();
19
20    println!("Resource Usage:");
21    println!("  Registers per thread:     {}", sin_kernel.num_regs()?);
22    println!(
23        "  Static shared memory:     {} bytes",
24        sin_kernel.shared_size_bytes()?
25    );
26    println!(
27        "  Constant memory:          {} bytes",
28        sin_kernel.const_size_bytes()?
29    );
30    println!(
31        "  Local memory per thread:  {} bytes",
32        sin_kernel.local_size_bytes()?
33    );
34    println!();
35
36    println!("Limits:");
37    println!(
38        "  Max threads per block:    {}",
39        sin_kernel.max_threads_per_block()?
40    );
41    println!();
42
43    println!("Compilation Info:");
44    let ptx_ver = sin_kernel.ptx_version()?;
45    let bin_ver = sin_kernel.binary_version()?;
46    println!(
47        "  PTX version:              {}.{}",
48        ptx_ver / 10,
49        ptx_ver % 10
50    );
51    println!(
52        "  Binary version:           {}.{}",
53        bin_ver / 10,
54        bin_ver % 10
55    );
56    println!();
57
58    // Use occupancy API to get optimal launch configuration
59    extern "C" fn no_dynamic_smem(_block_size: std::ffi::c_int) -> usize {
60        0
61    }
62    let (min_grid_size, block_size) =
63        sin_kernel.occupancy_max_potential_block_size(no_dynamic_smem, 0, 0, None)?;
64
65    println!("=== Optimal Launch Configuration (sin_kernel) ===");
66    println!("  Suggested block size:     {}", block_size);
67    println!("  Min grid size:            {}", min_grid_size);
68    println!("  Total threads per grid:   {}", min_grid_size * block_size);
69
70    Ok(())
71 }
Source

pub fn local_size_bytes(&self) -> Result<i32, DriverError>

Get the size of local memory in bytes used per thread.

Examples found in repository?
examples/10-function-attributes.rs (line 32)
6 fn main() -> Result<(), DriverError> {
7    let ctx = CudaContext::new(0)?;
8
9    println!("Device: {}", ctx.name()?);
10    println!();
11
12    // Load the module with the sin_kernel
13    let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
14    let sin_kernel = module.load_function("sin_kernel")?;
15
16    // Query function attributes
17    println!("=== Function Attributes for 'sin_kernel' ===");
18    println!();
19
20    println!("Resource Usage:");
21    println!("  Registers per thread:     {}", sin_kernel.num_regs()?);
22    println!(
23        "  Static shared memory:     {} bytes",
24        sin_kernel.shared_size_bytes()?
25    );
26    println!(
27        "  Constant memory:          {} bytes",
28        sin_kernel.const_size_bytes()?
29    );
30    println!(
31        "  Local memory per thread:  {} bytes",
32        sin_kernel.local_size_bytes()?
33    );
34    println!();
35
36    println!("Limits:");
37    println!(
38        "  Max threads per block:    {}",
39        sin_kernel.max_threads_per_block()?
40    );
41    println!();
42
43    println!("Compilation Info:");
44    let ptx_ver = sin_kernel.ptx_version()?;
45    let bin_ver = sin_kernel.binary_version()?;
46    println!(
47        "  PTX version:              {}.{}",
48        ptx_ver / 10,
49        ptx_ver % 10
50    );
51    println!(
52        "  Binary version:           {}.{}",
53        bin_ver / 10,
54        bin_ver % 10
55    );
56    println!();
57
58    // Use occupancy API to get optimal launch configuration
59    extern "C" fn no_dynamic_smem(_block_size: std::ffi::c_int) -> usize {
60        0
61    }
62    let (min_grid_size, block_size) =
63        sin_kernel.occupancy_max_potential_block_size(no_dynamic_smem, 0, 0, None)?;
64
65    println!("=== Optimal Launch Configuration (sin_kernel) ===");
66    println!("  Suggested block size:     {}", block_size);
67    println!("  Min grid size:            {}", min_grid_size);
68    println!("  Total threads per grid:   {}", min_grid_size * block_size);
69
70    Ok(())
71 }
Source

pub fn max_threads_per_block(&self) -> Result<i32, DriverError>

Get the maximum number of threads per block for this function.

Examples found in repository?
examples/10-function-attributes.rs (line 39)
6 fn main() -> Result<(), DriverError> {
7    let ctx = CudaContext::new(0)?;
8
9    println!("Device: {}", ctx.name()?);
10    println!();
11
12    // Load the module with the sin_kernel
13    let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
14    let sin_kernel = module.load_function("sin_kernel")?;
15
16    // Query function attributes
17    println!("=== Function Attributes for 'sin_kernel' ===");
18    println!();
19
20    println!("Resource Usage:");
21    println!("  Registers per thread:     {}", sin_kernel.num_regs()?);
22    println!(
23        "  Static shared memory:     {} bytes",
24        sin_kernel.shared_size_bytes()?
25    );
26    println!(
27        "  Constant memory:          {} bytes",
28        sin_kernel.const_size_bytes()?
29    );
30    println!(
31        "  Local memory per thread:  {} bytes",
32        sin_kernel.local_size_bytes()?
33    );
34    println!();
35
36    println!("Limits:");
37    println!(
38        "  Max threads per block:    {}",
39        sin_kernel.max_threads_per_block()?
40    );
41    println!();
42
43    println!("Compilation Info:");
44    let ptx_ver = sin_kernel.ptx_version()?;
45    let bin_ver = sin_kernel.binary_version()?;
46    println!(
47        "  PTX version:              {}.{}",
48        ptx_ver / 10,
49        ptx_ver % 10
50    );
51    println!(
52        "  Binary version:           {}.{}",
53        bin_ver / 10,
54        bin_ver % 10
55    );
56    println!();
57
58    // Use occupancy API to get optimal launch configuration
59    extern "C" fn no_dynamic_smem(_block_size: std::ffi::c_int) -> usize {
60        0
61    }
62    let (min_grid_size, block_size) =
63        sin_kernel.occupancy_max_potential_block_size(no_dynamic_smem, 0, 0, None)?;
64
65    println!("=== Optimal Launch Configuration (sin_kernel) ===");
66    println!("  Suggested block size:     {}", block_size);
67    println!("  Min grid size:            {}", min_grid_size);
68    println!("  Total threads per grid:   {}", min_grid_size * block_size);
69
70    Ok(())
71 }
Source

pub fn ptx_version(&self) -> Result<i32, DriverError>

Get the PTX virtual architecture version for which the function was compiled.

Examples found in repository?
examples/10-function-attributes.rs (line 44)
6 fn main() -> Result<(), DriverError> {
7    let ctx = CudaContext::new(0)?;
8
9    println!("Device: {}", ctx.name()?);
10    println!();
11
12    // Load the module with the sin_kernel
13    let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
14    let sin_kernel = module.load_function("sin_kernel")?;
15
16    // Query function attributes
17    println!("=== Function Attributes for 'sin_kernel' ===");
18    println!();
19
20    println!("Resource Usage:");
21    println!("  Registers per thread:     {}", sin_kernel.num_regs()?);
22    println!(
23        "  Static shared memory:     {} bytes",
24        sin_kernel.shared_size_bytes()?
25    );
26    println!(
27        "  Constant memory:          {} bytes",
28        sin_kernel.const_size_bytes()?
29    );
30    println!(
31        "  Local memory per thread:  {} bytes",
32        sin_kernel.local_size_bytes()?
33    );
34    println!();
35
36    println!("Limits:");
37    println!(
38        "  Max threads per block:    {}",
39        sin_kernel.max_threads_per_block()?
40    );
41    println!();
42
43    println!("Compilation Info:");
44    let ptx_ver = sin_kernel.ptx_version()?;
45    let bin_ver = sin_kernel.binary_version()?;
46    println!(
47        "  PTX version:              {}.{}",
48        ptx_ver / 10,
49        ptx_ver % 10
50    );
51    println!(
52        "  Binary version:           {}.{}",
53        bin_ver / 10,
54        bin_ver % 10
55    );
56    println!();
57
58    // Use occupancy API to get optimal launch configuration
59    extern "C" fn no_dynamic_smem(_block_size: std::ffi::c_int) -> usize {
60        0
61    }
62    let (min_grid_size, block_size) =
63        sin_kernel.occupancy_max_potential_block_size(no_dynamic_smem, 0, 0, None)?;
64
65    println!("=== Optimal Launch Configuration (sin_kernel) ===");
66    println!("  Suggested block size:     {}", block_size);
67    println!("  Min grid size:            {}", min_grid_size);
68    println!("  Total threads per grid:   {}", min_grid_size * block_size);
69
70    Ok(())
71 }
Source

pub fn binary_version(&self) -> Result<i32, DriverError>

Get the binary architecture version for which the function was compiled.

Examples found in repository?
examples/10-function-attributes.rs (line 45)
6 fn main() -> Result<(), DriverError> {
7    let ctx = CudaContext::new(0)?;
8
9    println!("Device: {}", ctx.name()?);
10    println!();
11
12    // Load the module with the sin_kernel
13    let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
14    let sin_kernel = module.load_function("sin_kernel")?;
15
16    // Query function attributes
17    println!("=== Function Attributes for 'sin_kernel' ===");
18    println!();
19
20    println!("Resource Usage:");
21    println!("  Registers per thread:     {}", sin_kernel.num_regs()?);
22    println!(
23        "  Static shared memory:     {} bytes",
24        sin_kernel.shared_size_bytes()?
25    );
26    println!(
27        "  Constant memory:          {} bytes",
28        sin_kernel.const_size_bytes()?
29    );
30    println!(
31        "  Local memory per thread:  {} bytes",
32        sin_kernel.local_size_bytes()?
33    );
34    println!();
35
36    println!("Limits:");
37    println!(
38        "  Max threads per block:    {}",
39        sin_kernel.max_threads_per_block()?
40    );
41    println!();
42
43    println!("Compilation Info:");
44    let ptx_ver = sin_kernel.ptx_version()?;
45    let bin_ver = sin_kernel.binary_version()?;
46    println!(
47        "  PTX version:              {}.{}",
48        ptx_ver / 10,
49        ptx_ver % 10
50    );
51    println!(
52        "  Binary version:           {}.{}",
53        bin_ver / 10,
54        bin_ver % 10
55    );
56    println!();
57
58    // Use occupancy API to get optimal launch configuration
59    extern "C" fn no_dynamic_smem(_block_size: std::ffi::c_int) -> usize {
60        0
61    }
62    let (min_grid_size, block_size) =
63        sin_kernel.occupancy_max_potential_block_size(no_dynamic_smem, 0, 0, None)?;
64
65    println!("=== Optimal Launch Configuration (sin_kernel) ===");
66    println!("  Suggested block size:     {}", block_size);
67    println!("  Min grid size:            {}", min_grid_size);
68    println!("  Total threads per grid:   {}", min_grid_size * block_size);
69
70    Ok(())
71 }
Source

pub fn set_attribute( &self, attribute: CUfunction_attribute_enum, value: i32, ) -> Result<(), DriverError>

Set the value of a specific attribute of this CudaFunction.

Source

pub fn set_function_cache_config( &self, attribute: CUfunc_cache_enum, ) -> Result<(), DriverError>

Set the cache config of this CudaFunction.

Trait Implementations§

Source§

impl Clone for CudaFunction

Source§

fn clone(&self) -> CudaFunction

Returns a duplicate of the value. Read more
1.0.0 · Source§

fn clone_from(&mut self, source: &Self)

Performs copy-assignment from source. Read more
Source§

impl Debug for CudaFunction

Source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more
Source§

impl Send for CudaFunction

Source§

impl Sync for CudaFunction

Auto Trait Implementations§

Blanket Implementations§

Source§

impl<T> Any for T
where T: 'static + ?Sized,

Source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
Source§

impl<T> Borrow<T> for T
where T: ?Sized,

Source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
Source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

Source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
Source§

impl<T> CloneToUninit for T
where T: Clone,

Source§

unsafe fn clone_to_uninit(&self, dest: *mut u8)

🔬 This is a nightly-only experimental API. (clone_to_uninit)
Performs copy-assignment from self to dest. Read more
Source§

impl<T> From<T> for T

Source§

fn from(t: T) -> T

Returns the argument unchanged.

Source§

impl<T, U> Into<U> for T
where U: From<T>,

Source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

Source§

impl<T> ToOwned for T
where T: Clone,

Source§

type Owned = T

The resulting type after obtaining ownership.
Source§

fn to_owned(&self) -> T

Creates owned data from borrowed data, usually by cloning. Read more
Source§

fn clone_into(&self, target: &mut T)

Uses borrowed data to replace owned data, usually by cloning. Read more
Source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

Source§

type Error = Infallible

The type returned in the event of a conversion error.
Source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
Source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

Source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
Source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
Source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

Source§

fn vzip(self) -> V