Struct CudaFunction

Source

pub struct CudaFunction { /* private fields */ }

Expand description

Wrapper around `sys::CUfunction`. Used by `CudaStream::launch_builder` to execute kernels.

Implementations

Source §

impl CudaFunction

Source

pub fn occupancy_available_dynamic_smem_per_block( &self, num_blocks: u32, block_size: u32, ) -> Result<usize, DriverError>

Source

pub fn occupancy_max_active_blocks_per_multiprocessor( &self, block_size: u32, dynamic_smem_size: usize, flags: Option<CUoccupancy_flags_enum>, ) -> Result<u32, DriverError>

Source

pub fn occupancy_max_active_clusters( &self, config: LaunchConfig, stream: &CudaStream, ) -> Result<u32, DriverError>

Source

pub fn occupancy_max_potential_block_size( &self, block_size_to_dynamic_smem_size: extern "C" fn(block_size: c_int) -> usize, dynamic_smem_size: usize, block_size_limit: u32, flags: Option<CUoccupancy_flags_enum>, ) -> Result<(u32, u32), DriverError>

Examples found in repository:

examples/10-function-attributes.rs (line 63)

6fn main() -> Result<(), DriverError> {
7    let ctx = CudaContext::new(0)?;
8
9    println!("Device: {}", ctx.name()?);
10    println!();
11
12    // Load the module with the sin_kernel
13    let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
14    let sin_kernel = module.load_function("sin_kernel")?;
15
16    // Query function attributes
17    println!("=== Function Attributes for 'sin_kernel' ===");
18    println!();
19
20    println!("Resource Usage:");
21    println!("  Registers per thread:     {}", sin_kernel.num_regs()?);
22    println!(
23        "  Static shared memory:     {} bytes",
24        sin_kernel.shared_size_bytes()?
25    );
26    println!(
27        "  Constant memory:          {} bytes",
28        sin_kernel.const_size_bytes()?
29    );
30    println!(
31        "  Local memory per thread:  {} bytes",
32        sin_kernel.local_size_bytes()?
33    );
34    println!();
35
36    println!("Limits:");
37    println!(
38        "  Max threads per block:    {}",
39        sin_kernel.max_threads_per_block()?
40    );
41    println!();
42
43    println!("Compilation Info:");
44    let ptx_ver = sin_kernel.ptx_version()?;
45    let bin_ver = sin_kernel.binary_version()?;
46    println!(
47        "  PTX version:              {}.{}",
48        ptx_ver / 10,
49        ptx_ver % 10
50    );
51    println!(
52        "  Binary version:           {}.{}",
53        bin_ver / 10,
54        bin_ver % 10
55    );
56    println!();
57
58    // Use occupancy API to get optimal launch configuration
59    extern "C" fn no_dynamic_smem(_block_size: std::ffi::c_int) -> usize {
60        0
61    }
62    let (min_grid_size, block_size) =
63        sin_kernel.occupancy_max_potential_block_size(no_dynamic_smem, 0, 0, None)?;
64
65    println!("=== Optimal Launch Configuration (sin_kernel) ===");
66    println!("  Suggested block size:     {}", block_size);
67    println!("  Min grid size:            {}", min_grid_size);
68    println!("  Total threads per grid:   {}", min_grid_size * block_size);
69
70    Ok(())
71}

Source

pub fn occupancy_max_potential_cluster_size( &self, config: LaunchConfig, stream: &CudaStream, ) -> Result<u32, DriverError>

Source

pub fn get_attribute( &self, attribute: CUfunction_attribute_enum, ) -> Result<i32, DriverError>

Get the value of a specific attribute of this CudaFunction.

See the CUDA driver API documentation for `cuFuncGetAttribute`.

Source

pub fn num_regs(&self) -> Result<i32, DriverError>

Get the number of registers used per thread.

Examples found in repository:

examples/10-function-attributes.rs (line 21)

6fn main() -> Result<(), DriverError> {
7    let ctx = CudaContext::new(0)?;
8
9    println!("Device: {}", ctx.name()?);
10    println!();
11
12    // Load the module with the sin_kernel
13    let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
14    let sin_kernel = module.load_function("sin_kernel")?;
15
16    // Query function attributes
17    println!("=== Function Attributes for 'sin_kernel' ===");
18    println!();
19
20    println!("Resource Usage:");
21    println!("  Registers per thread:     {}", sin_kernel.num_regs()?);
22    println!(
23        "  Static shared memory:     {} bytes",
24        sin_kernel.shared_size_bytes()?
25    );
26    println!(
27        "  Constant memory:          {} bytes",
28        sin_kernel.const_size_bytes()?
29    );
30    println!(
31        "  Local memory per thread:  {} bytes",
32        sin_kernel.local_size_bytes()?
33    );
34    println!();
35
36    println!("Limits:");
37    println!(
38        "  Max threads per block:    {}",
39        sin_kernel.max_threads_per_block()?
40    );
41    println!();
42
43    println!("Compilation Info:");
44    let ptx_ver = sin_kernel.ptx_version()?;
45    let bin_ver = sin_kernel.binary_version()?;
46    println!(
47        "  PTX version:              {}.{}",
48        ptx_ver / 10,
49        ptx_ver % 10
50    );
51    println!(
52        "  Binary version:           {}.{}",
53        bin_ver / 10,
54        bin_ver % 10
55    );
56    println!();
57
58    // Use occupancy API to get optimal launch configuration
59    extern "C" fn no_dynamic_smem(_block_size: std::ffi::c_int) -> usize {
60        0
61    }
62    let (min_grid_size, block_size) =
63        sin_kernel.occupancy_max_potential_block_size(no_dynamic_smem, 0, 0, None)?;
64
65    println!("=== Optimal Launch Configuration (sin_kernel) ===");
66    println!("  Suggested block size:     {}", block_size);
67    println!("  Min grid size:            {}", min_grid_size);
68    println!("  Total threads per grid:   {}", min_grid_size * block_size);
69
70    Ok(())
71}

Source

pub fn shared_size_bytes(&self) -> Result<i32, DriverError>

Get the size of statically-allocated shared memory in bytes.

Examples found in repository:

examples/10-function-attributes.rs (line 24)

6fn main() -> Result<(), DriverError> {
7    let ctx = CudaContext::new(0)?;
8
9    println!("Device: {}", ctx.name()?);
10    println!();
11
12    // Load the module with the sin_kernel
13    let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
14    let sin_kernel = module.load_function("sin_kernel")?;
15
16    // Query function attributes
17    println!("=== Function Attributes for 'sin_kernel' ===");
18    println!();
19
20    println!("Resource Usage:");
21    println!("  Registers per thread:     {}", sin_kernel.num_regs()?);
22    println!(
23        "  Static shared memory:     {} bytes",
24        sin_kernel.shared_size_bytes()?
25    );
26    println!(
27        "  Constant memory:          {} bytes",
28        sin_kernel.const_size_bytes()?
29    );
30    println!(
31        "  Local memory per thread:  {} bytes",
32        sin_kernel.local_size_bytes()?
33    );
34    println!();
35
36    println!("Limits:");
37    println!(
38        "  Max threads per block:    {}",
39        sin_kernel.max_threads_per_block()?
40    );
41    println!();
42
43    println!("Compilation Info:");
44    let ptx_ver = sin_kernel.ptx_version()?;
45    let bin_ver = sin_kernel.binary_version()?;
46    println!(
47        "  PTX version:              {}.{}",
48        ptx_ver / 10,
49        ptx_ver % 10
50    );
51    println!(
52        "  Binary version:           {}.{}",
53        bin_ver / 10,
54        bin_ver % 10
55    );
56    println!();
57
58    // Use occupancy API to get optimal launch configuration
59    extern "C" fn no_dynamic_smem(_block_size: std::ffi::c_int) -> usize {
60        0
61    }
62    let (min_grid_size, block_size) =
63        sin_kernel.occupancy_max_potential_block_size(no_dynamic_smem, 0, 0, None)?;
64
65    println!("=== Optimal Launch Configuration (sin_kernel) ===");
66    println!("  Suggested block size:     {}", block_size);
67    println!("  Min grid size:            {}", min_grid_size);
68    println!("  Total threads per grid:   {}", min_grid_size * block_size);
69
70    Ok(())
71}

Source

pub fn const_size_bytes(&self) -> Result<i32, DriverError>

Get the size of constant memory in bytes used by this function.

Examples found in repository:

examples/10-function-attributes.rs (line 28)

6fn main() -> Result<(), DriverError> {
7    let ctx = CudaContext::new(0)?;
8
9    println!("Device: {}", ctx.name()?);
10    println!();
11
12    // Load the module with the sin_kernel
13    let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
14    let sin_kernel = module.load_function("sin_kernel")?;
15
16    // Query function attributes
17    println!("=== Function Attributes for 'sin_kernel' ===");
18    println!();
19
20    println!("Resource Usage:");
21    println!("  Registers per thread:     {}", sin_kernel.num_regs()?);
22    println!(
23        "  Static shared memory:     {} bytes",
24        sin_kernel.shared_size_bytes()?
25    );
26    println!(
27        "  Constant memory:          {} bytes",
28        sin_kernel.const_size_bytes()?
29    );
30    println!(
31        "  Local memory per thread:  {} bytes",
32        sin_kernel.local_size_bytes()?
33    );
34    println!();
35
36    println!("Limits:");
37    println!(
38        "  Max threads per block:    {}",
39        sin_kernel.max_threads_per_block()?
40    );
41    println!();
42
43    println!("Compilation Info:");
44    let ptx_ver = sin_kernel.ptx_version()?;
45    let bin_ver = sin_kernel.binary_version()?;
46    println!(
47        "  PTX version:              {}.{}",
48        ptx_ver / 10,
49        ptx_ver % 10
50    );
51    println!(
52        "  Binary version:           {}.{}",
53        bin_ver / 10,
54        bin_ver % 10
55    );
56    println!();
57
58    // Use occupancy API to get optimal launch configuration
59    extern "C" fn no_dynamic_smem(_block_size: std::ffi::c_int) -> usize {
60        0
61    }
62    let (min_grid_size, block_size) =
63        sin_kernel.occupancy_max_potential_block_size(no_dynamic_smem, 0, 0, None)?;
64
65    println!("=== Optimal Launch Configuration (sin_kernel) ===");
66    println!("  Suggested block size:     {}", block_size);
67    println!("  Min grid size:            {}", min_grid_size);
68    println!("  Total threads per grid:   {}", min_grid_size * block_size);
69
70    Ok(())
71}

Source

pub fn local_size_bytes(&self) -> Result<i32, DriverError>

Get the size of local memory in bytes used per thread.

Examples found in repository:

examples/10-function-attributes.rs (line 32)

6fn main() -> Result<(), DriverError> {
7    let ctx = CudaContext::new(0)?;
8
9    println!("Device: {}", ctx.name()?);
10    println!();
11
12    // Load the module with the sin_kernel
13    let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
14    let sin_kernel = module.load_function("sin_kernel")?;
15
16    // Query function attributes
17    println!("=== Function Attributes for 'sin_kernel' ===");
18    println!();
19
20    println!("Resource Usage:");
21    println!("  Registers per thread:     {}", sin_kernel.num_regs()?);
22    println!(
23        "  Static shared memory:     {} bytes",
24        sin_kernel.shared_size_bytes()?
25    );
26    println!(
27        "  Constant memory:          {} bytes",
28        sin_kernel.const_size_bytes()?
29    );
30    println!(
31        "  Local memory per thread:  {} bytes",
32        sin_kernel.local_size_bytes()?
33    );
34    println!();
35
36    println!("Limits:");
37    println!(
38        "  Max threads per block:    {}",
39        sin_kernel.max_threads_per_block()?
40    );
41    println!();
42
43    println!("Compilation Info:");
44    let ptx_ver = sin_kernel.ptx_version()?;
45    let bin_ver = sin_kernel.binary_version()?;
46    println!(
47        "  PTX version:              {}.{}",
48        ptx_ver / 10,
49        ptx_ver % 10
50    );
51    println!(
52        "  Binary version:           {}.{}",
53        bin_ver / 10,
54        bin_ver % 10
55    );
56    println!();
57
58    // Use occupancy API to get optimal launch configuration
59    extern "C" fn no_dynamic_smem(_block_size: std::ffi::c_int) -> usize {
60        0
61    }
62    let (min_grid_size, block_size) =
63        sin_kernel.occupancy_max_potential_block_size(no_dynamic_smem, 0, 0, None)?;
64
65    println!("=== Optimal Launch Configuration (sin_kernel) ===");
66    println!("  Suggested block size:     {}", block_size);
67    println!("  Min grid size:            {}", min_grid_size);
68    println!("  Total threads per grid:   {}", min_grid_size * block_size);
69
70    Ok(())
71}

Source

pub fn max_threads_per_block(&self) -> Result<i32, DriverError>

Get the maximum number of threads per block for this function.

Examples found in repository:

examples/10-function-attributes.rs (line 39)

6fn main() -> Result<(), DriverError> {
7    let ctx = CudaContext::new(0)?;
8
9    println!("Device: {}", ctx.name()?);
10    println!();
11
12    // Load the module with the sin_kernel
13    let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
14    let sin_kernel = module.load_function("sin_kernel")?;
15
16    // Query function attributes
17    println!("=== Function Attributes for 'sin_kernel' ===");
18    println!();
19
20    println!("Resource Usage:");
21    println!("  Registers per thread:     {}", sin_kernel.num_regs()?);
22    println!(
23        "  Static shared memory:     {} bytes",
24        sin_kernel.shared_size_bytes()?
25    );
26    println!(
27        "  Constant memory:          {} bytes",
28        sin_kernel.const_size_bytes()?
29    );
30    println!(
31        "  Local memory per thread:  {} bytes",
32        sin_kernel.local_size_bytes()?
33    );
34    println!();
35
36    println!("Limits:");
37    println!(
38        "  Max threads per block:    {}",
39        sin_kernel.max_threads_per_block()?
40    );
41    println!();
42
43    println!("Compilation Info:");
44    let ptx_ver = sin_kernel.ptx_version()?;
45    let bin_ver = sin_kernel.binary_version()?;
46    println!(
47        "  PTX version:              {}.{}",
48        ptx_ver / 10,
49        ptx_ver % 10
50    );
51    println!(
52        "  Binary version:           {}.{}",
53        bin_ver / 10,
54        bin_ver % 10
55    );
56    println!();
57
58    // Use occupancy API to get optimal launch configuration
59    extern "C" fn no_dynamic_smem(_block_size: std::ffi::c_int) -> usize {
60        0
61    }
62    let (min_grid_size, block_size) =
63        sin_kernel.occupancy_max_potential_block_size(no_dynamic_smem, 0, 0, None)?;
64
65    println!("=== Optimal Launch Configuration (sin_kernel) ===");
66    println!("  Suggested block size:     {}", block_size);
67    println!("  Min grid size:            {}", min_grid_size);
68    println!("  Total threads per grid:   {}", min_grid_size * block_size);
69
70    Ok(())
71}

Source

pub fn ptx_version(&self) -> Result<i32, DriverError>

Get the PTX virtual architecture version for which the function was compiled.

Examples found in repository:

examples/10-function-attributes.rs (line 44)

6fn main() -> Result<(), DriverError> {
7    let ctx = CudaContext::new(0)?;
8
9    println!("Device: {}", ctx.name()?);
10    println!();
11
12    // Load the module with the sin_kernel
13    let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
14    let sin_kernel = module.load_function("sin_kernel")?;
15
16    // Query function attributes
17    println!("=== Function Attributes for 'sin_kernel' ===");
18    println!();
19
20    println!("Resource Usage:");
21    println!("  Registers per thread:     {}", sin_kernel.num_regs()?);
22    println!(
23        "  Static shared memory:     {} bytes",
24        sin_kernel.shared_size_bytes()?
25    );
26    println!(
27        "  Constant memory:          {} bytes",
28        sin_kernel.const_size_bytes()?
29    );
30    println!(
31        "  Local memory per thread:  {} bytes",
32        sin_kernel.local_size_bytes()?
33    );
34    println!();
35
36    println!("Limits:");
37    println!(
38        "  Max threads per block:    {}",
39        sin_kernel.max_threads_per_block()?
40    );
41    println!();
42
43    println!("Compilation Info:");
44    let ptx_ver = sin_kernel.ptx_version()?;
45    let bin_ver = sin_kernel.binary_version()?;
46    println!(
47        "  PTX version:              {}.{}",
48        ptx_ver / 10,
49        ptx_ver % 10
50    );
51    println!(
52        "  Binary version:           {}.{}",
53        bin_ver / 10,
54        bin_ver % 10
55    );
56    println!();
57
58    // Use occupancy API to get optimal launch configuration
59    extern "C" fn no_dynamic_smem(_block_size: std::ffi::c_int) -> usize {
60        0
61    }
62    let (min_grid_size, block_size) =
63        sin_kernel.occupancy_max_potential_block_size(no_dynamic_smem, 0, 0, None)?;
64
65    println!("=== Optimal Launch Configuration (sin_kernel) ===");
66    println!("  Suggested block size:     {}", block_size);
67    println!("  Min grid size:            {}", min_grid_size);
68    println!("  Total threads per grid:   {}", min_grid_size * block_size);
69
70    Ok(())
71}

Source

pub fn binary_version(&self) -> Result<i32, DriverError>

Get the binary architecture version for which the function was compiled.

Examples found in repository:

examples/10-function-attributes.rs (line 45)

6fn main() -> Result<(), DriverError> {
7    let ctx = CudaContext::new(0)?;
8
9    println!("Device: {}", ctx.name()?);
10    println!();
11
12    // Load the module with the sin_kernel
13    let module = ctx.load_module(Ptx::from_file("./examples/sin.ptx"))?;
14    let sin_kernel = module.load_function("sin_kernel")?;
15
16    // Query function attributes
17    println!("=== Function Attributes for 'sin_kernel' ===");
18    println!();
19
20    println!("Resource Usage:");
21    println!("  Registers per thread:     {}", sin_kernel.num_regs()?);
22    println!(
23        "  Static shared memory:     {} bytes",
24        sin_kernel.shared_size_bytes()?
25    );
26    println!(
27        "  Constant memory:          {} bytes",
28        sin_kernel.const_size_bytes()?
29    );
30    println!(
31        "  Local memory per thread:  {} bytes",
32        sin_kernel.local_size_bytes()?
33    );
34    println!();
35
36    println!("Limits:");
37    println!(
38        "  Max threads per block:    {}",
39        sin_kernel.max_threads_per_block()?
40    );
41    println!();
42
43    println!("Compilation Info:");
44    let ptx_ver = sin_kernel.ptx_version()?;
45    let bin_ver = sin_kernel.binary_version()?;
46    println!(
47        "  PTX version:              {}.{}",
48        ptx_ver / 10,
49        ptx_ver % 10
50    );
51    println!(
52        "  Binary version:           {}.{}",
53        bin_ver / 10,
54        bin_ver % 10
55    );
56    println!();
57
58    // Use occupancy API to get optimal launch configuration
59    extern "C" fn no_dynamic_smem(_block_size: std::ffi::c_int) -> usize {
60        0
61    }
62    let (min_grid_size, block_size) =
63        sin_kernel.occupancy_max_potential_block_size(no_dynamic_smem, 0, 0, None)?;
64
65    println!("=== Optimal Launch Configuration (sin_kernel) ===");
66    println!("  Suggested block size:     {}", block_size);
67    println!("  Min grid size:            {}", min_grid_size);
68    println!("  Total threads per grid:   {}", min_grid_size * block_size);
69
70    Ok(())
71}