oxicuda_driver/function_attr.rs
1//! Safe wrappers for querying and configuring CUDA function attributes.
2//!
3//! This module extends [`Function`] with methods for inspecting kernel
4//! resource usage (registers, shared memory, etc.) and tuning launch
5//! parameters such as dynamic shared memory limits.
6//!
7//! # Example
8//!
9//! ```rust,no_run
10//! # use oxicuda_driver::module::{Module, Function};
11//! # fn main() -> Result<(), oxicuda_driver::CudaError> {
12//! # let module = Module::from_ptx("...")?;
13//! # let func = module.get_function("my_kernel")?;
14//! let regs = func.num_registers()?;
15//! let smem = func.shared_memory_bytes()?;
16//! println!("kernel uses {regs} registers, {smem} bytes shared mem");
17//! # Ok(())
18//! # }
19//! ```
20
21use std::ffi::c_int;
22
23use crate::error::{CudaError, CudaResult};
24use crate::ffi::CUfunction_attribute;
25use crate::loader::try_driver;
26use crate::module::Function;
27
28// ---------------------------------------------------------------------------
29// Internal helpers
30// ---------------------------------------------------------------------------
31
32/// Fetch a single integer function attribute from the driver.
33fn get_attribute(func: &Function, attrib: CUfunction_attribute) -> CudaResult<i32> {
34 let api = try_driver()?;
35 let f = api.cu_func_get_attribute.ok_or(CudaError::NotSupported)?;
36 let mut value: c_int = 0;
37 crate::cuda_call!(f(&mut value, attrib as i32, func.raw()))?;
38 Ok(value)
39}
40
41/// Set a single integer function attribute via the driver.
42fn set_attribute(func: &Function, attrib: CUfunction_attribute, value: i32) -> CudaResult<()> {
43 let api = try_driver()?;
44 let f = api.cu_func_set_attribute.ok_or(CudaError::NotSupported)?;
45 crate::cuda_call!(f(func.raw(), attrib as i32, value))
46}
47
48// ---------------------------------------------------------------------------
49// Function attribute methods
50// ---------------------------------------------------------------------------
51
52impl Function {
53 /// Returns the number of registers used by each thread of this kernel.
54 ///
55 /// # Errors
56 ///
57 /// Returns [`CudaError::NotSupported`] if the driver lacks
58 /// `cuFuncGetAttribute`, or another error on failure.
59 pub fn num_registers(&self) -> CudaResult<i32> {
60 get_attribute(self, CUfunction_attribute::NumRegs)
61 }
62
63 /// Returns the static shared memory used by this kernel (bytes).
64 ///
65 /// # Errors
66 ///
67 /// Returns [`CudaError::NotSupported`] if the driver lacks
68 /// `cuFuncGetAttribute`, or another error on failure.
69 pub fn shared_memory_bytes(&self) -> CudaResult<i32> {
70 get_attribute(self, CUfunction_attribute::SharedSizeBytes)
71 }
72
73 /// Returns the maximum number of threads per block for this kernel.
74 ///
75 /// # Errors
76 ///
77 /// Returns a [`CudaError`] on failure.
78 pub fn max_threads_per_block_attr(&self) -> CudaResult<i32> {
79 get_attribute(self, CUfunction_attribute::MaxThreadsPerBlock)
80 }
81
82 /// Returns the local memory used by each thread of this kernel (bytes).
83 ///
84 /// # Errors
85 ///
86 /// Returns a [`CudaError`] on failure.
87 pub fn local_memory_bytes(&self) -> CudaResult<i32> {
88 get_attribute(self, CUfunction_attribute::LocalSizeBytes)
89 }
90
91 /// Returns the PTX virtual architecture version for this kernel.
92 ///
93 /// # Errors
94 ///
95 /// Returns a [`CudaError`] on failure.
96 pub fn ptx_version(&self) -> CudaResult<i32> {
97 get_attribute(self, CUfunction_attribute::PtxVersion)
98 }
99
100 /// Returns the binary (SASS) architecture version for this kernel.
101 ///
102 /// # Errors
103 ///
104 /// Returns a [`CudaError`] on failure.
105 pub fn binary_version(&self) -> CudaResult<i32> {
106 get_attribute(self, CUfunction_attribute::BinaryVersion)
107 }
108
109 /// Returns the maximum dynamic shared memory size (bytes) for this kernel.
110 ///
111 /// # Errors
112 ///
113 /// Returns a [`CudaError`] on failure.
114 pub fn max_dynamic_shared_memory(&self) -> CudaResult<i32> {
115 get_attribute(self, CUfunction_attribute::MaxDynamicSharedSizeBytes)
116 }
117
118 /// Sets the maximum dynamic shared memory size (bytes) for this kernel.
119 ///
120 /// This must be called before launching the kernel if you need more
121 /// dynamic shared memory than the default limit.
122 ///
123 /// # Errors
124 ///
125 /// Returns [`CudaError::NotSupported`] if the driver lacks
126 /// `cuFuncSetAttribute`, or another error on failure.
127 pub fn set_max_dynamic_shared_memory(&self, bytes: i32) -> CudaResult<()> {
128 set_attribute(self, CUfunction_attribute::MaxDynamicSharedSizeBytes, bytes)
129 }
130
131 /// Sets the preferred shared memory carve-out (percentage 0-100).
132 ///
133 /// A value of 0 means use the device default. Values between 1 and 100
134 /// indicate the desired percentage of L1 cache to use as shared memory.
135 ///
136 /// # Errors
137 ///
138 /// Returns [`CudaError::NotSupported`] if the driver lacks
139 /// `cuFuncSetAttribute`, or another error on failure.
140 pub fn set_preferred_shared_memory_carveout(&self, percent: i32) -> CudaResult<()> {
141 set_attribute(
142 self,
143 CUfunction_attribute::PreferredSharedMemoryCarveout,
144 percent,
145 )
146 }
147}
148
149// ---------------------------------------------------------------------------
150// Tests
151// ---------------------------------------------------------------------------
152
#[cfg(test)]
mod tests {
    use crate::ffi::CUfunction_attribute;

    /// Pins the integer discriminants of the attributes checked below, since
    /// this module hands them to the driver as plain `i32` values.
    #[test]
    fn function_attribute_enum_values() {
        let expected = [
            (CUfunction_attribute::MaxThreadsPerBlock, 0),
            (CUfunction_attribute::NumRegs, 4),
            (CUfunction_attribute::PtxVersion, 5),
            (CUfunction_attribute::MaxDynamicSharedSizeBytes, 8),
        ];

        for (attribute, raw) in expected {
            assert_eq!(attribute as i32, raw);
        }
    }
}
163}