oxicuda_driver/
function_attr.rs

1//! Safe wrappers for querying and configuring CUDA function attributes.
2//!
3//! This module extends [`Function`] with methods for inspecting kernel
4//! resource usage (registers, shared memory, etc.) and tuning launch
5//! parameters such as dynamic shared memory limits.
6//!
7//! # Example
8//!
9//! ```rust,no_run
10//! # use oxicuda_driver::module::{Module, Function};
11//! # fn main() -> Result<(), oxicuda_driver::CudaError> {
12//! # let module = Module::from_ptx("...")?;
13//! # let func = module.get_function("my_kernel")?;
14//! let regs = func.num_registers()?;
15//! let smem = func.shared_memory_bytes()?;
16//! println!("kernel uses {regs} registers, {smem} bytes shared mem");
17//! # Ok(())
18//! # }
19//! ```
20
21use std::ffi::c_int;
22
23use crate::error::{CudaError, CudaResult};
24use crate::ffi::CUfunction_attribute;
25use crate::loader::try_driver;
26use crate::module::Function;
27
28// ---------------------------------------------------------------------------
29// Internal helpers
30// ---------------------------------------------------------------------------
31
32/// Fetch a single integer function attribute from the driver.
33fn get_attribute(func: &Function, attrib: CUfunction_attribute) -> CudaResult<i32> {
34    let api = try_driver()?;
35    let f = api.cu_func_get_attribute.ok_or(CudaError::NotSupported)?;
36    let mut value: c_int = 0;
37    crate::cuda_call!(f(&mut value, attrib as i32, func.raw()))?;
38    Ok(value)
39}
40
41/// Set a single integer function attribute via the driver.
42fn set_attribute(func: &Function, attrib: CUfunction_attribute, value: i32) -> CudaResult<()> {
43    let api = try_driver()?;
44    let f = api.cu_func_set_attribute.ok_or(CudaError::NotSupported)?;
45    crate::cuda_call!(f(func.raw(), attrib as i32, value))
46}
47
48// ---------------------------------------------------------------------------
49// Function attribute methods
50// ---------------------------------------------------------------------------
51
52impl Function {
53    /// Returns the number of registers used by each thread of this kernel.
54    ///
55    /// # Errors
56    ///
57    /// Returns [`CudaError::NotSupported`] if the driver lacks
58    /// `cuFuncGetAttribute`, or another error on failure.
59    pub fn num_registers(&self) -> CudaResult<i32> {
60        get_attribute(self, CUfunction_attribute::NumRegs)
61    }
62
63    /// Returns the static shared memory used by this kernel (bytes).
64    ///
65    /// # Errors
66    ///
67    /// Returns [`CudaError::NotSupported`] if the driver lacks
68    /// `cuFuncGetAttribute`, or another error on failure.
69    pub fn shared_memory_bytes(&self) -> CudaResult<i32> {
70        get_attribute(self, CUfunction_attribute::SharedSizeBytes)
71    }
72
73    /// Returns the maximum number of threads per block for this kernel.
74    ///
75    /// # Errors
76    ///
77    /// Returns a [`CudaError`] on failure.
78    pub fn max_threads_per_block_attr(&self) -> CudaResult<i32> {
79        get_attribute(self, CUfunction_attribute::MaxThreadsPerBlock)
80    }
81
82    /// Returns the local memory used by each thread of this kernel (bytes).
83    ///
84    /// # Errors
85    ///
86    /// Returns a [`CudaError`] on failure.
87    pub fn local_memory_bytes(&self) -> CudaResult<i32> {
88        get_attribute(self, CUfunction_attribute::LocalSizeBytes)
89    }
90
91    /// Returns the PTX virtual architecture version for this kernel.
92    ///
93    /// # Errors
94    ///
95    /// Returns a [`CudaError`] on failure.
96    pub fn ptx_version(&self) -> CudaResult<i32> {
97        get_attribute(self, CUfunction_attribute::PtxVersion)
98    }
99
100    /// Returns the binary (SASS) architecture version for this kernel.
101    ///
102    /// # Errors
103    ///
104    /// Returns a [`CudaError`] on failure.
105    pub fn binary_version(&self) -> CudaResult<i32> {
106        get_attribute(self, CUfunction_attribute::BinaryVersion)
107    }
108
109    /// Returns the maximum dynamic shared memory size (bytes) for this kernel.
110    ///
111    /// # Errors
112    ///
113    /// Returns a [`CudaError`] on failure.
114    pub fn max_dynamic_shared_memory(&self) -> CudaResult<i32> {
115        get_attribute(self, CUfunction_attribute::MaxDynamicSharedSizeBytes)
116    }
117
118    /// Sets the maximum dynamic shared memory size (bytes) for this kernel.
119    ///
120    /// This must be called before launching the kernel if you need more
121    /// dynamic shared memory than the default limit.
122    ///
123    /// # Errors
124    ///
125    /// Returns [`CudaError::NotSupported`] if the driver lacks
126    /// `cuFuncSetAttribute`, or another error on failure.
127    pub fn set_max_dynamic_shared_memory(&self, bytes: i32) -> CudaResult<()> {
128        set_attribute(self, CUfunction_attribute::MaxDynamicSharedSizeBytes, bytes)
129    }
130
131    /// Sets the preferred shared memory carve-out (percentage 0-100).
132    ///
133    /// A value of 0 means use the device default. Values between 1 and 100
134    /// indicate the desired percentage of L1 cache to use as shared memory.
135    ///
136    /// # Errors
137    ///
138    /// Returns [`CudaError::NotSupported`] if the driver lacks
139    /// `cuFuncSetAttribute`, or another error on failure.
140    pub fn set_preferred_shared_memory_carveout(&self, percent: i32) -> CudaResult<()> {
141        set_attribute(
142            self,
143            CUfunction_attribute::PreferredSharedMemoryCarveout,
144            percent,
145        )
146    }
147}
148
149// ---------------------------------------------------------------------------
150// Tests
151// ---------------------------------------------------------------------------
152
#[cfg(test)]
mod tests {
    /// Pins the discriminant of every `CUfunction_attribute` variant this
    /// module passes to the driver. The values are cast to `i32` and handed
    /// to the driver verbatim, so a wrong discriminant would silently query
    /// or set a different attribute.
    #[test]
    fn function_attribute_enum_values() {
        use crate::ffi::CUfunction_attribute;
        assert_eq!(CUfunction_attribute::MaxThreadsPerBlock as i32, 0);
        assert_eq!(CUfunction_attribute::SharedSizeBytes as i32, 1);
        assert_eq!(CUfunction_attribute::LocalSizeBytes as i32, 3);
        assert_eq!(CUfunction_attribute::NumRegs as i32, 4);
        assert_eq!(CUfunction_attribute::PtxVersion as i32, 5);
        assert_eq!(CUfunction_attribute::BinaryVersion as i32, 6);
        assert_eq!(CUfunction_attribute::MaxDynamicSharedSizeBytes as i32, 8);
        assert_eq!(
            CUfunction_attribute::PreferredSharedMemoryCarveout as i32,
            9
        );
    }
}
oxicuda_driver/function_attr.rs

oxicuda_driver/
function_attr.rs