Skip to main content

oxicuda_driver/
ffi_launch.rs

1//! Extended launch configuration types for `cuLaunchKernelEx` (CUDA 12.0+).
2//!
3//! Thread block cluster launch attributes, launch configuration, and related
4//! types for the modern CUDA 12.x kernel launch API.
5
6use super::CUstream;
7
8// =========================================================================
9// CuLaunchAttributeId — attribute discriminant
10// =========================================================================
11
/// Attribute identifier for `CuLaunchAttribute`.
///
/// Controls which extended kernel launch feature is configured.
/// Used with `cuLaunchKernelEx` (CUDA 12.0+).
///
/// The discriminants are handed to the driver verbatim, so they must equal
/// the numeric values of `CUlaunchAttributeID` (`CU_LAUNCH_ATTRIBUTE_*`) in
/// `cuda.h`. The numbering has gaps because only a subset of the driver's
/// attributes is exposed here.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u32)]
pub enum CuLaunchAttributeId {
    /// Controls whether shared memory reuse is ignored.
    ///
    /// NOTE(review): `cuda.h` assigns ID 1 to
    /// `CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW` and declares no attribute
    /// with this name. The value is left unchanged until the intended
    /// mapping is confirmed.
    IgnoreSharedMemoryReuse = 1,
    /// Specifies thread block cluster dimensions (sm_90+).
    ///
    /// `CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION`.
    ClusterDimension = 4,
    /// Controls cluster scheduling policy preference.
    ///
    /// `CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE`.
    ClusterSchedulingPolicyPreference = 5,
    /// Enables programmatic stream serialization.
    ///
    /// `CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION`.
    ProgrammaticStreamSerialization = 6,
    /// Specifies a programmatic completion event.
    ///
    /// `CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT`.
    ProgrammaticEvent = 7,
    /// Specifies kernel launch priority.
    ///
    /// `CU_LAUNCH_ATTRIBUTE_PRIORITY`.
    Priority = 8,
    /// Maps memory synchronization domains.
    ///
    /// `CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP`.
    MemSyncDomainMap = 9,
    /// Sets memory synchronization domain.
    ///
    /// `CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN`.
    MemSyncDomain = 10,
    /// Specifies a launch completion event.
    ///
    /// `CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT` (ID 11 is not exposed
    /// here, hence the jump to 12).
    LaunchCompletionEvent = 12,
    /// Configures device-updatable kernel node.
    ///
    /// `CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE`.
    DeviceUpdatableKernelNode = 13,
}
40
41// =========================================================================
42// CuLaunchAttributeClusterDim — cluster geometry
43// =========================================================================
44
/// Shape of a thread block cluster (sm_90+), in thread blocks per axis.
///
/// This is the payload carried by [`CuLaunchAttributeValue`] when the
/// accompanying attribute id is [`CuLaunchAttributeId::ClusterDimension`].
/// The `x`, `y`, `z` field order is fixed by `#[repr(C)]` and must not be
/// rearranged.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct CuLaunchAttributeClusterDim {
    /// Thread blocks per cluster along X.
    pub x: u32,
    /// Thread blocks per cluster along Y.
    pub y: u32,
    /// Thread blocks per cluster along Z.
    pub z: u32,
}
60
61// =========================================================================
62// CuLaunchAttributeValue — attribute value union
63// =========================================================================
64
65/// Value union for `CuLaunchAttribute`.
66///
67/// # Safety
68///
69/// This is a C union — callers must only read the field that matches
70/// the accompanying [`CuLaunchAttributeId`] discriminant.
71/// Padding ensures the union is always 64 bytes, matching the CUDA ABI.
72#[repr(C)]
73pub union CuLaunchAttributeValue {
74    /// Cluster dimension configuration (when id == `ClusterDimension`).
75    pub cluster_dim: CuLaunchAttributeClusterDim,
76    /// Scalar u32 value (for single-word attributes).
77    pub value_u32: u32,
78    /// Raw padding to maintain 64-byte ABI alignment.
79    pub pad: [u8; 64],
80}
81
82// Manual Clone/Copy for the union (derive cannot handle unions with non-Copy
83// fields, but all union fields here are effectively POD).
84// `Copy` is declared first so that the `Clone` impl can delegate to it.
85impl Copy for CuLaunchAttributeValue {}
86
87impl Clone for CuLaunchAttributeValue {
88    fn clone(&self) -> Self {
89        // Delegate to Copy — canonical approach for Copy types.
90        *self
91    }
92}
93
94// =========================================================================
95// CuLaunchAttribute — single attribute entry
96// =========================================================================
97
98/// A single extended kernel launch attribute (id + value pair).
99///
100/// Used in the `attrs` array of [`CuLaunchConfig`].
101#[repr(C)]
102#[derive(Clone, Copy)]
103pub struct CuLaunchAttribute {
104    /// Which feature this attribute configures.
105    pub id: CuLaunchAttributeId,
106    /// Alignment padding (must be zero).
107    pub pad: [u8; 4],
108    /// The attribute value — interpret according to `id`.
109    pub value: CuLaunchAttributeValue,
110}
111
112// =========================================================================
113// CuLaunchConfig — full launch configuration
114// =========================================================================
115
116/// Extended kernel launch configuration for `cuLaunchKernelEx` (CUDA 12.0+).
117///
118/// Supersedes the individual parameters of `cuLaunchKernel` and adds
119/// support for thread block clusters, launch priorities, and other
120/// CUDA 12.x features.
121///
122/// # Example
123///
124/// ```rust
125/// use oxicuda_driver::ffi::{
126///     CuLaunchConfig, CuLaunchAttribute, CuLaunchAttributeId,
127///     CuLaunchAttributeValue, CuLaunchAttributeClusterDim, CUstream,
128/// };
129///
130/// // Build a cluster-launch config for a 2×1×1 cluster.
131/// let cluster_attr = CuLaunchAttribute {
132///     id: CuLaunchAttributeId::ClusterDimension,
133///     pad: [0u8; 4],
134///     value: CuLaunchAttributeValue {
135///         cluster_dim: CuLaunchAttributeClusterDim { x: 2, y: 1, z: 1 },
136///     },
137/// };
138/// let _config = CuLaunchConfig {
139///     grid_dim_x: 8,
140///     grid_dim_y: 1,
141///     grid_dim_z: 1,
142///     block_dim_x: 256,
143///     block_dim_y: 1,
144///     block_dim_z: 1,
145///     shared_mem_bytes: 0,
146///     stream: CUstream::default(),
147///     attrs: std::ptr::null(),
148///     num_attrs: 0,
149/// };
150/// ```
151#[repr(C)]
152pub struct CuLaunchConfig {
153    /// Grid dimension in X.
154    pub grid_dim_x: u32,
155    /// Grid dimension in Y.
156    pub grid_dim_y: u32,
157    /// Grid dimension in Z.
158    pub grid_dim_z: u32,
159    /// Block dimension in X (threads per block in X).
160    pub block_dim_x: u32,
161    /// Block dimension in Y.
162    pub block_dim_y: u32,
163    /// Block dimension in Z.
164    pub block_dim_z: u32,
165    /// Dynamic shared memory per block in bytes.
166    pub shared_mem_bytes: u32,
167    /// Stream to submit the kernel on.
168    pub stream: CUstream,
169    /// Pointer to an array of `num_attrs` attributes (may be null if zero).
170    pub attrs: *const CuLaunchAttribute,
171    /// Number of entries in `attrs`.
172    pub num_attrs: u32,
173}
174
175// SAFETY: CuLaunchConfig is a plain data structure mirroring the CUDA ABI.
176// The raw pointer `attrs` must be valid for the lifetime of the config, but
177// the struct itself is Send + Sync because no interior mutation occurs.
178unsafe impl Send for CuLaunchConfig {}
179unsafe impl Sync for CuLaunchConfig {}