oxicuda_driver/ffi_launch.rs
1//! Extended launch configuration types for `cuLaunchKernelEx` (CUDA 12.0+).
2//!
3//! Thread block cluster launch attributes, launch configuration, and related
4//! types for the modern CUDA 12.x kernel launch API.
5
6use super::CUstream;
7
8// =========================================================================
9// CuLaunchAttributeId — attribute discriminant
10// =========================================================================
11
/// Attribute identifier for `CuLaunchAttribute`.
///
/// Selects which extended kernel launch feature an attribute entry
/// configures when it is passed to `cuLaunchKernelEx` (CUDA 12.0+).
///
/// The discriminants mirror the `CU_LAUNCH_ATTRIBUTE_*` constants of
/// `CUlaunchAttributeID` in `cuda.h`. The driver interprets the raw integer,
/// so these values are part of the ABI and must not be renumbered. The
/// numbering is intentionally sparse: ids 1–3 (access policy window,
/// cooperative launch, synchronization policy) and 11 have no variant here.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u32)]
pub enum CuLaunchAttributeId {
    /// Ignored entry (`CU_LAUNCH_ATTRIBUTE_IGNORE`); the driver skips it.
    ///
    /// NOTE(review): `cuda.h` defines no attribute for shared-memory reuse.
    /// The only "ignore" id is `CU_LAUNCH_ATTRIBUTE_IGNORE` (0), so this
    /// variant is mapped to it. Its previous value, 1, is
    /// `CU_LAUNCH_ATTRIBUTE_ACCESS_POLICY_WINDOW`, whose payload the value
    /// union in this module does not model — confirm the intended meaning.
    IgnoreSharedMemoryReuse = 0,
    /// Thread block cluster dimensions, sm_90+
    /// (`CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION`).
    ClusterDimension = 4,
    /// Cluster scheduling policy preference
    /// (`CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE`).
    ClusterSchedulingPolicyPreference = 5,
    /// Programmatic stream serialization
    /// (`CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION`).
    ProgrammaticStreamSerialization = 6,
    /// Programmatic completion event
    /// (`CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_EVENT`).
    ProgrammaticEvent = 7,
    /// Kernel launch priority (`CU_LAUNCH_ATTRIBUTE_PRIORITY`).
    Priority = 8,
    /// Memory synchronization domain map
    /// (`CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN_MAP`).
    MemSyncDomainMap = 9,
    /// Memory synchronization domain
    /// (`CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN`).
    MemSyncDomain = 10,
    /// Launch completion event
    /// (`CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT`).
    LaunchCompletionEvent = 12,
    /// Device-updatable kernel node
    /// (`CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE`).
    DeviceUpdatableKernelNode = 13,
}
40
41// =========================================================================
42// CuLaunchAttributeClusterDim — cluster geometry
43// =========================================================================
44
/// Shape of a thread block cluster (sm_90+).
///
/// Each field gives the number of thread blocks that make up one cluster
/// along that axis. This is the payload stored in
/// [`CuLaunchAttributeValue`] when the accompanying id is
/// [`CuLaunchAttributeId::ClusterDimension`].
///
/// `#[repr(C)]` keeps the three `u32` fields in declaration order with no
/// padding between them.
#[repr(C)]
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct CuLaunchAttributeClusterDim {
    /// Blocks per cluster along X.
    pub x: u32,
    /// Blocks per cluster along Y.
    pub y: u32,
    /// Blocks per cluster along Z.
    pub z: u32,
}
60
61// =========================================================================
62// CuLaunchAttributeValue — attribute value union
63// =========================================================================
64
65/// Value union for `CuLaunchAttribute`.
66///
67/// # Safety
68///
69/// This is a C union — callers must only read the field that matches
70/// the accompanying [`CuLaunchAttributeId`] discriminant.
71/// Padding ensures the union is always 64 bytes, matching the CUDA ABI.
72#[repr(C)]
73pub union CuLaunchAttributeValue {
74 /// Cluster dimension configuration (when id == `ClusterDimension`).
75 pub cluster_dim: CuLaunchAttributeClusterDim,
76 /// Scalar u32 value (for single-word attributes).
77 pub value_u32: u32,
78 /// Raw padding to maintain 64-byte ABI alignment.
79 pub pad: [u8; 64],
80}
81
82// Manual Clone/Copy for the union (derive cannot handle unions with non-Copy
83// fields, but all union fields here are effectively POD).
84// `Copy` is declared first so that the `Clone` impl can delegate to it.
85impl Copy for CuLaunchAttributeValue {}
86
87impl Clone for CuLaunchAttributeValue {
88 fn clone(&self) -> Self {
89 // Delegate to Copy — canonical approach for Copy types.
90 *self
91 }
92}
93
94// =========================================================================
95// CuLaunchAttribute — single attribute entry
96// =========================================================================
97
98/// A single extended kernel launch attribute (id + value pair).
99///
100/// Used in the `attrs` array of [`CuLaunchConfig`].
101#[repr(C)]
102#[derive(Clone, Copy)]
103pub struct CuLaunchAttribute {
104 /// Which feature this attribute configures.
105 pub id: CuLaunchAttributeId,
106 /// Alignment padding (must be zero).
107 pub pad: [u8; 4],
108 /// The attribute value — interpret according to `id`.
109 pub value: CuLaunchAttributeValue,
110}
111
112// =========================================================================
113// CuLaunchConfig — full launch configuration
114// =========================================================================
115
116/// Extended kernel launch configuration for `cuLaunchKernelEx` (CUDA 12.0+).
117///
118/// Supersedes the individual parameters of `cuLaunchKernel` and adds
119/// support for thread block clusters, launch priorities, and other
120/// CUDA 12.x features.
121///
122/// # Example
123///
124/// ```rust
125/// use oxicuda_driver::ffi::{
126/// CuLaunchConfig, CuLaunchAttribute, CuLaunchAttributeId,
127/// CuLaunchAttributeValue, CuLaunchAttributeClusterDim, CUstream,
128/// };
129///
130/// // Build a cluster-launch config for a 2×1×1 cluster.
131/// let cluster_attr = CuLaunchAttribute {
132/// id: CuLaunchAttributeId::ClusterDimension,
133/// pad: [0u8; 4],
134/// value: CuLaunchAttributeValue {
135/// cluster_dim: CuLaunchAttributeClusterDim { x: 2, y: 1, z: 1 },
136/// },
137/// };
138/// let _config = CuLaunchConfig {
139/// grid_dim_x: 8,
140/// grid_dim_y: 1,
141/// grid_dim_z: 1,
142/// block_dim_x: 256,
143/// block_dim_y: 1,
144/// block_dim_z: 1,
145/// shared_mem_bytes: 0,
146/// stream: CUstream::default(),
147/// attrs: std::ptr::null(),
148/// num_attrs: 0,
149/// };
150/// ```
151#[repr(C)]
152pub struct CuLaunchConfig {
153 /// Grid dimension in X.
154 pub grid_dim_x: u32,
155 /// Grid dimension in Y.
156 pub grid_dim_y: u32,
157 /// Grid dimension in Z.
158 pub grid_dim_z: u32,
159 /// Block dimension in X (threads per block in X).
160 pub block_dim_x: u32,
161 /// Block dimension in Y.
162 pub block_dim_y: u32,
163 /// Block dimension in Z.
164 pub block_dim_z: u32,
165 /// Dynamic shared memory per block in bytes.
166 pub shared_mem_bytes: u32,
167 /// Stream to submit the kernel on.
168 pub stream: CUstream,
169 /// Pointer to an array of `num_attrs` attributes (may be null if zero).
170 pub attrs: *const CuLaunchAttribute,
171 /// Number of entries in `attrs`.
172 pub num_attrs: u32,
173}
174
175// SAFETY: CuLaunchConfig is a plain data structure mirroring the CUDA ABI.
176// The raw pointer `attrs` must be valid for the lifetime of the config, but
177// the struct itself is Send + Sync because no interior mutation occurs.
178unsafe impl Send for CuLaunchConfig {}
179unsafe impl Sync for CuLaunchConfig {}