// oxicuda_driver/ffi_constants.rs

//! CUDA Driver API flag and constant definitions.
//!
//! Stream flags, event flags, memory pool attributes, memory attach flags,
//! host register flags, pointer attribute codes, memory type values, context
//! scheduling flags, function attribute constants, memory advise values,
//! limit constants, and occupancy flags.

// =========================================================================
// Stream creation flags
// =========================================================================

/// Default stream creation flag (implicit synchronisation with the NULL stream).
pub const CU_STREAM_DEFAULT: u32 = 0x0;

/// Stream does not synchronise with the NULL stream.
pub const CU_STREAM_NON_BLOCKING: u32 = 0x1;

// =========================================================================
// Stream-ordered memory pool attributes (CUDA 11.2+)
// =========================================================================

/// Pool reuse policy: allow an allocation to reuse memory freed asynchronously
/// in another stream, provided the allocating stream already has a
/// stream-ordering dependency on that free.
pub const CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES: u32 = 1;

/// Pool reuse policy: allow reuse of frees that have already completed even
/// when no dependency links the free and the allocation.
pub const CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC: u32 = 2;

/// Pool reuse policy: allow the driver to insert new stream dependencies in
/// order to establish the ordering required for reuse.
pub const CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES: u32 = 3;

/// Release threshold (bytes): amount of reserved memory the pool holds onto
/// before trying to release memory back to the OS. When the pool holds more
/// than this, the allocator attempts a release at the next stream, event or
/// context synchronisation.
pub const CU_MEMPOOL_ATTR_RELEASE_THRESHOLD: u32 = 4;

/// Current reserved (backing) memory of the pool in bytes (read-only).
pub const CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT: u32 = 5;

/// High-water mark of reserved memory in bytes since the last reset
/// (resettable).
pub const CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH: u32 = 6;

/// Memory currently in use by the application, in bytes (read-only).
pub const CU_MEMPOOL_ATTR_USED_MEM_CURRENT: u32 = 7;

/// High-water mark of used memory in bytes since the last reset (resettable).
pub const CU_MEMPOOL_ATTR_USED_MEM_HIGH: u32 = 8;

// =========================================================================
// Event creation flags
// =========================================================================

/// Default event creation flag.
pub const CU_EVENT_DEFAULT: u32 = 0x0;

/// Event uses blocking synchronisation.
pub const CU_EVENT_BLOCKING_SYNC: u32 = 0x1;

/// Event does not record timing data (faster).
pub const CU_EVENT_DISABLE_TIMING: u32 = 0x2;

/// Event may be used as an interprocess event.
pub const CU_EVENT_INTERPROCESS: u32 = 0x4;

// =========================================================================
// Memory-attach flags (for managed / mapped memory)
// =========================================================================

/// Memory can be accessed by any stream on any device.
pub const CU_MEM_ATTACH_GLOBAL: u32 = 0x1;

/// Memory cannot be accessed by any stream on any device (host-attached).
pub const CU_MEM_ATTACH_HOST: u32 = 0x2;

/// Memory can only be accessed by a single stream on the associated device.
pub const CU_MEM_ATTACH_SINGLE: u32 = 0x4;

// =========================================================================
// cuMemHostRegister flags
// =========================================================================

/// Registered memory is treated as pinned by all CUDA contexts, not only the
/// one that performed the registration.
pub const CU_MEMHOSTREGISTER_PORTABLE: u32 = 0x01;

/// Registered memory is mapped into the device address space.
pub const CU_MEMHOSTREGISTER_DEVICEMAP: u32 = 0x02;

/// Pointer refers to memory-mapped I/O space (not system RAM).
pub const CU_MEMHOSTREGISTER_IOMEMORY: u32 = 0x04;

/// Registered memory is treated as read-only by the device.
pub const CU_MEMHOSTREGISTER_READ_ONLY: u32 = 0x08;

// =========================================================================
// cuPointerGetAttribute attribute codes
// =========================================================================
//
// Only a subset of the driver's `CUpointer_attribute` enum is exposed here.
// Codes 5, 6 and 7 are P2P_TOKENS, SYNC_MEMOPS and BUFFER_ID respectively,
// which is why the values below are not contiguous.

/// Query the CUDA context associated with a pointer.
pub const CU_POINTER_ATTRIBUTE_CONTEXT: u32 = 1;

/// Query the memory type (host / device / array / unified) of a pointer.
pub const CU_POINTER_ATTRIBUTE_MEMORY_TYPE: u32 = 2;

/// Query the address at which the pointer's memory may be accessed on the
/// device.
pub const CU_POINTER_ATTRIBUTE_DEVICE_POINTER: u32 = 3;

/// Query the address at which the pointer's memory may be accessed on the
/// host.
pub const CU_POINTER_ATTRIBUTE_HOST_POINTER: u32 = 4;

/// Query whether the pointer refers to managed (unified) memory.
///
/// The driver value is 8; 7 is `CU_POINTER_ATTRIBUTE_BUFFER_ID`, which
/// returns a buffer identifier rather than a managed-memory flag.
pub const CU_POINTER_ATTRIBUTE_IS_MANAGED: u32 = 8;

// =========================================================================
// CU_MEMORYTYPE values (returned by pointer attribute queries)
// =========================================================================

/// Host (system) memory.
pub const CU_MEMORYTYPE_HOST: u32 = 0x01;

/// Device (GPU) memory.
pub const CU_MEMORYTYPE_DEVICE: u32 = 0x02;

/// Array memory.
pub const CU_MEMORYTYPE_ARRAY: u32 = 0x03;

/// Unified (managed) memory.
pub const CU_MEMORYTYPE_UNIFIED: u32 = 0x04;

// =========================================================================
// Context scheduling flags
// =========================================================================

/// The driver picks the most appropriate scheduling mode.
pub const CU_CTX_SCHED_AUTO: u32 = 0x00;

/// Actively spin when waiting for results from the GPU.
pub const CU_CTX_SCHED_SPIN: u32 = 0x01;

/// Yield the CPU when waiting for results from the GPU.
pub const CU_CTX_SCHED_YIELD: u32 = 0x02;

/// Block the calling thread when waiting for results.
pub const CU_CTX_SCHED_BLOCKING_SYNC: u32 = 0x04;

/// Mask selecting the scheduling bits (`SPIN | YIELD | BLOCKING_SYNC`).
pub const CU_CTX_SCHED_MASK: u32 = 0x07;

/// Support mapped pinned allocations.
pub const CU_CTX_MAP_HOST: u32 = 0x08;

/// Keep local memory allocation after launch.
pub const CU_CTX_LMEM_RESIZE_TO_MAX: u32 = 0x10;

/// Trigger coredumps from exceptions in this context.
pub const CU_CTX_COREDUMP_ENABLE: u32 = 0x20;

/// Enable a user pipe to trigger coredumps in this context.
pub const CU_CTX_USER_COREDUMP_ENABLE: u32 = 0x40;

/// Ensure synchronous memory operations on this context will synchronise.
pub const CU_CTX_SYNC_MEMOPS: u32 = 0x80;

/// Mask covering every context flag defined above.
pub const CU_CTX_FLAGS_MASK: u32 = 0xFF;

// =========================================================================
// Function attribute values (used with cuFuncGetAttribute)
// =========================================================================
//
// These are typed `i32` (unlike the `u32` flag constants elsewhere in this
// module); the type is part of the public interface and is kept as-is.

/// Maximum number of threads per block beyond which a launch of this
/// function would fail.
pub const CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK: i32 = 0;

/// Statically allocated shared memory required by this function (bytes).
/// Dynamically allocated shared memory is not included.
pub const CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES: i32 = 1;

/// Size of user-allocated constant memory required by this function (bytes).
pub const CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES: i32 = 2;

/// Size of local memory used by each thread (bytes).
pub const CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES: i32 = 3;

/// Number of registers used by each thread.
pub const CU_FUNC_ATTRIBUTE_NUM_REGS: i32 = 4;

/// PTX virtual architecture version the function was compiled for, encoded
/// as `major * 10 + minor` (e.g. 70 for `compute_70`).
pub const CU_FUNC_ATTRIBUTE_PTX_VERSION: i32 = 5;

/// Binary architecture version the function was compiled for, encoded as
/// `major * 10 + minor` (e.g. 70 for `sm_70`).
pub const CU_FUNC_ATTRIBUTE_BINARY_VERSION: i32 = 6;

/// Whether the function was compiled with the `-Xptxas --dlcm=ca` option
/// (global loads cached in L1).
pub const CU_FUNC_ATTRIBUTE_CACHE_MODE_CA: i32 = 7;

/// Maximum dynamically allocated shared memory usable by this function
/// (bytes).
pub const CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES: i32 = 8;

/// Preferred shared memory carve-out, as a percentage of total shared
/// memory, on devices where L1 cache and shared memory share hardware.
pub const CU_FUNC_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT: i32 = 9;

/// When set, the kernel must be launched with a valid cluster size.
pub const CU_FUNC_ATTRIBUTE_CLUSTER_SIZE_MUST_BE_SET: i32 = 10;

/// Required cluster width, in blocks.
pub const CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_WIDTH: i32 = 11;

/// Required cluster height, in blocks.
pub const CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_HEIGHT: i32 = 12;

/// Required cluster depth, in blocks.
pub const CU_FUNC_ATTRIBUTE_REQUIRED_CLUSTER_DEPTH: i32 = 13;

/// Whether the function may be launched with a non-portable cluster size.
pub const CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED: i32 = 14;

/// Block scheduling policy preference for clusters of this function.
pub const CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE: i32 = 15;

// =========================================================================
// Memory advise values
// =========================================================================

/// Hint that the data will mostly be read and only occasionally written.
pub const CU_MEM_ADVISE_SET_READ_MOSTLY: u32 = 1;

/// Undo the effect of the read-mostly hint.
pub const CU_MEM_ADVISE_UNSET_READ_MOSTLY: u32 = 2;

/// Set the preferred location of the data to the specified device.
pub const CU_MEM_ADVISE_SET_PREFERRED_LOCATION: u32 = 3;

/// Clear the preferred location of the data.
pub const CU_MEM_ADVISE_UNSET_PREFERRED_LOCATION: u32 = 4;

/// Hint that the data will be accessed by the specified device.
pub const CU_MEM_ADVISE_SET_ACCESSED_BY: u32 = 5;

/// Undo the accessed-by hint for the specified device.
pub const CU_MEM_ADVISE_UNSET_ACCESSED_BY: u32 = 6;

// =========================================================================
// Limit values (cuCtxSetLimit / cuCtxGetLimit)
// =========================================================================

/// Stack size for each GPU thread.
pub const CU_LIMIT_STACK_SIZE: u32 = 0;

/// Size of the GPU printf FIFO.
pub const CU_LIMIT_PRINTF_FIFO_SIZE: u32 = 1;

/// Size of the heap used by `malloc()` on the device.
pub const CU_LIMIT_MALLOC_HEAP_SIZE: u32 = 2;

/// Device runtime launch synchronise depth.
pub const CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH: u32 = 3;

/// Device runtime pending launch count.
pub const CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT: u32 = 4;

/// Maximum L2 cache fetch granularity in bytes (a hint to the driver).
pub const CU_LIMIT_MAX_L2_FETCH_GRANULARITY: u32 = 5;

/// Size in bytes of the L2 cache set aside for persisting lines.
pub const CU_LIMIT_PERSISTING_L2_CACHE_SIZE: u32 = 6;

// =========================================================================
// Occupancy flags
// =========================================================================

/// Default occupancy calculation.
pub const CU_OCCUPANCY_DEFAULT: u32 = 0x0;

/// Assume global caching is enabled and cannot be automatically turned off.
pub const CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE: u32 = 0x1;

// =========================================================================
// Tests
// =========================================================================

#[cfg(test)]
mod tests {
    //! Pins the numeric values of the constants so that an accidental edit
    //! is caught before it reaches the driver FFI boundary.

    use super::*;

    #[test]
    fn test_stream_and_event_flags() {
        assert_eq!(CU_STREAM_DEFAULT, 0);
        assert_eq!(CU_STREAM_NON_BLOCKING, 1);
        assert_eq!(CU_EVENT_DEFAULT, 0);
        assert_eq!(CU_EVENT_BLOCKING_SYNC, 1);
        assert_eq!(CU_EVENT_DISABLE_TIMING, 2);
        assert_eq!(CU_EVENT_INTERPROCESS, 4);
    }

    #[test]
    fn test_context_scheduling_flags() {
        assert_eq!(CU_CTX_SCHED_AUTO, 0);
        assert_eq!(CU_CTX_SCHED_SPIN, 1);
        assert_eq!(CU_CTX_SCHED_YIELD, 2);
        assert_eq!(CU_CTX_SCHED_BLOCKING_SYNC, 4);
    }

    #[test]
    fn test_context_flag_masks() {
        // The scheduling mask must cover exactly the scheduling bits.
        assert_eq!(
            CU_CTX_SCHED_SPIN | CU_CTX_SCHED_YIELD | CU_CTX_SCHED_BLOCKING_SYNC,
            CU_CTX_SCHED_MASK
        );
        // The overall mask must cover every individual context flag.
        let all_flags = CU_CTX_SCHED_MASK
            | CU_CTX_MAP_HOST
            | CU_CTX_LMEM_RESIZE_TO_MAX
            | CU_CTX_COREDUMP_ENABLE
            | CU_CTX_USER_COREDUMP_ENABLE
            | CU_CTX_SYNC_MEMOPS;
        assert_eq!(all_flags, CU_CTX_FLAGS_MASK);
    }

    #[test]
    fn test_mem_attach_flags() {
        assert_eq!(CU_MEM_ATTACH_GLOBAL, 1);
        assert_eq!(CU_MEM_ATTACH_HOST, 2);
        assert_eq!(CU_MEM_ATTACH_SINGLE, 4);
    }

    #[test]
    fn test_host_register_flags() {
        assert_eq!(CU_MEMHOSTREGISTER_PORTABLE, 0x01);
        assert_eq!(CU_MEMHOSTREGISTER_DEVICEMAP, 0x02);
        assert_eq!(CU_MEMHOSTREGISTER_IOMEMORY, 0x04);
        assert_eq!(CU_MEMHOSTREGISTER_READ_ONLY, 0x08);
    }

    #[test]
    fn test_mempool_attribute_constants() {
        assert_eq!(CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES, 1);
        assert_eq!(CU_MEMPOOL_ATTR_RELEASE_THRESHOLD, 4);
        assert_eq!(CU_MEMPOOL_ATTR_USED_MEM_HIGH, 8);
    }

    #[test]
    fn test_func_attribute_constants() {
        assert_eq!(CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, 0);
        assert_eq!(CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, 1);
        assert_eq!(CU_FUNC_ATTRIBUTE_NUM_REGS, 4);
        assert_eq!(CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, 8);
        assert_eq!(CU_FUNC_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE, 15);
    }

    #[test]
    fn test_limit_constants() {
        assert_eq!(CU_LIMIT_STACK_SIZE, 0);
        assert_eq!(CU_LIMIT_PRINTF_FIFO_SIZE, 1);
        assert_eq!(CU_LIMIT_MALLOC_HEAP_SIZE, 2);
        assert_eq!(CU_LIMIT_PERSISTING_L2_CACHE_SIZE, 6);
    }

    #[test]
    fn test_memory_type_constants() {
        assert_eq!(CU_MEMORYTYPE_HOST, 1);
        assert_eq!(CU_MEMORYTYPE_DEVICE, 2);
        assert_eq!(CU_MEMORYTYPE_ARRAY, 3);
        assert_eq!(CU_MEMORYTYPE_UNIFIED, 4);
    }

    #[test]
    fn test_mem_advise_and_occupancy_constants() {
        assert_eq!(CU_MEM_ADVISE_SET_READ_MOSTLY, 1);
        assert_eq!(CU_MEM_ADVISE_UNSET_ACCESSED_BY, 6);
        assert_eq!(CU_OCCUPANCY_DEFAULT, 0);
        assert_eq!(CU_OCCUPANCY_DISABLE_CACHING_OVERRIDE, 1);
    }
}