Skip to main content

baracuda_cuda_sys/runtime/
loader.rs

//! The `Runtime` loader — parallels [`crate::Driver`] for the CUDA Runtime API.
//!
//! `libcudart` does not expose a `cuGetProcAddress`-style entry-point
//! resolver for its own functions, so every symbol is resolved via a plain
//! `dlsym`-style lookup on the loaded library. Resolution is lazy: each
//! pointer is looked up on first use and then cached in a per-function
//! `OnceLock`, exactly like the Driver loader.
6
7use std::sync::OnceLock;
8
9use baracuda_core::{platform, Library, LoaderError};
10
11use super::functions::*;
12
// Generates the `Runtime` function-pointer table from a list of entries of
// the form `fn <accessor> as "<C symbol>": <PFN type>;`.
//
// For every entry the macro emits one `OnceLock` cache slot inside `Runtime`
// and one public accessor method that resolves the symbol on first use.
macro_rules! runtime_fns {
    ($(
        $(#[$attr:meta])*
        fn $name:ident as $sym:literal : $pfn:ty;
    )*) => {
        /// Lazily-resolved CUDA Runtime API function-pointer table.
        ///
        /// Owns the opened library handle plus one cache slot per declared
        /// function; a slot is filled the first time its accessor succeeds.
        #[allow(non_snake_case)]
        pub struct Runtime {
            lib: Library,
            $(
                $name: OnceLock<$pfn>,
            )*
        }

        impl core::fmt::Debug for Runtime {
            /// Prints only the library handle; the per-function cache slots
            /// are elided via `finish_non_exhaustive`.
            fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
                f.debug_struct("Runtime")
                    .field("lib", &self.lib)
                    .finish_non_exhaustive()
            }
        }

        impl Runtime {
            /// Wrap an opened library with every cache slot still unresolved.
            fn empty(lib: Library) -> Self {
                Self {
                    lib,
                    $(
                        $name: OnceLock::new(),
                    )*
                }
            }

            $(
                $(#[$attr])*
                #[allow(non_snake_case)]
                #[doc = concat!("Resolve `", $sym, "` and return the cached function pointer.")]
                ///
                /// # Errors
                ///
                /// Propagates the [`LoaderError`] reported by the symbol
                /// lookup. A failed lookup is not cached, so a later call
                /// performs the lookup again.
                pub fn $name(&self) -> Result<$pfn, LoaderError> {
                    // Fast path: an earlier call already resolved this symbol.
                    if let Some(&cached) = self.$name.get() {
                        return Ok(cached);
                    }
                    // SAFETY: NOTE(review): the safety contract of
                    // `raw_symbol` lives in `baracuda_core`; this call only
                    // passes the literal symbol name — confirm against it.
                    let raw: *mut () = unsafe { self.lib.raw_symbol($sym)? };
                    // SAFETY: `$pfn` is a function-pointer type whose
                    // signature matches the C declaration of `$sym`.
                    // `transmute` (unlike `transmute_copy`) refuses to compile
                    // if `$pfn` is not exactly pointer-sized. This assumes
                    // `raw_symbol` reports a missing symbol as `Err` rather
                    // than a null pointer — TODO confirm.
                    let resolved: $pfn =
                        unsafe { core::mem::transmute::<*mut (), $pfn>(raw) };
                    // A concurrent caller may have filled the slot first; the
                    // `set` result is ignored and our own lookup is returned.
                    let _ = self.$name.set(resolved);
                    Ok(resolved)
                }
            )*
        }
    };
}
62
63runtime_fns! {
64    // Version & error
65    fn cuda_runtime_get_version as "cudaRuntimeGetVersion": PFN_cudaRuntimeGetVersion;
66    fn cuda_driver_get_version as "cudaDriverGetVersion": PFN_cudaDriverGetVersion;
67    fn cuda_get_last_error as "cudaGetLastError": PFN_cudaGetLastError;
68    fn cuda_peek_at_last_error as "cudaPeekAtLastError": PFN_cudaPeekAtLastError;
69    fn cuda_get_error_name as "cudaGetErrorName": PFN_cudaGetErrorName;
70    fn cuda_get_error_string as "cudaGetErrorString": PFN_cudaGetErrorString;
71
72    // Device
73    fn cuda_get_device_count as "cudaGetDeviceCount": PFN_cudaGetDeviceCount;
74    fn cuda_set_device as "cudaSetDevice": PFN_cudaSetDevice;
75    fn cuda_get_device as "cudaGetDevice": PFN_cudaGetDevice;
76    fn cuda_device_synchronize as "cudaDeviceSynchronize": PFN_cudaDeviceSynchronize;
77    fn cuda_device_reset as "cudaDeviceReset": PFN_cudaDeviceReset;
78    fn cuda_device_get_attribute as "cudaDeviceGetAttribute": PFN_cudaDeviceGetAttribute;
79    fn cuda_init_device as "cudaInitDevice": PFN_cudaInitDevice;
80
81    // Memory
82    fn cuda_malloc as "cudaMalloc": PFN_cudaMalloc;
83    fn cuda_free as "cudaFree": PFN_cudaFree;
84    fn cuda_malloc_managed as "cudaMallocManaged": PFN_cudaMallocManaged;
85    fn cuda_memcpy as "cudaMemcpy": PFN_cudaMemcpy;
86    fn cuda_memcpy_async as "cudaMemcpyAsync": PFN_cudaMemcpyAsync;
87    fn cuda_memset as "cudaMemset": PFN_cudaMemset;
88    fn cuda_memset_async as "cudaMemsetAsync": PFN_cudaMemsetAsync;
89
90    // Stream
91    fn cuda_stream_create as "cudaStreamCreate": PFN_cudaStreamCreate;
92    fn cuda_stream_create_with_flags as "cudaStreamCreateWithFlags": PFN_cudaStreamCreateWithFlags;
93    fn cuda_stream_destroy as "cudaStreamDestroy": PFN_cudaStreamDestroy;
94    fn cuda_stream_synchronize as "cudaStreamSynchronize": PFN_cudaStreamSynchronize;
95    fn cuda_stream_query as "cudaStreamQuery": PFN_cudaStreamQuery;
96    fn cuda_stream_wait_event as "cudaStreamWaitEvent": PFN_cudaStreamWaitEvent;
97
98    // Event
99    fn cuda_event_create as "cudaEventCreate": PFN_cudaEventCreate;
100    fn cuda_event_create_with_flags as "cudaEventCreateWithFlags": PFN_cudaEventCreateWithFlags;
101    fn cuda_event_destroy as "cudaEventDestroy": PFN_cudaEventDestroy;
102    fn cuda_event_record as "cudaEventRecord": PFN_cudaEventRecord;
103    fn cuda_event_synchronize as "cudaEventSynchronize": PFN_cudaEventSynchronize;
104    fn cuda_event_query as "cudaEventQuery": PFN_cudaEventQuery;
105    fn cuda_event_elapsed_time as "cudaEventElapsedTime": PFN_cudaEventElapsedTime;
106
107    // Kernel launch
108    fn cuda_launch_kernel as "cudaLaunchKernel": PFN_cudaLaunchKernel;
109
110    // Library management (CUDA 12.0+) — will fail on older installs; callers
111    // should gate via Feature::LibraryManagement.
112    fn cuda_library_load_data as "cudaLibraryLoadData": PFN_cudaLibraryLoadData;
113    fn cuda_library_unload as "cudaLibraryUnload": PFN_cudaLibraryUnload;
114    fn cuda_library_get_kernel as "cudaLibraryGetKernel": PFN_cudaLibraryGetKernel;
115
116    // Stream extras
117    fn cuda_stream_create_with_priority as "cudaStreamCreateWithPriority": PFN_cudaStreamCreateWithPriority;
118    fn cuda_stream_get_priority as "cudaStreamGetPriority": PFN_cudaStreamGetPriority;
119    fn cuda_stream_get_flags as "cudaStreamGetFlags": PFN_cudaStreamGetFlags;
120    fn cuda_device_get_stream_priority_range as "cudaDeviceGetStreamPriorityRange": PFN_cudaDeviceGetStreamPriorityRange;
121
122    // Peer access
123    fn cuda_device_can_access_peer as "cudaDeviceCanAccessPeer": PFN_cudaDeviceCanAccessPeer;
124    fn cuda_device_enable_peer_access as "cudaDeviceEnablePeerAccess": PFN_cudaDeviceEnablePeerAccess;
125    fn cuda_device_disable_peer_access as "cudaDeviceDisablePeerAccess": PFN_cudaDeviceDisablePeerAccess;
126
127    // Mem prefetch/advise + mem info
128    fn cuda_mem_prefetch_async as "cudaMemPrefetchAsync": PFN_cudaMemPrefetchAsync;
129    fn cuda_mem_advise as "cudaMemAdvise": PFN_cudaMemAdvise;
130    fn cuda_mem_get_info as "cudaMemGetInfo": PFN_cudaMemGetInfo;
131
132    // Pinned + managed memory
133    fn cuda_malloc_host as "cudaMallocHost": PFN_cudaMallocHost;
134    fn cuda_free_host as "cudaFreeHost": PFN_cudaFreeHost;
135    fn cuda_host_alloc as "cudaHostAlloc": PFN_cudaHostAlloc;
136    fn cuda_host_register as "cudaHostRegister": PFN_cudaHostRegister;
137    fn cuda_host_unregister as "cudaHostUnregister": PFN_cudaHostUnregister;
138    fn cuda_host_get_device_pointer as "cudaHostGetDevicePointer": PFN_cudaHostGetDevicePointer;
139    fn cuda_host_get_flags as "cudaHostGetFlags": PFN_cudaHostGetFlags;
140
141    // Async alloc
142    fn cuda_malloc_async as "cudaMallocAsync": PFN_cudaMallocAsync;
143    fn cuda_free_async as "cudaFreeAsync": PFN_cudaFreeAsync;
144
145    // Graphs + stream capture
146    fn cuda_graph_create as "cudaGraphCreate": PFN_cudaGraphCreate;
147    fn cuda_graph_destroy as "cudaGraphDestroy": PFN_cudaGraphDestroy;
148    fn cuda_graph_instantiate as "cudaGraphInstantiate": PFN_cudaGraphInstantiate;
149    fn cuda_graph_launch as "cudaGraphLaunch": PFN_cudaGraphLaunch;
150    fn cuda_graph_exec_destroy as "cudaGraphExecDestroy": PFN_cudaGraphExecDestroy;
151    fn cuda_graph_get_nodes as "cudaGraphGetNodes": PFN_cudaGraphGetNodes;
152    fn cuda_stream_begin_capture as "cudaStreamBeginCapture": PFN_cudaStreamBeginCapture;
153    fn cuda_stream_end_capture as "cudaStreamEndCapture": PFN_cudaStreamEndCapture;
154    fn cuda_stream_is_capturing as "cudaStreamIsCapturing": PFN_cudaStreamIsCapturing;
155
156    // Function symbol / attrs / occupancy
157    fn cuda_get_func_by_symbol as "cudaGetFuncBySymbol": PFN_cudaGetFuncBySymbol;
158    fn cuda_func_get_attributes as "cudaFuncGetAttributes": PFN_cudaFuncGetAttributes;
159    fn cuda_func_set_attribute as "cudaFuncSetAttribute": PFN_cudaFuncSetAttribute;
160    fn cuda_occupancy_max_active_blocks_per_multiprocessor as "cudaOccupancyMaxActiveBlocksPerMultiprocessor": PFN_cudaOccupancyMaxActiveBlocksPerMultiprocessor;
161    fn cuda_occupancy_max_potential_block_size as "cudaOccupancyMaxPotentialBlockSize": PFN_cudaOccupancyMaxPotentialBlockSize;
162
163    // Pointer attributes
164    fn cuda_pointer_get_attributes as "cudaPointerGetAttributes": PFN_cudaPointerGetAttributes;
165
166    // 2-D memcpy + memset variants
167    fn cuda_memcpy_2d as "cudaMemcpy2D": PFN_cudaMemcpy2D;
168    fn cuda_memcpy_2d_async as "cudaMemcpy2DAsync": PFN_cudaMemcpy2DAsync;
169    fn cuda_malloc_pitch as "cudaMallocPitch": PFN_cudaMallocPitch;
170    fn cuda_memset_2d as "cudaMemset2D": PFN_cudaMemset2D;
171    fn cuda_memset_2d_async as "cudaMemset2DAsync": PFN_cudaMemset2DAsync;
172
173    // Peer memcpy
174    fn cuda_memcpy_peer as "cudaMemcpyPeer": PFN_cudaMemcpyPeer;
175    fn cuda_memcpy_peer_async as "cudaMemcpyPeerAsync": PFN_cudaMemcpyPeerAsync;
176
177    // Device properties (opaque 1KB+ struct)
178    // Accepts both base and _v2 names at the symbol-resolver level —
179    // runtime_fns! hard-codes the name, so we try `_v2` first via a
180    // separate alias registered below and fall back in the safe wrapper.
181    fn cuda_get_device_properties as "cudaGetDeviceProperties": PFN_cudaGetDeviceProperties;
182
183    // External memory + semaphore interop
184    fn cuda_import_external_memory as "cudaImportExternalMemory": PFN_cudaImportExternalMemory;
185    fn cuda_destroy_external_memory as "cudaDestroyExternalMemory": PFN_cudaDestroyExternalMemory;
186    fn cuda_external_memory_get_mapped_buffer as "cudaExternalMemoryGetMappedBuffer": PFN_cudaExternalMemoryGetMappedBuffer;
187    fn cuda_external_memory_get_mapped_mipmapped_array as "cudaExternalMemoryGetMappedMipmappedArray": PFN_cudaExternalMemoryGetMappedMipmappedArray;
188    fn cuda_import_external_semaphore as "cudaImportExternalSemaphore": PFN_cudaImportExternalSemaphore;
189    fn cuda_destroy_external_semaphore as "cudaDestroyExternalSemaphore": PFN_cudaDestroyExternalSemaphore;
190    fn cuda_signal_external_semaphores_async as "cudaSignalExternalSemaphoresAsync": PFN_cudaSignalExternalSemaphoresAsync;
191    fn cuda_wait_external_semaphores_async as "cudaWaitExternalSemaphoresAsync": PFN_cudaWaitExternalSemaphoresAsync;
192
193    // ---- Runtime Wave 1: host-fn launch + stream write/wait value ----
194    fn cuda_launch_host_func as "cudaLaunchHostFunc": PFN_cudaLaunchHostFunc;
195    fn cuda_stream_write_value_32 as "cudaStreamWriteValue32": PFN_cudaStreamWriteValue32;
196    fn cuda_stream_write_value_64 as "cudaStreamWriteValue64": PFN_cudaStreamWriteValue64;
197    fn cuda_stream_wait_value_32 as "cudaStreamWaitValue32": PFN_cudaStreamWaitValue32;
198    fn cuda_stream_wait_value_64 as "cudaStreamWaitValue64": PFN_cudaStreamWaitValue64;
199
200    // ---- Memory pools ----
201    fn cuda_mem_pool_create as "cudaMemPoolCreate": PFN_cudaMemPoolCreate;
202    fn cuda_mem_pool_destroy as "cudaMemPoolDestroy": PFN_cudaMemPoolDestroy;
203    fn cuda_mem_pool_set_attribute as "cudaMemPoolSetAttribute": PFN_cudaMemPoolSetAttribute;
204    fn cuda_mem_pool_get_attribute as "cudaMemPoolGetAttribute": PFN_cudaMemPoolGetAttribute;
205    fn cuda_mem_pool_trim_to as "cudaMemPoolTrimTo": PFN_cudaMemPoolTrimTo;
206    fn cuda_mem_pool_set_access as "cudaMemPoolSetAccess": PFN_cudaMemPoolSetAccess;
207    fn cuda_mem_pool_get_access as "cudaMemPoolGetAccess": PFN_cudaMemPoolGetAccess;
208    fn cuda_malloc_from_pool_async as "cudaMallocFromPoolAsync": PFN_cudaMallocFromPoolAsync;
209    fn cuda_device_get_default_mem_pool as "cudaDeviceGetDefaultMemPool": PFN_cudaDeviceGetDefaultMemPool;
210    fn cuda_device_get_mem_pool as "cudaDeviceGetMemPool": PFN_cudaDeviceGetMemPool;
211    fn cuda_device_set_mem_pool as "cudaDeviceSetMemPool": PFN_cudaDeviceSetMemPool;
212    fn cuda_mem_pool_export_to_shareable_handle as "cudaMemPoolExportToShareableHandle": PFN_cudaMemPoolExportToShareableHandle;
213    fn cuda_mem_pool_import_from_shareable_handle as "cudaMemPoolImportFromShareableHandle": PFN_cudaMemPoolImportFromShareableHandle;
214    fn cuda_mem_pool_export_pointer as "cudaMemPoolExportPointer": PFN_cudaMemPoolExportPointer;
215    fn cuda_mem_pool_import_pointer as "cudaMemPoolImportPointer": PFN_cudaMemPoolImportPointer;
216
217    // ---- Explicit graph node builders ----
218    fn cuda_graph_add_kernel_node as "cudaGraphAddKernelNode": PFN_cudaGraphAddKernelNode;
219    fn cuda_graph_add_memset_node as "cudaGraphAddMemsetNode": PFN_cudaGraphAddMemsetNode;
220    fn cuda_graph_add_memcpy_node as "cudaGraphAddMemcpyNode": PFN_cudaGraphAddMemcpyNode;
221    fn cuda_graph_add_host_node as "cudaGraphAddHostNode": PFN_cudaGraphAddHostNode;
222    fn cuda_graph_add_empty_node as "cudaGraphAddEmptyNode": PFN_cudaGraphAddEmptyNode;
223    fn cuda_graph_add_child_graph_node as "cudaGraphAddChildGraphNode": PFN_cudaGraphAddChildGraphNode;
224    fn cuda_graph_add_event_record_node as "cudaGraphAddEventRecordNode": PFN_cudaGraphAddEventRecordNode;
225    fn cuda_graph_add_event_wait_node as "cudaGraphAddEventWaitNode": PFN_cudaGraphAddEventWaitNode;
226    fn cuda_graph_add_mem_alloc_node as "cudaGraphAddMemAllocNode": PFN_cudaGraphAddMemAllocNode;
227    fn cuda_graph_add_mem_free_node as "cudaGraphAddMemFreeNode": PFN_cudaGraphAddMemFreeNode;
228    fn cuda_graph_exec_update as "cudaGraphExecUpdate": PFN_cudaGraphExecUpdate;
229    fn cuda_graph_add_dependencies as "cudaGraphAddDependencies": PFN_cudaGraphAddDependencies;
230    fn cuda_graph_mem_free_node_get_params as "cudaGraphMemFreeNodeGetParams": PFN_cudaGraphMemFreeNodeGetParams;
231    fn cuda_graph_node_get_type as "cudaGraphNodeGetType": PFN_cudaGraphNodeGetType;
232
233    // ---- Runtime Wave 2 ----
234    // Arrays + tex/surf
235    fn cuda_malloc_array as "cudaMallocArray": PFN_cudaMallocArray;
236    fn cuda_free_array as "cudaFreeArray": PFN_cudaFreeArray;
237    fn cuda_memcpy_2d_to_array as "cudaMemcpy2DToArray": PFN_cudaMemcpy2DToArray;
238    fn cuda_memcpy_2d_from_array as "cudaMemcpy2DFromArray": PFN_cudaMemcpy2DFromArray;
239    fn cuda_create_texture_object as "cudaCreateTextureObject": PFN_cudaCreateTextureObject;
240    fn cuda_destroy_texture_object as "cudaDestroyTextureObject": PFN_cudaDestroyTextureObject;
241    fn cuda_create_surface_object as "cudaCreateSurfaceObject": PFN_cudaCreateSurfaceObject;
242    fn cuda_destroy_surface_object as "cudaDestroySurfaceObject": PFN_cudaDestroySurfaceObject;
243
244    // User objects
245    fn cuda_user_object_create as "cudaUserObjectCreate": PFN_cudaUserObjectCreate;
246    fn cuda_user_object_retain as "cudaUserObjectRetain": PFN_cudaUserObjectRetain;
247    fn cuda_user_object_release as "cudaUserObjectRelease": PFN_cudaUserObjectRelease;
248    fn cuda_graph_retain_user_object as "cudaGraphRetainUserObject": PFN_cudaGraphRetainUserObject;
249    fn cuda_graph_release_user_object as "cudaGraphReleaseUserObject": PFN_cudaGraphReleaseUserObject;
250
251    // Cooperative launch
252    fn cuda_launch_cooperative_kernel as "cudaLaunchCooperativeKernel": PFN_cudaLaunchCooperativeKernel;
253
254    // Stream attach + attrs
255    fn cuda_stream_attach_mem_async as "cudaStreamAttachMemAsync": PFN_cudaStreamAttachMemAsync;
256    fn cuda_stream_get_attribute as "cudaStreamGetAttribute": PFN_cudaStreamGetAttribute;
257    fn cuda_stream_set_attribute as "cudaStreamSetAttribute": PFN_cudaStreamSetAttribute;
258    fn cuda_stream_copy_attributes as "cudaStreamCopyAttributes": PFN_cudaStreamCopyAttributes;
259
260    // IPC
261    fn cuda_ipc_get_event_handle as "cudaIpcGetEventHandle": PFN_cudaIpcGetEventHandle;
262    fn cuda_ipc_open_event_handle as "cudaIpcOpenEventHandle": PFN_cudaIpcOpenEventHandle;
263    fn cuda_ipc_get_mem_handle as "cudaIpcGetMemHandle": PFN_cudaIpcGetMemHandle;
264    fn cuda_ipc_open_mem_handle as "cudaIpcOpenMemHandle": PFN_cudaIpcOpenMemHandle;
265    fn cuda_ipc_close_mem_handle as "cudaIpcCloseMemHandle": PFN_cudaIpcCloseMemHandle;
266
267    // Device flags
268    fn cuda_set_device_flags as "cudaSetDeviceFlags": PFN_cudaSetDeviceFlags;
269    fn cuda_get_device_flags as "cudaGetDeviceFlags": PFN_cudaGetDeviceFlags;
270
271    // ---- Runtime Wave 3: batch mem ops + conditional nodes + driver bridge + occupancy ----
272    fn cuda_stream_batch_mem_op as "cudaStreamBatchMemOp": PFN_cudaStreamBatchMemOp;
273    fn cuda_graph_add_node as "cudaGraphAddNode": PFN_cudaGraphAddNode;
274    fn cuda_graph_conditional_handle_create as "cudaGraphConditionalHandleCreate": PFN_cudaGraphConditionalHandleCreate;
275    fn cuda_get_driver_entry_point as "cudaGetDriverEntryPoint": PFN_cudaGetDriverEntryPoint;
276    fn cuda_occupancy_max_active_blocks_per_multiprocessor_with_flags
277        as "cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags":
278        PFN_cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags;
279    fn cuda_occupancy_available_dynamic_smem_per_block
280        as "cudaOccupancyAvailableDynamicSMemPerBlock":
281        PFN_cudaOccupancyAvailableDynamicSMemPerBlock;
282
283    // ---- Runtime Wave 4: graphics interop (core + GL + D3D + VDPAU + EGL + NvSci) ----
284
285    // Core graphics
286    fn cuda_graphics_unregister_resource as "cudaGraphicsUnregisterResource":
287        PFN_cudaGraphicsUnregisterResource;
288    fn cuda_graphics_map_resources as "cudaGraphicsMapResources":
289        PFN_cudaGraphicsMapResources;
290    fn cuda_graphics_unmap_resources as "cudaGraphicsUnmapResources":
291        PFN_cudaGraphicsUnmapResources;
292    fn cuda_graphics_resource_get_mapped_pointer
293        as "cudaGraphicsResourceGetMappedPointer":
294        PFN_cudaGraphicsResourceGetMappedPointer;
295    fn cuda_graphics_sub_resource_get_mapped_array
296        as "cudaGraphicsSubResourceGetMappedArray":
297        PFN_cudaGraphicsSubResourceGetMappedArray;
298    fn cuda_graphics_resource_get_mapped_mipmapped_array
299        as "cudaGraphicsResourceGetMappedMipmappedArray":
300        PFN_cudaGraphicsResourceGetMappedMipmappedArray;
301    fn cuda_graphics_resource_set_map_flags as "cudaGraphicsResourceSetMapFlags":
302        PFN_cudaGraphicsResourceSetMapFlags;
303
304    // OpenGL
305    fn cuda_graphics_gl_register_buffer as "cudaGraphicsGLRegisterBuffer":
306        PFN_cudaGraphicsGLRegisterBuffer;
307    fn cuda_graphics_gl_register_image as "cudaGraphicsGLRegisterImage":
308        PFN_cudaGraphicsGLRegisterImage;
309    fn cuda_gl_get_devices as "cudaGLGetDevices": PFN_cudaGLGetDevices;
310
311    // D3D9 / D3D10 / D3D11
312    fn cuda_d3d9_get_device as "cudaD3D9GetDevice": PFN_cudaD3D9GetDevice;
313    fn cuda_d3d9_get_devices as "cudaD3D9GetDevices": PFN_cudaD3D9GetDevices;
314    fn cuda_graphics_d3d9_register_resource as "cudaGraphicsD3D9RegisterResource":
315        PFN_cudaGraphicsD3D9RegisterResource;
316    fn cuda_d3d10_get_device as "cudaD3D10GetDevice": PFN_cudaD3D10GetDevice;
317    fn cuda_d3d10_get_devices as "cudaD3D10GetDevices": PFN_cudaD3D10GetDevices;
318    fn cuda_graphics_d3d10_register_resource as "cudaGraphicsD3D10RegisterResource":
319        PFN_cudaGraphicsD3D10RegisterResource;
320    fn cuda_d3d11_get_device as "cudaD3D11GetDevice": PFN_cudaD3D11GetDevice;
321    fn cuda_d3d11_get_devices as "cudaD3D11GetDevices": PFN_cudaD3D11GetDevices;
322    fn cuda_graphics_d3d11_register_resource as "cudaGraphicsD3D11RegisterResource":
323        PFN_cudaGraphicsD3D11RegisterResource;
324
325    // VDPAU
326    fn cuda_vdpau_get_device as "cudaVDPAUGetDevice": PFN_cudaVDPAUGetDevice;
327    fn cuda_graphics_vdpau_register_video_surface
328        as "cudaGraphicsVDPAURegisterVideoSurface":
329        PFN_cudaGraphicsVDPAURegisterVideoSurface;
330    fn cuda_graphics_vdpau_register_output_surface
331        as "cudaGraphicsVDPAURegisterOutputSurface":
332        PFN_cudaGraphicsVDPAURegisterOutputSurface;
333
334    // EGL
335    fn cuda_graphics_egl_register_image as "cudaGraphicsEGLRegisterImage":
336        PFN_cudaGraphicsEGLRegisterImage;
337    fn cuda_graphics_resource_get_mapped_egl_frame
338        as "cudaGraphicsResourceGetMappedEglFrame":
339        PFN_cudaGraphicsResourceGetMappedEglFrame;
340    fn cuda_event_create_from_egl_sync as "cudaEventCreateFromEGLSync":
341        PFN_cudaEventCreateFromEGLSync;
342    fn cuda_egl_stream_consumer_connect as "cudaEGLStreamConsumerConnect":
343        PFN_cudaEGLStreamConsumerConnect;
344    fn cuda_egl_stream_consumer_disconnect as "cudaEGLStreamConsumerDisconnect":
345        PFN_cudaEGLStreamConsumerDisconnect;
346    fn cuda_egl_stream_consumer_acquire_frame as "cudaEGLStreamConsumerAcquireFrame":
347        PFN_cudaEGLStreamConsumerAcquireFrame;
348    fn cuda_egl_stream_consumer_release_frame as "cudaEGLStreamConsumerReleaseFrame":
349        PFN_cudaEGLStreamConsumerReleaseFrame;
350    fn cuda_egl_stream_producer_connect as "cudaEGLStreamProducerConnect":
351        PFN_cudaEGLStreamProducerConnect;
352    fn cuda_egl_stream_producer_disconnect as "cudaEGLStreamProducerDisconnect":
353        PFN_cudaEGLStreamProducerDisconnect;
354    fn cuda_egl_stream_producer_present_frame as "cudaEGLStreamProducerPresentFrame":
355        PFN_cudaEGLStreamProducerPresentFrame;
356    fn cuda_egl_stream_producer_return_frame as "cudaEGLStreamProducerReturnFrame":
357        PFN_cudaEGLStreamProducerReturnFrame;
358
359    // NvSci
360    fn cuda_device_get_nv_sci_sync_attributes as "cudaDeviceGetNvSciSyncAttributes":
361        PFN_cudaDeviceGetNvSciSyncAttributes;
362
363    // ---- Runtime Wave 5 ----
364
365    // Arrays (extras) + tex/surf object descriptors
366    fn cuda_malloc_mipmapped_array as "cudaMallocMipmappedArray": PFN_cudaMallocMipmappedArray;
367    fn cuda_free_mipmapped_array as "cudaFreeMipmappedArray": PFN_cudaFreeMipmappedArray;
368    fn cuda_array_get_info as "cudaArrayGetInfo": PFN_cudaArrayGetInfo;
369    fn cuda_get_mipmapped_array_level as "cudaGetMipmappedArrayLevel":
370        PFN_cudaGetMipmappedArrayLevel;
371    fn cuda_get_texture_object_resource_desc as "cudaGetTextureObjectResourceDesc":
372        PFN_cudaGetTextureObjectResourceDesc;
373    fn cuda_get_texture_object_texture_desc as "cudaGetTextureObjectTextureDesc":
374        PFN_cudaGetTextureObjectTextureDesc;
375    fn cuda_get_texture_object_resource_view_desc as "cudaGetTextureObjectResourceViewDesc":
376        PFN_cudaGetTextureObjectResourceViewDesc;
377    fn cuda_get_surface_object_resource_desc as "cudaGetSurfaceObjectResourceDesc":
378        PFN_cudaGetSurfaceObjectResourceDesc;
379
380    // 3D memcpy
381    fn cuda_memcpy_3d as "cudaMemcpy3D": PFN_cudaMemcpy3D;
382    fn cuda_memcpy_3d_async as "cudaMemcpy3DAsync": PFN_cudaMemcpy3DAsync;
383    fn cuda_memcpy_3d_peer as "cudaMemcpy3DPeer": PFN_cudaMemcpy3DPeer;
384    fn cuda_memcpy_3d_peer_async as "cudaMemcpy3DPeerAsync": PFN_cudaMemcpy3DPeerAsync;
385    fn cuda_memset_3d as "cudaMemset3D": PFN_cudaMemset3D;
386    fn cuda_malloc_3d as "cudaMalloc3D": PFN_cudaMalloc3D;
387    fn cuda_malloc_3d_array as "cudaMalloc3DArray": PFN_cudaMalloc3DArray;
388
389    // Launch-ex / cluster
390    fn cuda_launch_kernel_ex as "cudaLaunchKernelEx": PFN_cudaLaunchKernelEx;
391
392    // Profiler
393    fn cuda_profiler_start as "cudaProfilerStart": PFN_cudaProfilerStart;
394    fn cuda_profiler_stop as "cudaProfilerStop": PFN_cudaProfilerStop;
395
396    // VMM
397    fn cuda_mem_address_reserve as "cudaMemAddressReserve": PFN_cudaMemAddressReserve;
398    fn cuda_mem_address_free as "cudaMemAddressFree": PFN_cudaMemAddressFree;
399    fn cuda_mem_create as "cudaMemCreate": PFN_cudaMemCreate;
400    fn cuda_mem_release as "cudaMemRelease": PFN_cudaMemRelease;
401    fn cuda_mem_map as "cudaMemMap": PFN_cudaMemMap;
402    fn cuda_mem_unmap as "cudaMemUnmap": PFN_cudaMemUnmap;
403    fn cuda_mem_set_access as "cudaMemSetAccess": PFN_cudaMemSetAccess;
404    fn cuda_mem_get_access as "cudaMemGetAccess": PFN_cudaMemGetAccess;
405    fn cuda_mem_get_allocation_granularity as "cudaMemGetAllocationGranularity":
406        PFN_cudaMemGetAllocationGranularity;
407    fn cuda_mem_get_allocation_properties_from_handle
408        as "cudaMemGetAllocationPropertiesFromHandle":
409        PFN_cudaMemGetAllocationPropertiesFromHandle;
410    fn cuda_mem_export_to_shareable_handle as "cudaMemExportToShareableHandle":
411        PFN_cudaMemExportToShareableHandle;
412    fn cuda_mem_import_from_shareable_handle as "cudaMemImportFromShareableHandle":
413        PFN_cudaMemImportFromShareableHandle;
414    fn cuda_mem_retain_allocation_handle as "cudaMemRetainAllocationHandle":
415        PFN_cudaMemRetainAllocationHandle;
416
417    // Multicast (12.0+)
418    fn cuda_multicast_create as "cudaMulticastCreate": PFN_cudaMulticastCreate;
419    fn cuda_multicast_add_device as "cudaMulticastAddDevice": PFN_cudaMulticastAddDevice;
420    fn cuda_multicast_bind_mem as "cudaMulticastBindMem": PFN_cudaMulticastBindMem;
421    fn cuda_multicast_bind_addr as "cudaMulticastBindAddr": PFN_cudaMulticastBindAddr;
422    fn cuda_multicast_unbind as "cudaMulticastUnbind": PFN_cudaMulticastUnbind;
423    fn cuda_multicast_get_granularity as "cudaMulticastGetGranularity":
424        PFN_cudaMulticastGetGranularity;
425
426    // Green contexts (13.1+)
427    fn cuda_device_create_green_ctx as "cudaDeviceCreateGreenCtx": PFN_cudaDeviceCreateGreenCtx;
428    fn cuda_green_ctx_destroy as "cudaGreenCtxDestroy": PFN_cudaGreenCtxDestroy;
429    fn cuda_green_ctx_record_event as "cudaGreenCtxRecordEvent": PFN_cudaGreenCtxRecordEvent;
430    fn cuda_green_ctx_wait_event as "cudaGreenCtxWaitEvent": PFN_cudaGreenCtxWaitEvent;
431    fn cuda_green_ctx_stream_create as "cudaGreenCtxStreamCreate": PFN_cudaGreenCtxStreamCreate;
432}
433
434/// Lazily-initialized process-wide Runtime singleton.
435pub fn runtime() -> Result<&'static Runtime, LoaderError> {
436    static RUNTIME: OnceLock<Runtime> = OnceLock::new();
437    if let Some(r) = RUNTIME.get() {
438        return Ok(r);
439    }
440    let lib = Library::open("cuda-runtime", platform::runtime_library_candidates())?;
441    let r = Runtime::empty(lib);
442    let _ = RUNTIME.set(r);
443    Ok(RUNTIME.get().expect("OnceLock set or lost race"))
444}
445
#[cfg(test)]
mod tests {
    use super::*;

    /// Loading must not panic whether or not the CUDA runtime is installed,
    /// and whenever loading succeeds every call must return the same
    /// process-wide instance.
    #[test]
    fn runtime_singleton_returns_loader_error_without_cuda_runtime() {
        let first = runtime();
        let second = runtime();
        // Machines without cudart yield `Err` here; that is an accepted
        // outcome, so only the success case is checked for identity.
        if let (Ok(a), Ok(b)) = (first, second) {
            assert!(
                core::ptr::eq(a, b),
                "runtime() must hand out one shared instance"
            );
        }
    }
}
455}