pub struct DriverApi {
pub cu_init: unsafe extern "C" fn(flags: u32) -> CUresult,
pub cu_driver_get_version: unsafe extern "C" fn(version: *mut c_int) -> CUresult,
pub cu_device_get: unsafe extern "C" fn(device: *mut CUdevice, ordinal: c_int) -> CUresult,
pub cu_device_get_count: unsafe extern "C" fn(count: *mut c_int) -> CUresult,
pub cu_device_get_name: unsafe extern "C" fn(name: *mut c_char, len: c_int, dev: CUdevice) -> CUresult,
pub cu_device_get_attribute: unsafe extern "C" fn(pi: *mut c_int, attrib: CUdevice_attribute, dev: CUdevice) -> CUresult,
pub cu_device_total_mem_v2: unsafe extern "C" fn(bytes: *mut usize, dev: CUdevice) -> CUresult,
pub cu_device_can_access_peer: unsafe extern "C" fn(can_access: *mut c_int, dev: CUdevice, peer_dev: CUdevice) -> CUresult,
pub cu_device_primary_ctx_retain: unsafe extern "C" fn(pctx: *mut CUcontext, dev: CUdevice) -> CUresult,
pub cu_device_primary_ctx_release_v2: unsafe extern "C" fn(dev: CUdevice) -> CUresult,
pub cu_device_primary_ctx_set_flags_v2: unsafe extern "C" fn(dev: CUdevice, flags: u32) -> CUresult,
pub cu_device_primary_ctx_get_state: unsafe extern "C" fn(dev: CUdevice, flags: *mut u32, active: *mut c_int) -> CUresult,
pub cu_device_primary_ctx_reset_v2: unsafe extern "C" fn(dev: CUdevice) -> CUresult,
pub cu_ctx_create_v2: unsafe extern "C" fn(pctx: *mut CUcontext, flags: u32, dev: CUdevice) -> CUresult,
pub cu_ctx_destroy_v2: unsafe extern "C" fn(ctx: CUcontext) -> CUresult,
pub cu_ctx_set_current: unsafe extern "C" fn(ctx: CUcontext) -> CUresult,
pub cu_ctx_get_current: unsafe extern "C" fn(pctx: *mut CUcontext) -> CUresult,
pub cu_ctx_synchronize: unsafe extern "C" fn() -> CUresult,
pub cu_module_load_data: unsafe extern "C" fn(module: *mut CUmodule, image: *const c_void) -> CUresult,
pub cu_module_load_data_ex: unsafe extern "C" fn(module: *mut CUmodule, image: *const c_void, num_options: u32, options: *mut CUjit_option, option_values: *mut *mut c_void) -> CUresult,
pub cu_module_get_function: unsafe extern "C" fn(hfunc: *mut CUfunction, hmod: CUmodule, name: *const c_char) -> CUresult,
pub cu_module_unload: unsafe extern "C" fn(hmod: CUmodule) -> CUresult,
pub cu_mem_alloc_v2: unsafe extern "C" fn(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresult,
pub cu_mem_free_v2: unsafe extern "C" fn(dptr: CUdeviceptr) -> CUresult,
pub cu_memcpy_htod_v2: unsafe extern "C" fn(dst: CUdeviceptr, src: *const c_void, bytesize: usize) -> CUresult,
pub cu_memcpy_dtoh_v2: unsafe extern "C" fn(dst: *mut c_void, src: CUdeviceptr, bytesize: usize) -> CUresult,
pub cu_memcpy_dtod_v2: unsafe extern "C" fn(dst: CUdeviceptr, src: CUdeviceptr, bytesize: usize) -> CUresult,
pub cu_memcpy_htod_async_v2: unsafe extern "C" fn(dst: CUdeviceptr, src: *const c_void, bytesize: usize, stream: CUstream) -> CUresult,
pub cu_memcpy_dtoh_async_v2: unsafe extern "C" fn(dst: *mut c_void, src: CUdeviceptr, bytesize: usize, stream: CUstream) -> CUresult,
pub cu_mem_alloc_host_v2: unsafe extern "C" fn(pp: *mut *mut c_void, bytesize: usize) -> CUresult,
pub cu_mem_free_host: unsafe extern "C" fn(p: *mut c_void) -> CUresult,
pub cu_mem_alloc_managed: unsafe extern "C" fn(dptr: *mut CUdeviceptr, bytesize: usize, flags: u32) -> CUresult,
pub cu_memset_d8_v2: unsafe extern "C" fn(dst: CUdeviceptr, value: u8, count: usize) -> CUresult,
pub cu_memset_d32_v2: unsafe extern "C" fn(dst: CUdeviceptr, value: u32, count: usize) -> CUresult,
pub cu_mem_get_info_v2: unsafe extern "C" fn(free: *mut usize, total: *mut usize) -> CUresult,
pub cu_mem_host_register_v2: unsafe extern "C" fn(p: *mut c_void, bytesize: usize, flags: u32) -> CUresult,
pub cu_mem_host_unregister: unsafe extern "C" fn(p: *mut c_void) -> CUresult,
pub cu_mem_host_get_device_pointer_v2: unsafe extern "C" fn(pdptr: *mut CUdeviceptr, p: *mut c_void, flags: u32) -> CUresult,
pub cu_pointer_get_attribute: unsafe extern "C" fn(data: *mut c_void, attribute: u32, ptr: CUdeviceptr) -> CUresult,
pub cu_mem_advise: unsafe extern "C" fn(dev_ptr: CUdeviceptr, count: usize, advice: u32, device: CUdevice) -> CUresult,
pub cu_mem_prefetch_async: unsafe extern "C" fn(dev_ptr: CUdeviceptr, count: usize, dst_device: CUdevice, hstream: CUstream) -> CUresult,
pub cu_stream_create: unsafe extern "C" fn(phstream: *mut CUstream, flags: u32) -> CUresult,
pub cu_stream_create_with_priority: unsafe extern "C" fn(phstream: *mut CUstream, flags: u32, priority: c_int) -> CUresult,
pub cu_stream_destroy_v2: unsafe extern "C" fn(hstream: CUstream) -> CUresult,
pub cu_stream_synchronize: unsafe extern "C" fn(hstream: CUstream) -> CUresult,
pub cu_stream_wait_event: unsafe extern "C" fn(hstream: CUstream, hevent: CUevent, flags: u32) -> CUresult,
pub cu_stream_query: unsafe extern "C" fn(hstream: CUstream) -> CUresult,
pub cu_stream_get_priority: unsafe extern "C" fn(hstream: CUstream, priority: *mut c_int) -> CUresult,
pub cu_stream_get_flags: unsafe extern "C" fn(hstream: CUstream, flags: *mut u32) -> CUresult,
pub cu_event_create: unsafe extern "C" fn(phevent: *mut CUevent, flags: u32) -> CUresult,
pub cu_event_destroy_v2: unsafe extern "C" fn(hevent: CUevent) -> CUresult,
pub cu_event_record: unsafe extern "C" fn(hevent: CUevent, hstream: CUstream) -> CUresult,
pub cu_event_query: unsafe extern "C" fn(hevent: CUevent) -> CUresult,
pub cu_event_synchronize: unsafe extern "C" fn(hevent: CUevent) -> CUresult,
pub cu_event_elapsed_time: unsafe extern "C" fn(pmilliseconds: *mut f32, hstart: CUevent, hend: CUevent) -> CUresult,
pub cu_memcpy_peer: unsafe extern "C" fn(dst_device: u64, dst_ctx: CUcontext, src_device: u64, src_ctx: CUcontext, count: usize) -> CUresult,
pub cu_memcpy_peer_async: unsafe extern "C" fn(dst_device: u64, dst_ctx: CUcontext, src_device: u64, src_ctx: CUcontext, count: usize, stream: CUstream) -> CUresult,
pub cu_ctx_enable_peer_access: unsafe extern "C" fn(peer_context: CUcontext, flags: u32) -> CUresult,
pub cu_ctx_disable_peer_access: unsafe extern "C" fn(peer_context: CUcontext) -> CUresult,
pub cu_launch_kernel: unsafe extern "C" fn(f: CUfunction, grid_dim_x: u32, grid_dim_y: u32, grid_dim_z: u32, block_dim_x: u32, block_dim_y: u32, block_dim_z: u32, shared_mem_bytes: u32, hstream: CUstream, kernel_params: *mut *mut c_void, extra: *mut *mut c_void) -> CUresult,
pub cu_launch_cooperative_kernel: unsafe extern "C" fn(f: CUfunction, grid_dim_x: u32, grid_dim_y: u32, grid_dim_z: u32, block_dim_x: u32, block_dim_y: u32, block_dim_z: u32, shared_mem_bytes: u32, hstream: CUstream, kernel_params: *mut *mut c_void) -> CUresult,
pub cu_launch_cooperative_kernel_multi_device: unsafe extern "C" fn(launch_params_list: *mut c_void, num_devices: u32, flags: u32) -> CUresult,
pub cu_occupancy_max_active_blocks_per_multiprocessor: unsafe extern "C" fn(num_blocks: *mut c_int, func: CUfunction, block_size: c_int, dynamic_smem_size: usize) -> CUresult,
pub cu_occupancy_max_potential_block_size: unsafe extern "C" fn(min_grid_size: *mut c_int, block_size: *mut c_int, func: CUfunction, block_size_to_dynamic_smem_size: Option<unsafe extern "C" fn(c_int) -> usize>, dynamic_smem_size: usize, block_size_limit: c_int) -> CUresult,
pub cu_occupancy_max_active_blocks_per_multiprocessor_with_flags: unsafe extern "C" fn(num_blocks: *mut c_int, func: CUfunction, block_size: c_int, dynamic_smem_size: usize, flags: u32) -> CUresult,
pub cu_memcpy_dtod_async_v2: Option<unsafe extern "C" fn(dst: CUdeviceptr, src: CUdeviceptr, bytesize: usize, stream: CUstream) -> CUresult>,
pub cu_memset_d16_v2: Option<unsafe extern "C" fn(dst: CUdeviceptr, value: u16, count: usize) -> CUresult>,
pub cu_memset_d32_async: Option<unsafe extern "C" fn(dst: CUdeviceptr, value: u32, count: usize, stream: CUstream) -> CUresult>,
pub cu_ctx_get_limit: Option<unsafe extern "C" fn(value: *mut usize, limit: u32) -> CUresult>,
pub cu_ctx_set_limit: Option<unsafe extern "C" fn(limit: u32, value: usize) -> CUresult>,
pub cu_ctx_get_cache_config: Option<unsafe extern "C" fn(config: *mut u32) -> CUresult>,
pub cu_ctx_set_cache_config: Option<unsafe extern "C" fn(config: u32) -> CUresult>,
pub cu_ctx_get_shared_mem_config: Option<unsafe extern "C" fn(config: *mut u32) -> CUresult>,
pub cu_ctx_set_shared_mem_config: Option<unsafe extern "C" fn(config: u32) -> CUresult>,
pub cu_event_record_with_flags: Option<unsafe extern "C" fn(hevent: CUevent, hstream: CUstream, flags: u32) -> CUresult>,
pub cu_func_get_attribute: Option<unsafe extern "C" fn(value: *mut c_int, attrib: c_int, func: CUfunction) -> CUresult>,
pub cu_func_set_cache_config: Option<unsafe extern "C" fn(func: CUfunction, config: u32) -> CUresult>,
pub cu_func_set_shared_mem_config: Option<unsafe extern "C" fn(func: CUfunction, config: u32) -> CUresult>,
pub cu_func_set_attribute: Option<unsafe extern "C" fn(func: CUfunction, attrib: c_int, value: c_int) -> CUresult>,
pub cu_profiler_start: Option<unsafe extern "C" fn() -> CUresult>,
pub cu_profiler_stop: Option<unsafe extern "C" fn() -> CUresult>,
pub cu_launch_kernel_ex: Option<unsafe extern "C" fn(config: *const CuLaunchConfig, f: CUfunction, kernel_params: *mut *mut c_void, extra: *mut *mut c_void) -> CUresult>,
pub cu_tensor_map_encode_tiled: Option<unsafe extern "C" fn(tensor_map: *mut c_void, tensor_data_type: u32, tensor_rank: u32, global_address: *mut c_void, global_dim: *const u64, global_strides: *const u64, box_dim: *const u32, element_strides: *const u32, interleave: u32, swizzle: u32, l2_promotion: u32, oob_fill: u32) -> CUresult>,
pub cu_tensor_map_encode_tiled_memref: Option<unsafe extern "C" fn(tensor_map: *mut c_void, tensor_data_type: u32, tensor_rank: u32, global_address: *mut c_void, global_dim: *const u64, global_strides: *const u64, box_dim: *const u32, element_strides: *const u32, interleave: u32, swizzle: u32, l2_promotion: u32, oob_fill: u32, flags: u64) -> CUresult>,
pub cu_kernel_get_library: Option<unsafe extern "C" fn(p_lib: *mut CUlibrary, kernel: CUkernel) -> CUresult>,
pub cu_multicast_get_granularity: Option<unsafe extern "C" fn(granularity: *mut usize, desc: *const c_void, option: u32) -> CUresult>,
pub cu_multicast_create: Option<unsafe extern "C" fn(mc_handle: *mut CUmulticastObject, desc: *const c_void) -> CUresult>,
pub cu_multicast_add_device: Option<unsafe extern "C" fn(mc_handle: CUmulticastObject, dev: CUdevice) -> CUresult>,
pub cu_memcpy_batch_async: Option<unsafe extern "C" fn(dsts: *const *mut c_void, srcs: *const *const c_void, sizes: *const usize, count: u64, flags: u64, stream: CUstream) -> CUresult>,
pub cu_array_create_v2: Option<unsafe extern "C" fn(p_handle: *mut CUarray, p_allocate_array: *const CUDA_ARRAY_DESCRIPTOR) -> CUresult>,
pub cu_array_destroy: Option<unsafe extern "C" fn(h_array: CUarray) -> CUresult>,
pub cu_array_get_descriptor_v2: Option<unsafe extern "C" fn(p_array_descriptor: *mut CUDA_ARRAY_DESCRIPTOR, h_array: CUarray) -> CUresult>,
pub cu_array3d_create_v2: Option<unsafe extern "C" fn(p_handle: *mut CUarray, p_allocate_array: *const CUDA_ARRAY3D_DESCRIPTOR) -> CUresult>,
pub cu_array3d_get_descriptor_v2: Option<unsafe extern "C" fn(p_array_descriptor: *mut CUDA_ARRAY3D_DESCRIPTOR, h_array: CUarray) -> CUresult>,
pub cu_memcpy_htoa_v2: Option<unsafe extern "C" fn(dst_array: CUarray, dst_offset: usize, src_host: *const c_void, byte_count: usize) -> CUresult>,
pub cu_memcpy_atoh_v2: Option<unsafe extern "C" fn(dst_host: *mut c_void, src_array: CUarray, src_offset: usize, byte_count: usize) -> CUresult>,
pub cu_memcpy_htoa_async_v2: Option<unsafe extern "C" fn(dst_array: CUarray, dst_offset: usize, src_host: *const c_void, byte_count: usize, stream: CUstream) -> CUresult>,
pub cu_memcpy_atoh_async_v2: Option<unsafe extern "C" fn(dst_host: *mut c_void, src_array: CUarray, src_offset: usize, byte_count: usize, stream: CUstream) -> CUresult>,
pub cu_tex_object_create: Option<unsafe extern "C" fn(p_tex_object: *mut CUtexObject, p_res_desc: *const CUDA_RESOURCE_DESC, p_tex_desc: *const CUDA_TEXTURE_DESC, p_res_view_desc: *const CUDA_RESOURCE_VIEW_DESC) -> CUresult>,
pub cu_tex_object_destroy: Option<unsafe extern "C" fn(tex_object: CUtexObject) -> CUresult>,
pub cu_tex_object_get_resource_desc: Option<unsafe extern "C" fn(p_res_desc: *mut CUDA_RESOURCE_DESC, tex_object: CUtexObject) -> CUresult>,
pub cu_surf_object_create: Option<unsafe extern "C" fn(p_surf_object: *mut CUsurfObject, p_res_desc: *const CUDA_RESOURCE_DESC) -> CUresult>,
pub cu_surf_object_destroy: Option<unsafe extern "C" fn(surf_object: CUsurfObject) -> CUresult>,
/* private fields */
}
Complete function-pointer table for the CUDA Driver API.
An instance of this struct is produced by DriverApi::load and kept
alive for the lifetime of the process inside the DRIVER singleton.
The embedded Library handle ensures the shared object is not unloaded.
§Function pointer groups
The fields are organised into logical groups mirroring the CUDA Driver API documentation:
- Initialisation — cu_init
- Device management — cu_device_*
- Context management — cu_ctx_*
- Module management — cu_module_*
- Memory management — cu_mem_*, cu_memcpy_*, cu_memset_*
- Stream management — cu_stream_*
- Event management — cu_event_*
- Kernel launch — cu_launch_kernel
- Occupancy queries — cu_occupancy_*
Fields§
§cu_init: unsafe extern "C" fn(flags: u32) -> CUresultcuInit(flags) -> CUresult
Initialises the CUDA driver API. Must be called before any other
driver function. Passing 0 for flags is the only documented
value.
cu_driver_get_version: unsafe extern "C" fn(version: *mut c_int) -> CUresultcuDriverGetVersion(driverVersion*) -> CUresult
Returns the CUDA driver version as major*1000 + minor*10.
cu_device_get: unsafe extern "C" fn(device: *mut CUdevice, ordinal: c_int) -> CUresultcuDeviceGet(device*, ordinal) -> CUresult
Returns a handle to a compute device.
cu_device_get_count: unsafe extern "C" fn(count: *mut c_int) -> CUresultcuDeviceGetCount(count*) -> CUresult
Returns the number of compute-capable devices.
cu_device_get_name: unsafe extern "C" fn(name: *mut c_char, len: c_int, dev: CUdevice) -> CUresultcuDeviceGetName(name*, len, dev) -> CUresult
Returns an ASCII string identifying the device.
cu_device_get_attribute: unsafe extern "C" fn(pi: *mut c_int, attrib: CUdevice_attribute, dev: CUdevice) -> CUresultcuDeviceGetAttribute(pi*, attrib, dev) -> CUresult
Returns information about the device.
cu_device_total_mem_v2: unsafe extern "C" fn(bytes: *mut usize, dev: CUdevice) -> CUresultcuDeviceTotalMem_v2(bytes*, dev) -> CUresult
Returns the total amount of memory on the device.
cu_device_can_access_peer: unsafe extern "C" fn(can_access: *mut c_int, dev: CUdevice, peer_dev: CUdevice) -> CUresultcuDeviceCanAccessPeer(canAccessPeer*, dev, peerDev) -> CUresult
Queries if a device may directly access a peer device’s memory.
cu_device_primary_ctx_retain: unsafe extern "C" fn(pctx: *mut CUcontext, dev: CUdevice) -> CUresultcuDevicePrimaryCtxRetain(pctx*, dev) -> CUresult
Retains the primary context on the device, creating it if necessary.
cu_device_primary_ctx_release_v2: unsafe extern "C" fn(dev: CUdevice) -> CUresultcuDevicePrimaryCtxRelease_v2(dev) -> CUresult
Releases the primary context on the device.
cu_device_primary_ctx_set_flags_v2: unsafe extern "C" fn(dev: CUdevice, flags: u32) -> CUresultcuDevicePrimaryCtxSetFlags_v2(dev, flags) -> CUresult
Sets flags for the primary context.
cu_device_primary_ctx_get_state: unsafe extern "C" fn(dev: CUdevice, flags: *mut u32, active: *mut c_int) -> CUresultcuDevicePrimaryCtxGetState(dev, flags*, active*) -> CUresult
Returns the state (flags and active status) of the primary context.
cu_device_primary_ctx_reset_v2: unsafe extern "C" fn(dev: CUdevice) -> CUresultcuDevicePrimaryCtxReset_v2(dev) -> CUresult
Resets the primary context on the device.
cu_ctx_create_v2: unsafe extern "C" fn(pctx: *mut CUcontext, flags: u32, dev: CUdevice) -> CUresultcuCtxCreate_v2(pctx*, flags, dev) -> CUresult
Creates a new CUDA context and associates it with the calling thread.
cu_ctx_destroy_v2: unsafe extern "C" fn(ctx: CUcontext) -> CUresultcuCtxDestroy_v2(ctx) -> CUresult
Destroys a CUDA context.
cu_ctx_set_current: unsafe extern "C" fn(ctx: CUcontext) -> CUresultcuCtxSetCurrent(ctx) -> CUresult
Binds the specified CUDA context to the calling CPU thread.
cu_ctx_get_current: unsafe extern "C" fn(pctx: *mut CUcontext) -> CUresultcuCtxGetCurrent(pctx*) -> CUresult
Returns the CUDA context bound to the calling CPU thread.
cu_ctx_synchronize: unsafe extern "C" fn() -> CUresultcuCtxSynchronize() -> CUresult
Blocks until the device has completed all preceding requested tasks.
cu_module_load_data: unsafe extern "C" fn(module: *mut CUmodule, image: *const c_void) -> CUresultcuModuleLoadData(module*, image*) -> CUresult
Loads a module from a PTX or cubin image in host memory.
cu_module_load_data_ex: unsafe extern "C" fn(module: *mut CUmodule, image: *const c_void, num_options: u32, options: *mut CUjit_option, option_values: *mut *mut c_void) -> CUresultcuModuleLoadDataEx(module*, image*, numOptions, options*, optionValues*) -> CUresult
Loads a module with JIT compiler options.
cu_module_get_function: unsafe extern "C" fn(hfunc: *mut CUfunction, hmod: CUmodule, name: *const c_char) -> CUresultcuModuleGetFunction(hfunc*, hmod, name*) -> CUresult
Returns a handle to a function within a module.
cu_module_unload: unsafe extern "C" fn(hmod: CUmodule) -> CUresultcuModuleUnload(hmod) -> CUresult
Unloads a module from the current context.
cu_mem_alloc_v2: unsafe extern "C" fn(dptr: *mut CUdeviceptr, bytesize: usize) -> CUresultcuMemAlloc_v2(dptr*, bytesize) -> CUresult
Allocates device memory.
cu_mem_free_v2: unsafe extern "C" fn(dptr: CUdeviceptr) -> CUresultcuMemFree_v2(dptr) -> CUresult
Frees device memory.
cu_memcpy_htod_v2: unsafe extern "C" fn(dst: CUdeviceptr, src: *const c_void, bytesize: usize) -> CUresultcuMemcpyHtoD_v2(dst, src*, bytesize) -> CUresult
Copies data from host memory to device memory.
cu_memcpy_dtoh_v2: unsafe extern "C" fn(dst: *mut c_void, src: CUdeviceptr, bytesize: usize) -> CUresultcuMemcpyDtoH_v2(dst*, src, bytesize) -> CUresult
Copies data from device memory to host memory.
cu_memcpy_dtod_v2: unsafe extern "C" fn(dst: CUdeviceptr, src: CUdeviceptr, bytesize: usize) -> CUresultcuMemcpyDtoD_v2(dst, src, bytesize) -> CUresult
Copies data from device memory to device memory.
cu_memcpy_htod_async_v2: unsafe extern "C" fn(dst: CUdeviceptr, src: *const c_void, bytesize: usize, stream: CUstream) -> CUresultcuMemcpyHtoDAsync_v2(dst, src*, bytesize, stream) -> CUresult
Asynchronously copies data from host to device memory.
cu_memcpy_dtoh_async_v2: unsafe extern "C" fn(dst: *mut c_void, src: CUdeviceptr, bytesize: usize, stream: CUstream) -> CUresultcuMemcpyDtoHAsync_v2(dst*, src, bytesize, stream) -> CUresult
Asynchronously copies data from device to host memory.
cu_mem_alloc_host_v2: unsafe extern "C" fn(pp: *mut *mut c_void, bytesize: usize) -> CUresultcuMemAllocHost_v2(pp*, bytesize) -> CUresult
Allocates page-locked (pinned) host memory.
cu_mem_free_host: unsafe extern "C" fn(p: *mut c_void) -> CUresultcuMemFreeHost(p*) -> CUresult
Frees page-locked host memory.
cu_mem_alloc_managed: unsafe extern "C" fn(dptr: *mut CUdeviceptr, bytesize: usize, flags: u32) -> CUresultcuMemAllocManaged(dptr*, bytesize, flags) -> CUresult
Allocates unified memory accessible from both host and device.
cu_memset_d8_v2: unsafe extern "C" fn(dst: CUdeviceptr, value: u8, count: usize) -> CUresultcuMemsetD8_v2(dst, value, count) -> CUresult
Sets device memory to a value (byte granularity).
cu_memset_d32_v2: unsafe extern "C" fn(dst: CUdeviceptr, value: u32, count: usize) -> CUresultcuMemsetD32_v2(dst, value, count) -> CUresult
Sets device memory to a value (32-bit granularity).
cu_mem_get_info_v2: unsafe extern "C" fn(free: *mut usize, total: *mut usize) -> CUresultcuMemGetInfo_v2(free*, total*) -> CUresult
Returns free and total memory for the current context’s device.
cu_mem_host_register_v2: unsafe extern "C" fn(p: *mut c_void, bytesize: usize, flags: u32) -> CUresultcuMemHostRegister_v2(p*, bytesize, flags) -> CUresult
Registers an existing host memory range for use by CUDA.
cu_mem_host_unregister: unsafe extern "C" fn(p: *mut c_void) -> CUresultcuMemHostUnregister(p*) -> CUresult
Unregisters a memory range that was registered with cuMemHostRegister.
cu_mem_host_get_device_pointer_v2: unsafe extern "C" fn(pdptr: *mut CUdeviceptr, p: *mut c_void, flags: u32) -> CUresultcuMemHostGetDevicePointer_v2(pdptr*, p*, flags) -> CUresult
Returns the device pointer mapped to a registered host pointer.
cu_pointer_get_attribute: unsafe extern "C" fn(data: *mut c_void, attribute: u32, ptr: CUdeviceptr) -> CUresultcuPointerGetAttribute(data*, attribute, ptr) -> CUresult
Returns information about a pointer.
cu_mem_advise: unsafe extern "C" fn(dev_ptr: CUdeviceptr, count: usize, advice: u32, device: CUdevice) -> CUresultcuMemAdvise(devPtr, count, advice, device) -> CUresult
Advises the unified memory subsystem about usage patterns.
cu_mem_prefetch_async: unsafe extern "C" fn(dev_ptr: CUdeviceptr, count: usize, dst_device: CUdevice, hstream: CUstream) -> CUresultcuMemPrefetchAsync(devPtr, count, dstDevice, hStream) -> CUresult
Prefetches unified memory to the specified device.
cu_stream_create: unsafe extern "C" fn(phstream: *mut CUstream, flags: u32) -> CUresultcuStreamCreate(phStream*, flags) -> CUresult
Creates a stream.
cu_stream_create_with_priority: unsafe extern "C" fn(phstream: *mut CUstream, flags: u32, priority: c_int) -> CUresultcuStreamCreateWithPriority(phStream*, flags, priority) -> CUresult
Creates a stream with the given priority.
cu_stream_destroy_v2: unsafe extern "C" fn(hstream: CUstream) -> CUresultcuStreamDestroy_v2(hStream) -> CUresult
Destroys a stream.
cu_stream_synchronize: unsafe extern "C" fn(hstream: CUstream) -> CUresultcuStreamSynchronize(hStream) -> CUresult
Waits until a stream’s tasks are completed.
cu_stream_wait_event: unsafe extern "C" fn(hstream: CUstream, hevent: CUevent, flags: u32) -> CUresultcuStreamWaitEvent(hStream, hEvent, flags) -> CUresult
Makes all future work submitted to the stream wait for the event.
cu_stream_query: unsafe extern "C" fn(hstream: CUstream) -> CUresultcuStreamQuery(hStream) -> CUresult
Returns CUDA_SUCCESS if all operations in the stream have completed,
CUDA_ERROR_NOT_READY if still pending.
cu_stream_get_priority: unsafe extern "C" fn(hstream: CUstream, priority: *mut c_int) -> CUresultcuStreamGetPriority(hStream, priority*) -> CUresult
Queries the priority of hStream.
cu_stream_get_flags: unsafe extern "C" fn(hstream: CUstream, flags: *mut u32) -> CUresultcuStreamGetFlags(hStream, flags*) -> CUresult
Queries the flags of hStream.
cu_event_create: unsafe extern "C" fn(phevent: *mut CUevent, flags: u32) -> CUresultcuEventCreate(phEvent*, flags) -> CUresult
Creates an event.
cu_event_destroy_v2: unsafe extern "C" fn(hevent: CUevent) -> CUresultcuEventDestroy_v2(hEvent) -> CUresult
Destroys an event.
cu_event_record: unsafe extern "C" fn(hevent: CUevent, hstream: CUstream) -> CUresultcuEventRecord(hEvent, hStream) -> CUresult
Records an event in a stream.
cu_event_query: unsafe extern "C" fn(hevent: CUevent) -> CUresultcuEventQuery(hEvent) -> CUresult
Queries the status of an event. Returns CUDA_SUCCESS if complete,
CUDA_ERROR_NOT_READY if still pending.
cu_event_synchronize: unsafe extern "C" fn(hevent: CUevent) -> CUresultcuEventSynchronize(hEvent) -> CUresult
Waits until an event completes.
cu_event_elapsed_time: unsafe extern "C" fn(pmilliseconds: *mut f32, hstart: CUevent, hend: CUevent) -> CUresultcuEventElapsedTime(pMilliseconds*, hStart, hEnd) -> CUresult
Computes the elapsed time between two events.
cu_memcpy_peer: unsafe extern "C" fn(dst_device: u64, dst_ctx: CUcontext, src_device: u64, src_ctx: CUcontext, count: usize) -> CUresultcuMemcpyPeer(dstDevice, dstContext, srcDevice, srcContext, count) -> CUresult
Copies device memory between two contexts.
cu_memcpy_peer_async: unsafe extern "C" fn(dst_device: u64, dst_ctx: CUcontext, src_device: u64, src_ctx: CUcontext, count: usize, stream: CUstream) -> CUresultcuMemcpyPeerAsync(..., hStream) -> CUresult
Asynchronous cross-device copy.
cu_ctx_enable_peer_access: unsafe extern "C" fn(peer_context: CUcontext, flags: u32) -> CUresultcuCtxEnablePeerAccess(peerContext, flags) -> CUresult
Enables direct access from the current context to memory allocations in a peer context.
cu_ctx_disable_peer_access: unsafe extern "C" fn(peer_context: CUcontext) -> CUresultcuCtxDisablePeerAccess(peerContext) -> CUresult
Disables peer access to a context.
cu_launch_kernel: unsafe extern "C" fn(f: CUfunction, grid_dim_x: u32, grid_dim_y: u32, grid_dim_z: u32, block_dim_x: u32, block_dim_y: u32, block_dim_z: u32, shared_mem_bytes: u32, hstream: CUstream, kernel_params: *mut *mut c_void, extra: *mut *mut c_void) -> CUresultcuLaunchKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams**, extra**) -> CUresult
Launches a CUDA kernel.
cu_launch_cooperative_kernel: unsafe extern "C" fn(f: CUfunction, grid_dim_x: u32, grid_dim_y: u32, grid_dim_z: u32, block_dim_x: u32, block_dim_y: u32, block_dim_z: u32, shared_mem_bytes: u32, hstream: CUstream, kernel_params: *mut *mut c_void) -> CUresultcuLaunchCooperativeKernel(f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, hStream, kernelParams**) -> CUresult
Launches a cooperative CUDA kernel (CUDA 9.0+).
cu_launch_cooperative_kernel_multi_device: unsafe extern "C" fn(launch_params_list: *mut c_void, num_devices: u32, flags: u32) -> CUresultcuLaunchCooperativeKernelMultiDevice(launchParamsList*, numDevices, flags) -> CUresult
Launches a cooperative kernel across multiple devices (CUDA 9.0+).
cu_occupancy_max_active_blocks_per_multiprocessor: unsafe extern "C" fn(num_blocks: *mut c_int, func: CUfunction, block_size: c_int, dynamic_smem_size: usize) -> CUresultcuOccupancyMaxActiveBlocksPerMultiprocessor(numBlocks*, func, blockSize, dynamicSMemSize) -> CUresult
Returns the maximum number of active blocks per streaming multiprocessor.
cu_occupancy_max_potential_block_size: unsafe extern "C" fn(min_grid_size: *mut c_int, block_size: *mut c_int, func: CUfunction, block_size_to_dynamic_smem_size: Option<unsafe extern "C" fn(c_int) -> usize>, dynamic_smem_size: usize, block_size_limit: c_int) -> CUresultcuOccupancyMaxPotentialBlockSize(minGridSize*, blockSize*, func, blockSizeToDynamicSMemSize, dynamicSMemSize, blockSizeLimit) -> CUresult
Suggests a launch configuration with reasonable occupancy.
cu_occupancy_max_active_blocks_per_multiprocessor_with_flags: unsafe extern "C" fn(num_blocks: *mut c_int, func: CUfunction, block_size: c_int, dynamic_smem_size: usize, flags: u32) -> CUresultcuOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(numBlocks*, func, blockSize, dynamicSMemSize, flags) -> CUresult
Like cuOccupancyMaxActiveBlocksPerMultiprocessor but with flags
to control caching behaviour (CUDA 9.0+).
cu_memcpy_dtod_async_v2: Option<unsafe extern "C" fn(dst: CUdeviceptr, src: CUdeviceptr, bytesize: usize, stream: CUstream) -> CUresult>cuMemcpyDtoDAsync_v2(dst, src, bytesize, stream) -> CUresult
Asynchronously copies data from device memory to device memory.
cu_memset_d16_v2: Option<unsafe extern "C" fn(dst: CUdeviceptr, value: u16, count: usize) -> CUresult>cuMemsetD16_v2(dst, value, count) -> CUresult
Sets device memory to a value (16-bit granularity).
cu_memset_d32_async: Option<unsafe extern "C" fn(dst: CUdeviceptr, value: u32, count: usize, stream: CUstream) -> CUresult>cuMemsetD32Async(dst, value, count, stream) -> CUresult
Asynchronously sets device memory to a value (32-bit granularity).
cu_ctx_get_limit: Option<unsafe extern "C" fn(value: *mut usize, limit: u32) -> CUresult>cuCtxGetLimit(value*, limit) -> CUresult
Returns the value of a context limit.
cu_ctx_set_limit: Option<unsafe extern "C" fn(limit: u32, value: usize) -> CUresult>cuCtxSetLimit(limit, value) -> CUresult
Sets a context limit.
cu_ctx_get_cache_config: Option<unsafe extern "C" fn(config: *mut u32) -> CUresult>cuCtxGetCacheConfig(config*) -> CUresult
Returns the current cache configuration for the context.
cu_ctx_set_cache_config: Option<unsafe extern "C" fn(config: u32) -> CUresult>cuCtxSetCacheConfig(config) -> CUresult
Sets the cache configuration for the current context.
cu_ctx_get_shared_mem_config: Option<unsafe extern "C" fn(config: *mut u32) -> CUresult>cuCtxGetSharedMemConfig(config*) -> CUresult
Returns the shared memory configuration for the context.
cu_ctx_set_shared_mem_config: Option<unsafe extern "C" fn(config: u32) -> CUresult>cuCtxSetSharedMemConfig(config) -> CUresult
Sets the shared memory configuration for the current context.
cu_event_record_with_flags: Option<unsafe extern "C" fn(hevent: CUevent, hstream: CUstream, flags: u32) -> CUresult>cuEventRecordWithFlags(hEvent, hStream, flags) -> CUresult
Records an event in a stream with additional flags (CUDA 11.1+).
Falls back to cu_event_record when None.
cu_func_get_attribute: Option<unsafe extern "C" fn(value: *mut c_int, attrib: c_int, func: CUfunction) -> CUresult>cuFuncGetAttribute(value*, attrib, func) -> CUresult
Returns information about a function.
cu_func_set_cache_config: Option<unsafe extern "C" fn(func: CUfunction, config: u32) -> CUresult>cuFuncSetCacheConfig(func, config) -> CUresult
Sets the cache configuration for a device function.
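cu_func_set_shared_mem_config: Option<unsafe extern "C" fn(func: CUfunction, config: u32) -> CUresult>cuFuncSetSharedMemConfig(func, config) -> CUresult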
Sets the shared memory configuration for a device function.
cu_func_set_attribute: Option<unsafe extern "C" fn(func: CUfunction, attrib: c_int, value: c_int) -> CUresult>cuFuncSetAttribute(func, attrib, value) -> CUresult
Sets an attribute value for a device function.
cu_profiler_start: Option<unsafe extern "C" fn() -> CUresult>cuProfilerStart() -> CUresult
Starts the CUDA profiler.
cu_profiler_stop: Option<unsafe extern "C" fn() -> CUresult>cuProfilerStop() -> CUresult
Stops the CUDA profiler.
cu_launch_kernel_ex: Option<unsafe extern "C" fn(config: *const CuLaunchConfig, f: CUfunction, kernel_params: *mut *mut c_void, extra: *mut *mut c_void) -> CUresult>cuLaunchKernelEx(config*, f, kernelParams**, extra**) -> CUresult
Extended kernel launch with cluster dimensions and other CUDA 12.0+ attributes. Available only when the driver is CUDA 12.0 or newer.
When None, fall back to cu_launch_kernel.
cu_tensor_map_encode_tiled: Option<unsafe extern "C" fn(tensor_map: *mut c_void, tensor_data_type: u32, tensor_rank: u32, global_address: *mut c_void, global_dim: *const u64, global_strides: *const u64, box_dim: *const u32, element_strides: *const u32, interleave: u32, swizzle: u32, l2_promotion: u32, oob_fill: u32) -> CUresult>cuTensorMapEncodeTiled(tensorMap*, ...) -> CUresult
Creates a TMA tensor map descriptor for tiled access patterns. Available on CUDA 12.0+ with sm_90+ (Hopper/Blackwell).
When None, TMA is not supported by the loaded driver.
cu_tensor_map_encode_tiled_memref: Option<unsafe extern "C" fn(tensor_map: *mut c_void, tensor_data_type: u32, tensor_rank: u32, global_address: *mut c_void, global_dim: *const u64, global_strides: *const u64, box_dim: *const u32, element_strides: *const u32, interleave: u32, swizzle: u32, l2_promotion: u32, oob_fill: u32, flags: u64) -> CUresult>cuTensorMapEncodeTiledMemref(tensorMap*, ...) -> CUresult
Extended TMA encoding using memref descriptors (CUDA 12.8+,
Blackwell sm_100/sm_120). When None, fall back to
cu_tensor_map_encode_tiled.
cu_kernel_get_library: Option<unsafe extern "C" fn(p_lib: *mut CUlibrary, kernel: CUkernel) -> CUresult>cuKernelGetLibrary(pLib*, kernel) -> CUresult
Returns the library handle that owns a given kernel handle
(CUDA 12.8+). When None, the driver does not support the JIT
library API.
cu_multicast_get_granularity: Option<unsafe extern "C" fn(granularity: *mut usize, desc: *const c_void, option: u32) -> CUresult>cuMulticastGetGranularity(granularity*, desc*, option) -> CUresult
Queries the recommended memory granularity for an NVLink multicast
object (CUDA 12.8+). When None, multicast memory is not supported.
cu_multicast_create: Option<unsafe extern "C" fn(mc_handle: *mut CUmulticastObject, desc: *const c_void) -> CUresult>cuMulticastCreate(mcHandle*, desc*) -> CUresult
Creates an NVLink multicast object for cross-GPU broadcast memory
(CUDA 12.8+). When None, multicast memory is not supported.
cu_multicast_add_device: Option<unsafe extern "C" fn(mc_handle: CUmulticastObject, dev: CUdevice) -> CUresult>
cuMulticastAddDevice(mcHandle, dev) -> CUresult
Adds a device to an NVLink multicast group (CUDA 12.8+). When
None, multicast memory is not supported.
cu_memcpy_batch_async: Option<unsafe extern "C" fn(dsts: *const *mut c_void, srcs: *const *const c_void, sizes: *const usize, count: u64, flags: u64, stream: CUstream) -> CUresult>
cuMemcpyBatchAsync(dsts*, srcs*, sizes*, count, flags, stream) -> CUresult
Issues count asynchronous memory copies (H2D, D2H, or D2D) in a
single driver call (CUDA 12.8+). When None, issue individual
cuMemcpyAsync calls as a fallback.
cu_array_create_v2: Option<unsafe extern "C" fn(p_handle: *mut CUarray, p_allocate_array: *const CUDA_ARRAY_DESCRIPTOR) -> CUresult>
cuArrayCreate_v2(pHandle*, pAllocateArray*) -> CUresult
Allocates a 1-D or 2-D CUDA array. When None, CUDA array allocation
is not supported by the loaded driver.
cu_array_destroy: Option<unsafe extern "C" fn(h_array: CUarray) -> CUresult>
cuArrayDestroy(hArray) -> CUresult
Frees a CUDA array previously allocated by cuArrayCreate_v2.
cu_array_get_descriptor_v2: Option<unsafe extern "C" fn(p_array_descriptor: *mut CUDA_ARRAY_DESCRIPTOR, h_array: CUarray) -> CUresult>
cuArrayGetDescriptor_v2(pArrayDescriptor*, hArray) -> CUresult
Returns the descriptor of a 1-D or 2-D CUDA array.
cu_array3d_create_v2: Option<unsafe extern "C" fn(p_handle: *mut CUarray, p_allocate_array: *const CUDA_ARRAY3D_DESCRIPTOR) -> CUresult>
cuArray3DCreate_v2(pHandle*, pAllocateArray*) -> CUresult
Allocates a 3-D CUDA array (also supports layered and cubemap arrays).
cu_array3d_get_descriptor_v2: Option<unsafe extern "C" fn(p_array_descriptor: *mut CUDA_ARRAY3D_DESCRIPTOR, h_array: CUarray) -> CUresult>
cuArray3DGetDescriptor_v2(pArrayDescriptor*, hArray) -> CUresult
Returns the descriptor of a 3-D CUDA array.
cu_memcpy_htoa_v2: Option<unsafe extern "C" fn(dst_array: CUarray, dst_offset: usize, src_host: *const c_void, byte_count: usize) -> CUresult>
cuMemcpyHtoA_v2(dstArray, dstOffset, srcHost*, ByteCount) -> CUresult
Synchronously copies host memory into a CUDA array.
cu_memcpy_atoh_v2: Option<unsafe extern "C" fn(dst_host: *mut c_void, src_array: CUarray, src_offset: usize, byte_count: usize) -> CUresult>
cuMemcpyAtoH_v2(dstHost*, srcArray, srcOffset, ByteCount) -> CUresult
Synchronously copies data from a CUDA array into host memory.
cu_memcpy_htoa_async_v2: Option<unsafe extern "C" fn(dst_array: CUarray, dst_offset: usize, src_host: *const c_void, byte_count: usize, stream: CUstream) -> CUresult>
cuMemcpyHtoAAsync_v2(dstArray, dstOffset, srcHost*, byteCount, stream) -> CUresult
Asynchronously copies host memory into a CUDA array on a stream.
cu_memcpy_atoh_async_v2: Option<unsafe extern "C" fn(dst_host: *mut c_void, src_array: CUarray, src_offset: usize, byte_count: usize, stream: CUstream) -> CUresult>
cuMemcpyAtoHAsync_v2(dstHost*, srcArray, srcOffset, byteCount, stream) -> CUresult
Asynchronously copies data from a CUDA array into host memory on a stream.
cu_tex_object_create: Option<unsafe extern "C" fn(p_tex_object: *mut CUtexObject, p_res_desc: *const CUDA_RESOURCE_DESC, p_tex_desc: *const CUDA_TEXTURE_DESC, p_res_view_desc: *const CUDA_RESOURCE_VIEW_DESC) -> CUresult>
cuTexObjectCreate(pTexObject*, pResDesc*, pTexDesc*, pResViewDesc*) -> CUresult
Creates a texture object from a resource descriptor, texture descriptor, and optional resource-view descriptor (CUDA 5.0+).
cu_tex_object_destroy: Option<unsafe extern "C" fn(tex_object: CUtexObject) -> CUresult>
cuTexObjectDestroy(texObject) -> CUresult
Destroys a texture object created by cuTexObjectCreate.
cu_tex_object_get_resource_desc: Option<unsafe extern "C" fn(p_res_desc: *mut CUDA_RESOURCE_DESC, tex_object: CUtexObject) -> CUresult>
cuTexObjectGetResourceDesc(pResDesc*, texObject) -> CUresult
Returns the resource descriptor of a texture object.
cu_surf_object_create: Option<unsafe extern "C" fn(p_surf_object: *mut CUsurfObject, p_res_desc: *const CUDA_RESOURCE_DESC) -> CUresult>
cuSurfObjectCreate(pSurfObject*, pResDesc*) -> CUresult
Creates a surface object from a resource descriptor (CUDA 5.0+).
The resource type must be Array (surface-capable CUDA arrays only).
cu_surf_object_destroy: Option<unsafe extern "C" fn(surf_object: CUsurfObject) -> CUresult>
cuSurfObjectDestroy(surfObject) -> CUresult
Destroys a surface object created by cuSurfObjectCreate.
Implementations§
impl DriverApi
pub fn load() -> Result<Self, DriverLoadError>
Attempt to dynamically load the CUDA driver shared library and resolve every required symbol.
§Platform behaviour
- macOS — immediately returns DriverLoadError::UnsupportedPlatform.
- Linux — tries libcuda.so.1, then libcuda.so.
- Windows — tries nvcuda.dll.
§Errors
- DriverLoadError::UnsupportedPlatform on macOS.
- DriverLoadError::LibraryNotFound if none of the candidate library names could be opened.
- DriverLoadError::SymbolNotFound if a required CUDA entry point is missing from the loaded library.