Expand description
§OxiCUDA Driver
Dynamic, safe Rust bindings for the NVIDIA CUDA Driver API.
oxicuda-driver provides a zero-SDK-dependency wrapper around the CUDA
Driver API. Unlike traditional CUDA crate approaches that require the
CUDA Toolkit (or at least its headers and link stubs) to be present at
build time, this crate loads the driver shared library entirely at
runtime via libloading.
§Zero build-time dependency
No cuda.h, no libcuda.so symlink, no nvcc — the crate compiles on
any Rust toolchain. The actual GPU driver is discovered and loaded the
first time you call try_driver() or init().
§Runtime library loading
| Platform | Library searched |
|---|---|
| Linux | libcuda.so, libcuda.so.1 |
| Windows | nvcuda.dll |
| macOS | (returns UnsupportedPlatform — NVIDIA dropped macOS support) |
§Key types
| Type | Description |
|---|---|
| Device | A CUDA-capable GPU discovered on the system |
| Context | Owns a CUDA context bound to a device |
| Stream | Asynchronous command queue within a context |
| Event | Timing / synchronisation marker on a stream |
| Module | Loaded PTX or cubin containing kernel code |
| Function | A single kernel entry point inside a module |
| CudaError | Strongly-typed driver error code |
§Quick start
use oxicuda_driver::prelude::*;
// Initialise the CUDA driver (loads libcuda at runtime).
init()?;
// Select the first GPU (device 0) and create a context on it.
let dev = Device::get(0)?;
let _ctx = Context::new(&dev)?;
// Load a PTX module and look up a kernel.
let module = Module::from_ptx("ptx_source")?;
let kernel = module.get_function("vector_add")?;
Re-exports§
pub use error::CudaError;
pub use error::CudaResult;
pub use error::DriverLoadError;
pub use error::check;
pub use ffi::CU_TRSF_DISABLE_TRILINEAR_OPTIMIZATION;
pub use ffi::CU_TRSF_NORMALIZED_COORDINATES;
pub use ffi::CU_TRSF_READ_AS_INTEGER;
pub use ffi::CU_TRSF_SRGB;
pub use ffi::CUDA_ARRAY_DESCRIPTOR;
pub use ffi::CUDA_ARRAY3D_CUBEMAP;
pub use ffi::CUDA_ARRAY3D_DESCRIPTOR;
pub use ffi::CUDA_ARRAY3D_LAYERED;
pub use ffi::CUDA_ARRAY3D_SURFACE_LDST;
pub use ffi::CUDA_ARRAY3D_TEXTURE_GATHER;
pub use ffi::CUDA_RESOURCE_DESC;
pub use ffi::CUDA_RESOURCE_VIEW_DESC;
pub use ffi::CUDA_TEXTURE_DESC;
pub use ffi::CUaddress_mode;
pub use ffi::CUarray;
pub use ffi::CUarray_format;
pub use ffi::CUcontext;
pub use ffi::CUdevice;
pub use ffi::CUdevice_attribute;
pub use ffi::CUdeviceptr;
pub use ffi::CUevent;
pub use ffi::CUfilter_mode;
pub use ffi::CUfunction;
pub use ffi::CUfunction_attribute;
pub use ffi::CUjit_option;
pub use ffi::CUkernel;
pub use ffi::CUlibrary;
pub use ffi::CUlimit;
pub use ffi::CUmemoryPool;
pub use ffi::CUmemorytype;
pub use ffi::CUmipmappedArray;
pub use ffi::CUmodule;
pub use ffi::CUmulticastObject;
pub use ffi::CUpointer_attribute;
pub use ffi::CUresourceViewFormat;
pub use ffi::CUresourcetype;
pub use ffi::CUstream;
pub use ffi::CUsurfObject;
pub use ffi::CUsurfref;
pub use ffi::CUtexObject;
pub use ffi::CUtexref;
pub use ffi::CuLaunchAttribute;
pub use ffi::CuLaunchAttributeClusterDim;
pub use ffi::CuLaunchAttributeId;
pub use ffi::CuLaunchAttributeValue;
pub use ffi::CuLaunchConfig;
pub use ffi::CudaResourceDescArray;
pub use ffi::CudaResourceDescLinear;
pub use ffi::CudaResourceDescMipmap;
pub use ffi::CudaResourceDescPitch2d;
pub use ffi::CudaResourceDescRes;
pub use context::Context;
pub use context_config::CacheConfig;
pub use cooperative_launch::CooperativeLaunchConfig;
pub use cooperative_launch::CooperativeLaunchSupport;
pub use cooperative_launch::DeviceLaunchConfig;
pub use cooperative_launch::MultiDeviceCooperativeLaunchConfig;
pub use cooperative_launch::cooperative_launch;
pub use cooperative_launch::cooperative_launch_multi_device;
pub use debug::DebugLevel;
pub use debug::DebugSession;
pub use debug::KernelDebugger;
pub use debug::MemoryChecker;
pub use debug::NanInfChecker;
pub use device::Device;
pub use device::DeviceInfo;
pub use device::best_device;
pub use device::can_access_peer;
pub use device::driver_version;
pub use device::list_devices;
pub use event::Event;
pub use graph::Graph;
pub use graph::GraphExec;
pub use graph::GraphNode;
pub use graph::MemcpyDirection;
pub use graph::StreamCapture;
pub use link::FallbackStrategy;
pub use link::LinkInputType;
pub use link::LinkedModule;
pub use link::Linker;
pub use link::LinkerOptions;
pub use link::OptimizationLevel;
pub use loader::try_driver;
pub use module::Function;
pub use module::JitDiagnostic;
pub use module::JitLog;
pub use module::JitOptions;
pub use module::JitSeverity;
pub use module::Module;
pub use multi_gpu::DevicePool;
pub use nvlink_topology::GpuTopology;
pub use nvlink_topology::NvLinkVersion;
pub use nvlink_topology::TopologyTree;
pub use nvlink_topology::TopologyType;
pub use primary_context::PrimaryContext;
pub use profiler::ProfilerGuard;
pub use stream::Stream;
pub use stream_ordered_alloc::StreamAllocation;
pub use stream_ordered_alloc::StreamMemoryPool;
pub use stream_ordered_alloc::StreamOrderedAllocConfig;
pub use stream_ordered_alloc::stream_alloc;
pub use stream_ordered_alloc::stream_free;
Modules§
- context
- CUDA context management with RAII semantics.
- context_config
- Context configuration: limits, cache config, and shared memory config.
- cooperative_launch
- Cooperative kernel launch support (CUDA 9.0+).
- debug
- Kernel debugging utilities for OxiCUDA.
- device
- CUDA device enumeration and attribute queries.
- error
- Error types for the OxiCUDA driver crate.
- event
- CUDA event management for timing and synchronisation.
- features
- Compile-time feature availability.
- ffi
- Raw CUDA Driver API FFI types, constants, and enums.
- function_attr
- Safe wrappers for querying and configuring CUDA function attributes.
- graph
- CUDA Graph API for recording and replaying sequences of GPU operations.
- link
- Link-time optimisation for JIT-linking multiple PTX modules.
- loader
- Dynamic CUDA driver library loader.
- memory_info
- Safe wrappers for GPU memory information and bulk memory operations.
- module
- PTX module loading and kernel function management.
- multi_gpu
- Multi-GPU context management with per-device context pools.
- nvlink_topology
- NVLink/NVSwitch topology-aware communication.
- occupancy
- GPU occupancy queries for performance optimisation.
- occupancy_ext
- Extended occupancy helpers for CPU-side occupancy estimation.
- prelude
- Convenient glob import for common OxiCUDA Driver types.
- primary_context
- Primary context management (one per device, reference counted by driver).
- profiler
- CUDA profiler control.
- stream
- CUDA stream management.
- stream_ordered_alloc
- Stream-ordered memory allocation (CUDA 11.2+ / 12.x+).
- tma
- Tensor Memory Accelerator (TMA) descriptor types for CUDA 12.x / sm_90+.
Macros§
- cuda_call
- Invoke a raw CUDA Driver API function and convert the result to
CudaResult.
Functions§
- init
- Initialise the CUDA driver API.