1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
//! `baracuda` — idiomatic Rust wrappers for the NVIDIA CUDA stack.
//!
//! Umbrella crate. Re-exports the individual safe-API crates, each behind
//! its own Cargo feature. Only `driver` and `runtime` are enabled by
//! default; everything else is opt-in, so downstream consumers pay for
//! only what they use.
//!
//! # Feature matrix
//!
//! | Feature     | Re-exports                                         |
//! |-------------|----------------------------------------------------|
//! | `driver`    | [`driver`] — CUDA Driver API                       |
//! | `runtime`   | [`runtime`] — CUDA Runtime API                     |
//! | `nvrtc`     | [`nvrtc`] — runtime C++→PTX compiler               |
//! | `nvjitlink` | [`nvjitlink`] — CUDA 12+ JIT linker                |
//! | `cublas`    | [`cublas`] — BLAS                                  |
//! | `curand`    | [`curand`] — RNG                                   |
//! | `cufft`     | [`cufft`] — FFT                                    |
//! | `cusparse`  | [`cusparse`] — sparse linear algebra               |
//! | `cusolver`  | [`cusolver`] — dense/sparse solvers                |
//! | `cudnn`     | [`cudnn`] — deep-learning primitives               |
//! | `nccl`      | [`nccl`] — multi-GPU collectives                   |
//! | `tensorrt`  | [`tensorrt`] — high-performance inference runtime  |
//! | `cudf`      | [`cudf`] — RAPIDS GPU DataFrames (skeleton)        |
//! | `npp`       | [`npp`] — performance primitives                   |
//! | `nvjpeg`    | [`nvjpeg`] — GPU JPEG codec                        |
//! | `nvcomp`    | [`nvcomp`] — GPU compression (scaffolding)         |
//! | `cvcuda`    | [`cvcuda`] — CV-CUDA (scaffolding)                 |
//! | `nvml`      | [`nvml`] — driver-bundled GPU monitoring           |
//! | `cufile`    | [`cufile`] — GPUDirect Storage (Linux only)        |
//!
//! Bundles: `math` = cuBLAS + cuRAND + cuFFT + cuSPARSE + cuSOLVER;
//! `imaging` = NPP + nvJPEG + CV-CUDA; `ml` = driver + runtime + nvrtc +
//! nvjitlink + math + cuDNN + NCCL; `full` = ml + imaging + nvcomp +
//! nvml + cufile.
//!
//! # Quickstart
//!
//! ```toml
//! [dependencies]
//! baracuda = "0.1"
//! ```
//!
//! ```no_run
//! use baracuda::driver::{Context, Device, DeviceBuffer};
//! # fn main() -> baracuda::driver::Result<()> {
//! let device = Device::get(0)?;
//! let ctx = Context::new(&device)?;
//! let data = DeviceBuffer::from_slice(&ctx, &[1.0f32, 2.0, 3.0])?;
//! # let _ = data;
//! # Ok(())
//! # }
//! ```
//!
//! # Shared vocabulary
//!
//! [`types`] (always available, no feature flag) exposes `Half`,
//! `BFloat16`, `Complex32`, `Complex64`, `DeviceRepr`, `CudaVersion`,
//! etc. — the types shared across every safe crate.
//!
//! # Building your own kernels
//!
//! `baracuda` wraps the runtime side of CUDA — *launching* kernels, managing
//! memory, calling library APIs. To *compile* your own `.cu` files into a
//! linkable library or PTX from `build.rs`, add the sibling crate
//! `baracuda-forge` to your `[build-dependencies]`. It handles nvcc
//! invocation, incremental rebuilds, parallel compilation, and CUTLASS
//! integration.
//!
//! # Acknowledgments
//!
//! `baracuda-forge` is a vendored fork of [`cudaforge`] by Guoqing Bao,
//! adapted to baracuda's workspace conventions. Big thanks to Guoqing for
//! releasing cudaforge under permissive terms.
//!
//! [`cudaforge`]: https://github.com/guoqingbao/cudaforge
/// Shared type vocabulary (re-export of `baracuda-types`).
///
/// Always available: unlike the library re-exports in this crate, this one
/// is not tied to a Cargo feature. Per the crate-level docs it carries the
/// types shared across the safe crates (`Half`, `BFloat16`, `Complex32`,
/// `Complex64`, `DeviceRepr`, `CudaVersion`, …).
pub use baracuda_types as types;
/// CUDA Driver API (enabled with the `driver` feature).
///
/// Re-export of the `baracuda_driver` crate; compiled only when the
/// `driver` feature is active.
#[cfg(feature = "driver")]
pub use baracuda_driver as driver;
/// CUDA Runtime API (enabled with the `runtime` feature).
///
/// Re-export of the `baracuda_runtime` crate; compiled only when the
/// `runtime` feature is active.
#[cfg(feature = "runtime")]
pub use baracuda_runtime as runtime;
/// NVRTC — runtime CUDA C++ → PTX compiler (`nvrtc` feature).
///
/// Re-export of the `baracuda_nvrtc` crate; compiled only when the
/// `nvrtc` feature is active.
#[cfg(feature = "nvrtc")]
pub use baracuda_nvrtc as nvrtc;
/// nvJitLink — CUDA 12+ JIT linker (`nvjitlink` feature).
///
/// Re-export of the `baracuda_nvjitlink` crate; compiled only when the
/// `nvjitlink` feature is active.
#[cfg(feature = "nvjitlink")]
pub use baracuda_nvjitlink as nvjitlink;
/// cuBLAS — GPU-accelerated BLAS (`cublas` feature).
///
/// Re-export of the `baracuda_cublas` crate; compiled only when the
/// `cublas` feature is active.
#[cfg(feature = "cublas")]
pub use baracuda_cublas as cublas;
/// cuRAND — GPU random-number generation (`curand` feature).
///
/// Re-export of the `baracuda_curand` crate; compiled only when the
/// `curand` feature is active.
#[cfg(feature = "curand")]
pub use baracuda_curand as curand;
/// cuFFT — GPU FFT (`cufft` feature).
///
/// Re-export of the `baracuda_cufft` crate; compiled only when the
/// `cufft` feature is active.
#[cfg(feature = "cufft")]
pub use baracuda_cufft as cufft;
/// cuSPARSE — sparse linear algebra (`cusparse` feature).
///
/// Re-export of the `baracuda_cusparse` crate; compiled only when the
/// `cusparse` feature is active.
#[cfg(feature = "cusparse")]
pub use baracuda_cusparse as cusparse;
/// cuSOLVER — dense + sparse solvers (`cusolver` feature).
///
/// Re-export of the `baracuda_cusolver` crate; compiled only when the
/// `cusolver` feature is active.
#[cfg(feature = "cusolver")]
pub use baracuda_cusolver as cusolver;
/// cuDNN — deep-learning primitives (`cudnn` feature).
///
/// Re-export of the `baracuda_cudnn` crate; compiled only when the
/// `cudnn` feature is active.
#[cfg(feature = "cudnn")]
pub use baracuda_cudnn as cudnn;
/// NCCL — multi-GPU collective communication (`nccl` feature).
///
/// Re-export of the `baracuda_nccl` crate; compiled only when the
/// `nccl` feature is active.
#[cfg(feature = "nccl")]
pub use baracuda_nccl as nccl;
/// TensorRT — high-performance inference runtime (`tensorrt` feature).
///
/// Re-export of the `baracuda_tensorrt` crate; compiled only when the
/// `tensorrt` feature is active.
#[cfg(feature = "tensorrt")]
pub use baracuda_tensorrt as tensorrt;
/// cuDF — RAPIDS GPU DataFrames (`cudf` feature; skeleton over emerging libcudf_c).
///
/// Re-export of the `baracuda_cudf` crate; compiled only when the
/// `cudf` feature is active.
#[cfg(feature = "cudf")]
pub use baracuda_cudf as cudf;
/// NPP — NVIDIA Performance Primitives (`npp` feature).
///
/// Re-export of the `baracuda_npp` crate; compiled only when the
/// `npp` feature is active.
#[cfg(feature = "npp")]
pub use baracuda_npp as npp;
/// nvJPEG — GPU JPEG codec (`nvjpeg` feature).
///
/// Re-export of the `baracuda_nvjpeg` crate; compiled only when the
/// `nvjpeg` feature is active.
#[cfg(feature = "nvjpeg")]
pub use baracuda_nvjpeg as nvjpeg;
/// nvCOMP — GPU compression (`nvcomp` feature; scaffolding only at v0.1).
///
/// Re-export of the `baracuda_nvcomp` crate; compiled only when the
/// `nvcomp` feature is active.
#[cfg(feature = "nvcomp")]
pub use baracuda_nvcomp as nvcomp;
/// CV-CUDA — computer-vision operators (`cvcuda` feature; scaffolding only at v0.1).
///
/// Re-export of the `baracuda_cvcuda` crate; compiled only when the
/// `cvcuda` feature is active.
#[cfg(feature = "cvcuda")]
pub use baracuda_cvcuda as cvcuda;
/// NVML — driver-bundled GPU monitoring (`nvml` feature).
///
/// Re-export of the `baracuda_nvml` crate; compiled only when the
/// `nvml` feature is active.
#[cfg(feature = "nvml")]
pub use baracuda_nvml as nvml;
/// cuFile — GPUDirect Storage (`cufile` feature; Linux only).
///
/// Re-export of the `baracuda_cufile` crate; compiled only when the
/// `cufile` feature is active.
// NOTE(review): the docs say "Linux only", but no `target_os` gate is added
// here because it is not visible whether `baracuda_cufile` itself builds
// (e.g. as stubs) on other platforms — confirm before tightening.
#[cfg(feature = "cufile")]
pub use baracuda_cufile as cufile;