Skip to main content

baracuda_cufile/
lib.rs

1//! Safe Rust wrappers for NVIDIA cuFile (GPUDirect Storage).
2//!
3//! cuFile is **Linux-only** and requires a GDS-capable filesystem (ext4
4//! / XFS on NVMe with the NVIDIA GDS kernel driver). On Windows and
5//! macOS every API returns
6//! [`baracuda_core::LoaderError::UnsupportedPlatform`].
7//!
8//! # Workflow
9//!
10//! 1. [`Driver::open`] — initialize the driver (do this once per process).
11//! 2. [`FileHandle::register`] an open file descriptor.
12//! 3. [`BufRegistration::register`] a CUDA device buffer (optional but
13//!    strongly recommended for performance).
14//! 4. [`FileHandle::read`] / [`FileHandle::write`] directly between the
15//!    file and the device buffer — no bounce through host memory.
16
17#![warn(missing_debug_implementations)]
18
19use core::ffi::c_void;
20
21use baracuda_cufile_sys::{cufile, CUfileDescr_t, CUfileError_t, CUfileHandle_t, CUfileOpError};
22
23/// Error type for cuFile operations.
24pub type Error = baracuda_core::Error<CUfileOpError>;
25/// Result alias.
26pub type Result<T, E = Error> = core::result::Result<T, E>;
27
28fn check(rc: CUfileError_t) -> Result<()> {
29    if rc.err.0 == 0 {
30        Ok(())
31    } else {
32        Err(Error::Status { status: rc.err })
33    }
34}
35
36/// Verify cuFile is loadable on this host. Fails on non-Linux platforms.
37pub fn probe() -> Result<()> {
38    cufile()?;
39    Ok(())
40}
41
42/// cuFile runtime version as reported by `cuFileGetVersion`.
43pub fn version() -> Result<i32> {
44    let c = cufile()?;
45    let cu = c.cu_file_get_version()?;
46    let mut v: core::ffi::c_int = 0;
47    check(unsafe { cu(&mut v) })?;
48    Ok(v as i32)
49}
50
51/// RAII handle for the cuFile driver lifecycle.
52///
53/// `cuFileDriverOpen` is idempotent inside the library, but baracuda
54/// matches every [`Driver::open`] with a `cuFileDriverClose` on drop for
55/// symmetry.
56#[derive(Debug)]
57pub struct Driver {
58    _nonsend: core::marker::PhantomData<*const ()>,
59}
60
61impl Driver {
62    /// Initialize the cuFile driver. Keep the returned handle alive for
63    /// the lifetime of any I/O you do.
64    pub fn open() -> Result<Self> {
65        let c = cufile()?;
66        let cu = c.cu_file_driver_open()?;
67        check(unsafe { cu() })?;
68        Ok(Self {
69            _nonsend: core::marker::PhantomData,
70        })
71    }
72
73    /// Toggle polling vs interrupt-driven I/O; `poll_threshold_size` is
74    /// the smallest I/O below which polling is used.
75    pub fn set_poll_mode(&self, poll: bool, poll_threshold_size: usize) -> Result<()> {
76        let c = cufile()?;
77        let cu = c.cu_file_driver_set_poll_mode()?;
78        check(unsafe { cu(poll, poll_threshold_size) })
79    }
80
81    /// Maximum direct-I/O chunk size in KiB (default 16 MiB).
82    pub fn set_max_direct_io_size_kb(&self, size_kb: usize) -> Result<()> {
83        let c = cufile()?;
84        let cu = c.cu_file_driver_set_max_direct_io_size()?;
85        check(unsafe { cu(size_kb) })
86    }
87
88    /// Maximum page-cache size cuFile can use, in KiB.
89    pub fn set_max_cache_size_kb(&self, size_kb: usize) -> Result<()> {
90        let c = cufile()?;
91        let cu = c.cu_file_driver_set_max_cache_size()?;
92        check(unsafe { cu(size_kb) })
93    }
94
95    /// Maximum pinned-host-memory budget cuFile can allocate, in KiB.
96    pub fn set_max_pinned_mem_size_kb(&self, size_kb: usize) -> Result<()> {
97        let c = cufile()?;
98        let cu = c.cu_file_driver_set_max_pinned_mem_size()?;
99        check(unsafe { cu(size_kb) })
100    }
101
102    /// Fill `props` with the current cuFile driver properties. The struct
103    /// layout follows `CUfileDrvProps_t` in the cuFile headers; callers
104    /// typically allocate the struct as `std::mem::zeroed::<[u8; 64]>()`
105    /// first and then reinterpret the bytes.
106    ///
107    /// # Safety
108    /// `props` must point to at least `sizeof(CUfileDrvProps_t)` bytes.
109    pub unsafe fn properties(&self, props: *mut core::ffi::c_void) -> Result<()> { unsafe {
110        let c = cufile()?;
111        let cu = c.cu_file_driver_get_properties()?;
112        check(cu(props))
113    }}
114}
115
116/// Human-readable string describing a [`CUfileOpError`] code.
117pub fn op_status_error_string(status: CUfileOpError) -> Result<String> {
118    let c = cufile()?;
119    let cu = c.cu_file_op_status_error()?;
120    let ptr = unsafe { cu(status) };
121    if ptr.is_null() {
122        return Ok(String::new());
123    }
124    let cstr = unsafe { core::ffi::CStr::from_ptr(ptr) };
125    Ok(cstr.to_string_lossy().into_owned())
126}
127
128impl Drop for Driver {
129    fn drop(&mut self) {
130        if let Ok(c) = cufile() {
131            if let Ok(cu) = c.cu_file_driver_close() {
132                let _ = unsafe { cu() };
133            }
134        }
135    }
136}
137
138/// A registered file descriptor.
139#[derive(Debug)]
140pub struct FileHandle {
141    handle: CUfileHandle_t,
142}
143
144impl FileHandle {
145    /// Register an open file descriptor with cuFile. `fd` is typically
146    /// obtained from `std::fs::File::as_raw_fd()` on Linux.
147    ///
148    /// # Safety
149    ///
150    /// `fd` must stay open for the lifetime of the returned handle.
151    pub unsafe fn register(fd: i32) -> Result<Self> { unsafe {
152        let c = cufile()?;
153        let cu = c.cu_file_handle_register()?;
154        let mut descr = CUfileDescr_t {
155            handle_fd: fd,
156            ..Default::default()
157        };
158        let mut h: CUfileHandle_t = core::ptr::null_mut();
159        check(cu(&mut h, &mut descr))?;
160        Ok(Self { handle: h })
161    }}
162
163    #[inline]
164    pub fn as_raw(&self) -> CUfileHandle_t {
165        self.handle
166    }
167
168    /// Read `size` bytes from `file_offset` into `dev_buf + buf_offset`.
169    /// Returns the number of bytes actually read (negative on failure;
170    /// we map negatives to `Err`).
171    ///
172    /// # Safety
173    ///
174    /// `dev_buf` must be a device pointer with at least `size` bytes
175    /// live starting at `buf_offset`.
176    pub unsafe fn read(
177        &self,
178        dev_buf: *mut c_void,
179        size: usize,
180        file_offset: i64,
181        buf_offset: i64,
182    ) -> Result<usize> { unsafe {
183        let c = cufile()?;
184        let cu = c.cu_file_read()?;
185        let n = cu(self.handle, dev_buf, size, file_offset, buf_offset);
186        if n < 0 {
187            Err(Error::Status {
188                status: CUfileOpError(n as i32),
189            })
190        } else {
191            Ok(n as usize)
192        }
193    }}
194
195    /// Write `size` bytes from `dev_buf + buf_offset` into `file_offset`.
196    ///
197    /// # Safety
198    ///
199    /// Same as [`Self::read`].
200    pub unsafe fn write(
201        &self,
202        dev_buf: *const c_void,
203        size: usize,
204        file_offset: i64,
205        buf_offset: i64,
206    ) -> Result<usize> { unsafe {
207        let c = cufile()?;
208        let cu = c.cu_file_write()?;
209        let n = cu(self.handle, dev_buf, size, file_offset, buf_offset);
210        if n < 0 {
211            Err(Error::Status {
212                status: CUfileOpError(n as i32),
213            })
214        } else {
215            Ok(n as usize)
216        }
217    }}
218}
219
220impl Drop for FileHandle {
221    fn drop(&mut self) {
222        if self.handle.is_null() {
223            return;
224        }
225        if let Ok(c) = cufile() {
226            if let Ok(cu) = c.cu_file_handle_deregister() {
227                unsafe { cu(self.handle) };
228            }
229        }
230    }
231}
232
233/// A registered device-buffer region. Registration is optional — cuFile
234/// works without it — but it unlocks the fastest DMA path.
235#[derive(Debug)]
236pub struct BufRegistration {
237    ptr: *mut c_void,
238    _marker: core::marker::PhantomData<*const ()>,
239}
240
241impl BufRegistration {
242    /// Register `length` bytes starting at `dev_ptr`. `flags = 0` for
243    /// default.
244    ///
245    /// # Safety
246    ///
247    /// `dev_ptr` must be a device-memory pointer with `length` live
248    /// bytes. Keep it alive for the full registration lifetime.
249    pub unsafe fn register(dev_ptr: *mut c_void, length: usize, flags: i32) -> Result<Self> { unsafe {
250        let c = cufile()?;
251        let cu = c.cu_file_buf_register()?;
252        check(cu(dev_ptr, length, flags))?;
253        Ok(Self {
254            ptr: dev_ptr,
255            _marker: core::marker::PhantomData,
256        })
257    }}
258}
259
260impl Drop for BufRegistration {
261    fn drop(&mut self) {
262        if let Ok(c) = cufile() {
263            if let Ok(cu) = c.cu_file_buf_deregister() {
264                let _ = unsafe { cu(self.ptr) };
265            }
266        }
267    }
268}
269
270// =================== Async I/O (v1.6+) ===================
271
272/// A cuFile-aware CUDA stream. Register a stream once to use it with
273/// [`FileHandle::read_async`] / [`FileHandle::write_async`] — cuFile
274/// will queue the I/O behind prior stream work and signal completion
275/// as another stream op.
276#[derive(Debug)]
277pub struct StreamRegistration {
278    stream: *mut c_void,
279}
280
281impl StreamRegistration {
282    /// Register `stream` for async I/O. `flags = 0` = default.
283    ///
284    /// # Safety
285    ///
286    /// `stream` must be a live `cudaStream_t` on the current context.
287    pub unsafe fn register(stream: *mut c_void, flags: u32) -> Result<Self> { unsafe {
288        let c = cufile()?;
289        let cu = c.cu_file_stream_register()?;
290        check(cu(stream, flags))?;
291        Ok(Self { stream })
292    }}
293}
294
295impl Drop for StreamRegistration {
296    fn drop(&mut self) {
297        if let Ok(c) = cufile() {
298            if let Ok(cu) = c.cu_file_stream_deregister() {
299                let _ = unsafe { cu(self.stream) };
300            }
301        }
302    }
303}
304
305impl FileHandle {
306    /// Queue a stream-ordered read. All parameters point at device /
307    /// pinned-host memory that cuFile reads *when the stream reaches
308    /// this op* (not at call time). `bytes_read` is written when the
309    /// op completes.
310    ///
311    /// # Safety
312    ///
313    /// All pointers must stay live until the stream reports completion.
314    /// `stream` must be previously registered via [`StreamRegistration::register`].
315    #[allow(clippy::too_many_arguments)]
316    pub unsafe fn read_async(
317        &self,
318        dev_buf: *mut c_void,
319        size_p: *mut usize,
320        file_offset_p: *mut i64,
321        buf_offset_p: *mut i64,
322        bytes_read: *mut isize,
323        stream: *mut c_void,
324    ) -> Result<()> { unsafe {
325        let c = cufile()?;
326        let cu = c.cu_file_read_async()?;
327        check(cu(
328            self.handle,
329            dev_buf,
330            size_p,
331            file_offset_p,
332            buf_offset_p,
333            bytes_read,
334            stream,
335        ))
336    }}
337
338    /// Queue a stream-ordered write.
339    ///
340    /// # Safety
341    ///
342    /// Same as [`Self::read_async`].
343    #[allow(clippy::too_many_arguments)]
344    pub unsafe fn write_async(
345        &self,
346        dev_buf: *const c_void,
347        size_p: *mut usize,
348        file_offset_p: *mut i64,
349        buf_offset_p: *mut i64,
350        bytes_written: *mut isize,
351        stream: *mut c_void,
352    ) -> Result<()> { unsafe {
353        let c = cufile()?;
354        let cu = c.cu_file_write_async()?;
355        check(cu(
356            self.handle,
357            dev_buf,
358            size_p,
359            file_offset_p,
360            buf_offset_p,
361            bytes_written,
362            stream,
363        ))
364    }}
365
366    /// cuFile's per-handle ref-count (non-zero = handle in use by
367    /// outstanding I/O).
368    pub fn use_count(&self) -> Result<i32> {
369        let c = cufile()?;
370        let cu = c.cu_file_use_count()?;
371        Ok(unsafe { cu(self.handle) })
372    }
373}
374
375// =================== Batched I/O (v1.6+) ===================
376
377pub use baracuda_cufile_sys::{
378    CUfileBatchHandle_t, CUfileIOEvents_t, CUfileIOParams_t, CUfileOpcode,
379};
380
381/// RAII handle for a cuFile batch-I/O request group. Supports up to
382/// `capacity` entries per submission cycle.
383#[derive(Debug)]
384pub struct BatchIO {
385    handle: CUfileBatchHandle_t,
386}
387
388impl BatchIO {
389    /// Create a new batch handle with room for `capacity` concurrent
390    /// entries. Typical value: 64–256.
391    pub fn new(capacity: u32) -> Result<Self> {
392        let c = cufile()?;
393        let cu = c.cu_file_batch_io_set_up()?;
394        let mut h: CUfileBatchHandle_t = core::ptr::null_mut();
395        check(unsafe { cu(&mut h, capacity) })?;
396        Ok(Self { handle: h })
397    }
398
399    /// Submit `params.len()` entries for execution. Returns before
400    /// completion; call [`Self::poll`] to reap.
401    ///
402    /// # Safety
403    ///
404    /// Every entry's pointers must stay live until reaped.
405    pub unsafe fn submit(&self, params: &mut [CUfileIOParams_t], flags: u32) -> Result<()> { unsafe {
406        let c = cufile()?;
407        let cu = c.cu_file_batch_io_submit()?;
408        check(cu(
409            self.handle,
410            params.len() as u32,
411            params.as_mut_ptr(),
412            flags,
413        ))
414    }}
415
416    /// Wait for at least `min_nr` entries to complete. Fills
417    /// `events[..nr]` with outcomes. `timeout_ns = None` blocks
418    /// indefinitely.
419    ///
420    /// # Safety
421    ///
422    /// `events` is written up to its capacity.
423    pub unsafe fn poll(&self, min_nr: u32, events: &mut [CUfileIOEvents_t]) -> Result<u32> { unsafe {
424        let c = cufile()?;
425        let cu = c.cu_file_batch_io_get_status()?;
426        let mut nr: u32 = events.len() as u32;
427        check(cu(
428            self.handle,
429            min_nr,
430            &mut nr,
431            events.as_mut_ptr(),
432            core::ptr::null_mut(),
433        ))?;
434        Ok(nr)
435    }}
436
437    /// Cancel pending entries.
438    pub fn cancel(&self) -> Result<()> {
439        let c = cufile()?;
440        let cu = c.cu_file_batch_io_cancel()?;
441        check(unsafe { cu(self.handle) })
442    }
443
444    #[inline]
445    pub fn as_raw(&self) -> CUfileBatchHandle_t {
446        self.handle
447    }
448}
449
450impl Drop for BatchIO {
451    fn drop(&mut self) {
452        if let Ok(c) = cufile() {
453            if let Ok(cu) = c.cu_file_batch_io_destroy() {
454                let _ = unsafe { cu(self.handle) };
455            }
456        }
457    }
458}