Skip to main content

oxicuda_memory/
host_buffer.rs

1//! Pinned (page-locked) host memory buffer.
2//!
3//! [`PinnedBuffer<T>`] allocates host memory via `cuMemAllocHost_v2`, which
4//! pins the pages so that the CUDA driver can perform DMA transfers without
5//! an intermediate staging copy.  This is the recommended source/destination
6//! for asynchronous host-device transfers.
7//!
8//! # Deref
9//!
10//! `PinnedBuffer<T>` implements [`Deref`] and [`DerefMut`] to `[T]`, so it
11//! can be used anywhere a slice is expected.
12//!
13//! # Ownership
14//!
15//! The allocation is freed with `cuMemFreeHost` on drop.  Errors during
16//! drop are logged via [`tracing::warn`].
17//!
18//! # Example
19//!
20//! ```rust,no_run
21//! # use oxicuda_memory::PinnedBuffer;
22//! let mut pinned = PinnedBuffer::<f32>::alloc(256)?;
23//! for (i, v) in pinned.iter_mut().enumerate() {
24//!     *v = i as f32;
25//! }
26//! assert_eq!(pinned.len(), 256);
27//! # Ok::<(), oxicuda_driver::error::CudaError>(())
28//! ```
29
30use std::ffi::c_void;
31use std::ops::{Deref, DerefMut};
32
33use oxicuda_driver::error::{CudaError, CudaResult};
34use oxicuda_driver::loader::try_driver;
35
36// ---------------------------------------------------------------------------
37// PinnedBuffer<T>
38// ---------------------------------------------------------------------------
39
/// Owning handle to a contiguous run of `T` values stored in page-locked
/// (pinned) host memory.
///
/// Because the pages are pinned, the CUDA driver can DMA directly between
/// this buffer and the device instead of copying through a staging buffer,
/// which is why pinned buffers are the preferred endpoint for asynchronous
/// copies.
///
/// For ergonomic access the buffer dereferences to `&[T]` / `&mut [T]`.
pub struct PinnedBuffer<T>
where
    T: Copy,
{
    /// Base address of the pinned allocation.
    ptr: *mut T,
    /// Length of the allocation, counted in `T` elements rather than bytes.
    len: usize,
}
53
54// SAFETY: The pinned host memory is not thread-local; it is a plain heap
55// allocation that is safe to access from any thread.
56unsafe impl<T: Copy + Send> Send for PinnedBuffer<T> {}
57unsafe impl<T: Copy + Sync> Sync for PinnedBuffer<T> {}
58
59impl<T: Copy> PinnedBuffer<T> {
60    /// Allocates a pinned host buffer capable of holding `n` elements of type `T`.
61    ///
62    /// # Errors
63    ///
64    /// * [`CudaError::InvalidValue`] if `n` is zero.
65    /// * [`CudaError::OutOfMemory`] if the host cannot satisfy the request.
66    /// * Other driver errors from `cuMemAllocHost_v2`.
67    pub fn alloc(n: usize) -> CudaResult<Self> {
68        if n == 0 {
69            return Err(CudaError::InvalidValue);
70        }
71        let byte_size = n
72            .checked_mul(std::mem::size_of::<T>())
73            .ok_or(CudaError::InvalidValue)?;
74        let api = try_driver()?;
75        let mut raw_ptr: *mut c_void = std::ptr::null_mut();
76        // SAFETY: `cu_mem_alloc_host_v2` writes a valid host pointer on success.
77        let rc = unsafe { (api.cu_mem_alloc_host_v2)(&mut raw_ptr, byte_size) };
78        oxicuda_driver::check(rc)?;
79        Ok(Self {
80            ptr: raw_ptr.cast::<T>(),
81            len: n,
82        })
83    }
84
85    /// Allocates a pinned host buffer and copies the contents of `data` into it.
86    ///
87    /// # Errors
88    ///
89    /// * [`CudaError::InvalidValue`] if `data` is empty.
90    /// * Other driver errors from allocation.
91    pub fn from_slice(data: &[T]) -> CudaResult<Self> {
92        let buf = Self::alloc(data.len())?;
93        // SAFETY: both `data` and `buf.ptr` point to valid memory of
94        // `data.len() * size_of::<T>()` bytes, and `T: Copy`.
95        unsafe {
96            std::ptr::copy_nonoverlapping(data.as_ptr(), buf.ptr, data.len());
97        }
98        Ok(buf)
99    }
100
101    /// Returns the number of `T` elements in this buffer.
102    #[inline]
103    pub fn len(&self) -> usize {
104        self.len
105    }
106
107    /// Returns `true` if the buffer contains zero elements.
108    #[inline]
109    pub fn is_empty(&self) -> bool {
110        self.len == 0
111    }
112
113    /// Returns a raw const pointer to the buffer's data.
114    #[inline]
115    pub fn as_ptr(&self) -> *const T {
116        self.ptr
117    }
118
119    /// Returns a raw mutable pointer to the buffer's data.
120    #[inline]
121    pub fn as_mut_ptr(&mut self) -> *mut T {
122        self.ptr
123    }
124
125    /// Returns a shared slice over the buffer's contents.
126    #[inline]
127    pub fn as_slice(&self) -> &[T] {
128        // SAFETY: `self.ptr` is a valid, aligned allocation of `self.len`
129        // elements, and we have `&self` so no mutable alias exists.
130        unsafe { std::slice::from_raw_parts(self.ptr, self.len) }
131    }
132
133    /// Returns a mutable slice over the buffer's contents.
134    #[inline]
135    pub fn as_mut_slice(&mut self) -> &mut [T] {
136        // SAFETY: `self.ptr` is a valid, aligned allocation of `self.len`
137        // elements, and we have `&mut self` so no other alias exists.
138        unsafe { std::slice::from_raw_parts_mut(self.ptr, self.len) }
139    }
140}
141
142impl<T: Copy> Deref for PinnedBuffer<T> {
143    type Target = [T];
144
145    #[inline]
146    fn deref(&self) -> &[T] {
147        self.as_slice()
148    }
149}
150
151impl<T: Copy> DerefMut for PinnedBuffer<T> {
152    #[inline]
153    fn deref_mut(&mut self) -> &mut [T] {
154        self.as_mut_slice()
155    }
156}
157
158impl<T: Copy> Drop for PinnedBuffer<T> {
159    fn drop(&mut self) {
160        if let Ok(api) = try_driver() {
161            // SAFETY: `self.ptr` was allocated by `cu_mem_alloc_host_v2` and
162            // has not yet been freed.
163            let rc = unsafe { (api.cu_mem_free_host)(self.ptr.cast::<c_void>()) };
164            if rc != 0 {
165                tracing::warn!(
166                    cuda_error = rc,
167                    len = self.len,
168                    "cuMemFreeHost failed during PinnedBuffer drop"
169                );
170            }
171        }
172    }
173}