oxicuda_memory/host_buffer.rs
1//! Pinned (page-locked) host memory buffer.
2//!
3//! [`PinnedBuffer<T>`] allocates host memory via `cuMemAllocHost_v2`, which
4//! pins the pages so that the CUDA driver can perform DMA transfers without
5//! an intermediate staging copy. This is the recommended source/destination
6//! for asynchronous host-device transfers.
7//!
8//! # Deref
9//!
10//! `PinnedBuffer<T>` implements [`Deref`] and [`DerefMut`] to `[T]`, so it
11//! can be used anywhere a slice is expected.
12//!
13//! # Ownership
14//!
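//! The allocation is freed with `cuMemFreeHost` on drop. `Drop` cannot
//! return an error, so a failing free is logged via [`tracing::warn!`]; if
//! the driver API cannot be obtained at that point, the free is skipped.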
17//!
18//! # Example
19//!
20//! ```rust,no_run
21//! # use oxicuda_memory::PinnedBuffer;
22//! let mut pinned = PinnedBuffer::<f32>::alloc(256)?;
23//! for (i, v) in pinned.iter_mut().enumerate() {
24//! *v = i as f32;
25//! }
26//! assert_eq!(pinned.len(), 256);
27//! # Ok::<(), oxicuda_driver::error::CudaError>(())
28//! ```
29
30use std::ffi::c_void;
31use std::ops::{Deref, DerefMut};
32
33use oxicuda_driver::error::{CudaError, CudaResult};
34use oxicuda_driver::loader::try_driver;
35
36// ---------------------------------------------------------------------------
37// PinnedBuffer<T>
38// ---------------------------------------------------------------------------
39
/// Owning handle to a contiguous run of `T` values stored in page-locked
/// (pinned) host memory.
///
/// Because the pages are pinned, the CUDA driver can DMA directly to and
/// from them instead of bouncing through a staging buffer, which is why
/// pinned buffers are the preferred endpoint for async copy operations.
///
/// For ergonomic access the buffer dereferences to `&[T]` / `&mut [T]`.
pub struct PinnedBuffer<T>
where
    T: Copy,
{
    /// Base address of the pinned allocation.
    ptr: *mut T,
    /// Element count, measured in `T`s rather than bytes.
    len: usize,
}
53
54// SAFETY: The pinned host memory is not thread-local; it is a plain heap
55// allocation that is safe to access from any thread.
56unsafe impl<T: Copy + Send> Send for PinnedBuffer<T> {}
57unsafe impl<T: Copy + Sync> Sync for PinnedBuffer<T> {}
58
59impl<T: Copy> PinnedBuffer<T> {
60 /// Allocates a pinned host buffer capable of holding `n` elements of type `T`.
61 ///
62 /// # Errors
63 ///
64 /// * [`CudaError::InvalidValue`] if `n` is zero.
65 /// * [`CudaError::OutOfMemory`] if the host cannot satisfy the request.
66 /// * Other driver errors from `cuMemAllocHost_v2`.
67 pub fn alloc(n: usize) -> CudaResult<Self> {
68 if n == 0 {
69 return Err(CudaError::InvalidValue);
70 }
71 let byte_size = n
72 .checked_mul(std::mem::size_of::<T>())
73 .ok_or(CudaError::InvalidValue)?;
74 let api = try_driver()?;
75 let mut raw_ptr: *mut c_void = std::ptr::null_mut();
76 // SAFETY: `cu_mem_alloc_host_v2` writes a valid host pointer on success.
77 let rc = unsafe { (api.cu_mem_alloc_host_v2)(&mut raw_ptr, byte_size) };
78 oxicuda_driver::check(rc)?;
79 Ok(Self {
80 ptr: raw_ptr.cast::<T>(),
81 len: n,
82 })
83 }
84
85 /// Allocates a pinned host buffer and copies the contents of `data` into it.
86 ///
87 /// # Errors
88 ///
89 /// * [`CudaError::InvalidValue`] if `data` is empty.
90 /// * Other driver errors from allocation.
91 pub fn from_slice(data: &[T]) -> CudaResult<Self> {
92 let buf = Self::alloc(data.len())?;
93 // SAFETY: both `data` and `buf.ptr` point to valid memory of
94 // `data.len() * size_of::<T>()` bytes, and `T: Copy`.
95 unsafe {
96 std::ptr::copy_nonoverlapping(data.as_ptr(), buf.ptr, data.len());
97 }
98 Ok(buf)
99 }
100
101 /// Returns the number of `T` elements in this buffer.
102 #[inline]
103 pub fn len(&self) -> usize {
104 self.len
105 }
106
107 /// Returns `true` if the buffer contains zero elements.
108 #[inline]
109 pub fn is_empty(&self) -> bool {
110 self.len == 0
111 }
112
113 /// Returns a raw const pointer to the buffer's data.
114 #[inline]
115 pub fn as_ptr(&self) -> *const T {
116 self.ptr
117 }
118
119 /// Returns a raw mutable pointer to the buffer's data.
120 #[inline]
121 pub fn as_mut_ptr(&mut self) -> *mut T {
122 self.ptr
123 }
124
125 /// Returns a shared slice over the buffer's contents.
126 #[inline]
127 pub fn as_slice(&self) -> &[T] {
128 // SAFETY: `self.ptr` is a valid, aligned allocation of `self.len`
129 // elements, and we have `&self` so no mutable alias exists.
130 unsafe { std::slice::from_raw_parts(self.ptr, self.len) }
131 }
132
133 /// Returns a mutable slice over the buffer's contents.
134 #[inline]
135 pub fn as_mut_slice(&mut self) -> &mut [T] {
136 // SAFETY: `self.ptr` is a valid, aligned allocation of `self.len`
137 // elements, and we have `&mut self` so no other alias exists.
138 unsafe { std::slice::from_raw_parts_mut(self.ptr, self.len) }
139 }
140}
141
142impl<T: Copy> Deref for PinnedBuffer<T> {
143 type Target = [T];
144
145 #[inline]
146 fn deref(&self) -> &[T] {
147 self.as_slice()
148 }
149}
150
151impl<T: Copy> DerefMut for PinnedBuffer<T> {
152 #[inline]
153 fn deref_mut(&mut self) -> &mut [T] {
154 self.as_mut_slice()
155 }
156}
157
158impl<T: Copy> Drop for PinnedBuffer<T> {
159 fn drop(&mut self) {
160 if let Ok(api) = try_driver() {
161 // SAFETY: `self.ptr` was allocated by `cu_mem_alloc_host_v2` and
162 // has not yet been freed.
163 let rc = unsafe { (api.cu_mem_free_host)(self.ptr.cast::<c_void>()) };
164 if rc != 0 {
165 tracing::warn!(
166 cuda_error = rc,
167 len = self.len,
168 "cuMemFreeHost failed during PinnedBuffer drop"
169 );
170 }
171 }
172 }
173}