// oxicuda_memory/zero_copy.rs
1//! Zero-copy (host-mapped) memory.
2//!
3//! Allows GPU kernels to directly access host memory without explicit
4//! transfers. Useful for small, frequently-updated data or when PCIe
5//! bandwidth is acceptable.
6//!
7//! # How it works
8//!
9//! Zero-copy memory is allocated on the host with the
10//! `CU_MEMHOSTALLOC_DEVICEMAP` flag, which makes it accessible from GPU
11//! kernels via a device pointer obtained from `cuMemHostGetDevicePointer`.
12//! The GPU reads/writes traverse the PCIe bus on each access, so this is
13//! best suited for data that is accessed infrequently or streamed
14//! sequentially.
15//!
16//! # Status
17//!
18//! This module is a placeholder. Full implementation is planned for a
19//! future release once the `cuMemHostAlloc` and
20//! `cuMemHostGetDevicePointer` function pointers are added to
21//! `oxicuda-driver`.
22
23use std::marker::PhantomData;
24
25use oxicuda_driver::error::{CudaError, CudaResult};
26use oxicuda_driver::ffi::CUdeviceptr;
27
28// ---------------------------------------------------------------------------
29// MappedBuffer<T>
30// ---------------------------------------------------------------------------
31
32/// A host-allocated, device-mapped (zero-copy) memory buffer.
33///
34/// The host memory is accessible from both CPU code and GPU kernels.
35/// GPU accesses traverse the PCIe bus, making this suitable for small
36/// or infrequently-accessed data where the overhead of explicit transfers
37/// is not justified.
38///
39/// # Status
40///
41/// This type is a placeholder. The allocation method currently returns
42/// [`CudaError::NotSupported`].
43///
44/// TODO: Add `cu_mem_host_alloc` (with `CU_MEMHOSTALLOC_DEVICEMAP`) and
45/// `cu_mem_host_get_device_pointer` to `DriverApi`.
46pub struct MappedBuffer<T: Copy> {
47 /// Host pointer to the mapped allocation.
48 _host_ptr: *mut T,
49 /// Corresponding device pointer for kernel access.
50 _device_ptr: CUdeviceptr,
51 /// Number of `T` elements.
52 _len: usize,
53 /// Marker for the element type.
54 _phantom: PhantomData<T>,
55}
56
// SAFETY: `MappedBuffer` owns a plain host allocation reachable only
// through this value; its raw pointers do not alias thread-local state.
// Transferring or sharing the buffer across threads is therefore sound
// exactly when the element type itself is `Send`/`Sync`, which the
// bounds below enforce.
// NOTE(review): once real allocation lands, confirm the CUDA driver
// allows using a mapped host pointer from a thread other than the one
// that allocated it.
unsafe impl<T: Copy + Send> Send for MappedBuffer<T> {}
unsafe impl<T: Copy + Sync> Sync for MappedBuffer<T> {}
60
61impl<T: Copy> MappedBuffer<T> {
62 /// Allocates a zero-copy host-mapped buffer of `n` elements.
63 ///
64 /// # Errors
65 ///
66 /// Currently always returns [`CudaError::NotSupported`] because the
67 /// required driver function pointers are not yet loaded.
68 ///
69 /// TODO: Implement once `cu_mem_host_alloc` and
70 /// `cu_mem_host_get_device_pointer` are available in `DriverApi`.
71 pub fn alloc(_n: usize) -> CudaResult<Self> {
72 // TODO: call (api.cu_mem_host_alloc)(..., CU_MEMHOSTALLOC_DEVICEMAP)
73 // and (api.cu_mem_host_get_device_pointer)(...) when available.
74 Err(CudaError::NotSupported)
75 }
76
77 /// Returns the number of `T` elements in this buffer.
78 #[inline]
79 pub fn len(&self) -> usize {
80 self._len
81 }
82
83 /// Returns `true` if the buffer contains zero elements.
84 #[inline]
85 pub fn is_empty(&self) -> bool {
86 self._len == 0
87 }
88
89 /// Returns the raw device pointer for use in kernel parameters.
90 #[inline]
91 pub fn as_device_ptr(&self) -> CUdeviceptr {
92 self._device_ptr
93 }
94
95 /// Returns a raw const pointer to the host-side data.
96 #[inline]
97 pub fn as_host_ptr(&self) -> *const T {
98 self._host_ptr
99 }
100}
101
impl<T: Copy> Drop for MappedBuffer<T> {
    /// Releases the mapped host allocation.
    fn drop(&mut self) {
        // TODO: call (api.cu_mem_free_host)(self._host_ptr) when available.
        // For now, nothing to free since construction always fails.
        // (Intentionally empty: `alloc` always returns `Err`, so no
        // instance holding a live allocation can currently exist.)
    }
}
107}