// oxicuda_memory/zero_copy.rs
//! Zero-copy (host-mapped) memory.
//!
//! Allows GPU kernels to directly access host memory without explicit
//! transfers.  Useful for small, frequently-updated data or when PCIe
//! bandwidth is acceptable.
//!
//! # How it works
//!
//! Zero-copy memory is allocated on the host with the
//! `CU_MEMHOSTALLOC_DEVICEMAP` flag, which makes it accessible from GPU
//! kernels via a device pointer obtained from `cuMemHostGetDevicePointer`.
//! GPU reads and writes traverse the PCIe bus on each access, so this is
//! best suited for data that is accessed infrequently or streamed
//! sequentially.
//!
//! # Status
//!
//! This module is a placeholder.  The full implementation is planned for a
//! future release, once the `cuMemHostAlloc` and
//! `cuMemHostGetDevicePointer` function pointers are added to
//! `oxicuda-driver`.

use std::marker::PhantomData;

use oxicuda_driver::error::{CudaError, CudaResult};
use oxicuda_driver::ffi::CUdeviceptr;

// ---------------------------------------------------------------------------
// MappedBuffer<T>
// ---------------------------------------------------------------------------

/// A host-allocated, device-mapped (zero-copy) memory buffer.
///
/// The host memory is accessible from both CPU code and GPU kernels.
/// GPU accesses traverse the PCIe bus, making this suitable for small
/// or infrequently-accessed data where the overhead of explicit transfers
/// is not justified.
///
/// # Status
///
/// This type is a placeholder.  The allocation method currently returns
/// [`CudaError::NotSupported`].
///
/// TODO: Add `cu_mem_host_alloc` (with `CU_MEMHOSTALLOC_DEVICEMAP`) and
/// `cu_mem_host_get_device_pointer` to `DriverApi`.
pub struct MappedBuffer<T: Copy> {
    /// Host pointer to the mapped allocation.
    _host_ptr: *mut T,
    /// Device pointer for kernel access.  Presumably aliases the same
    /// physical memory as `_host_ptr` (the point of zero-copy) — confirm
    /// once the driver calls are wired up.
    _device_ptr: CUdeviceptr,
    /// Number of `T` elements in the allocation (not bytes).
    _len: usize,
    /// Ties the buffer to its element type without storing a `T` inline;
    /// the data lives behind the raw pointer.
    _phantom: PhantomData<T>,
}

// SAFETY: The mapped host allocation is plain process memory with no thread
// affinity, so ownership may move across threads when `T: Send`, and shared
// references may be used from multiple threads when `T: Sync`.  The raw
// pointers stored in the struct do not themselves impose any thread-local
// constraint.
unsafe impl<T: Copy + Send> Send for MappedBuffer<T> {}
unsafe impl<T: Copy + Sync> Sync for MappedBuffer<T> {}

61impl<T: Copy> MappedBuffer<T> {
62    /// Allocates a zero-copy host-mapped buffer of `n` elements.
63    ///
64    /// # Errors
65    ///
66    /// Currently always returns [`CudaError::NotSupported`] because the
67    /// required driver function pointers are not yet loaded.
68    ///
69    /// TODO: Implement once `cu_mem_host_alloc` and
70    /// `cu_mem_host_get_device_pointer` are available in `DriverApi`.
71    pub fn alloc(_n: usize) -> CudaResult<Self> {
72        // TODO: call (api.cu_mem_host_alloc)(..., CU_MEMHOSTALLOC_DEVICEMAP)
73        // and (api.cu_mem_host_get_device_pointer)(...) when available.
74        Err(CudaError::NotSupported)
75    }
76
77    /// Returns the number of `T` elements in this buffer.
78    #[inline]
79    pub fn len(&self) -> usize {
80        self._len
81    }
82
83    /// Returns `true` if the buffer contains zero elements.
84    #[inline]
85    pub fn is_empty(&self) -> bool {
86        self._len == 0
87    }
88
89    /// Returns the raw device pointer for use in kernel parameters.
90    #[inline]
91    pub fn as_device_ptr(&self) -> CUdeviceptr {
92        self._device_ptr
93    }
94
95    /// Returns a raw const pointer to the host-side data.
96    #[inline]
97    pub fn as_host_ptr(&self) -> *const T {
98        self._host_ptr
99    }
100}
101
impl<T: Copy> Drop for MappedBuffer<T> {
    fn drop(&mut self) {
        // Intentionally a no-op: `alloc` always fails today, so no instance
        // can ever own a live allocation that would need freeing here.
        // TODO: call (api.cu_mem_free_host)(self._host_ptr) once that
        // function pointer is available in `DriverApi`.
    }
}