Skip to main content

baracuda_runtime/
memcpy3d.rs

1//! 3D memcpy + `cudaMalloc3D` pitched 3D buffers.
2
3use core::ffi::c_void;
4
5use baracuda_cuda_sys::runtime::runtime;
6use baracuda_cuda_sys::runtime::types::{cudaExtent, cudaMemcpy3DParms, cudaPitchedPtr};
7use baracuda_types::DeviceRepr;
8
9use crate::error::{check, Result};
10use crate::stream::Stream;
11
12/// A pitched 3D device allocation (from `cudaMalloc3D`). Freed on drop.
13pub struct Pitched3dBuffer<T: DeviceRepr> {
14    ptr: cudaPitchedPtr,
15    extent: cudaExtent,
16    _marker: core::marker::PhantomData<T>,
17}
18
19impl<T: DeviceRepr> core::fmt::Debug for Pitched3dBuffer<T> {
20    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
21        f.debug_struct("Pitched3dBuffer")
22            .field("ptr", &self.ptr.ptr)
23            .field("pitch", &self.ptr.pitch)
24            .field("extent", &self.extent)
25            .finish()
26    }
27}
28
29impl<T: DeviceRepr> Pitched3dBuffer<T> {
30    /// Allocate a `width × height × depth` box, with `width` in elements
31    /// of `T` (the runtime measures in bytes, so we multiply).
32    pub fn new(width: usize, height: usize, depth: usize) -> Result<Self> {
33        let r = runtime()?;
34        let cu = r.cuda_malloc_3d()?;
35        let extent = cudaExtent {
36            width: width * core::mem::size_of::<T>(),
37            height,
38            depth,
39        };
40        let mut pitched = cudaPitchedPtr::default();
41        check(unsafe {
42            cu(
43                &mut pitched as *mut cudaPitchedPtr as *mut c_void,
44                &extent as *const cudaExtent as *const c_void,
45            )
46        })?;
47        Ok(Self {
48            ptr: pitched,
49            extent,
50            _marker: core::marker::PhantomData,
51        })
52    }
53
54    #[inline]
55    pub fn as_pitched_ptr(&self) -> cudaPitchedPtr {
56        self.ptr
57    }
58
59    #[inline]
60    pub fn extent(&self) -> cudaExtent {
61        self.extent
62    }
63
64    /// Pitch in bytes.
65    #[inline]
66    pub fn pitch_bytes(&self) -> usize {
67        self.ptr.pitch
68    }
69}
70
71impl<T: DeviceRepr> Drop for Pitched3dBuffer<T> {
72    fn drop(&mut self) {
73        if self.ptr.ptr.is_null() {
74            return;
75        }
76        if let Ok(r) = runtime() {
77            if let Ok(cu) = r.cuda_free() {
78                let _ = unsafe { cu(self.ptr.ptr) };
79            }
80        }
81    }
82}
83
84/// Issue a `cudaMemcpy3D` with the given parameters.
85///
86/// # Safety
87///
88/// Every pointer inside `params` (both array handles and pitched ptrs)
89/// must be valid for the copy region.
90pub unsafe fn memcpy_3d(params: &cudaMemcpy3DParms) -> Result<()> { unsafe {
91    let r = runtime()?;
92    let cu = r.cuda_memcpy_3d()?;
93    check(cu(params as *const cudaMemcpy3DParms as *const c_void))
94}}
95
96/// `cudaMemcpy3DAsync`.
97///
98/// # Safety
99///
100/// Same as [`memcpy_3d`]; caller owns synchronization.
101pub unsafe fn memcpy_3d_async(params: &cudaMemcpy3DParms, stream: &Stream) -> Result<()> { unsafe {
102    let r = runtime()?;
103    let cu = r.cuda_memcpy_3d_async()?;
104    check(cu(
105        params as *const cudaMemcpy3DParms as *const c_void,
106        stream.as_raw(),
107    ))
108}}
109
110/// `cudaMemcpy3DPeer`.
111///
112/// # Safety
113///
114/// Same as [`memcpy_3d`]. `params` must include `srcDevice` / `dstDevice`.
115pub unsafe fn memcpy_3d_peer(params: &cudaMemcpy3DParms) -> Result<()> { unsafe {
116    let r = runtime()?;
117    let cu = r.cuda_memcpy_3d_peer()?;
118    check(cu(params as *const cudaMemcpy3DParms as *const c_void))
119}}
120
121/// `cudaMemcpy3DPeerAsync`.
122///
123/// # Safety
124///
125/// Same as [`memcpy_3d_peer`].
126pub unsafe fn memcpy_3d_peer_async(params: &cudaMemcpy3DParms, stream: &Stream) -> Result<()> { unsafe {
127    let r = runtime()?;
128    let cu = r.cuda_memcpy_3d_peer_async()?;
129    check(cu(
130        params as *const cudaMemcpy3DParms as *const c_void,
131        stream.as_raw(),
132    ))
133}}
134
135/// `cudaMemset3D` — fill a 3D region with a byte value.
136///
137/// # Safety
138///
139/// `pitched` must cover the `extent` region.
140pub unsafe fn memset_3d(pitched: cudaPitchedPtr, value: i32, extent: cudaExtent) -> Result<()> { unsafe {
141    let r = runtime()?;
142    let cu = r.cuda_memset_3d()?;
143    // Note: the C signature takes cudaPitchedPtr by value, but we pass a
144    // pointer for portability (matches how the PFN is typed at sys layer).
145    let mut p = pitched;
146    check(cu(
147        &mut p as *mut cudaPitchedPtr as *mut c_void,
148        value,
149        &extent as *const cudaExtent as *const c_void,
150    ))
151}}