baracuda_runtime/
memcpy2d.rs1use core::ffi::c_void;
8use core::mem::size_of;
9
10use baracuda_cuda_sys::runtime::{cudaMemcpyKind, runtime};
11use baracuda_types::DeviceRepr;
12
13use crate::error::{check, Result};
14use crate::stream::Stream;
15
16pub struct PitchedBuffer<T: DeviceRepr> {
19 ptr: *mut c_void,
20 pitch_bytes: usize,
21 width_elems: usize,
22 height: usize,
23 _marker: core::marker::PhantomData<T>,
24}
25
26unsafe impl<T: DeviceRepr + Send> Send for PitchedBuffer<T> {}
27
28impl<T: DeviceRepr> core::fmt::Debug for PitchedBuffer<T> {
29 fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
30 f.debug_struct("PitchedBuffer")
31 .field("ptr", &self.ptr)
32 .field("width_elems", &self.width_elems)
33 .field("height", &self.height)
34 .field("pitch_bytes", &self.pitch_bytes)
35 .field("type", &core::any::type_name::<T>())
36 .finish()
37 }
38}
39
40impl<T: DeviceRepr> PitchedBuffer<T> {
41 pub fn new(width_elems: usize, height: usize) -> Result<Self> {
43 let r = runtime()?;
44 let cu = r.cuda_malloc_pitch()?;
45 let width_bytes = width_elems
46 .checked_mul(size_of::<T>())
47 .expect("overflow in 2D allocation width");
48 let mut ptr: *mut c_void = core::ptr::null_mut();
49 let mut pitch: usize = 0;
50 check(unsafe { cu(&mut ptr, &mut pitch, width_bytes, height) })?;
51 Ok(Self {
52 ptr,
53 pitch_bytes: pitch,
54 width_elems,
55 height,
56 _marker: core::marker::PhantomData,
57 })
58 }
59
60 #[inline]
61 pub fn width_elems(&self) -> usize {
62 self.width_elems
63 }
64 #[inline]
65 pub fn height(&self) -> usize {
66 self.height
67 }
68 #[inline]
70 pub fn pitch_bytes(&self) -> usize {
71 self.pitch_bytes
72 }
73 #[inline]
74 pub fn as_raw(&self) -> *mut c_void {
75 self.ptr
76 }
77}
78
79impl<T: DeviceRepr> Drop for PitchedBuffer<T> {
80 fn drop(&mut self) {
81 if self.ptr.is_null() {
82 return;
83 }
84 if let Ok(r) = runtime() {
85 if let Ok(cu) = r.cuda_free() {
86 let _ = unsafe { cu(self.ptr) };
87 }
88 }
89 }
90}
91
92pub fn copy_h_to_d_2d<T: DeviceRepr>(
95 src: &[T],
96 src_host_pitch_bytes: usize,
97 dst: &PitchedBuffer<T>,
98 width_elems: usize,
99 height: usize,
100) -> Result<()> {
101 assert!(width_elems <= dst.width_elems);
102 assert!(height <= dst.height);
103 let r = runtime()?;
104 let cu = r.cuda_memcpy_2d()?;
105 check(unsafe {
106 cu(
107 dst.ptr,
108 dst.pitch_bytes,
109 src.as_ptr() as *const c_void,
110 src_host_pitch_bytes,
111 width_elems * size_of::<T>(),
112 height,
113 cudaMemcpyKind::HostToDevice,
114 )
115 })
116}
117
118pub fn copy_d_to_h_2d<T: DeviceRepr>(
120 src: &PitchedBuffer<T>,
121 dst: &mut [T],
122 dst_host_pitch_bytes: usize,
123 width_elems: usize,
124 height: usize,
125) -> Result<()> {
126 assert!(width_elems <= src.width_elems);
127 assert!(height <= src.height);
128 let r = runtime()?;
129 let cu = r.cuda_memcpy_2d()?;
130 check(unsafe {
131 cu(
132 dst.as_mut_ptr() as *mut c_void,
133 dst_host_pitch_bytes,
134 src.ptr,
135 src.pitch_bytes,
136 width_elems * size_of::<T>(),
137 height,
138 cudaMemcpyKind::DeviceToHost,
139 )
140 })
141}
142
143pub fn copy_h_to_d_2d_async<T: DeviceRepr>(
145 src: &[T],
146 src_host_pitch_bytes: usize,
147 dst: &PitchedBuffer<T>,
148 width_elems: usize,
149 height: usize,
150 stream: &Stream,
151) -> Result<()> {
152 assert!(width_elems <= dst.width_elems);
153 assert!(height <= dst.height);
154 let r = runtime()?;
155 let cu = r.cuda_memcpy_2d_async()?;
156 check(unsafe {
157 cu(
158 dst.ptr,
159 dst.pitch_bytes,
160 src.as_ptr() as *const c_void,
161 src_host_pitch_bytes,
162 width_elems * size_of::<T>(),
163 height,
164 cudaMemcpyKind::HostToDevice,
165 stream.as_raw(),
166 )
167 })
168}
169
170pub fn memset_2d<T: DeviceRepr>(
172 dst: &PitchedBuffer<T>,
173 value: u8,
174 width_elems: usize,
175 height: usize,
176) -> Result<()> {
177 let r = runtime()?;
178 let cu = r.cuda_memset_2d()?;
179 check(unsafe {
180 cu(
181 dst.ptr,
182 dst.pitch_bytes,
183 value as core::ffi::c_int,
184 width_elems * size_of::<T>(),
185 height,
186 )
187 })
188}
189
190pub fn memset_2d_async<T: DeviceRepr>(
192 dst: &PitchedBuffer<T>,
193 value: u8,
194 width_elems: usize,
195 height: usize,
196 stream: &Stream,
197) -> Result<()> {
198 let r = runtime()?;
199 let cu = r.cuda_memset_2d_async()?;
200 check(unsafe {
201 cu(
202 dst.ptr,
203 dst.pitch_bytes,
204 value as core::ffi::c_int,
205 width_elems * size_of::<T>(),
206 height,
207 stream.as_raw(),
208 )
209 })
210}