async_cuda_core/memory/device2d.rs
1use crate::ffi;
2use crate::memory::HostBuffer;
3use crate::runtime::Future;
4use crate::stream::Stream;
5
6type Result<T> = std::result::Result<T, crate::error::Error>;
7
8/// A buffer on the device.
9///
10/// # Example
11///
12/// Copying data from a [`HostBuffer`] to a [`DeviceBuffer2D`]:
13///
14/// ```
15/// # use async_cuda_core::{DeviceBuffer2D, HostBuffer, Stream};
16/// # tokio_test::block_on(async {
17/// let stream = Stream::new().await.unwrap();
18/// let all_ones = vec![1_u8; 300];
19/// let host_buffer = HostBuffer::<u8>::from_slice(&all_ones).await;
20/// let mut device_buffer = DeviceBuffer2D::<u8>::new(10, 10, 3).await;
21/// device_buffer.copy_from(&host_buffer, &stream).await.unwrap();
22/// # })
23/// ```
24pub struct DeviceBuffer2D<T: Copy + 'static> {
25 inner: ffi::memory::DeviceBuffer2D<T>,
26}
27
28impl<T: Copy + 'static> DeviceBuffer2D<T> {
29 /// Allocates 2D memory on the device.
30 ///
31 /// [CUDA documentation](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1g32bd7a39135594788a542ae72217775c)
32 ///
33 /// # Arguments
34 ///
35 /// * `width` - Width of 2-dimensional buffer.
36 /// * `height` - Height of 2-dimensional buffer.
37 /// * `num_channels` - Number of channels per item.
38 pub async fn new(width: usize, height: usize, num_channels: usize) -> Self {
39 let inner =
40 Future::new(move || ffi::memory::DeviceBuffer2D::<T>::new(width, height, num_channels))
41 .await;
42 Self { inner }
43 }
44
45 /// Allocate memory on the device, and copy 3D array from host into it.
46 ///
47 /// This function creates a temporary [`HostBuffer`], copies the slice into it, then finally
48 /// copies the data from the host buffer to the [`DeviceBuffer`].
49 ///
50 /// The given stream is automatically synchronized, since the temporary host buffer might
51 /// otherwise be dropped before the copy can complete.
52 ///
53 /// # Arguments
54 ///
55 /// * `array` - 3-dimensional array to copy into the buffer. The first and second dimensions are
56 /// equivalent to the height and width of the 2D buffer (respectively), and the third
57 /// dimension is the number of channels.
58 /// * `stream` - Stream to use.
59 #[cfg(feature = "ndarray")]
60 pub async fn from_array(array: &ndarray::ArrayView3<'_, T>, stream: &Stream) -> Result<Self> {
61 let host_buffer = HostBuffer::from_array(array).await;
62 let (height, width, num_channels) = array.dim();
63 let mut this = Self::new(width, height, num_channels).await;
64 this.copy_from(&host_buffer, stream).await?;
65 Ok(this)
66 }
67
68 /// Copies memory from the provided pinned host buffer to this 2D buffer.
69 ///
70 /// This function synchronizes the stream implicitly.
71 ///
72 /// The host buffer must be of the same size. For the 2D buffer, the total number of elements is
73 /// `width` times `height` times `num_channels`.
74 ///
75 /// [CUDA documentation](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1ge529b926e8fb574c2666a9a1d58b0dc1)
76 ///
77 /// # Pinned transfer
78 ///
79 /// The other buffer (of type [`HostBuffer`]) is always a pinned buffer. This function is
80 /// guaranteed to produce a pinned transfer on the runtime thread.
81 ///
82 /// # Stream ordered semantics
83 ///
84 /// This function uses stream ordered semantics. It can only be guaranteed to complete
85 /// sequentially relative to operations scheduled on the same stream or the default stream.
86 ///
87 /// # Arguments
88 ///
89 /// * `other` - Buffer to copy from.
90 /// * `stream` - Stream to use.
91 #[inline]
92 pub async fn copy_from(&mut self, other: &HostBuffer<T>, stream: &Stream) -> Result<()> {
93 // SAFETY: Stream is synchronized after this.
94 unsafe {
95 self.copy_from_async(other, stream).await?;
96 }
97 stream.synchronize().await?;
98 Ok(())
99 }
100
101 /// Copies memory from the provided pinned host buffer to this 2D buffer.
102 ///
103 /// The host buffer must be of the same size. For the 2D buffer, the total number of elements is
104 /// `width` times `height` times `num_channels`.
105 ///
106 /// [CUDA documentation](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1ge529b926e8fb574c2666a9a1d58b0dc1)
107 ///
108 /// # Pinned transfer
109 ///
110 /// The other buffer (of type [`HostBuffer`]) is always a pinned buffer. This function is
111 /// guaranteed to produce a pinned transfer on the runtime thread.
112 ///
113 /// # Stream ordered semantics
114 ///
115 /// This function uses stream ordered semantics. It can only be guaranteed to complete
116 /// sequentially relative to operations scheduled on the same stream or the default stream.
117 ///
118 /// # Safety
119 ///
120 /// This function is unsafe because the operation might not have completed when the function
121 /// returns, and thus the state of the buffer is undefined.
122 ///
123 /// # Arguments
124 ///
125 /// * `other` - Buffer to copy from.
126 /// * `stream` - Stream to use.
127 pub async unsafe fn copy_from_async(
128 &mut self,
129 other: &HostBuffer<T>,
130 stream: &Stream,
131 ) -> Result<()> {
132 assert_eq!(self.num_elements(), other.num_elements());
133 Future::new(move || self.inner.copy_from_async(other.inner(), stream.inner())).await
134 }
135
136 /// Copies memory from this 2D buffer to the provided pinned host buffer.
137 ///
138 /// The host buffer must be of the same size. For the 2D buffer, the total number of elements is
139 /// `width` times `height` times `num_channels`.
140 ///
141 /// This function synchronizes the stream implicitly.
142 ///
143 /// [CUDA documentation](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1ge529b926e8fb574c2666a9a1d58b0dc1)
144 ///
145 /// # Pinned transfer
146 ///
147 /// The other buffer (of type [`HostBuffer`]) is always a pinned buffer. This function is
148 /// guaranteed to produce a pinned transfer on the runtime thread.
149 ///
150 /// # Stream ordered semantics
151 ///
152 /// This function uses stream ordered semantics. It can only be guaranteed to complete
153 /// sequentially relative to operations scheduled on the same stream or the default stream.
154 ///
155 /// # Arguments
156 ///
157 /// * `other` - Buffer to copy to.
158 /// * `stream` - Stream to use.
159 #[inline]
160 pub async fn copy_to(&self, other: &mut HostBuffer<T>, stream: &Stream) -> Result<()> {
161 // SAFETY: Stream is synchronized after this.
162 unsafe {
163 self.copy_to_async(other, stream).await?;
164 }
165 stream.synchronize().await?;
166 Ok(())
167 }
168
169 /// Copies memory from this 2D buffer to the provided pinned host buffer.
170 ///
171 /// The host buffer must be of the same size. For the 2D buffer, the total number of elements is
172 /// `width` times `height` times `num_channels`.
173 ///
174 /// [CUDA documentation](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1ge529b926e8fb574c2666a9a1d58b0dc1)
175 ///
176 /// # Pinned transfer
177 ///
178 /// The other buffer (of type [`HostBuffer`]) is always a pinned buffer. This function is
179 /// guaranteed to produce a pinned transfer on the runtime thread.
180 ///
181 /// # Stream ordered semantics
182 ///
183 /// This function uses stream ordered semantics. It can only be guaranteed to complete
184 /// sequentially relative to operations scheduled on the same stream or the default stream.
185 ///
186 /// # Safety
187 ///
188 /// This function is unsafe because the operation might not have completed when the function
189 /// returns, and thus the state of the buffer is undefined.
190 ///
191 /// # Arguments
192 ///
193 /// * `other` - Buffer to copy to.
194 /// * `stream` - Stream to use.
195 pub async unsafe fn copy_to_async(
196 &self,
197 other: &mut HostBuffer<T>,
198 stream: &Stream,
199 ) -> Result<()> {
200 assert_eq!(self.num_elements(), other.num_elements());
201 Future::new(move || self.inner.copy_to_async(other.inner_mut(), stream.inner())).await
202 }
203
204 /// Fill the entire buffer with the given byte.
205 ///
206 /// [CUDA documentation](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1g8fdcc53996ff49c570f4b5ead0256ef0)
207 ///
208 /// # Stream ordered semantics
209 ///
210 /// This function uses stream ordered semantics. It can only be guaranteed to complete
211 /// sequentially relative to operations scheduled on the same stream or the default stream.
212 ///
213 /// # Arguments
214 ///
215 /// * `value` - Byte value to fill buffer with.
216 pub async fn fill_with_byte(&mut self, value: u8, stream: &Stream) -> Result<()> {
217 Future::new(move || self.inner.fill_with_byte(value, stream.inner())).await
218 }
219
220 /// Get 2D buffer width.
221 #[inline(always)]
222 pub fn width(&self) -> usize {
223 self.inner.width
224 }
225
226 /// Get 2D buffer height.
227 #[inline(always)]
228 pub fn height(&self) -> usize {
229 self.inner.height
230 }
231
232 /// Get 2D buffer number of channels.
233 #[inline(always)]
234 pub fn num_channels(&self) -> usize {
235 self.inner.num_channels
236 }
237
238 /// Get the total number of elements in buffer.
239 ///
240 /// This is equal to: `width` times `height` times `num_channels`.
241 #[inline(always)]
242 pub fn num_elements(&self) -> usize {
243 self.inner.num_elements()
244 }
245
246 /// Access the inner synchronous implementation of [`DeviceBuffer2D`].
247 #[inline(always)]
248 pub fn inner(&self) -> &ffi::memory::DeviceBuffer2D<T> {
249 &self.inner
250 }
251
252 /// Access the inner synchronous implementation of [`DeviceBuffer2D`].
253 #[inline(always)]
254 pub fn inner_mut(&mut self) -> &mut ffi::memory::DeviceBuffer2D<T> {
255 &mut self.inner
256 }
257}
258
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly allocated buffer reports the dimensions it was created with.
    #[tokio::test]
    async fn test_new() {
        let device = DeviceBuffer2D::<u32>::new(120, 80, 3).await;
        assert_eq!(device.width(), 120);
        assert_eq!(device.height(), 80);
        assert_eq!(device.num_channels(), 3);
        assert_eq!(device.num_elements(), 120 * 80 * 3);
        assert!(device.inner().pitch >= 360);
    }

    /// Data survives two host -> device -> host round trips scheduled on one stream.
    #[tokio::test]
    async fn test_copy() {
        // 10 (width) * 5 (height) * 3 (channels).
        const NUM_ELEMENTS: usize = 150;

        let stream = Stream::new().await.unwrap();
        let ones = vec![1_u32; NUM_ELEMENTS];
        let source = HostBuffer::from_slice(ones.as_slice()).await;

        // First round trip: host -> device -> host.
        let mut first_device = DeviceBuffer2D::<u32>::new(10, 5, 3).await;
        let upload = unsafe { first_device.copy_from_async(&source, &stream) };
        upload.await.unwrap();

        let mut intermediate = HostBuffer::<u32>::new(NUM_ELEMENTS).await;
        let download = unsafe { first_device.copy_to_async(&mut intermediate, &stream) };
        download.await.unwrap();

        // Second round trip, fed by the result of the first one.
        let mut second_device = DeviceBuffer2D::<u32>::new(10, 5, 3).await;
        let upload = unsafe { second_device.copy_from_async(&intermediate, &stream) };
        upload.await.unwrap();

        let mut result = HostBuffer::<u32>::new(NUM_ELEMENTS).await;
        let download = unsafe { second_device.copy_to_async(&mut result, &stream) };
        download.await.unwrap();

        // Nothing above waited for completion; do so before inspecting host memory.
        stream.synchronize().await.unwrap();

        assert_eq!(result.num_elements(), NUM_ELEMENTS);
        let values = result.to_vec();
        assert_eq!(values.len(), NUM_ELEMENTS);
        assert!(values.into_iter().all(|value| value == 1_u32));
    }

    /// Element order is preserved when copying a small multi-channel image back and forth.
    #[tokio::test]
    async fn test_copy_2d() {
        let stream = Stream::new().await.unwrap();
        let image: [u8; 12] = [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4];
        let source = HostBuffer::from_slice(&image).await;

        let mut device = DeviceBuffer2D::<u8>::new(2, 2, 3).await;
        let upload = unsafe { device.copy_from_async(&source, &stream) };
        upload.await.unwrap();

        let mut result = HostBuffer::<u8>::new(12).await;
        let download = unsafe { device.copy_to_async(&mut result, &stream) };
        download.await.unwrap();

        stream.synchronize().await.unwrap();
        assert_eq!(&result.to_vec(), &[1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4]);
    }

    /// Every element reads back as the fill byte after `fill_with_byte`.
    #[tokio::test]
    async fn test_fill_with_byte() {
        let stream = Stream::new().await.unwrap();
        let mut device = DeviceBuffer2D::<u8>::new(2, 2, 3).await;
        let mut readback = HostBuffer::<u8>::new(2 * 2 * 3).await;

        device.fill_with_byte(0xab, &stream).await.unwrap();
        // `copy_to` synchronizes the stream, so the host data is ready afterwards.
        device.copy_to(&mut readback, &stream).await.unwrap();

        assert_eq!(readback.to_vec(), &[0xab_u8; 12]);
    }

    /// Copying into a host buffer with a different element count must panic.
    #[tokio::test]
    #[should_panic]
    async fn test_it_panics_when_copying_invalid_size() {
        let stream = Stream::new().await.unwrap();
        // 5 * 5 * 3 = 75 elements on the device versus 80 on the host.
        let device = DeviceBuffer2D::<u32>::new(5, 5, 3).await;
        let mut mismatched = HostBuffer::<u32>::new(80).await;
        let doomed = unsafe { device.copy_to_async(&mut mismatched, &stream) };
        let _ = doomed.await;
    }
}