// async_cuda_core/memory/device2d.rs

1use crate::ffi;
2use crate::memory::HostBuffer;
3use crate::runtime::Future;
4use crate::stream::Stream;
5
6type Result<T> = std::result::Result<T, crate::error::Error>;
7
8/// A buffer on the device.
9///
10/// # Example
11///
12/// Copying data from a [`HostBuffer`] to a [`DeviceBuffer2D`]:
13///
14/// ```
15/// # use async_cuda_core::{DeviceBuffer2D, HostBuffer, Stream};
16/// # tokio_test::block_on(async {
17/// let stream = Stream::new().await.unwrap();
18/// let all_ones = vec![1_u8; 300];
19/// let host_buffer = HostBuffer::<u8>::from_slice(&all_ones).await;
20/// let mut device_buffer = DeviceBuffer2D::<u8>::new(10, 10, 3).await;
21/// device_buffer.copy_from(&host_buffer, &stream).await.unwrap();
22/// # })
23/// ```
24pub struct DeviceBuffer2D<T: Copy + 'static> {
25    inner: ffi::memory::DeviceBuffer2D<T>,
26}
27
28impl<T: Copy + 'static> DeviceBuffer2D<T> {
29    /// Allocates 2D memory on the device.
30    ///
31    /// [CUDA documentation](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1g32bd7a39135594788a542ae72217775c)
32    ///
33    /// # Arguments
34    ///
35    /// * `width` - Width of 2-dimensional buffer.
36    /// * `height` - Height of 2-dimensional buffer.
37    /// * `num_channels` - Number of channels per item.
38    pub async fn new(width: usize, height: usize, num_channels: usize) -> Self {
39        let inner =
40            Future::new(move || ffi::memory::DeviceBuffer2D::<T>::new(width, height, num_channels))
41                .await;
42        Self { inner }
43    }
44
45    /// Allocate memory on the device, and copy 3D array from host into it.
46    ///
47    /// This function creates a temporary [`HostBuffer`], copies the slice into it, then finally
48    /// copies the data from the host buffer to the [`DeviceBuffer`].
49    ///
50    /// The given stream is automatically synchronized, since the temporary host buffer might
51    /// otherwise be dropped before the copy can complete.
52    ///
53    /// # Arguments
54    ///
55    /// * `array` - 3-dimensional array to copy into the buffer. The first and second dimensions are
56    ///   equivalent to the height and width of the 2D buffer (respectively), and the third
57    ///   dimension is the number of channels.
58    /// * `stream` - Stream to use.
59    #[cfg(feature = "ndarray")]
60    pub async fn from_array(array: &ndarray::ArrayView3<'_, T>, stream: &Stream) -> Result<Self> {
61        let host_buffer = HostBuffer::from_array(array).await;
62        let (height, width, num_channels) = array.dim();
63        let mut this = Self::new(width, height, num_channels).await;
64        this.copy_from(&host_buffer, stream).await?;
65        Ok(this)
66    }
67
68    /// Copies memory from the provided pinned host buffer to this 2D buffer.
69    ///
70    /// This function synchronizes the stream implicitly.
71    ///
72    /// The host buffer must be of the same size. For the 2D buffer, the total number of elements is
73    /// `width` times `height` times `num_channels`.
74    ///
75    /// [CUDA documentation](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1ge529b926e8fb574c2666a9a1d58b0dc1)
76    ///
77    /// # Pinned transfer
78    ///
79    /// The other buffer (of type [`HostBuffer`]) is always a pinned buffer. This function is
80    /// guaranteed to produce a pinned transfer on the runtime thread.
81    ///
82    /// # Stream ordered semantics
83    ///
84    /// This function uses stream ordered semantics. It can only be guaranteed to complete
85    /// sequentially relative to operations scheduled on the same stream or the default stream.
86    ///
87    /// # Arguments
88    ///
89    /// * `other` - Buffer to copy from.
90    /// * `stream` - Stream to use.
91    #[inline]
92    pub async fn copy_from(&mut self, other: &HostBuffer<T>, stream: &Stream) -> Result<()> {
93        // SAFETY: Stream is synchronized after this.
94        unsafe {
95            self.copy_from_async(other, stream).await?;
96        }
97        stream.synchronize().await?;
98        Ok(())
99    }
100
101    /// Copies memory from the provided pinned host buffer to this 2D buffer.
102    ///
103    /// The host buffer must be of the same size. For the 2D buffer, the total number of elements is
104    /// `width` times `height` times `num_channels`.
105    ///
106    /// [CUDA documentation](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1ge529b926e8fb574c2666a9a1d58b0dc1)
107    ///
108    /// # Pinned transfer
109    ///
110    /// The other buffer (of type [`HostBuffer`]) is always a pinned buffer. This function is
111    /// guaranteed to produce a pinned transfer on the runtime thread.
112    ///
113    /// # Stream ordered semantics
114    ///
115    /// This function uses stream ordered semantics. It can only be guaranteed to complete
116    /// sequentially relative to operations scheduled on the same stream or the default stream.
117    ///
118    /// # Safety
119    ///
120    /// This function is unsafe because the operation might not have completed when the function
121    /// returns, and thus the state of the buffer is undefined.
122    ///
123    /// # Arguments
124    ///
125    /// * `other` - Buffer to copy from.
126    /// * `stream` - Stream to use.
127    pub async unsafe fn copy_from_async(
128        &mut self,
129        other: &HostBuffer<T>,
130        stream: &Stream,
131    ) -> Result<()> {
132        assert_eq!(self.num_elements(), other.num_elements());
133        Future::new(move || self.inner.copy_from_async(other.inner(), stream.inner())).await
134    }
135
136    /// Copies memory from this 2D buffer to the provided pinned host buffer.
137    ///
138    /// The host buffer must be of the same size. For the 2D buffer, the total number of elements is
139    /// `width` times `height` times `num_channels`.
140    ///
141    /// This function synchronizes the stream implicitly.
142    ///
143    /// [CUDA documentation](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1ge529b926e8fb574c2666a9a1d58b0dc1)
144    ///
145    /// # Pinned transfer
146    ///
147    /// The other buffer (of type [`HostBuffer`]) is always a pinned buffer. This function is
148    /// guaranteed to produce a pinned transfer on the runtime thread.
149    ///
150    /// # Stream ordered semantics
151    ///
152    /// This function uses stream ordered semantics. It can only be guaranteed to complete
153    /// sequentially relative to operations scheduled on the same stream or the default stream.
154    ///
155    /// # Arguments
156    ///
157    /// * `other` - Buffer to copy to.
158    /// * `stream` - Stream to use.
159    #[inline]
160    pub async fn copy_to(&self, other: &mut HostBuffer<T>, stream: &Stream) -> Result<()> {
161        // SAFETY: Stream is synchronized after this.
162        unsafe {
163            self.copy_to_async(other, stream).await?;
164        }
165        stream.synchronize().await?;
166        Ok(())
167    }
168
169    /// Copies memory from this 2D buffer to the provided pinned host buffer.
170    ///
171    /// The host buffer must be of the same size. For the 2D buffer, the total number of elements is
172    /// `width` times `height` times `num_channels`.
173    ///
174    /// [CUDA documentation](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1ge529b926e8fb574c2666a9a1d58b0dc1)
175    ///
176    /// # Pinned transfer
177    ///
178    /// The other buffer (of type [`HostBuffer`]) is always a pinned buffer. This function is
179    /// guaranteed to produce a pinned transfer on the runtime thread.
180    ///
181    /// # Stream ordered semantics
182    ///
183    /// This function uses stream ordered semantics. It can only be guaranteed to complete
184    /// sequentially relative to operations scheduled on the same stream or the default stream.
185    ///
186    /// # Safety
187    ///
188    /// This function is unsafe because the operation might not have completed when the function
189    /// returns, and thus the state of the buffer is undefined.
190    ///
191    /// # Arguments
192    ///
193    /// * `other` - Buffer to copy to.
194    /// * `stream` - Stream to use.
195    pub async unsafe fn copy_to_async(
196        &self,
197        other: &mut HostBuffer<T>,
198        stream: &Stream,
199    ) -> Result<()> {
200        assert_eq!(self.num_elements(), other.num_elements());
201        Future::new(move || self.inner.copy_to_async(other.inner_mut(), stream.inner())).await
202    }
203
204    /// Fill the entire buffer with the given byte.
205    ///
206    /// [CUDA documentation](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY.html#group__CUDART__MEMORY_1g8fdcc53996ff49c570f4b5ead0256ef0)
207    ///
208    /// # Stream ordered semantics
209    ///
210    /// This function uses stream ordered semantics. It can only be guaranteed to complete
211    /// sequentially relative to operations scheduled on the same stream or the default stream.
212    ///
213    /// # Arguments
214    ///
215    /// * `value` - Byte value to fill buffer with.
216    pub async fn fill_with_byte(&mut self, value: u8, stream: &Stream) -> Result<()> {
217        Future::new(move || self.inner.fill_with_byte(value, stream.inner())).await
218    }
219
220    /// Get 2D buffer width.
221    #[inline(always)]
222    pub fn width(&self) -> usize {
223        self.inner.width
224    }
225
226    /// Get 2D buffer height.
227    #[inline(always)]
228    pub fn height(&self) -> usize {
229        self.inner.height
230    }
231
232    /// Get 2D buffer number of channels.
233    #[inline(always)]
234    pub fn num_channels(&self) -> usize {
235        self.inner.num_channels
236    }
237
238    /// Get the total number of elements in buffer.
239    ///
240    /// This is equal to: `width` times `height` times `num_channels`.
241    #[inline(always)]
242    pub fn num_elements(&self) -> usize {
243        self.inner.num_elements()
244    }
245
246    /// Access the inner synchronous implementation of [`DeviceBuffer2D`].
247    #[inline(always)]
248    pub fn inner(&self) -> &ffi::memory::DeviceBuffer2D<T> {
249        &self.inner
250    }
251
252    /// Access the inner synchronous implementation of [`DeviceBuffer2D`].
253    #[inline(always)]
254    pub fn inner_mut(&mut self) -> &mut ffi::memory::DeviceBuffer2D<T> {
255        &mut self.inner
256    }
257}
258
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly allocated buffer reports the dimensions it was created with.
    #[tokio::test]
    async fn test_new() {
        let buffer = DeviceBuffer2D::<u32>::new(120, 80, 3).await;
        assert_eq!(buffer.width(), 120);
        assert_eq!(buffer.height(), 80);
        assert_eq!(buffer.num_channels(), 3);
        assert_eq!(buffer.num_elements(), 120 * 80 * 3);
        // A row holds 120 * 3 = 360 elements, so the pitch can never be smaller than that.
        // NOTE(review): if `pitch` is expressed in bytes, a tighter lower bound would be
        // `120 * 3 * size_of::<u32>()` - confirm against the ffi implementation.
        assert!(buffer.inner().pitch >= 120 * 3);
    }

    /// Data survives a host -> device -> host -> device -> host round trip on a single stream.
    #[tokio::test]
    async fn test_copy() {
        const NUM_ELEMENTS: usize = 10 * 5 * 3;

        let stream = Stream::new().await.unwrap();
        let all_ones = vec![1_u32; NUM_ELEMENTS];
        let host_buffer_all_ones = HostBuffer::from_slice(all_ones.as_slice()).await;

        let mut device_buffer = DeviceBuffer2D::<u32>::new(10, 5, 3).await;
        unsafe {
            device_buffer
                .copy_from_async(&host_buffer_all_ones, &stream)
                .await
                .unwrap();
        }

        let mut host_buffer = HostBuffer::<u32>::new(NUM_ELEMENTS).await;
        unsafe {
            device_buffer
                .copy_to_async(&mut host_buffer, &stream)
                .await
                .unwrap();
        }

        let mut another_device_buffer = DeviceBuffer2D::<u32>::new(10, 5, 3).await;
        unsafe {
            another_device_buffer
                .copy_from_async(&host_buffer, &stream)
                .await
                .unwrap();
        }

        let mut return_host_buffer = HostBuffer::<u32>::new(NUM_ELEMENTS).await;
        unsafe {
            another_device_buffer
                .copy_to_async(&mut return_host_buffer, &stream)
                .await
                .unwrap();
        }

        // All copies above were only enqueued; wait for them before inspecting the result.
        stream.synchronize().await.unwrap();

        assert_eq!(return_host_buffer.num_elements(), NUM_ELEMENTS);
        let return_data = return_host_buffer.to_vec();
        assert_eq!(return_data.len(), NUM_ELEMENTS);
        assert!(return_data.into_iter().all(|v| v == 1_u32));
    }

    /// Element order is preserved when copying a small 2x2x3 image to the device and back.
    #[tokio::test]
    async fn test_copy_2d() {
        let stream = Stream::new().await.unwrap();
        let image: [u8; 12] = [1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4];
        let host_buffer = HostBuffer::from_slice(&image).await;
        let mut device_buffer = DeviceBuffer2D::<u8>::new(2, 2, 3).await;
        unsafe {
            device_buffer
                .copy_from_async(&host_buffer, &stream)
                .await
                .unwrap();
        }
        let mut return_host_buffer = HostBuffer::<u8>::new(image.len()).await;
        unsafe {
            device_buffer
                .copy_to_async(&mut return_host_buffer, &stream)
                .await
                .unwrap();
        }
        // Both copies were only enqueued; wait for them before inspecting the result.
        stream.synchronize().await.unwrap();
        assert_eq!(&return_host_buffer.to_vec(), &image);
    }

    /// Every byte read back after `fill_with_byte` equals the fill value.
    #[tokio::test]
    async fn test_fill_with_byte() {
        let stream = Stream::new().await.unwrap();
        let mut device_buffer = DeviceBuffer2D::<u8>::new(2, 2, 3).await;
        let mut host_buffer = HostBuffer::<u8>::new(2 * 2 * 3).await;
        device_buffer.fill_with_byte(0xab, &stream).await.unwrap();
        // `copy_to` runs on the same stream and synchronizes it, so the fill has completed by the
        // time the host buffer is inspected.
        device_buffer
            .copy_to(&mut host_buffer, &stream)
            .await
            .unwrap();
        assert_eq!(host_buffer.to_vec(), &[0xab_u8; 2 * 2 * 3]);
    }

    /// Copying into a host buffer with a different number of elements must panic.
    #[tokio::test]
    #[should_panic]
    async fn test_it_panics_when_copying_invalid_size() {
        let stream = Stream::new().await.unwrap();
        // 5 * 5 * 3 = 75 elements on the device versus 80 on the host.
        let device_buffer = DeviceBuffer2D::<u32>::new(5, 5, 3).await;
        let mut host_buffer = HostBuffer::<u32>::new(80).await;
        let _ = unsafe { device_buffer.copy_to_async(&mut host_buffer, &stream).await };
    }
}