async_cuda_core/
stream.rs

1use crate::ffi;
2use crate::runtime::{Future, SynchronizeFuture};
3
/// Module-local shorthand for a [`std::result::Result`] whose error type is fixed to
/// [`crate::error::Error`].
type Result<T> = std::result::Result<T, crate::error::Error>;
5
/// CUDA stream.
///
/// Thin wrapper around the synchronous [`ffi::stream::Stream`]. The asynchronous operations
/// ([`Stream::new`], [`Stream::synchronize`]) are layered on top of that inner value, which can
/// be borrowed through [`Stream::inner`].
pub struct Stream {
    /// The wrapped synchronous stream implementation.
    inner: ffi::stream::Stream,
}
10
11impl Stream {
12    /// Create a [`Stream`] object that represent the default stream, also known as the null stream.
13    ///
14    /// Refer to the [CUDA documentation](https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html)
15    /// for more information regarding the default ("null") stream:
16    ///
17    /// # Prefer owned streams
18    ///
19    /// It is recommended to use owned streams as much as possible, for two reasons:
20    ///
21    /// * Using streams to separate semanticly unrelated streams of operations allows the GPU to
22    ///   overlap operations and improved parallelism.
23    /// * Using the default stream can incur implicit synchronization, even on other streams, which
24    ///   causes their performance to degrade.
25    ///
26    /// Note that it is not enforced that there is only one [`Stream`] object that represents the
27    /// default stream. This is safe because all operations are serialized anyway.
28    pub fn null() -> Self {
29        Self {
30            inner: ffi::stream::Stream::null(),
31        }
32    }
33
34    /// Create an asynchronous stream.
35    ///
36    /// [CUDA documentation](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html#group__CUDART__STREAM_1g6a3c4b819e6a994c26d0c4824a4c80da)
37    pub async fn new() -> Result<Self> {
38        let inner = Future::new(ffi::stream::Stream::new).await?;
39        Ok(Self { inner })
40    }
41
42    /// Synchronize stream. This future will only return once all currently enqueued work on the
43    /// stream is done.
44    ///
45    /// [CUDA documentation](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html#group__CUDART__STREAM_1g74aa9f4b1c2f12d994bf13876a5a2498)
46    ///
47    /// # Behavior
48    ///
49    /// In constrast to most of the API, this future does not become ready eagerly. Instead, a
50    /// callback is pushed onto the given stream that will be invoked to make the future ready once
51    /// all work on the stream that was previously queued asynchroneously is completed.
52    ///
53    /// Internally, the future uses `cudaStreamAddCallback` to schedule the callback on the stream.
54    pub async fn synchronize(&self) -> Result<()> {
55        SynchronizeFuture::new(self).await
56    }
57
58    /// Access the inner synchronous implementation of [`Stream`].
59    #[inline(always)]
60    pub fn inner(&self) -> &ffi::stream::Stream {
61        &self.inner
62    }
63}
64
#[cfg(test)]
mod tests {
    use super::*;

    /// Creating an owned stream reports success.
    #[tokio::test]
    async fn test_new() {
        let created = Stream::new().await;
        assert!(created.is_ok());
    }

    /// Synchronizing a freshly created owned stream reports success.
    #[tokio::test]
    async fn test_synchronize() {
        let owned = Stream::new().await.unwrap();
        let outcome = owned.synchronize().await;
        assert!(outcome.is_ok());
    }

    /// Synchronizing the default ("null") stream reports success.
    #[tokio::test]
    async fn test_synchronize_null_stream() {
        let default_stream = Stream::null();
        let outcome = default_stream.synchronize().await;
        assert!(outcome.is_ok());
    }
}