use crate::ffi;
use crate::runtime::{Future, SynchronizeFuture};

type Result<T> = std::result::Result<T, crate::error::Error>;

/// CUDA stream.
pub struct Stream {
    inner: ffi::stream::Stream,
}

impl Stream {
    /// Create a [`Stream`] object that represents the default stream, also known as the null stream.
    ///
    /// Refer to the [CUDA documentation](https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html)
    /// for more information regarding the default ("null") stream.
    ///
    /// # Prefer owned streams
    ///
    /// It is recommended to use owned streams as much as possible, for two reasons:
    ///
    /// * Using separate streams for semantically unrelated sequences of operations allows the GPU
    ///   to overlap operations and improves parallelism.
    /// * Using the default stream can incur implicit synchronization, even on other streams, which
    ///   degrades their performance.
    ///
    /// Note that nothing enforces that there is only one [`Stream`] object representing the
    /// default stream. This is safe because all operations on it are serialized anyway.
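    ///
    /// # Example
    ///
    /// A minimal usage sketch (marked `ignore` because it assumes a CUDA-capable device and the
    /// surrounding crate layout):
    ///
    /// ```ignore
    /// // Wrap the default ("null") stream; no device call is needed for this.
    /// let stream = Stream::null();
    ///
    /// // The default stream serializes all work submitted to it, so synchronizing it
    /// // waits for everything previously enqueued without an explicit stream.
    /// stream.synchronize().await?;
    /// ```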
    pub fn null() -> Self {
        Self {
            inner: ffi::stream::Stream::null(),
        }
    }

    /// Create an asynchronous stream.
    ///
    /// [CUDA documentation](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html#group__CUDART__STREAM_1g6a3c4b819e6a994c26d0c4824a4c80da)
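    ///
    /// # Example
    ///
    /// A minimal usage sketch (marked `ignore` because it assumes an async runtime such as Tokio
    /// and a CUDA-capable device):
    ///
    /// ```ignore
    /// // Stream creation is performed off the async executor, so it must be awaited.
    /// let stream = Stream::new().await?;
    /// ```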
    pub async fn new() -> Result<Self> {
        let inner = Future::new(ffi::stream::Stream::new).await?;
        Ok(Self { inner })
    }

    /// Synchronize the stream. The returned future only resolves once all work currently enqueued
    /// on the stream has completed.
    ///
    /// [CUDA documentation](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html#group__CUDART__STREAM_1g74aa9f4b1c2f12d994bf13876a5a2498)
    ///
    /// # Behavior
    ///
    /// In contrast to most of the API, this future does not become ready eagerly. Instead, a
    /// callback is pushed onto the given stream that will be invoked to make the future ready once
    /// all work that was previously queued asynchronously on the stream has completed.
    ///
    /// Internally, the future uses `cudaStreamAddCallback` to schedule the callback on the stream.
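    ///
    /// # Example
    ///
    /// A minimal usage sketch (marked `ignore` because it assumes a CUDA-capable device and that
    /// some work has already been enqueued on the stream):
    ///
    /// ```ignore
    /// let stream = Stream::new().await?;
    /// // ... enqueue kernels or copies on `stream` here ...
    ///
    /// // Resolves only after everything enqueued above has finished on the device.
    /// stream.synchronize().await?;
    /// ```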
    pub async fn synchronize(&self) -> Result<()> {
        SynchronizeFuture::new(self).await
    }

    /// Access the inner synchronous implementation of [`Stream`].
    #[inline(always)]
    pub fn inner(&self) -> &ffi::stream::Stream {
        &self.inner
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[tokio::test]
    async fn test_new() {
        assert!(Stream::new().await.is_ok());
    }

    #[tokio::test]
    async fn test_synchronize() {
        let stream = Stream::new().await.unwrap();
        assert!(stream.synchronize().await.is_ok());
    }

    #[tokio::test]
    async fn test_synchronize_null_stream() {
        let stream = Stream::null();
        assert!(stream.synchronize().await.is_ok());
    }
}