use crate::ffi;
use crate::runtime::{Future, SynchronizeFuture};
type Result<T> = std::result::Result<T, crate::error::Error>;
/// CUDA stream.
pub struct Stream {
inner: ffi::stream::Stream,
}
impl Stream {
/// Create a [`Stream`] object that represents the default stream, also known as the null stream.
///
/// Refer to the [CUDA documentation](https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html)
/// for more information regarding the default ("null") stream.
///
/// # Prefer owned streams
///
/// It is recommended to use owned streams as much as possible, for two reasons:
///
/// * Using separate streams for semantically unrelated sequences of operations allows the GPU to
/// overlap operations and improves parallelism.
/// * Using the default stream can incur implicit synchronization, even on other streams, which
/// degrades their performance.
///
/// Note that nothing enforces that only one [`Stream`] object represents the default stream.
/// This is safe because all operations on the default stream are serialized anyway.
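///
/// # Example
///
/// A minimal usage sketch; it assumes the crate root re-exports this type as `async_cuda::Stream`:
///
/// ```no_run
/// # #[tokio::main]
/// # async fn main() {
/// use async_cuda::Stream;
///
/// // Obtain a handle to the default ("null") stream and wait for all work
/// // currently enqueued on it to finish.
/// let stream = Stream::null();
/// stream.synchronize().await.unwrap();
/// # }
/// ```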
pub fn null() -> Self {
Self {
inner: ffi::stream::Stream::null(),
}
}
/// Create an asynchronous stream.
///
/// [CUDA documentation](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html#group__CUDART__STREAM_1g6a3c4b819e6a994c26d0c4824a4c80da)
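///
/// # Example
///
/// A minimal sketch of creating an owned stream; the `async_cuda::Stream` import path is an
/// assumption about how the crate root re-exports this type:
///
/// ```no_run
/// # #[tokio::main]
/// # async fn main() {
/// use async_cuda::Stream;
///
/// // Create a new, owned asynchronous stream.
/// let stream = Stream::new().await.unwrap();
/// # }
/// ```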
pub async fn new() -> Result<Self> {
let inner = Future::new(ffi::stream::Stream::new).await?;
Ok(Self { inner })
}
/// Synchronize the stream. The returned future resolves only once all work currently enqueued on
/// the stream has completed.
///
/// [CUDA documentation](https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html#group__CUDART__STREAM_1g74aa9f4b1c2f12d994bf13876a5a2498)
///
/// # Behavior
///
/// In contrast to most of the API, this future does not become ready eagerly. Instead, a
/// callback is pushed onto the given stream; it is invoked to make the future ready once all
/// previously queued asynchronous work on the stream has completed.
///
/// Internally, the future uses `cudaStreamAddCallback` to schedule the callback on the stream.
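///
/// # Example
///
/// A minimal sketch; the `async_cuda::Stream` import path is assumed, and whatever work was
/// enqueued on the stream beforehand is what `synchronize` waits for:
///
/// ```no_run
/// # #[tokio::main]
/// # async fn main() {
/// use async_cuda::Stream;
///
/// let stream = Stream::new().await.unwrap();
/// // ... enqueue asynchronous work on `stream` here ...
///
/// // Resolves only once all previously queued work on the stream has completed.
/// stream.synchronize().await.unwrap();
/// # }
/// ```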
pub async fn synchronize(&self) -> Result<()> {
SynchronizeFuture::new(self).await
}
/// Access the inner synchronous implementation of [`Stream`].
#[inline(always)]
pub fn inner(&self) -> &ffi::stream::Stream {
&self.inner
}
}
#[cfg(test)]
mod tests {
use super::*;
#[tokio::test]
async fn test_new() {
assert!(Stream::new().await.is_ok());
}
#[tokio::test]
async fn test_synchronize() {
let stream = Stream::new().await.unwrap();
assert!(stream.synchronize().await.is_ok());
}
#[tokio::test]
async fn test_synchronize_null_stream() {
let stream = Stream::null();
assert!(stream.synchronize().await.is_ok());
}
}