pub struct Stream { /* private fields */ }
Expand description
A stream of work for the device to perform.
See the module-level documentation for more information.
Implementations§
Source§impl Stream
impl Stream
Sourcepub fn new(flags: StreamFlags, priority: Option<i32>) -> CudaResult<Self>
pub fn new(flags: StreamFlags, priority: Option<i32>) -> CudaResult<Self>
Create a new stream with the given flags and optional priority.
By convention, lower priority numbers represent greater
priorities. That is, work in a stream with a lower priority number may pre-empt work in
a stream with a higher priority number. Context::get_stream_priority_range
can be used
to get the range of valid priority values; if priority is set outside that range, it will
be automatically clamped to the lowest or highest number in the range.
§Examples
use rustacuda::stream::{Stream, StreamFlags};
// With default priority
let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
// With specific priority
let stream = Stream::new(StreamFlags::NON_BLOCKING, 1i32.into())?;
Examples found in repository?
8fn main() -> Result<(), Box<dyn Error>> {
9 // Set up the context, load the module, and create a stream to run kernels in.
10 rustacuda::init(CudaFlags::empty())?;
11 let device = Device::get_device(0)?;
12 let _ctx = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
13
14 let ptx = CString::new(include_str!("../resources/add.ptx"))?;
15 let module = Module::load_from_string(&ptx)?;
16 let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
17
18 // Create buffers for data
19 let mut in_x = DeviceBuffer::from_slice(&[1.0f32; 10])?;
20 let mut in_y = DeviceBuffer::from_slice(&[2.0f32; 10])?;
21 let mut out_1 = DeviceBuffer::from_slice(&[0.0f32; 10])?;
22 let mut out_2 = DeviceBuffer::from_slice(&[0.0f32; 10])?;
23
24 // This kernel adds each element in `in_x` and `in_y` and writes the result into `out`.
25 unsafe {
26 // Launch the kernel with one block of one thread, no dynamic shared memory on `stream`.
27 let result = launch!(module.sum<<<1, 1, 0, stream>>>(
28 in_x.as_device_ptr(),
29 in_y.as_device_ptr(),
30 out_1.as_device_ptr(),
31 out_1.len()
32 ));
33 result?;
34
35 // Launch the kernel again using the `function` form:
36 let function_name = CString::new("sum")?;
37 let sum = module.get_function(&function_name)?;
38 // Launch with 1x1x1 (1) blocks of 10x1x1 (10) threads, to show that you can use tuples to
39 // configure grid and block size.
40 let result = launch!(sum<<<(1, 1, 1), (10, 1, 1), 0, stream>>>(
41 in_x.as_device_ptr(),
42 in_y.as_device_ptr(),
43 out_2.as_device_ptr(),
44 out_2.len()
45 ));
46 result?;
47 }
48
49 // Kernel launches are asynchronous, so we wait for the kernels to finish executing.
50 stream.synchronize()?;
51
52 // Copy the results back to host memory
53 let mut out_host = [0.0f32; 20];
54 out_1.copy_to(&mut out_host[0..10])?;
55 out_2.copy_to(&mut out_host[10..20])?;
56
57 for x in out_host.iter() {
58 assert_eq!(3.0 as u32, *x as u32);
59 }
60
61 println!("Launched kernel successfully.");
62 Ok(())
63}
Sourcepub fn get_flags(&self) -> CudaResult<StreamFlags>
pub fn get_flags(&self) -> CudaResult<StreamFlags>
Return the flags which were used to create this stream.
§Examples
use rustacuda::stream::{Stream, StreamFlags};
let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
assert_eq!(StreamFlags::NON_BLOCKING, stream.get_flags().unwrap());
Sourcepub fn get_priority(&self) -> CudaResult<i32>
pub fn get_priority(&self) -> CudaResult<i32>
Return the priority of this stream.
If this stream was created without a priority, returns the default priority. If the stream was created with a priority outside the valid range, returns the clamped priority.
§Examples
use rustacuda::stream::{Stream, StreamFlags};
let stream = Stream::new(StreamFlags::NON_BLOCKING, 1i32.into())?;
println!("{}", stream.get_priority()?);
Sourcepub fn add_callback<T>(&self, callback: Box<T>) -> CudaResult<()>
pub fn add_callback<T>(&self, callback: Box<T>) -> CudaResult<()>
Add a callback to a stream.
The callback will be executed after all previously queued items in the stream have been completed. Subsequently queued items will not execute until the callback is finished.
Callbacks must not make any CUDA API calls.
The callback will be passed a CudaResult<()> indicating the current state of the device, with Ok(()) denoting normal operation.
§Examples
use rustacuda::stream::{Stream, StreamFlags};
let stream = Stream::new(StreamFlags::NON_BLOCKING, 1i32.into())?;
// ... queue up some work on the stream
stream.add_callback(Box::new(|status| {
println!("Device status is {:?}", status);
}));
// ... queue up some more work on the stream
Sourcepub fn synchronize(&self) -> CudaResult<()>
pub fn synchronize(&self) -> CudaResult<()>
Wait until a stream’s tasks are completed.
Waits until the device has completed all operations scheduled for this stream.
§Examples
use rustacuda::stream::{Stream, StreamFlags};
let stream = Stream::new(StreamFlags::NON_BLOCKING, 1i32.into())?;
// ... queue up some work on the stream
// Wait for the work to be completed.
stream.synchronize()?;
Examples found in repository?
8fn main() -> Result<(), Box<dyn Error>> {
9 // Set up the context, load the module, and create a stream to run kernels in.
10 rustacuda::init(CudaFlags::empty())?;
11 let device = Device::get_device(0)?;
12 let _ctx = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
13
14 let ptx = CString::new(include_str!("../resources/add.ptx"))?;
15 let module = Module::load_from_string(&ptx)?;
16 let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
17
18 // Create buffers for data
19 let mut in_x = DeviceBuffer::from_slice(&[1.0f32; 10])?;
20 let mut in_y = DeviceBuffer::from_slice(&[2.0f32; 10])?;
21 let mut out_1 = DeviceBuffer::from_slice(&[0.0f32; 10])?;
22 let mut out_2 = DeviceBuffer::from_slice(&[0.0f32; 10])?;
23
24 // This kernel adds each element in `in_x` and `in_y` and writes the result into `out`.
25 unsafe {
26 // Launch the kernel with one block of one thread, no dynamic shared memory on `stream`.
27 let result = launch!(module.sum<<<1, 1, 0, stream>>>(
28 in_x.as_device_ptr(),
29 in_y.as_device_ptr(),
30 out_1.as_device_ptr(),
31 out_1.len()
32 ));
33 result?;
34
35 // Launch the kernel again using the `function` form:
36 let function_name = CString::new("sum")?;
37 let sum = module.get_function(&function_name)?;
38 // Launch with 1x1x1 (1) blocks of 10x1x1 (10) threads, to show that you can use tuples to
39 // configure grid and block size.
40 let result = launch!(sum<<<(1, 1, 1), (10, 1, 1), 0, stream>>>(
41 in_x.as_device_ptr(),
42 in_y.as_device_ptr(),
43 out_2.as_device_ptr(),
44 out_2.len()
45 ));
46 result?;
47 }
48
49 // Kernel launches are asynchronous, so we wait for the kernels to finish executing.
50 stream.synchronize()?;
51
52 // Copy the results back to host memory
53 let mut out_host = [0.0f32; 20];
54 out_1.copy_to(&mut out_host[0..10])?;
55 out_2.copy_to(&mut out_host[10..20])?;
56
57 for x in out_host.iter() {
58 assert_eq!(3.0 as u32, *x as u32);
59 }
60
61 println!("Launched kernel successfully.");
62 Ok(())
63}
Sourcepub fn wait_event(
&self,
event: Event,
flags: StreamWaitEventFlags,
) -> CudaResult<()>
pub fn wait_event( &self, event: Event, flags: StreamWaitEventFlags, ) -> CudaResult<()>
Make the stream wait on an event.
All future work submitted to the stream will wait for the event to complete. Synchronization is performed on the device, if possible. The event may originate from different context or device than the stream.
§Example
use rustacuda::stream::{Stream, StreamFlags, StreamWaitEventFlags};
use rustacuda::event::{Event, EventFlags};
let stream_0 = Stream::new(StreamFlags::NON_BLOCKING, None)?;
let stream_1 = Stream::new(StreamFlags::NON_BLOCKING, None)?;
let event = Event::new(EventFlags::DEFAULT)?;
// do some work on stream_0 ...
// record an event
event.record(&stream_0)?;
// wait until the work on stream_0 is finished before continuing stream_1
stream_1.wait_event(event, StreamWaitEventFlags::DEFAULT)?;
Sourcepub fn drop(stream: Stream) -> DropResult<Stream>
pub fn drop(stream: Stream) -> DropResult<Stream>
Destroy a Stream, returning an error (along with the un-destroyed stream) if destruction fails.
Destroying a stream can return errors from previous asynchronous work. This function destroys the given stream and returns the error and the un-destroyed stream on failure.
§Example
use rustacuda::stream::{Stream, StreamFlags};
let stream = Stream::new(StreamFlags::NON_BLOCKING, 1i32.into())?;
match Stream::drop(stream) {
Ok(()) => println!("Successfully destroyed"),
Err((e, stream)) => {
println!("Failed to destroy stream: {:?}", e);
// Do something with stream
},
}