pub struct Stream { /* private fields */ }
Expand description
A stream of work for the device to perform.
See the module-level documentation for more information.
Implementations§
Source§impl Stream
impl Stream
Sourcepub fn new(flags: StreamFlags, priority: Option<i32>) -> CudaResult<Self>
pub fn new(flags: StreamFlags, priority: Option<i32>) -> CudaResult<Self>
Create a new stream with the given flags and optional priority.
By convention, lower priority numbers represent greater
priorities. That is, work in a stream with a lower priority number may pre-empt work in
a stream with a higher priority number. Context::get_stream_priority_range
can be used
to get the range of valid priority values; if priority is set outside that range, it will
be automatically clamped to the lowest or highest number in the range.
§Examples
use rustacuda::stream::{Stream, StreamFlags};
// With default priority
let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
// With specific priority
let stream = Stream::new(StreamFlags::NON_BLOCKING, 1i32.into())?;
Examples found in repository?
8fn main() -> Result<(), Box<dyn Error>> {
9 // Set up the context, load the module, and create a stream to run kernels in.
10 rustacuda::init(CudaFlags::empty())?;
11 let device = Device::get_device(0)?;
12 let _ctx = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
13
14 let ptx = CString::new(include_str!("../resources/add.ptx"))?;
15 let module = Module::load_from_string(&ptx)?;
16 let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
17
18 // Create buffers for data
19 let mut in_x = DeviceBuffer::from_slice(&[1.0f32; 10])?;
20 let mut in_y = DeviceBuffer::from_slice(&[2.0f32; 10])?;
21 let mut out_1 = DeviceBuffer::from_slice(&[0.0f32; 10])?;
22 let mut out_2 = DeviceBuffer::from_slice(&[0.0f32; 10])?;
23
24 // This kernel adds each element in `in_x` and `in_y` and writes the result into `out`.
25 unsafe {
26 // Launch the kernel with one block of one thread, no dynamic shared memory on `stream`.
27 let result = launch!(module.sum<<<1, 1, 0, stream>>>(
28 in_x.as_device_ptr(),
29 in_y.as_device_ptr(),
30 out_1.as_device_ptr(),
31 out_1.len()
32 ));
33 result?;
34
35 // Launch the kernel again using the `function` form:
36 let function_name = CString::new("sum")?;
37 let sum = module.get_function(&function_name)?;
38 // Launch with 1x1x1 (1) blocks of 10x1x1 (10) threads, to show that you can use tuples to
39 // configure grid and block size.
40 let result = launch!(sum<<<(1, 1, 1), (10, 1, 1), 0, stream>>>(
41 in_x.as_device_ptr(),
42 in_y.as_device_ptr(),
43 out_2.as_device_ptr(),
44 out_2.len()
45 ));
46 result?;
47 }
48
49 // Kernel launches are asynchronous, so we wait for the kernels to finish executing.
50 stream.synchronize()?;
51
52 // Copy the results back to host memory
53 let mut out_host = [0.0f32; 20];
54 out_1.copy_to(&mut out_host[0..10])?;
55 out_2.copy_to(&mut out_host[10..20])?;
56
57 for x in out_host.iter() {
58 assert_eq!(3.0 as u32, *x as u32);
59 }
60
61 println!("Launched kernel successfully.");
62 Ok(())
63}
Sourcepub fn get_flags(&self) -> CudaResult<StreamFlags>
pub fn get_flags(&self) -> CudaResult<StreamFlags>
Return the flags which were used to create this stream.
§Examples
use rustacuda::stream::{Stream, StreamFlags};
let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
assert_eq!(StreamFlags::NON_BLOCKING, stream.get_flags().unwrap());
Sourcepub fn get_priority(&self) -> CudaResult<i32>
pub fn get_priority(&self) -> CudaResult<i32>
Return the priority of this stream.
If this stream was created without a priority, returns the default priority. If the stream was created with a priority outside the valid range, returns the clamped priority.
§Examples
use rustacuda::stream::{Stream, StreamFlags};
let stream = Stream::new(StreamFlags::NON_BLOCKING, 1i32.into())?;
println!("{}", stream.get_priority()?);
Sourcepub fn add_callback<T>(&self, callback: Box<T>) -> CudaResult<()>
pub fn add_callback<T>(&self, callback: Box<T>) -> CudaResult<()>
Add a callback to a stream.
The callback will be executed after all previously queued items in the stream have been completed. Subsequently queued items will not execute until the callback is finished.
Callbacks must not make any CUDA API calls.
The callback will be passed a CudaResult<()> indicating the current state of the device, with Ok(()) denoting normal operation.
§Examples
use rustacuda::stream::{Stream, StreamFlags};
let stream = Stream::new(StreamFlags::NON_BLOCKING, 1i32.into())?;
// ... queue up some work on the stream
stream.add_callback(Box::new(|status| {
println!("Device status is {:?}", status);
}));
// ... queue up some more work on the stream
Sourcepub fn synchronize(&self) -> CudaResult<()>
pub fn synchronize(&self) -> CudaResult<()>
Wait until a stream’s tasks are completed.
Waits until the device has completed all operations scheduled for this stream.
§Examples
use rustacuda::stream::{Stream, StreamFlags};
let stream = Stream::new(StreamFlags::NON_BLOCKING, 1i32.into())?;
// ... queue up some work on the stream
// Wait for the work to be completed.
stream.synchronize()?;
Examples found in repository?
8fn main() -> Result<(), Box<dyn Error>> {
9 // Set up the context, load the module, and create a stream to run kernels in.
10 rustacuda::init(CudaFlags::empty())?;
11 let device = Device::get_device(0)?;
12 let _ctx = Context::create_and_push(ContextFlags::MAP_HOST | ContextFlags::SCHED_AUTO, device)?;
13
14 let ptx = CString::new(include_str!("../resources/add.ptx"))?;
15 let module = Module::load_from_string(&ptx)?;
16 let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
17
18 // Create buffers for data
19 let mut in_x = DeviceBuffer::from_slice(&[1.0f32; 10])?;
20 let mut in_y = DeviceBuffer::from_slice(&[2.0f32; 10])?;
21 let mut out_1 = DeviceBuffer::from_slice(&[0.0f32; 10])?;
22 let mut out_2 = DeviceBuffer::from_slice(&[0.0f32; 10])?;
23
24 // This kernel adds each element in `in_x` and `in_y` and writes the result into `out`.
25 unsafe {
26 // Launch the kernel with one block of one thread, no dynamic shared memory on `stream`.
27 let result = launch!(module.sum<<<1, 1, 0, stream>>>(
28 in_x.as_device_ptr(),
29 in_y.as_device_ptr(),
30 out_1.as_device_ptr(),
31 out_1.len()
32 ));
33 result?;
34
35 // Launch the kernel again using the `function` form:
36 let function_name = CString::new("sum")?;
37 let sum = module.get_function(&function_name)?;
38 // Launch with 1x1x1 (1) blocks of 10x1x1 (10) threads, to show that you can use tuples to
39 // configure grid and block size.
40 let result = launch!(sum<<<(1, 1, 1), (10, 1, 1), 0, stream>>>(
41 in_x.as_device_ptr(),
42 in_y.as_device_ptr(),
43 out_2.as_device_ptr(),
44 out_2.len()
45 ));
46 result?;
47 }
48
49 // Kernel launches are asynchronous, so we wait for the kernels to finish executing.
50 stream.synchronize()?;
51
52 // Copy the results back to host memory
53 let mut out_host = [0.0f32; 20];
54 out_1.copy_to(&mut out_host[0..10])?;
55 out_2.copy_to(&mut out_host[10..20])?;
56
57 for x in out_host.iter() {
58 assert_eq!(3.0 as u32, *x as u32);
59 }
60
61 println!("Launched kernel successfully.");
62 Ok(())
63}
Sourcepub fn wait_event(
&self,
event: Event,
flags: StreamWaitEventFlags,
) -> CudaResult<()>
pub fn wait_event( &self, event: Event, flags: StreamWaitEventFlags, ) -> CudaResult<()>
Make the stream wait on an event.
All future work submitted to the stream will wait for the event to complete. Synchronization is performed on the device, if possible. The event may originate from different context or device than the stream.
§Example
use rustacuda::stream::{Stream, StreamFlags, StreamWaitEventFlags};
use rustacuda::event::{Event, EventFlags};
let stream_0 = Stream::new(StreamFlags::NON_BLOCKING, None)?;
let stream_1 = Stream::new(StreamFlags::NON_BLOCKING, None)?;
let event = Event::new(EventFlags::DEFAULT)?;
// do some work on stream_0 ...
// record an event
event.record(&stream_0)?;
// wait until the work on stream_0 is finished before continuing stream_1
stream_1.wait_event(event, StreamWaitEventFlags::DEFAULT)?;
Sourcepub fn drop(stream: Stream) -> DropResult<Stream>
pub fn drop(stream: Stream) -> DropResult<Stream>
Destroy a Stream, returning an error (along with the un-destroyed stream) if destruction fails.
Destroying a stream can return errors from previous asynchronous work. This function destroys the given stream and returns the error and the un-destroyed stream on failure.
§Example
use rustacuda::stream::{Stream, StreamFlags};
let stream = Stream::new(StreamFlags::NON_BLOCKING, 1i32.into())?;
match Stream::drop(stream) {
Ok(()) => println!("Successfully destroyed"),
Err((e, stream)) => {
println!("Failed to destroy stream: {:?}", e);
// Do something with stream
},
}