fil_rustacuda/
event.rs

1//! Events can be used to track status and dependencies, as well as to measure
2//! the duration of work submitted to a CUDA stream.
3//!
4//! In CUDA, most work is performed asynchronously. Events help to manage tasks
5//! scheduled on an asynchronous stream. This includes waiting for a task (or
6//! multiple tasks) to complete, and measuring the time duration it takes to
7//! complete a task. Events can also be used to sequence tasks on multiple
8//! streams within the same context by specifying dependent tasks (not supported
9//! yet by RustaCUDA).
10//!
11//! Events may be reused multiple times.
12
13// TODO: I'm not sure that these events are/can be safe by Rust's model of safety; they inherently
14// create state which can be mutated even while an immutable borrow is held.
15
16use crate::error::{CudaError, CudaResult, DropResult, ToResult};
17use crate::stream::Stream;
18use cuda_driver_sys::{
19    cuEventCreate, cuEventDestroy_v2, cuEventElapsedTime, cuEventQuery, cuEventRecord,
20    cuEventSynchronize, CUevent,
21};
22
23use std::mem;
24use std::ptr;
25
26bitflags! {
27    /// Bit flags for configuring a CUDA Event.
28    ///
29    /// The CUDA documentation claims that setting `DISABLE_TIMING` and `BLOCKING_SYNC` provides
30    /// the best performance for `query()` and `stream.wait_event()`.
31    pub struct EventFlags: u32 {
32        /// The default event creation flag.
33        const DEFAULT = 0x0;
34
35        /// Specify that the created event should busy-wait on blocking
36        /// function calls.
37        const BLOCKING_SYNC = 0x1;
38
39        /// Specify that the created event does not need to record timing data.
40        const DISABLE_TIMING = 0x2;
41
42        /// Specify that the created event may be used as an interprocess event.
43        /// (not supported yet by RustaCUDA). This flag requires
44        /// `DISABLE_TIMING` to be set as well.
45        const INTERPROCESS = 0x4;
46    }
47}
48
49/// Status enum that represents the current status of an event.
50#[derive(Clone, Copy, Debug, PartialEq)]
51pub enum EventStatus {
52    /// Ready indicates that all work captured by the event has been completed.
53    ///
54    /// The CUDA documentation states that for Unified Memory, `EventStatus::Ready` is
55    /// equivalent to having called `Event::synchronize`.
56    Ready,
57
58    /// `EventStatus::NotReady` indicates that the work captured by the event is still
59    /// incomplete.
60    NotReady,
61}
62
63/// An event to track work submitted to a stream.
64///
65/// See the module-level documentation for more information.
66#[derive(Debug)]
67pub struct Event(CUevent);
68
69impl Event {
70    /// Create a new event with the specified flags.
71    ///
72    /// # Example
73    ///
74    /// ```
75    /// # use rustacuda::quick_init;
76    /// # use std::error::Error;
77    /// # fn main() -> Result<(), Box<dyn Error>> {
78    /// # let _context = quick_init()?;
79    /// use rustacuda::event::{Event, EventFlags};
80    ///
81    /// // With default settings
82    /// let event = Event::new(EventFlags::DEFAULT)?;
83    /// # Ok(())
84    /// # }
85    /// ```
86    pub fn new(flags: EventFlags) -> CudaResult<Self> {
87        unsafe {
88            let mut event: CUevent = mem::zeroed();
89            cuEventCreate(&mut event, flags.bits()).to_result()?;
90            Ok(Event(event))
91        }
92    }
93
94    /// Add the event to the given stream of work. The event will be completed when the stream
95    /// completes all previously-submitted work and reaches the event in the queue.
96    ///
97    /// This function is used together with `query`, `synchronize`, and
98    /// `elapsed_time_f32`. See the respective functions for more information.
99    ///
100    /// If the event is created with `EventFlags::BLOCKING_SYNC`, then `record`
101    /// blocks until the event has actually been recorded.
102    ///
103    /// # Errors
104    ///
105    /// If the event and stream are not from the same context, an error is
106    /// returned.
107    ///
108    /// # Example
109    ///
110    /// ```
111    /// # use rustacuda::quick_init;
112    /// # use rustacuda::stream::{Stream, StreamFlags};
113    /// # use std::error::Error;
114    /// # fn main() -> Result<(), Box<dyn Error>> {
115    /// # let _context = quick_init()?;
116    /// use rustacuda::event::{Event, EventFlags};
117    ///
118    /// let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
119    /// let event = Event::new(EventFlags::DEFAULT)?;
120    ///
121    /// // submit some work ...
122    ///
123    /// event.record(&stream)?;
124    /// # Ok(())
125    /// }
126    /// ```
127    pub fn record(&self, stream: &Stream) -> CudaResult<()> {
128        unsafe {
129            cuEventRecord(self.0, stream.as_inner()).to_result()?;
130            Ok(())
131        }
132    }
133
134    /// Return whether the stream this event was recorded on (see `record`) has processed this event
135    /// yet or not. A return value of `EventStatus::Ready` indicates that all work submitted before
136    /// the event has been completed.
137    ///
138    /// # Example
139    ///
140    /// ```
141    /// # use rustacuda::quick_init;
142    /// # use rustacuda::stream::{Stream, StreamFlags};
143    /// # use std::error::Error;
144    /// # fn main() -> Result<(), Box<dyn Error>> {
145    /// # let _context = quick_init()?;
146    /// use rustacuda::event::{Event, EventFlags, EventStatus};
147    ///
148    /// let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
149    /// let event = Event::new(EventFlags::DEFAULT)?;
150    ///
151    /// // do some work ...
152    ///
153    /// // record an event
154    /// event.record(&stream)?;
155    ///
156    /// // ... wait some time ...
157    /// # event.synchronize()?;
158    ///
159    /// // query if the work is finished
160    /// let status = event.query()?;
161    /// assert_eq!(status, EventStatus::Ready);
162    /// # Ok(())
163    /// }
164    /// ```
165    pub fn query(&self) -> CudaResult<EventStatus> {
166        let result = unsafe { cuEventQuery(self.0).to_result() };
167
168        match result {
169            Ok(()) => Ok(EventStatus::Ready),
170            Err(CudaError::NotReady) => Ok(EventStatus::NotReady),
171            Err(other) => Err(other),
172        }
173    }
174
175    /// Wait for an event to complete.
176    ///
177    /// Blocks thread execution until all work submitted before the event was
178    /// recorded has completed. `EventFlags::BLOCKING_SYNC` controls the mode of
179    /// blocking. If the flag is set on event creation, the thread will sleep.
180    /// Otherwise, the thread will busy-wait.
181    ///
182    /// # Example
183    ///
184    /// ```
185    /// # use rustacuda::quick_init;
186    /// # use rustacuda::stream::{Stream, StreamFlags};
187    /// # use std::error::Error;
188    /// # fn main() -> Result<(), Box<dyn Error>> {
189    /// # let _context = quick_init()?;
190    /// use rustacuda::event::{Event, EventFlags};
191    ///
192    /// let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
193    /// let event = Event::new(EventFlags::DEFAULT)?;
194    ///
195    /// // do some work ...
196    ///
197    /// // record an event
198    /// event.record(&stream)?;
199    ///
200    /// // wait until the work is finished
201    /// event.synchronize()?;
202    /// # Ok(())
203    /// }
204    /// ```
205    pub fn synchronize(&self) -> CudaResult<()> {
206        unsafe {
207            cuEventSynchronize(self.0).to_result()?;
208            Ok(())
209        }
210    }
211
212    /// Return the duration between two events.
213    ///
214    /// The duration is computed in milliseconds with a resolution of
215    /// approximately 0.5 microseconds. This can be used to measure the duration of work
216    /// queued in between the two events.
217    ///
218    /// # Errors
219    ///
220    /// `CudaError::NotReady` is returned if either event is not yet complete.
221    ///
222    /// `CudaError::InvalidHandle` is returned if
223    /// - the two events are not from the same context, or if
224    /// - `record` has not been called on either event, or if
225    /// - the `DISABLE_TIMING` flag is set on either event.
226    ///
227    /// # Example
228    ///
229    /// ```
230    /// # use rustacuda::quick_init;
231    /// # use rustacuda::stream::{Stream, StreamFlags};
232    /// # use rustacuda::launch;
233    /// # use rustacuda::module::Module;
234    /// # use rustacuda::memory::DeviceBox;
235    /// # use std::error::Error;
236    /// # use std::ffi::CString;
237    /// # fn main() -> Result<(), Box<dyn Error>> {
238    /// # let _context = quick_init()?;
239    /// # let module_data = CString::new(include_str!("../resources/add.ptx"))?;
240    /// # let module = Module::load_from_string(&module_data)?;
241    /// # let mut x = DeviceBox::new(&10.0f32)?;
242    /// # let mut y = DeviceBox::new(&20.0f32)?;
243    /// # let mut result = DeviceBox::new(&0.0f32)?;
244    /// use rustacuda::event::{Event, EventFlags};
245    ///
246    /// let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
247    /// let start_event = Event::new(EventFlags::DEFAULT)?;
248    /// let stop_event = Event::new(EventFlags::DEFAULT)?;
249    ///
250    /// // start recording time
251    /// start_event.record(&stream)?;
252    ///
253    /// // do some work ...
254    /// # unsafe {
255    /// #    launch!(module.sum<<<1, 1, 0, stream>>>(
256    /// #            x.as_device_ptr(),
257    /// #            y.as_device_ptr(),
258    /// #            result.as_device_ptr(),
259    /// #            1 // Length
260    /// #            ))?;
261    /// # }
262    ///
263    /// // stop recording time
264    /// stop_event.record(&stream)?;
265    ///
266    /// // wait for the work to complete
267    /// stop_event.synchronize()?;
268    ///
269    /// // compute the time elapsed between the start and stop events
270    /// let time = stop_event.elapsed_time_f32(&start_event)?;
271    ///
272    /// # assert!(time > 0.0);
273    /// # Ok(())
274    /// }
275    /// ```
276    pub fn elapsed_time_f32(&self, start: &Self) -> CudaResult<f32> {
277        unsafe {
278            let mut millis: f32 = 0.0;
279            cuEventElapsedTime(&mut millis, start.0, self.0).to_result()?;
280            Ok(millis)
281        }
282    }
283
284    // Get the inner `CUevent` from the `Event`.
285    //
286    // Necessary for certain CUDA functions outside of this
287    // module that expect a bare `CUevent`.
288    pub(crate) fn as_inner(&self) -> CUevent {
289        self.0
290    }
291
292    /// Destroy an `Event` returning an error.
293    ///
294    /// Destroying an event can return errors from previous asynchronous work.
295    /// This function destroys the given event and returns the error and the
296    /// un-destroyed event on failure.
297    ///
298    /// # Example
299    ///
300    /// ```
301    /// # use rustacuda::*;
302    /// # use std::error::Error;
303    /// # fn main() -> Result<(), Box<dyn Error>> {
304    /// # let _context = quick_init()?;
305    /// use rustacuda::event::{Event, EventFlags};
306    ///
307    /// let event = Event::new(EventFlags::DEFAULT)?;
308    /// match Event::drop(event) {
309    ///     Ok(()) => println!("Successfully destroyed"),
310    ///     Err((cuda_error, event)) => {
311    ///         println!("Failed to destroy event: {:?}", cuda_error);
312    ///         // Do something with event
313    ///     },
314    /// }
315    /// # Ok(())
316    /// # }
317    /// ```
318    pub fn drop(mut event: Event) -> DropResult<Event> {
319        if event.0.is_null() {
320            return Ok(());
321        }
322
323        unsafe {
324            let inner = mem::replace(&mut event.0, ptr::null_mut());
325            match cuEventDestroy_v2(inner).to_result() {
326                Ok(()) => {
327                    mem::forget(event);
328                    Ok(())
329                }
330                Err(e) => Err((e, Event(inner))),
331            }
332        }
333    }
334}
335
336impl Drop for Event {
337    fn drop(&mut self) {
338        unsafe { cuEventDestroy_v2(self.0) }
339            .to_result()
340            .expect("Failed to destroy CUDA event");
341    }
342}
343
344#[cfg(test)]
345mod test {
346    use super::*;
347    use crate::quick_init;
348    use crate::stream::StreamFlags;
349    use std::error::Error;
350
351    #[test]
352    fn test_new_with_flags() -> Result<(), Box<dyn Error>> {
353        let _context = quick_init()?;
354        let _event = Event::new(EventFlags::BLOCKING_SYNC | EventFlags::DISABLE_TIMING)?;
355        Ok(())
356    }
357
358    #[test]
359    fn test_record_with_wrong_context() -> Result<(), Box<dyn Error>> {
360        let _context = quick_init()?;
361        let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
362        let _new_context = quick_init()?;
363        let event = Event::new(EventFlags::DEFAULT)?;
364        let result = event.record(&stream);
365        assert_eq!(result, Err(CudaError::InvalidHandle));
366        Ok(())
367    }
368
369    #[test]
370    fn test_elapsed_time_f32_with_wrong_context() -> Result<(), Box<dyn Error>> {
371        let _context = quick_init()?;
372        let fst_stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
373        let fst_event = Event::new(EventFlags::DEFAULT)?;
374        fst_event.record(&fst_stream)?;
375
376        let _context = quick_init()?;
377        let snd_stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
378        let snd_event = Event::new(EventFlags::DEFAULT)?;
379        snd_event.record(&snd_stream)?;
380
381        fst_event.synchronize()?;
382        snd_event.synchronize()?;
383        let result = snd_event.elapsed_time_f32(&fst_event);
384        assert_eq!(result, Err(CudaError::InvalidHandle));
385        Ok(())
386    }
387
388    #[test]
389    fn test_elapsed_time_f32_with_different_streams() -> Result<(), Box<dyn Error>> {
390        let _context = quick_init()?;
391        let fst_stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
392        let fst_event = Event::new(EventFlags::DEFAULT)?;
393        fst_event.record(&fst_stream)?;
394
395        let snd_stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
396        let snd_event = Event::new(EventFlags::DEFAULT)?;
397        snd_event.record(&snd_stream)?;
398
399        fst_event.synchronize()?;
400        snd_event.synchronize()?;
401        let _result = snd_event.elapsed_time_f32(&fst_event)?;
402        Ok(())
403    }
404
405    #[test]
406    fn test_elapsed_time_f32_with_disable_timing() -> Result<(), Box<dyn Error>> {
407        let _context = quick_init()?;
408        let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
409
410        let start_event = Event::new(EventFlags::DISABLE_TIMING)?;
411        start_event.record(&stream)?;
412
413        let stop_event = Event::new(EventFlags::DEFAULT)?;
414        stop_event.record(&stream)?;
415
416        stop_event.synchronize()?;
417        let result = stop_event.elapsed_time_f32(&start_event);
418        assert_eq!(result, Err(CudaError::InvalidHandle));
419        Ok(())
420    }
421}