fil_rustacuda/event.rs
1//! Events can be used to track status and dependencies, as well as to measure
2//! the duration of work submitted to a CUDA stream.
3//!
4//! In CUDA, most work is performed asynchronously. Events help to manage tasks
5//! scheduled on an asynchronous stream. This includes waiting for a task (or
6//! multiple tasks) to complete, and measuring the time duration it takes to
7//! complete a task. Events can also be used to sequence tasks on multiple
8//! streams within the same context by specifying dependent tasks (not supported
9//! yet by RustaCUDA).
10//!
11//! Events may be reused multiple times.
12
13// TODO: I'm not sure that these events are/can be safe by Rust's model of safety; they inherently
14// create state which can be mutated even while an immutable borrow is held.
15
16use crate::error::{CudaError, CudaResult, DropResult, ToResult};
17use crate::stream::Stream;
18use cuda_driver_sys::{
19 cuEventCreate, cuEventDestroy_v2, cuEventElapsedTime, cuEventQuery, cuEventRecord,
20 cuEventSynchronize, CUevent,
21};
22
23use std::mem;
24use std::ptr;
25
26bitflags! {
27 /// Bit flags for configuring a CUDA Event.
28 ///
29 /// The CUDA documentation claims that setting `DISABLE_TIMING` and `BLOCKING_SYNC` provides
30 /// the best performance for `query()` and `stream.wait_event()`.
31 pub struct EventFlags: u32 {
32 /// The default event creation flag.
33 const DEFAULT = 0x0;
34
35 /// Specify that the created event should busy-wait on blocking
36 /// function calls.
37 const BLOCKING_SYNC = 0x1;
38
39 /// Specify that the created event does not need to record timing data.
40 const DISABLE_TIMING = 0x2;
41
42 /// Specify that the created event may be used as an interprocess event.
43 /// (not supported yet by RustaCUDA). This flag requires
44 /// `DISABLE_TIMING` to be set as well.
45 const INTERPROCESS = 0x4;
46 }
47}
48
49/// Status enum that represents the current status of an event.
50#[derive(Clone, Copy, Debug, PartialEq)]
51pub enum EventStatus {
52 /// Ready indicates that all work captured by the event has been completed.
53 ///
54 /// The CUDA documentation states that for Unified Memory, `EventStatus::Ready` is
55 /// equivalent to having called `Event::synchronize`.
56 Ready,
57
58 /// `EventStatus::NotReady` indicates that the work captured by the event is still
59 /// incomplete.
60 NotReady,
61}
62
63/// An event to track work submitted to a stream.
64///
65/// See the module-level documentation for more information.
66#[derive(Debug)]
67pub struct Event(CUevent);
68
69impl Event {
70 /// Create a new event with the specified flags.
71 ///
72 /// # Example
73 ///
74 /// ```
75 /// # use rustacuda::quick_init;
76 /// # use std::error::Error;
77 /// # fn main() -> Result<(), Box<dyn Error>> {
78 /// # let _context = quick_init()?;
79 /// use rustacuda::event::{Event, EventFlags};
80 ///
81 /// // With default settings
82 /// let event = Event::new(EventFlags::DEFAULT)?;
83 /// # Ok(())
84 /// # }
85 /// ```
86 pub fn new(flags: EventFlags) -> CudaResult<Self> {
87 unsafe {
88 let mut event: CUevent = mem::zeroed();
89 cuEventCreate(&mut event, flags.bits()).to_result()?;
90 Ok(Event(event))
91 }
92 }
93
94 /// Add the event to the given stream of work. The event will be completed when the stream
95 /// completes all previously-submitted work and reaches the event in the queue.
96 ///
97 /// This function is used together with `query`, `synchronize`, and
98 /// `elapsed_time_f32`. See the respective functions for more information.
99 ///
100 /// If the event is created with `EventFlags::BLOCKING_SYNC`, then `record`
101 /// blocks until the event has actually been recorded.
102 ///
103 /// # Errors
104 ///
105 /// If the event and stream are not from the same context, an error is
106 /// returned.
107 ///
108 /// # Example
109 ///
110 /// ```
111 /// # use rustacuda::quick_init;
112 /// # use rustacuda::stream::{Stream, StreamFlags};
113 /// # use std::error::Error;
114 /// # fn main() -> Result<(), Box<dyn Error>> {
115 /// # let _context = quick_init()?;
116 /// use rustacuda::event::{Event, EventFlags};
117 ///
118 /// let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
119 /// let event = Event::new(EventFlags::DEFAULT)?;
120 ///
121 /// // submit some work ...
122 ///
123 /// event.record(&stream)?;
124 /// # Ok(())
125 /// }
126 /// ```
127 pub fn record(&self, stream: &Stream) -> CudaResult<()> {
128 unsafe {
129 cuEventRecord(self.0, stream.as_inner()).to_result()?;
130 Ok(())
131 }
132 }
133
134 /// Return whether the stream this event was recorded on (see `record`) has processed this event
135 /// yet or not. A return value of `EventStatus::Ready` indicates that all work submitted before
136 /// the event has been completed.
137 ///
138 /// # Example
139 ///
140 /// ```
141 /// # use rustacuda::quick_init;
142 /// # use rustacuda::stream::{Stream, StreamFlags};
143 /// # use std::error::Error;
144 /// # fn main() -> Result<(), Box<dyn Error>> {
145 /// # let _context = quick_init()?;
146 /// use rustacuda::event::{Event, EventFlags, EventStatus};
147 ///
148 /// let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
149 /// let event = Event::new(EventFlags::DEFAULT)?;
150 ///
151 /// // do some work ...
152 ///
153 /// // record an event
154 /// event.record(&stream)?;
155 ///
156 /// // ... wait some time ...
157 /// # event.synchronize()?;
158 ///
159 /// // query if the work is finished
160 /// let status = event.query()?;
161 /// assert_eq!(status, EventStatus::Ready);
162 /// # Ok(())
163 /// }
164 /// ```
165 pub fn query(&self) -> CudaResult<EventStatus> {
166 let result = unsafe { cuEventQuery(self.0).to_result() };
167
168 match result {
169 Ok(()) => Ok(EventStatus::Ready),
170 Err(CudaError::NotReady) => Ok(EventStatus::NotReady),
171 Err(other) => Err(other),
172 }
173 }
174
175 /// Wait for an event to complete.
176 ///
177 /// Blocks thread execution until all work submitted before the event was
178 /// recorded has completed. `EventFlags::BLOCKING_SYNC` controls the mode of
179 /// blocking. If the flag is set on event creation, the thread will sleep.
180 /// Otherwise, the thread will busy-wait.
181 ///
182 /// # Example
183 ///
184 /// ```
185 /// # use rustacuda::quick_init;
186 /// # use rustacuda::stream::{Stream, StreamFlags};
187 /// # use std::error::Error;
188 /// # fn main() -> Result<(), Box<dyn Error>> {
189 /// # let _context = quick_init()?;
190 /// use rustacuda::event::{Event, EventFlags};
191 ///
192 /// let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
193 /// let event = Event::new(EventFlags::DEFAULT)?;
194 ///
195 /// // do some work ...
196 ///
197 /// // record an event
198 /// event.record(&stream)?;
199 ///
200 /// // wait until the work is finished
201 /// event.synchronize()?;
202 /// # Ok(())
203 /// }
204 /// ```
205 pub fn synchronize(&self) -> CudaResult<()> {
206 unsafe {
207 cuEventSynchronize(self.0).to_result()?;
208 Ok(())
209 }
210 }
211
212 /// Return the duration between two events.
213 ///
214 /// The duration is computed in milliseconds with a resolution of
215 /// approximately 0.5 microseconds. This can be used to measure the duration of work
216 /// queued in between the two events.
217 ///
218 /// # Errors
219 ///
220 /// `CudaError::NotReady` is returned if either event is not yet complete.
221 ///
222 /// `CudaError::InvalidHandle` is returned if
223 /// - the two events are not from the same context, or if
224 /// - `record` has not been called on either event, or if
225 /// - the `DISABLE_TIMING` flag is set on either event.
226 ///
227 /// # Example
228 ///
229 /// ```
230 /// # use rustacuda::quick_init;
231 /// # use rustacuda::stream::{Stream, StreamFlags};
232 /// # use rustacuda::launch;
233 /// # use rustacuda::module::Module;
234 /// # use rustacuda::memory::DeviceBox;
235 /// # use std::error::Error;
236 /// # use std::ffi::CString;
237 /// # fn main() -> Result<(), Box<dyn Error>> {
238 /// # let _context = quick_init()?;
239 /// # let module_data = CString::new(include_str!("../resources/add.ptx"))?;
240 /// # let module = Module::load_from_string(&module_data)?;
241 /// # let mut x = DeviceBox::new(&10.0f32)?;
242 /// # let mut y = DeviceBox::new(&20.0f32)?;
243 /// # let mut result = DeviceBox::new(&0.0f32)?;
244 /// use rustacuda::event::{Event, EventFlags};
245 ///
246 /// let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
247 /// let start_event = Event::new(EventFlags::DEFAULT)?;
248 /// let stop_event = Event::new(EventFlags::DEFAULT)?;
249 ///
250 /// // start recording time
251 /// start_event.record(&stream)?;
252 ///
253 /// // do some work ...
254 /// # unsafe {
255 /// # launch!(module.sum<<<1, 1, 0, stream>>>(
256 /// # x.as_device_ptr(),
257 /// # y.as_device_ptr(),
258 /// # result.as_device_ptr(),
259 /// # 1 // Length
260 /// # ))?;
261 /// # }
262 ///
263 /// // stop recording time
264 /// stop_event.record(&stream)?;
265 ///
266 /// // wait for the work to complete
267 /// stop_event.synchronize()?;
268 ///
269 /// // compute the time elapsed between the start and stop events
270 /// let time = stop_event.elapsed_time_f32(&start_event)?;
271 ///
272 /// # assert!(time > 0.0);
273 /// # Ok(())
274 /// }
275 /// ```
276 pub fn elapsed_time_f32(&self, start: &Self) -> CudaResult<f32> {
277 unsafe {
278 let mut millis: f32 = 0.0;
279 cuEventElapsedTime(&mut millis, start.0, self.0).to_result()?;
280 Ok(millis)
281 }
282 }
283
284 // Get the inner `CUevent` from the `Event`.
285 //
286 // Necessary for certain CUDA functions outside of this
287 // module that expect a bare `CUevent`.
288 pub(crate) fn as_inner(&self) -> CUevent {
289 self.0
290 }
291
292 /// Destroy an `Event` returning an error.
293 ///
294 /// Destroying an event can return errors from previous asynchronous work.
295 /// This function destroys the given event and returns the error and the
296 /// un-destroyed event on failure.
297 ///
298 /// # Example
299 ///
300 /// ```
301 /// # use rustacuda::*;
302 /// # use std::error::Error;
303 /// # fn main() -> Result<(), Box<dyn Error>> {
304 /// # let _context = quick_init()?;
305 /// use rustacuda::event::{Event, EventFlags};
306 ///
307 /// let event = Event::new(EventFlags::DEFAULT)?;
308 /// match Event::drop(event) {
309 /// Ok(()) => println!("Successfully destroyed"),
310 /// Err((cuda_error, event)) => {
311 /// println!("Failed to destroy event: {:?}", cuda_error);
312 /// // Do something with event
313 /// },
314 /// }
315 /// # Ok(())
316 /// # }
317 /// ```
318 pub fn drop(mut event: Event) -> DropResult<Event> {
319 if event.0.is_null() {
320 return Ok(());
321 }
322
323 unsafe {
324 let inner = mem::replace(&mut event.0, ptr::null_mut());
325 match cuEventDestroy_v2(inner).to_result() {
326 Ok(()) => {
327 mem::forget(event);
328 Ok(())
329 }
330 Err(e) => Err((e, Event(inner))),
331 }
332 }
333 }
334}
335
336impl Drop for Event {
337 fn drop(&mut self) {
338 unsafe { cuEventDestroy_v2(self.0) }
339 .to_result()
340 .expect("Failed to destroy CUDA event");
341 }
342}
343
344#[cfg(test)]
345mod test {
346 use super::*;
347 use crate::quick_init;
348 use crate::stream::StreamFlags;
349 use std::error::Error;
350
351 #[test]
352 fn test_new_with_flags() -> Result<(), Box<dyn Error>> {
353 let _context = quick_init()?;
354 let _event = Event::new(EventFlags::BLOCKING_SYNC | EventFlags::DISABLE_TIMING)?;
355 Ok(())
356 }
357
358 #[test]
359 fn test_record_with_wrong_context() -> Result<(), Box<dyn Error>> {
360 let _context = quick_init()?;
361 let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
362 let _new_context = quick_init()?;
363 let event = Event::new(EventFlags::DEFAULT)?;
364 let result = event.record(&stream);
365 assert_eq!(result, Err(CudaError::InvalidHandle));
366 Ok(())
367 }
368
369 #[test]
370 fn test_elapsed_time_f32_with_wrong_context() -> Result<(), Box<dyn Error>> {
371 let _context = quick_init()?;
372 let fst_stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
373 let fst_event = Event::new(EventFlags::DEFAULT)?;
374 fst_event.record(&fst_stream)?;
375
376 let _context = quick_init()?;
377 let snd_stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
378 let snd_event = Event::new(EventFlags::DEFAULT)?;
379 snd_event.record(&snd_stream)?;
380
381 fst_event.synchronize()?;
382 snd_event.synchronize()?;
383 let result = snd_event.elapsed_time_f32(&fst_event);
384 assert_eq!(result, Err(CudaError::InvalidHandle));
385 Ok(())
386 }
387
388 #[test]
389 fn test_elapsed_time_f32_with_different_streams() -> Result<(), Box<dyn Error>> {
390 let _context = quick_init()?;
391 let fst_stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
392 let fst_event = Event::new(EventFlags::DEFAULT)?;
393 fst_event.record(&fst_stream)?;
394
395 let snd_stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
396 let snd_event = Event::new(EventFlags::DEFAULT)?;
397 snd_event.record(&snd_stream)?;
398
399 fst_event.synchronize()?;
400 snd_event.synchronize()?;
401 let _result = snd_event.elapsed_time_f32(&fst_event)?;
402 Ok(())
403 }
404
405 #[test]
406 fn test_elapsed_time_f32_with_disable_timing() -> Result<(), Box<dyn Error>> {
407 let _context = quick_init()?;
408 let stream = Stream::new(StreamFlags::NON_BLOCKING, None)?;
409
410 let start_event = Event::new(EventFlags::DISABLE_TIMING)?;
411 start_event.record(&stream)?;
412
413 let stop_event = Event::new(EventFlags::DEFAULT)?;
414 stop_event.record(&stream)?;
415
416 stop_event.synchronize()?;
417 let result = stop_event.elapsed_time_f32(&start_event);
418 assert_eq!(result, Err(CudaError::InvalidHandle));
419 Ok(())
420 }
421}