oxicuda_driver/event.rs
1//! CUDA event management for timing and synchronisation.
2//!
3//! Events can be recorded on a stream and used to measure elapsed time
4//! between GPU operations or to synchronise streams.
5//!
6//! # Timing example
7//!
8//! ```rust,no_run
9//! # use std::sync::Arc;
10//! # use oxicuda_driver::event::Event;
11//! # use oxicuda_driver::stream::Stream;
12//! # use oxicuda_driver::context::Context;
13//! # fn main() -> Result<(), oxicuda_driver::error::CudaError> {
14//! # let ctx: Arc<Context> = unimplemented!();
15//! let stream = Stream::new(&ctx)?;
16//! let start = Event::new()?;
17//! let end = Event::new()?;
18//!
19//! start.record(&stream)?;
20//! // ... enqueue GPU work on `stream` ...
21//! end.record(&stream)?;
22//! end.synchronize()?;
23//!
24//! let ms = Event::elapsed_time(&start, &end)?;
25//! println!("kernel took {ms:.3} ms");
26//! # Ok(())
27//! # }
28//! ```
29
30use crate::error::CudaResult;
31use crate::ffi::{CU_EVENT_DEFAULT, CUevent};
32use crate::loader::try_driver;
33use crate::stream::Stream;
34
35/// A CUDA event for timing and synchronisation.
36///
37/// Events are lightweight markers that can be recorded into a
38/// [`Stream`]. They support two primary use-cases:
39///
40/// 1. **Timing** — measure elapsed GPU time between two recorded events
41/// via [`Event::elapsed_time`].
42/// 2. **Synchronisation** — make one stream wait for work recorded in
43/// another stream via [`Stream::wait_event`].
44pub struct Event {
45 /// Raw CUDA event handle.
46 raw: CUevent,
47}
48
// SAFETY: `Event` holds nothing but a `CUevent` handle, so moving an
// `Event` to another thread only moves that handle. CUDA events are safe
// to send between threads when properly synchronised via the driver API.
unsafe impl Send for Event {}
52
53impl Event {
54 /// Creates a new event with [`CU_EVENT_DEFAULT`] flags.
55 ///
56 /// Default events record timing data. Use [`Event::with_flags`] to
57 /// create events with different characteristics (e.g. disable timing
58 /// for lower overhead).
59 ///
60 /// # Errors
61 ///
62 /// Returns a [`CudaError`](crate::error::CudaError) if the driver
63 /// call fails.
64 pub fn new() -> CudaResult<Self> {
65 Self::with_flags(CU_EVENT_DEFAULT)
66 }
67
68 /// Creates a new event with the specified flags.
69 ///
70 /// Common flag values (from [`crate::ffi`]):
71 ///
72 /// | Constant | Value | Description |
73 /// |---------------------------|-------|--------------------------------|
74 /// | `CU_EVENT_DEFAULT` | 0 | Default (records timing) |
75 /// | `CU_EVENT_BLOCKING_SYNC` | 1 | Use blocking synchronisation |
76 /// | `CU_EVENT_DISABLE_TIMING` | 2 | Disable timing (lower overhead)|
77 /// | `CU_EVENT_INTERPROCESS` | 4 | Usable across processes |
78 ///
79 /// Flags can be combined with bitwise OR.
80 ///
81 /// # Errors
82 ///
83 /// Returns a [`CudaError`](crate::error::CudaError) if the flags
84 /// are invalid or the driver call otherwise fails.
85 pub fn with_flags(flags: u32) -> CudaResult<Self> {
86 let api = try_driver()?;
87 let mut raw = CUevent::default();
88 crate::cuda_call!((api.cu_event_create)(&mut raw, flags))?;
89 Ok(Self { raw })
90 }
91
92 /// Records this event on the given stream.
93 ///
94 /// The event captures the point in the stream's command queue at
95 /// which it was recorded. Subsequent calls to [`Event::synchronize`]
96 /// or [`Event::elapsed_time`] reference this recorded point.
97 ///
98 /// # Errors
99 ///
100 /// Returns a [`CudaError`](crate::error::CudaError) if the stream
101 /// or event handle is invalid.
102 pub fn record(&self, stream: &Stream) -> CudaResult<()> {
103 let api = try_driver()?;
104 crate::cuda_call!((api.cu_event_record)(self.raw, stream.raw()))
105 }
106
107 /// Queries whether this event has completed.
108 ///
109 /// Returns `Ok(true)` if the event (and all preceding work in its
110 /// stream) has completed, `Ok(false)` if it is still pending.
111 ///
112 /// # Errors
113 ///
114 /// Returns a [`CudaError`](crate::error::CudaError) if the event
115 /// was not recorded or an unexpected driver error occurs (errors
116 /// other than `NotReady`).
117 pub fn query(&self) -> CudaResult<bool> {
118 let api = try_driver()?;
119 let rc = unsafe { (api.cu_event_query)(self.raw) };
120 if rc == 0 {
121 Ok(true)
122 } else if rc == crate::ffi::CUDA_ERROR_NOT_READY {
123 Ok(false)
124 } else {
125 Err(crate::error::CudaError::from_raw(rc))
126 }
127 }
128
129 /// Blocks the calling thread until this event has been recorded
130 /// and all preceding work in its stream has completed.
131 ///
132 /// # Errors
133 ///
134 /// Returns a [`CudaError`](crate::error::CudaError) if the event
135 /// was not recorded or the driver reports an error.
136 pub fn synchronize(&self) -> CudaResult<()> {
137 let api = try_driver()?;
138 crate::cuda_call!((api.cu_event_synchronize)(self.raw))
139 }
140
141 /// Computes the elapsed time in milliseconds between two recorded
142 /// events.
143 ///
144 /// Both `start` and `end` must have been previously recorded on a
145 /// stream, and `end` must have completed (e.g. via
146 /// [`Event::synchronize`]).
147 ///
148 /// # Errors
149 ///
150 /// Returns a [`CudaError`](crate::error::CudaError) if either event
151 /// has not been recorded, or if timing data is not available (e.g.
152 /// the events were created with `CU_EVENT_DISABLE_TIMING`).
153 pub fn elapsed_time(start: &Event, end: &Event) -> CudaResult<f32> {
154 let api = try_driver()?;
155 let mut ms: f32 = 0.0;
156 crate::cuda_call!((api.cu_event_elapsed_time)(&mut ms, start.raw, end.raw))?;
157 Ok(ms)
158 }
159
160 /// Returns the raw [`CUevent`] handle.
161 ///
162 /// # Safety (caller)
163 ///
164 /// The caller must not destroy or otherwise invalidate the handle
165 /// while this `Event` is still alive.
166 #[inline]
167 pub fn raw(&self) -> CUevent {
168 self.raw
169 }
170}
171
172impl Drop for Event {
173 fn drop(&mut self) {
174 if let Ok(api) = try_driver() {
175 let rc = unsafe { (api.cu_event_destroy_v2)(self.raw) };
176 if rc != 0 {
177 tracing::warn!(
178 cuda_error = rc,
179 event = ?self.raw,
180 "cuEventDestroy_v2 failed during drop"
181 );
182 }
183 }
184 }
185}