singe_cuda/event.rs
1use std::{cmp::Ordering, mem::ManuallyDrop, ptr, sync::Arc};
2
3use singe_cuda_sys::{driver, runtime};
4
5use crate::{
6 context::Context,
7 error::{Error, Result},
8 stream::{BorrowedStream, Stream, StreamBinding},
9 try_ffi,
10};
11
12bitflags::bitflags! {
13 /// Flags for CUDA event creation ([`Context::create_event_with_flags`]).
14 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
15 pub struct EventFlags: u32 {
16 const DEFAULT = driver::CUevent_flags::CU_EVENT_DEFAULT as _;
17 const BLOCKING_SYNC = driver::CUevent_flags::CU_EVENT_BLOCKING_SYNC as _;
18 const DISABLE_TIMING = driver::CUevent_flags::CU_EVENT_DISABLE_TIMING as _;
19 const INTERPROCESS = driver::CUevent_flags::CU_EVENT_INTERPROCESS as _;
20 }
21}
22
23bitflags::bitflags! {
24 /// Flags for `Event::record_raw`.
25 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
26 pub struct EventRecordFlags: u32 {
27 const DEFAULT = runtime::cudaEventRecordDefault;
28 const EXTERNAL = runtime::cudaEventRecordExternal;
29 }
30}
31
32#[derive(Debug)]
33pub struct Event {
34 handle: runtime::cudaEvent_t,
35 ctx: Arc<Context>,
36}
37
38impl PartialEq for Event {
39 fn eq(&self, other: &Self) -> bool {
40 self.handle == other.handle && Arc::ptr_eq(&self.ctx, &other.ctx)
41 }
42}
43
44impl Eq for Event {}
45
46impl PartialOrd for Event {
47 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
48 Some(self.cmp(other))
49 }
50}
51
52impl Ord for Event {
53 fn cmp(&self, other: &Self) -> Ordering {
54 let lhs = (Arc::as_ptr(&self.ctx) as usize, self.handle as usize);
55 let rhs = (Arc::as_ptr(&other.ctx) as usize, other.handle as usize);
56 lhs.cmp(&rhs)
57 }
58}
59
60impl Event {
61 /// Wraps an existing CUDA event handle.
62 ///
63 /// # Safety
64 ///
65 /// `handle` must be a valid CUDA event owned by `ctx`, and ownership of the
66 /// handle is transferred to the returned [`Event`]. The handle must not be
67 /// destroyed elsewhere after calling this function.
68 pub unsafe fn from_raw(handle: runtime::cudaEvent_t, ctx: Arc<Context>) -> Result<Self> {
69 if handle.is_null() {
70 return Err(Error::NullHandle);
71 }
72
73 Ok(Self { handle, ctx })
74 }
75
76 pub fn record(&self, stream: &Stream, flags: EventRecordFlags) -> Result<()> {
77 if stream.context() != self.context() {
78 return Err(Error::StreamContextMismatch);
79 }
80 self.record_raw(stream.as_raw(), flags)
81 }
82
83 pub fn record_borrowed(&self, stream: &BorrowedStream, flags: EventRecordFlags) -> Result<()> {
84 if stream.context() != self.context() {
85 return Err(Error::StreamContextMismatch);
86 }
87 self.record_raw(stream.as_raw(), flags)
88 }
89
90 pub fn record_on(&self, stream: &StreamBinding, flags: EventRecordFlags) -> Result<()> {
91 if stream.context() != self.context() {
92 return Err(Error::StreamContextMismatch);
93 }
94 self.record_raw(stream.as_raw(), flags)
95 }
96
97 fn record_raw(&self, stream: runtime::cudaStream_t, flags: EventRecordFlags) -> Result<()> {
98 self.ctx.bind()?;
99 unsafe {
100 try_ffi!(runtime::cudaEventRecordWithFlags(
101 self.as_raw(),
102 stream,
103 flags.bits(),
104 ))?;
105 }
106 Ok(())
107 }
108
109 /// Queries the status of all work currently captured by event.
110 /// See [`sys::cudaEventRecord`](singe_cuda_sys::runtime::cudaEventRecord) for details on what is captured by an event.
111 ///
112 /// Returns `true` if all captured work has been completed, or `false` if any captured work is incomplete.
113 ///
114 /// For the purposes of Unified Memory, a return value of `true` is equivalent to having called [`Event::synchronize`].
115 ///
116 /// # Errors
117 ///
118 /// Returns an error if CUDA cannot query the event. CUDA may also report
119 /// errors from previous asynchronous launches, internal runtime
120 /// initialization errors such as [`crate::error::Status::NotInitialized`],
121 /// [`crate::error::Status::CallRequiresNewerDriver`], or [`crate::error::Status::NoDevice`], and
122 /// callback diagnostics such as [`crate::error::Status::NotPermitted`].
123 pub fn query(&self) -> Result<bool> {
124 self.ctx.bind()?;
125 let error = unsafe { runtime::cudaEventQuery(self.as_raw()) };
126 match error {
127 runtime::cudaError_t::CUDA_SUCCESS => Ok(true),
128 runtime::cudaError_t::CUDA_ERROR_NOT_READY => Ok(false),
129 _ => Err(error.into()),
130 }
131 }
132
133 /// Waits until the completion of all work currently captured in event.
134 /// See [`sys::cudaEventRecord`](singe_cuda_sys::runtime::cudaEventRecord) for details on what is captured by an event.
135 ///
136 /// Waiting for an event created with [`EventFlags::BLOCKING_SYNC`] causes the calling CPU thread to block until the event has been completed by the device.
137 /// Without [`EventFlags::BLOCKING_SYNC`], the CPU thread will busy-wait until the event has been completed by the device.
138 ///
139 /// # Errors
140 ///
141 /// Returns an error if CUDA cannot wait for the event. CUDA may also report
142 /// errors from previous asynchronous launches, internal runtime
143 /// initialization errors such as [`crate::error::Status::NotInitialized`],
144 /// [`crate::error::Status::CallRequiresNewerDriver`], or [`crate::error::Status::NoDevice`], and
145 /// callback diagnostics such as [`crate::error::Status::NotPermitted`].
146 pub fn synchronize(&self) -> Result<()> {
147 self.ctx.bind()?;
148 unsafe {
149 try_ffi!(runtime::cudaEventSynchronize(self.as_raw()))?;
150 }
151 Ok(())
152 }
153
154 /// Computes the elapsed time between two events (in milliseconds with a resolution of around 0.5 microseconds).
155 /// Note this call is not guaranteed to return the latest errors for pending work.
156 /// Use it only for elapsed-time calculation; poll for completion on the events to be compared with [`Event::query`] instead.
157 ///
158 /// If either event was last recorded in a non-default stream, the resulting time may be greater than expected, even if both used the same stream handle.
159 /// This happens because the [`sys::cudaEventRecord`](singe_cuda_sys::runtime::cudaEventRecord) operation takes place asynchronously and there is no guarantee that the measured latency is actually just between the two events.
160 /// Any number of other different stream operations could execute in between the two measured events, thus altering the timing in a significant way.
161 ///
162 /// If [`sys::cudaEventRecord`](singe_cuda_sys::runtime::cudaEventRecord) has not been called on either event, then [`crate::error::Status::InvalidHandle`] is returned.
163 /// If [`sys::cudaEventRecord`](singe_cuda_sys::runtime::cudaEventRecord) has been called on both events but one or both of them has not yet been completed (that is, [`Event::query`] would return [`crate::error::Status::NotReady`] on at least one of the events), [`crate::error::Status::NotReady`] is returned.
164 /// If either event was created with [`EventFlags::DISABLE_TIMING`], this returns [`crate::error::Status::InvalidHandle`].
165 ///
166 /// # Errors
167 ///
168 /// Returns an error if the events belong to different contexts, either
169 /// event has not been recorded, either event is incomplete, timing was
170 /// disabled on either event, or CUDA rejects the elapsed-time query. CUDA
171 /// may also report errors from previous asynchronous launches, internal
172 /// runtime initialization errors such as [`crate::error::Status::NotInitialized`],
173 /// [`crate::error::Status::CallRequiresNewerDriver`], or [`crate::error::Status::NoDevice`], and
174 /// callback diagnostics such as [`crate::error::Status::NotPermitted`].
175 pub fn elapsed_time_since(&self, start: &Event) -> Result<f32> {
176 if self.context() != start.context() {
177 return Err(runtime::cudaError_t::CUDA_ERROR_INVALID_CONTEXT.into());
178 }
179
180 self.ctx.bind()?;
181 let mut milliseconds = 0.0f32;
182 unsafe {
183 try_ffi!(runtime::cudaEventElapsedTime(
184 &raw mut milliseconds,
185 start.as_raw(),
186 self.as_raw(),
187 ))?;
188 }
189 Ok(milliseconds)
190 }
191
192 pub fn context(&self) -> &Context {
193 &self.ctx
194 }
195
196 pub const fn as_raw(&self) -> runtime::cudaEvent_t {
197 self.handle
198 }
199
200 /// Consumes the event and returns the raw CUDA event handle without
201 /// destroying it.
202 ///
203 /// The caller becomes responsible for eventually destroying the returned
204 /// handle with CUDA.
205 pub fn into_raw(self) -> runtime::cudaEvent_t {
206 let event = ManuallyDrop::new(self);
207 event.handle
208 }
209}
210
211// CUDA events are synchronization handles. Recording/waiting uses CUDA's event
212// semantics and does not mutate Rust-owned state through shared references.
213unsafe impl Send for Event {}
214unsafe impl Sync for Event {}
215
216impl Drop for Event {
217 fn drop(&mut self) {
218 if let Err(err) = self.ctx.bind() {
219 #[cfg(debug_assertions)]
220 eprintln!("failed to bind context before destroying event: {err}");
221 }
222 unsafe {
223 if let Err(err) = try_ffi!(runtime::cudaEventDestroy(self.handle)) {
224 #[cfg(debug_assertions)]
225 eprintln!("failed to destroy CUDA event: {err}");
226 }
227 }
228 }
229}
230
231impl Context {
232 pub fn create_event(self: &Arc<Self>) -> Result<Event> {
233 self.create_event_with_flags(EventFlags::DEFAULT)
234 }
235
236 /// Creates an event object for the current device with the specified flags.
237 /// Valid flags include:
238 ///
239 /// * [`EventFlags::DEFAULT`]: default event creation flag.
240 /// * [`EventFlags::BLOCKING_SYNC`]: the event uses blocking synchronization.
241 /// A host thread that uses [`Event::synchronize`] to wait on an event created with this flag will block until the event actually completes.
242 /// * [`EventFlags::DISABLE_TIMING`]: the created event does not record timing data.
243 /// Events created with this flag specified and [`EventFlags::BLOCKING_SYNC`] not specified will provide the best performance when used with [`Stream::wait_event`] and [`Event::query`].
244 /// * [`EventFlags::INTERPROCESS`]: the created event may be used as an interprocess event by [`sys::cudaIpcGetEventHandle`](singe_cuda_sys::runtime::cudaIpcGetEventHandle).
245 /// [`EventFlags::INTERPROCESS`] must be specified along with [`EventFlags::DISABLE_TIMING`].
246 ///
247 /// # Errors
248 ///
249 /// Returns an error if the context cannot be bound, the flag combination is
250 /// invalid, CUDA cannot create the event, or CUDA returns a null event
251 /// handle. CUDA may also report errors from previous asynchronous launches,
252 /// internal runtime initialization errors such as
253 /// [`crate::error::Status::NotInitialized`], [`crate::error::Status::CallRequiresNewerDriver`],
254 /// or [`crate::error::Status::NoDevice`], and callback diagnostics such as
255 /// [`crate::error::Status::NotPermitted`].
256 pub fn create_event_with_flags(self: &Arc<Self>, flags: EventFlags) -> Result<Event> {
257 self.bind()?;
258 let mut handle = ptr::null_mut();
259 unsafe {
260 try_ffi!(runtime::cudaEventCreateWithFlags(
261 &raw mut handle,
262 flags.bits()
263 ))?;
264 }
265 if handle.is_null() {
266 return Err(Error::NullHandle);
267 }
268 unsafe { Event::from_raw(handle, Arc::clone(self)) }
269 }
270}