tracy_client/gpu.rs
use std::{
    convert::TryInto,
    sync::{Arc, Mutex},
};

use crate::{Client, SpanLocation};

#[repr(u8)]
/// The API label associated with the given gpu context. The list here only includes
/// APIs that are currently supported by Tracy's own gpu implementations.
//
// Copied from `tracy-client-sys/tracy/common/TracyQueue.hpp:391`. The comment on that
// enum states that the values are stable, due to potential serialization issues, so
// copying the enum here shouldn't be a problem.
pub enum GpuContextType {
    /// Stand in for other types of contexts.
    Invalid = 0,
    /// An OpenGL context
    OpenGL = 1,
    /// A Vulkan context
    Vulkan = 2,
    /// An OpenCL context
    OpenCL = 3,
    /// A D3D12 context.
    Direct3D12 = 4,
    /// A D3D11 context.
    Direct3D11 = 5,
}

/// Context for creating gpu spans.
///
/// Generally corresponds to a single hardware queue.
///
/// The flow of creating and using a gpu context generally looks like this:
///
/// ```rust,no_run
/// # let client = tracy_client::Client::start();
/// // The period of the gpu clock in nanoseconds, as provided by your GPU api.
/// // A period of 1.0 nanoseconds corresponds to a 1GHz clock.
/// let period: f32 = 1.0;
///
/// // GPU API: Record writing a timestamp and resolve that to a mappable buffer.
/// // GPU API: Submit the command buffer writing the timestamp.
/// // GPU API: Immediately block until the submission is finished.
/// // GPU API: Map buffer, get timestamp value.
/// let starting_timestamp: i64 = /* whatever timestamp value you read back */ 0;
///
/// // Create the gpu context
/// let gpu_context = client.new_gpu_context(
///     Some("MyContext"),
///     tracy_client::GpuContextType::Vulkan,
///     starting_timestamp,
///     period
/// ).unwrap();
///
/// // Now you have some work that you want to time on the gpu.
///
/// // GPU API: Record writing a timestamp before the work.
/// let mut span = gpu_context.span_alloc("MyGpuSpan1", "My::Work", "myfile.rs", 12).unwrap();
///
/// // GPU API: Record work.
///
/// // GPU API: Record writing a timestamp after the work.
/// span.end_zone();
///
/// // Some time later, once the written timestamp values are available on the cpu.
/// # let (starting_timestamp, ending_timestamp) = (0, 0);
///
/// span.upload_timestamp_start(starting_timestamp);
/// span.upload_timestamp_end(ending_timestamp);
/// ```
#[derive(Clone)]
pub struct GpuContext {
    #[cfg(feature = "enable")]
    _client: Client,
    #[cfg(feature = "enable")]
    value: u8,
    #[cfg(feature = "enable")]
    span_freelist: Arc<Mutex<Vec<u16>>>,
    _private: (),
}

#[cfg(feature = "enable")]
static GPU_CONTEXT_INDEX: Mutex<u8> = Mutex::new(0);

/// Errors that can occur when creating a gpu context.
#[derive(Debug)]
pub enum GpuContextCreationError {
    /// More than `u8::MAX` contexts have been created at any point in the program.
    TooManyContextsCreated,
}

impl std::fmt::Display for GpuContextCreationError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "More than 255 contexts have been created at any point in the execution of this program."
        )
    }
}

impl std::error::Error for GpuContextCreationError {}

#[derive(Debug, PartialEq)]
enum GpuSpanState {
    /// The span has been started. All gpu spans start in this state.
    Started,
    /// The span has been ended, either waiting for timestamp upload or with
    /// timestamp upload completed.
    Ended,
}

/// Span for timing gpu work.
///
/// See the [context level documentation](GpuContext) for more information on use.
///
/// If the span is dropped early, the following happens:
/// - If the span has not been ended, the span is ended.
/// - If the span has not had values uploaded, the span is uploaded with
///   the timestamps marking the start of the current gpu context. This
///   will put the span out of the way of other spans.
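///
/// A minimal sketch of the early-drop case (not from the original docs; the
/// `gpu_context` setup is assumed to match the [`GpuContext`] example):
///
/// ```rust,no_run
/// # let client = tracy_client::Client::start();
/// # let gpu_context = client
/// #     .new_gpu_context(None, tracy_client::GpuContextType::Invalid, 0, 1.0)
/// #     .unwrap();
/// let span = gpu_context.span_alloc("Abandoned", "My::Work", "myfile.rs", 34).unwrap();
/// // Dropping the span without calling `end_zone` ends it automatically and
/// // returns its query ids to the context for reuse.
/// drop(span);
/// ```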
#[must_use]
pub struct GpuSpan {
    #[cfg(feature = "enable")]
    context: GpuContext,
    #[cfg(feature = "enable")]
    start_query_id: u16,
    #[cfg(feature = "enable")]
    end_query_id: u16,
    #[cfg(feature = "enable")]
    state: GpuSpanState,
    _private: (),
}

/// Errors that can occur when creating a gpu span.
#[derive(Debug)]
pub enum GpuSpanCreationError {
    /// More than `32767` spans are still waiting for gpu data.
    TooManyPendingSpans,
}

impl std::fmt::Display for GpuSpanCreationError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "Too many spans still waiting for gpu data. No more than 32768 spans may be pending gpu data at once."
        )
    }
}

impl std::error::Error for GpuSpanCreationError {}

impl Client {
    /// Creates a new GPU context.
    ///
    /// - `name` is the name of the context.
    /// - `ty` is the type (backend) of the context.
    /// - `gpu_timestamp` is the gpu-side timestamp that corresponds (as closely as possible) to this call.
    /// - `period` is the period of the gpu clock in nanoseconds (setting 1.0 means the clock is 1GHz, 1000.0 means 1MHz, etc).
    ///
    /// See the [type level documentation](GpuContext) for more information.
    ///
    /// # Errors
    ///
    /// - If more than 255 contexts were made during the lifetime of the application.
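    ///
    /// # Example
    ///
    /// A minimal sketch (assumed values, not from the original docs), showing
    /// how a clock frequency converts into the `period` argument:
    ///
    /// ```rust,no_run
    /// # let client = tracy_client::Client::start();
    /// // A gpu clock running at `f` Hz has a period of `1e9 / f` nanoseconds,
    /// // so a 1GHz clock has a period of 1.0.
    /// let frequency_hz: f64 = 1_000_000_000.0;
    /// let period = (1_000_000_000.0 / frequency_hz) as f32;
    /// # let gpu_timestamp: i64 = 0;
    /// let context = client
    ///     .new_gpu_context(
    ///         Some("Queue0"),
    ///         tracy_client::GpuContextType::Invalid,
    ///         gpu_timestamp,
    ///         period,
    ///     )
    ///     .unwrap();
    /// ```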
    pub fn new_gpu_context(
        self,
        name: Option<&str>,
        ty: GpuContextType,
        gpu_timestamp: i64,
        period: f32,
    ) -> Result<GpuContext, GpuContextCreationError> {
        #[cfg(feature = "enable")]
        {
            // We hold the mutex across the read and increment of the context index,
            // so concurrent calls cannot observe the same value.
            //
            // This prevents multiple contexts getting the same context id.
            let mut context_index_guard = GPU_CONTEXT_INDEX.lock().unwrap();
            if *context_index_guard == 255 {
                return Err(GpuContextCreationError::TooManyContextsCreated);
            }
            let context = *context_index_guard;
            *context_index_guard += 1;
            drop(context_index_guard);

            // SAFETY:
            // - We know we aren't re-using the context id because of the above logic.
            unsafe {
                sys::___tracy_emit_gpu_new_context_serial(sys::___tracy_gpu_new_context_data {
                    gpuTime: gpu_timestamp,
                    period,
                    context,
                    flags: 0,
                    type_: ty as u8,
                });
            };

            if let Some(name) = name {
                // SAFETY:
                // - We've allocated a context.
                // - The name will be copied into the command stream, so the pointer does not need to last.
                unsafe {
                    sys::___tracy_emit_gpu_context_name_serial(
                        sys::___tracy_gpu_context_name_data {
                            context,
                            name: name.as_ptr().cast(),
                            len: name.len().try_into().unwrap_or(u16::MAX),
                        },
                    );
                }
            }

            Ok(GpuContext {
                _client: self,
                value: context,
                span_freelist: Arc::new(Mutex::new((0..=u16::MAX).collect())),
                _private: (),
            })
        }
        #[cfg(not(feature = "enable"))]
        Ok(GpuContext { _private: () })
    }
}

impl GpuContext {
    #[cfg(feature = "enable")]
    fn alloc_span_ids(&self) -> Result<(u16, u16), GpuSpanCreationError> {
        let mut freelist = self.span_freelist.lock().unwrap();
        if freelist.len() < 2 {
            return Err(GpuSpanCreationError::TooManyPendingSpans);
        }
        // These unwraps are unreachable: we just checked that the freelist
        // holds at least two ids.
        let start = freelist.pop().unwrap();
        let end = freelist.pop().unwrap();
        Ok((start, end))
    }

    /// Creates a new gpu span with the given source location.
    ///
    /// This should be called right next to where you record the corresponding gpu timestamp. This
    /// allows Tracy to correctly associate the cpu time with the gpu timestamp.
    ///
    /// # Errors
    ///
    /// - If there are more than 32767 spans waiting for gpu data at once.
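    ///
    /// # Example
    ///
    /// A minimal sketch (not from the original docs), using this crate's
    /// `span_location!` macro to produce the `&'static SpanLocation` and a
    /// `gpu_context` assumed to be set up as in the [`GpuContext`] example:
    ///
    /// ```rust,no_run
    /// # let client = tracy_client::Client::start();
    /// # let gpu_context = client
    /// #     .new_gpu_context(None, tracy_client::GpuContextType::Invalid, 0, 1.0)
    /// #     .unwrap();
    /// // GPU API: Record writing the starting timestamp.
    /// let mut span = gpu_context
    ///     .span(tracy_client::span_location!("MyGpuSpan"))
    ///     .unwrap();
    /// // GPU API: Record work, then record writing the ending timestamp.
    /// span.end_zone();
    /// ```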
    pub fn span(
        &self,
        span_location: &'static SpanLocation,
    ) -> Result<GpuSpan, GpuSpanCreationError> {
        #[cfg(feature = "enable")]
        {
            let (start_query_id, end_query_id) = self.alloc_span_ids()?;

            // SAFETY: We know that the span location is valid forever as it is 'static. `usize`
            // is never wider than `u64`, so no data will be lost in the conversion.
            unsafe {
                sys::___tracy_emit_gpu_zone_begin_serial(sys::___tracy_gpu_zone_begin_data {
                    srcloc: std::ptr::addr_of!(span_location.data) as usize as u64,
                    queryId: start_query_id,
                    context: self.value,
                });
            };

            Ok(GpuSpan {
                context: self.clone(),
                start_query_id,
                end_query_id,
                state: GpuSpanState::Started,
                _private: (),
            })
        }
        #[cfg(not(feature = "enable"))]
        Ok(GpuSpan { _private: () })
    }

    /// Creates a new gpu span with the given name, function, file, and line.
    ///
    /// This should be called right next to where you record the corresponding gpu timestamp. This
    /// allows Tracy to correctly associate the cpu time with the gpu timestamp.
    ///
    /// # Errors
    ///
    /// - If there are more than 32767 spans waiting for gpu data at once.
    pub fn span_alloc(
        &self,
        name: &str,
        function: &str,
        file: &str,
        line: u32,
    ) -> Result<GpuSpan, GpuSpanCreationError> {
        #[cfg(feature = "enable")]
        {
            let srcloc = unsafe {
                sys::___tracy_alloc_srcloc_name(
                    line,
                    file.as_ptr().cast(),
                    file.len(),
                    function.as_ptr().cast(),
                    function.len(),
                    name.as_ptr().cast(),
                    name.len(),
                    0,
                )
            };

            let (start_query_id, end_query_id) = self.alloc_span_ids()?;

            unsafe {
                sys::___tracy_emit_gpu_zone_begin_alloc_serial(sys::___tracy_gpu_zone_begin_data {
                    srcloc,
                    queryId: start_query_id,
                    context: self.value,
                });
            };

            Ok(GpuSpan {
                context: self.clone(),
                start_query_id,
                end_query_id,
                state: GpuSpanState::Started,
                _private: (),
            })
        }
        #[cfg(not(feature = "enable"))]
        Ok(GpuSpan { _private: () })
    }
}

impl GpuSpan {
    /// Marks the end of the given gpu span. This should be called right next to where you record
    /// the corresponding gpu timestamp for the end of the span. This allows Tracy to correctly
    /// associate the cpu time with the gpu timestamp.
    ///
    /// Only the first time you call this function will it actually emit a gpu zone end event. Any
    /// subsequent calls will be ignored.
    pub fn end_zone(&mut self) {
        #[cfg(feature = "enable")]
        {
            if self.state != GpuSpanState::Started {
                return;
            }
            unsafe {
                sys::___tracy_emit_gpu_zone_end_serial(sys::___tracy_gpu_zone_end_data {
                    queryId: self.end_query_id,
                    context: self.context.value,
                });
            };
            self.state = GpuSpanState::Ended;
        }
    }

    /// Supplies the GPU timestamp for the start of this span.
    ///
    /// In order to avoid confusing Tracy, you must call
    /// [`Self::upload_timestamp_start`] and [`Self::upload_timestamp_end`] in
    /// monotonically increasing timestamp order. For example, if you have two
    /// nested spans *outer* and *inner*, you must supply the timestamps in
    /// this order: (1) *outer* start; (2) *inner* start; (3) *inner* end; (4)
    /// *outer* end.
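    ///
    /// A sketch of that nested case (the span setup and timestamp values are
    /// assumptions for illustration, not from the original docs):
    ///
    /// ```rust,no_run
    /// # let client = tracy_client::Client::start();
    /// # let gpu_context = client
    /// #     .new_gpu_context(None, tracy_client::GpuContextType::Invalid, 0, 1.0)
    /// #     .unwrap();
    /// # let mut outer = gpu_context.span_alloc("outer", "work", "file.rs", 1).unwrap();
    /// # let mut inner = gpu_context.span_alloc("inner", "work", "file.rs", 2).unwrap();
    /// # inner.end_zone();
    /// # outer.end_zone();
    /// # let (outer_start, inner_start, inner_end, outer_end) = (0, 1, 2, 3);
    /// // Timestamps are fed to Tracy in increasing order, not span by span.
    /// outer.upload_timestamp_start(outer_start);
    /// inner.upload_timestamp_start(inner_start);
    /// inner.upload_timestamp_end(inner_end);
    /// outer.upload_timestamp_end(outer_end);
    /// ```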
    pub fn upload_timestamp_start(&self, start_timestamp: i64) {
        #[cfg(feature = "enable")]
        unsafe {
            sys::___tracy_emit_gpu_time_serial(sys::___tracy_gpu_time_data {
                gpuTime: start_timestamp,
                queryId: self.start_query_id,
                context: self.context.value,
            });
        };
    }

    /// Supplies the GPU timestamp for the end of this span.
    ///
    /// In order to avoid confusing Tracy, you must call
    /// [`Self::upload_timestamp_start`] and [`Self::upload_timestamp_end`] in
    /// monotonically increasing timestamp order. For example, if you have two
    /// nested spans *outer* and *inner*, you must supply the timestamps in this
    /// order: (1) *outer* start; (2) *inner* start; (3) *inner* end; (4)
    /// *outer* end.
    pub fn upload_timestamp_end(&self, end_timestamp: i64) {
        #[cfg(feature = "enable")]
        unsafe {
            sys::___tracy_emit_gpu_time_serial(sys::___tracy_gpu_time_data {
                gpuTime: end_timestamp,
                queryId: self.end_query_id,
                context: self.context.value,
            });
        };
    }
}

impl Drop for GpuSpan {
    fn drop(&mut self) {
        #[cfg(feature = "enable")]
        {
            match self.state {
                GpuSpanState::Started => {
                    self.end_zone();
                }
                GpuSpanState::Ended => {}
            }

            // Put the ids back into the freelist so they can be reused by future spans.
            let mut freelist = self.context.span_freelist.lock().unwrap();
            freelist.push(self.start_query_id);
            freelist.push(self.end_query_id);
            drop(freelist);
        }
    }
}