tracy_client/gpu.rs
use std::{
    convert::TryInto,
    sync::{Arc, Mutex},
};

use crate::{Client, SpanLocation};

/// The API label associated with the given gpu context. The list here only includes
/// APIs that are currently supported by Tracy's own gpu implementations.
//
// Copied from `tracy-client-sys/tracy/common/TracyQueue.hpp:391`. The comment on the enum
// states that the values are stable due to potential serialization issues, so copying this
// enum shouldn't be a problem.
#[repr(u8)]
pub enum GpuContextType {
    /// Stand-in for other types of contexts.
    Invalid = 0,
    /// An OpenGL context.
    OpenGL = 1,
    /// A Vulkan context.
    Vulkan = 2,
    /// An OpenCL context.
    OpenCL = 3,
    /// A D3D12 context.
    Direct3D12 = 4,
    /// A D3D11 context.
    Direct3D11 = 5,
}

/// Context for creating gpu spans.
///
/// Generally corresponds to a single hardware queue.
///
/// The flow of creating and using a gpu context generally looks like this:
///
/// ```rust,no_run
/// # let client = tracy_client::Client::start();
/// // The period of the gpu clock in nanoseconds, as provided by your GPU api.
/// // A period of 1.0 corresponds to a 1GHz clock.
/// let period: f32 = 1.0;
///
/// // GPU API: Record writing a timestamp and resolve that to a mappable buffer.
/// // GPU API: Submit the command buffer writing the timestamp.
/// // GPU API: Immediately block until the submission is finished.
/// // GPU API: Map buffer, get timestamp value.
/// let starting_timestamp: i64 = /* whatever you get from this timestamp */ 0;
///
/// // Create the gpu context.
/// let gpu_context = client.new_gpu_context(
///     Some("MyContext"),
///     tracy_client::GpuContextType::Vulkan,
///     starting_timestamp,
///     period
/// ).unwrap();
///
/// // Now you have some work that you want to time on the gpu.
///
/// // GPU API: Record writing a timestamp before the work.
/// let mut span = gpu_context.span_alloc("MyGpuSpan1", "My::Work", "myfile.rs", 12).unwrap();
///
/// // GPU API: Record work.
///
/// // GPU API: Record writing a timestamp after the work.
/// span.end_zone();
///
/// // Some time later, once the written timestamp values are available on the cpu.
/// # let (starting_timestamp, ending_timestamp) = (0, 0);
///
/// span.upload_timestamp_start(starting_timestamp);
/// span.upload_timestamp_end(ending_timestamp);
/// ```
#[derive(Clone)]
pub struct GpuContext {
    #[cfg(feature = "enable")]
    _client: Client,
    #[cfg(feature = "enable")]
    value: u8,
    #[cfg(feature = "enable")]
    span_freelist: Arc<Mutex<Vec<u16>>>,
    _private: (),
}
#[cfg(feature = "enable")]
static GPU_CONTEXT_INDEX: Mutex<u8> = Mutex::new(0);

/// Errors that can occur when creating a gpu context.
#[derive(Debug)]
pub enum GpuContextCreationError {
    /// More than `u8::MAX` contexts have been created at any point in the program.
    TooManyContextsCreated,
}

impl std::fmt::Display for GpuContextCreationError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "More than 255 contexts have been created at any point in the execution of this program."
        )
    }
}

impl std::error::Error for GpuContextCreationError {}

#[derive(Debug, PartialEq)]
enum GpuSpanState {
    /// The span has been started. All gpu spans start in this state.
    Started,
    /// The span has been ended, either waiting for timestamp upload or with
    /// timestamp upload completed.
    Ended,
}

/// Span for timing gpu work.
///
/// See the [context level documentation](GpuContext) for more information on use.
///
/// If the span is dropped early, the following happens:
/// - If the span has not been ended, the span is ended.
/// - The span's query ids are returned to the context's freelist for reuse.
///
/// Note that dropping a span does not upload any timestamps on its own.
#[must_use]
pub struct GpuSpan {
    #[cfg(feature = "enable")]
    context: GpuContext,
    #[cfg(feature = "enable")]
    start_query_id: u16,
    #[cfg(feature = "enable")]
    end_query_id: u16,
    #[cfg(feature = "enable")]
    state: GpuSpanState,
    _private: (),
}

/// Errors that can occur when creating a gpu span.
#[derive(Debug)]
pub enum GpuSpanCreationError {
    /// More than `32767` spans are still waiting for gpu data.
    TooManyPendingSpans,
}

impl std::fmt::Display for GpuSpanCreationError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "Too many spans still waiting for gpu data. There may not be more than 32767 spans that are pending gpu data at once."
        )
    }
}

impl std::error::Error for GpuSpanCreationError {}

impl Client {
    /// Creates a new GPU context.
    ///
    /// - `name` is the name of the context.
    /// - `ty` is the type (backend) of the context.
    /// - `gpu_timestamp` is the gpu side timestamp that corresponds (as closely as possible) to this call.
    /// - `period` is the period of the gpu clock in nanoseconds (setting 1.0 means the clock is 1GHz, 1000.0 means 1MHz, etc.).
    ///
    /// See the [type level documentation](GpuContext) for more information.
    ///
    /// # Errors
    ///
    /// - If more than 255 contexts were made during the lifetime of the application.
    pub fn new_gpu_context(
        self,
        name: Option<&str>,
        ty: GpuContextType,
        gpu_timestamp: i64,
        period: f32,
    ) -> Result<GpuContext, GpuContextCreationError> {
        #[cfg(feature = "enable")]
        {
            // We use a mutex rather than an atomic fetch_add so that we can check for
            // overflow before incrementing. This prevents two contexts from ever
            // receiving the same context id.
            let mut context_index_guard = GPU_CONTEXT_INDEX.lock().unwrap();
            if *context_index_guard == 255 {
                return Err(GpuContextCreationError::TooManyContextsCreated);
            }
            let context = *context_index_guard;
            *context_index_guard += 1;
            drop(context_index_guard);

            // SAFETY:
            // - We know we aren't re-using the context id because of the above logic.
            unsafe {
                sys::___tracy_emit_gpu_new_context_serial(sys::___tracy_gpu_new_context_data {
                    gpuTime: gpu_timestamp,
                    period,
                    context,
                    flags: 0,
                    type_: ty as u8,
                });
            };

            if let Some(name) = name {
                // SAFETY:
                // - We've allocated a context.
                // - The name will be copied into the command stream, so the pointer does
                //   not need to outlive this call.
                unsafe {
                    sys::___tracy_emit_gpu_context_name_serial(
                        sys::___tracy_gpu_context_name_data {
                            context,
                            name: name.as_ptr().cast(),
                            len: name.len().try_into().unwrap_or(u16::MAX),
                        },
                    );
                }
            }

            Ok(GpuContext {
                _client: self,
                value: context,
                span_freelist: Arc::new(Mutex::new((0..=u16::MAX).collect())),
                _private: (),
            })
        }
        #[cfg(not(feature = "enable"))]
        Ok(GpuContext { _private: () })
    }
}

impl GpuContext {
    #[cfg(feature = "enable")]
    fn alloc_span_ids(&self) -> Result<(u16, u16), GpuSpanCreationError> {
        let mut freelist = self.span_freelist.lock().unwrap();
        if freelist.len() < 2 {
            return Err(GpuSpanCreationError::TooManyPendingSpans);
        }
        // These unwraps cannot fail: we just checked that the freelist holds at
        // least two ids.
        let start = freelist.pop().unwrap();
        let end = freelist.pop().unwrap();
        Ok((start, end))
    }

    /// Creates a new gpu span with the given source location.
    ///
    /// This should be called right next to where you record the corresponding gpu timestamp. This
    /// allows Tracy to correctly associate the cpu time with the gpu timestamp.
    ///
    /// # Errors
    ///
    /// - If there are more than 32767 spans waiting for gpu data at once.
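    ///
    /// A minimal sketch of the flow (assuming this crate's `span_location!` macro for
    /// creating the static source location):
    ///
    /// ```rust,no_run
    /// # let client = tracy_client::Client::start();
    /// # let gpu_context = client
    /// #     .new_gpu_context(None, tracy_client::GpuContextType::Invalid, 0, 1.0)
    /// #     .unwrap();
    /// // GPU API: Record writing the start timestamp.
    /// let mut span = gpu_context.span(tracy_client::span_location!("MyGpuSpan")).unwrap();
    /// // GPU API: Record work, then record writing the end timestamp.
    /// span.end_zone();
    /// // Later: upload the two timestamps once they are available on the cpu.
    /// # span.upload_timestamp_start(0);
    /// # span.upload_timestamp_end(0);
    /// ```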
    pub fn span(
        &self,
        span_location: &'static SpanLocation,
    ) -> Result<GpuSpan, GpuSpanCreationError> {
        #[cfg(feature = "enable")]
        {
            let (start_query_id, end_query_id) = self.alloc_span_ids()?;

            // SAFETY: We know that the span location is valid forever as it is 'static, and
            // `usize` is never wider than `u64`, so no data is lost in the cast.
            unsafe {
                sys::___tracy_emit_gpu_zone_begin_serial(sys::___tracy_gpu_zone_begin_data {
                    srcloc: std::ptr::addr_of!(span_location.data) as usize as u64,
                    queryId: start_query_id,
                    context: self.value,
                });
            };

            Ok(GpuSpan {
                context: self.clone(),
                start_query_id,
                end_query_id,
                state: GpuSpanState::Started,
                _private: (),
            })
        }
        #[cfg(not(feature = "enable"))]
        Ok(GpuSpan { _private: () })
    }

    /// Creates a new gpu span with the given name, function, file, and line.
    ///
    /// This should be called right next to where you record the corresponding gpu timestamp. This
    /// allows Tracy to correctly associate the cpu time with the gpu timestamp.
    ///
    /// # Errors
    ///
    /// - If there are more than 32767 spans waiting for gpu data at once.
    pub fn span_alloc(
        &self,
        name: &str,
        function: &str,
        file: &str,
        line: u32,
    ) -> Result<GpuSpan, GpuSpanCreationError> {
        #[cfg(feature = "enable")]
        {
            // SAFETY: The strings are copied when tracy allocates the source location,
            // so the pointers only need to stay valid for the duration of this call.
            let srcloc = unsafe {
                sys::___tracy_alloc_srcloc_name(
                    line,
                    file.as_ptr().cast(),
                    file.len(),
                    function.as_ptr().cast(),
                    function.len(),
                    name.as_ptr().cast(),
                    name.len(),
                    0,
                )
            };

            let (start_query_id, end_query_id) = self.alloc_span_ids()?;

            unsafe {
                sys::___tracy_emit_gpu_zone_begin_alloc_serial(sys::___tracy_gpu_zone_begin_data {
                    srcloc,
                    queryId: start_query_id,
                    context: self.value,
                });
            };

            Ok(GpuSpan {
                context: self.clone(),
                start_query_id,
                end_query_id,
                state: GpuSpanState::Started,
                _private: (),
            })
        }
        #[cfg(not(feature = "enable"))]
        Ok(GpuSpan { _private: () })
    }

    /// Begins a new manually tracked GPU span.
    ///
    /// You can use this instead of [`GpuContext::span()`] if you'd like to track the GPU span
    /// manually. `query_id` is the id of the GPU timestamp query that you created; when the
    /// GPU timestamp is ready, call [`GpuContext::upload_gpu_timestamp()`] to upload it to Tracy.
    ///
    /// This should be called right next to where you record the corresponding GPU timestamp. This
    /// allows Tracy to correctly associate the cpu time with the gpu timestamp.
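    ///
    /// A sketch of the manual flow (query ids and timestamp values here are placeholders
    /// for whatever your GPU api hands you):
    ///
    /// ```rust,no_run
    /// # let client = tracy_client::Client::start();
    /// # let gpu_context = client
    /// #     .new_gpu_context(None, tracy_client::GpuContextType::Invalid, 0, 1.0)
    /// #     .unwrap();
    /// // GPU API: Record writing a timestamp for query id 0.
    /// gpu_context.begin_span(tracy_client::span_location!("ManualSpan"), 0);
    /// // GPU API: Record work, then record writing a timestamp for query id 1.
    /// gpu_context.end_span(1);
    /// // Later, once the timestamps have been read back on the cpu:
    /// # let (start, end) = (0, 0);
    /// gpu_context.upload_gpu_timestamp(0, start);
    /// gpu_context.upload_gpu_timestamp(1, end);
    /// ```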
    pub fn begin_span(&self, span_location: &'static SpanLocation, query_id: u16) {
        #[cfg(feature = "enable")]
        // SAFETY: We know that the span location is valid forever as it is 'static, and
        // `usize` is never wider than `u64`, so no data is lost in the cast.
        unsafe {
            sys::___tracy_emit_gpu_zone_begin_serial(sys::___tracy_gpu_zone_begin_data {
                srcloc: std::ptr::addr_of!(span_location.data) as usize as u64,
                queryId: query_id,
                context: self.value,
            });
        };
    }

    /// Begins a new manually tracked GPU span with the given name, function, file, and line.
    ///
    /// You can use this instead of [`GpuContext::span()`] if you'd like to track the GPU span
    /// manually.
    ///
    /// `query_id` is the id of the GPU timestamp query that you created; when the GPU
    /// timestamp is ready, call [`GpuContext::upload_gpu_timestamp()`] to upload it to Tracy.
    ///
    /// This should be called right next to where you record the corresponding GPU timestamp. This
    /// allows Tracy to correctly associate the cpu time with the gpu timestamp.
    pub fn begin_span_alloc(
        &self,
        name: &str,
        function: &str,
        file: &str,
        line: u32,
        query_id: u16,
    ) {
        #[cfg(feature = "enable")]
        {
            // SAFETY: The strings are copied when tracy allocates the source location,
            // so the pointers only need to stay valid for the duration of this call.
            let srcloc = unsafe {
                sys::___tracy_alloc_srcloc_name(
                    line,
                    file.as_ptr().cast(),
                    file.len(),
                    function.as_ptr().cast(),
                    function.len(),
                    name.as_ptr().cast(),
                    name.len(),
                    0,
                )
            };

            unsafe {
                sys::___tracy_emit_gpu_zone_begin_alloc_serial(sys::___tracy_gpu_zone_begin_data {
                    srcloc,
                    queryId: query_id,
                    context: self.value,
                });
            };
        }
    }

    /// Ends a manually tracked GPU span.
    ///
    /// Call this to end a span started with [`GpuContext::begin_span()`] or
    /// [`GpuContext::begin_span_alloc()`].
    ///
    /// `query_id` is the id of the GPU timestamp query that you created; when the
    /// GPU timestamp is ready, call [`GpuContext::upload_gpu_timestamp()`] to upload it to Tracy.
    ///
    /// This should be called right next to where you record the corresponding GPU timestamp. This
    /// allows Tracy to correctly associate the cpu time with the gpu timestamp.
    pub fn end_span(&self, query_id: u16) {
        #[cfg(feature = "enable")]
        unsafe {
            sys::___tracy_emit_gpu_zone_end_serial(sys::___tracy_gpu_zone_end_data {
                queryId: query_id,
                context: self.value,
            });
        };
    }

    /// Uploads a GPU timestamp for a manually tracked span.
    ///
    /// Call this to upload the ready GPU timestamp for the query corresponding to `query_id`.
    pub fn upload_gpu_timestamp(&self, query_id: u16, gpu_timestamp: i64) {
        #[cfg(feature = "enable")]
        unsafe {
            sys::___tracy_emit_gpu_time_serial(sys::___tracy_gpu_time_data {
                gpuTime: gpu_timestamp,
                queryId: query_id,
                context: self.value,
            });
        };
    }

    /// Communicates the current GPU timestamp to Tracy.
    ///
    /// Some GPUs (like AMD's) will aggressively reset their timers when entering lower power
    /// states. If your application does not continuously utilize the GPU, this causes Tracy's
    /// synchronization of CPU and GPU timestamps to immediately go out of sync, resulting in
    /// broken GPU span display.
    ///
    /// You can use this method to resynchronize CPU and GPU timestamps. Fetch the current GPU
    /// timestamp, then immediately call this method. It will synchronize the given `gpu_timestamp`
    /// to the CPU timestamp at the time of this call.
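    ///
    /// A short sketch (the timestamp stands in for whatever your GPU api returns):
    ///
    /// ```rust,no_run
    /// # let client = tracy_client::Client::start();
    /// # let gpu_context = client
    /// #     .new_gpu_context(None, tracy_client::GpuContextType::Invalid, 0, 1.0)
    /// #     .unwrap();
    /// // GPU API: Fetch the current gpu timestamp.
    /// # let gpu_timestamp = 0;
    /// gpu_context.sync_gpu_time(gpu_timestamp);
    /// ```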
    pub fn sync_gpu_time(&self, gpu_timestamp: i64) {
        #[cfg(feature = "enable")]
        unsafe {
            sys::___tracy_emit_gpu_time_sync_serial(sys::___tracy_gpu_time_sync_data {
                gpuTime: gpu_timestamp,
                context: self.value,
            });
        };
    }
}

impl GpuSpan {
    /// Marks the end of the given gpu span. This should be called right next to where you record
    /// the corresponding gpu timestamp for the end of the span. This allows Tracy to correctly
    /// associate the cpu time with the gpu timestamp.
    ///
    /// Only the first call to this function will actually emit a gpu zone end event. Any
    /// subsequent calls will be ignored.
    pub fn end_zone(&mut self) {
        #[cfg(feature = "enable")]
        {
            if self.state != GpuSpanState::Started {
                return;
            }
            unsafe {
                sys::___tracy_emit_gpu_zone_end_serial(sys::___tracy_gpu_zone_end_data {
                    queryId: self.end_query_id,
                    context: self.context.value,
                });
            };
            self.state = GpuSpanState::Ended;
        }
    }

    /// Supplies the GPU timestamp for the start of this span.
    ///
    /// To avoid confusing Tracy, you must call
    /// [`Self::upload_timestamp_start`] and [`Self::upload_timestamp_end`] in
    /// monotonically increasing timestamp order. For example, if you have two
    /// nested spans *outer* and *inner*, you must supply the timestamps in
    /// this order: (1) *outer* start; (2) *inner* start; (3) *inner* end; (4)
    /// *outer* end.
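    ///
    /// A sketch of that ordering for two nested spans (the timestamp values are
    /// placeholders):
    ///
    /// ```rust,no_run
    /// # let client = tracy_client::Client::start();
    /// # let gpu_context = client
    /// #     .new_gpu_context(None, tracy_client::GpuContextType::Invalid, 0, 1.0)
    /// #     .unwrap();
    /// # let mut outer = gpu_context.span_alloc("outer", "work", "file.rs", 1).unwrap();
    /// # let mut inner = gpu_context.span_alloc("inner", "work", "file.rs", 2).unwrap();
    /// # inner.end_zone();
    /// # outer.end_zone();
    /// # let (t0, t1, t2, t3) = (0, 1, 2, 3);
    /// outer.upload_timestamp_start(t0); // (1) outer start
    /// inner.upload_timestamp_start(t1); // (2) inner start
    /// inner.upload_timestamp_end(t2);   // (3) inner end
    /// outer.upload_timestamp_end(t3);   // (4) outer end
    /// ```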
    pub fn upload_timestamp_start(&self, start_timestamp: i64) {
        #[cfg(feature = "enable")]
        unsafe {
            sys::___tracy_emit_gpu_time_serial(sys::___tracy_gpu_time_data {
                gpuTime: start_timestamp,
                queryId: self.start_query_id,
                context: self.context.value,
            });
        };
    }

    /// Supplies the GPU timestamp for the end of this span.
    ///
    /// To avoid confusing Tracy, you must call
    /// [`Self::upload_timestamp_start`] and [`Self::upload_timestamp_end`] in
    /// monotonically increasing timestamp order. For example, if you have two
    /// nested spans *outer* and *inner*, you must supply the timestamps in this
    /// order: (1) *outer* start; (2) *inner* start; (3) *inner* end; (4)
    /// *outer* end.
    pub fn upload_timestamp_end(&self, end_timestamp: i64) {
        #[cfg(feature = "enable")]
        unsafe {
            sys::___tracy_emit_gpu_time_serial(sys::___tracy_gpu_time_data {
                gpuTime: end_timestamp,
                queryId: self.end_query_id,
                context: self.context.value,
            });
        };
    }
}

impl Drop for GpuSpan {
    fn drop(&mut self) {
        #[cfg(feature = "enable")]
        {
            match self.state {
                GpuSpanState::Started => {
                    // End the zone so Tracy is not left with an unmatched begin event.
                    self.end_zone();
                }
                GpuSpanState::Ended => {}
            }

            // Put the ids back into the freelist so they can be reused by later spans.
            let mut freelist = self.context.span_freelist.lock().unwrap();
            freelist.push(self.start_query_id);
            freelist.push(self.end_query_id);
            drop(freelist);
        }
    }
}