// tracy_client/gpu.rs

use std::{
    convert::TryInto,
    sync::{Arc, Mutex},
};

use crate::{Client, SpanLocation};

/// The API label associated with the given gpu context. The list here only includes
/// APIs that are currently supported by Tracy's own gpu implementations.
//
// Copied from `tracy-client-sys/tracy/common/TracyQueue.hpp:391`. The comment on the
// enum states that the values are stable, due to potential serialization issues, so
// copying this enum shouldn't be a problem.
#[repr(u8)]
pub enum GpuContextType {
    /// Stand-in for other types of contexts.
    Invalid = 0,
    /// An OpenGL context.
    OpenGL = 1,
    /// A Vulkan context.
    Vulkan = 2,
    /// An OpenCL context.
    OpenCL = 3,
    /// A D3D12 context.
    Direct3D12 = 4,
    /// A D3D11 context.
    Direct3D11 = 5,
}

/// Context for creating gpu spans.
///
/// Generally corresponds to a single hardware queue.
///
/// The flow of creating and using a gpu context generally looks like this:
///
/// ```rust,no_run
/// # let client = tracy_client::Client::start();
/// // The period of the gpu clock in nanoseconds, as provided by your GPU api.
/// // A period of 1.0 corresponds to a 1GHz clock.
/// let period: f32 = 1.0;
///
/// // GPU API: Record writing a timestamp and resolve that to a mappable buffer.
/// // GPU API: Submit the command buffer writing the timestamp.
/// // GPU API: Immediately block until the submission is finished.
/// // GPU API: Map buffer, get timestamp value.
/// let starting_timestamp: i64 = /* whatever value you read from the buffer */ 0;
///
/// // Create the gpu context.
/// let gpu_context = client.new_gpu_context(
///     Some("MyContext"),
///     tracy_client::GpuContextType::Vulkan,
///     starting_timestamp,
///     period
/// ).unwrap();
///
/// // Now you have some work that you want to time on the gpu.
///
/// // GPU API: Record writing a timestamp before the work.
/// let mut span = gpu_context.span_alloc("MyGpuSpan1", "My::Work", "myfile.rs", 12).unwrap();
///
/// // GPU API: Record work.
///
/// // GPU API: Record writing a timestamp after the work.
/// span.end_zone();
///
/// // Some time later, once the written timestamp values are available on the cpu.
/// # let (starting_timestamp, ending_timestamp) = (0, 0);
///
/// span.upload_timestamp_start(starting_timestamp);
/// span.upload_timestamp_end(ending_timestamp);
/// ```
#[derive(Clone)]
pub struct GpuContext {
    #[cfg(feature = "enable")]
    _client: Client,
    #[cfg(feature = "enable")]
    value: u8,
    #[cfg(feature = "enable")]
    span_freelist: Arc<Mutex<Vec<u16>>>,
    _private: (),
}

#[cfg(feature = "enable")]
static GPU_CONTEXT_INDEX: Mutex<u8> = Mutex::new(0);

/// Errors that can occur when creating a gpu context.
#[derive(Debug)]
pub enum GpuContextCreationError {
    /// More than `u8::MAX` contexts have been created at any point in the program.
    TooManyContextsCreated,
}

impl std::fmt::Display for GpuContextCreationError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "More than 255 contexts have been created at any point in the execution of this program."
        )
    }
}

impl std::error::Error for GpuContextCreationError {}

#[derive(Debug, PartialEq)]
enum GpuSpanState {
    /// The span has been started. All gpu spans start in this state.
    Started,
    /// The span has been ended, either waiting for timestamp upload or with
    /// timestamp upload completed.
    Ended,
}

/// Span for timing gpu work.
///
/// See the [context level documentation](GpuContext) for more information on use.
///
/// If the span is dropped early, the following happens:
/// - If the span has not been ended, the span is ended for you.
/// - The span's query ids are returned to the context's freelist for reuse.
#[must_use]
pub struct GpuSpan {
    #[cfg(feature = "enable")]
    context: GpuContext,
    #[cfg(feature = "enable")]
    start_query_id: u16,
    #[cfg(feature = "enable")]
    end_query_id: u16,
    #[cfg(feature = "enable")]
    state: GpuSpanState,
    _private: (),
}

/// Errors that can occur when creating a gpu span.
#[derive(Debug)]
pub enum GpuSpanCreationError {
    /// More than `32767` spans are still waiting for gpu data.
    TooManyPendingSpans,
}

impl std::fmt::Display for GpuSpanCreationError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "Too many spans still waiting for gpu data. There may not be more than 32767 spans that are pending gpu data at once."
        )
    }
}

impl std::error::Error for GpuSpanCreationError {}

impl Client {
    /// Creates a new GPU context.
    ///
    /// - `name` is the name of the context.
    /// - `ty` is the type (backend) of the context.
    /// - `gpu_timestamp` is the gpu side timestamp that corresponds (as closely as possible) to this call.
    /// - `period` is the period of the gpu clock in nanoseconds (setting 1.0 means the clock is 1GHz, 1000.0 means 1MHz, etc).
    ///
    /// See the [type level documentation](GpuContext) for more information.
    ///
    /// # Errors
    ///
    /// - If more than 255 contexts were made during the lifetime of the application.
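    ///
    /// # Examples
    ///
    /// A minimal sketch; the timestamp and period below are placeholder values
    /// standing in for what your GPU API would report:
    ///
    /// ```rust,no_run
    /// # let client = tracy_client::Client::start();
    /// let gpu_timestamp: i64 = 0; // placeholder: read this back from your GPU API
    /// let period: f32 = 1.0; // placeholder: 1.0 means a 1GHz timestamp clock
    /// let context = client
    ///     .new_gpu_context(
    ///         Some("GraphicsQueue"),
    ///         tracy_client::GpuContextType::Vulkan,
    ///         gpu_timestamp,
    ///         period,
    ///     )
    ///     .expect("fewer than 255 contexts have been created");
    /// ```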
    pub fn new_gpu_context(
        self,
        name: Option<&str>,
        ty: GpuContextType,
        gpu_timestamp: i64,
        period: f32,
    ) -> Result<GpuContext, GpuContextCreationError> {
        #[cfg(feature = "enable")]
        {
            // We use a mutex rather than an atomic fetch_add so that checking the context
            // limit and claiming the next index happen atomically.
            //
            // This prevents multiple contexts getting the same context id.
            let mut context_index_guard = GPU_CONTEXT_INDEX.lock().unwrap();
            if *context_index_guard == 255 {
                return Err(GpuContextCreationError::TooManyContextsCreated);
            }
            let context = *context_index_guard;
            *context_index_guard += 1;
            drop(context_index_guard);

            // SAFETY:
            // - We know we aren't re-using the context id because of the above logic.
            unsafe {
                sys::___tracy_emit_gpu_new_context_serial(sys::___tracy_gpu_new_context_data {
                    gpuTime: gpu_timestamp,
                    period,
                    context,
                    flags: 0,
                    type_: ty as u8,
                });
            };

            if let Some(name) = name {
                // SAFETY:
                // - We've allocated a context.
                // - The name will be copied into the command stream, so the pointer does not
                //   need to outlive this call.
                unsafe {
                    sys::___tracy_emit_gpu_context_name_serial(
                        sys::___tracy_gpu_context_name_data {
                            context,
                            name: name.as_ptr().cast(),
                            len: name.len().try_into().unwrap_or(u16::MAX),
                        },
                    );
                }
            }

            Ok(GpuContext {
                _client: self,
                value: context,
                span_freelist: Arc::new(Mutex::new((0..=u16::MAX).collect())),
                _private: (),
            })
        }
        #[cfg(not(feature = "enable"))]
        Ok(GpuContext { _private: () })
    }
}

impl GpuContext {
    #[cfg(feature = "enable")]
    fn alloc_span_ids(&self) -> Result<(u16, u16), GpuSpanCreationError> {
        let mut freelist = self.span_freelist.lock().unwrap();
        if freelist.len() < 2 {
            return Err(GpuSpanCreationError::TooManyPendingSpans);
        }
        // These unwraps cannot fail, as we just checked that the freelist
        // holds at least two ids.
        let start = freelist.pop().unwrap();
        let end = freelist.pop().unwrap();
        Ok((start, end))
    }

    /// Creates a new gpu span with the given source location.
    ///
    /// This should be called right next to where you record the corresponding gpu timestamp. This
    /// allows Tracy to correctly associate the cpu time with the gpu timestamp.
    ///
    /// # Errors
    ///
    /// - If there are more than 32767 spans waiting for gpu data at once.
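    ///
    /// # Examples
    ///
    /// A minimal sketch, assuming the crate's `span_location!` macro as the
    /// source of the `'static` span location:
    ///
    /// ```rust,no_run
    /// # let client = tracy_client::Client::start();
    /// # let gpu_context = client
    /// #     .new_gpu_context(None, tracy_client::GpuContextType::Invalid, 0, 1.0)
    /// #     .unwrap();
    /// // GPU API: Record writing the starting timestamp.
    /// let mut span = gpu_context
    ///     .span(tracy_client::span_location!("MyGpuSpan"))
    ///     .unwrap();
    /// // GPU API: Record work, then write the ending timestamp.
    /// span.end_zone();
    /// ```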
    pub fn span(
        &self,
        span_location: &'static SpanLocation,
    ) -> Result<GpuSpan, GpuSpanCreationError> {
        #[cfg(feature = "enable")]
        {
            let (start_query_id, end_query_id) = self.alloc_span_ids()?;

            // SAFETY: We know that the span location is valid forever as it is 'static. A
            // `usize` is never wider than `u64`, so no data is lost in the conversion.
            unsafe {
                sys::___tracy_emit_gpu_zone_begin_serial(sys::___tracy_gpu_zone_begin_data {
                    srcloc: std::ptr::addr_of!(span_location.data) as usize as u64,
                    queryId: start_query_id,
                    context: self.value,
                });
            };

            Ok(GpuSpan {
                context: self.clone(),
                start_query_id,
                end_query_id,
                state: GpuSpanState::Started,
                _private: (),
            })
        }
        #[cfg(not(feature = "enable"))]
        Ok(GpuSpan { _private: () })
    }

    /// Creates a new gpu span with the given name, function, file, and line.
    ///
    /// This should be called right next to where you record the corresponding gpu timestamp. This
    /// allows Tracy to correctly associate the cpu time with the gpu timestamp.
    ///
    /// # Errors
    ///
    /// - If there are more than 32767 spans waiting for gpu data at once.
    pub fn span_alloc(
        &self,
        name: &str,
        function: &str,
        file: &str,
        line: u32,
    ) -> Result<GpuSpan, GpuSpanCreationError> {
        #[cfg(feature = "enable")]
        {
            let srcloc = unsafe {
                sys::___tracy_alloc_srcloc_name(
                    line,
                    file.as_ptr().cast(),
                    file.len(),
                    function.as_ptr().cast(),
                    function.len(),
                    name.as_ptr().cast(),
                    name.len(),
                    0,
                )
            };

            let (start_query_id, end_query_id) = self.alloc_span_ids()?;

            unsafe {
                sys::___tracy_emit_gpu_zone_begin_alloc_serial(sys::___tracy_gpu_zone_begin_data {
                    srcloc,
                    queryId: start_query_id,
                    context: self.value,
                });
            };

            Ok(GpuSpan {
                context: self.clone(),
                start_query_id,
                end_query_id,
                state: GpuSpanState::Started,
                _private: (),
            })
        }
        #[cfg(not(feature = "enable"))]
        Ok(GpuSpan { _private: () })
    }

    /// Begins a new manually tracked GPU span.
    ///
    /// You can use this instead of [`GpuContext::span()`] if you'd like to track the GPU span
    /// manually. `query_id` is the id of the GPU timestamp query that you created; when the
    /// GPU timestamp is ready, call [`GpuContext::upload_gpu_timestamp()`] to upload it to Tracy.
    ///
    /// This should be called right next to where you record the corresponding GPU timestamp. This
    /// allows Tracy to correctly associate the cpu time with the gpu timestamp.
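    ///
    /// # Examples
    ///
    /// A minimal sketch of the manual flow, assuming the crate's
    /// `span_location!` macro; the query ids are arbitrary values that you
    /// must keep unique among spans still waiting for their timestamps:
    ///
    /// ```rust,no_run
    /// # let client = tracy_client::Client::start();
    /// # let context = client
    /// #     .new_gpu_context(None, tracy_client::GpuContextType::Invalid, 0, 1.0)
    /// #     .unwrap();
    /// let (start_id, end_id) = (0u16, 1u16);
    /// // GPU API: Record writing the starting timestamp.
    /// context.begin_span(tracy_client::span_location!("ManualSpan"), start_id);
    /// // GPU API: Record work, then write the ending timestamp.
    /// context.end_span(end_id);
    ///
    /// // Later, once both timestamps are readable on the cpu:
    /// # let (start_ts, end_ts) = (0i64, 0i64);
    /// context.upload_gpu_timestamp(start_id, start_ts);
    /// context.upload_gpu_timestamp(end_id, end_ts);
    /// ```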
    pub fn begin_span(&self, span_location: &'static SpanLocation, query_id: u16) {
        #[cfg(feature = "enable")]
        // SAFETY: We know that the span location is valid forever as it is 'static. A
        // `usize` is never wider than `u64`, so no data is lost in the conversion.
        unsafe {
            sys::___tracy_emit_gpu_zone_begin_serial(sys::___tracy_gpu_zone_begin_data {
                srcloc: std::ptr::addr_of!(span_location.data) as usize as u64,
                queryId: query_id,
                context: self.value,
            });
        };
    }

    /// Begins a new manually tracked GPU span with the given name, function, file, and line.
    ///
    /// You can use this instead of [`GpuContext::span()`] if you'd like to track the GPU span
    /// manually.
    ///
    /// `query_id` is the id of the GPU timestamp query that you created; when the GPU
    /// timestamp is ready, call [`GpuContext::upload_gpu_timestamp()`] to upload it to Tracy.
    ///
    /// This should be called right next to where you record the corresponding GPU timestamp. This
    /// allows Tracy to correctly associate the cpu time with the gpu timestamp.
    pub fn begin_span_alloc(
        &self,
        name: &str,
        function: &str,
        file: &str,
        line: u32,
        query_id: u16,
    ) {
        #[cfg(feature = "enable")]
        {
            let srcloc = unsafe {
                sys::___tracy_alloc_srcloc_name(
                    line,
                    file.as_ptr().cast(),
                    file.len(),
                    function.as_ptr().cast(),
                    function.len(),
                    name.as_ptr().cast(),
                    name.len(),
                    0,
                )
            };

            unsafe {
                sys::___tracy_emit_gpu_zone_begin_alloc_serial(sys::___tracy_gpu_zone_begin_data {
                    srcloc,
                    queryId: query_id,
                    context: self.value,
                });
            };
        }
    }

    /// Ends a manually tracked GPU span.
    ///
    /// Call this to end a span started with [`GpuContext::begin_span()`] or
    /// [`GpuContext::begin_span_alloc()`].
    ///
    /// `query_id` is the id of the GPU timestamp query that you created; when the
    /// GPU timestamp is ready, call [`GpuContext::upload_gpu_timestamp()`] to upload it to Tracy.
    ///
    /// This should be called right next to where you record the corresponding GPU timestamp. This
    /// allows Tracy to correctly associate the cpu time with the gpu timestamp.
    pub fn end_span(&self, query_id: u16) {
        #[cfg(feature = "enable")]
        unsafe {
            sys::___tracy_emit_gpu_zone_end_serial(sys::___tracy_gpu_zone_end_data {
                queryId: query_id,
                context: self.value,
            });
        };
    }

    /// Uploads a GPU timestamp for a manually tracked span.
    ///
    /// Call this to upload the ready GPU timestamp for the query corresponding to `query_id`.
    pub fn upload_gpu_timestamp(&self, query_id: u16, gpu_timestamp: i64) {
        #[cfg(feature = "enable")]
        unsafe {
            sys::___tracy_emit_gpu_time_serial(sys::___tracy_gpu_time_data {
                gpuTime: gpu_timestamp,
                queryId: query_id,
                context: self.value,
            });
        };
    }

    /// Communicates the current GPU timestamp to Tracy.
    ///
    /// Some GPUs (like AMD) will aggressively reset their timing when going into lower power
    /// states. If your application does not continuously utilize the GPU, this will cause Tracy's
    /// synchronization of CPU and GPU timestamps to immediately go out of sync, resulting in
    /// broken GPU span display.
    ///
    /// You can use this method to resynchronize CPU and GPU timestamps. Fetch the current GPU
    /// timestamp, then immediately call this method. It will synchronize the given `gpu_timestamp`
    /// to the CPU timestamp at the time of this call.
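    ///
    /// # Examples
    ///
    /// A minimal sketch; the timestamp is a placeholder for a value queried
    /// from your GPU API immediately before this call:
    ///
    /// ```rust,no_run
    /// # let client = tracy_client::Client::start();
    /// # let context = client
    /// #     .new_gpu_context(None, tracy_client::GpuContextType::Invalid, 0, 1.0)
    /// #     .unwrap();
    /// let current_gpu_timestamp: i64 = 0; // placeholder: read from your GPU API
    /// context.sync_gpu_time(current_gpu_timestamp);
    /// ```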
    pub fn sync_gpu_time(&self, gpu_timestamp: i64) {
        #[cfg(feature = "enable")]
        unsafe {
            sys::___tracy_emit_gpu_time_sync_serial(sys::___tracy_gpu_time_sync_data {
                gpuTime: gpu_timestamp,
                context: self.value,
            });
        };
    }
}

impl GpuSpan {
    /// Marks the end of the given gpu span.
    ///
    /// This should be called right next to where you record the corresponding gpu timestamp for
    /// the end of the span. This allows Tracy to correctly associate the cpu time with the gpu
    /// timestamp.
    ///
    /// Only the first call to this function actually emits a gpu zone end event; any subsequent
    /// calls are ignored.
    pub fn end_zone(&mut self) {
        #[cfg(feature = "enable")]
        {
            if self.state != GpuSpanState::Started {
                return;
            }
            unsafe {
                sys::___tracy_emit_gpu_zone_end_serial(sys::___tracy_gpu_zone_end_data {
                    queryId: self.end_query_id,
                    context: self.context.value,
                });
            };
            self.state = GpuSpanState::Ended;
        }
    }

    /// Supplies the GPU timestamp for the start of this span.
    ///
    /// To avoid confusing Tracy, you must call
    /// [`Self::upload_timestamp_start`] and [`Self::upload_timestamp_end`] in
    /// monotonically increasing timestamp order. For example, if you have two
    /// nested spans *outer* and *inner*, you must supply the timestamps in
    /// this order: (1) *outer* start; (2) *inner* start; (3) *inner* end; (4)
    /// *outer* end.
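    ///
    /// # Examples
    ///
    /// A minimal sketch of that ordering for two nested spans; the literal
    /// timestamps stand in for values read back from your GPU API:
    ///
    /// ```rust,no_run
    /// # let client = tracy_client::Client::start();
    /// # let context = client
    /// #     .new_gpu_context(None, tracy_client::GpuContextType::Invalid, 0, 1.0)
    /// #     .unwrap();
    /// # let mut outer = context.span_alloc("outer", "work", "file.rs", 1).unwrap();
    /// # let mut inner = context.span_alloc("inner", "work", "file.rs", 2).unwrap();
    /// # inner.end_zone();
    /// # outer.end_zone();
    /// outer.upload_timestamp_start(10);
    /// inner.upload_timestamp_start(20);
    /// inner.upload_timestamp_end(30);
    /// outer.upload_timestamp_end(40);
    /// ```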
    pub fn upload_timestamp_start(&self, start_timestamp: i64) {
        #[cfg(feature = "enable")]
        unsafe {
            sys::___tracy_emit_gpu_time_serial(sys::___tracy_gpu_time_data {
                gpuTime: start_timestamp,
                queryId: self.start_query_id,
                context: self.context.value,
            });
        };
    }

    /// Supplies the GPU timestamp for the end of this span.
    ///
    /// To avoid confusing Tracy, you must call
    /// [`Self::upload_timestamp_start`] and [`Self::upload_timestamp_end`] in
    /// monotonically increasing timestamp order. For example, if you have two
    /// nested spans *outer* and *inner*, you must supply the timestamps in
    /// this order: (1) *outer* start; (2) *inner* start; (3) *inner* end; (4)
    /// *outer* end.
    pub fn upload_timestamp_end(&self, end_timestamp: i64) {
        #[cfg(feature = "enable")]
        unsafe {
            sys::___tracy_emit_gpu_time_serial(sys::___tracy_gpu_time_data {
                gpuTime: end_timestamp,
                queryId: self.end_query_id,
                context: self.context.value,
            });
        };
    }
}

impl Drop for GpuSpan {
    fn drop(&mut self) {
        #[cfg(feature = "enable")]
        {
            match self.state {
                GpuSpanState::Started => {
                    self.end_zone();
                }
                GpuSpanState::Ended => {}
            }

            // Put the ids back into the freelist.
            let mut freelist = self.context.span_freelist.lock().unwrap();
            freelist.push(self.start_query_id);
            freelist.push(self.end_query_id);
            drop(freelist);
        }
    }
}