// tracy_client/gpu.rs

use std::{
    convert::TryInto,
    sync::{Arc, Mutex},
};

use crate::{Client, SpanLocation};
7
/// The API label associated with the given gpu context. The list here only includes
/// APIs that are currently supported by Tracy's own gpu implementations.
//
// Copied from `tracy-client-sys/tracy/common/TracyQueue.hpp:391`. Comment on enum states
// that the values are stable, due to potential serialization issues, so copying this enum
// shouldn't be a problem.
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum GpuContextType {
    /// Stand in for other types of contexts.
    Invalid = 0,
    /// An OpenGL context
    OpenGL = 1,
    /// A Vulkan context
    Vulkan = 2,
    /// An OpenCL context
    OpenCL = 3,
    /// A D3D12 context.
    Direct3D12 = 4,
    /// A D3D11 context.
    Direct3D11 = 5,
}
29
/// Context for creating gpu spans.
///
/// Generally corresponds to a single hardware queue.
///
/// The flow of creating and using gpu context generally looks like this:
///
/// ```rust,no_run
/// # let client = tracy_client::Client::start();
/// // The period of the gpu clock in nanoseconds, as provided by your GPU api.
/// // This value corresponds to 1GHz.
/// let period: f32 = 1_000_000_000.0;
///
/// // GPU API: Record writing a timestamp and resolve that to a mappable buffer.
/// // GPU API: Submit the command buffer writing the timestamp.
/// // GPU API: Immediately block until the submission is finished.
/// // GPU API: Map buffer, get timestamp value.
/// let starting_timestamp: i64 = /* whatever you get from this timestamp */ 0;
///
/// // Create the gpu context
/// let gpu_context = client.new_gpu_context(
///     Some("MyContext"),
///     tracy_client::GpuContextType::Vulkan,
///     starting_timestamp,
///     period
/// ).unwrap();
///
/// // Now you have some work that you want to time on the gpu.
///
/// // GPU API: Record writing a timestamp before the work.
/// let mut span = gpu_context.span_alloc("MyGpuSpan1", "My::Work", "myfile.rs", 12).unwrap();
///
/// // GPU API: Record work.
///
/// // GPU API: Record writing a timestamp after the work.
/// span.end_zone();
///
/// // Some time later, once the written timestamp values are available on the cpu.
/// # let (starting_timestamp, ending_timestamp) = (0, 0);
///
/// span.upload_timestamp_start(starting_timestamp);
/// span.upload_timestamp_end(ending_timestamp);
/// ```
#[derive(Clone)]
pub struct GpuContext {
    // Keeps the tracy client alive for at least as long as this context.
    #[cfg(feature = "enable")]
    _client: Client,
    // Context id passed to the tracy C API to identify this context
    // (`new_gpu_context` hands out ids 0..=254).
    #[cfg(feature = "enable")]
    value: u8,
    // Pool of query ids not currently in use by a pending span. Shared with
    // every `GpuSpan` made from this context (the context is `Clone`); a span
    // takes two ids on creation and returns them when dropped.
    #[cfg(feature = "enable")]
    span_freelist: Arc<Mutex<Vec<u16>>>,
    // Prevents construction of this struct outside of this module.
    _private: (),
}
// Next context id to hand out. Guarded by a mutex so that concurrent calls to
// `new_gpu_context` cannot observe (and thus reuse) the same id.
#[cfg(feature = "enable")]
static GPU_CONTEXT_INDEX: Mutex<u8> = Mutex::new(0);
84
/// Errors that can occur when creating a gpu context.
#[derive(Debug)]
pub enum GpuContextCreationError {
    /// More than `u8::MAX` contexts have been created at any point in the program.
    TooManyContextsCreated,
}

impl std::fmt::Display for GpuContextCreationError {
    // Only one variant exists, so the message is a fixed string.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(
            "More than 255 contexts have been created at any point in the execution of this program.",
        )
    }
}

impl std::error::Error for GpuContextCreationError {}
102
/// Tracks where a gpu span is in its lifecycle.
//
// `Eq` accompanies the derived `PartialEq` (the comparison is total), and
// `Clone`/`Copy` are free for a fieldless enum.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum GpuSpanState {
    /// The span has been started. All gpu spans start in this state.
    Started,
    /// The span has been ended, either waiting for timestamp upload or with
    /// timestamp upload completed.
    Ended,
}
111
/// Span for timing gpu work.
///
/// See the [context level documentation](GpuContext) for more information on use.
///
/// If the span is dropped early, the following happens:
/// - If the span has not been ended, the span is ended. AND
/// - If the span has not had values uploaded, the span is uploaded with
///   the timestamps marking the start of the current gpu context. This
///   will put the span out of the way of other spans.
#[must_use]
pub struct GpuSpan {
    // The context this span was created from; supplies the context id for the
    // C API and the freelist to return the query ids to on drop.
    #[cfg(feature = "enable")]
    context: GpuContext,
    // Query id associated with the start timestamp of this span.
    #[cfg(feature = "enable")]
    start_query_id: u16,
    // Query id associated with the end timestamp of this span.
    #[cfg(feature = "enable")]
    end_query_id: u16,
    // Whether `end_zone` has been called yet.
    #[cfg(feature = "enable")]
    state: GpuSpanState,
    // Prevents construction of this struct outside of this module.
    _private: (),
}
133
/// Errors that can occur when creating a gpu span.
#[derive(Debug)]
pub enum GpuSpanCreationError {
    /// More than `32767` spans are still waiting for gpu data.
    TooManyPendingSpans,
}

impl std::fmt::Display for GpuSpanCreationError {
    // Only one variant exists, so the message is a fixed string.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(
            "Too many spans still waiting for gpu data. There may not be more than 32767 spans that are pending gpu data at once.",
        )
    }
}

impl std::error::Error for GpuSpanCreationError {}
151
impl Client {
    /// Creates a new GPU context.
    ///
    /// - `name` is the name of the context.
    /// - `ty` is the type (backend) of the context.
    /// - `gpu_timestamp` is the gpu side timestamp that corresponds (as close as possible) to this call.
    /// - `period` is the period of the gpu clock in nanoseconds (setting 1.0 means the clock is 1GHz, 1000.0 means 1MHz, etc).
    ///
    /// See the [type level documentation](GpuContext) for more information.
    ///
    /// # Errors
    ///
    /// - If more than 255 contexts were made during the lifetime of the application.
    pub fn new_gpu_context(
        self,
        name: Option<&str>,
        ty: GpuContextType,
        gpu_timestamp: i64,
        period: f32,
    ) -> Result<GpuContext, GpuContextCreationError> {
        #[cfg(feature = "enable")]
        {
            // We hold a mutex over the check-and-increment (rather than using
            // an atomic fetch_add) so that both happen as one atomic step.
            //
            // This prevents multiple contexts getting the same context id.
            let mut context_index_guard = GPU_CONTEXT_INDEX.lock().unwrap();
            // Ids 0..=254 are usable; refusing at 255 also keeps the
            // increment below from overflowing the u8 counter.
            if *context_index_guard == 255 {
                return Err(GpuContextCreationError::TooManyContextsCreated);
            }
            let context = *context_index_guard;
            *context_index_guard += 1;
            drop(context_index_guard);

            // SAFETY:
            // - We know we aren't re-using the context id because of the above logic.
            unsafe {
                sys::___tracy_emit_gpu_new_context_serial(sys::___tracy_gpu_new_context_data {
                    gpuTime: gpu_timestamp,
                    period,
                    context,
                    flags: 0,
                    type_: ty as u8,
                });
            };

            if let Some(name) = name {
                // SAFETY:
                // - We've allocated a context.
                // - The names will be copied into the command stream, so the pointers do not need to last.
                unsafe {
                    sys::___tracy_emit_gpu_context_name_serial(
                        sys::___tracy_gpu_context_name_data {
                            context,
                            name: name.as_ptr().cast(),
                            // Lengths that don't fit in a u16 are clamped, so
                            // overly long names are silently truncated.
                            len: name.len().try_into().unwrap_or(u16::MAX),
                        },
                    );
                }
            }

            Ok(GpuContext {
                _client: self,
                value: context,
                // Seed the freelist with every possible query id; each span
                // takes two of them (start + end) and returns them on drop.
                span_freelist: Arc::new(Mutex::new((0..=u16::MAX).collect())),
                _private: (),
            })
        }
        #[cfg(not(feature = "enable"))]
        Ok(GpuContext { _private: () })
    }
}
223
impl GpuContext {
    /// Pops a `(start, end)` pair of query ids off of this context's freelist.
    ///
    /// Errors with `TooManyPendingSpans` when fewer than two ids remain.
    #[cfg(feature = "enable")]
    fn alloc_span_ids(&self) -> Result<(u16, u16), GpuSpanCreationError> {
        let mut freelist = self.span_freelist.lock().unwrap();
        if freelist.len() < 2 {
            return Err(GpuSpanCreationError::TooManyPendingSpans);
        }
        // These unwraps are unreachable: the length check above guarantees
        // at least two elements remain.
        let start = freelist.pop().unwrap();
        let end = freelist.pop().unwrap();
        Ok((start, end))
    }

    /// Creates a new gpu span with the given source location.
    ///
    /// This should be called right next to where you record the corresponding gpu timestamp. This
    /// allows tracy to correctly associate the cpu time with the gpu timestamp.
    ///
    /// # Errors
    ///
    /// - If there are more than 32767 spans waiting for gpu data at once.
    pub fn span(
        &self,
        span_location: &'static SpanLocation,
    ) -> Result<GpuSpan, GpuSpanCreationError> {
        #[cfg(feature = "enable")]
        {
            let (start_query_id, end_query_id) = self.alloc_span_ids()?;

            // SAFETY: We know that the span location is valid forever as it is 'static. `usize` will
            // always be smaller than u64, so no data will be lost.
            unsafe {
                sys::___tracy_emit_gpu_zone_begin_serial(sys::___tracy_gpu_zone_begin_data {
                    srcloc: std::ptr::addr_of!(span_location.data) as usize as u64,
                    queryId: start_query_id,
                    context: self.value,
                });
            };

            Ok(GpuSpan {
                context: self.clone(),
                start_query_id,
                end_query_id,
                state: GpuSpanState::Started,
                _private: (),
            })
        }
        #[cfg(not(feature = "enable"))]
        Ok(GpuSpan { _private: () })
    }

    /// Creates a new gpu span with the given name, function, file, and line.
    ///
    /// This should be called right next to where you record the corresponding gpu timestamp. This
    /// allows tracy to correctly associate the cpu time with the gpu timestamp.
    ///
    /// # Errors
    ///
    /// - If there are more than 32767 spans waiting for gpu data at once.
    pub fn span_alloc(
        &self,
        name: &str,
        function: &str,
        file: &str,
        line: u32,
    ) -> Result<GpuSpan, GpuSpanCreationError> {
        #[cfg(feature = "enable")]
        {
            // SAFETY: The string pointers and lengths refer to live, valid
            // buffers for the duration of this call; the "alloc" variant
            // copies the source-location data into a tracy-owned allocation,
            // so the pointers do not need to outlive the call.
            let srcloc = unsafe {
                sys::___tracy_alloc_srcloc_name(
                    line,
                    file.as_ptr().cast(),
                    file.len(),
                    function.as_ptr().cast(),
                    function.len(),
                    name.as_ptr().cast(),
                    name.len(),
                    0,
                )
            };

            let (start_query_id, end_query_id) = self.alloc_span_ids()?;

            // SAFETY: `srcloc` was just allocated above, so it is a valid
            // source-location handle for this zone-begin event.
            unsafe {
                sys::___tracy_emit_gpu_zone_begin_alloc_serial(sys::___tracy_gpu_zone_begin_data {
                    srcloc,
                    queryId: start_query_id,
                    context: self.value,
                });
            };

            Ok(GpuSpan {
                context: self.clone(),
                start_query_id,
                end_query_id,
                state: GpuSpanState::Started,
                _private: (),
            })
        }
        #[cfg(not(feature = "enable"))]
        Ok(GpuSpan { _private: () })
    }
}
327
impl GpuSpan {
    /// Marks the end of the given gpu span. This should be called right next to where you record
    /// the corresponding gpu timestamp for the end of the span. This allows tracy to correctly
    /// associate the cpu time with the gpu timestamp.
    ///
    /// Only the first time you call this function will it actually emit a gpu zone end event. Any
    /// subsequent calls will be ignored.
    pub fn end_zone(&mut self) {
        #[cfg(feature = "enable")]
        {
            // Only the first call emits the zone-end event; once the state is
            // `Ended`, further calls are no-ops.
            if self.state != GpuSpanState::Started {
                return;
            }
            unsafe {
                sys::___tracy_emit_gpu_zone_end_serial(sys::___tracy_gpu_zone_end_data {
                    queryId: self.end_query_id,
                    context: self.context.value,
                });
            };
            self.state = GpuSpanState::Ended;
        }
    }

    /// Supplies the GPU timestamp for the start of this span.
    ///
    /// In order to avoid confusing Tracy, you must call
    /// [`Self::upload_timestamp_start`] and [`Self::upload_timestamp_end`] in
    /// monotonically increasing timestamp order. For example, if you have two
    /// nested spans *outer* and *inner*, you must supply the timestamps in
    /// this order: (1) *outer* start; (2) *inner* start; (3) *inner* end; (4)
    /// *outer* end.
    pub fn upload_timestamp_start(&self, start_timestamp: i64) {
        // No-op when the `enable` feature is off.
        #[cfg(feature = "enable")]
        unsafe {
            sys::___tracy_emit_gpu_time_serial(sys::___tracy_gpu_time_data {
                gpuTime: start_timestamp,
                queryId: self.start_query_id,
                context: self.context.value,
            });
        };
    }

    /// Supplies the GPU timestamp for the end of this span.
    ///
    /// In order to avoid confusing Tracy, you must call
    /// [`Self::upload_timestamp_start`] and [`Self::upload_timestamp_end`] in
    /// monotonically increasing timestamp order. For example, if you have two
    /// nested spans *outer* and *inner*, you must supply the timestamps in this
    /// order: (1) *outer* start; (2) *inner* start; (3) *inner* end; (4)
    /// *outer* end.
    pub fn upload_timestamp_end(&self, end_timestamp: i64) {
        // No-op when the `enable` feature is off.
        #[cfg(feature = "enable")]
        unsafe {
            sys::___tracy_emit_gpu_time_serial(sys::___tracy_gpu_time_data {
                gpuTime: end_timestamp,
                queryId: self.end_query_id,
                context: self.context.value,
            });
        };
    }
}
389
390impl Drop for GpuSpan {
391    fn drop(&mut self) {
392        #[cfg(feature = "enable")]
393        {
394            match self.state {
395                GpuSpanState::Started => {
396                    self.end_zone();
397                }
398                GpuSpanState::Ended => {}
399            }
400
401            // Put the ids back into the freelist.
402            let mut freelist = self.context.span_freelist.lock().unwrap();
403            freelist.push(self.start_query_id);
404            freelist.push(self.end_query_id);
405            drop(freelist);
406        }
407    }
408}