Skip to main content

astrelis_render/
gpu_profiling.rs

1//! GPU profiling via `wgpu-profiler` with puffin visualization.
2//!
3//! When the `gpu-profiling` feature is enabled, this module provides a
4//! [`GpuFrameProfiler`] that wraps `wgpu_profiler::GpuProfiler` and
5//! automatically reports GPU timing data to puffin.
6//!
7//! When the feature is disabled, all types and methods become zero-cost no-ops.
8//!
9//! # Automatic Integration
10//!
11//! The recommended usage is to attach the profiler to a [`RenderableWindow`](crate::RenderableWindow):
12//!
13//! ```ignore
14//! // At init:
15//! let ctx = GraphicsContext::new_owned_with_descriptor(
16//!     GraphicsContextDescriptor::new()
17//!         .request_capability::<GpuFrameProfiler>()
18//! ).await?;
19//! let profiler = Arc::new(GpuFrameProfiler::new(&ctx)?);
20//! window.set_gpu_profiler(profiler);
21//!
22//! // Each frame — GPU profiling is fully automatic:
23//! let mut frame = window.begin_drawing();
24//! frame.clear_and_render(RenderTarget::Surface, Color::BLACK, |pass| {
25//!     // GPU scope "main_pass" is automatically active
26//! });
27//! frame.finish(); // auto: resolve_queries -> submit -> end_frame
28//! ```
29//!
30//! # Manual Scoping
31//!
32//! For custom GPU scopes outside of render passes, use [`FrameContext::with_gpu_scope`](crate::FrameContext::with_gpu_scope):
33//!
34//! ```ignore
35//! frame.with_gpu_scope("upload_data", |encoder| {
36//!     encoder.copy_buffer_to_buffer(&src, 0, &dst, 0, size);
37//! });
38//! ```
39
40use crate::capability::{GpuRequirements, RenderCapability};
41use crate::features::GpuFeatures;
42
43// ============================================================================
44// RenderCapability — works in both enabled and disabled configurations
45// ============================================================================
46
47impl RenderCapability for GpuFrameProfiler {
48    fn requirements() -> GpuRequirements {
49        // All three timestamp features are requested (best-effort), not required.
50        // wgpu-profiler gracefully degrades if any are unavailable:
51        // - TIMESTAMP_QUERY: base feature, allows timestamp writes on pass definition
52        // - TIMESTAMP_QUERY_INSIDE_ENCODERS: allows scopes on command encoders
53        // - TIMESTAMP_QUERY_INSIDE_PASSES: allows scopes on render/compute passes
54        GpuRequirements::new()
55            .request_features(
56                GpuFeatures::TIMESTAMP_QUERY
57                    | GpuFeatures::TIMESTAMP_QUERY_INSIDE_ENCODERS
58                    | GpuFeatures::TIMESTAMP_QUERY_INSIDE_PASSES,
59            )
60    }
61
62    fn name() -> &'static str {
63        "GpuFrameProfiler"
64    }
65}
66
67// ============================================================================
68// Feature: gpu-profiling ENABLED
69// ============================================================================
70#[cfg(feature = "gpu-profiling")]
71mod enabled {
72    use std::sync::{Arc, Mutex};
73
74    use crate::context::GraphicsContext;
75    use crate::features::GpuFeatures;
76
77    /// GPU frame profiler wrapping `wgpu_profiler::GpuProfiler`.
78    ///
79    /// All methods take `&self` using interior mutability (`Mutex`), making it
80    /// easy to share the profiler between `RenderableWindow` and `FrameContext`
81    /// via `Arc<GpuFrameProfiler>`.
82    ///
83    /// Create one per application. The profiler is automatically driven each frame
84    /// when attached to a `RenderableWindow` via [`set_gpu_profiler`]:
85    /// - GPU scopes are created around render passes in `with_pass()` / `clear_and_render()`
86    /// - Queries are resolved and the frame is ended in `FrameContext::Drop`
87    ///
88    /// For manual use:
89    /// 1. Open GPU scopes with [`scope`](Self::scope) on command encoders or render passes.
90    /// 2. Call [`resolve_queries`](Self::resolve_queries) before submitting the encoder.
91    /// 3. Call [`end_frame`](Self::end_frame) after queue submit.
92    ///
93    /// Results are automatically forwarded to puffin via
94    /// `wgpu_profiler::puffin::output_frame_to_puffin`.
95    ///
96    /// # Timestamp Queries
97    ///
98    /// If the device was created with `TIMESTAMP_QUERY` enabled (via
99    /// `request_capability::<GpuFrameProfiler>()`), scopes produce actual GPU
100    /// timing data. Otherwise, wgpu-profiler falls back to debug groups only.
101    pub struct GpuFrameProfiler {
102        profiler: Mutex<wgpu_profiler::GpuProfiler>,
103        timestamp_period: f32,
104        has_timestamps: bool,
105    }
106
107    impl GpuFrameProfiler {
108        /// Create a new GPU frame profiler.
109        ///
110        /// The profiler inspects the device features to determine whether
111        /// `TIMESTAMP_QUERY` is available. If not, it still works but only
112        /// records debug group labels (no timing data).
113        pub fn new(context: &Arc<GraphicsContext>) -> Result<Self, wgpu_profiler::CreationError> {
114            let has_timestamps = context.has_feature(GpuFeatures::TIMESTAMP_QUERY);
115            let has_encoder_timestamps =
116                context.has_feature(GpuFeatures::TIMESTAMP_QUERY_INSIDE_ENCODERS);
117            let has_pass_timestamps =
118                context.has_feature(GpuFeatures::TIMESTAMP_QUERY_INSIDE_PASSES);
119
120            if has_timestamps {
121                tracing::info!(
122                    "GPU profiler: TIMESTAMP_QUERY=yes, INSIDE_ENCODERS={}, INSIDE_PASSES={}",
123                    if has_encoder_timestamps { "yes" } else { "no" },
124                    if has_pass_timestamps { "yes" } else { "no" },
125                );
126                if !has_encoder_timestamps {
127                    tracing::warn!(
128                        "GPU profiler: TIMESTAMP_QUERY_INSIDE_ENCODERS not available — \
129                         scopes on command encoders will not produce timing data"
130                    );
131                }
132                if !has_pass_timestamps {
133                    tracing::warn!(
134                        "GPU profiler: TIMESTAMP_QUERY_INSIDE_PASSES not available — \
135                         scopes on render/compute passes will not produce timing data"
136                    );
137                }
138            } else {
139                tracing::warn!(
140                    "GPU profiler: TIMESTAMP_QUERY not enabled — debug groups only, no timing data. \
141                     Use GraphicsContextDescriptor::request_capability::<GpuFrameProfiler>() to request it."
142                );
143            }
144
145            let profiler = wgpu_profiler::GpuProfiler::new(
146                context.device(),
147                wgpu_profiler::GpuProfilerSettings::default(),
148            )?;
149            let timestamp_period = context.queue().get_timestamp_period();
150
151            Ok(Self {
152                profiler: Mutex::new(profiler),
153                timestamp_period,
154                has_timestamps,
155            })
156        }
157
158        /// Whether this profiler has actual GPU timestamp query support.
159        ///
160        /// If `false`, scopes still appear in the profiler as debug groups
161        /// but without timing data.
162        pub fn has_timestamp_queries(&self) -> bool {
163            self.has_timestamps
164        }
165
166        /// Open a profiling scope on a command encoder or render/compute pass.
167        ///
168        /// The scope is automatically closed when the returned guard is dropped.
169        ///
170        /// Returns a [`GpuProfileScope`] that wraps the underlying `wgpu_profiler::Scope`
171        /// and holds the `Mutex` guard. Access the recorder via `Deref`/`DerefMut`.
172        ///
173        /// # Panics
174        ///
175        /// Panics if the internal profiler lock is poisoned.
176        pub fn scope<'a, Recorder: wgpu_profiler::ProfilerCommandRecorder>(
177            &'a self,
178            label: impl Into<String>,
179            encoder_or_pass: &'a mut Recorder,
180        ) -> GpuProfileScope<'a, Recorder> {
181            let profiler = self.profiler.lock().unwrap();
182            // SAFETY: We extend the MutexGuard's lifetime to match &self ('a).
183            // This is sound because:
184            // 1. The GpuProfiler lives as long as self (lifetime 'a)
185            // 2. GpuProfiler::scope() only needs &self (immutable borrow)
186            // 3. The caller must drop the scope before calling resolve_queries/end_frame
187            //    (which is guaranteed by the frame lifecycle: scopes live within render passes,
188            //    resolve/end happen in FrameContext::Drop after all passes are done)
189            let profiler_ptr = &*profiler as *const wgpu_profiler::GpuProfiler;
190            let profiler_ref: &'a wgpu_profiler::GpuProfiler = unsafe { &*profiler_ptr };
191            let scope = profiler_ref.scope(label, encoder_or_pass);
192            GpuProfileScope {
193                scope,
194                _borrow: profiler,
195            }
196        }
197
198        /// Resolve all pending queries. Call this before submitting the encoder.
199        pub fn resolve_queries(&self, encoder: &mut wgpu::CommandEncoder) {
200            self.profiler.lock().unwrap().resolve_queries(encoder);
201        }
202
203        /// End the current profiling frame. Call this after queue submit.
204        ///
205        /// Processes finished frames and reports results to puffin.
206        pub fn end_frame(&self) -> Result<(), wgpu_profiler::EndFrameError> {
207            let mut profiler = self.profiler.lock().unwrap();
208            profiler.end_frame()?;
209
210            // Process any finished frames and report to puffin
211            if let Some(results) = profiler.process_finished_frame(self.timestamp_period) {
212                wgpu_profiler::puffin::output_frame_to_puffin(
213                    &mut puffin::GlobalProfiler::lock(),
214                    &results,
215                );
216            }
217
218            Ok(())
219        }
220
221        /// Get a reference to the inner `Mutex<wgpu_profiler::GpuProfiler>` for advanced use.
222        pub fn inner(&self) -> &Mutex<wgpu_profiler::GpuProfiler> {
223            &self.profiler
224        }
225    }
226
227    /// A GPU profiling scope that wraps `wgpu_profiler::Scope` and holds
228    /// the `Mutex` guard.
229    ///
230    /// This type implements `Deref`/`DerefMut` to the underlying recorder
231    /// (command encoder or render/compute pass), so you can use it as a
232    /// drop-in replacement for the recorder.
233    ///
234    /// The scope is automatically closed (GPU timestamp written) when dropped.
235    pub struct GpuProfileScope<'a, Recorder: wgpu_profiler::ProfilerCommandRecorder> {
236        scope: wgpu_profiler::Scope<'a, Recorder>,
237        _borrow: std::sync::MutexGuard<'a, wgpu_profiler::GpuProfiler>,
238    }
239
240    impl<Recorder: wgpu_profiler::ProfilerCommandRecorder> std::ops::Deref
241        for GpuProfileScope<'_, Recorder>
242    {
243        type Target = Recorder;
244
245        fn deref(&self) -> &Self::Target {
246            &self.scope
247        }
248    }
249
250    impl<Recorder: wgpu_profiler::ProfilerCommandRecorder> std::ops::DerefMut
251        for GpuProfileScope<'_, Recorder>
252    {
253        fn deref_mut(&mut self) -> &mut Self::Target {
254            &mut self.scope
255        }
256    }
257}
258
259#[cfg(feature = "gpu-profiling")]
260pub use enabled::*;
261
262// ============================================================================
263// Feature: gpu-profiling DISABLED (zero-cost no-ops)
264// ============================================================================
265#[cfg(not(feature = "gpu-profiling"))]
266mod disabled {
267    use std::sync::Arc;
268
269    use crate::context::GraphicsContext;
270
271    /// No-op GPU frame profiler (gpu-profiling feature disabled).
272    ///
273    /// All methods are no-ops that compile to nothing. The `&self` signatures
274    /// match the enabled version for API compatibility.
275    pub struct GpuFrameProfiler;
276
277    impl GpuFrameProfiler {
278        /// No-op: create a new GPU frame profiler.
279        pub fn new(_context: &Arc<GraphicsContext>) -> Result<Self, GpuFrameProfilerError> {
280            Ok(Self)
281        }
282
283        /// No-op: always returns false.
284        pub fn has_timestamp_queries(&self) -> bool {
285            false
286        }
287
288        /// No-op: resolve queries.
289        pub fn resolve_queries(&self, _encoder: &mut wgpu::CommandEncoder) {}
290
291        /// No-op: end frame.
292        pub fn end_frame(&self) -> Result<(), GpuFrameProfilerError> {
293            Ok(())
294        }
295    }
296
297    /// Placeholder error type when gpu-profiling is disabled.
298    #[derive(Debug)]
299    pub struct GpuFrameProfilerError;
300
301    impl std::fmt::Display for GpuFrameProfilerError {
302        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
303            write!(f, "GPU profiling is disabled")
304        }
305    }
306
307    impl std::error::Error for GpuFrameProfilerError {}
308}
309
310#[cfg(not(feature = "gpu-profiling"))]
311pub use disabled::*;
312
313// ============================================================================
314// Convenience Macro
315// ============================================================================
316
/// Execute a block of code within a GPU profiling scope on a `FrameContext`.
///
/// When the `gpu-profiling` feature is enabled and a GPU profiler is attached
/// to the frame, this creates a GPU timing scope around the block.
/// When disabled or no profiler is attached, the block is executed directly.
///
/// This is syntactic sugar for [`FrameContext::with_gpu_scope`](crate::FrameContext::with_gpu_scope):
/// the macro expands to `$frame.with_gpu_scope($label, $body)` and evaluates
/// to whatever that call returns. A trailing comma after the body is accepted.
///
/// # Usage
///
/// ```ignore
/// use astrelis_render::gpu_profile_scope;
///
/// gpu_profile_scope!(frame, "upload_textures", |encoder| {
///     encoder.copy_buffer_to_buffer(&src, 0, &dst, 0, size);
/// });
/// ```
#[macro_export]
macro_rules! gpu_profile_scope {
    ($frame:expr, $label:expr, $body:expr $(,)?) => {
        $frame.with_gpu_scope($label, $body)
    };
}