Skip to main content

astrelis_render/
gpu_profiling.rs

1//! GPU profiling via `wgpu-profiler` with puffin visualization.
2//!
3//! When the `gpu-profiling` feature is enabled, this module provides a
4//! [`GpuFrameProfiler`] that wraps `wgpu_profiler::GpuProfiler` and
5//! automatically reports GPU timing data to puffin.
6//!
7//! When the feature is disabled, all types and methods become zero-cost no-ops.
8//!
9//! # Automatic Integration
10//!
11//! The recommended usage is to attach the profiler to a [`RenderableWindow`](crate::RenderableWindow):
12//!
13//! ```ignore
14//! // At init:
15//! let ctx = GraphicsContext::new_owned_with_descriptor(
16//!     GraphicsContextDescriptor::new()
17//!         .request_capability::<GpuFrameProfiler>()
18//! ).await?;
19//! let profiler = Arc::new(GpuFrameProfiler::new(&ctx)?);
20//! window.set_gpu_profiler(profiler);
21//!
22//! // Each frame — GPU profiling is fully automatic:
23//! let mut frame = window.begin_drawing();
24//! frame.clear_and_render(RenderTarget::Surface, Color::BLACK, |pass| {
25//!     // GPU scope "main_pass" is automatically active
26//! });
27//! frame.finish(); // auto: resolve_queries -> submit -> end_frame
28//! ```
29//!
30//! # Manual Scoping
31//!
32//! For custom GPU scopes outside of render passes:
33//!
34//! ```ignore
35//! frame.with_gpu_scope("upload_data", |encoder| {
36//!     encoder.copy_buffer_to_buffer(&src, 0, &dst, 0, size);
37//! });
38//! ```
39
40use crate::capability::{GpuRequirements, RenderCapability};
41use crate::features::GpuFeatures;
42
43// ============================================================================
44// RenderCapability — works in both enabled and disabled configurations
45// ============================================================================
46
47impl RenderCapability for GpuFrameProfiler {
48    fn requirements() -> GpuRequirements {
49        // All three timestamp features are requested (best-effort), not required.
50        // wgpu-profiler gracefully degrades if any are unavailable:
51        // - TIMESTAMP_QUERY: base feature, allows timestamp writes on pass definition
52        // - TIMESTAMP_QUERY_INSIDE_ENCODERS: allows scopes on command encoders
53        // - TIMESTAMP_QUERY_INSIDE_PASSES: allows scopes on render/compute passes
54        GpuRequirements::new().request_features(
55            GpuFeatures::TIMESTAMP_QUERY
56                | GpuFeatures::TIMESTAMP_QUERY_INSIDE_ENCODERS
57                | GpuFeatures::TIMESTAMP_QUERY_INSIDE_PASSES,
58        )
59    }
60
61    fn name() -> &'static str {
62        "GpuFrameProfiler"
63    }
64}
65
66// ============================================================================
67// Feature: gpu-profiling ENABLED
68// ============================================================================
69#[cfg(feature = "gpu-profiling")]
70mod enabled {
71    use std::sync::{Arc, Mutex};
72
73    use crate::context::GraphicsContext;
74    use crate::features::GpuFeatures;
75
76    /// GPU frame profiler wrapping `wgpu_profiler::GpuProfiler`.
77    ///
78    /// All methods take `&self` using interior mutability (`Mutex`), making it
79    /// easy to share the profiler between `RenderableWindow` and `FrameContext`
80    /// via `Arc<GpuFrameProfiler>`.
81    ///
82    /// Create one per application. The profiler is automatically driven each frame
83    /// when attached to a `RenderableWindow` via [`set_gpu_profiler`]:
84    /// - GPU scopes are created around render passes in `with_pass()` / `clear_and_render()`
85    /// - Queries are resolved and the frame is ended in `FrameContext::Drop`
86    ///
87    /// For manual use:
88    /// 1. Open GPU scopes with [`scope`](Self::scope) on command encoders or render passes.
89    /// 2. Call [`resolve_queries`](Self::resolve_queries) before submitting the encoder.
90    /// 3. Call [`end_frame`](Self::end_frame) after queue submit.
91    ///
92    /// Results are automatically forwarded to puffin via
93    /// `wgpu_profiler::puffin::output_frame_to_puffin`.
94    ///
95    /// # Timestamp Queries
96    ///
97    /// If the device was created with `TIMESTAMP_QUERY` enabled (via
98    /// `request_capability::<GpuFrameProfiler>()`), scopes produce actual GPU
99    /// timing data. Otherwise, wgpu-profiler falls back to debug groups only.
100    pub struct GpuFrameProfiler {
101        profiler: Mutex<wgpu_profiler::GpuProfiler>,
102        timestamp_period: f32,
103        has_timestamps: bool,
104    }
105
106    impl GpuFrameProfiler {
107        /// Create a new GPU frame profiler.
108        ///
109        /// The profiler inspects the device features to determine whether
110        /// `TIMESTAMP_QUERY` is available. If not, it still works but only
111        /// records debug group labels (no timing data).
112        pub fn new(context: &Arc<GraphicsContext>) -> Result<Self, wgpu_profiler::CreationError> {
113            let has_timestamps = context.has_feature(GpuFeatures::TIMESTAMP_QUERY);
114            let has_encoder_timestamps =
115                context.has_feature(GpuFeatures::TIMESTAMP_QUERY_INSIDE_ENCODERS);
116            let has_pass_timestamps =
117                context.has_feature(GpuFeatures::TIMESTAMP_QUERY_INSIDE_PASSES);
118
119            if has_timestamps {
120                tracing::info!(
121                    "GPU profiler: TIMESTAMP_QUERY=yes, INSIDE_ENCODERS={}, INSIDE_PASSES={}",
122                    if has_encoder_timestamps { "yes" } else { "no" },
123                    if has_pass_timestamps { "yes" } else { "no" },
124                );
125                if !has_encoder_timestamps {
126                    tracing::warn!(
127                        "GPU profiler: TIMESTAMP_QUERY_INSIDE_ENCODERS not available — \
128                         scopes on command encoders will not produce timing data"
129                    );
130                }
131                if !has_pass_timestamps {
132                    tracing::warn!(
133                        "GPU profiler: TIMESTAMP_QUERY_INSIDE_PASSES not available — \
134                         scopes on render/compute passes will not produce timing data"
135                    );
136                }
137            } else {
138                tracing::warn!(
139                    "GPU profiler: TIMESTAMP_QUERY not enabled — debug groups only, no timing data. \
140                     Use GraphicsContextDescriptor::request_capability::<GpuFrameProfiler>() to request it."
141                );
142            }
143
144            let profiler = wgpu_profiler::GpuProfiler::new(
145                context.device(),
146                wgpu_profiler::GpuProfilerSettings::default(),
147            )?;
148            let timestamp_period = context.queue().get_timestamp_period();
149
150            Ok(Self {
151                profiler: Mutex::new(profiler),
152                timestamp_period,
153                has_timestamps,
154            })
155        }
156
157        /// Whether this profiler has actual GPU timestamp query support.
158        ///
159        /// If `false`, scopes still appear in the profiler as debug groups
160        /// but without timing data.
161        pub fn has_timestamp_queries(&self) -> bool {
162            self.has_timestamps
163        }
164
165        /// Open a profiling scope on a command encoder or render/compute pass.
166        ///
167        /// The scope is automatically closed when the returned guard is dropped.
168        ///
169        /// Returns a [`GpuProfileScope`] that wraps the underlying `wgpu_profiler::Scope`
170        /// and holds the `Mutex` guard. Access the recorder via `Deref`/`DerefMut`.
171        ///
172        /// # Panics
173        ///
174        /// Panics if the internal profiler lock is poisoned.
175        pub fn scope<'a, Recorder: wgpu_profiler::ProfilerCommandRecorder>(
176            &'a self,
177            label: impl Into<String>,
178            encoder_or_pass: &'a mut Recorder,
179        ) -> GpuProfileScope<'a, Recorder> {
180            let profiler = self.profiler.lock().unwrap();
181            // SAFETY: We extend the MutexGuard's lifetime to match &self ('a).
182            // This is sound because:
183            // 1. The GpuProfiler lives as long as self (lifetime 'a)
184            // 2. GpuProfiler::scope() only needs &self (immutable borrow)
185            // 3. The caller must drop the scope before calling resolve_queries/end_frame
186            //    (which is guaranteed by the frame lifecycle: scopes live within render passes,
187            //    resolve/end happen in FrameContext::Drop after all passes are done)
188            let profiler_ptr = &*profiler as *const wgpu_profiler::GpuProfiler;
189            let profiler_ref: &'a wgpu_profiler::GpuProfiler = unsafe { &*profiler_ptr };
190            let scope = profiler_ref.scope(label, encoder_or_pass);
191            GpuProfileScope {
192                scope,
193                _borrow: profiler,
194            }
195        }
196
197        /// Resolve all pending queries. Call this before submitting the encoder.
198        pub fn resolve_queries(&self, encoder: &mut wgpu::CommandEncoder) {
199            self.profiler.lock().unwrap().resolve_queries(encoder);
200        }
201
202        /// End the current profiling frame. Call this after queue submit.
203        ///
204        /// Processes finished frames and reports results to puffin.
205        pub fn end_frame(&self) -> Result<(), wgpu_profiler::EndFrameError> {
206            let mut profiler = self.profiler.lock().unwrap();
207            profiler.end_frame()?;
208
209            // Process any finished frames and report to puffin
210            if let Some(results) = profiler.process_finished_frame(self.timestamp_period) {
211                wgpu_profiler::puffin::output_frame_to_puffin(
212                    &mut puffin::GlobalProfiler::lock(),
213                    &results,
214                );
215            }
216
217            Ok(())
218        }
219
220        /// Get a reference to the inner `Mutex<wgpu_profiler::GpuProfiler>` for advanced use.
221        pub fn inner(&self) -> &Mutex<wgpu_profiler::GpuProfiler> {
222            &self.profiler
223        }
224    }
225
226    /// A GPU profiling scope that wraps `wgpu_profiler::Scope` and holds
227    /// the `Mutex` guard.
228    ///
229    /// This type implements `Deref`/`DerefMut` to the underlying recorder
230    /// (command encoder or render/compute pass), so you can use it as a
231    /// drop-in replacement for the recorder.
232    ///
233    /// The scope is automatically closed (GPU timestamp written) when dropped.
234    pub struct GpuProfileScope<'a, Recorder: wgpu_profiler::ProfilerCommandRecorder> {
235        scope: wgpu_profiler::Scope<'a, Recorder>,
236        _borrow: std::sync::MutexGuard<'a, wgpu_profiler::GpuProfiler>,
237    }
238
239    impl<Recorder: wgpu_profiler::ProfilerCommandRecorder> std::ops::Deref
240        for GpuProfileScope<'_, Recorder>
241    {
242        type Target = Recorder;
243
244        fn deref(&self) -> &Self::Target {
245            &self.scope
246        }
247    }
248
249    impl<Recorder: wgpu_profiler::ProfilerCommandRecorder> std::ops::DerefMut
250        for GpuProfileScope<'_, Recorder>
251    {
252        fn deref_mut(&mut self) -> &mut Self::Target {
253            &mut self.scope
254        }
255    }
256}
257
258#[cfg(feature = "gpu-profiling")]
259pub use enabled::*;
260
261// ============================================================================
262// Feature: gpu-profiling DISABLED (zero-cost no-ops)
263// ============================================================================
264#[cfg(not(feature = "gpu-profiling"))]
265mod disabled {
266    use std::sync::Arc;
267
268    use crate::context::GraphicsContext;
269
270    /// No-op GPU frame profiler (gpu-profiling feature disabled).
271    ///
272    /// All methods are no-ops that compile to nothing. The `&self` signatures
273    /// match the enabled version for API compatibility.
274    pub struct GpuFrameProfiler;
275
276    impl GpuFrameProfiler {
277        /// No-op: create a new GPU frame profiler.
278        pub fn new(_context: &Arc<GraphicsContext>) -> Result<Self, GpuFrameProfilerError> {
279            Ok(Self)
280        }
281
282        /// No-op: always returns false.
283        pub fn has_timestamp_queries(&self) -> bool {
284            false
285        }
286
287        /// No-op: resolve queries.
288        pub fn resolve_queries(&self, _encoder: &mut wgpu::CommandEncoder) {}
289
290        /// No-op: end frame.
291        pub fn end_frame(&self) -> Result<(), GpuFrameProfilerError> {
292            Ok(())
293        }
294    }
295
/// Error type of the no-op profiler API (gpu-profiling feature disabled).
///
/// It only exists so the no-op `new` and `end_frame` can return a `Result`;
/// neither of them ever constructs it.
#[derive(Debug)]
pub struct GpuFrameProfilerError;

impl std::fmt::Display for GpuFrameProfilerError {
    /// Writes the fixed message "GPU profiling is disabled".
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        formatter.write_str("GPU profiling is disabled")
    }
}

impl std::error::Error for GpuFrameProfilerError {}
307}
308
309#[cfg(not(feature = "gpu-profiling"))]
310pub use disabled::*;
311
312// ============================================================================
313// Convenience Macro
314// ============================================================================
315
/// Execute a block of code within a GPU profiling scope on a frame.
///
/// The macro expands to `$frame.with_gpu_scope($label, $body)` and evaluates
/// to whatever that call returns, so `$frame` can be any expression whose
/// type provides a compatible `with_gpu_scope` method. What happens when the
/// `gpu-profiling` feature is disabled or no profiler is attached is decided
/// by that method (per the crate docs the body is then simply executed).
///
/// A trailing comma after the body is accepted.
///
/// # Usage
///
/// ```ignore
/// use astrelis_render::gpu_profile_scope;
///
/// gpu_profile_scope!(frame, "upload_textures", |encoder| {
///     encoder.copy_buffer_to_buffer(&src, 0, &dst, 0, size);
/// });
/// ```
#[macro_export]
macro_rules! gpu_profile_scope {
    ($frame:expr, $label:expr, $body:expr $(,)?) => {
        $frame.with_gpu_scope($label, $body)
    };
}
336}