Skip to main content

astrelis_render/
gpu_profiling.rs

1//! GPU profiling via `wgpu-profiler` with puffin visualization.
2//!
3//! When the `gpu-profiling` feature is enabled, this module provides a
4//! [`GpuFrameProfiler`] that wraps `wgpu_profiler::GpuProfiler` and
5//! automatically reports GPU timing data to puffin.
6//!
7//! When the feature is disabled, all types and methods become zero-cost no-ops.
8//!
9//! # Automatic Integration
10//!
11//! The recommended usage is to attach the profiler to a [`RenderWindow`](crate::RenderWindow):
12//!
13//! ```ignore
14//! // At init:
15//! let ctx = GraphicsContext::new_owned_with_descriptor(
16//!     GraphicsContextDescriptor::new()
17//!         .request_capability::<GpuFrameProfiler>()
18//! ).await?;
19//! let profiler = Arc::new(GpuFrameProfiler::new(&ctx)?);
20//! window.set_gpu_profiler(profiler);
21//!
22//! // Each frame — GPU profiling is fully automatic:
23//! let mut frame = window.begin_drawing();
24//! {
25//!     let mut pass = frame.render_pass()
26//!         .target(RenderTarget::Surface)
27//!         .clear_color(Color::BLACK)
28//!         .build();
29//!     // GPU scope "main_pass" is automatically active
30//! }
31//! frame.finish(); // auto: resolve_queries -> submit -> end_frame
32//! ```
33//!
34//! # Manual Scoping
35//!
36//! For custom GPU scopes outside of render passes:
37//!
38//! ```ignore
39//! frame.with_gpu_scope("upload_data", |encoder| {
40//!     encoder.copy_buffer_to_buffer(&src, 0, &dst, 0, size);
41//! });
42//! ```
43
44use crate::capability::{GpuRequirements, RenderCapability};
45use crate::features::GpuFeatures;
46
47// ============================================================================
48// RenderCapability — works in both enabled and disabled configurations
49// ============================================================================
50
51impl RenderCapability for GpuFrameProfiler {
52    fn requirements() -> GpuRequirements {
53        // All three timestamp features are requested (best-effort), not required.
54        // wgpu-profiler gracefully degrades if any are unavailable:
55        // - TIMESTAMP_QUERY: base feature, allows timestamp writes on pass definition
56        // - TIMESTAMP_QUERY_INSIDE_ENCODERS: allows scopes on command encoders
57        // - TIMESTAMP_QUERY_INSIDE_PASSES: allows scopes on render/compute passes
58        GpuRequirements::new().request_features(
59            GpuFeatures::TIMESTAMP_QUERY
60                | GpuFeatures::TIMESTAMP_QUERY_INSIDE_ENCODERS
61                | GpuFeatures::TIMESTAMP_QUERY_INSIDE_PASSES,
62        )
63    }
64
65    fn name() -> &'static str {
66        "GpuFrameProfiler"
67    }
68}
69
70// ============================================================================
71// Feature: gpu-profiling ENABLED
72// ============================================================================
#[cfg(feature = "gpu-profiling")]
mod enabled {
    use std::sync::{Arc, Mutex, MutexGuard, PoisonError};

    use crate::context::GraphicsContext;
    use crate::features::GpuFeatures;

    /// GPU frame profiler wrapping `wgpu_profiler::GpuProfiler`.
    ///
    /// All methods take `&self` using interior mutability (`Mutex`), making it
    /// easy to share the profiler between `RenderWindow` and `Frame`
    /// via `Arc<GpuFrameProfiler>`.
    ///
    /// Create one per application. The profiler is automatically driven each frame
    /// when attached to a `RenderWindow` via `RenderWindow::set_gpu_profiler`:
    /// - GPU scopes are created around render passes
    /// - Queries are resolved and the frame is ended in `Frame::Drop`
    ///
    /// For manual use:
    /// 1. Open GPU scopes with [`scope`](Self::scope) on command encoders or render passes.
    /// 2. Call [`resolve_queries`](Self::resolve_queries) before submitting the encoder.
    /// 3. Call [`end_frame`](Self::end_frame) after queue submit.
    ///
    /// Results are automatically forwarded to puffin via
    /// `wgpu_profiler::puffin::output_frame_to_puffin`.
    ///
    /// # Timestamp Queries
    ///
    /// If the device was created with `TIMESTAMP_QUERY` enabled (via
    /// `request_capability::<GpuFrameProfiler>()`), scopes produce actual GPU
    /// timing data. Otherwise, wgpu-profiler falls back to debug groups only.
    ///
    /// # Lock poisoning
    ///
    /// A [`GpuProfileScope`] keeps the internal mutex locked while user code
    /// records commands, so a panic inside a scope poisons that mutex. The
    /// methods on this type recover the guard from a poisoned lock instead of
    /// panicking again, so that profiling (a diagnostics aid) cannot turn one
    /// panic into a second one on a later frame or while unwinding.
    pub struct GpuFrameProfiler {
        /// The wrapped profiler; the mutex provides the `&mut` access that
        /// `resolve_queries`/`end_frame` need behind a `&self` API.
        profiler: Mutex<wgpu_profiler::GpuProfiler>,
        /// Value of `Queue::get_timestamp_period()` captured at construction
        /// and handed to `process_finished_frame`.
        timestamp_period: f32,
        /// Whether the context reported `TIMESTAMP_QUERY` at construction.
        has_timestamps: bool,
    }

    impl GpuFrameProfiler {
        /// Create a new GPU frame profiler.
        ///
        /// The profiler inspects the device features to determine whether
        /// `TIMESTAMP_QUERY` is available. If not, it still works but only
        /// records debug group labels (no timing data). The detected feature
        /// set is logged through `tracing`.
        ///
        /// # Errors
        ///
        /// Returns the `wgpu_profiler::CreationError` produced by
        /// `wgpu_profiler::GpuProfiler::new` if the underlying profiler cannot
        /// be created.
        pub fn new(context: &Arc<GraphicsContext>) -> Result<Self, wgpu_profiler::CreationError> {
            let has_timestamps = context.has_feature(GpuFeatures::TIMESTAMP_QUERY);
            let has_encoder_timestamps =
                context.has_feature(GpuFeatures::TIMESTAMP_QUERY_INSIDE_ENCODERS);
            let has_pass_timestamps =
                context.has_feature(GpuFeatures::TIMESTAMP_QUERY_INSIDE_PASSES);

            if has_timestamps {
                tracing::info!(
                    "GPU profiler: TIMESTAMP_QUERY=yes, INSIDE_ENCODERS={}, INSIDE_PASSES={}",
                    if has_encoder_timestamps { "yes" } else { "no" },
                    if has_pass_timestamps { "yes" } else { "no" },
                );
                if !has_encoder_timestamps {
                    tracing::warn!(
                        "GPU profiler: TIMESTAMP_QUERY_INSIDE_ENCODERS not available — \
                         scopes on command encoders will not produce timing data"
                    );
                }
                if !has_pass_timestamps {
                    tracing::warn!(
                        "GPU profiler: TIMESTAMP_QUERY_INSIDE_PASSES not available — \
                         scopes on render/compute passes will not produce timing data"
                    );
                }
            } else {
                tracing::warn!(
                    "GPU profiler: TIMESTAMP_QUERY not enabled — debug groups only, no timing data. \
                     Use GraphicsContextDescriptor::request_capability::<GpuFrameProfiler>() to request it."
                );
            }

            let profiler = wgpu_profiler::GpuProfiler::new(
                context.device(),
                wgpu_profiler::GpuProfilerSettings::default(),
            )?;
            let timestamp_period = context.queue().get_timestamp_period();

            Ok(Self {
                profiler: Mutex::new(profiler),
                timestamp_period,
                has_timestamps,
            })
        }

        /// Lock the wrapped profiler, recovering the guard if the mutex is poisoned.
        ///
        /// Poisoning only records that some thread panicked while holding the
        /// lock (most likely inside an open [`GpuProfileScope`]); refusing to
        /// lock afterwards would make every later profiler call panic too.
        fn lock_profiler(&self) -> MutexGuard<'_, wgpu_profiler::GpuProfiler> {
            self.profiler.lock().unwrap_or_else(PoisonError::into_inner)
        }

        /// Whether this profiler has actual GPU timestamp query support.
        ///
        /// If `false`, scopes still appear in the profiler as debug groups
        /// but without timing data.
        pub fn has_timestamp_queries(&self) -> bool {
            self.has_timestamps
        }

        /// Open a profiling scope on a command encoder or render/compute pass.
        ///
        /// The scope is automatically closed when the returned guard is dropped.
        ///
        /// Returns a [`GpuProfileScope`] that wraps the underlying `wgpu_profiler::Scope`
        /// and holds the `Mutex` guard. Access the recorder via `Deref`/`DerefMut`.
        ///
        /// # Locking
        ///
        /// The internal profiler lock stays held for as long as the returned
        /// guard is alive. Calling [`scope`](Self::scope),
        /// [`resolve_queries`](Self::resolve_queries) or
        /// [`end_frame`](Self::end_frame) on the same profiler from the same
        /// thread before dropping the guard re-locks a `std::sync::Mutex` that
        /// the thread already holds, which deadlocks or panics. Other threads
        /// simply block until the guard is dropped. Scopes therefore cannot be
        /// nested through this method.
        ///
        /// A poisoned lock is recovered rather than treated as fatal; see the
        /// type-level documentation.
        pub fn scope<'a, Recorder: wgpu_profiler::ProfilerCommandRecorder>(
            &'a self,
            label: impl Into<String>,
            encoder_or_pass: &'a mut Recorder,
        ) -> GpuProfileScope<'a, Recorder> {
            let guard = self.lock_profiler();
            // SAFETY: `guard` dereferences to the `GpuProfiler` stored inside
            // `self.profiler`, which lives at least as long as `&'a self`, so the
            // pointer is valid for `'a`. The shared reference created here never
            // coexists with a `&mut GpuProfiler` because:
            // 1. the guard is moved into the returned `GpuProfileScope` (moving a
            //    guard does not move the protected data), so the mutex stays
            //    locked for as long as the scope exists and neither
            //    `resolve_queries`/`end_frame` nor a lock taken via `inner` can
            //    obtain mutable access in the meantime;
            // 2. `GpuProfileScope` declares `scope` before `_borrow`, so the scope
            //    (and the reference inside it) is dropped before the lock is
            //    released;
            // 3. the guard itself is only ever used for shared access.
            let profiler_ref: &'a wgpu_profiler::GpuProfiler =
                unsafe { &*(&*guard as *const wgpu_profiler::GpuProfiler) };
            let scope = profiler_ref.scope(label, encoder_or_pass);
            GpuProfileScope {
                scope,
                _borrow: guard,
            }
        }

        /// Resolve all pending queries. Call this before submitting the encoder.
        ///
        /// Blocks until the profiler lock is available, so every
        /// [`GpuProfileScope`] created on the calling thread must have been
        /// dropped first.
        pub fn resolve_queries(&self, encoder: &mut wgpu::CommandEncoder) {
            self.lock_profiler().resolve_queries(encoder);
        }

        /// End the current profiling frame. Call this after queue submit.
        ///
        /// Processes finished frames and reports results to puffin.
        ///
        /// # Errors
        ///
        /// Returns the `wgpu_profiler::EndFrameError` reported by the wrapped
        /// profiler's `end_frame`; in that case nothing is forwarded to puffin.
        pub fn end_frame(&self) -> Result<(), wgpu_profiler::EndFrameError> {
            let mut profiler = self.lock_profiler();
            profiler.end_frame()?;
            let finished = profiler.process_finished_frame(self.timestamp_period);
            // Release the profiler lock before taking puffin's global lock so the
            // two locks are never held at the same time.
            drop(profiler);

            // Report a finished frame (if one is ready) to puffin.
            if let Some(results) = finished {
                wgpu_profiler::puffin::output_frame_to_puffin(
                    &mut puffin::GlobalProfiler::lock(),
                    &results,
                );
            }

            Ok(())
        }

        /// Get a reference to the inner `Mutex<wgpu_profiler::GpuProfiler>` for advanced use.
        ///
        /// Locking it directly is subject to the usual `std::sync::Mutex`
        /// poisoning rules; the poison recovery described on this type only
        /// applies to its own methods.
        pub fn inner(&self) -> &Mutex<wgpu_profiler::GpuProfiler> {
            &self.profiler
        }
    }

    /// A GPU profiling scope that wraps `wgpu_profiler::Scope` and holds
    /// the `Mutex` guard.
    ///
    /// This type implements `Deref`/`DerefMut` to the underlying recorder
    /// (command encoder or render/compute pass), so you can use it as a
    /// drop-in replacement for the recorder.
    ///
    /// The scope is automatically closed (GPU timestamp written) when dropped.
    ///
    /// Field order is load-bearing: struct fields are dropped in declaration
    /// order, and `scope` borrows the profiler protected by `_borrow`, so
    /// `scope` must stay declared first.
    #[must_use = "the GPU scope is closed as soon as this guard is dropped"]
    pub struct GpuProfileScope<'a, Recorder: wgpu_profiler::ProfilerCommandRecorder> {
        // Must be declared (and therefore dropped) before `_borrow`.
        scope: wgpu_profiler::Scope<'a, Recorder>,
        // Keeps the profiler locked for as long as `scope` refers to it.
        _borrow: MutexGuard<'a, wgpu_profiler::GpuProfiler>,
    }

    impl<Recorder: wgpu_profiler::ProfilerCommandRecorder> std::ops::Deref
        for GpuProfileScope<'_, Recorder>
    {
        type Target = Recorder;

        /// Shared access to the recorder the scope was opened on.
        fn deref(&self) -> &Self::Target {
            &self.scope
        }
    }

    impl<Recorder: wgpu_profiler::ProfilerCommandRecorder> std::ops::DerefMut
        for GpuProfileScope<'_, Recorder>
    {
        /// Mutable access to the recorder the scope was opened on.
        fn deref_mut(&mut self) -> &mut Self::Target {
            &mut self.scope
        }
    }
}
261
262#[cfg(feature = "gpu-profiling")]
263pub use enabled::*;
264
265// ============================================================================
266// Feature: gpu-profiling DISABLED (zero-cost no-ops)
267// ============================================================================
268#[cfg(not(feature = "gpu-profiling"))]
269mod disabled {
270    use std::sync::Arc;
271
272    use crate::context::GraphicsContext;
273
274    /// No-op GPU frame profiler (gpu-profiling feature disabled).
275    ///
276    /// All methods are no-ops that compile to nothing. The `&self` signatures
277    /// match the enabled version for API compatibility.
278    pub struct GpuFrameProfiler;
279
280    impl GpuFrameProfiler {
281        /// No-op: create a new GPU frame profiler.
282        pub fn new(_context: &Arc<GraphicsContext>) -> Result<Self, GpuFrameProfilerError> {
283            Ok(Self)
284        }
285
286        /// No-op: always returns false.
287        pub fn has_timestamp_queries(&self) -> bool {
288            false
289        }
290
291        /// No-op: resolve queries.
292        pub fn resolve_queries(&self, _encoder: &mut wgpu::CommandEncoder) {}
293
294        /// No-op: end frame.
295        pub fn end_frame(&self) -> Result<(), GpuFrameProfilerError> {
296            Ok(())
297        }
298    }
299
300    /// Placeholder error type when gpu-profiling is disabled.
301    #[derive(Debug)]
302    pub struct GpuFrameProfilerError;
303
304    impl std::fmt::Display for GpuFrameProfilerError {
305        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
306            write!(f, "GPU profiling is disabled")
307        }
308    }
309
310    impl std::error::Error for GpuFrameProfilerError {}
311}
312
313#[cfg(not(feature = "gpu-profiling"))]
314pub use disabled::*;
315
316// ============================================================================
317// Convenience Macro
318// ============================================================================
319
/// Execute a block of code within a GPU profiling scope on a `Frame`.
///
/// The macro expands to `$frame.with_gpu_scope($label, $body)`, so the
/// invocation evaluates to whatever `with_gpu_scope` returns.
///
/// When the `gpu-profiling` feature is enabled and a GPU profiler is attached
/// to the frame, this creates a GPU timing scope around the block.
/// When disabled or no profiler is attached, the block is executed directly.
///
/// A trailing comma after the last argument is accepted, which keeps
/// multi-line invocations with a closure body tidy.
///
/// # Usage
///
/// ```ignore
/// use astrelis_render::gpu_profile_scope;
///
/// gpu_profile_scope!(frame, "upload_textures", |encoder| {
///     encoder.copy_buffer_to_buffer(&src, 0, &dst, 0, size);
/// });
/// ```
#[macro_export]
macro_rules! gpu_profile_scope {
    ($frame:expr, $label:expr, $body:expr $(,)?) => {
        $frame.with_gpu_scope($label, $body)
    };
}
340}