astrelis_render/gpu_profiling.rs
1//! GPU profiling via `wgpu-profiler` with puffin visualization.
2//!
3//! When the `gpu-profiling` feature is enabled, this module provides a
4//! [`GpuFrameProfiler`] that wraps `wgpu_profiler::GpuProfiler` and
5//! automatically reports GPU timing data to puffin.
6//!
7//! When the feature is disabled, all types and methods become zero-cost no-ops.
8//!
9//! # Automatic Integration
10//!
11//! The recommended usage is to attach the profiler to a [`RenderWindow`](crate::RenderWindow):
12//!
13//! ```ignore
14//! // At init:
15//! let ctx = GraphicsContext::new_owned_with_descriptor(
16//! GraphicsContextDescriptor::new()
17//! .request_capability::<GpuFrameProfiler>()
18//! ).await?;
19//! let profiler = Arc::new(GpuFrameProfiler::new(&ctx)?);
20//! window.set_gpu_profiler(profiler);
21//!
22//! // Each frame — GPU profiling is fully automatic:
23//! let mut frame = window.begin_drawing();
24//! {
25//! let mut pass = frame.render_pass()
26//! .target(RenderTarget::Surface)
27//! .clear_color(Color::BLACK)
28//! .build();
29//! // GPU scope "main_pass" is automatically active
30//! }
31//! frame.finish(); // auto: resolve_queries -> submit -> end_frame
32//! ```
33//!
34//! # Manual Scoping
35//!
36//! For custom GPU scopes outside of render passes:
37//!
38//! ```ignore
39//! frame.with_gpu_scope("upload_data", |encoder| {
40//! encoder.copy_buffer_to_buffer(&src, 0, &dst, 0, size);
41//! });
42//! ```
43
44use crate::capability::{GpuRequirements, RenderCapability};
45use crate::features::GpuFeatures;
46
47// ============================================================================
48// RenderCapability — works in both enabled and disabled configurations
49// ============================================================================
50
51impl RenderCapability for GpuFrameProfiler {
52 fn requirements() -> GpuRequirements {
53 // All three timestamp features are requested (best-effort), not required.
54 // wgpu-profiler gracefully degrades if any are unavailable:
55 // - TIMESTAMP_QUERY: base feature, allows timestamp writes on pass definition
56 // - TIMESTAMP_QUERY_INSIDE_ENCODERS: allows scopes on command encoders
57 // - TIMESTAMP_QUERY_INSIDE_PASSES: allows scopes on render/compute passes
58 GpuRequirements::new().request_features(
59 GpuFeatures::TIMESTAMP_QUERY
60 | GpuFeatures::TIMESTAMP_QUERY_INSIDE_ENCODERS
61 | GpuFeatures::TIMESTAMP_QUERY_INSIDE_PASSES,
62 )
63 }
64
65 fn name() -> &'static str {
66 "GpuFrameProfiler"
67 }
68}
69
70// ============================================================================
71// Feature: gpu-profiling ENABLED
72// ============================================================================
#[cfg(feature = "gpu-profiling")]
mod enabled {
    use std::sync::{Arc, Mutex, MutexGuard};

    use crate::context::GraphicsContext;
    use crate::features::GpuFeatures;

    /// GPU frame profiler wrapping `wgpu_profiler::GpuProfiler`.
    ///
    /// All methods take `&self` using interior mutability (`Mutex`), making it
    /// easy to share the profiler between `RenderWindow` and `Frame`
    /// via `Arc<GpuFrameProfiler>`.
    ///
    /// Create one per application. The profiler is automatically driven each frame
    /// when attached to a `RenderWindow` via `RenderWindow::set_gpu_profiler`:
    /// - GPU scopes are created around render passes
    /// - Queries are resolved and the frame is ended in `Frame::Drop`
    ///
    /// For manual use:
    /// 1. Open GPU scopes with [`scope`](Self::scope) on command encoders or render passes.
    /// 2. Call [`resolve_queries`](Self::resolve_queries) before submitting the encoder.
    /// 3. Call [`end_frame`](Self::end_frame) after queue submit.
    ///
    /// Every scope must be dropped before steps 2 and 3: a live
    /// [`GpuProfileScope`] keeps the internal mutex locked.
    ///
    /// Results are automatically forwarded to puffin via
    /// `wgpu_profiler::puffin::output_frame_to_puffin`.
    ///
    /// # Timestamp Queries
    ///
    /// If the device was created with `TIMESTAMP_QUERY` enabled (via
    /// `request_capability::<GpuFrameProfiler>()`), scopes produce actual GPU
    /// timing data. Otherwise, wgpu-profiler falls back to debug groups only.
    pub struct GpuFrameProfiler {
        /// The wrapped profiler; the mutex provides the `&mut` access that
        /// `resolve_queries` / `end_frame` / `process_finished_frame` need.
        profiler: Mutex<wgpu_profiler::GpuProfiler>,
        /// Value of `Queue::get_timestamp_period()` captured at construction,
        /// passed to `process_finished_frame`.
        timestamp_period: f32,
        /// Whether the context reported `TIMESTAMP_QUERY` at construction.
        has_timestamps: bool,
    }

    impl GpuFrameProfiler {
        /// Create a new GPU frame profiler.
        ///
        /// The profiler inspects the device features to determine whether
        /// `TIMESTAMP_QUERY` is available. If not, it still works but only
        /// records debug group labels (no timing data). The detected feature
        /// set is logged through `tracing`.
        ///
        /// # Errors
        ///
        /// Returns the `wgpu_profiler::CreationError` produced by
        /// `wgpu_profiler::GpuProfiler::new`.
        pub fn new(context: &Arc<GraphicsContext>) -> Result<Self, wgpu_profiler::CreationError> {
            let has_timestamps = context.has_feature(GpuFeatures::TIMESTAMP_QUERY);
            let has_encoder_timestamps =
                context.has_feature(GpuFeatures::TIMESTAMP_QUERY_INSIDE_ENCODERS);
            let has_pass_timestamps =
                context.has_feature(GpuFeatures::TIMESTAMP_QUERY_INSIDE_PASSES);

            if has_timestamps {
                tracing::info!(
                    "GPU profiler: TIMESTAMP_QUERY=yes, INSIDE_ENCODERS={}, INSIDE_PASSES={}",
                    if has_encoder_timestamps { "yes" } else { "no" },
                    if has_pass_timestamps { "yes" } else { "no" },
                );
                if !has_encoder_timestamps {
                    tracing::warn!(
                        "GPU profiler: TIMESTAMP_QUERY_INSIDE_ENCODERS not available — \
                         scopes on command encoders will not produce timing data"
                    );
                }
                if !has_pass_timestamps {
                    tracing::warn!(
                        "GPU profiler: TIMESTAMP_QUERY_INSIDE_PASSES not available — \
                         scopes on render/compute passes will not produce timing data"
                    );
                }
            } else {
                tracing::warn!(
                    "GPU profiler: TIMESTAMP_QUERY not enabled — debug groups only, no timing data. \
                     Use GraphicsContextDescriptor::request_capability::<GpuFrameProfiler>() to request it."
                );
            }

            let profiler = wgpu_profiler::GpuProfiler::new(
                context.device(),
                wgpu_profiler::GpuProfilerSettings::default(),
            )?;
            let timestamp_period = context.queue().get_timestamp_period();

            Ok(Self {
                profiler: Mutex::new(profiler),
                timestamp_period,
                has_timestamps,
            })
        }

        /// Whether this profiler has actual GPU timestamp query support.
        ///
        /// If `false`, scopes still appear in the profiler as debug groups
        /// but without timing data.
        pub fn has_timestamp_queries(&self) -> bool {
            self.has_timestamps
        }

        /// Lock the wrapped profiler.
        ///
        /// Panics if the lock is poisoned, i.e. another thread panicked while
        /// holding it.
        fn lock_profiler(&self) -> MutexGuard<'_, wgpu_profiler::GpuProfiler> {
            self.profiler.lock().expect("GPU profiler mutex poisoned")
        }

        /// Open a profiling scope on a command encoder or render/compute pass.
        ///
        /// The scope is automatically closed when the returned guard is dropped.
        ///
        /// Returns a [`GpuProfileScope`] that wraps the underlying `wgpu_profiler::Scope`
        /// and holds the `Mutex` guard. Access the recorder via `Deref`/`DerefMut`.
        ///
        /// # Locking
        ///
        /// The returned guard keeps the internal mutex locked until it is
        /// dropped. Calling [`scope`](Self::scope),
        /// [`resolve_queries`](Self::resolve_queries),
        /// [`end_frame`](Self::end_frame) or locking [`inner`](Self::inner)
        /// while a scope is alive therefore blocks; on the same thread
        /// `std::sync::Mutex` will deadlock or panic. Drop the scope first.
        ///
        /// # Panics
        ///
        /// Panics if the internal profiler lock is poisoned.
        pub fn scope<'a, Recorder: wgpu_profiler::ProfilerCommandRecorder>(
            &'a self,
            label: impl Into<String>,
            encoder_or_pass: &'a mut Recorder,
        ) -> GpuProfileScope<'a, Recorder> {
            let profiler = self.lock_profiler();
            // SAFETY: the reference is detached from the local guard borrow and
            // given the lifetime of `&self` ('a). This is sound because:
            // 1. The GpuProfiler is stored inside `self.profiler`, which cannot
            //    move or be dropped while `&'a self` is borrowed.
            // 2. GpuProfiler::scope() only needs &self (immutable borrow).
            // 3. The MutexGuard is moved into the returned GpuProfileScope next
            //    to the scope, so nobody can obtain `&mut GpuProfiler` through
            //    the mutex while the reference is in use. GpuProfileScope
            //    declares `scope` before `_borrow`, and fields drop in
            //    declaration order, so the scope (and this reference) is gone
            //    before the lock is released. The fields are private and the
            //    guard only derefs to the recorder, so the reference cannot
            //    escape.
            let profiler_ptr = &*profiler as *const wgpu_profiler::GpuProfiler;
            let profiler_ref: &'a wgpu_profiler::GpuProfiler = unsafe { &*profiler_ptr };
            let scope = profiler_ref.scope(label, encoder_or_pass);
            GpuProfileScope {
                scope,
                _borrow: profiler,
            }
        }

        /// Resolve all pending queries. Call this before submitting the encoder.
        ///
        /// Blocks while a [`GpuProfileScope`] from this profiler is alive.
        ///
        /// # Panics
        ///
        /// Panics if the internal profiler lock is poisoned.
        pub fn resolve_queries(&self, encoder: &mut wgpu::CommandEncoder) {
            self.lock_profiler().resolve_queries(encoder);
        }

        /// End the current profiling frame. Call this after queue submit.
        ///
        /// Processes a finished frame, if one is available, and reports its
        /// results to puffin. Blocks while a [`GpuProfileScope`] from this
        /// profiler is alive.
        ///
        /// # Errors
        ///
        /// Returns the `wgpu_profiler::EndFrameError` reported by
        /// `GpuProfiler::end_frame`; nothing is reported to puffin in that case.
        ///
        /// # Panics
        ///
        /// Panics if the internal profiler lock is poisoned.
        pub fn end_frame(&self) -> Result<(), wgpu_profiler::EndFrameError> {
            // Only the profiler-side work needs the mutex. The guard is released
            // at the end of this block (or on the early `?` return) so that the
            // profiler mutex and puffin's global lock are never held together.
            let finished = {
                let mut profiler = self.lock_profiler();
                profiler.end_frame()?;
                profiler.process_finished_frame(self.timestamp_period)
            };

            // Report the finished frame (if any) to puffin.
            if let Some(results) = finished {
                wgpu_profiler::puffin::output_frame_to_puffin(
                    &mut puffin::GlobalProfiler::lock(),
                    &results,
                );
            }

            Ok(())
        }

        /// Get a reference to the inner `Mutex<wgpu_profiler::GpuProfiler>` for advanced use.
        ///
        /// Locking it blocks while a [`GpuProfileScope`] from this profiler is
        /// alive, because the scope holds the same lock.
        pub fn inner(&self) -> &Mutex<wgpu_profiler::GpuProfiler> {
            &self.profiler
        }
    }

    /// A GPU profiling scope that wraps `wgpu_profiler::Scope` and holds
    /// the `Mutex` guard.
    ///
    /// This type implements `Deref`/`DerefMut` to the underlying recorder
    /// (command encoder or render/compute pass), so you can use it as a
    /// drop-in replacement for the recorder.
    ///
    /// The scope is automatically closed (GPU timestamp written) when dropped.
    #[must_use = "the GPU scope is closed as soon as this guard is dropped"]
    pub struct GpuProfileScope<'a, Recorder: wgpu_profiler::ProfilerCommandRecorder> {
        // Field order is load-bearing: fields drop in declaration order, so
        // `scope` (which borrows the profiler behind the mutex) is dropped
        // while `_borrow` still holds the lock. Do not reorder.
        scope: wgpu_profiler::Scope<'a, Recorder>,
        _borrow: MutexGuard<'a, wgpu_profiler::GpuProfiler>,
    }

    impl<Recorder: wgpu_profiler::ProfilerCommandRecorder> std::ops::Deref
        for GpuProfileScope<'_, Recorder>
    {
        type Target = Recorder;

        /// Borrow the recorder the scope was opened on.
        fn deref(&self) -> &Self::Target {
            &self.scope
        }
    }

    impl<Recorder: wgpu_profiler::ProfilerCommandRecorder> std::ops::DerefMut
        for GpuProfileScope<'_, Recorder>
    {
        /// Mutably borrow the recorder the scope was opened on.
        fn deref_mut(&mut self) -> &mut Self::Target {
            &mut self.scope
        }
    }
}
261
262#[cfg(feature = "gpu-profiling")]
263pub use enabled::*;
264
265// ============================================================================
266// Feature: gpu-profiling DISABLED (zero-cost no-ops)
267// ============================================================================
268#[cfg(not(feature = "gpu-profiling"))]
269mod disabled {
270 use std::sync::Arc;
271
272 use crate::context::GraphicsContext;
273
274 /// No-op GPU frame profiler (gpu-profiling feature disabled).
275 ///
276 /// All methods are no-ops that compile to nothing. The `&self` signatures
277 /// match the enabled version for API compatibility.
278 pub struct GpuFrameProfiler;
279
280 impl GpuFrameProfiler {
281 /// No-op: create a new GPU frame profiler.
282 pub fn new(_context: &Arc<GraphicsContext>) -> Result<Self, GpuFrameProfilerError> {
283 Ok(Self)
284 }
285
286 /// No-op: always returns false.
287 pub fn has_timestamp_queries(&self) -> bool {
288 false
289 }
290
291 /// No-op: resolve queries.
292 pub fn resolve_queries(&self, _encoder: &mut wgpu::CommandEncoder) {}
293
294 /// No-op: end frame.
295 pub fn end_frame(&self) -> Result<(), GpuFrameProfilerError> {
296 Ok(())
297 }
298 }
299
300 /// Placeholder error type when gpu-profiling is disabled.
301 #[derive(Debug)]
302 pub struct GpuFrameProfilerError;
303
304 impl std::fmt::Display for GpuFrameProfilerError {
305 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
306 write!(f, "GPU profiling is disabled")
307 }
308 }
309
310 impl std::error::Error for GpuFrameProfilerError {}
311}
312
313#[cfg(not(feature = "gpu-profiling"))]
314pub use disabled::*;
315
316// ============================================================================
317// Convenience Macro
318// ============================================================================
319
/// Execute a block of code within a GPU profiling scope on a `Frame`.
///
/// When the `gpu-profiling` feature is enabled and a GPU profiler is attached
/// to the frame, this creates a GPU timing scope around the block.
/// When disabled or no profiler is attached, the block is executed directly.
///
/// The macro expands to `$frame.with_gpu_scope($label, $body)` and evaluates
/// to whatever that call returns. A trailing comma after the body is accepted,
/// so multi-line invocations can be formatted like a function call.
///
/// # Usage
///
/// ```ignore
/// use astrelis_render::gpu_profile_scope;
///
/// gpu_profile_scope!(frame, "upload_textures", |encoder| {
///     encoder.copy_buffer_to_buffer(&src, 0, &dst, 0, size);
/// });
/// ```
#[macro_export]
macro_rules! gpu_profile_scope {
    ($frame:expr, $label:expr, $body:expr $(,)?) => {
        $frame.with_gpu_scope($label, $body)
    };
}