astrelis_render/gpu_profiling.rs
1//! GPU profiling via `wgpu-profiler` with puffin visualization.
2//!
3//! When the `gpu-profiling` feature is enabled, this module provides a
4//! [`GpuFrameProfiler`] that wraps `wgpu_profiler::GpuProfiler` and
5//! automatically reports GPU timing data to puffin.
6//!
7//! When the feature is disabled, all types and methods become zero-cost no-ops.
8//!
9//! # Automatic Integration
10//!
11//! The recommended usage is to attach the profiler to a [`RenderableWindow`](crate::RenderableWindow):
12//!
13//! ```ignore
14//! // At init:
15//! let ctx = GraphicsContext::new_owned_with_descriptor(
16//! GraphicsContextDescriptor::new()
17//! .request_capability::<GpuFrameProfiler>()
18//! ).await?;
19//! let profiler = Arc::new(GpuFrameProfiler::new(&ctx)?);
20//! window.set_gpu_profiler(profiler);
21//!
22//! // Each frame — GPU profiling is fully automatic:
23//! let mut frame = window.begin_drawing();
24//! frame.clear_and_render(RenderTarget::Surface, Color::BLACK, |pass| {
25//! // GPU scope "main_pass" is automatically active
26//! });
27//! frame.finish(); // auto: resolve_queries -> submit -> end_frame
28//! ```
29//!
30//! # Manual Scoping
31//!
32//! For custom GPU scopes outside of render passes:
33//!
34//! ```ignore
35//! frame.with_gpu_scope("upload_data", |encoder| {
36//! encoder.copy_buffer_to_buffer(&src, 0, &dst, 0, size);
37//! });
38//! ```
39
40use crate::capability::{GpuRequirements, RenderCapability};
41use crate::features::GpuFeatures;
42
43// ============================================================================
44// RenderCapability — works in both enabled and disabled configurations
45// ============================================================================
46
47impl RenderCapability for GpuFrameProfiler {
48 fn requirements() -> GpuRequirements {
49 // All three timestamp features are requested (best-effort), not required.
50 // wgpu-profiler gracefully degrades if any are unavailable:
51 // - TIMESTAMP_QUERY: base feature, allows timestamp writes on pass definition
52 // - TIMESTAMP_QUERY_INSIDE_ENCODERS: allows scopes on command encoders
53 // - TIMESTAMP_QUERY_INSIDE_PASSES: allows scopes on render/compute passes
54 GpuRequirements::new().request_features(
55 GpuFeatures::TIMESTAMP_QUERY
56 | GpuFeatures::TIMESTAMP_QUERY_INSIDE_ENCODERS
57 | GpuFeatures::TIMESTAMP_QUERY_INSIDE_PASSES,
58 )
59 }
60
61 fn name() -> &'static str {
62 "GpuFrameProfiler"
63 }
64}
65
66// ============================================================================
67// Feature: gpu-profiling ENABLED
68// ============================================================================
#[cfg(feature = "gpu-profiling")]
mod enabled {
    use std::sync::{Arc, Mutex, MutexGuard, PoisonError};

    use crate::context::GraphicsContext;
    use crate::features::GpuFeatures;

    /// GPU frame profiler wrapping `wgpu_profiler::GpuProfiler`.
    ///
    /// All methods take `&self` using interior mutability (`Mutex`), making it
    /// easy to share the profiler between `RenderableWindow` and `FrameContext`
    /// via `Arc<GpuFrameProfiler>`.
    ///
    /// Create one per application. The profiler is automatically driven each frame
    /// when attached to a `RenderableWindow` via `set_gpu_profiler`:
    /// - GPU scopes are created around render passes in `with_pass()` / `clear_and_render()`
    /// - Queries are resolved and the frame is ended in `FrameContext::Drop`
    ///
    /// For manual use:
    /// 1. Open GPU scopes with [`scope`](Self::scope) on command encoders or render passes.
    /// 2. Call [`resolve_queries`](Self::resolve_queries) before submitting the encoder.
    /// 3. Call [`end_frame`](Self::end_frame) after queue submit.
    ///
    /// Results are automatically forwarded to puffin via
    /// `wgpu_profiler::puffin::output_frame_to_puffin`.
    ///
    /// # Timestamp Queries
    ///
    /// If the device was created with `TIMESTAMP_QUERY` enabled (via
    /// `request_capability::<GpuFrameProfiler>()`), scopes produce actual GPU
    /// timing data. Otherwise, wgpu-profiler falls back to debug groups only.
    ///
    /// # Lock poisoning
    ///
    /// The methods on this type never panic because of a poisoned internal
    /// lock; they recover the guard and carry on. Only callers that lock the
    /// mutex themselves through [`inner`](Self::inner) observe poisoning.
    pub struct GpuFrameProfiler {
        /// Wrapped profiler; the mutex supplies the interior mutability behind the `&self` API.
        profiler: Mutex<wgpu_profiler::GpuProfiler>,
        /// Queue timestamp period captured at construction, used when processing finished frames.
        timestamp_period: f32,
        /// Whether `TIMESTAMP_QUERY` was enabled on the context at construction.
        has_timestamps: bool,
    }

    impl GpuFrameProfiler {
        /// Create a new GPU frame profiler.
        ///
        /// The profiler inspects the device features to determine whether
        /// `TIMESTAMP_QUERY` is available. If not, it still works but only
        /// records debug group labels (no timing data). The detected feature
        /// set is reported through `tracing`.
        ///
        /// # Errors
        ///
        /// Returns the `wgpu_profiler::CreationError` produced by
        /// `wgpu_profiler::GpuProfiler::new` if the inner profiler cannot be built.
        pub fn new(context: &Arc<GraphicsContext>) -> Result<Self, wgpu_profiler::CreationError> {
            let has_timestamps = context.has_feature(GpuFeatures::TIMESTAMP_QUERY);
            let has_encoder_timestamps =
                context.has_feature(GpuFeatures::TIMESTAMP_QUERY_INSIDE_ENCODERS);
            let has_pass_timestamps =
                context.has_feature(GpuFeatures::TIMESTAMP_QUERY_INSIDE_PASSES);

            if has_timestamps {
                tracing::info!(
                    "GPU profiler: TIMESTAMP_QUERY=yes, INSIDE_ENCODERS={}, INSIDE_PASSES={}",
                    if has_encoder_timestamps { "yes" } else { "no" },
                    if has_pass_timestamps { "yes" } else { "no" },
                );
                if !has_encoder_timestamps {
                    tracing::warn!(
                        "GPU profiler: TIMESTAMP_QUERY_INSIDE_ENCODERS not available — \
                         scopes on command encoders will not produce timing data"
                    );
                }
                if !has_pass_timestamps {
                    tracing::warn!(
                        "GPU profiler: TIMESTAMP_QUERY_INSIDE_PASSES not available — \
                         scopes on render/compute passes will not produce timing data"
                    );
                }
            } else {
                tracing::warn!(
                    "GPU profiler: TIMESTAMP_QUERY not enabled — debug groups only, no timing data. \
                     Use GraphicsContextDescriptor::request_capability::<GpuFrameProfiler>() to request it."
                );
            }

            let profiler = wgpu_profiler::GpuProfiler::new(
                context.device(),
                wgpu_profiler::GpuProfilerSettings::default(),
            )?;
            let timestamp_period = context.queue().get_timestamp_period();

            Ok(Self {
                profiler: Mutex::new(profiler),
                timestamp_period,
                has_timestamps,
            })
        }

        /// Lock the inner profiler, recovering the guard if the mutex is poisoned.
        ///
        /// The mutex becomes poisoned only when a thread panics while holding a
        /// guard, which in this module means a panic while a [`GpuProfileScope`]
        /// is alive or inside one of the short critical sections below.
        /// Panicking again here would abort the process whenever this is reached
        /// from a destructor during unwinding (the type-level docs describe
        /// `resolve_queries` / `end_frame` being driven from `FrameContext::Drop`),
        /// so the guard is taken regardless. The worst expected outcome is
        /// incomplete profiling data for the affected frame.
        fn lock_profiler(&self) -> MutexGuard<'_, wgpu_profiler::GpuProfiler> {
            self.profiler.lock().unwrap_or_else(PoisonError::into_inner)
        }

        /// Whether this profiler has actual GPU timestamp query support.
        ///
        /// If `false`, scopes still appear in the profiler as debug groups
        /// but without timing data.
        pub fn has_timestamp_queries(&self) -> bool {
            self.has_timestamps
        }

        /// Open a profiling scope on a command encoder or render/compute pass.
        ///
        /// The scope is automatically closed when the returned guard is dropped.
        ///
        /// Returns a [`GpuProfileScope`] that wraps the underlying `wgpu_profiler::Scope`
        /// and holds the `Mutex` guard. Access the recorder via `Deref`/`DerefMut`.
        ///
        /// # Blocking
        ///
        /// The internal lock stays held for as long as the returned scope is
        /// alive. Opening a second scope on the same profiler, or calling
        /// [`resolve_queries`](Self::resolve_queries) / [`end_frame`](Self::end_frame),
        /// blocks until the first scope is dropped. Doing so from the thread that
        /// owns the live scope never returns (`std::sync::Mutex` is not re-entrant),
        /// so drop each scope before requesting another one.
        pub fn scope<'a, Recorder: wgpu_profiler::ProfilerCommandRecorder>(
            &'a self,
            label: impl Into<String>,
            encoder_or_pass: &'a mut Recorder,
        ) -> GpuProfileScope<'a, Recorder> {
            let guard = self.lock_profiler();
            // SAFETY: the shared reference obtained through the guard is widened to `'a`
            // (the borrow of `self`). This is sound because:
            // 1. The pointee is stored inside `self.profiler`, not inside the guard, and
            //    `self` is borrowed for `'a`, so it is neither moved nor dropped during
            //    `'a`. Moving the guard into the returned struct does not move it either.
            // 2. The guard is stored in the returned `GpuProfileScope`, so the lock is held
            //    for as long as the widened reference can be used. Nobody can obtain a
            //    `&mut GpuProfiler` through the mutex in the meantime.
            // 3. `GpuProfileScope` declares `scope` before `_borrow`; fields drop in
            //    declaration order, so the scope (which uses the reference when it closes)
            //    is gone before the lock is released.
            // 4. If the returned value is leaked instead of dropped, the lock is simply
            //    never released, which still rules out a conflicting `&mut`.
            let profiler_ptr = &*guard as *const wgpu_profiler::GpuProfiler;
            let profiler_ref: &'a wgpu_profiler::GpuProfiler = unsafe { &*profiler_ptr };
            let scope = profiler_ref.scope(label, encoder_or_pass);
            GpuProfileScope {
                scope,
                _borrow: guard,
            }
        }

        /// Resolve all pending queries. Call this before submitting the encoder.
        ///
        /// Blocks while a [`GpuProfileScope`] from this profiler is still alive.
        pub fn resolve_queries(&self, encoder: &mut wgpu::CommandEncoder) {
            self.lock_profiler().resolve_queries(encoder);
        }

        /// End the current profiling frame. Call this after queue submit.
        ///
        /// Processes at most one finished frame per call and reports its results
        /// to puffin. Blocks while a [`GpuProfileScope`] from this profiler is
        /// still alive.
        ///
        /// # Errors
        ///
        /// Propagates the `wgpu_profiler::EndFrameError` returned by the inner
        /// profiler's `end_frame`. In that case no finished frame is processed
        /// during this call.
        pub fn end_frame(&self) -> Result<(), wgpu_profiler::EndFrameError> {
            let mut profiler = self.lock_profiler();
            profiler.end_frame()?;

            // Process any finished frame and report it to puffin.
            if let Some(results) = profiler.process_finished_frame(self.timestamp_period) {
                wgpu_profiler::puffin::output_frame_to_puffin(
                    &mut puffin::GlobalProfiler::lock(),
                    &results,
                );
            }

            Ok(())
        }

        /// Get a reference to the inner `Mutex<wgpu_profiler::GpuProfiler>` for advanced use.
        ///
        /// Locking it blocks while a [`GpuProfileScope`] from this profiler is
        /// alive. Unlike the methods on this type, locking it directly reports
        /// poisoning to the caller as usual.
        pub fn inner(&self) -> &Mutex<wgpu_profiler::GpuProfiler> {
            &self.profiler
        }
    }

    /// A GPU profiling scope that wraps `wgpu_profiler::Scope` and holds
    /// the `Mutex` guard.
    ///
    /// This type implements `Deref`/`DerefMut` to the underlying recorder
    /// (command encoder or render/compute pass), so you can use it as a
    /// drop-in replacement for the recorder.
    ///
    /// The scope is automatically closed (GPU timestamp written) when dropped.
    /// The profiler lock is held until then, so keep scopes short-lived and do
    /// not open another scope on the same profiler while this one exists.
    #[must_use = "the GPU scope is closed as soon as this guard is dropped"]
    pub struct GpuProfileScope<'a, Recorder: wgpu_profiler::ProfilerCommandRecorder> {
        // Field order is load-bearing: `scope` must be dropped before `_borrow`
        // so that the query is closed while the profiler lock is still held.
        scope: wgpu_profiler::Scope<'a, Recorder>,
        _borrow: MutexGuard<'a, wgpu_profiler::GpuProfiler>,
    }

    impl<Recorder: wgpu_profiler::ProfilerCommandRecorder> std::ops::Deref
        for GpuProfileScope<'_, Recorder>
    {
        type Target = Recorder;

        /// Borrow the wrapped recorder (via the inner scope's own `Deref`).
        fn deref(&self) -> &Self::Target {
            &self.scope
        }
    }

    impl<Recorder: wgpu_profiler::ProfilerCommandRecorder> std::ops::DerefMut
        for GpuProfileScope<'_, Recorder>
    {
        /// Mutably borrow the wrapped recorder (via the inner scope's own `DerefMut`).
        fn deref_mut(&mut self) -> &mut Self::Target {
            &mut self.scope
        }
    }
}
257
258#[cfg(feature = "gpu-profiling")]
259pub use enabled::*;
260
261// ============================================================================
262// Feature: gpu-profiling DISABLED (zero-cost no-ops)
263// ============================================================================
264#[cfg(not(feature = "gpu-profiling"))]
265mod disabled {
266 use std::sync::Arc;
267
268 use crate::context::GraphicsContext;
269
270 /// No-op GPU frame profiler (gpu-profiling feature disabled).
271 ///
272 /// All methods are no-ops that compile to nothing. The `&self` signatures
273 /// match the enabled version for API compatibility.
274 pub struct GpuFrameProfiler;
275
276 impl GpuFrameProfiler {
277 /// No-op: create a new GPU frame profiler.
278 pub fn new(_context: &Arc<GraphicsContext>) -> Result<Self, GpuFrameProfilerError> {
279 Ok(Self)
280 }
281
282 /// No-op: always returns false.
283 pub fn has_timestamp_queries(&self) -> bool {
284 false
285 }
286
287 /// No-op: resolve queries.
288 pub fn resolve_queries(&self, _encoder: &mut wgpu::CommandEncoder) {}
289
290 /// No-op: end frame.
291 pub fn end_frame(&self) -> Result<(), GpuFrameProfilerError> {
292 Ok(())
293 }
294 }
295
296 /// Placeholder error type when gpu-profiling is disabled.
297 #[derive(Debug)]
298 pub struct GpuFrameProfilerError;
299
300 impl std::fmt::Display for GpuFrameProfilerError {
301 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
302 write!(f, "GPU profiling is disabled")
303 }
304 }
305
306 impl std::error::Error for GpuFrameProfilerError {}
307}
308
309#[cfg(not(feature = "gpu-profiling"))]
310pub use disabled::*;
311
312// ============================================================================
313// Convenience Macro
314// ============================================================================
315
/// Execute a block of code within a GPU profiling scope on a `Frame`.
///
/// The macro is a thin forwarder: it expands to
/// `$frame.with_gpu_scope($label, $body)` and evaluates to whatever that call
/// returns. Whether a GPU timing scope is actually recorded is decided by
/// `with_gpu_scope`. It is intended to create one when the `gpu-profiling`
/// feature is enabled and a GPU profiler is attached to the frame, and to run
/// the block directly otherwise.
///
/// A trailing comma after the body is accepted, so multi-line invocations can
/// be formatted like ordinary function calls.
///
/// # Usage
///
/// ```ignore
/// use astrelis_render::gpu_profile_scope;
///
/// gpu_profile_scope!(frame, "upload_textures", |encoder| {
///     encoder.copy_buffer_to_buffer(&src, 0, &dst, 0, size);
/// });
/// ```
#[macro_export]
macro_rules! gpu_profile_scope {
    ($frame:expr, $label:expr, $body:expr $(,)?) => {
        $frame.with_gpu_scope($label, $body)
    };
}