astrelis_render/gpu_profiling.rs
1//! GPU profiling via `wgpu-profiler` with puffin visualization.
2//!
3//! When the `gpu-profiling` feature is enabled, this module provides a
4//! [`GpuFrameProfiler`] that wraps `wgpu_profiler::GpuProfiler` and
5//! automatically reports GPU timing data to puffin.
6//!
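//! When the feature is disabled, [`GpuFrameProfiler`] is replaced by a zero-cost
//! no-op stand-in that exposes `new`, `has_timestamp_queries`, `resolve_queries`
//! and `end_frame`; `scope`, `inner` and `GpuProfileScope` exist only when the
//! feature is enabled.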
8//!
9//! # Automatic Integration
10//!
11//! The recommended usage is to attach the profiler to a [`RenderableWindow`](crate::RenderableWindow):
12//!
13//! ```ignore
14//! // At init:
15//! let ctx = GraphicsContext::new_owned_with_descriptor(
16//! GraphicsContextDescriptor::new()
17//! .request_capability::<GpuFrameProfiler>()
18//! ).await?;
19//! let profiler = Arc::new(GpuFrameProfiler::new(&ctx)?);
20//! window.set_gpu_profiler(profiler);
21//!
22//! // Each frame — GPU profiling is fully automatic:
23//! let mut frame = window.begin_drawing();
24//! frame.clear_and_render(RenderTarget::Surface, Color::BLACK, |pass| {
25//! // GPU scope "main_pass" is automatically active
26//! });
27//! frame.finish(); // auto: resolve_queries -> submit -> end_frame
28//! ```
29//!
30//! # Manual Scoping
31//!
32//! For custom GPU scopes outside of render passes, use [`FrameContext::with_gpu_scope`](crate::FrameContext::with_gpu_scope):
33//!
34//! ```ignore
35//! frame.with_gpu_scope("upload_data", |encoder| {
36//! encoder.copy_buffer_to_buffer(&src, 0, &dst, 0, size);
37//! });
38//! ```
39
40use crate::capability::{GpuRequirements, RenderCapability};
41use crate::features::GpuFeatures;
42
43// ============================================================================
44// RenderCapability — works in both enabled and disabled configurations
45// ============================================================================
46
47impl RenderCapability for GpuFrameProfiler {
48 fn requirements() -> GpuRequirements {
49 // All three timestamp features are requested (best-effort), not required.
50 // wgpu-profiler gracefully degrades if any are unavailable:
51 // - TIMESTAMP_QUERY: base feature, allows timestamp writes on pass definition
52 // - TIMESTAMP_QUERY_INSIDE_ENCODERS: allows scopes on command encoders
53 // - TIMESTAMP_QUERY_INSIDE_PASSES: allows scopes on render/compute passes
54 GpuRequirements::new()
55 .request_features(
56 GpuFeatures::TIMESTAMP_QUERY
57 | GpuFeatures::TIMESTAMP_QUERY_INSIDE_ENCODERS
58 | GpuFeatures::TIMESTAMP_QUERY_INSIDE_PASSES,
59 )
60 }
61
62 fn name() -> &'static str {
63 "GpuFrameProfiler"
64 }
65}
66
67// ============================================================================
68// Feature: gpu-profiling ENABLED
69// ============================================================================
#[cfg(feature = "gpu-profiling")]
mod enabled {
    use std::sync::{Arc, Mutex, MutexGuard, PoisonError};

    use crate::context::GraphicsContext;
    use crate::features::GpuFeatures;

    /// GPU frame profiler wrapping `wgpu_profiler::GpuProfiler`.
    ///
    /// All methods take `&self` using interior mutability (`Mutex`), making it
    /// easy to share the profiler between `RenderableWindow` and `FrameContext`
    /// via `Arc<GpuFrameProfiler>`.
    ///
    /// Create one per application. The profiler is automatically driven each frame
    /// when attached to a `RenderableWindow` via `RenderableWindow::set_gpu_profiler`:
    /// - GPU scopes are created around render passes in `with_pass()` / `clear_and_render()`
    /// - Queries are resolved and the frame is ended in `FrameContext::Drop`
    ///
    /// For manual use:
    /// 1. Open GPU scopes with [`scope`](Self::scope) on command encoders or render passes.
    /// 2. Call [`resolve_queries`](Self::resolve_queries) before submitting the encoder.
    /// 3. Call [`end_frame`](Self::end_frame) after queue submit.
    ///
    /// Results are automatically forwarded to puffin via
    /// `wgpu_profiler::puffin::output_frame_to_puffin`.
    ///
    /// # Timestamp Queries
    ///
    /// If the device was created with `TIMESTAMP_QUERY` enabled (via
    /// `request_capability::<GpuFrameProfiler>()`), scopes produce actual GPU
    /// timing data. Otherwise, wgpu-profiler falls back to debug groups only.
    ///
    /// # Lock poisoning
    ///
    /// A poisoned internal lock is recovered instead of being turned into a panic.
    /// A [`GpuProfileScope`] keeps the lock held while caller code records
    /// commands, so a panic in that code poisons the mutex; propagating the
    /// poison would make every later profiler call panic as well, including the
    /// calls issued from `FrameContext::Drop`.
    pub struct GpuFrameProfiler {
        profiler: Mutex<wgpu_profiler::GpuProfiler>,
        timestamp_period: f32,
        has_timestamps: bool,
    }

    impl GpuFrameProfiler {
        /// Create a new GPU frame profiler.
        ///
        /// The profiler inspects the device features to determine whether
        /// `TIMESTAMP_QUERY` is available. If not, it still works but only
        /// records debug group labels (no timing data). The detected feature
        /// set is reported through `tracing`.
        ///
        /// # Errors
        ///
        /// Returns the `wgpu_profiler::CreationError` produced by
        /// `wgpu_profiler::GpuProfiler::new` if the underlying profiler cannot
        /// be created.
        pub fn new(context: &Arc<GraphicsContext>) -> Result<Self, wgpu_profiler::CreationError> {
            let has_timestamps = context.has_feature(GpuFeatures::TIMESTAMP_QUERY);
            let has_encoder_timestamps =
                context.has_feature(GpuFeatures::TIMESTAMP_QUERY_INSIDE_ENCODERS);
            let has_pass_timestamps =
                context.has_feature(GpuFeatures::TIMESTAMP_QUERY_INSIDE_PASSES);

            if has_timestamps {
                tracing::info!(
                    "GPU profiler: TIMESTAMP_QUERY=yes, INSIDE_ENCODERS={}, INSIDE_PASSES={}",
                    if has_encoder_timestamps { "yes" } else { "no" },
                    if has_pass_timestamps { "yes" } else { "no" },
                );
                if !has_encoder_timestamps {
                    tracing::warn!(
                        "GPU profiler: TIMESTAMP_QUERY_INSIDE_ENCODERS not available — \
                         scopes on command encoders will not produce timing data"
                    );
                }
                if !has_pass_timestamps {
                    tracing::warn!(
                        "GPU profiler: TIMESTAMP_QUERY_INSIDE_PASSES not available — \
                         scopes on render/compute passes will not produce timing data"
                    );
                }
            } else {
                tracing::warn!(
                    "GPU profiler: TIMESTAMP_QUERY not enabled — debug groups only, no timing data. \
                     Use GraphicsContextDescriptor::request_capability::<GpuFrameProfiler>() to request it."
                );
            }

            let profiler = wgpu_profiler::GpuProfiler::new(
                context.device(),
                wgpu_profiler::GpuProfilerSettings::default(),
            )?;
            let timestamp_period = context.queue().get_timestamp_period();

            Ok(Self {
                profiler: Mutex::new(profiler),
                timestamp_period,
                has_timestamps,
            })
        }

        /// Lock the wrapped profiler, recovering the guard if the mutex is poisoned.
        ///
        /// See the "Lock poisoning" section on [`GpuFrameProfiler`] for why the
        /// poison flag is ignored rather than unwrapped.
        fn lock_profiler(&self) -> MutexGuard<'_, wgpu_profiler::GpuProfiler> {
            self.profiler.lock().unwrap_or_else(PoisonError::into_inner)
        }

        /// Whether this profiler has actual GPU timestamp query support.
        ///
        /// If `false`, scopes still appear in the profiler as debug groups
        /// but without timing data.
        pub fn has_timestamp_queries(&self) -> bool {
            self.has_timestamps
        }

        /// Open a profiling scope on a command encoder or render/compute pass.
        ///
        /// The scope is automatically closed when the returned guard is dropped.
        ///
        /// Returns a [`GpuProfileScope`] that wraps the underlying `wgpu_profiler::Scope`
        /// and holds the `Mutex` guard. Access the recorder via `Deref`/`DerefMut`.
        ///
        /// # Deadlocks
        ///
        /// The internal lock stays held for as long as the returned scope is
        /// alive. Calling [`scope`](Self::scope) again (nesting),
        /// [`resolve_queries`](Self::resolve_queries),
        /// [`end_frame`](Self::end_frame) or locking [`inner`](Self::inner) on the
        /// same profiler before the scope is dropped will never return on the
        /// thread that holds the scope (a `std::sync::Mutex` is not re-entrant),
        /// and blocks other threads until the scope is dropped.
        pub fn scope<'a, Recorder: wgpu_profiler::ProfilerCommandRecorder>(
            &'a self,
            label: impl Into<String>,
            encoder_or_pass: &'a mut Recorder,
        ) -> GpuProfileScope<'a, Recorder> {
            let guard = self.lock_profiler();
            let profiler_ptr: *const wgpu_profiler::GpuProfiler = &*guard;
            // SAFETY: the reference is extended from the local guard borrow to 'a.
            // 1. The pointee lives inside `self.profiler`, and `self` is borrowed
            //    for 'a, so the `GpuProfiler` can neither move nor be dropped while
            //    the reference exists. Moving the guard into the returned struct
            //    does not move the data it protects.
            // 2. The guard is stored in the returned `GpuProfileScope`, so the lock
            //    is held for the whole life of the reference: every other access
            //    path (`resolve_queries`, `end_frame`, `inner().lock()`) goes
            //    through the same mutex and therefore cannot create a `&mut` alias.
            // 3. `GpuProfileScope` declares `scope` before `_borrow`; struct fields
            //    drop in declaration order, so the `Scope` (the only user of this
            //    reference) is dropped before the lock is released.
            let profiler_ref: &'a wgpu_profiler::GpuProfiler = unsafe { &*profiler_ptr };
            GpuProfileScope {
                scope: profiler_ref.scope(label, encoder_or_pass),
                _borrow: guard,
            }
        }

        /// Resolve all pending queries. Call this before submitting the encoder.
        ///
        /// Blocks until no [`GpuProfileScope`] created from this profiler is alive.
        pub fn resolve_queries(&self, encoder: &mut wgpu::CommandEncoder) {
            self.lock_profiler().resolve_queries(encoder);
        }

        /// End the current profiling frame. Call this after queue submit.
        ///
        /// Processes a finished frame, if one is available, and reports its
        /// results to puffin.
        ///
        /// # Errors
        ///
        /// Propagates the `wgpu_profiler::EndFrameError` returned by
        /// `wgpu_profiler::GpuProfiler::end_frame`; nothing is reported to
        /// puffin in that case.
        pub fn end_frame(&self) -> Result<(), wgpu_profiler::EndFrameError> {
            // Keep the profiler lock scoped to the profiler calls so it is already
            // released when puffin's global lock is taken below.
            let finished_frame = {
                let mut profiler = self.lock_profiler();
                profiler.end_frame()?;
                profiler.process_finished_frame(self.timestamp_period)
            };

            if let Some(results) = finished_frame {
                wgpu_profiler::puffin::output_frame_to_puffin(
                    &mut puffin::GlobalProfiler::lock(),
                    &results,
                );
            }

            Ok(())
        }

        /// Get a reference to the inner `Mutex<wgpu_profiler::GpuProfiler>` for advanced use.
        ///
        /// Locking it while a [`GpuProfileScope`] from this profiler is alive
        /// blocks until that scope is dropped.
        pub fn inner(&self) -> &Mutex<wgpu_profiler::GpuProfiler> {
            &self.profiler
        }
    }

    /// A GPU profiling scope that wraps `wgpu_profiler::Scope` and holds
    /// the `Mutex` guard.
    ///
    /// This type implements `Deref`/`DerefMut` to the underlying recorder
    /// (command encoder or render/compute pass), so you can use it as a
    /// drop-in replacement for the recorder.
    ///
    /// The scope is automatically closed when dropped.
    #[must_use = "the GPU scope is closed as soon as this guard is dropped"]
    pub struct GpuProfileScope<'a, Recorder: wgpu_profiler::ProfilerCommandRecorder> {
        // Field order is load-bearing: `scope` borrows the profiler protected by
        // `_borrow`, and fields drop in declaration order, so `scope` must come
        // first to be dropped while the lock is still held.
        scope: wgpu_profiler::Scope<'a, Recorder>,
        _borrow: MutexGuard<'a, wgpu_profiler::GpuProfiler>,
    }

    impl<Recorder: wgpu_profiler::ProfilerCommandRecorder> std::ops::Deref
        for GpuProfileScope<'_, Recorder>
    {
        type Target = Recorder;

        /// Borrow the wrapped recorder.
        fn deref(&self) -> &Self::Target {
            &self.scope
        }
    }

    impl<Recorder: wgpu_profiler::ProfilerCommandRecorder> std::ops::DerefMut
        for GpuProfileScope<'_, Recorder>
    {
        /// Mutably borrow the wrapped recorder.
        fn deref_mut(&mut self) -> &mut Self::Target {
            &mut self.scope
        }
    }
}

#[cfg(feature = "gpu-profiling")]
pub use enabled::*;
261
262// ============================================================================
263// Feature: gpu-profiling DISABLED (zero-cost no-ops)
264// ============================================================================
265#[cfg(not(feature = "gpu-profiling"))]
266mod disabled {
267 use std::sync::Arc;
268
269 use crate::context::GraphicsContext;
270
271 /// No-op GPU frame profiler (gpu-profiling feature disabled).
272 ///
273 /// All methods are no-ops that compile to nothing. The `&self` signatures
274 /// match the enabled version for API compatibility.
275 pub struct GpuFrameProfiler;
276
277 impl GpuFrameProfiler {
278 /// No-op: create a new GPU frame profiler.
279 pub fn new(_context: &Arc<GraphicsContext>) -> Result<Self, GpuFrameProfilerError> {
280 Ok(Self)
281 }
282
283 /// No-op: always returns false.
284 pub fn has_timestamp_queries(&self) -> bool {
285 false
286 }
287
288 /// No-op: resolve queries.
289 pub fn resolve_queries(&self, _encoder: &mut wgpu::CommandEncoder) {}
290
291 /// No-op: end frame.
292 pub fn end_frame(&self) -> Result<(), GpuFrameProfilerError> {
293 Ok(())
294 }
295 }
296
297 /// Placeholder error type when gpu-profiling is disabled.
298 #[derive(Debug)]
299 pub struct GpuFrameProfilerError;
300
301 impl std::fmt::Display for GpuFrameProfilerError {
302 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
303 write!(f, "GPU profiling is disabled")
304 }
305 }
306
307 impl std::error::Error for GpuFrameProfilerError {}
308}
309
310#[cfg(not(feature = "gpu-profiling"))]
311pub use disabled::*;
312
313// ============================================================================
314// Convenience Macro
315// ============================================================================
316
/// Execute a block of code within a GPU profiling scope on a `FrameContext`.
///
/// When the `gpu-profiling` feature is enabled and a GPU profiler is attached
/// to the frame, this creates a GPU timing scope around the block.
/// When disabled or no profiler is attached, the block is executed directly.
///
/// This is syntactic sugar for [`FrameContext::with_gpu_scope`](crate::FrameContext::with_gpu_scope):
/// the macro expands to `$frame.with_gpu_scope($label, $body)` and evaluates to
/// whatever that call returns.
///
/// A trailing comma after the body is accepted, so multi-line invocations can
/// be formatted like an ordinary function call.
///
/// # Usage
///
/// ```ignore
/// use astrelis_render::gpu_profile_scope;
///
/// gpu_profile_scope!(frame, "upload_textures", |encoder| {
///     encoder.copy_buffer_to_buffer(&src, 0, &dst, 0, size);
/// });
///
/// gpu_profile_scope!(
///     frame,
///     "upload_textures",
///     |encoder| encoder.copy_buffer_to_buffer(&src, 0, &dst, 0, size),
/// );
/// ```
#[macro_export]
macro_rules! gpu_profile_scope {
    ($frame:expr, $label:expr, $body:expr $(,)?) => {
        $frame.with_gpu_scope($label, $body)
    };
}