Skip to main content

oximedia_gpu/
gpu_profiler.rs

1//! GPU profiling and timing utilities.
2//!
3//! Provides timestamp-based GPU profiling with named scopes for measuring
4//! execution time of GPU operations, pipelines, and individual passes.
5
6#![allow(dead_code)]
7
8use std::collections::HashMap;
9use std::time::{Duration, Instant};
10
/// A single timing sample for a GPU operation.
///
/// Records when the operation began and, once [`finish`](Self::finish) has
/// been called, when it ended. Both instants are read from the host's
/// monotonic clock ([`Instant`]).
#[derive(Debug, Clone)]
pub struct GpuTimestamp {
    /// Human-readable label for this timestamp.
    pub label: String,
    /// Monotonic clock reading taken when the operation began.
    pub start: Instant,
    /// Monotonic clock reading taken when the operation ended, if completed.
    pub end: Option<Instant>,
}

impl GpuTimestamp {
    /// Create a new, unfinished timestamp whose start time is "now".
    ///
    /// `label` is converted into an owned `String` and stored as-is.
    #[must_use]
    pub fn begin(label: impl Into<String>) -> Self {
        // Convert the label first so the conversion is not part of the
        // measured interval.
        let label = label.into();
        Self {
            label,
            start: Instant::now(),
            end: None,
        }
    }

    /// Mark this timestamp as finished by recording the current instant.
    ///
    /// Calling this again overwrites the previously recorded end time.
    pub fn finish(&mut self) {
        self.end = Some(Instant::now());
    }

    /// Return the elapsed duration, or `None` if not yet finished.
    ///
    /// `start` and `end` are public, so `end` can be set to an instant that
    /// precedes `start`. That case yields a zero duration instead of relying
    /// on the version-dependent behaviour of `Instant::duration_since`.
    #[must_use]
    pub fn elapsed(&self) -> Option<Duration> {
        self.end
            .map(|end| end.saturating_duration_since(self.start))
    }

    /// Return elapsed microseconds as `f64`, or `None` if not yet finished.
    ///
    /// The value keeps sub-microsecond precision as a fractional part.
    // Nanosecond counts beyond 2^53 lose precision in `f64`; that is
    // acceptable for profiling output.
    #[allow(clippy::cast_precision_loss)]
    #[must_use]
    pub fn elapsed_us(&self) -> Option<f64> {
        self.elapsed().map(|d| d.as_nanos() as f64 / 1_000.0)
    }
}
53
54/// An RAII guard that automatically finishes a [`GpuTimestamp`] on drop.
55///
56/// Created via [`GpuProfiler::scope`].
57pub struct GpuProfilerScope<'a> {
58    profiler: &'a mut GpuProfiler,
59    key: String,
60}
61
62impl<'a> GpuProfilerScope<'a> {
63    fn new(profiler: &'a mut GpuProfiler, key: String) -> Self {
64        Self { profiler, key }
65    }
66}
67
68impl Drop for GpuProfilerScope<'_> {
69    fn drop(&mut self) {
70        self.profiler.end_scope(&self.key);
71    }
72}
73
/// Aggregate statistics for a named GPU scope.
///
/// A default-constructed value represents "no samples yet": `count` is zero,
/// `total` is zero, and `min`/`max` are `None`.
#[derive(Debug, Clone, Default)]
pub struct ScopeStats {
    /// Total number of samples collected.
    pub count: u64,
    /// Accumulated duration of all samples.
    pub total: Duration,
    /// Minimum single-sample duration, or `None` before the first sample.
    pub min: Option<Duration>,
    /// Maximum single-sample duration, or `None` before the first sample.
    pub max: Option<Duration>,
}

impl ScopeStats {
    /// Record a new sample duration.
    ///
    /// Increments the sample count, adds `d` to the running total, and
    /// updates the minimum and maximum.
    pub fn record(&mut self, d: Duration) {
        self.count += 1;
        self.total += d;
        // The first sample seeds both extremes; later samples only tighten them.
        self.min = Some(self.min.map_or(d, |current| current.min(d)));
        self.max = Some(self.max.map_or(d, |current| current.max(d)));
    }

    /// Compute the mean duration across all samples.
    ///
    /// Returns `None` when no samples have been recorded. The result is
    /// rounded down to whole nanoseconds.
    // The casts below cannot truncate; see the comment at the cast site.
    #[allow(clippy::cast_possible_truncation)]
    #[must_use]
    pub fn mean(&self) -> Option<Duration> {
        const NANOS_PER_SEC: u128 = 1_000_000_000;

        if self.count == 0 {
            return None;
        }

        // Divide in u128 nanoseconds so the full `u64` sample count is
        // honoured. Narrowing the count to `u32` would give wrong results
        // above `u32::MAX` and divide by zero at multiples of 2^32.
        let mean_nanos = self.total.as_nanos() / u128::from(self.count);

        // The mean never exceeds `total`, so its whole seconds fit in `u64`;
        // the remainder is below one billion and fits in `u32`.
        let secs = (mean_nanos / NANOS_PER_SEC) as u64;
        let subsec_nanos = (mean_nanos % NANOS_PER_SEC) as u32;
        Some(Duration::new(secs, subsec_nanos))
    }
}
106
107/// Central GPU profiler that owns all active and completed timestamps.
108///
109/// # Example
110///
111/// ```
112/// use oximedia_gpu::gpu_profiler::GpuProfiler;
113///
114/// let mut profiler = GpuProfiler::new();
115/// profiler.begin("tonemap");
116/// // ... GPU work ...
117/// profiler.end("tonemap");
118/// let summary = profiler.summary();
119/// assert!(summary.contains_key("tonemap"));
120/// ```
121#[derive(Debug, Default)]
122pub struct GpuProfiler {
123    /// Currently active (open) timestamps keyed by scope label.
124    active: HashMap<String, GpuTimestamp>,
125    /// Accumulated statistics per scope label.
126    stats: HashMap<String, ScopeStats>,
127}
128
129impl GpuProfiler {
130    /// Create a new, empty profiler.
131    #[must_use]
132    pub fn new() -> Self {
133        Self::default()
134    }
135
136    /// Begin a new profiling scope with the given label.
137    ///
138    /// If a scope with this label is already active it is overwritten.
139    pub fn begin(&mut self, label: impl Into<String>) {
140        let label = label.into();
141        self.active
142            .insert(label.clone(), GpuTimestamp::begin(label));
143    }
144
145    /// End an active scope identified by `label`.
146    ///
147    /// Records the elapsed time into cumulative stats. Does nothing if the
148    /// label is not currently active.
149    pub fn end(&mut self, label: &str) {
150        if let Some(mut ts) = self.active.remove(label) {
151            ts.finish();
152            if let Some(d) = ts.elapsed() {
153                self.stats.entry(label.to_owned()).or_default().record(d);
154            }
155        }
156    }
157
158    /// Internal helper used by `GpuProfilerScope::drop`.
159    fn end_scope(&mut self, key: &str) {
160        self.end(key);
161    }
162
163    /// Open a scope and return an RAII guard that calls `end` on drop.
164    pub fn scope(&mut self, label: impl Into<String>) -> GpuProfilerScope<'_> {
165        let key = label.into();
166        self.begin(key.clone());
167        GpuProfilerScope::new(self, key)
168    }
169
170    /// Return a snapshot of the accumulated statistics for every scope.
171    #[must_use]
172    pub fn summary(&self) -> &HashMap<String, ScopeStats> {
173        &self.stats
174    }
175
176    /// Reset all statistics and discard any active scopes.
177    pub fn reset(&mut self) {
178        self.active.clear();
179        self.stats.clear();
180    }
181
182    /// Return the number of distinct scope labels that have been recorded.
183    #[must_use]
184    pub fn scope_count(&self) -> usize {
185        self.stats.len()
186    }
187
188    /// Return `true` if there are no recorded statistics.
189    #[must_use]
190    pub fn is_empty(&self) -> bool {
191        self.stats.is_empty()
192    }
193}
194
#[cfg(test)]
mod tests {
    use super::*;
    use std::thread;

    /// Run one complete begin/end cycle for `label` on `profiler`.
    fn sample(profiler: &mut GpuProfiler, label: &str) {
        profiler.begin(label);
        profiler.end(label);
    }

    // ---- GpuTimestamp -----------------------------------------------------

    #[test]
    fn timestamp_begin_not_finished() {
        let stamp = GpuTimestamp::begin("test");
        assert_eq!(stamp.label, "test");
        assert_eq!(stamp.end, None);
        assert_eq!(stamp.elapsed(), None);
    }

    #[test]
    fn timestamp_finish_elapsed() {
        let pause = Duration::from_millis(1);
        let mut stamp = GpuTimestamp::begin("op");
        thread::sleep(pause);
        stamp.finish();
        let measured = stamp.elapsed().expect("should have elapsed");
        assert!(measured >= pause);
    }

    #[test]
    fn timestamp_elapsed_us_some() {
        let mut stamp = GpuTimestamp::begin("op");
        stamp.finish();
        let micros = stamp.elapsed_us();
        assert!(micros.is_some());
    }

    #[test]
    fn timestamp_elapsed_us_none_when_unfinished() {
        let stamp = GpuTimestamp::begin("op");
        let micros = stamp.elapsed_us();
        assert!(micros.is_none());
    }

    // ---- ScopeStats -------------------------------------------------------

    #[test]
    fn scope_stats_empty_mean_none() {
        let stats = ScopeStats::default();
        assert_eq!(stats.mean(), None);
    }

    #[test]
    fn scope_stats_records_single() {
        let only = Duration::from_millis(10);
        let mut stats = ScopeStats::default();
        stats.record(only);
        assert_eq!(stats.count, 1);
        assert_eq!(stats.mean(), Some(only));
    }

    #[test]
    fn scope_stats_min_max() {
        let (short, long) = (Duration::from_millis(5), Duration::from_millis(15));
        let mut stats = ScopeStats::default();
        stats.record(short);
        stats.record(long);
        assert_eq!(stats.min, Some(short));
        assert_eq!(stats.max, Some(long));
    }

    // ---- GpuProfiler ------------------------------------------------------

    #[test]
    fn profiler_begin_end_records_stats() {
        let mut profiler = GpuProfiler::new();
        sample(&mut profiler, "pass");
        let summary = profiler.summary();
        assert!(summary.contains_key("pass"));
        assert_eq!(summary["pass"].count, 1);
    }

    #[test]
    fn profiler_end_unknown_label_no_panic() {
        // Ending a label that was never begun must be a silent no-op.
        GpuProfiler::new().end("nonexistent");
    }

    #[test]
    fn profiler_scope_raii() {
        let mut profiler = GpuProfiler::new();
        // Dropping the guard is what ends the scope and records the sample.
        drop(profiler.scope("render"));
        assert!(profiler.summary().contains_key("render"));
    }

    #[test]
    fn profiler_reset_clears_all() {
        let mut profiler = GpuProfiler::new();
        sample(&mut profiler, "x");
        profiler.reset();
        assert!(profiler.is_empty());
        assert_eq!(profiler.scope_count(), 0);
    }

    #[test]
    fn profiler_scope_count() {
        let mut profiler = GpuProfiler::new();
        sample(&mut profiler, "a");
        sample(&mut profiler, "b");
        assert_eq!(profiler.scope_count(), 2);
    }

    #[test]
    fn profiler_multiple_samples_accumulate() {
        let mut profiler = GpuProfiler::new();
        (0..3).for_each(|_| sample(&mut profiler, "pass"));
        assert_eq!(profiler.summary()["pass"].count, 3);
    }

    #[test]
    fn profiler_is_empty_initially() {
        assert!(GpuProfiler::new().is_empty());
    }

    #[test]
    fn profiler_default_equals_new() {
        assert!(GpuProfiler::default().is_empty());
    }
}