Skip to main content

oxiui_render_wgpu/gpu/
frame_pacing.rs

//! GPU frame timing and adaptive present-mode selection.
//!
//! [`FrameTimer`] wraps wgpu's `QuerySet` timestamp API to measure how long
//! each frame's GPU work takes.  A rolling histogram accumulates the last N
//! frame times so the caller can make data-driven decisions about present mode
//! and quality settings.
//!
//! # Feature gate
//!
//! Timestamp queries require [`wgpu::Features::TIMESTAMP_QUERY`].  The timer
//! gracefully degrades to CPU-side `std::time::Instant` measurements when the
//! feature is unavailable.
//!
//! # Present-mode heuristic
//!
//! - If the 99th-percentile frame time exceeds `target_frame_ms * 1.5`, the
//!   timer recommends switching to `Fifo` (V-sync) to reduce latency.
//! - If the 99th-percentile frame time stays below `target_frame_ms * 0.5`,
//!   `Mailbox` (triple-buffered, tearing-free) is recommended.
//! - Otherwise, `Fifo` (V-sync) is the safe default recommendation.
21
22use std::time::{Duration, Instant};
23
24// ── FrameHistogram ────────────────────────────────────────────────────────────
25
/// Capacity of the [`FrameHistogram`] ring buffer, in samples.
const HISTOGRAM_SIZE: usize = 64;

/// Rolling histogram of the last `HISTOGRAM_SIZE` frame durations (in µs).
///
/// Backed by a fixed-size circular buffer: once `HISTOGRAM_SIZE` samples have
/// been pushed, each new sample overwrites the oldest one.
#[derive(Debug, Clone)]
pub struct FrameHistogram {
    /// Sample storage; only the first `count` slots hold recorded values.
    samples: [u64; HISTOGRAM_SIZE],
    /// Slot that the next `push` will write to.
    head: usize,
    /// Number of recorded samples, saturating at `HISTOGRAM_SIZE`.
    count: usize,
}

// Written by hand because the standard library only provides `Default` for
// arrays of up to 32 elements, so it cannot be derived for `[u64; 64]`.
impl Default for FrameHistogram {
    fn default() -> Self {
        Self {
            samples: [0u64; HISTOGRAM_SIZE],
            head: 0,
            count: 0,
        }
    }
}

impl FrameHistogram {
    /// The slots that currently hold recorded samples (in storage order, not
    /// chronological order).
    ///
    /// While the buffer is filling, `head == count`, so the recorded samples
    /// are exactly `samples[..count]`; once full, every slot is recorded.
    fn recorded(&self) -> &[u64] {
        &self.samples[..self.count]
    }

    /// Push a new frame duration (in microseconds).
    ///
    /// When the histogram is full the oldest sample is overwritten.
    pub fn push(&mut self, duration_us: u64) {
        self.samples[self.head] = duration_us;
        self.head = (self.head + 1) % HISTOGRAM_SIZE;
        self.count = (self.count + 1).min(HISTOGRAM_SIZE);
    }

    /// Number of samples currently held.
    pub fn len(&self) -> usize {
        self.count
    }

    /// Return `true` if no samples have been recorded yet.
    pub fn is_empty(&self) -> bool {
        self.count == 0
    }

    /// Mean frame duration in microseconds over the recorded samples.
    ///
    /// Returns `0.0` when the histogram is empty.
    pub fn mean_us(&self) -> f64 {
        let recorded = self.recorded();
        if recorded.is_empty() {
            return 0.0;
        }
        // Accumulate in u128: a u64 accumulator overflows (panic in debug,
        // wrap in release) as soon as two very large samples are present.
        let sum: u128 = recorded.iter().map(|&us| u128::from(us)).sum();
        sum as f64 / recorded.len() as f64
    }

    /// Minimum frame duration in the histogram (µs), or `0` when empty.
    pub fn min_us(&self) -> u64 {
        self.recorded().iter().copied().min().unwrap_or(0)
    }

    /// Maximum frame duration in the histogram (µs), or `0` when empty.
    pub fn max_us(&self) -> u64 {
        self.recorded().iter().copied().max().unwrap_or(0)
    }

    /// Approximate p99 frame duration (µs): the 99th percentile of the sorted
    /// samples in the histogram, or `0` when empty.
    ///
    /// The index is `floor(len * 0.99)` clamped to the last element, so with
    /// at most `HISTOGRAM_SIZE` (64) samples this is the largest sample.
    pub fn p99_us(&self) -> u64 {
        let len = self.count;
        if len == 0 {
            return 0;
        }
        // The array is `Copy`, so sort a stack copy instead of allocating.
        let mut scratch = self.samples;
        let sorted = &mut scratch[..len];
        sorted.sort_unstable();
        let idx = ((len as f64 * 0.99) as usize).min(len - 1);
        sorted[idx]
    }
}
104
105// ── FrameTimerMode ────────────────────────────────────────────────────────────
106
/// Source of the frame-time measurements taken by the timer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FrameTimerMode {
    /// Timings come from GPU timestamp queries (high accuracy; needs the
    /// `TIMESTAMP_QUERY` device feature).
    GpuTimestamp,
    /// Timings come from CPU-side `Instant` readings (approximate, but always
    /// available).
    CpuFallback,
}
115
116// ── PresentModeRecommendation ─────────────────────────────────────────────────
117
/// Present mode suggested by the timer from its recent frame timings.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PresentModeRecommendation {
    /// V-sync (safe, reduced latency when GPU is overloaded).
    Fifo,
    /// Triple-buffered and tear-free; ideal when the GPU has headroom.
    Mailbox,
    /// Present immediately (lowest latency, may tear).
    Immediate,
}
128
129impl PresentModeRecommendation {
130    /// Convert to the corresponding [`wgpu::PresentMode`].
131    pub fn to_wgpu(self) -> wgpu::PresentMode {
132        match self {
133            Self::Fifo => wgpu::PresentMode::Fifo,
134            Self::Mailbox => wgpu::PresentMode::Mailbox,
135            Self::Immediate => wgpu::PresentMode::Immediate,
136        }
137    }
138}
139
140// ── FrameTimer ────────────────────────────────────────────────────────────────
141
142/// GPU frame timer with CPU fallback.
143///
144/// Record the start of a frame with [`FrameTimer::begin_frame`] and the end
145/// with [`FrameTimer::end_frame`].  After each frame the duration is pushed to
146/// the internal [`FrameHistogram`].  Call
147/// [`FrameTimer::recommend_present_mode`] to get an adaptive present-mode
148/// suggestion.
149///
150/// When `TIMESTAMP_QUERY` is available, a `QuerySet` with two entries is used
151/// for sub-millisecond GPU-side measurements.  Otherwise, CPU `Instant` is
152/// used as a coarser fallback.
153pub struct FrameTimer {
154    /// The rolling frame-time histogram.
155    pub histogram: FrameHistogram,
156    /// How this timer measures time.
157    pub mode: FrameTimerMode,
158    /// Target frame duration.
159    target_frame: Duration,
160    /// CPU start time (fallback mode).
161    cpu_start: Option<Instant>,
162    /// GPU timestamp `QuerySet` (two entries: start + end).
163    timestamp_query_set: Option<wgpu::QuerySet>,
164    /// Resolve buffer for timestamp queries.
165    timestamp_resolve_buf: Option<wgpu::Buffer>,
166    /// Readback buffer for timestamp queries.
167    timestamp_readback_buf: Option<wgpu::Buffer>,
168    /// Timestamp period (nanoseconds per tick) from the adapter.
169    timestamp_period_ns: f32,
170    /// Whether a GPU begin query has been issued this frame.
171    gpu_query_pending: bool,
172}
173
174impl FrameTimer {
175    /// Create a new frame timer.
176    ///
177    /// If `device.features()` contains `TIMESTAMP_QUERY`, GPU-side timing is
178    /// used.  Otherwise, the timer falls back to CPU `Instant`.
179    ///
180    /// `target_fps` is used to compute `target_frame` for the present-mode
181    /// heuristic.  Pass 60 for a standard 60 Hz target.
182    pub fn new(device: &wgpu::Device, queue: &wgpu::Queue, target_fps: u32) -> Self {
183        let target_frame = Duration::from_micros(1_000_000 / target_fps.max(1) as u64);
184        let has_timestamps = device.features().contains(wgpu::Features::TIMESTAMP_QUERY);
185
186        if has_timestamps {
187            let timestamp_period_ns = queue.get_timestamp_period();
188            let query_set = device.create_query_set(&wgpu::QuerySetDescriptor {
189                label: Some("oxiui-render-wgpu frame timer queries"),
190                ty: wgpu::QueryType::Timestamp,
191                count: 2,
192            });
193            let resolve_buf = device.create_buffer(&wgpu::BufferDescriptor {
194                label: Some("oxiui-render-wgpu timestamp resolve"),
195                size: 16, // 2 × u64
196                usage: wgpu::BufferUsages::QUERY_RESOLVE | wgpu::BufferUsages::COPY_SRC,
197                mapped_at_creation: false,
198            });
199            let readback_buf = device.create_buffer(&wgpu::BufferDescriptor {
200                label: Some("oxiui-render-wgpu timestamp readback"),
201                size: 16,
202                usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
203                mapped_at_creation: false,
204            });
205            Self {
206                histogram: FrameHistogram::default(),
207                mode: FrameTimerMode::GpuTimestamp,
208                target_frame,
209                cpu_start: None,
210                timestamp_query_set: Some(query_set),
211                timestamp_resolve_buf: Some(resolve_buf),
212                timestamp_readback_buf: Some(readback_buf),
213                timestamp_period_ns,
214                gpu_query_pending: false,
215            }
216        } else {
217            Self {
218                histogram: FrameHistogram::default(),
219                mode: FrameTimerMode::CpuFallback,
220                target_frame,
221                cpu_start: None,
222                timestamp_query_set: None,
223                timestamp_resolve_buf: None,
224                timestamp_readback_buf: None,
225                timestamp_period_ns: 1.0,
226                gpu_query_pending: false,
227            }
228        }
229    }
230
231    /// Record the start of a frame.
232    ///
233    /// In GPU mode, `encoder` should be the first encoder for the frame.
234    /// In CPU fallback mode, `encoder` is ignored.
235    pub fn begin_frame(&mut self, encoder: &mut wgpu::CommandEncoder) {
236        match self.mode {
237            FrameTimerMode::GpuTimestamp => {
238                if let Some(ref qs) = self.timestamp_query_set {
239                    encoder.write_timestamp(qs, 0);
240                    self.gpu_query_pending = true;
241                }
242            }
243            FrameTimerMode::CpuFallback => {
244                self.cpu_start = Some(Instant::now());
245            }
246        }
247    }
248
249    /// Record the end of a frame.
250    ///
251    /// In GPU mode, `encoder` should be the last encoder for the frame (after
252    /// all render passes).  Call `resolve` after submitting the encoder to
253    /// copy the timestamps to the readback buffer.
254    pub fn end_frame(&mut self, encoder: &mut wgpu::CommandEncoder) {
255        match self.mode {
256            FrameTimerMode::GpuTimestamp => {
257                if let Some(ref qs) = self.timestamp_query_set {
258                    encoder.write_timestamp(qs, 1);
259                }
260            }
261            FrameTimerMode::CpuFallback => {
262                if let Some(start) = self.cpu_start.take() {
263                    let us = start.elapsed().as_micros() as u64;
264                    self.histogram.push(us);
265                }
266            }
267        }
268    }
269
270    /// Resolve GPU timestamps to the readback buffer.
271    ///
272    /// Call this in a *separate* encoder submitted immediately after the frame
273    /// encoder.  The resolve copy is a GPU operation that must follow the
274    /// write_timestamp commands in the previous encoder.
275    ///
276    /// No-op in CPU fallback mode or if no GPU query is pending.
277    pub fn resolve_timestamps(&mut self, encoder: &mut wgpu::CommandEncoder) {
278        if self.mode != FrameTimerMode::GpuTimestamp || !self.gpu_query_pending {
279            return;
280        }
281        if let (Some(ref qs), Some(ref resolve_buf)) =
282            (&self.timestamp_query_set, &self.timestamp_resolve_buf)
283        {
284            encoder.resolve_query_set(qs, 0..2, resolve_buf, 0);
285            if let Some(ref readback_buf) = self.timestamp_readback_buf {
286                encoder.copy_buffer_to_buffer(resolve_buf, 0, readback_buf, 0, 16);
287            }
288        }
289    }
290
291    /// Read back the GPU timestamps (if available) and update the histogram.
292    ///
293    /// Must be called after the resolve encoder has been submitted *and* the
294    /// GPU has completed (e.g. after `device.poll(Wait)`).
295    ///
296    /// No-op in CPU fallback mode or if no GPU query is pending.
297    pub fn collect_gpu_timestamps(&mut self, device: &wgpu::Device) {
298        if self.mode != FrameTimerMode::GpuTimestamp || !self.gpu_query_pending {
299            return;
300        }
301        self.gpu_query_pending = false;
302
303        let Some(ref readback_buf) = self.timestamp_readback_buf else {
304            return;
305        };
306
307        let slice = readback_buf.slice(..);
308        slice.map_async(wgpu::MapMode::Read, |_| {});
309        // Non-blocking: if the device has already been polled (Wait mode), the
310        // mapping is ready.  If not, we skip this frame's sample to avoid
311        // blocking the render thread.
312        if device.poll(wgpu::PollType::Poll).is_ok() {
313            let data = slice.get_mapped_range();
314            let timestamps: [u64; 2] = bytemuck::pod_read_unaligned(&data[..16]);
315            drop(data);
316            readback_buf.unmap();
317
318            if timestamps[1] >= timestamps[0] {
319                let ticks = timestamps[1] - timestamps[0];
320                let ns = ticks as f64 * self.timestamp_period_ns as f64;
321                let us = (ns / 1_000.0) as u64;
322                self.histogram.push(us);
323            }
324        } else {
325            readback_buf.unmap();
326        }
327    }
328
329    /// Recommend a [`wgpu::PresentMode`] based on the recent frame-time histogram.
330    ///
331    /// Requires at least 4 samples to make a recommendation; returns `Fifo`
332    /// until then.
333    pub fn recommend_present_mode(&self) -> PresentModeRecommendation {
334        if self.histogram.len() < 4 {
335            return PresentModeRecommendation::Fifo;
336        }
337        let p99_us = self.histogram.p99_us();
338        let target_us = self.target_frame.as_micros() as u64;
339
340        if p99_us > target_us * 3 / 2 {
341            // GPU is consistently late — stay on Fifo to avoid stalls.
342            PresentModeRecommendation::Fifo
343        } else if p99_us < target_us / 2 {
344            // GPU has lots of headroom — Mailbox or Immediate.
345            PresentModeRecommendation::Mailbox
346        } else {
347            PresentModeRecommendation::Fifo
348        }
349    }
350}
351
352// ── Tests ─────────────────────────────────────────────────────────────────────
353
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a CPU-fallback timer around `histogram` without needing a GPU,
    /// so the present-mode heuristic can be tested on any machine.
    fn cpu_timer(histogram: FrameHistogram, target_us: u64) -> FrameTimer {
        FrameTimer {
            histogram,
            mode: FrameTimerMode::CpuFallback,
            target_frame: std::time::Duration::from_micros(target_us),
            cpu_start: None,
            timestamp_query_set: None,
            timestamp_resolve_buf: None,
            timestamp_readback_buf: None,
            timestamp_period_ns: 1.0,
            gpu_query_pending: false,
        }
    }

    /// Histogram holding `n` copies of `duration_us`.
    fn uniform_histogram(n: usize, duration_us: u64) -> FrameHistogram {
        let mut h = FrameHistogram::default();
        for _ in 0..n {
            h.push(duration_us);
        }
        h
    }

    #[test]
    fn histogram_push_and_stats() {
        let mut h = FrameHistogram::default();
        assert!(h.is_empty());
        h.push(8_000); // 8 ms
        h.push(16_000); // 16 ms
        h.push(12_000); // 12 ms
        assert_eq!(h.len(), 3);
        let mean = h.mean_us();
        assert!((mean - 12_000.0).abs() < 1.0, "mean should be ~12000 µs");
        assert_eq!(h.min_us(), 8_000);
        assert_eq!(h.max_us(), 16_000);
    }

    #[test]
    fn histogram_p99_is_maximum_in_small_set() {
        let mut h = FrameHistogram::default();
        for ms in 1u64..=10u64 {
            h.push(ms * 1_000);
        }
        // p99 in a 10-sample set: index 9 (the max) = 10_000 µs.
        assert_eq!(h.p99_us(), 10_000);
    }

    #[test]
    fn histogram_wraps_at_capacity() {
        let mut h = FrameHistogram::default();
        // Push more than HISTOGRAM_SIZE samples.
        for i in 0..(HISTOGRAM_SIZE + 10) as u64 {
            h.push(i);
        }
        assert_eq!(
            h.len(),
            HISTOGRAM_SIZE,
            "should be capped at HISTOGRAM_SIZE"
        );
        // The ten oldest samples (0..10) must have been overwritten.
        assert_eq!(h.min_us(), 10, "oldest samples should be evicted");
        assert_eq!(h.max_us(), (HISTOGRAM_SIZE + 9) as u64);
    }

    #[test]
    fn recommend_present_mode_fifo_when_insufficient_samples() {
        let instance = wgpu::Instance::default();
        let adapter = pollster::block_on(instance.request_adapter(&wgpu::RequestAdapterOptions {
            power_preference: wgpu::PowerPreference::default(),
            force_fallback_adapter: false,
            compatible_surface: None,
        }));
        let Some(adapter) = adapter.ok() else {
            return; // no GPU — skip GPU test
        };
        let (device, queue) = pollster::block_on(adapter.request_device(&wgpu::DeviceDescriptor {
            label: Some("frame-timer test device"),
            required_features: wgpu::Features::empty(),
            required_limits: wgpu::Limits::downlevel_defaults(),
            memory_hints: wgpu::MemoryHints::Performance,
            experimental_features: wgpu::ExperimentalFeatures::disabled(),
            trace: wgpu::Trace::Off,
        }))
        .expect("request_device");

        let timer = FrameTimer::new(&device, &queue, 60);
        // No samples recorded yet → always Fifo.
        assert_eq!(
            timer.recommend_present_mode(),
            PresentModeRecommendation::Fifo
        );
    }

    #[test]
    fn recommend_fifo_below_sample_threshold_without_gpu() {
        // Three very fast frames are still fewer than the 4 samples required.
        let timer = cpu_timer(uniform_histogram(3, 2_000), 16_667);
        assert_eq!(
            timer.recommend_present_mode(),
            PresentModeRecommendation::Fifo
        );
    }

    #[test]
    fn recommend_mailbox_when_fast() {
        // Frames take 2 ms against a 16.667 ms target: p99 = 2000 < 8333.
        let timer = cpu_timer(uniform_histogram(10, 2_000), 16_667);
        assert_eq!(
            timer.recommend_present_mode(),
            PresentModeRecommendation::Mailbox
        );
    }

    #[test]
    fn recommend_fifo_when_slow_or_on_target() {
        // Overloaded: p99 = 30 ms > 1.5 × target.
        let slow = cpu_timer(uniform_histogram(10, 30_000), 16_667);
        assert_eq!(
            slow.recommend_present_mode(),
            PresentModeRecommendation::Fifo
        );

        // Middle band: p99 = 12 ms sits between 0.5 × and 1.5 × target.
        let on_target = cpu_timer(uniform_histogram(10, 12_000), 16_667);
        assert_eq!(
            on_target.recommend_present_mode(),
            PresentModeRecommendation::Fifo
        );
    }

    #[test]
    fn present_mode_recommendation_to_wgpu() {
        assert_eq!(
            PresentModeRecommendation::Fifo.to_wgpu(),
            wgpu::PresentMode::Fifo
        );
        assert_eq!(
            PresentModeRecommendation::Mailbox.to_wgpu(),
            wgpu::PresentMode::Mailbox
        );
        assert_eq!(
            PresentModeRecommendation::Immediate.to_wgpu(),
            wgpu::PresentMode::Immediate
        );
    }
}