Skip to main content

oximedia_gpu/
double_buffer.rs

1//! Double-buffered GPU command submission.
2//!
3//! Overlaps CPU work (encoding the next frame's commands) with GPU work
4//! (executing the current frame's commands) using a ping-pong buffer scheme.
5//!
6//! # Architecture
7//!
8//! ```text
9//! Frame N:   CPU encodes buffer A   ───▶  GPU executes buffer A
10//! Frame N+1: CPU encodes buffer B   ───▶  GPU executes buffer B
11//! Frame N+2: CPU encodes buffer A   ───▶  GPU executes buffer A  (recycled)
12//!            └──── overlapped ─────┘
13//! ```
14//!
15//! The [`DoubleBufferSubmitter`] manages two command slots and alternates
16//! between them.  While the GPU processes slot A, the CPU can prepare
17//! commands in slot B, minimising idle time on both sides.
18//!
19//! # Status
20//!
21//! This module provides the scheduling and statistics logic.  Actual GPU
22//! command execution is delegated to the caller via closures.
23
24use std::collections::VecDeque;
25use std::time::{Duration, Instant};
26
27// ─── Slot state ─────────────────────────────────────────────────────────────
28
/// Lifecycle state of a single command buffer slot.
///
/// A slot normally advances `Idle → Encoding → InFlight → Complete`; both
/// `Idle` and `Complete` slots may be picked up again for a new frame.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SlotState {
    /// Nothing is recorded in the slot; the CPU may start encoding into it.
    Idle,
    /// The CPU is in the middle of encoding commands into the slot.
    Encoding,
    /// The slot's commands were handed to the GPU and have not finished yet.
    InFlight,
    /// GPU execution finished; results can be read back.
    Complete,
}

impl std::fmt::Display for SlotState {
    /// Writes the lower-case, human-readable name of the state.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match *self {
            Self::Idle => "idle",
            Self::Encoding => "encoding",
            Self::InFlight => "in-flight",
            Self::Complete => "complete",
        };
        f.write_str(label)
    }
}
52
53// ─── Slot ───────────────────────────────────────────────────────────────────
54
55/// A command buffer slot with timing information.
56#[derive(Debug, Clone)]
57pub struct CommandSlot {
58    /// Current state.
59    state: SlotState,
60    /// Frame index assigned to this slot (monotonically increasing).
61    frame_index: u64,
62    /// When encoding started for this slot.
63    encode_start: Option<Instant>,
64    /// Duration of the encoding phase.
65    encode_duration: Option<Duration>,
66    /// When submission to the GPU occurred.
67    submit_time: Option<Instant>,
68    /// Duration from submission to completion.
69    gpu_duration: Option<Duration>,
70    /// Opaque payload attached by the caller during encoding.
71    payload_size: usize,
72}
73
74impl CommandSlot {
75    fn new() -> Self {
76        Self {
77            state: SlotState::Idle,
78            frame_index: 0,
79            encode_start: None,
80            encode_duration: None,
81            submit_time: None,
82            gpu_duration: None,
83            payload_size: 0,
84        }
85    }
86
87    /// Current state of this slot.
88    #[must_use]
89    pub fn state(&self) -> SlotState {
90        self.state
91    }
92
93    /// Frame index assigned to this slot.
94    #[must_use]
95    pub fn frame_index(&self) -> u64 {
96        self.frame_index
97    }
98
99    /// Duration of the last encode phase, if available.
100    #[must_use]
101    pub fn encode_duration(&self) -> Option<Duration> {
102        self.encode_duration
103    }
104
105    /// Duration of the last GPU execution phase, if available.
106    #[must_use]
107    pub fn gpu_duration(&self) -> Option<Duration> {
108        self.gpu_duration
109    }
110
111    /// Size of the payload attached during encoding.
112    #[must_use]
113    pub fn payload_size(&self) -> usize {
114        self.payload_size
115    }
116}
117
118// ─── Statistics ─────────────────────────────────────────────────────────────
119
/// Performance statistics for the double-buffer submitter.
///
/// The public counters and time totals are cumulative. Latency figures are
/// computed over a rolling window of the most recently completed frames.
#[derive(Debug, Clone, Default)]
pub struct DoubleBufferStats {
    /// Total frames submitted (counted before the GPU work closure runs, so
    /// frames whose GPU work fails are included).
    pub total_frames: u64,
    /// Total frames completed.
    pub completed_frames: u64,
    /// Total frames that failed during GPU execution.
    pub failed_frames: u64,
    /// Total CPU encoding time across all frames.
    pub total_encode_time: Duration,
    /// Total GPU execution time across all frames.
    pub total_gpu_time: Duration,
    /// Number of times no free slot was available when one was requested.
    pub cpu_stalls: u64,
    /// Rolling window of recent frame latencies (encode + GPU), oldest first.
    recent_latencies: VecDeque<Duration>,
}

impl DoubleBufferStats {
    /// Maximum number of samples kept in the rolling latency window.
    const MAX_RECENT: usize = 64;

    /// Appends `latency` to the rolling window, evicting the oldest sample
    /// once the window already holds [`Self::MAX_RECENT`] entries.
    fn record_latency(&mut self, latency: Duration) {
        if self.recent_latencies.len() >= Self::MAX_RECENT {
            self.recent_latencies.pop_front();
        }
        self.recent_latencies.push_back(latency);
    }

    /// Sum of the accumulated encode and GPU time; the denominator of both
    /// utilisation ratios.
    fn busy_time(&self) -> Duration {
        self.total_encode_time + self.total_gpu_time
    }

    /// Returns `part / whole` as a fraction, or `0.0` when `whole` is zero.
    fn share_of(part: Duration, whole: Duration) -> f64 {
        if whole.is_zero() {
            0.0
        } else {
            part.as_secs_f64() / whole.as_secs_f64()
        }
    }

    /// Average frame latency over the recent window.
    ///
    /// Returns [`Duration::ZERO`] when no frame has completed yet.
    #[must_use]
    pub fn avg_latency(&self) -> Duration {
        let samples = self.recent_latencies.len();
        if samples == 0 {
            return Duration::ZERO;
        }
        let total: Duration = self.recent_latencies.iter().sum();
        // The window is capped at MAX_RECENT, so the conversion cannot fail
        // today; saturate instead of silently truncating if the cap is ever
        // raised beyond `u32::MAX`.
        let divisor = u32::try_from(samples).unwrap_or(u32::MAX);
        total / divisor
    }

    /// Maximum frame latency in the recent window.
    ///
    /// Returns [`Duration::ZERO`] when the window is empty.
    #[must_use]
    pub fn max_latency(&self) -> Duration {
        self.recent_latencies
            .iter()
            .copied()
            .max()
            .unwrap_or(Duration::ZERO)
    }

    /// Minimum frame latency in the recent window.
    ///
    /// Returns [`Duration::ZERO`] when the window is empty.
    #[must_use]
    pub fn min_latency(&self) -> Duration {
        self.recent_latencies
            .iter()
            .copied()
            .min()
            .unwrap_or(Duration::ZERO)
    }

    /// Estimated CPU utilisation: encode time divided by the sum of encode
    /// and GPU time.
    ///
    /// The denominator is the sum of the two accumulated totals, not
    /// wall-clock time. Returns `0.0` when nothing has been recorded.
    #[must_use]
    pub fn cpu_utilisation(&self) -> f64 {
        Self::share_of(self.total_encode_time, self.busy_time())
    }

    /// Estimated GPU utilisation: GPU time divided by the sum of encode and
    /// GPU time.
    ///
    /// The denominator is the sum of the two accumulated totals, not
    /// wall-clock time. Returns `0.0` when nothing has been recorded.
    #[must_use]
    pub fn gpu_utilisation(&self) -> f64 {
        Self::share_of(self.total_gpu_time, self.busy_time())
    }

    /// Estimated throughput in frames per second (based on recent window).
    ///
    /// Computed as the reciprocal of [`Self::avg_latency`], i.e. the rate
    /// achieved if frames were processed one after another. Returns `0.0`
    /// when the average latency is zero.
    #[must_use]
    pub fn estimated_fps(&self) -> f64 {
        let avg = self.avg_latency();
        if avg.is_zero() {
            0.0
        } else {
            1.0 / avg.as_secs_f64()
        }
    }
}
209
210// ─── Error ──────────────────────────────────────────────────────────────────
211
212/// Errors from the double-buffer submitter.
213#[derive(Debug, Clone)]
214pub enum DoubleBufferError {
215    /// No idle slot is available (both are in-flight or encoding).
216    NoFreeSlot,
217    /// Attempted to submit a slot that is not in the Encoding state.
218    InvalidSlotState {
219        expected: SlotState,
220        actual: SlotState,
221    },
222    /// The GPU work closure returned an error.
223    GpuWorkFailed(String),
224}
225
226impl std::fmt::Display for DoubleBufferError {
227    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
228        match self {
229            Self::NoFreeSlot => write!(f, "no free command slot available"),
230            Self::InvalidSlotState { expected, actual } => {
231                write!(f, "slot in state {actual}, expected {expected}")
232            }
233            Self::GpuWorkFailed(msg) => write!(f, "GPU work failed: {msg}"),
234        }
235    }
236}
237
238impl std::error::Error for DoubleBufferError {}
239
240type Result<T> = std::result::Result<T, DoubleBufferError>;
241
242// ─── DoubleBufferSubmitter ──────────────────────────────────────────────────
243
244/// Double-buffered command submission manager.
245///
246/// Manages two [`CommandSlot`]s in a ping-pong fashion to enable overlapping
247/// CPU encoding and GPU execution.
248pub struct DoubleBufferSubmitter {
249    slots: [CommandSlot; 2],
250    /// Index of the slot currently being used for encoding (0 or 1).
251    active_slot: usize,
252    /// Monotonically increasing frame counter.
253    frame_counter: u64,
254    /// Accumulated statistics.
255    stats: DoubleBufferStats,
256}
257
258impl DoubleBufferSubmitter {
259    /// Create a new double-buffer submitter.
260    #[must_use]
261    pub fn new() -> Self {
262        Self {
263            slots: [CommandSlot::new(), CommandSlot::new()],
264            active_slot: 0,
265            frame_counter: 0,
266            stats: DoubleBufferStats::default(),
267        }
268    }
269
270    /// Begin encoding a new frame in the next available slot.
271    ///
272    /// Returns the slot index (0 or 1) that was acquired.
273    ///
274    /// # Errors
275    ///
276    /// Returns [`DoubleBufferError::NoFreeSlot`] if both slots are busy.
277    pub fn begin_encode(&mut self) -> Result<usize> {
278        // Find an idle or complete slot.
279        let slot_idx = self.find_free_slot()?;
280        let slot = &mut self.slots[slot_idx];
281        slot.state = SlotState::Encoding;
282        slot.frame_index = self.frame_counter;
283        slot.encode_start = Some(Instant::now());
284        slot.encode_duration = None;
285        slot.submit_time = None;
286        slot.gpu_duration = None;
287        slot.payload_size = 0;
288        self.active_slot = slot_idx;
289        self.frame_counter += 1;
290        Ok(slot_idx)
291    }
292
293    /// Set the payload size for the current encoding slot.
294    ///
295    /// This is informational and can be used for statistics.
296    pub fn set_payload_size(&mut self, slot_idx: usize, size: usize) {
297        if slot_idx < 2 {
298            self.slots[slot_idx].payload_size = size;
299        }
300    }
301
302    /// Finish encoding and submit the slot for GPU execution.
303    ///
304    /// The `gpu_work` closure receives the slot index and should perform
305    /// the actual GPU command submission.
306    ///
307    /// # Errors
308    ///
309    /// Returns an error if the slot is not in the Encoding state or if
310    /// `gpu_work` fails.
311    pub fn submit<F>(&mut self, slot_idx: usize, gpu_work: F) -> Result<()>
312    where
313        F: FnOnce(usize) -> std::result::Result<(), String>,
314    {
315        if slot_idx >= 2 {
316            return Err(DoubleBufferError::InvalidSlotState {
317                expected: SlotState::Encoding,
318                actual: SlotState::Idle,
319            });
320        }
321        let slot = &mut self.slots[slot_idx];
322        if slot.state != SlotState::Encoding {
323            return Err(DoubleBufferError::InvalidSlotState {
324                expected: SlotState::Encoding,
325                actual: slot.state,
326            });
327        }
328
329        // Record encode duration.
330        if let Some(start) = slot.encode_start {
331            let dur = start.elapsed();
332            slot.encode_duration = Some(dur);
333            self.stats.total_encode_time += dur;
334        }
335
336        slot.submit_time = Some(Instant::now());
337        slot.state = SlotState::InFlight;
338        self.stats.total_frames += 1;
339
340        // Execute the GPU work.
341        if let Err(msg) = gpu_work(slot_idx) {
342            slot.state = SlotState::Idle;
343            self.stats.failed_frames += 1;
344            return Err(DoubleBufferError::GpuWorkFailed(msg));
345        }
346
347        Ok(())
348    }
349
350    /// Mark a slot as complete (GPU execution finished).
351    ///
352    /// Should be called when the GPU fence/completion signal fires.
353    pub fn mark_complete(&mut self, slot_idx: usize) {
354        if slot_idx >= 2 {
355            return;
356        }
357        let slot = &mut self.slots[slot_idx];
358        if slot.state != SlotState::InFlight {
359            return;
360        }
361
362        if let Some(submit) = slot.submit_time {
363            let gpu_dur = submit.elapsed();
364            slot.gpu_duration = Some(gpu_dur);
365            self.stats.total_gpu_time += gpu_dur;
366
367            // Total latency = encode + GPU
368            let total = slot.encode_duration.unwrap_or(Duration::ZERO) + gpu_dur;
369            self.stats.record_latency(total);
370        }
371
372        slot.state = SlotState::Complete;
373        self.stats.completed_frames += 1;
374    }
375
376    /// Convenience: encode, submit, and immediately mark complete.
377    ///
378    /// Useful for synchronous (non-pipelined) operation.
379    ///
380    /// # Errors
381    ///
382    /// Returns an error if no slot is free or if GPU work fails.
383    pub fn submit_sync<F>(&mut self, payload_size: usize, gpu_work: F) -> Result<u64>
384    where
385        F: FnOnce(usize) -> std::result::Result<(), String>,
386    {
387        let slot_idx = self.begin_encode()?;
388        self.set_payload_size(slot_idx, payload_size);
389        self.submit(slot_idx, gpu_work)?;
390        self.mark_complete(slot_idx);
391        Ok(self.slots[slot_idx].frame_index)
392    }
393
394    /// Get a snapshot of the current statistics.
395    #[must_use]
396    pub fn stats(&self) -> &DoubleBufferStats {
397        &self.stats
398    }
399
400    /// Get the state of a specific slot.
401    #[must_use]
402    pub fn slot(&self, idx: usize) -> Option<&CommandSlot> {
403        self.slots.get(idx)
404    }
405
406    /// Get the current active (encoding) slot index.
407    #[must_use]
408    pub fn active_slot(&self) -> usize {
409        self.active_slot
410    }
411
412    /// The total number of frames that have been assigned frame indices.
413    #[must_use]
414    pub fn frame_counter(&self) -> u64 {
415        self.frame_counter
416    }
417
418    /// Reset both slots to idle and clear statistics.
419    pub fn reset(&mut self) {
420        self.slots = [CommandSlot::new(), CommandSlot::new()];
421        self.active_slot = 0;
422        self.frame_counter = 0;
423        self.stats = DoubleBufferStats::default();
424    }
425
426    /// Force-reclaim a complete or idle slot (for error recovery).
427    pub fn reclaim(&mut self, slot_idx: usize) {
428        if slot_idx < 2 {
429            self.slots[slot_idx].state = SlotState::Idle;
430        }
431    }
432
433    // ── Private ─────────────────────────────────────────────────────────────
434
435    fn find_free_slot(&mut self) -> Result<usize> {
436        // Prefer the slot that is not the active one.
437        let other = 1 - self.active_slot;
438
439        // Check other slot first.
440        if self.slots[other].state == SlotState::Idle
441            || self.slots[other].state == SlotState::Complete
442        {
443            return Ok(other);
444        }
445
446        // Check active slot.
447        if self.slots[self.active_slot].state == SlotState::Idle
448            || self.slots[self.active_slot].state == SlotState::Complete
449        {
450            return Ok(self.active_slot);
451        }
452
453        self.stats.cpu_stalls += 1;
454        Err(DoubleBufferError::NoFreeSlot)
455    }
456}
457
458impl Default for DoubleBufferSubmitter {
459    fn default() -> Self {
460        Self::new()
461    }
462}
463
464// ─── Tests ──────────────────────────────────────────────────────────────────
465
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns the state of slot `idx`, or `None` when `idx` is out of range.
    fn state_of(sub: &DoubleBufferSubmitter, idx: usize) -> Option<SlotState> {
        sub.slot(idx).map(CommandSlot::state)
    }

    #[test]
    fn test_new_submitter_is_idle() {
        let sub = DoubleBufferSubmitter::new();
        assert_eq!(state_of(&sub, 0), Some(SlotState::Idle));
        assert_eq!(state_of(&sub, 1), Some(SlotState::Idle));
        assert_eq!(sub.frame_counter(), 0);
    }

    #[test]
    fn test_begin_encode_transitions_to_encoding() {
        let mut sub = DoubleBufferSubmitter::new();
        let idx = sub.begin_encode().expect("begin encode");
        assert_eq!(state_of(&sub, idx), Some(SlotState::Encoding));
        assert_eq!(sub.active_slot(), idx);
    }

    #[test]
    fn test_submit_transitions_to_in_flight() {
        let mut sub = DoubleBufferSubmitter::new();
        let idx = sub.begin_encode().expect("begin encode");
        sub.submit(idx, |_| Ok(())).expect("submit");
        assert_eq!(state_of(&sub, idx), Some(SlotState::InFlight));
    }

    #[test]
    fn test_mark_complete_transitions() {
        let mut sub = DoubleBufferSubmitter::new();
        let idx = sub.begin_encode().expect("begin encode");
        sub.submit(idx, |_| Ok(())).expect("submit");
        sub.mark_complete(idx);
        assert_eq!(state_of(&sub, idx), Some(SlotState::Complete));
    }

    #[test]
    fn test_submit_sync_full_cycle() {
        let mut sub = DoubleBufferSubmitter::new();
        let frame_id = sub.submit_sync(1024, |_| Ok(())).expect("sync submit");
        assert_eq!(frame_id, 0);
        assert_eq!(sub.stats().total_frames, 1);
        assert_eq!(sub.stats().completed_frames, 1);
    }

    #[test]
    fn test_double_buffer_alternates_slots() {
        let mut sub = DoubleBufferSubmitter::new();
        let idx0 = sub.begin_encode().expect("encode 0");
        sub.submit(idx0, |_| Ok(())).expect("submit 0");
        sub.mark_complete(idx0);

        let idx1 = sub.begin_encode().expect("encode 1");
        assert_ne!(idx0, idx1, "should alternate to the other slot");
        sub.submit(idx1, |_| Ok(())).expect("submit 1");
        sub.mark_complete(idx1);

        assert_eq!(sub.stats().total_frames, 2);
        assert_eq!(sub.stats().completed_frames, 2);
    }

    #[test]
    fn test_no_free_slot_when_both_in_flight() {
        let mut sub = DoubleBufferSubmitter::new();
        let idx0 = sub.begin_encode().expect("encode 0");
        sub.submit(idx0, |_| Ok(())).expect("submit 0");
        // idx0 is InFlight

        let idx1 = sub.begin_encode().expect("encode 1");
        sub.submit(idx1, |_| Ok(())).expect("submit 1");
        // idx1 is InFlight

        // Both slots busy: the request must fail with the specific variant
        // and be counted as a stall without consuming a frame index.
        let result = sub.begin_encode();
        assert!(matches!(result, Err(DoubleBufferError::NoFreeSlot)));
        assert_eq!(sub.stats().cpu_stalls, 1);
        assert_eq!(sub.frame_counter(), 2);
    }

    #[test]
    fn test_gpu_work_failure_records_stats() {
        let mut sub = DoubleBufferSubmitter::new();
        let idx = sub.begin_encode().expect("encode");
        let result = sub.submit(idx, |_| Err("simulated failure".to_string()));
        match result {
            Err(DoubleBufferError::GpuWorkFailed(msg)) => {
                assert_eq!(msg, "simulated failure");
            }
            other => panic!("expected GpuWorkFailed, got {:?}", other),
        }
        assert_eq!(sub.stats().failed_frames, 1);
        assert_eq!(sub.stats().completed_frames, 0);
        // Slot should be reset to idle on failure
        assert_eq!(state_of(&sub, idx), Some(SlotState::Idle));
    }

    #[test]
    fn test_submit_wrong_state_returns_error() {
        let mut sub = DoubleBufferSubmitter::new();
        // Slot 0 is idle, not encoding — submit should fail and say so.
        let result = sub.submit(0, |_| Ok(()));
        assert!(matches!(
            result,
            Err(DoubleBufferError::InvalidSlotState {
                expected: SlotState::Encoding,
                actual: SlotState::Idle,
            })
        ));
        assert_eq!(sub.stats().total_frames, 0);
    }

    #[test]
    fn test_payload_size_tracking() {
        let mut sub = DoubleBufferSubmitter::new();
        let idx = sub.begin_encode().expect("encode");
        sub.set_payload_size(idx, 4096);
        assert_eq!(sub.slot(idx).map(CommandSlot::payload_size), Some(4096));
    }

    #[test]
    fn test_reset_clears_everything() {
        let mut sub = DoubleBufferSubmitter::new();
        sub.submit_sync(100, |_| Ok(())).expect("first frame");
        sub.submit_sync(200, |_| Ok(())).expect("second frame");
        sub.reset();
        assert_eq!(sub.frame_counter(), 0);
        assert_eq!(sub.stats().total_frames, 0);
        assert_eq!(sub.stats().completed_frames, 0);
        assert_eq!(sub.stats().avg_latency(), Duration::ZERO);
        assert_eq!(state_of(&sub, 0), Some(SlotState::Idle));
        assert_eq!(state_of(&sub, 1), Some(SlotState::Idle));
    }

    #[test]
    fn test_reclaim_resets_slot() {
        let mut sub = DoubleBufferSubmitter::new();
        let idx = sub.begin_encode().expect("encode");
        sub.submit(idx, |_| Ok(())).expect("submit");
        assert_eq!(state_of(&sub, idx), Some(SlotState::InFlight));
        sub.reclaim(idx);
        assert_eq!(state_of(&sub, idx), Some(SlotState::Idle));
    }

    #[test]
    fn test_frame_index_increments() {
        let mut sub = DoubleBufferSubmitter::new();
        let f0 = sub.submit_sync(10, |_| Ok(())).expect("f0");
        let f1 = sub.submit_sync(10, |_| Ok(())).expect("f1");
        let f2 = sub.submit_sync(10, |_| Ok(())).expect("f2");
        assert_eq!(f0, 0);
        assert_eq!(f1, 1);
        assert_eq!(f2, 2);
    }

    #[test]
    fn test_stats_avg_latency_not_zero_after_frames() {
        let mut sub = DoubleBufferSubmitter::new();
        for _ in 0..5 {
            sub.submit_sync(100, |_| Ok(())).expect("sync submit");
        }
        let stats = sub.stats();
        assert_eq!(stats.completed_frames, 5);
        // Individual latencies may round to zero on a coarse clock, so check
        // that every frame was recorded and that the aggregates are ordered
        // rather than asserting a strictly positive average.
        assert_eq!(stats.recent_latencies.len(), 5);
        assert!(stats.min_latency() <= stats.avg_latency());
        assert!(stats.avg_latency() <= stats.max_latency());
    }

    #[test]
    fn test_stats_utilisation_bounded() {
        let mut sub = DoubleBufferSubmitter::new();
        for _ in 0..10 {
            sub.submit_sync(100, |_| Ok(())).expect("sync submit");
        }
        let cpu_u = sub.stats().cpu_utilisation();
        let gpu_u = sub.stats().gpu_utilisation();
        assert!(
            (0.0..=1.0).contains(&cpu_u),
            "cpu utilisation out of range: {cpu_u}"
        );
        assert!(
            (0.0..=1.0).contains(&gpu_u),
            "gpu utilisation out of range: {gpu_u}"
        );
    }

    #[test]
    fn test_slot_out_of_range() {
        let sub = DoubleBufferSubmitter::new();
        assert!(sub.slot(2).is_none());
        assert!(sub.slot(99).is_none());
    }

    #[test]
    fn test_mark_complete_ignored_for_non_inflight() {
        let mut sub = DoubleBufferSubmitter::new();
        // Slot 0 is Idle — mark_complete should be a no-op
        sub.mark_complete(0);
        assert_eq!(state_of(&sub, 0), Some(SlotState::Idle));
        assert_eq!(sub.stats().completed_frames, 0);
    }

    #[test]
    fn test_default_impl() {
        let sub = DoubleBufferSubmitter::default();
        assert_eq!(sub.frame_counter(), 0);
        assert_eq!(state_of(&sub, 0), Some(SlotState::Idle));
        assert_eq!(state_of(&sub, 1), Some(SlotState::Idle));
    }

    #[test]
    fn test_slot_state_display() {
        assert_eq!(SlotState::Idle.to_string(), "idle");
        assert_eq!(SlotState::Encoding.to_string(), "encoding");
        assert_eq!(SlotState::InFlight.to_string(), "in-flight");
        assert_eq!(SlotState::Complete.to_string(), "complete");
    }

    #[test]
    fn test_max_min_latency_with_empty_stats() {
        let stats = DoubleBufferStats::default();
        assert_eq!(stats.max_latency(), Duration::ZERO);
        assert_eq!(stats.min_latency(), Duration::ZERO);
        assert_eq!(stats.avg_latency(), Duration::ZERO);
    }

    #[test]
    fn test_estimated_fps_zero_when_no_frames() {
        let stats = DoubleBufferStats::default();
        assert_eq!(stats.estimated_fps(), 0.0);
    }
}