Skip to main content

oximedia_codec/rate_control/
scene_adaptive.rs

1//! Scene-adaptive bitrate allocation using content analysis.
2//!
3//! This module extends the lookahead-based rate control with scene detection
4//! and content-type analysis to dynamically allocate bits across scene
5//! boundaries and content types (action, talking heads, static, etc.).
6//!
7//! # Algorithm Overview
8//!
9//! 1. Classify each lookahead frame by content type using spatial/temporal metrics
10//! 2. Detect scene cuts via SAD-based inter-frame difference
11//! 3. Compute per-scene complexity budgets from content classification
12//! 4. Derive per-frame bit targets adjusted for scene transitions
13//!
14//! # References
15//!
16//! - x264/x265 scene-adaptive rate control
17//! - Reinhard et al., "Scene change detection and adaptive encoding"
18
19#![forbid(unsafe_code)]
20#![allow(clippy::cast_precision_loss)]
21#![allow(clippy::cast_possible_truncation)]
22#![allow(clippy::cast_sign_loss)]
23#![allow(clippy::cast_lossless)]
24
25use crate::error::{CodecError, CodecResult};
26
27// ─────────────────────────────────────────────────────────────────────────────
28// Content classification
29// ─────────────────────────────────────────────────────────────────────────────
30
/// Coarse content category used to steer bit allocation.
///
/// Every variant carries a fixed "complexity multiplier": the fraction of the
/// average bits-per-frame budget that content of this kind is awarded.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SceneContentType {
    /// Fast motion (sports, action) — high motion complexity.
    HighMotion,
    /// Mixed scene with moderate motion (drama, news).
    MidMotion,
    /// Mostly-static content (talking heads, slides).
    StaticScene,
    /// Fade-in / fade-out / dissolve transition.
    Transition,
    /// Hard cut — first frame of a new scene.
    SceneCut,
}

impl SceneContentType {
    /// Single source of truth for the per-variant `(multiplier, label)` pair.
    ///
    /// Keeping both values in one exhaustive `match` means a newly added
    /// variant cannot get a multiplier without also getting a label.
    fn profile(self) -> (f32, &'static str) {
        match self {
            Self::HighMotion => (1.55, "high-motion"),
            Self::MidMotion => (1.10, "mid-motion"),
            Self::StaticScene => (0.65, "static"),
            Self::Transition => (0.80, "transition"),
            // Scene cuts get an I-frame style boost.
            Self::SceneCut => (1.40, "scene-cut"),
        }
    }

    /// Bit-allocation multiplier relative to the average per-frame budget.
    ///
    /// A value above `1.0` asks for more bits than average; a value below
    /// `1.0` marks content where bits can be saved.
    #[must_use]
    pub fn complexity_multiplier(self) -> f32 {
        let (multiplier, _) = self.profile();
        multiplier
    }

    /// Short human-readable name of the variant.
    #[must_use]
    pub fn label(self) -> &'static str {
        let (_, label) = self.profile();
        label
    }
}
76
77// ─────────────────────────────────────────────────────────────────────────────
78// Frame metrics used by the allocator
79// ─────────────────────────────────────────────────────────────────────────────
80
/// Content measurements for a single frame, consumed by the scene-adaptive
/// allocator.
///
/// The three floating-point fields are expected to lie in `[0, 1]`; the
/// struct itself does not enforce that range.
#[derive(Debug, Clone)]
pub struct FrameContentMetrics {
    /// Position of the frame in presentation order.
    pub frame_index: u64,
    /// Spatial complexity: `0.0` is a flat frame, `1.0` is maximally complex.
    pub spatial_complexity: f32,
    /// Temporal complexity: `0.0` is no change, `1.0` is a complete scene
    /// change.
    pub temporal_complexity: f32,
    /// Average inter-frame SAD, normalised to `[0, 1]`.
    pub normalised_sad: f32,
    /// Set when a hard scene cut is suspected at this frame.
    pub is_scene_cut: bool,
}
95
96impl FrameContentMetrics {
97    /// Build metrics from raw pixel statistics.
98    ///
99    /// # Arguments
100    ///
101    /// * `frame_index`      – Presentation-order index
102    /// * `spatial_var`      – Spatial variance of the luma plane (raw value)
103    /// * `inter_frame_sad`  – Sum of absolute differences vs previous frame
104    /// * `frame_pixels`     – Total luma pixels in the frame
105    ///
106    /// `spatial_var` and `inter_frame_sad` are normalised internally so that
107    /// a value of `1.0` represents the worst-case / maximum-complexity signal.
108    #[must_use]
109    pub fn from_raw(
110        frame_index: u64,
111        spatial_var: f32,
112        inter_frame_sad: f64,
113        frame_pixels: u32,
114    ) -> Self {
115        // Normalise spatial variance: typical max ≈ 255² / 4 ≈ 16256.0
116        let spatial_complexity = (spatial_var / 16256.0_f32).min(1.0).max(0.0);
117
118        // Normalise inter-frame SAD: max possible = 255 * pixels
119        let max_sad = 255.0_f64 * frame_pixels as f64;
120        let normalised_sad = if max_sad > 0.0 {
121            (inter_frame_sad / max_sad).min(1.0).max(0.0) as f32
122        } else {
123            0.0
124        };
125
126        // Hard scene cut: SAD-based threshold (>15% of pixels fully changed)
127        let is_scene_cut = normalised_sad > 0.15;
128        let temporal_complexity = normalised_sad;
129
130        Self {
131            frame_index,
132            spatial_complexity,
133            temporal_complexity,
134            normalised_sad,
135            is_scene_cut,
136        }
137    }
138
139    /// Classify this frame's content type.
140    #[must_use]
141    pub fn classify(&self) -> SceneContentType {
142        if self.is_scene_cut {
143            return SceneContentType::SceneCut;
144        }
145        // Detect transition: moderate temporal change, lower spatial complexity
146        if self.temporal_complexity > 0.06
147            && self.temporal_complexity < 0.15
148            && self.spatial_complexity < 0.3
149        {
150            return SceneContentType::Transition;
151        }
152        // Threshold for high-motion
153        if self.temporal_complexity >= 0.15 {
154            return SceneContentType::HighMotion;
155        }
156        // Threshold for mid-motion
157        if self.temporal_complexity >= 0.04 {
158            return SceneContentType::MidMotion;
159        }
160        SceneContentType::StaticScene
161    }
162}
163
164// ─────────────────────────────────────────────────────────────────────────────
165// Scene descriptor
166// ─────────────────────────────────────────────────────────────────────────────
167
168/// A contiguous run of frames that share the same scene.
169#[derive(Debug, Clone)]
170pub struct Scene {
171    /// Index of the first frame in this scene.
172    pub start_frame: u64,
173    /// Index of the last frame in this scene (inclusive).
174    pub end_frame: u64,
175    /// Dominant content classification for this scene.
176    pub content_type: SceneContentType,
177    /// Average spatial complexity across the scene.
178    pub avg_spatial: f32,
179    /// Average temporal complexity across the scene.
180    pub avg_temporal: f32,
181}
182
183impl Scene {
184    /// Number of frames in the scene.
185    #[must_use]
186    pub fn frame_count(&self) -> u64 {
187        self.end_frame.saturating_sub(self.start_frame) + 1
188    }
189
190    /// Bit-allocation multiplier for this scene.
191    #[must_use]
192    pub fn bit_multiplier(&self) -> f32 {
193        // Blend content-type multiplier with direct spatial complexity
194        let ct_mult = self.content_type.complexity_multiplier();
195        let spatial_boost = 1.0 + 0.3 * self.avg_spatial;
196        0.6 * ct_mult + 0.4 * spatial_boost
197    }
198}
199
200// ─────────────────────────────────────────────────────────────────────────────
201// Scene-adaptive allocator
202// ─────────────────────────────────────────────────────────────────────────────
203
/// Tunable parameters of the scene-adaptive allocator.
#[derive(Debug, Clone)]
pub struct SceneAdaptiveConfig {
    /// Target average bitrate, in bits per second.
    pub target_bitrate: u64,
    /// Frame rate, in frames per second.
    pub frame_rate: f64,
    /// SAD threshold for scene-cut detection, as the fraction of pixels that
    /// must differ (0.0–1.0). Defaults to 0.15.
    ///
    /// NOTE(review): nothing in this file reads this field;
    /// `FrameContentMetrics::from_raw` applies a fixed 0.15 cutoff instead.
    pub scene_cut_threshold: f32,
    /// Minimum scene length, in frames, before another cut is accepted.
    /// Keeps very short "flash" scenes from dominating allocation.
    pub min_scene_frames: u32,
    /// Upper clamp on a frame's budget, as a multiple of the average
    /// per-frame budget. Default: 4.0.
    pub max_per_frame_ratio: f32,
    /// Lower clamp on a frame's budget, as a multiple of the average
    /// per-frame budget. Default: 0.1.
    pub min_per_frame_ratio: f32,
}

impl Default for SceneAdaptiveConfig {
    /// 4 Mbps at 30 fps, a 0.15 cut threshold, scenes of at least 4 frames,
    /// and per-frame budgets clamped to `[0.1, 4.0]` times the average.
    fn default() -> Self {
        Self {
            target_bitrate: 4_000_000,
            frame_rate: 30.0,
            scene_cut_threshold: 0.15,
            min_scene_frames: 4,
            max_per_frame_ratio: 4.0,
            min_per_frame_ratio: 0.10,
        }
    }
}

impl SceneAdaptiveConfig {
    /// Average per-frame bit budget implied by `target_bitrate` and
    /// `frame_rate`.
    ///
    /// Returns `0.0` when `frame_rate` is not strictly positive (this
    /// includes NaN), which avoids dividing by zero.
    #[must_use]
    pub fn avg_bits_per_frame(&self) -> f64 {
        (self.frame_rate > 0.0)
            .then(|| self.target_bitrate as f64 / self.frame_rate)
            .unwrap_or(0.0)
    }
}
248
249/// Per-frame bit target emitted by the allocator.
250#[derive(Debug, Clone)]
251pub struct FrameBitTarget {
252    /// Frame index (presentation order).
253    pub frame_index: u64,
254    /// Recommended bit budget for this frame.
255    pub target_bits: u64,
256    /// Content classification.
257    pub content_type: SceneContentType,
258    /// Allocation multiplier applied (for diagnostics).
259    pub multiplier: f32,
260}
261
262/// Scene-adaptive bitrate allocator.
263///
264/// Feed frame metrics in presentation order via [`Self::push_frame`]; call
265/// [`Self::flush`] at end-of-stream to ensure the last scene is fully allocated.
266/// Retrieve frame bit targets via [`Self::drain_targets`].
267pub struct SceneAdaptiveAllocator {
268    config: SceneAdaptiveConfig,
269    /// Pending frames not yet assigned to a scene.
270    pending: Vec<FrameContentMetrics>,
271    /// Completed scenes.
272    scenes: Vec<Scene>,
273    /// Allocated frame targets ready for consumption.
274    targets: Vec<FrameBitTarget>,
275    /// Frames in the current open scene.
276    current_scene_frames: Vec<FrameContentMetrics>,
277    /// Frames since the last accepted scene cut.
278    frames_since_cut: u32,
279}
280
281impl SceneAdaptiveAllocator {
282    /// Create a new allocator with the given configuration.
283    #[must_use]
284    pub fn new(config: SceneAdaptiveConfig) -> Self {
285        Self {
286            config,
287            pending: Vec::new(),
288            scenes: Vec::new(),
289            targets: Vec::new(),
290            current_scene_frames: Vec::new(),
291            frames_since_cut: 0,
292        }
293    }
294
295    /// Push metrics for the next frame in presentation order.
296    ///
297    /// When a scene cut is detected (and `min_scene_frames` has elapsed),
298    /// the current scene is closed and bit targets are emitted for it.
299    pub fn push_frame(&mut self, metrics: FrameContentMetrics) -> CodecResult<()> {
300        self.frames_since_cut += 1;
301
302        let is_cut = metrics.is_scene_cut
303            && self.frames_since_cut >= self.config.min_scene_frames
304            && !self.current_scene_frames.is_empty();
305
306        if is_cut {
307            self.close_current_scene()?;
308            self.frames_since_cut = 0;
309        }
310
311        self.current_scene_frames.push(metrics);
312        Ok(())
313    }
314
315    /// Flush all remaining buffered frames and emit their bit targets.
316    ///
317    /// Must be called at end-of-stream.
318    pub fn flush(&mut self) -> CodecResult<()> {
319        if !self.current_scene_frames.is_empty() {
320            self.close_current_scene()?;
321        }
322        Ok(())
323    }
324
325    /// Drain all available [`FrameBitTarget`] entries.
326    pub fn drain_targets(&mut self) -> Vec<FrameBitTarget> {
327        std::mem::take(&mut self.targets)
328    }
329
330    /// Return a reference to completed scenes (for diagnostics / tests).
331    #[must_use]
332    pub fn scenes(&self) -> &[Scene] {
333        &self.scenes
334    }
335
336    // ─────────────────────────────────────────────────────────────────────
337    // Internal helpers
338    // ─────────────────────────────────────────────────────────────────────
339
340    /// Close the current scene, build a [`Scene`] descriptor, and emit
341    /// per-frame bit targets for every frame in that scene.
342    fn close_current_scene(&mut self) -> CodecResult<()> {
343        if self.current_scene_frames.is_empty() {
344            return Ok(());
345        }
346
347        let frames = std::mem::take(&mut self.current_scene_frames);
348
349        // Aggregate statistics
350        let n = frames.len() as f32;
351        let avg_spatial = frames.iter().map(|f| f.spatial_complexity).sum::<f32>() / n;
352        let avg_temporal = frames.iter().map(|f| f.temporal_complexity).sum::<f32>() / n;
353
354        // Dominant content type: most complex frame wins for scene-cuts; otherwise majority
355        let content_type = dominant_content_type(&frames);
356
357        let start_frame = frames
358            .first()
359            .ok_or_else(|| CodecError::InvalidData("empty scene".into()))?
360            .frame_index;
361        let end_frame = frames
362            .last()
363            .ok_or_else(|| CodecError::InvalidData("empty scene".into()))?
364            .frame_index;
365
366        let scene = Scene {
367            start_frame,
368            end_frame,
369            content_type,
370            avg_spatial,
371            avg_temporal,
372        };
373
374        let scene_mult = scene.bit_multiplier();
375        let avg_bits = self.config.avg_bits_per_frame();
376
377        // Per-frame allocation: scale by individual frame complexity within
378        // the scene, normalised so the scene total equals the budgeted total.
379        let scene_total_budget = avg_bits * frames.len() as f64 * scene_mult as f64;
380
381        // Compute unnormalised weights from per-frame spatial+temporal
382        let weights: Vec<f32> = frames
383            .iter()
384            .map(|f| {
385                let ct_mult = f.classify().complexity_multiplier();
386                ct_mult * (1.0 + 0.5 * f.spatial_complexity + 0.5 * f.temporal_complexity)
387            })
388            .collect();
389        let weight_sum: f32 = weights.iter().sum();
390        let weight_sum = if weight_sum > 0.0 { weight_sum } else { 1.0 };
391
392        for (frame_metrics, w) in frames.iter().zip(weights.iter()) {
393            let raw_bits = scene_total_budget * (*w as f64 / weight_sum as f64);
394            // Clamp to [min, max] ratios
395            let min_bits = avg_bits * self.config.min_per_frame_ratio as f64;
396            let max_bits = avg_bits * self.config.max_per_frame_ratio as f64;
397            let target_bits = raw_bits.min(max_bits).max(min_bits) as u64;
398
399            self.targets.push(FrameBitTarget {
400                frame_index: frame_metrics.frame_index,
401                target_bits,
402                content_type: frame_metrics.classify(),
403                multiplier: *w / (weight_sum / frames.len() as f32),
404            });
405        }
406
407        self.scenes.push(scene);
408        Ok(())
409    }
410}
411
412/// Pick the dominant [`SceneContentType`] from a slice of frame metrics.
413///
414/// Scene-cut frames take priority; otherwise use the most-frequent type.
415fn dominant_content_type(frames: &[FrameContentMetrics]) -> SceneContentType {
416    // If any frame is a scene cut the whole scene is labelled as one
417    if frames.iter().any(|f| f.is_scene_cut) {
418        return SceneContentType::SceneCut;
419    }
420    let mut counts = [0u32; 5]; // indexed by variant discriminant
421    for f in frames {
422        let idx = match f.classify() {
423            SceneContentType::HighMotion => 0,
424            SceneContentType::MidMotion => 1,
425            SceneContentType::StaticScene => 2,
426            SceneContentType::Transition => 3,
427            SceneContentType::SceneCut => 4,
428        };
429        counts[idx] += 1;
430    }
431    let max_idx = counts
432        .iter()
433        .enumerate()
434        .max_by_key(|&(_, &c)| c)
435        .map(|(i, _)| i)
436        .unwrap_or(2);
437    match max_idx {
438        0 => SceneContentType::HighMotion,
439        1 => SceneContentType::MidMotion,
440        3 => SceneContentType::Transition,
441        4 => SceneContentType::SceneCut,
442        _ => SceneContentType::StaticScene,
443    }
444}
445
446// ─────────────────────────────────────────────────────────────────────────────
447// Tests
448// ─────────────────────────────────────────────────────────────────────────────
449
#[cfg(test)]
mod tests {
    use super::*;

    // ── Shared fixtures ──────────────────────────────────────────────────────

    /// Luma pixel count of a 1920×1080 frame, used by the `from_raw` tests.
    const FULL_HD_PIXELS: u32 = 1920 * 1080;

    /// Hand-build metrics (bypassing `from_raw`) with `normalised_sad`
    /// mirroring the temporal complexity.
    fn frame(frame_index: u64, spatial: f32, temporal: f32, is_cut: bool) -> FrameContentMetrics {
        FrameContentMetrics {
            frame_index,
            spatial_complexity: spatial,
            temporal_complexity: temporal,
            normalised_sad: temporal,
            is_scene_cut: is_cut,
        }
    }

    /// Metrics with a fixed spatial complexity of 0.3, for allocator tests.
    fn make_metrics(frame_index: u64, temporal: f32, is_cut: bool) -> FrameContentMetrics {
        frame(frame_index, 0.3, temporal, is_cut)
    }

    /// Mean `target_bits` over a slice of targets.
    fn mean_target_bits(targets: &[FrameBitTarget]) -> f64 {
        let total: f64 = targets.iter().map(|t| t.target_bits as f64).sum();
        total / targets.len() as f64
    }

    // ── FrameContentMetrics ──────────────────────────────────────────────────

    #[test]
    fn test_from_raw_static_frame() {
        let raw_sad = 0.001 * 1920.0 * 1080.0;
        let metrics = FrameContentMetrics::from_raw(0, 100.0, raw_sad, FULL_HD_PIXELS);
        assert!(!metrics.is_scene_cut);
        assert!(metrics.spatial_complexity > 0.0 && metrics.spatial_complexity < 1.0);
        assert!(metrics.temporal_complexity < 0.15);
    }

    #[test]
    fn test_from_raw_scene_cut() {
        // 20% of pixels fully changed → must be flagged as a scene cut.
        let raw_sad = 0.20 * 255.0 * f64::from(FULL_HD_PIXELS);
        let metrics = FrameContentMetrics::from_raw(5, 8000.0, raw_sad, FULL_HD_PIXELS);
        assert!(metrics.is_scene_cut);
        assert_eq!(metrics.classify(), SceneContentType::SceneCut);
    }

    #[test]
    fn test_classify_static() {
        let metrics = frame(0, 0.1, 0.01, false);
        assert_eq!(metrics.classify(), SceneContentType::StaticScene);
    }

    #[test]
    fn test_classify_high_motion() {
        let metrics = frame(1, 0.5, 0.40, false);
        assert_eq!(metrics.classify(), SceneContentType::HighMotion);
    }

    #[test]
    fn test_classify_transition() {
        let metrics = frame(2, 0.20, 0.10, false);
        assert_eq!(metrics.classify(), SceneContentType::Transition);
    }

    // ── SceneContentType ─────────────────────────────────────────────────────

    #[test]
    fn test_multipliers_ordering() {
        let high = SceneContentType::HighMotion.complexity_multiplier();
        let mid = SceneContentType::MidMotion.complexity_multiplier();
        let still = SceneContentType::StaticScene.complexity_multiplier();
        let cut = SceneContentType::SceneCut.complexity_multiplier();

        // High motion must be awarded more bits than static content.
        assert!(high > still);
        // A scene cut must get at least as much as mid-motion content.
        assert!(cut >= mid);
    }

    // ── SceneAdaptiveAllocator ───────────────────────────────────────────────

    #[test]
    fn test_allocator_single_scene() {
        let mut alloc = SceneAdaptiveAllocator::new(SceneAdaptiveConfig {
            target_bitrate: 1_000_000,
            frame_rate: 10.0,
            ..Default::default()
        });
        for index in 0..10u64 {
            alloc.push_frame(make_metrics(index, 0.05, false)).unwrap();
        }
        alloc.flush().unwrap();

        let targets = alloc.drain_targets();
        assert_eq!(targets.len(), 10, "all 10 frames should have targets");
        for target in &targets {
            assert!(target.target_bits > 0, "target_bits must be positive");
        }
    }

    #[test]
    fn test_allocator_two_scenes() {
        let mut alloc = SceneAdaptiveAllocator::new(SceneAdaptiveConfig {
            target_bitrate: 2_000_000,
            frame_rate: 25.0,
            min_scene_frames: 2,
            ..Default::default()
        });

        // Frames 0–4: static content.
        for index in 0..5u64 {
            alloc.push_frame(make_metrics(index, 0.01, false)).unwrap();
        }
        // Frame 5: hard cut into the second scene.
        alloc.push_frame(make_metrics(5, 0.50, true)).unwrap();
        // Frames 6–9: high motion.
        for index in 6..10u64 {
            alloc.push_frame(make_metrics(index, 0.35, false)).unwrap();
        }
        alloc.flush().unwrap();

        let targets = alloc.drain_targets();
        assert_eq!(targets.len(), 10);

        // The static scene must receive fewer bits on average than the
        // high-motion scene that follows it.
        let (first_scene, second_scene) = targets.split_at(5);
        let scene1_avg = mean_target_bits(first_scene);
        let scene2_avg = mean_target_bits(second_scene);
        assert!(
            scene2_avg > scene1_avg,
            "high-motion scene should get more bits: {} vs {}",
            scene2_avg,
            scene1_avg
        );
    }

    #[test]
    fn test_allocator_clamps_targets() {
        let cfg = SceneAdaptiveConfig {
            target_bitrate: 500_000,
            frame_rate: 30.0,
            max_per_frame_ratio: 3.0,
            min_per_frame_ratio: 0.2,
            ..Default::default()
        };
        let avg_bits = cfg.avg_bits_per_frame();
        let upper = cfg.max_per_frame_ratio as f64 + 1e-6;
        let lower = cfg.min_per_frame_ratio as f64 - 1e-6;

        let mut alloc = SceneAdaptiveAllocator::new(cfg.clone());
        // Extreme temporal complexity, trying to push past the upper clamp.
        for index in 0..30u64 {
            alloc.push_frame(make_metrics(index, 0.99, false)).unwrap();
        }
        alloc.flush().unwrap();

        for target in alloc.drain_targets().iter() {
            let ratio = target.target_bits as f64 / avg_bits;
            assert!(
                ratio <= upper,
                "ratio {} exceeds max {}",
                ratio,
                cfg.max_per_frame_ratio
            );
            assert!(
                ratio >= lower,
                "ratio {} below min {}",
                ratio,
                cfg.min_per_frame_ratio
            );
        }
    }

    #[test]
    fn test_scene_descriptors() {
        let mut alloc = SceneAdaptiveAllocator::new(SceneAdaptiveConfig {
            min_scene_frames: 2,
            ..Default::default()
        });
        for index in 0..4u64 {
            alloc.push_frame(make_metrics(index, 0.01, false)).unwrap();
        }
        alloc.push_frame(make_metrics(4, 0.50, true)).unwrap();
        for index in 5..8u64 {
            alloc.push_frame(make_metrics(index, 0.20, false)).unwrap();
        }
        alloc.flush().unwrap();

        let scenes = alloc.scenes();
        assert_eq!(scenes.len(), 2, "should detect exactly 2 scenes");
        assert_eq!(scenes[0].start_frame, 0);
        assert_eq!(scenes[0].end_frame, 3);
        assert_eq!(scenes[1].start_frame, 4);
    }

    #[test]
    fn test_avg_bits_per_frame() {
        let cfg = SceneAdaptiveConfig {
            target_bitrate: 3_000_000,
            frame_rate: 30.0,
            ..Default::default()
        };
        let expected = 3_000_000.0 / 30.0;
        let got = cfg.avg_bits_per_frame();
        let error = (got - expected).abs();
        assert!(error < 1.0, "expected ~{expected}, got {got}");
    }
}
664            (got - expected).abs() < 1.0,
665            "expected ~{expected}, got {got}"
666        );
667    }
668}