Skip to main content

viser_complexity/
lib.rs

1use serde::{Deserialize, Serialize};
2use std::fmt;
3use std::time::Duration;
4use tokio::process::Command;
5use viser_ffmpeg::{ffmpeg_path, probe};
6
7/// Classified scene type based on spatial/temporal complexity.
8#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
9pub enum SceneClass {
10    /// Black / fade / freeze frame (near-zero motion and detail)
11    Black,
12    /// Static / talking-heads (low spatial, low temporal)
13    Static,
14    /// Detailed / landscape (high spatial, low temporal)
15    Detailed,
16    /// Motion / action (low spatial, high temporal)
17    Motion,
18    /// Complex / crowd / nature (high spatial, high temporal)
19    Complex,
20}
21
22impl fmt::Display for SceneClass {
23    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
24        match self {
25            SceneClass::Black => write!(f, "black"),
26            SceneClass::Static => write!(f, "static"),
27            SceneClass::Detailed => write!(f, "detailed"),
28            SceneClass::Motion => write!(f, "motion"),
29            SceneClass::Complex => write!(f, "complex"),
30        }
31    }
32}
33
34#[derive(Debug, Clone, Serialize, Deserialize)]
35pub struct FrameComplexity {
36    pub pts: Duration,
37    pub spatial: f64,    // normalized entropy (0-1)
38    pub temporal: f64,   // inter-frame luma difference (0-255)
39    pub dct_energy: f64, // average DCT coefficient energy
40}
41
42#[derive(Debug, Clone, Serialize, Deserialize)]
43pub struct SegmentComplexity {
44    pub start: Duration,
45    pub end: Duration,
46    pub duration: Duration,
47    pub avg_spatial: f64,
48    pub avg_temporal: f64,
49    pub max_spatial: f64,
50    pub max_temporal: f64,
51    pub score: f64, // combined 0-100 complexity score
52    pub scene_class: SceneClass,
53}
54
55#[derive(Debug, Clone, Serialize, Deserialize)]
56pub struct Profile {
57    pub frames: Vec<FrameComplexity>,
58    pub segments: Vec<SegmentComplexity>,
59    pub avg_spatial: f64,
60    pub avg_temporal: f64,
61    pub overall_score: f64,
62}
63
/// Tuning options for `analyze`.
#[derive(Clone, Debug)]
pub struct AnalyzeOpts {
    /// Length of each aggregation segment; zero makes `analyze` use two seconds.
    pub segment_duration: Duration,
    /// Analyze every n-th frame; values of zero or below are treated as 1 by `analyze`.
    pub subsample: i32,
}
69
70impl Default for AnalyzeOpts {
71    fn default() -> Self {
72        Self { segment_duration: Duration::from_secs(2), subsample: 1 }
73    }
74}
75
76/// Extracts per-frame complexity metrics and aggregates them into segments.
77pub async fn analyze(path: &str, opts: AnalyzeOpts) -> anyhow::Result<Profile> {
78    let seg_dur = if opts.segment_duration.is_zero() {
79        Duration::from_secs(2)
80    } else {
81        opts.segment_duration
82    };
83    let subsample = if opts.subsample <= 0 { 1 } else { opts.subsample };
84
85    let probe_result = probe(path).await?;
86    let total_duration = Duration::from_secs_f64(probe_result.format.duration);
87
88    let select_filter =
89        if subsample > 1 { format!("select='not(mod(n\\,{subsample}))',") } else { String::new() };
90
91    let filter = format!("{select_filter}entropy,signalstats,metadata=mode=print:file=-");
92    let args = ["-i", path, "-vf", &filter, "-f", "null", "-"];
93
94    let output = Command::new(ffmpeg_path())
95        .args(args)
96        .stdout(std::process::Stdio::piped())
97        .stderr(std::process::Stdio::piped())
98        .output()
99        .await?;
100
101    if !output.status.success() {
102        let stderr = String::from_utf8_lossy(&output.stderr);
103        anyhow::bail!("complexity analysis failed: {stderr}");
104    }
105
106    let stdout = String::from_utf8_lossy(&output.stdout);
107    let frames = parse_complexity_output(&stdout);
108
109    if frames.is_empty() {
110        anyhow::bail!("no frames analyzed");
111    }
112
113    let segments = aggregate_segments(&frames, total_duration, seg_dur);
114
115    let n = frames.len() as f64;
116    let avg_spatial: f64 = frames.iter().map(|f| f.spatial).sum::<f64>() / n;
117    let avg_temporal: f64 = frames.iter().map(|f| f.temporal).sum::<f64>() / n;
118    let overall_score = compute_score(avg_spatial, avg_temporal);
119
120    Ok(Profile { frames, segments, avg_spatial, avg_temporal, overall_score })
121}
122
123fn parse_complexity_output(output: &str) -> Vec<FrameComplexity> {
124    let mut frames = Vec::new();
125    let mut current =
126        FrameComplexity { pts: Duration::ZERO, spatial: 0.0, temporal: 0.0, dct_energy: 0.0 };
127    let mut has_pts = false;
128
129    for line in output.lines() {
130        if line.starts_with("frame:") {
131            if has_pts {
132                frames.push(current.clone());
133            }
134            current = FrameComplexity {
135                pts: Duration::ZERO,
136                spatial: 0.0,
137                temporal: 0.0,
138                dct_energy: 0.0,
139            };
140            has_pts = false;
141
142            if let Some(pts_time) = extract_field(line, "pts_time:") {
143                if let Ok(seconds) = pts_time.parse::<f64>() {
144                    current.pts = Duration::from_secs_f64(seconds);
145                    has_pts = true;
146                }
147            }
148            continue;
149        }
150
151        if let Some(val) = line.strip_prefix("lavfi.entropy.normalized_entropy.normal.Y=") {
152            current.spatial = val.parse().unwrap_or(0.0);
153        }
154        if let Some(val) = line.strip_prefix("lavfi.signalstats.YDIF=") {
155            current.temporal = val.parse().unwrap_or(0.0);
156        }
157        if let Some(val) = line.strip_prefix("lavfi.signalstats.YHIGH=") {
158            current.dct_energy = val.parse().unwrap_or(0.0);
159        }
160        if let Some(val) = line.strip_prefix("lavfi.signalstats.YLOW=") {
161            let y_low: f64 = val.parse().unwrap_or(0.0);
162            current.dct_energy -= y_low;
163            if current.dct_energy < 0.0 {
164                current.dct_energy = 0.0;
165            }
166        }
167    }
168
169    if has_pts {
170        frames.push(current);
171    }
172
173    frames
174}
175
/// Returns the whitespace-delimited token that follows the first occurrence of `key`
/// in `line`, skipping any whitespace between the key and the token.
///
/// Yields `None` when `key` does not occur, and `Some("")` when nothing but
/// whitespace follows it.
fn extract_field<'a>(line: &'a str, key: &str) -> Option<&'a str> {
    let (_, tail) = line.split_once(key)?;
    // `split` always yields at least one (possibly empty) piece, so this is never `None`.
    tail.trim_start().split(char::is_whitespace).next()
}
183
184fn aggregate_segments(
185    frames: &[FrameComplexity],
186    total_duration: Duration,
187    seg_duration: Duration,
188) -> Vec<SegmentComplexity> {
189    let mut segments = Vec::new();
190    let mut seg_start = Duration::ZERO;
191
192    while seg_start < total_duration {
193        let seg_end = (seg_start + seg_duration).min(total_duration);
194
195        let seg_frames: Vec<&FrameComplexity> =
196            frames.iter().filter(|f| f.pts >= seg_start && f.pts < seg_end).collect();
197
198        if !seg_frames.is_empty() {
199            let spatial: Vec<f64> = seg_frames.iter().map(|f| f.spatial).collect();
200            let temporal: Vec<f64> = seg_frames.iter().map(|f| f.temporal).collect();
201            let dct: Vec<f64> = seg_frames.iter().map(|f| f.dct_energy).collect();
202
203            let avg_dct = mean(&dct);
204            let avg_s = mean(&spatial);
205            let avg_t = mean(&temporal);
206
207            let scene_class = classify_scene(avg_s, avg_t, avg_dct);
208
209            segments.push(SegmentComplexity {
210                start: seg_start,
211                end: seg_end,
212                duration: seg_end - seg_start,
213                avg_spatial: avg_s,
214                avg_temporal: avg_t,
215                max_spatial: max_val(&spatial),
216                max_temporal: max_val(&temporal),
217                score: compute_score_with_dct(avg_s, avg_t, avg_dct),
218                scene_class,
219            });
220        }
221
222        seg_start = seg_end;
223    }
224
225    segments
226}
227
/// Combines spatial and temporal complexity into a single score.
///
/// Spatial entropy is rescaled so that 0.5 maps to 0 and 1.0 maps to 100 (clamped to
/// that range); temporal difference is multiplied by 3.33 and capped at 100. The two
/// parts are blended 60 % spatial / 40 % temporal.
fn compute_score(spatial: f64, temporal: f64) -> f64 {
    const SPATIAL_WEIGHT: f64 = 0.6;
    const TEMPORAL_WEIGHT: f64 = 0.4;

    let spatial_part = ((spatial - 0.5) * 200.0).clamp(0.0, 100.0);
    let temporal_part = (temporal * 3.33).min(100.0);

    spatial_part * SPATIAL_WEIGHT + temporal_part * TEMPORAL_WEIGHT
}
233
/// Combines spatial, detail-energy and temporal complexity into a single score.
///
/// Spatial entropy is rescaled so that 0.5 maps to 0 and 1.0 maps to 100 (clamped);
/// temporal difference is multiplied by 3.33 and capped at 100; `dct_energy` is halved
/// and capped at 100. The blend is 40 % spatial, 30 % detail energy, 30 % temporal.
fn compute_score_with_dct(spatial: f64, temporal: f64, dct_energy: f64) -> f64 {
    const SPATIAL_WEIGHT: f64 = 0.4;
    const DCT_WEIGHT: f64 = 0.3;
    const TEMPORAL_WEIGHT: f64 = 0.3;

    let spatial_part = ((spatial - 0.5) * 200.0).clamp(0.0, 100.0);
    let temporal_part = (temporal * 3.33).min(100.0);
    let dct_part = (dct_energy * 0.5).min(100.0);

    spatial_part * SPATIAL_WEIGHT + dct_part * DCT_WEIGHT + temporal_part * TEMPORAL_WEIGHT
}
240
241/// Classify a segment by its spatial/temporal complexity profile.
242pub fn classify_scene(spatial: f64, temporal: f64, dct_energy: f64) -> SceneClass {
243    let s = spatial;
244    let t = temporal;
245    let d = dct_energy;
246
247    // Black / fade: near-zero motion, entropy, and detail
248    if t < 1.0 && s <= 0.3 && d < 5.0 {
249        return SceneClass::Black;
250    }
251
252    // Motion / action: high inter-frame difference, moderate detail
253    if t > 8.0 && s < 0.65 {
254        return SceneClass::Motion;
255    }
256
257    // Complex / crowd / nature: high texture + high motion
258    if s > 0.7 && t > 5.0 {
259        return SceneClass::Complex;
260    }
261
262    // Detailed / landscape: high entropy, low motion
263    if s >= 0.65 && t <= 5.0 {
264        return SceneClass::Detailed;
265    }
266
267    // Static / talking-heads: low motion, moderate entropy
268    SceneClass::Static
269}
270
/// Arithmetic mean of `vals`; an empty slice yields `0.0`.
fn mean(vals: &[f64]) -> f64 {
    match vals.len() {
        0 => 0.0,
        count => vals.iter().sum::<f64>() / count as f64,
    }
}
277
/// Largest value in `vals`; an empty slice yields negative infinity.
fn max_val(vals: &[f64]) -> f64 {
    let mut best = f64::NEG_INFINITY;
    for &candidate in vals {
        best = best.max(candidate);
    }
    best
}
281
282/// Content type classification result.
283#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
284pub enum ContentType {
285    /// Natural video content (film, sports, etc.)
286    Natural,
287    /// Screen content (slides, code, UI, screencasts)
288    Screen,
289}
290
291#[derive(Debug, Clone, Serialize, Deserialize)]
292pub struct ScreenContentDetection {
293    pub content_type: ContentType,
294    pub confidence: f64, // 0-100
295    pub reason: String,
296}
297
298/// Detects whether a video is screen content based on complexity profile heuristics.
299///
300/// Screen content signatures:
301/// - Very high spatial complexity (sharp edges, text)
302/// - Very low temporal complexity (static or near-static)
303/// - High fraction of frames with minimal temporal change
304pub fn detect_screen_content(profile: &Profile) -> ScreenContentDetection {
305    if profile.frames.is_empty() {
306        return ScreenContentDetection {
307            content_type: ContentType::Natural,
308            confidence: 0.0,
309            reason: "no frames analyzed".into(),
310        };
311    }
312
313    // Screen content heuristics
314    let static_fraction = profile.frames.iter().filter(|f| f.temporal < 1.5).count() as f64
315        / profile.frames.len() as f64;
316
317    let has_sharp_edges = profile.avg_spatial > 0.75;
318    let is_mostly_static = static_fraction > 0.8;
319    let high_dct_low_temporal = profile.avg_temporal < 2.0
320        && (profile.segments.iter().any(|s| s.avg_spatial > 0.7) || profile.avg_spatial > 0.7);
321
322    let score = if has_sharp_edges && is_mostly_static {
323        // Classic slide/UI: sharp edges, barely moves
324        90.0
325    } else if high_dct_low_temporal {
326        // Code/screenshot: high DCT energy but low motion
327        70.0
328    } else if is_mostly_static && profile.avg_spatial > 0.6 {
329        // Mostly static with moderate edges
330        50.0
331    } else if static_fraction > 0.6 && profile.avg_temporal < 3.0 {
332        // Leaning static
333        30.0
334    } else {
335        0.0
336    };
337
338    let content_type = if score >= 50.0 { ContentType::Screen } else { ContentType::Natural };
339    let reason = if score >= 90.0 {
340        format!(
341            "sharp edges (spatial={:.2}) + mostly static ({:.0}% frames) — classic screen content",
342            profile.avg_spatial,
343            static_fraction * 100.0
344        )
345    } else if score >= 70.0 {
346        format!(
347            "high spatial/DCT energy (spatial={:.2}) with low temporal ({:.1}) — likely screen content",
348            profile.avg_spatial, profile.avg_temporal
349        )
350    } else if score >= 50.0 {
351        format!(
352            "mostly static ({:.0}% frames) with moderate spatial ({:.2}) — possible screen content",
353            static_fraction * 100.0,
354            profile.avg_spatial
355        )
356    } else if score >= 30.0 {
357        format!("leaning static ({:.0}% frames)", static_fraction * 100.0)
358    } else {
359        "natural video content detected".into()
360    };
361
362    ScreenContentDetection { content_type, confidence: score, reason }
363}
364
#[cfg(test)]
mod screen_tests {
    use super::*;

    /// Builds a segment-less profile of 100 frames spaced 0.04 s apart that all carry the
    /// same metrics; the profile averages are set to those same values.
    fn uniform_profile(spatial: f64, temporal: f64, dct: f64) -> Profile {
        let frames = (0..100)
            .map(|i| FrameComplexity {
                pts: Duration::from_secs_f64(i as f64 * 0.04),
                spatial,
                temporal,
                dct_energy: dct,
            })
            .collect();

        Profile {
            frames,
            segments: Vec::new(),
            avg_spatial: spatial,
            avg_temporal: temporal,
            overall_score: 0.0,
        }
    }

    #[test]
    fn test_detect_screen_content_slides() {
        // All frames have sharp edges, no motion
        let detection = detect_screen_content(&uniform_profile(0.85, 0.2, 100.0));

        assert_eq!(detection.content_type, ContentType::Screen);
        assert!(detection.confidence >= 90.0);
    }

    #[test]
    fn test_detect_screen_content_natural_video() {
        let detection = detect_screen_content(&uniform_profile(0.5, 10.0, 50.0));

        assert_eq!(detection.content_type, ContentType::Natural);
        assert_eq!(detection.confidence, 0.0);
    }

    #[test]
    fn test_detect_screen_content_empty() {
        let empty = Profile {
            frames: Vec::new(),
            segments: Vec::new(),
            avg_spatial: 0.0,
            avg_temporal: 0.0,
            overall_score: 0.0,
        };
        let detection = detect_screen_content(&empty);

        assert_eq!(detection.content_type, ContentType::Natural);
        assert_eq!(detection.confidence, 0.0);
    }

    #[test]
    fn test_detect_screen_content_code_capture() {
        // High spatial, low temporal, moderate DCT
        let detection = detect_screen_content(&uniform_profile(0.78, 1.5, 80.0));

        assert_eq!(detection.content_type, ContentType::Screen);
        assert!(detection.confidence >= 70.0, "expected >= 70, got {}", detection.confidence);
    }
}
440
#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance for comparisons that should be exact up to rounding.
    const EPS: f64 = 1e-9;

    /// Shorthand for a frame at `pts_secs` seconds with the given metrics.
    fn frame(pts_secs: f64, spatial: f64, temporal: f64, dct: f64) -> FrameComplexity {
        let pts = Duration::from_secs_f64(pts_secs);
        FrameComplexity { pts, spatial, temporal, dct_energy: dct }
    }

    /// Shorthand for a whole-second duration.
    fn secs(n: u64) -> Duration {
        Duration::from_secs(n)
    }

    /// True when `actual` is within `tol` of `expected`.
    fn near(actual: f64, expected: f64, tol: f64) -> bool {
        (actual - expected).abs() < tol
    }

    // --- mean / max_val ---

    #[test]
    fn test_mean_empty() {
        assert!(near(mean(&[]), 0.0, EPS));
    }

    #[test]
    fn test_mean_single() {
        assert!(near(mean(&[42.0]), 42.0, EPS));
    }

    #[test]
    fn test_mean_multiple() {
        assert!(near(mean(&[1.0, 2.0, 3.0]), 2.0, EPS));
    }

    #[test]
    fn test_max_val_empty_negative_inf() {
        assert_eq!(max_val(&[]), f64::NEG_INFINITY);
    }

    #[test]
    fn test_max_val() {
        assert!(near(max_val(&[1.0, 5.0, 3.0]), 5.0, EPS));
    }

    // --- scoring ---

    #[test]
    fn test_compute_score_bounds() {
        let score = compute_score(0.5, 0.0);
        assert!(score >= 0.0 && score <= 100.0);
    }

    #[test]
    fn test_compute_score_zero_input() {
        assert!(compute_score(0.0, 0.0) >= 0.0);
    }

    #[test]
    fn test_compute_score_high_input() {
        // temporal 30*3.33=99.9, spatial (1-0.5)*200=100
        let score = compute_score(1.0, 30.0);
        assert!(score <= 100.0);
        assert!(score > 50.0);
    }

    #[test]
    fn test_compute_score_with_dct() {
        assert!(compute_score_with_dct(0.5, 0.0, 0.0) >= 0.0);
    }

    // --- parsing ---

    #[test]
    fn test_parse_complexity_output_empty() {
        assert!(parse_complexity_output("").is_empty());
    }

    #[test]
    fn test_parse_complexity_output_basic() {
        let output = concat!(
            "frame: 1 pts_time:0.000\n",
            "lavfi.entropy.normalized_entropy.normal.Y=0.6\n",
            "lavfi.signalstats.YDIF=2.5\n",
            "lavfi.signalstats.YHIGH=100.0\n",
            "lavfi.signalstats.YLOW=30.0\n",
            "frame: 2 pts_time:1.000\n",
            "lavfi.entropy.normalized_entropy.normal.Y=0.7\n",
            "lavfi.signalstats.YDIF=3.0\n",
            "lavfi.signalstats.YHIGH=120.0\n",
            "lavfi.signalstats.YLOW=40.0\n",
        );
        let frames = parse_complexity_output(output);
        assert_eq!(frames.len(), 2);

        let first = &frames[0];
        assert!(near(first.spatial, 0.6, EPS));
        assert!(near(first.temporal, 2.5, EPS));
        assert!(near(first.dct_energy, 70.0, EPS)); // 100 - 30

        let second = &frames[1];
        assert!(near(second.spatial, 0.7, EPS));
        assert!(near(second.temporal, 3.0, EPS));
        assert!(near(second.dct_energy, 80.0, EPS)); // 120 - 40
    }

    #[test]
    fn test_parse_complexity_output_handles_partial_data() {
        let output = concat!(
            "frame: 1 pts_time:0.000\n",
            "lavfi.entropy.normalized_entropy.normal.Y=0.5\n",
            "frame: 2 pts_time:1.000\n",
            "lavfi.signalstats.YDIF=1.0\n",
        );
        assert_eq!(parse_complexity_output(output).len(), 2);
    }

    #[test]
    fn test_parse_complexity_output_negative_dct() {
        // If YLOW > YHIGH, dct_energy should clamp to 0
        let output = concat!(
            "frame: 1 pts_time:0.000\n",
            "lavfi.signalstats.YHIGH=30.0\n",
            "lavfi.signalstats.YLOW=50.0\n",
        );
        let frames = parse_complexity_output(output);
        assert!(near(frames[0].dct_energy, 0.0, EPS));
    }

    // --- segment aggregation ---

    #[test]
    fn test_aggregate_segments_single_segment() {
        let frames =
            [frame(0.0, 0.5, 1.0, 10.0), frame(0.5, 0.6, 2.0, 20.0), frame(1.0, 0.7, 3.0, 30.0)];
        let segs = aggregate_segments(&frames, secs(2), secs(2));
        assert_eq!(segs.len(), 1);

        let only = &segs[0];
        assert!(near(only.avg_spatial, 0.6, 0.01));
        assert!(near(only.avg_temporal, 2.0, 0.01));
        assert!(near(only.max_spatial, 0.7, EPS));
        assert_eq!(only.start, Duration::ZERO);
        assert_eq!(only.end, secs(2));
    }

    #[test]
    fn test_aggregate_segments_multiple() {
        let frames = [
            frame(0.0, 0.4, 1.0, 5.0),
            frame(0.5, 0.5, 1.5, 6.0),
            frame(1.0, 0.6, 2.0, 7.0),
            frame(1.5, 0.7, 2.5, 8.0),
            frame(2.0, 0.8, 3.0, 9.0),
            frame(2.5, 0.9, 3.5, 10.0),
        ];
        let segs = aggregate_segments(&frames, secs(3), secs(1));
        assert_eq!(segs.len(), 3);

        let starts: Vec<Duration> = segs.iter().map(|seg| seg.start).collect();
        assert_eq!(starts, vec![secs(0), secs(1), secs(2)]);
    }

    #[test]
    fn test_aggregate_segments_empty_bucket() {
        // Evenly spaced frames with a gap
        let frames = [frame(0.0, 0.5, 1.0, 5.0), frame(3.0, 0.8, 3.0, 10.0)];
        let segs = aggregate_segments(&frames, secs(4), secs(2));
        assert_eq!(segs.len(), 2); // seg 0 has frame[0], seg 1 has frame[1]
    }

    // --- scene classification ---

    #[test]
    fn test_classify_black() {
        assert_eq!(classify_scene(0.2, 0.5, 2.0), SceneClass::Black);
    }

    #[test]
    fn test_classify_static() {
        assert_eq!(classify_scene(0.4, 1.5, 10.0), SceneClass::Static);
    }

    #[test]
    fn test_classify_detailed() {
        assert_eq!(classify_scene(0.7, 3.0, 50.0), SceneClass::Detailed);
    }

    #[test]
    fn test_classify_motion() {
        assert_eq!(classify_scene(0.4, 10.0, 30.0), SceneClass::Motion);
    }

    #[test]
    fn test_classify_complex() {
        assert_eq!(classify_scene(0.8, 6.0, 60.0), SceneClass::Complex);
    }

    #[test]
    fn test_classify_edges() {
        // Boundary conditions
        assert_eq!(classify_scene(0.65, 5.0, 30.0), SceneClass::Detailed);
        assert_eq!(classify_scene(0.6, 9.0, 20.0), SceneClass::Motion);
    }

    #[test]
    fn test_scene_class_display() {
        let expected = [
            (SceneClass::Black, "black"),
            (SceneClass::Static, "static"),
            (SceneClass::Detailed, "detailed"),
            (SceneClass::Motion, "motion"),
            (SceneClass::Complex, "complex"),
        ];
        for (class, label) in expected {
            assert_eq!(class.to_string(), label);
        }
    }

    #[test]
    fn test_segment_has_scene_class() {
        let frames = [frame(0.0, 0.2, 0.5, 2.0)];
        let segs = aggregate_segments(&frames, secs(2), secs(2));
        assert_eq!(segs.len(), 1);
        assert_eq!(segs[0].scene_class, SceneClass::Black);
    }

    // --- options ---

    #[test]
    fn test_analyze_opts_default() {
        let AnalyzeOpts { segment_duration, subsample } = AnalyzeOpts::default();
        assert_eq!(segment_duration, secs(2));
        assert_eq!(subsample, 1);
    }
}
657        let opts = AnalyzeOpts::default();
658        assert_eq!(opts.segment_duration, Duration::from_secs(2));
659        assert_eq!(opts.subsample, 1);
660    }
661}