Skip to main content

viser_complexity/
lib.rs

1use serde::{Deserialize, Serialize};
2use std::time::Duration;
3use tokio::process::Command;
4use viser_ffmpeg::{ffmpeg_path, probe};
5
6#[derive(Debug, Clone, Serialize, Deserialize)]
7pub struct FrameComplexity {
8    pub pts: Duration,
9    pub spatial: f64,    // normalized entropy (0-1)
10    pub temporal: f64,   // inter-frame luma difference (0-255)
11    pub dct_energy: f64, // average DCT coefficient energy
12}
13
14#[derive(Debug, Clone, Serialize, Deserialize)]
15pub struct SegmentComplexity {
16    pub start: Duration,
17    pub end: Duration,
18    pub duration: Duration,
19    pub avg_spatial: f64,
20    pub avg_temporal: f64,
21    pub max_spatial: f64,
22    pub max_temporal: f64,
23    pub score: f64, // combined 0-100 complexity score
24}
25
26#[derive(Debug, Clone, Serialize, Deserialize)]
27pub struct Profile {
28    pub frames: Vec<FrameComplexity>,
29    pub segments: Vec<SegmentComplexity>,
30    pub avg_spatial: f64,
31    pub avg_temporal: f64,
32    pub overall_score: f64,
33}
34
/// Tuning knobs for the complexity analysis.
#[derive(Debug, Clone)]
pub struct AnalyzeOpts {
    /// Length of each aggregation segment.
    pub segment_duration: Duration,
    /// Frame subsampling factor; `1` keeps every frame.
    pub subsample: i32,
}

impl Default for AnalyzeOpts {
    /// Two-second segments with no frame subsampling.
    fn default() -> Self {
        let segment_duration = Duration::new(2, 0);
        let subsample = 1;
        AnalyzeOpts { segment_duration, subsample }
    }
}
46
47/// Extracts per-frame complexity metrics and aggregates them into segments.
48pub async fn analyze(path: &str, opts: AnalyzeOpts) -> anyhow::Result<Profile> {
49    let seg_dur = if opts.segment_duration.is_zero() {
50        Duration::from_secs(2)
51    } else {
52        opts.segment_duration
53    };
54    let subsample = if opts.subsample <= 0 { 1 } else { opts.subsample };
55
56    let probe_result = probe(path).await?;
57    let total_duration = Duration::from_secs_f64(probe_result.format.duration);
58
59    let select_filter =
60        if subsample > 1 { format!("select='not(mod(n\\,{subsample}))',") } else { String::new() };
61
62    let filter = format!("{select_filter}entropy,signalstats,metadata=mode=print:file=-");
63    let args = ["-i", path, "-vf", &filter, "-f", "null", "-"];
64
65    let output = Command::new(ffmpeg_path())
66        .args(args)
67        .stdout(std::process::Stdio::piped())
68        .stderr(std::process::Stdio::piped())
69        .output()
70        .await?;
71
72    if !output.status.success() {
73        let stderr = String::from_utf8_lossy(&output.stderr);
74        anyhow::bail!("complexity analysis failed: {stderr}");
75    }
76
77    let stdout = String::from_utf8_lossy(&output.stdout);
78    let frames = parse_complexity_output(&stdout);
79
80    if frames.is_empty() {
81        anyhow::bail!("no frames analyzed");
82    }
83
84    let segments = aggregate_segments(&frames, total_duration, seg_dur);
85
86    let n = frames.len() as f64;
87    let avg_spatial: f64 = frames.iter().map(|f| f.spatial).sum::<f64>() / n;
88    let avg_temporal: f64 = frames.iter().map(|f| f.temporal).sum::<f64>() / n;
89    let overall_score = compute_score(avg_spatial, avg_temporal);
90
91    Ok(Profile { frames, segments, avg_spatial, avg_temporal, overall_score })
92}
93
94fn parse_complexity_output(output: &str) -> Vec<FrameComplexity> {
95    let mut frames = Vec::new();
96    let mut current =
97        FrameComplexity { pts: Duration::ZERO, spatial: 0.0, temporal: 0.0, dct_energy: 0.0 };
98    let mut has_pts = false;
99
100    for line in output.lines() {
101        if line.starts_with("frame:") {
102            if has_pts {
103                frames.push(current.clone());
104            }
105            current = FrameComplexity {
106                pts: Duration::ZERO,
107                spatial: 0.0,
108                temporal: 0.0,
109                dct_energy: 0.0,
110            };
111            has_pts = false;
112
113            if let Some(pts_time) = extract_field(line, "pts_time:") {
114                if let Ok(seconds) = pts_time.parse::<f64>() {
115                    current.pts = Duration::from_secs_f64(seconds);
116                    has_pts = true;
117                }
118            }
119            continue;
120        }
121
122        if let Some(val) = line.strip_prefix("lavfi.entropy.normalized_entropy.normal.Y=") {
123            current.spatial = val.parse().unwrap_or(0.0);
124        }
125        if let Some(val) = line.strip_prefix("lavfi.signalstats.YDIF=") {
126            current.temporal = val.parse().unwrap_or(0.0);
127        }
128        if let Some(val) = line.strip_prefix("lavfi.signalstats.YHIGH=") {
129            current.dct_energy = val.parse().unwrap_or(0.0);
130        }
131        if let Some(val) = line.strip_prefix("lavfi.signalstats.YLOW=") {
132            let y_low: f64 = val.parse().unwrap_or(0.0);
133            current.dct_energy -= y_low;
134            if current.dct_energy < 0.0 {
135                current.dct_energy = 0.0;
136            }
137        }
138    }
139
140    if has_pts {
141        frames.push(current);
142    }
143
144    frames
145}
146
/// Returns the whitespace-delimited token that follows the first occurrence of
/// `key` in `line`, or `None` when `key` does not occur.
///
/// Whitespace between `key` and the token is skipped. If nothing follows the
/// key, the result is `Some("")`.
fn extract_field<'a>(line: &'a str, key: &str) -> Option<&'a str> {
    let value_start = line.find(key)? + key.len();
    let tail = line[value_start..].trim_start();
    let token = match tail.split_once(char::is_whitespace) {
        Some((head, _)) => head,
        None => tail,
    };
    Some(token)
}
154
155fn aggregate_segments(
156    frames: &[FrameComplexity],
157    total_duration: Duration,
158    seg_duration: Duration,
159) -> Vec<SegmentComplexity> {
160    let mut segments = Vec::new();
161    let mut seg_start = Duration::ZERO;
162
163    while seg_start < total_duration {
164        let seg_end = (seg_start + seg_duration).min(total_duration);
165
166        let seg_frames: Vec<&FrameComplexity> =
167            frames.iter().filter(|f| f.pts >= seg_start && f.pts < seg_end).collect();
168
169        if !seg_frames.is_empty() {
170            let spatial: Vec<f64> = seg_frames.iter().map(|f| f.spatial).collect();
171            let temporal: Vec<f64> = seg_frames.iter().map(|f| f.temporal).collect();
172            let dct: Vec<f64> = seg_frames.iter().map(|f| f.dct_energy).collect();
173
174            let avg_dct = mean(&dct);
175            let avg_s = mean(&spatial);
176            let avg_t = mean(&temporal);
177
178            segments.push(SegmentComplexity {
179                start: seg_start,
180                end: seg_end,
181                duration: seg_end - seg_start,
182                avg_spatial: avg_s,
183                avg_temporal: avg_t,
184                max_spatial: max_val(&spatial),
185                max_temporal: max_val(&temporal),
186                score: compute_score_with_dct(avg_s, avg_t, avg_dct),
187            });
188        }
189
190        seg_start = seg_end;
191    }
192
193    segments
194}
195
/// Combines spatial and temporal complexity into a single 0-100 score.
///
/// `spatial` is mapped linearly from `[0.5, 1.0]` onto `[0, 100]`, and
/// `temporal` is scaled by 3.33. Both terms are clamped to `[0, 100]` before
/// being weighted 60 % / 40 %, so out-of-range inputs (including negative
/// ones) cannot push the score outside `[0, 100]`.
fn compute_score(spatial: f64, temporal: f64) -> f64 {
    let spatial_norm = ((spatial - 0.5) * 200.0).clamp(0.0, 100.0);
    let temporal_norm = (temporal * 3.33).clamp(0.0, 100.0);
    spatial_norm * 0.6 + temporal_norm * 0.4
}
201
/// Combines spatial, temporal and DCT-energy complexity into a 0-100 score.
///
/// `spatial` is mapped linearly from `[0.5, 1.0]` onto `[0, 100]`, `temporal`
/// is scaled by 3.33 and `dct_energy` by 0.5. Every term is clamped to
/// `[0, 100]` before being weighted 40 % / 30 % / 30 % (spatial / DCT /
/// temporal), so negative or oversized inputs cannot push the score outside
/// `[0, 100]`.
fn compute_score_with_dct(spatial: f64, temporal: f64, dct_energy: f64) -> f64 {
    let spatial_norm = ((spatial - 0.5) * 200.0).clamp(0.0, 100.0);
    let temporal_norm = (temporal * 3.33).clamp(0.0, 100.0);
    let dct_norm = (dct_energy * 0.5).clamp(0.0, 100.0);
    spatial_norm * 0.4 + dct_norm * 0.3 + temporal_norm * 0.3
}
208
/// Arithmetic mean of `vals`; an empty slice yields `0.0`.
fn mean(vals: &[f64]) -> f64 {
    match vals.len() {
        0 => 0.0,
        count => vals.iter().sum::<f64>() / count as f64,
    }
}
215
/// Largest value in `vals`; an empty slice yields negative infinity.
fn max_val(vals: &[f64]) -> f64 {
    let mut best = f64::NEG_INFINITY;
    for &candidate in vals {
        best = best.max(candidate);
    }
    best
}
219
220/// Content type classification result.
221#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
222pub enum ContentType {
223    /// Natural video content (film, sports, etc.)
224    Natural,
225    /// Screen content (slides, code, UI, screencasts)
226    Screen,
227}
228
229#[derive(Debug, Clone, Serialize, Deserialize)]
230pub struct ScreenContentDetection {
231    pub content_type: ContentType,
232    pub confidence: f64, // 0-100
233    pub reason: String,
234}
235
236/// Detects whether a video is screen content based on complexity profile heuristics.
237///
238/// Screen content signatures:
239/// - Very high spatial complexity (sharp edges, text)
240/// - Very low temporal complexity (static or near-static)
241/// - High fraction of frames with minimal temporal change
242pub fn detect_screen_content(profile: &Profile) -> ScreenContentDetection {
243    if profile.frames.is_empty() {
244        return ScreenContentDetection {
245            content_type: ContentType::Natural,
246            confidence: 0.0,
247            reason: "no frames analyzed".into(),
248        };
249    }
250
251    // Screen content heuristics
252    let static_fraction = profile.frames.iter().filter(|f| f.temporal < 1.5).count() as f64
253        / profile.frames.len() as f64;
254
255    let has_sharp_edges = profile.avg_spatial > 0.75;
256    let is_mostly_static = static_fraction > 0.8;
257    let high_dct_low_temporal = profile.avg_temporal < 2.0
258        && (profile.segments.iter().any(|s| s.avg_spatial > 0.7) || profile.avg_spatial > 0.7);
259
260    let score = if has_sharp_edges && is_mostly_static {
261        // Classic slide/UI: sharp edges, barely moves
262        90.0
263    } else if high_dct_low_temporal {
264        // Code/screenshot: high DCT energy but low motion
265        70.0
266    } else if is_mostly_static && profile.avg_spatial > 0.6 {
267        // Mostly static with moderate edges
268        50.0
269    } else if static_fraction > 0.6 && profile.avg_temporal < 3.0 {
270        // Leaning static
271        30.0
272    } else {
273        0.0
274    };
275
276    let content_type = if score >= 50.0 { ContentType::Screen } else { ContentType::Natural };
277    let reason = if score >= 90.0 {
278        format!(
279            "sharp edges (spatial={:.2}) + mostly static ({:.0}% frames) — classic screen content",
280            profile.avg_spatial,
281            static_fraction * 100.0
282        )
283    } else if score >= 70.0 {
284        format!(
285            "high spatial/DCT energy (spatial={:.2}) with low temporal ({:.1}) — likely screen content",
286            profile.avg_spatial, profile.avg_temporal
287        )
288    } else if score >= 50.0 {
289        format!(
290            "mostly static ({:.0}% frames) with moderate spatial ({:.2}) — possible screen content",
291            static_fraction * 100.0,
292            profile.avg_spatial
293        )
294    } else if score >= 30.0 {
295        format!("leaning static ({:.0}% frames)", static_fraction * 100.0)
296    } else {
297        "natural video content detected".into()
298    };
299
300    ScreenContentDetection { content_type, confidence: score, reason }
301}
302
#[cfg(test)]
mod screen_tests {
    use super::*;

    /// Builds a profile of 100 identical frames spaced 40 ms apart, with the
    /// profile averages set to the per-frame values and no segments.
    fn uniform_profile(spatial: f64, temporal: f64, dct: f64) -> Profile {
        let frames = (0..100)
            .map(|i| FrameComplexity {
                pts: Duration::from_secs_f64(i as f64 * 0.04),
                spatial,
                temporal,
                dct_energy: dct,
            })
            .collect();
        Profile {
            frames,
            segments: vec![],
            avg_spatial: spatial,
            avg_temporal: temporal,
            overall_score: 0.0,
        }
    }

    #[test]
    fn test_detect_screen_content_slides() {
        // Sharp edges on every frame and no motion.
        let detection = detect_screen_content(&uniform_profile(0.85, 0.2, 100.0));
        assert_eq!(detection.content_type, ContentType::Screen);
        assert!(detection.confidence >= 90.0);
    }

    #[test]
    fn test_detect_screen_content_natural_video() {
        let detection = detect_screen_content(&uniform_profile(0.5, 10.0, 50.0));
        assert_eq!(detection.content_type, ContentType::Natural);
        assert_eq!(detection.confidence, 0.0);
    }

    #[test]
    fn test_detect_screen_content_empty() {
        let empty = Profile {
            frames: vec![],
            segments: vec![],
            avg_spatial: 0.0,
            avg_temporal: 0.0,
            overall_score: 0.0,
        };
        let detection = detect_screen_content(&empty);
        assert_eq!(detection.content_type, ContentType::Natural);
        assert_eq!(detection.confidence, 0.0);
    }

    #[test]
    fn test_detect_screen_content_code_capture() {
        // High spatial, low temporal, moderate DCT.
        let detection = detect_screen_content(&uniform_profile(0.78, 1.5, 80.0));
        assert_eq!(detection.content_type, ContentType::Screen);
        assert!(detection.confidence >= 70.0, "expected >= 70, got {}", detection.confidence);
    }
}
378
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a frame at `pts_secs` seconds.
    fn frame(pts_secs: f64, spatial: f64, temporal: f64, dct: f64) -> FrameComplexity {
        FrameComplexity {
            pts: Duration::from_secs_f64(pts_secs),
            spatial,
            temporal,
            dct_energy: dct,
        }
    }

    #[test]
    fn test_mean_empty() {
        assert!((mean(&[]) - 0.0).abs() < 1e-9);
    }

    #[test]
    fn test_mean_single() {
        assert!((mean(&[42.0]) - 42.0).abs() < 1e-9);
    }

    #[test]
    fn test_mean_multiple() {
        assert!((mean(&[1.0, 2.0, 3.0]) - 2.0).abs() < 1e-9);
    }

    #[test]
    fn test_max_val_empty_negative_inf() {
        assert!(max_val(&[]).is_infinite() && max_val(&[]).is_sign_negative());
    }

    #[test]
    fn test_max_val() {
        assert!((max_val(&[1.0, 5.0, 3.0]) - 5.0).abs() < 1e-9);
    }

    #[test]
    fn test_compute_score_bounds() {
        let s = compute_score(0.5, 0.0);
        assert!((0.0..=100.0).contains(&s));
    }

    #[test]
    fn test_compute_score_zero_input() {
        let s = compute_score(0.0, 0.0);
        assert!(s >= 0.0);
    }

    #[test]
    fn test_compute_score_high_input() {
        let s = compute_score(1.0, 30.0); // temporal 30*3.33=99.9, spatial (1-0.5)*200=100
        assert!(s <= 100.0);
        assert!(s > 50.0);
    }

    #[test]
    fn test_compute_score_with_dct() {
        let s = compute_score_with_dct(0.5, 0.0, 0.0);
        assert!(s >= 0.0);
    }

    #[test]
    fn test_parse_complexity_output_empty() {
        let frames = parse_complexity_output("");
        assert!(frames.is_empty());
    }

    #[test]
    fn test_parse_complexity_output_basic() {
        let output = "\
frame: 1 pts_time:0.000
lavfi.entropy.normalized_entropy.normal.Y=0.6
lavfi.signalstats.YDIF=2.5
lavfi.signalstats.YHIGH=100.0
lavfi.signalstats.YLOW=30.0
frame: 2 pts_time:1.000
lavfi.entropy.normalized_entropy.normal.Y=0.7
lavfi.signalstats.YDIF=3.0
lavfi.signalstats.YHIGH=120.0
lavfi.signalstats.YLOW=40.0
";
        let frames = parse_complexity_output(output);
        assert_eq!(frames.len(), 2);

        assert!((frames[0].spatial - 0.6).abs() < 1e-9);
        assert!((frames[0].temporal - 2.5).abs() < 1e-9);
        assert!((frames[0].dct_energy - 70.0).abs() < 1e-9); // 100 - 30

        assert!((frames[1].spatial - 0.7).abs() < 1e-9);
        assert!((frames[1].temporal - 3.0).abs() < 1e-9);
        assert!((frames[1].dct_energy - 80.0).abs() < 1e-9); // 120 - 40
    }

    #[test]
    fn test_parse_complexity_output_handles_partial_data() {
        let output = "\
frame: 1 pts_time:0.000
lavfi.entropy.normalized_entropy.normal.Y=0.5
frame: 2 pts_time:1.000
lavfi.signalstats.YDIF=1.0
";
        let frames = parse_complexity_output(output);
        assert_eq!(frames.len(), 2);
    }

    #[test]
    fn test_parse_complexity_output_negative_dct() {
        // If YLOW > YHIGH, dct_energy should clamp to 0
        let output = "\
frame: 1 pts_time:0.000
lavfi.signalstats.YHIGH=30.0
lavfi.signalstats.YLOW=50.0
";
        let frames = parse_complexity_output(output);
        assert!((frames[0].dct_energy - 0.0).abs() < 1e-9);
    }

    #[test]
    fn test_aggregate_segments_single_segment() {
        let frames = vec![
            frame(0.0, 0.5, 1.0, 10.0),
            frame(0.5, 0.6, 2.0, 20.0),
            frame(1.0, 0.7, 3.0, 30.0),
        ];
        let segs = aggregate_segments(&frames, Duration::from_secs(2), Duration::from_secs(2));
        assert_eq!(segs.len(), 1);
        assert!((segs[0].avg_spatial - 0.6).abs() < 0.01);
        assert!((segs[0].avg_temporal - 2.0).abs() < 0.01);
        assert!((segs[0].max_spatial - 0.7).abs() < 1e-9);
        assert_eq!(segs[0].start, Duration::ZERO);
        assert_eq!(segs[0].end, Duration::from_secs(2));
    }

    #[test]
    fn test_aggregate_segments_multiple() {
        let frames = vec![
            frame(0.0, 0.4, 1.0, 5.0),
            frame(0.5, 0.5, 1.5, 6.0),
            frame(1.0, 0.6, 2.0, 7.0),
            frame(1.5, 0.7, 2.5, 8.0),
            frame(2.0, 0.8, 3.0, 9.0),
            frame(2.5, 0.9, 3.5, 10.0),
        ];
        let segs = aggregate_segments(&frames, Duration::from_secs(3), Duration::from_secs(1));
        assert_eq!(segs.len(), 3);
        assert_eq!(segs[0].start, Duration::from_secs(0));
        assert_eq!(segs[1].start, Duration::from_secs(1));
        assert_eq!(segs[2].start, Duration::from_secs(2));
    }

    #[test]
    fn test_aggregate_segments_empty_bucket() {
        // Frames fall only into the first and last 2s buckets, so the middle
        // bucket [2s, 4s) holds no frame and must be skipped.
        let frames = vec![frame(0.0, 0.5, 1.0, 5.0), frame(5.0, 0.8, 3.0, 10.0)];
        let segs = aggregate_segments(&frames, Duration::from_secs(6), Duration::from_secs(2));
        assert_eq!(segs.len(), 2);
        assert_eq!(segs[0].start, Duration::ZERO);
        assert_eq!(segs[1].start, Duration::from_secs(4));
    }

    #[test]
    fn test_analyze_opts_default() {
        let opts = AnalyzeOpts::default();
        assert_eq!(opts.segment_duration, Duration::from_secs(2));
        assert_eq!(opts.subsample, 1);
    }
}