1use serde::{Deserialize, Serialize};
2use std::fmt;
3use std::time::Duration;
4use tokio::process::Command;
5use viser_ffmpeg::{ffmpeg_path, probe};
6
7#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
9pub enum SceneClass {
10 Black,
12 Static,
14 Detailed,
16 Motion,
18 Complex,
20}
21
22impl fmt::Display for SceneClass {
23 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
24 match self {
25 SceneClass::Black => write!(f, "black"),
26 SceneClass::Static => write!(f, "static"),
27 SceneClass::Detailed => write!(f, "detailed"),
28 SceneClass::Motion => write!(f, "motion"),
29 SceneClass::Complex => write!(f, "complex"),
30 }
31 }
32}
33
34#[derive(Debug, Clone, Serialize, Deserialize)]
35pub struct FrameComplexity {
36 pub pts: Duration,
37 pub spatial: f64, pub temporal: f64, pub dct_energy: f64, }
41
42#[derive(Debug, Clone, Serialize, Deserialize)]
43pub struct SegmentComplexity {
44 pub start: Duration,
45 pub end: Duration,
46 pub duration: Duration,
47 pub avg_spatial: f64,
48 pub avg_temporal: f64,
49 pub max_spatial: f64,
50 pub max_temporal: f64,
51 pub score: f64, pub scene_class: SceneClass,
53}
54
55#[derive(Debug, Clone, Serialize, Deserialize)]
56pub struct Profile {
57 pub frames: Vec<FrameComplexity>,
58 pub segments: Vec<SegmentComplexity>,
59 pub avg_spatial: f64,
60 pub avg_temporal: f64,
61 pub overall_score: f64,
62}
63
/// Tuning knobs for [`analyze`].
#[derive(Debug, Clone)]
pub struct AnalyzeOpts {
    /// Length of each aggregation window. A zero duration makes `analyze`
    /// fall back to two seconds.
    pub segment_duration: Duration,
    /// Analyze only every `subsample`-th frame. Values of zero or below are
    /// treated as `1` (every frame) by `analyze`.
    pub subsample: i32,
}
69
70impl Default for AnalyzeOpts {
71 fn default() -> Self {
72 Self { segment_duration: Duration::from_secs(2), subsample: 1 }
73 }
74}
75
76pub async fn analyze(path: &str, opts: AnalyzeOpts) -> anyhow::Result<Profile> {
78 let seg_dur = if opts.segment_duration.is_zero() {
79 Duration::from_secs(2)
80 } else {
81 opts.segment_duration
82 };
83 let subsample = if opts.subsample <= 0 { 1 } else { opts.subsample };
84
85 let probe_result = probe(path).await?;
86 let total_duration = Duration::from_secs_f64(probe_result.format.duration);
87
88 let select_filter =
89 if subsample > 1 { format!("select='not(mod(n\\,{subsample}))',") } else { String::new() };
90
91 let filter = format!("{select_filter}entropy,signalstats,metadata=mode=print:file=-");
92 let args = ["-i", path, "-vf", &filter, "-f", "null", "-"];
93
94 let output = Command::new(ffmpeg_path())
95 .args(args)
96 .stdout(std::process::Stdio::piped())
97 .stderr(std::process::Stdio::piped())
98 .output()
99 .await?;
100
101 if !output.status.success() {
102 let stderr = String::from_utf8_lossy(&output.stderr);
103 anyhow::bail!("complexity analysis failed: {stderr}");
104 }
105
106 let stdout = String::from_utf8_lossy(&output.stdout);
107 let frames = parse_complexity_output(&stdout);
108
109 if frames.is_empty() {
110 anyhow::bail!("no frames analyzed");
111 }
112
113 let segments = aggregate_segments(&frames, total_duration, seg_dur);
114
115 let n = frames.len() as f64;
116 let avg_spatial: f64 = frames.iter().map(|f| f.spatial).sum::<f64>() / n;
117 let avg_temporal: f64 = frames.iter().map(|f| f.temporal).sum::<f64>() / n;
118 let overall_score = compute_score(avg_spatial, avg_temporal);
119
120 Ok(Profile { frames, segments, avg_spatial, avg_temporal, overall_score })
121}
122
123fn parse_complexity_output(output: &str) -> Vec<FrameComplexity> {
124 let mut frames = Vec::new();
125 let mut current =
126 FrameComplexity { pts: Duration::ZERO, spatial: 0.0, temporal: 0.0, dct_energy: 0.0 };
127 let mut has_pts = false;
128
129 for line in output.lines() {
130 if line.starts_with("frame:") {
131 if has_pts {
132 frames.push(current.clone());
133 }
134 current = FrameComplexity {
135 pts: Duration::ZERO,
136 spatial: 0.0,
137 temporal: 0.0,
138 dct_energy: 0.0,
139 };
140 has_pts = false;
141
142 if let Some(pts_time) = extract_field(line, "pts_time:") {
143 if let Ok(seconds) = pts_time.parse::<f64>() {
144 current.pts = Duration::from_secs_f64(seconds);
145 has_pts = true;
146 }
147 }
148 continue;
149 }
150
151 if let Some(val) = line.strip_prefix("lavfi.entropy.normalized_entropy.normal.Y=") {
152 current.spatial = val.parse().unwrap_or(0.0);
153 }
154 if let Some(val) = line.strip_prefix("lavfi.signalstats.YDIF=") {
155 current.temporal = val.parse().unwrap_or(0.0);
156 }
157 if let Some(val) = line.strip_prefix("lavfi.signalstats.YHIGH=") {
158 current.dct_energy = val.parse().unwrap_or(0.0);
159 }
160 if let Some(val) = line.strip_prefix("lavfi.signalstats.YLOW=") {
161 let y_low: f64 = val.parse().unwrap_or(0.0);
162 current.dct_energy -= y_low;
163 if current.dct_energy < 0.0 {
164 current.dct_energy = 0.0;
165 }
166 }
167 }
168
169 if has_pts {
170 frames.push(current);
171 }
172
173 frames
174}
175
/// Returns the whitespace-delimited token that follows the first occurrence of
/// `key` in `line`.
///
/// Whitespace between `key` and the token is skipped. Yields `None` when `key`
/// does not occur, and an empty string when nothing follows it.
fn extract_field<'a>(line: &'a str, key: &str) -> Option<&'a str> {
    let (_, after_key) = line.split_once(key)?;
    let value = after_key.trim_start();
    // `split` always yields at least one piece, so the fallback is never taken.
    Some(value.split(char::is_whitespace).next().unwrap_or(value))
}
183
184fn aggregate_segments(
185 frames: &[FrameComplexity],
186 total_duration: Duration,
187 seg_duration: Duration,
188) -> Vec<SegmentComplexity> {
189 let mut segments = Vec::new();
190 let mut seg_start = Duration::ZERO;
191
192 while seg_start < total_duration {
193 let seg_end = (seg_start + seg_duration).min(total_duration);
194
195 let seg_frames: Vec<&FrameComplexity> =
196 frames.iter().filter(|f| f.pts >= seg_start && f.pts < seg_end).collect();
197
198 if !seg_frames.is_empty() {
199 let spatial: Vec<f64> = seg_frames.iter().map(|f| f.spatial).collect();
200 let temporal: Vec<f64> = seg_frames.iter().map(|f| f.temporal).collect();
201 let dct: Vec<f64> = seg_frames.iter().map(|f| f.dct_energy).collect();
202
203 let avg_dct = mean(&dct);
204 let avg_s = mean(&spatial);
205 let avg_t = mean(&temporal);
206
207 let scene_class = classify_scene(avg_s, avg_t, avg_dct);
208
209 segments.push(SegmentComplexity {
210 start: seg_start,
211 end: seg_end,
212 duration: seg_end - seg_start,
213 avg_spatial: avg_s,
214 avg_temporal: avg_t,
215 max_spatial: max_val(&spatial),
216 max_temporal: max_val(&temporal),
217 score: compute_score_with_dct(avg_s, avg_t, avg_dct),
218 scene_class,
219 });
220 }
221
222 seg_start = seg_end;
223 }
224
225 segments
226}
227
/// Blends spatial and temporal complexity into a single score.
///
/// `spatial` is rescaled so that `0.5` maps to 0 and `1.0` to 100 (clamped to
/// that range); `temporal` is multiplied by 3.33 and capped at 100. The two
/// terms are weighted 60 % / 40 %.
fn compute_score(spatial: f64, temporal: f64) -> f64 {
    const SPATIAL_WEIGHT: f64 = 0.6;
    const TEMPORAL_WEIGHT: f64 = 0.4;

    let spatial_term = f64::clamp((spatial - 0.5) * 200.0, 0.0, 100.0);
    let temporal_term = f64::min(temporal * 3.33, 100.0);

    spatial_term * SPATIAL_WEIGHT + temporal_term * TEMPORAL_WEIGHT
}
233
/// Blends spatial complexity, temporal complexity and the luma-spread metric
/// into a single score.
///
/// `spatial` is rescaled so that `0.5` maps to 0 and `1.0` to 100 (clamped);
/// `temporal` is multiplied by 3.33 and `dct_energy` by 0.5, each capped at
/// 100. The terms are weighted 40 % spatial, 30 % luma spread, 30 % temporal.
fn compute_score_with_dct(spatial: f64, temporal: f64, dct_energy: f64) -> f64 {
    const SPATIAL_WEIGHT: f64 = 0.4;
    const DCT_WEIGHT: f64 = 0.3;
    const TEMPORAL_WEIGHT: f64 = 0.3;

    let spatial_term = f64::clamp((spatial - 0.5) * 200.0, 0.0, 100.0);
    let temporal_term = f64::min(temporal * 3.33, 100.0);
    let dct_term = f64::min(dct_energy * 0.5, 100.0);

    spatial_term * SPATIAL_WEIGHT + dct_term * DCT_WEIGHT + temporal_term * TEMPORAL_WEIGHT
}
240
241pub fn classify_scene(spatial: f64, temporal: f64, dct_energy: f64) -> SceneClass {
243 let s = spatial;
244 let t = temporal;
245 let d = dct_energy;
246
247 if t < 1.0 && s <= 0.3 && d < 5.0 {
249 return SceneClass::Black;
250 }
251
252 if t > 8.0 && s < 0.65 {
254 return SceneClass::Motion;
255 }
256
257 if s > 0.7 && t > 5.0 {
259 return SceneClass::Complex;
260 }
261
262 if s >= 0.65 && t <= 5.0 {
264 return SceneClass::Detailed;
265 }
266
267 SceneClass::Static
269}
270
/// Arithmetic mean of `vals`; an empty slice yields `0.0`.
fn mean(vals: &[f64]) -> f64 {
    match vals.len() {
        0 => 0.0,
        count => vals.iter().sum::<f64>() / count as f64,
    }
}
277
/// Largest value in `vals`; an empty slice yields negative infinity.
fn max_val(vals: &[f64]) -> f64 {
    let mut largest = f64::NEG_INFINITY;
    for &value in vals {
        largest = largest.max(value);
    }
    largest
}
281
282#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
284pub enum ContentType {
285 Natural,
287 Screen,
289}
290
291#[derive(Debug, Clone, Serialize, Deserialize)]
292pub struct ScreenContentDetection {
293 pub content_type: ContentType,
294 pub confidence: f64, pub reason: String,
296}
297
298pub fn detect_screen_content(profile: &Profile) -> ScreenContentDetection {
305 if profile.frames.is_empty() {
306 return ScreenContentDetection {
307 content_type: ContentType::Natural,
308 confidence: 0.0,
309 reason: "no frames analyzed".into(),
310 };
311 }
312
313 let static_fraction = profile.frames.iter().filter(|f| f.temporal < 1.5).count() as f64
315 / profile.frames.len() as f64;
316
317 let has_sharp_edges = profile.avg_spatial > 0.75;
318 let is_mostly_static = static_fraction > 0.8;
319 let high_dct_low_temporal = profile.avg_temporal < 2.0
320 && (profile.segments.iter().any(|s| s.avg_spatial > 0.7) || profile.avg_spatial > 0.7);
321
322 let score = if has_sharp_edges && is_mostly_static {
323 90.0
325 } else if high_dct_low_temporal {
326 70.0
328 } else if is_mostly_static && profile.avg_spatial > 0.6 {
329 50.0
331 } else if static_fraction > 0.6 && profile.avg_temporal < 3.0 {
332 30.0
334 } else {
335 0.0
336 };
337
338 let content_type = if score >= 50.0 { ContentType::Screen } else { ContentType::Natural };
339 let reason = if score >= 90.0 {
340 format!(
341 "sharp edges (spatial={:.2}) + mostly static ({:.0}% frames) — classic screen content",
342 profile.avg_spatial,
343 static_fraction * 100.0
344 )
345 } else if score >= 70.0 {
346 format!(
347 "high spatial/DCT energy (spatial={:.2}) with low temporal ({:.1}) — likely screen content",
348 profile.avg_spatial, profile.avg_temporal
349 )
350 } else if score >= 50.0 {
351 format!(
352 "mostly static ({:.0}% frames) with moderate spatial ({:.2}) — possible screen content",
353 static_fraction * 100.0,
354 profile.avg_spatial
355 )
356 } else if score >= 30.0 {
357 format!("leaning static ({:.0}% frames)", static_fraction * 100.0)
358 } else {
359 "natural video content detected".into()
360 };
361
362 ScreenContentDetection { content_type, confidence: score, reason }
363}
364
#[cfg(test)]
mod screen_tests {
    use super::*;

    /// Builds one frame sample at `pts_secs` seconds.
    fn mk_frame(pts_secs: f64, spatial: f64, temporal: f64, dct: f64) -> FrameComplexity {
        FrameComplexity {
            pts: Duration::from_secs_f64(pts_secs),
            spatial,
            temporal,
            dct_energy: dct,
        }
    }

    /// Builds a segment-less profile of `count` identical frames spaced 40 ms
    /// apart, with the profile averages set to the per-frame values.
    fn uniform_profile(count: u32, spatial: f64, temporal: f64, dct: f64) -> Profile {
        let frames =
            (0..count).map(|i| mk_frame(f64::from(i) * 0.04, spatial, temporal, dct)).collect();
        Profile {
            frames,
            segments: Vec::new(),
            avg_spatial: spatial,
            avg_temporal: temporal,
            overall_score: 0.0,
        }
    }

    #[test]
    fn test_detect_screen_content_slides() {
        let detection = detect_screen_content(&uniform_profile(100, 0.85, 0.2, 100.0));

        assert_eq!(detection.content_type, ContentType::Screen);
        assert!(detection.confidence >= 90.0);
    }

    #[test]
    fn test_detect_screen_content_natural_video() {
        let detection = detect_screen_content(&uniform_profile(100, 0.5, 10.0, 50.0));

        assert_eq!(detection.content_type, ContentType::Natural);
        assert_eq!(detection.confidence, 0.0);
    }

    #[test]
    fn test_detect_screen_content_empty() {
        let detection = detect_screen_content(&uniform_profile(0, 0.0, 0.0, 0.0));

        assert_eq!(detection.content_type, ContentType::Natural);
        assert_eq!(detection.confidence, 0.0);
    }

    #[test]
    fn test_detect_screen_content_code_capture() {
        let detection = detect_screen_content(&uniform_profile(100, 0.78, 1.5, 80.0));

        assert_eq!(detection.content_type, ContentType::Screen);
        assert!(detection.confidence >= 70.0, "expected >= 70, got {}", detection.confidence);
    }
}
440
#[cfg(test)]
mod tests {
    use super::*;

    /// Tolerance used for floating-point comparisons.
    const EPS: f64 = 1e-9;

    /// Builds one frame sample at `pts_secs` seconds.
    fn frame(pts_secs: f64, spatial: f64, temporal: f64, dct: f64) -> FrameComplexity {
        FrameComplexity {
            pts: Duration::from_secs_f64(pts_secs),
            spatial,
            temporal,
            dct_energy: dct,
        }
    }

    /// Asserts that `actual` is within [`EPS`] of `expected`.
    fn assert_close(actual: f64, expected: f64) {
        assert!((actual - expected).abs() < EPS, "expected {expected}, got {actual}");
    }

    #[test]
    fn test_mean_empty() {
        assert_close(mean(&[]), 0.0);
    }

    #[test]
    fn test_mean_single() {
        assert_close(mean(&[42.0]), 42.0);
    }

    #[test]
    fn test_mean_multiple() {
        assert_close(mean(&[1.0, 2.0, 3.0]), 2.0);
    }

    #[test]
    fn test_max_val_empty_negative_inf() {
        let max = max_val(&[]);
        assert!(max.is_infinite() && max.is_sign_negative());
    }

    #[test]
    fn test_max_val() {
        assert_close(max_val(&[1.0, 5.0, 3.0]), 5.0);
    }

    #[test]
    fn test_compute_score_bounds() {
        let score = compute_score(0.5, 0.0);
        assert!((0.0..=100.0).contains(&score));
    }

    #[test]
    fn test_compute_score_zero_input() {
        assert!(compute_score(0.0, 0.0) >= 0.0);
    }

    #[test]
    fn test_compute_score_high_input() {
        let score = compute_score(1.0, 30.0);
        assert!(score <= 100.0);
        assert!(score > 50.0);
    }

    #[test]
    fn test_compute_score_with_dct() {
        assert!(compute_score_with_dct(0.5, 0.0, 0.0) >= 0.0);
    }

    #[test]
    fn test_parse_complexity_output_empty() {
        assert!(parse_complexity_output("").is_empty());
    }

    #[test]
    fn test_parse_complexity_output_basic() {
        let output = [
            "frame: 1 pts_time:0.000",
            "lavfi.entropy.normalized_entropy.normal.Y=0.6",
            "lavfi.signalstats.YDIF=2.5",
            "lavfi.signalstats.YHIGH=100.0",
            "lavfi.signalstats.YLOW=30.0",
            "frame: 2 pts_time:1.000",
            "lavfi.entropy.normalized_entropy.normal.Y=0.7",
            "lavfi.signalstats.YDIF=3.0",
            "lavfi.signalstats.YHIGH=120.0",
            "lavfi.signalstats.YLOW=40.0",
        ]
        .join("\n");

        let frames = parse_complexity_output(&output);
        assert_eq!(frames.len(), 2);

        assert_close(frames[0].spatial, 0.6);
        assert_close(frames[0].temporal, 2.5);
        assert_close(frames[0].dct_energy, 70.0);

        assert_close(frames[1].spatial, 0.7);
        assert_close(frames[1].temporal, 3.0);
        assert_close(frames[1].dct_energy, 80.0);
    }

    #[test]
    fn test_parse_complexity_output_handles_partial_data() {
        let output = [
            "frame: 1 pts_time:0.000",
            "lavfi.entropy.normalized_entropy.normal.Y=0.5",
            "frame: 2 pts_time:1.000",
            "lavfi.signalstats.YDIF=1.0",
        ]
        .join("\n");

        assert_eq!(parse_complexity_output(&output).len(), 2);
    }

    #[test]
    fn test_parse_complexity_output_negative_dct() {
        let output = [
            "frame: 1 pts_time:0.000",
            "lavfi.signalstats.YHIGH=30.0",
            "lavfi.signalstats.YLOW=50.0",
        ]
        .join("\n");

        let frames = parse_complexity_output(&output);
        assert_eq!(frames.len(), 1);
        // A YLOW above YHIGH must be floored at zero rather than go negative.
        assert_close(frames[0].dct_energy, 0.0);
    }

    #[test]
    fn test_aggregate_segments_single_segment() {
        let frames =
            vec![frame(0.0, 0.5, 1.0, 10.0), frame(0.5, 0.6, 2.0, 20.0), frame(1.0, 0.7, 3.0, 30.0)];

        let segs = aggregate_segments(&frames, Duration::from_secs(2), Duration::from_secs(2));

        assert_eq!(segs.len(), 1);
        let seg = &segs[0];
        assert!((seg.avg_spatial - 0.6).abs() < 0.01);
        assert!((seg.avg_temporal - 2.0).abs() < 0.01);
        assert_close(seg.max_spatial, 0.7);
        assert_eq!(seg.start, Duration::ZERO);
        assert_eq!(seg.end, Duration::from_secs(2));
    }

    #[test]
    fn test_aggregate_segments_multiple() {
        let frames = vec![
            frame(0.0, 0.4, 1.0, 5.0),
            frame(0.5, 0.5, 1.5, 6.0),
            frame(1.0, 0.6, 2.0, 7.0),
            frame(1.5, 0.7, 2.5, 8.0),
            frame(2.0, 0.8, 3.0, 9.0),
            frame(2.5, 0.9, 3.5, 10.0),
        ];

        let segs = aggregate_segments(&frames, Duration::from_secs(3), Duration::from_secs(1));

        let starts: Vec<Duration> = segs.iter().map(|seg| seg.start).collect();
        assert_eq!(
            starts,
            vec![Duration::from_secs(0), Duration::from_secs(1), Duration::from_secs(2)]
        );
    }

    #[test]
    fn test_aggregate_segments_empty_bucket() {
        // One-second windows over four seconds: the windows starting at 1 s
        // and 2 s contain no frame and must not produce a segment.
        let frames = vec![frame(0.0, 0.5, 1.0, 5.0), frame(3.0, 0.8, 3.0, 10.0)];

        let segs = aggregate_segments(&frames, Duration::from_secs(4), Duration::from_secs(1));

        assert_eq!(segs.len(), 2);
        assert_eq!(segs[0].start, Duration::ZERO);
        assert_eq!(segs[1].start, Duration::from_secs(3));
    }

    #[test]
    fn test_classify_black() {
        assert_eq!(classify_scene(0.2, 0.5, 2.0), SceneClass::Black);
    }

    #[test]
    fn test_classify_static() {
        assert_eq!(classify_scene(0.4, 1.5, 10.0), SceneClass::Static);
    }

    #[test]
    fn test_classify_detailed() {
        assert_eq!(classify_scene(0.7, 3.0, 50.0), SceneClass::Detailed);
    }

    #[test]
    fn test_classify_motion() {
        assert_eq!(classify_scene(0.4, 10.0, 30.0), SceneClass::Motion);
    }

    #[test]
    fn test_classify_complex() {
        assert_eq!(classify_scene(0.8, 6.0, 60.0), SceneClass::Complex);
    }

    #[test]
    fn test_classify_edges() {
        // Both thresholds of the "detailed" rule are inclusive.
        assert_eq!(classify_scene(0.65, 5.0, 30.0), SceneClass::Detailed);
        assert_eq!(classify_scene(0.6, 9.0, 20.0), SceneClass::Motion);
    }

    #[test]
    fn test_scene_class_display() {
        let expected = [
            (SceneClass::Black, "black"),
            (SceneClass::Static, "static"),
            (SceneClass::Detailed, "detailed"),
            (SceneClass::Motion, "motion"),
            (SceneClass::Complex, "complex"),
        ];
        for (class, label) in expected {
            assert_eq!(class.to_string(), label);
        }
    }

    #[test]
    fn test_segment_has_scene_class() {
        let frames = vec![frame(0.0, 0.2, 0.5, 2.0)];

        let segs = aggregate_segments(&frames, Duration::from_secs(2), Duration::from_secs(2));

        assert_eq!(segs.len(), 1);
        assert_eq!(segs[0].scene_class, SceneClass::Black);
    }

    #[test]
    fn test_analyze_opts_default() {
        let AnalyzeOpts { segment_duration, subsample } = AnalyzeOpts::default();
        assert_eq!(segment_duration, Duration::from_secs(2));
        assert_eq!(subsample, 1);
    }
}