1#![doc = include_str!("../README.md")]
2
3pub mod runtime;
4pub mod surface;
5use std::collections::{BTreeMap, BTreeSet};
6use std::fmt;
7use std::ops::{Add, Sub};
8
9use num_rational::Rational64;
10use num_traits::ToPrimitive;
11use thiserror::Error;
12
13#[derive(Debug, Error)]
14pub enum DetectError {
16 #[error("unsupported pixel format: {0:?}")]
17 UnsupportedPixelFormat(PixelFormat),
19 #[error("unsupported audio sample format: {0:?}")]
20 UnsupportedAudioSampleFormat(AudioSampleFormat),
22 #[error("invalid frame buffer: expected at least {expected} bytes, got {actual}")]
23 InvalidFrameBuffer {
25 expected: usize,
27 actual: usize,
29 },
30 #[error("invalid dimensions: {width}x{height}")]
31 InvalidDimensions {
33 width: u32,
35 height: u32,
37 },
38 #[error("invalid audio format: sample_rate={sample_rate}, channels={channels}")]
39 InvalidAudioFormat {
41 sample_rate: u32,
43 channels: u16,
45 },
46 #[error("video source error: {0}")]
47 Source(String),
49 #[error("invalid argument: {0}")]
50 InvalidArgument(String),
52 #[error("I/O error: {0}")]
53 Io(#[from] std::io::Error),
55}
56
57pub type Result<T> = std::result::Result<T, DetectError>;
59
/// Rational time unit: one tick lasts `num / den` seconds.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Timebase {
    /// Numerator of the tick duration.
    pub num: i32,
    /// Denominator of the tick duration.
    pub den: i32,
}

impl Timebase {
    /// Builds a timebase from its numerator and denominator.
    ///
    /// No validation is performed; a zero `den` is accepted as-is.
    pub const fn new(num: i32, den: i32) -> Self {
        Timebase { num, den }
    }

    /// Length of one tick in seconds, evaluated as `num / den` in `f64`.
    ///
    /// A zero denominator yields an infinite or NaN result rather than a panic.
    pub fn seconds_per_tick(self) -> f64 {
        let Timebase { num, den } = self;
        f64::from(num) / f64::from(den)
    }
}
80
81#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
82pub struct Timestamp {
84 pub pts: i64,
86 pub timebase: Timebase,
88}
89
90impl Timestamp {
91 pub const fn new(pts: i64, timebase: Timebase) -> Self {
93 Self { pts, timebase }
94 }
95
96 pub fn seconds(self) -> f64 {
98 self.pts as f64 * self.timebase.seconds_per_tick()
99 }
100}
101
102#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
103pub struct FramePosition {
105 pub frame_index: u64,
107 pub timestamp: Timestamp,
109}
110
111impl FramePosition {
112 pub fn from_frame_index(frame_index: u64, fps: Rational64) -> Self {
114 let den = *fps.numer() as i32;
115 let num = *fps.denom() as i32;
116 Self {
117 frame_index,
118 timestamp: Timestamp::new(frame_index as i64, Timebase::new(num, den)),
119 }
120 }
121}
122
123#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
124pub struct FrameTimecode {
126 pub frame_index: u64,
128 pub fps: Rational64,
130}
131
132impl FrameTimecode {
133 pub fn from_frames(frame_index: u64, fps: Rational64) -> Self {
135 Self { frame_index, fps }
136 }
137
138 pub fn from_pts(
141 frame_index: u64,
142 fps: Rational64,
143 pts: i64,
144 timebase: Timebase,
145 ) -> TimestampedFrameTimecode {
146 TimestampedFrameTimecode {
147 timecode: Self::from_frames(frame_index, fps),
148 timestamp: Timestamp::new(pts, timebase),
149 }
150 }
151
152 pub fn from_seconds(seconds: f64, fps: Rational64) -> Result<Self> {
154 if seconds < 0.0 {
155 return Err(DetectError::InvalidArgument(
156 "seconds must be greater than or equal to zero".to_string(),
157 ));
158 }
159 let fps_float = fps.to_f64().ok_or_else(|| {
160 DetectError::InvalidArgument("frame rate cannot be represented".to_string())
161 })?;
162 Ok(Self {
163 frame_index: (seconds * fps_float).round() as u64,
164 fps,
165 })
166 }
167
168 pub fn parse(input: &str, fps: Rational64) -> Result<Self> {
170 if input.chars().all(|c| c.is_ascii_digit()) {
171 let frame_index = input.parse::<u64>().map_err(|err| {
172 DetectError::InvalidArgument(format!("invalid frame number `{input}`: {err}"))
173 })?;
174 return Ok(Self { frame_index, fps });
175 }
176 if !input.contains(':') {
177 let seconds = input.parse::<f64>().map_err(|err| {
178 DetectError::InvalidArgument(format!("invalid seconds `{input}`: {err}"))
179 })?;
180 return Self::from_seconds(seconds, fps);
181 }
182
183 let parts: Vec<&str> = input.split(':').collect();
184 if parts.len() != 3 {
185 return Err(DetectError::InvalidArgument(format!(
186 "invalid timecode `{input}`"
187 )));
188 }
189 let hours = parts[0].parse::<f64>().map_err(|err| {
190 DetectError::InvalidArgument(format!("invalid hours in `{input}`: {err}"))
191 })?;
192 let minutes = parts[1].parse::<f64>().map_err(|err| {
193 DetectError::InvalidArgument(format!("invalid minutes in `{input}`: {err}"))
194 })?;
195 let seconds = parts[2].parse::<f64>().map_err(|err| {
196 DetectError::InvalidArgument(format!("invalid seconds in `{input}`: {err}"))
197 })?;
198 Self::from_seconds((hours * 3600.0) + (minutes * 60.0) + seconds, fps)
199 }
200
201 pub fn seconds(self) -> f64 {
203 self.frame_index as f64 / self.fps.to_f64().unwrap_or(1.0)
204 }
205
206 pub fn timecode(self, precision: usize) -> String {
208 let factor = 10_f64.powi(precision as i32);
209 let total = (self.seconds() * factor).round() / factor;
210 let hours = (total / 3600.0).floor() as u64;
211 let minutes = ((total - hours as f64 * 3600.0) / 60.0).floor() as u64;
212 let seconds = total - hours as f64 * 3600.0 - minutes as f64 * 60.0;
213 if precision == 0 {
214 format!("{hours:02}:{minutes:02}:{:02}", seconds.round() as u64)
215 } else {
216 let whole = seconds.floor() as u64;
217 let frac = ((seconds - whole as f64) * factor).round() as u64;
218 format!("{hours:02}:{minutes:02}:{whole:02}.{frac:0precision$}")
219 }
220 }
221
222 pub fn position(self) -> FramePosition {
224 FramePosition::from_frame_index(self.frame_index, self.fps)
225 }
226
227 pub const fn pts(self) -> Option<i64> {
232 None
233 }
234
235 pub const fn time_base(self) -> Option<Timebase> {
241 None
242 }
243}
244
245#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
246pub struct TimestampedFrameTimecode {
249 pub timecode: FrameTimecode,
251 pub timestamp: Timestamp,
253}
254
255impl TimestampedFrameTimecode {
256 pub const fn frame_index(self) -> u64 {
258 self.timecode.frame_index
259 }
260
261 pub const fn fps(self) -> Rational64 {
263 self.timecode.fps
264 }
265
266 pub const fn pts(self) -> Option<i64> {
268 Some(self.timestamp.pts)
269 }
270
271 pub const fn time_base(self) -> Option<Timebase> {
273 Some(self.timestamp.timebase)
274 }
275
276 pub fn seconds(self) -> f64 {
278 self.timestamp.seconds()
279 }
280
281 pub fn timecode(self, precision: usize) -> String {
283 self.timecode.timecode(precision)
284 }
285
286 pub const fn position(self) -> FramePosition {
288 FramePosition {
289 frame_index: self.timecode.frame_index,
290 timestamp: self.timestamp,
291 }
292 }
293}
294
295impl Add<u64> for FrameTimecode {
296 type Output = FrameTimecode;
297
298 fn add(self, rhs: u64) -> Self::Output {
299 Self {
300 frame_index: self.frame_index + rhs,
301 fps: self.fps,
302 }
303 }
304}
305
306impl Sub<u64> for FrameTimecode {
307 type Output = FrameTimecode;
308
309 fn sub(self, rhs: u64) -> Self::Output {
310 Self {
311 frame_index: self.frame_index.saturating_sub(rhs),
312 fps: self.fps,
313 }
314 }
315}
316
317impl fmt::Display for FrameTimecode {
318 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
319 f.write_str(&self.timecode(3))
320 }
321}
322
/// Byte layout of a packed three-bytes-per-pixel frame.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PixelFormat {
    /// Channel bytes stored in red, green, blue order.
    Rgb24,
    /// Channel bytes stored in blue, green, red order.
    Bgr24,
}
331
332#[derive(Debug, Clone, Copy)]
333pub struct VideoFrame<'a> {
335 pub position: FramePosition,
337 pub width: u32,
339 pub height: u32,
341 pub pixel_format: PixelFormat,
343 pub data: &'a [u8],
345 pub stride: usize,
347}
348
349impl<'a> VideoFrame<'a> {
350 pub fn rgb24(position: FramePosition, width: u32, height: u32, data: &'a [u8]) -> Result<Self> {
352 Self::packed(
353 position,
354 width,
355 height,
356 PixelFormat::Rgb24,
357 data,
358 width as usize * 3,
359 )
360 }
361
362 pub fn bgr24(position: FramePosition, width: u32, height: u32, data: &'a [u8]) -> Result<Self> {
364 Self::packed(
365 position,
366 width,
367 height,
368 PixelFormat::Bgr24,
369 data,
370 width as usize * 3,
371 )
372 }
373
374 pub fn packed(
376 position: FramePosition,
377 width: u32,
378 height: u32,
379 pixel_format: PixelFormat,
380 data: &'a [u8],
381 stride: usize,
382 ) -> Result<Self> {
383 if width == 0 || height == 0 {
384 return Err(DetectError::InvalidDimensions { width, height });
385 }
386 let expected = stride * height as usize;
387 if data.len() < expected {
388 return Err(DetectError::InvalidFrameBuffer {
389 expected,
390 actual: data.len(),
391 });
392 }
393 Ok(Self {
394 position,
395 width,
396 height,
397 pixel_format,
398 data,
399 stride,
400 })
401 }
402
403 pub fn pixel_rgb(&self, x: u32, y: u32) -> [u8; 3] {
405 let i = y as usize * self.stride + x as usize * 3;
406 match self.pixel_format {
407 PixelFormat::Rgb24 => [self.data[i], self.data[i + 1], self.data[i + 2]],
408 PixelFormat::Bgr24 => [self.data[i + 2], self.data[i + 1], self.data[i]],
409 }
410 }
411
412 pub fn pixel_count(&self) -> usize {
414 self.width as usize * self.height as usize
415 }
416}
417
418#[derive(Debug, Clone, PartialEq)]
419pub struct Scene {
421 pub start: FramePosition,
423 pub end: FramePosition,
425}
426
427#[derive(Debug, Clone, PartialEq)]
428pub struct Cut {
430 pub position: FramePosition,
432 pub detector: &'static str,
434 pub score: Option<f32>,
436}
437
/// Receiver for per-frame metric values.
pub trait MetricsSink {
    /// Records `value` under `key` for the frame `frame_index`.
    fn set_metric(&mut self, frame_index: u64, key: &str, value: f64);
}

/// In-memory [`MetricsSink`] that keeps metrics ordered by frame and by key.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct MetricsStore {
    /// Metric values per frame index, then per key.
    rows: BTreeMap<u64, BTreeMap<String, f64>>,
    /// Every key that has been recorded for any frame.
    keys: BTreeSet<String>,
}

impl MetricsStore {
    /// All recorded rows, keyed by frame index.
    pub fn rows(&self) -> &BTreeMap<u64, BTreeMap<String, f64>> {
        &self.rows
    }

    /// Every key recorded so far, in sorted order.
    pub fn keys(&self) -> impl Iterator<Item = &str> + '_ {
        self.keys.iter().map(String::as_str)
    }

    /// Value stored for `key` at `frame_index`, if any.
    pub fn get(&self, frame_index: u64, key: &str) -> Option<f64> {
        self.rows
            .get(&frame_index)
            .and_then(|row| row.get(key))
            .copied()
    }
}

impl MetricsSink for MetricsStore {
    /// Stores `value`, replacing any earlier value for the same frame and key.
    fn set_metric(&mut self, frame_index: u64, key: &str, value: f64) {
        // The same few keys are written for every frame, so only allocate an
        // owned key the first time it is seen.
        if !self.keys.contains(key) {
            self.keys.insert(key.to_owned());
        }
        let row = self.rows.entry(frame_index).or_default();
        match row.get_mut(key) {
            Some(slot) => *slot = value,
            None => {
                row.insert(key.to_owned(), value);
            }
        }
    }
}
480
481pub trait SceneDetector {
483 fn name(&self) -> &'static str;
485 fn metric_keys(&self) -> &'static [&'static str];
487 fn event_buffer_len(&self) -> usize {
489 0
490 }
491 fn process_frame(
493 &mut self,
494 frame: &VideoFrame<'_>,
495 metrics: Option<&mut dyn MetricsSink>,
496 ) -> Result<Vec<Cut>>;
497 fn finish(
499 &mut self,
500 _last_position: FramePosition,
501 _metrics: Option<&mut dyn MetricsSink>,
502 ) -> Result<Vec<Cut>> {
503 Ok(Vec::new())
504 }
505}
506
/// Strategy used to handle above-threshold frames that come closer together
/// than the minimum scene length.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FlashFilterMode {
    /// Merge a run of close events and report it once the run has settled.
    Merge,
    /// Drop events that follow the previous accepted one too closely.
    Suppress,
}

/// Stateful filter turning per-frame threshold results into cut frame indices.
#[derive(Debug, Clone)]
struct FlashFilter {
    /// Active strategy.
    mode: FlashFilterMode,
    /// Minimum distance, in frames, between events; `0` disables filtering.
    length: u64,
    /// Reference frame for the distance check (seeded with the first frame seen).
    last_above: Option<u64>,
    /// Set once a first cut has been emitted in merge mode.
    merge_enabled: bool,
    /// True while a merge run is in progress.
    merge_triggered: bool,
    /// Frame at which the current merge run started.
    merge_start: Option<u64>,
}

impl FlashFilter {
    /// Creates a filter in its initial state.
    fn new(mode: FlashFilterMode, length: u64) -> Self {
        FlashFilter {
            mode,
            length,
            last_above: None,
            merge_enabled: false,
            merge_triggered: false,
            merge_start: None,
        }
    }

    /// How many frames behind the current frame an emitted index may lie.
    fn max_behind(&self) -> usize {
        match self.mode {
            FlashFilterMode::Merge => self.length as usize,
            FlashFilterMode::Suppress => 0,
        }
    }

    /// Feeds one frame into the filter and returns the frame indices to cut at.
    fn filter(&mut self, frame_index: u64, above_threshold: bool) -> Vec<u64> {
        // With no minimum length every above-threshold frame passes straight through.
        if self.length == 0 {
            return if above_threshold {
                vec![frame_index]
            } else {
                Vec::new()
            };
        }
        // The very first frame seen seeds the reference point.
        let _ = self.last_above.get_or_insert(frame_index);
        match self.mode {
            FlashFilterMode::Merge => self.filter_merge(frame_index, above_threshold),
            FlashFilterMode::Suppress => self.filter_suppress(frame_index, above_threshold),
        }
    }

    /// Suppress mode: accept an event only when it lies at least `length`
    /// frames after the reference frame, then move the reference to it.
    fn filter_suppress(&mut self, frame_index: u64, above_threshold: bool) -> Vec<u64> {
        let reference = self.last_above.unwrap_or(frame_index);
        let gap = frame_index.saturating_sub(reference);
        if !above_threshold || gap < self.length {
            return Vec::new();
        }
        self.last_above = Some(frame_index);
        vec![frame_index]
    }

    /// Merge mode: emit isolated events immediately; collapse runs of close
    /// events into one cut at the last above-threshold frame of the run.
    fn filter_merge(&mut self, frame_index: u64, above_threshold: bool) -> Vec<u64> {
        // The distance check uses the reference as it was *before* this frame.
        let previous_hit = self.last_above.unwrap_or(frame_index);
        let gap_reached = frame_index.saturating_sub(previous_hit) >= self.length;
        let latest_hit = if above_threshold {
            self.last_above = Some(frame_index);
            frame_index
        } else {
            previous_hit
        };

        if self.merge_triggered {
            let merge_origin = self.merge_start.unwrap_or(frame_index);
            let merged_span = latest_hit.saturating_sub(merge_origin);
            // The run ends once things have been quiet for `length` frames and
            // the run itself covered at least `length` frames.
            let run_finished = gap_reached && !above_threshold && merged_span >= self.length;
            if run_finished {
                self.merge_triggered = false;
                return vec![latest_hit];
            }
            return Vec::new();
        }

        match (above_threshold, gap_reached) {
            (false, _) => Vec::new(),
            (true, true) => {
                self.merge_enabled = true;
                vec![frame_index]
            }
            (true, false) => {
                if self.merge_enabled {
                    self.merge_triggered = true;
                    self.merge_start = Some(frame_index);
                }
                Vec::new()
            }
        }
    }
}
598
/// Relative weights of the components that make up the content score.
#[derive(Debug, Clone, Copy)]
pub struct ContentWeights {
    /// Weight of the hue difference.
    pub delta_hue: f32,
    /// Weight of the saturation difference.
    pub delta_sat: f32,
    /// Weight of the luma difference.
    pub delta_lum: f32,
    /// Weight of the edge difference.
    pub delta_edges: f32,
}

impl Default for ContentWeights {
    /// Hue, saturation and luma weighted equally; edges ignored.
    fn default() -> Self {
        ContentWeights {
            delta_hue: 1.0,
            delta_sat: 1.0,
            delta_lum: 1.0,
            delta_edges: 0.0,
        }
    }
}

impl ContentWeights {
    /// Weights that take only the luma difference into account.
    pub const LUMA_ONLY: Self = Self {
        delta_hue: 0.0,
        delta_sat: 0.0,
        delta_lum: 1.0,
        delta_edges: 0.0,
    };

    /// Sum of the absolute values of all four weights.
    fn total(self) -> f32 {
        let components = [
            self.delta_hue,
            self.delta_sat,
            self.delta_lum,
            self.delta_edges,
        ];
        components.iter().map(|weight| weight.abs()).sum()
    }
}
636
637#[derive(Debug, Clone)]
638pub struct ContentDetector {
640 threshold: f32,
641 scorer: ContentScorer,
642 flash_filter: FlashFilter,
643}
644
645impl Default for ContentDetector {
646 fn default() -> Self {
647 Self::new(27.0, 15)
648 }
649}
650
651impl ContentDetector {
652 pub const METRIC_KEYS: &'static [&'static str] = &[
654 "content_val",
655 "delta_hue",
656 "delta_sat",
657 "delta_lum",
658 "delta_edges",
659 ];
660
661 pub fn new(threshold: f32, min_scene_len: u64) -> Self {
663 Self {
664 threshold,
665 scorer: ContentScorer::new(ContentWeights::default(), None),
666 flash_filter: FlashFilter::new(FlashFilterMode::Merge, min_scene_len),
667 }
668 }
669
670 pub fn with_weights(mut self, weights: ContentWeights) -> Self {
672 self.scorer.weights = weights;
673 self
674 }
675
676 pub fn luma_only(mut self, value: bool) -> Self {
678 if value {
679 self.scorer.weights = ContentWeights::LUMA_ONLY;
680 }
681 self
682 }
683
684 pub fn kernel_size(mut self, value: Option<usize>) -> Result<Self> {
686 if let Some(size) = value {
687 if size < 3 || size % 2 == 0 {
688 return Err(DetectError::InvalidArgument(
689 "kernel_size must be an odd integer >= 3".to_string(),
690 ));
691 }
692 }
693 self.scorer.kernel_size = value;
694 Ok(self)
695 }
696
697 pub fn filter_mode(mut self, mode: FlashFilterMode, min_scene_len: u64) -> Self {
699 self.flash_filter = FlashFilter::new(mode, min_scene_len);
700 self
701 }
702}
703
704impl SceneDetector for ContentDetector {
705 fn name(&self) -> &'static str {
706 "content"
707 }
708
709 fn metric_keys(&self) -> &'static [&'static str] {
710 Self::METRIC_KEYS
711 }
712
713 fn event_buffer_len(&self) -> usize {
714 self.flash_filter.max_behind()
715 }
716
717 fn process_frame(
718 &mut self,
719 frame: &VideoFrame<'_>,
720 metrics: Option<&mut dyn MetricsSink>,
721 ) -> Result<Vec<Cut>> {
722 let score = self.scorer.score(frame, metrics)?;
723 let cut_frames = self
724 .flash_filter
725 .filter(frame.position.frame_index, score >= self.threshold);
726 Ok(cut_frames
727 .into_iter()
728 .map(|frame_index| Cut {
729 position: position_like(frame.position, frame_index),
730 detector: self.name(),
731 score: Some(score),
732 })
733 .collect())
734 }
735}
736
737#[derive(Debug, Clone)]
738struct ContentScorer {
739 weights: ContentWeights,
740 kernel_size: Option<usize>,
741 last: Option<FrameData>,
742 current: FrameData,
743}
744
745impl ContentScorer {
746 fn new(weights: ContentWeights, kernel_size: Option<usize>) -> Self {
747 Self {
748 weights,
749 kernel_size,
750 last: None,
751 current: FrameData::default(),
752 }
753 }
754
755 fn score(
756 &mut self,
757 frame: &VideoFrame<'_>,
758 metrics: Option<&mut dyn MetricsSink>,
759 ) -> Result<f32> {
760 let calculate_edges = self.weights.delta_edges > 0.0;
761 let Some(previous) = self.last.as_ref() else {
762 self.current
763 .fill_from_frame(frame, calculate_edges, self.kernel_size)?;
764 self.last = Some(std::mem::take(&mut self.current));
765 if let Some(metrics) = metrics {
766 set_content_metrics(
767 metrics,
768 frame.position.frame_index,
769 0.0,
770 (0.0, 0.0, 0.0, 0.0),
771 );
772 }
773 return Ok(0.0);
774 };
775
776 let (dh, ds, dl) = if !calculate_edges && previous.pixel_count() == frame.pixel_count() {
777 self.current.fill_from_frame_and_diff(frame, previous)?
778 } else {
779 self.current
780 .fill_from_frame(frame, calculate_edges, self.kernel_size)?;
781 (
782 mean_abs_diff(&self.current.hue, &previous.hue),
783 mean_abs_diff(&self.current.sat, &previous.sat),
784 mean_abs_diff(&self.current.lum, &previous.lum),
785 )
786 };
787 let de = match (&self.current.edges, &previous.edges) {
788 (Some(left), Some(right)) => mean_abs_diff(left, right),
789 _ => 0.0,
790 };
791 let total_weight = self.weights.total();
792 let score = if total_weight == 0.0 {
793 0.0
794 } else {
795 ((dh * self.weights.delta_hue)
796 + (ds * self.weights.delta_sat)
797 + (dl * self.weights.delta_lum)
798 + (de * self.weights.delta_edges))
799 / total_weight
800 };
801 if let Some(metrics) = metrics {
802 set_content_metrics(metrics, frame.position.frame_index, score, (dh, ds, dl, de));
803 }
804 if let Some(previous) = self.last.as_mut() {
805 std::mem::swap(previous, &mut self.current);
806 }
807 Ok(score)
808 }
809}
810
811fn set_content_metrics(
812 metrics: &mut dyn MetricsSink,
813 frame_index: u64,
814 score: f32,
815 components: (f32, f32, f32, f32),
816) {
817 metrics.set_metric(frame_index, "content_val", score as f64);
818 metrics.set_metric(frame_index, "delta_hue", components.0 as f64);
819 metrics.set_metric(frame_index, "delta_sat", components.1 as f64);
820 metrics.set_metric(frame_index, "delta_lum", components.2 as f64);
821 metrics.set_metric(frame_index, "delta_edges", components.3 as f64);
822}
823
824#[derive(Debug, Default, Clone)]
825struct FrameData {
826 hue: Vec<u8>,
827 sat: Vec<u8>,
828 lum: Vec<u8>,
829 edges: Option<Vec<u8>>,
830}
831
832impl FrameData {
833 fn pixel_count(&self) -> usize {
834 self.lum.len()
835 }
836
837 fn fill_from_frame(
838 &mut self,
839 frame: &VideoFrame<'_>,
840 calculate_edges: bool,
841 kernel_size: Option<usize>,
842 ) -> Result<()> {
843 let pixels = frame.pixel_count();
844 self.prepare_component_buffers(pixels);
845 match frame.pixel_format {
846 PixelFormat::Rgb24 => self.fill_from_rgb24(frame)?,
847 PixelFormat::Bgr24 => self.fill_from_bgr24(frame)?,
848 }
849 self.edges = calculate_edges
850 .then(|| detect_edges(&self.lum, frame.width, frame.height, kernel_size));
851 Ok(())
852 }
853
854 fn fill_from_frame_and_diff(
855 &mut self,
856 frame: &VideoFrame<'_>,
857 previous: &FrameData,
858 ) -> Result<(f32, f32, f32)> {
859 let pixels = frame.pixel_count();
860 self.prepare_component_buffers(pixels);
861 self.edges = None;
862 let sums = match frame.pixel_format {
863 PixelFormat::Rgb24 => self.fill_from_rgb24_and_diff(frame, previous)?,
864 PixelFormat::Bgr24 => self.fill_from_bgr24_and_diff(frame, previous)?,
865 };
866 Ok((
867 sums.0 as f32 / pixels as f32,
868 sums.1 as f32 / pixels as f32,
869 sums.2 as f32 / pixels as f32,
870 ))
871 }
872
873 fn prepare_component_buffers(&mut self, pixels: usize) {
874 self.hue.clear();
875 self.sat.clear();
876 self.lum.clear();
877 if self.hue.capacity() < pixels {
878 self.hue.reserve(pixels);
879 }
880 if self.sat.capacity() < pixels {
881 self.sat.reserve(pixels);
882 }
883 if self.lum.capacity() < pixels {
884 self.lum.reserve(pixels);
885 }
886 }
887
888 fn fill_from_rgb24(&mut self, frame: &VideoFrame<'_>) -> Result<()> {
889 let row_len = frame.width as usize * 3;
890 for y in 0..frame.height {
891 let start = y as usize * frame.stride;
892 let row =
893 frame
894 .data
895 .get(start..start + row_len)
896 .ok_or(DetectError::InvalidFrameBuffer {
897 expected: start + row_len,
898 actual: frame.data.len(),
899 })?;
900 for pixel in row.chunks_exact(3) {
901 let (r, g, b) = (pixel[0], pixel[1], pixel[2]);
902 let (h, s, v) = rgb_to_hsv(r, g, b);
903 self.hue.push(h);
904 self.sat.push(s);
905 self.lum.push(v);
906 }
907 }
908 Ok(())
909 }
910
911 fn fill_from_rgb24_and_diff(
912 &mut self,
913 frame: &VideoFrame<'_>,
914 previous: &FrameData,
915 ) -> Result<(u64, u64, u64)> {
916 let row_len = frame.width as usize * 3;
917 let mut index = 0usize;
918 let mut dh = 0u64;
919 let mut ds = 0u64;
920 let mut dl = 0u64;
921 for y in 0..frame.height {
922 let start = y as usize * frame.stride;
923 let row =
924 frame
925 .data
926 .get(start..start + row_len)
927 .ok_or(DetectError::InvalidFrameBuffer {
928 expected: start + row_len,
929 actual: frame.data.len(),
930 })?;
931 for pixel in row.chunks_exact(3) {
932 let (h, s, v) = rgb_to_hsv(pixel[0], pixel[1], pixel[2]);
933 dh += abs_diff_u8(h, previous.hue[index]) as u64;
934 ds += abs_diff_u8(s, previous.sat[index]) as u64;
935 dl += abs_diff_u8(v, previous.lum[index]) as u64;
936 self.hue.push(h);
937 self.sat.push(s);
938 self.lum.push(v);
939 index += 1;
940 }
941 }
942 Ok((dh, ds, dl))
943 }
944
945 fn fill_from_bgr24(&mut self, frame: &VideoFrame<'_>) -> Result<()> {
946 let row_len = frame.width as usize * 3;
947 for y in 0..frame.height {
948 let start = y as usize * frame.stride;
949 let row =
950 frame
951 .data
952 .get(start..start + row_len)
953 .ok_or(DetectError::InvalidFrameBuffer {
954 expected: start + row_len,
955 actual: frame.data.len(),
956 })?;
957 for pixel in row.chunks_exact(3) {
958 let (r, g, b) = (pixel[2], pixel[1], pixel[0]);
959 let (h, s, v) = rgb_to_hsv(r, g, b);
960 self.hue.push(h);
961 self.sat.push(s);
962 self.lum.push(v);
963 }
964 }
965 Ok(())
966 }
967
968 fn fill_from_bgr24_and_diff(
969 &mut self,
970 frame: &VideoFrame<'_>,
971 previous: &FrameData,
972 ) -> Result<(u64, u64, u64)> {
973 let row_len = frame.width as usize * 3;
974 let mut index = 0usize;
975 let mut dh = 0u64;
976 let mut ds = 0u64;
977 let mut dl = 0u64;
978 for y in 0..frame.height {
979 let start = y as usize * frame.stride;
980 let row =
981 frame
982 .data
983 .get(start..start + row_len)
984 .ok_or(DetectError::InvalidFrameBuffer {
985 expected: start + row_len,
986 actual: frame.data.len(),
987 })?;
988 for pixel in row.chunks_exact(3) {
989 let (h, s, v) = rgb_to_hsv(pixel[2], pixel[1], pixel[0]);
990 dh += abs_diff_u8(h, previous.hue[index]) as u64;
991 ds += abs_diff_u8(s, previous.sat[index]) as u64;
992 dl += abs_diff_u8(v, previous.lum[index]) as u64;
993 self.hue.push(h);
994 self.sat.push(s);
995 self.lum.push(v);
996 index += 1;
997 }
998 }
999 Ok((dh, ds, dl))
1000 }
1001}
1002
/// Converts an 8-bit RGB pixel to 8-bit `(hue, saturation, value)`.
///
/// Hue is mapped from `0..360` degrees onto `0..=255`; saturation and value
/// are mapped from `0..=1` onto `0..=255`. Fractions are truncated, not rounded.
fn rgb_to_hsv(r: u8, g: u8, b: u8) -> (u8, u8, u8) {
    let [red, green, blue] = [r, g, b].map(|channel| f32::from(channel) / 255.0);
    let brightest = red.max(green).max(blue);
    let darkest = red.min(green).min(blue);
    let chroma = brightest - darkest;

    let mut degrees = if chroma == 0.0 {
        // Grey pixels have no hue.
        0.0
    } else if brightest == red {
        60.0 * (((green - blue) / chroma) % 6.0)
    } else if brightest == green {
        60.0 * (((blue - red) / chroma) + 2.0)
    } else {
        60.0 * (((red - green) / chroma) + 4.0)
    };
    // `%` keeps the sign of the dividend, so wrap negative angles around.
    if degrees < 0.0 {
        degrees += 360.0;
    }

    let saturation = if brightest == 0.0 {
        0.0
    } else {
        chroma / brightest
    };

    let to_byte = |unit: f32| (unit * 255.0) as u8;
    (
        to_byte(degrees / 360.0),
        to_byte(saturation),
        to_byte(brightest),
    )
}
1027
/// Mean absolute difference between two byte slices.
///
/// Elements are paired up to the shorter slice's length and the sum is
/// divided by `left.len()`. Returns `0.0` for an empty `left` instead of the
/// NaN a `0 / 0` division would produce.
fn mean_abs_diff(left: &[u8], right: &[u8]) -> f32 {
    if left.is_empty() {
        return 0.0;
    }
    let total: u64 = left
        .iter()
        .zip(right)
        .map(|(&a, &b)| u64::from(a.abs_diff(b)))
        .sum();
    total as f32 / left.len() as f32
}
1035
/// Absolute difference of two bytes, widened to `u16`.
fn abs_diff_u8(left: u8, right: u8) -> u16 {
    let distance = if left >= right {
        left - right
    } else {
        right - left
    };
    u16::from(distance)
}
1039
1040fn detect_edges(lum: &[u8], width: u32, height: u32, kernel_size: Option<usize>) -> Vec<u8> {
1041 let width = width as usize;
1042 let height = height as usize;
1043 if width < 3 || height < 3 {
1044 return vec![0; width * height];
1045 }
1046 let mut edges = vec![0_u8; width * height];
1047 for y in 1..height - 1 {
1048 for x in 1..width - 1 {
1049 let i = |x: usize, y: usize| lum[y * width + x] as i32;
1050 let gx = -i(x - 1, y - 1) + i(x + 1, y - 1) - 2 * i(x - 1, y) + 2 * i(x + 1, y)
1051 - i(x - 1, y + 1)
1052 + i(x + 1, y + 1);
1053 let gy = -i(x - 1, y - 1) - 2 * i(x, y - 1) - i(x + 1, y - 1)
1054 + i(x - 1, y + 1)
1055 + 2 * i(x, y + 1)
1056 + i(x + 1, y + 1);
1057 let mag = ((gx * gx + gy * gy) as f32).sqrt();
1058 edges[y * width + x] = if mag > 64.0 { 255 } else { 0 };
1059 }
1060 }
1061 dilate(&edges, width, height, kernel_size.unwrap_or(3))
1062}
1063
/// Grey-scale dilation: each output pixel is the maximum of the input pixels
/// inside a square window of radius `kernel_size / 2`, clipped at the borders.
///
/// # Panics
///
/// Panics when `input` holds fewer than `width * height` bytes.
fn dilate(input: &[u8], width: usize, height: usize, kernel_size: usize) -> Vec<u8> {
    let reach = kernel_size / 2;
    let mut output = vec![0_u8; input.len()];
    for row in 0..height {
        let top = row.saturating_sub(reach);
        let bottom = (row + reach).min(height - 1);
        for col in 0..width {
            let left = col.saturating_sub(reach);
            let right = (col + reach).min(width - 1);
            let brightest = (top..=bottom)
                .flat_map(|yy| input[yy * width + left..=yy * width + right].iter().copied())
                .max()
                .unwrap_or(0);
            output[row * width + col] = brightest;
        }
    }
    output
}
1084
1085fn position_like(current: FramePosition, frame_index: u64) -> FramePosition {
1086 let delta = frame_index as i64 - current.frame_index as i64;
1087 FramePosition {
1088 frame_index,
1089 timestamp: Timestamp::new(current.timestamp.pts + delta, current.timestamp.timebase),
1090 }
1091}
1092
1093pub trait VideoSource {
1095 fn next_frame(&mut self) -> Result<Option<OwnedVideoFrame>>;
1097 fn frame_rate(&self) -> Rational64;
1099}
1100
1101#[derive(Debug, Clone, PartialEq, Eq)]
1102pub struct OwnedVideoFrame {
1104 pub position: FramePosition,
1106 pub width: u32,
1108 pub height: u32,
1110 pub pixel_format: PixelFormat,
1112 pub data: Vec<u8>,
1114 pub stride: usize,
1116}
1117
1118impl OwnedVideoFrame {
1119 pub fn as_frame(&self) -> VideoFrame<'_> {
1121 VideoFrame {
1122 position: self.position,
1123 width: self.width,
1124 height: self.height,
1125 pixel_format: self.pixel_format,
1126 data: &self.data,
1127 stride: self.stride,
1128 }
1129 }
1130}
1131
/// Sample encodings an [`AudioBuffer`] can hold.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AudioSampleFormat {
    /// Unsigned 8-bit samples.
    U8,
    /// Signed 16-bit samples.
    I16,
    /// Signed 32-bit samples.
    I32,
    /// 32-bit floating-point samples.
    F32,
}

/// Owned audio samples in one of the supported encodings.
#[derive(Debug, Clone, PartialEq)]
pub enum AudioBuffer {
    /// Unsigned 8-bit samples.
    U8(Vec<u8>),
    /// Signed 16-bit samples.
    I16(Vec<i16>),
    /// Signed 32-bit samples.
    I32(Vec<i32>),
    /// 32-bit floating-point samples.
    F32(Vec<f32>),
}

impl AudioBuffer {
    /// Number of samples stored.
    pub fn len(&self) -> usize {
        match self {
            AudioBuffer::U8(samples) => samples.len(),
            AudioBuffer::I16(samples) => samples.len(),
            AudioBuffer::I32(samples) => samples.len(),
            AudioBuffer::F32(samples) => samples.len(),
        }
    }

    /// True when the buffer holds no samples.
    pub fn is_empty(&self) -> bool {
        matches!(self.len(), 0)
    }

    /// Encoding of the stored samples.
    pub fn sample_format(&self) -> AudioSampleFormat {
        match self {
            AudioBuffer::U8(..) => AudioSampleFormat::U8,
            AudioBuffer::I16(..) => AudioSampleFormat::I16,
            AudioBuffer::I32(..) => AudioSampleFormat::I32,
            AudioBuffer::F32(..) => AudioSampleFormat::F32,
        }
    }
}
1184
1185#[derive(Debug, Clone, Copy)]
1186pub struct AudioFrame<'a> {
1188 pub timestamp: Timestamp,
1190 pub sample_rate: u32,
1192 pub channels: u16,
1194 pub data: &'a AudioBuffer,
1196}
1197
1198impl<'a> AudioFrame<'a> {
1199 pub fn new(
1201 timestamp: Timestamp,
1202 sample_rate: u32,
1203 channels: u16,
1204 data: &'a AudioBuffer,
1205 ) -> Result<Self> {
1206 if sample_rate == 0 || channels == 0 {
1207 return Err(DetectError::InvalidAudioFormat {
1208 sample_rate,
1209 channels,
1210 });
1211 }
1212 Ok(Self {
1213 timestamp,
1214 sample_rate,
1215 channels,
1216 data,
1217 })
1218 }
1219
1220 pub fn sample_format(&self) -> AudioSampleFormat {
1222 self.data.sample_format()
1223 }
1224
1225 pub fn sample_count(&self) -> usize {
1227 self.data.len()
1228 }
1229
1230 pub fn samples_per_channel(&self) -> usize {
1232 self.sample_count() / self.channels as usize
1233 }
1234
1235 pub fn duration_seconds(&self) -> f64 {
1237 self.samples_per_channel() as f64 / self.sample_rate as f64
1238 }
1239}
1240
1241#[derive(Debug, Clone, PartialEq)]
1242pub struct OwnedAudioFrame {
1244 pub timestamp: Timestamp,
1246 pub sample_rate: u32,
1248 pub channels: u16,
1250 pub data: AudioBuffer,
1252}
1253
1254impl OwnedAudioFrame {
1255 pub fn new(
1257 timestamp: Timestamp,
1258 sample_rate: u32,
1259 channels: u16,
1260 data: AudioBuffer,
1261 ) -> Result<Self> {
1262 AudioFrame::new(timestamp, sample_rate, channels, &data)?;
1263 Ok(Self {
1264 timestamp,
1265 sample_rate,
1266 channels,
1267 data,
1268 })
1269 }
1270
1271 pub fn as_frame(&self) -> Result<AudioFrame<'_>> {
1273 AudioFrame::new(self.timestamp, self.sample_rate, self.channels, &self.data)
1274 }
1275
1276 pub fn sample_format(&self) -> AudioSampleFormat {
1278 self.data.sample_format()
1279 }
1280
1281 pub fn samples_per_channel(&self) -> usize {
1283 if self.channels == 0 {
1284 return 0;
1285 }
1286 self.data.len() / self.channels as usize
1287 }
1288
1289 pub fn duration_seconds(&self) -> f64 {
1291 if self.sample_rate == 0 {
1292 return 0.0;
1293 }
1294 self.samples_per_channel() as f64 / self.sample_rate as f64
1295 }
1296}
1297
1298#[derive(Debug, Clone, Copy)]
1299pub struct TextSegment<'a> {
1301 pub segment_index: u64,
1303 pub timestamp: Option<Timestamp>,
1305 pub text: &'a str,
1307 pub language: Option<&'a str>,
1309 pub is_final: bool,
1311}
1312
1313#[derive(Debug, Clone, PartialEq, Eq)]
1314pub struct OwnedTextSegment {
1316 pub segment_index: u64,
1318 pub timestamp: Option<Timestamp>,
1320 pub text: String,
1322 pub language: Option<String>,
1324 pub is_final: bool,
1326}
1327
1328impl OwnedTextSegment {
1329 pub fn new(segment_index: u64, text: impl Into<String>) -> Self {
1331 Self {
1332 segment_index,
1333 timestamp: None,
1334 text: text.into(),
1335 language: None,
1336 is_final: true,
1337 }
1338 }
1339
1340 pub fn timestamp(mut self, timestamp: Timestamp) -> Self {
1342 self.timestamp = Some(timestamp);
1343 self
1344 }
1345
1346 pub fn language(mut self, language: impl Into<String>) -> Self {
1348 self.language = Some(language.into());
1349 self
1350 }
1351
1352 pub fn finality(mut self, is_final: bool) -> Self {
1354 self.is_final = is_final;
1355 self
1356 }
1357
1358 pub fn as_segment(&self) -> TextSegment<'_> {
1360 TextSegment {
1361 segment_index: self.segment_index,
1362 timestamp: self.timestamp,
1363 text: &self.text,
1364 language: self.language.as_deref(),
1365 is_final: self.is_final,
1366 }
1367 }
1368}
1369
1370#[derive(Debug, Default, Clone, PartialEq)]
1371pub struct DetectionResult {
1373 pub scenes: Vec<Scene>,
1375 pub cuts: Vec<Cut>,
1377 pub metrics: MetricsStore,
1379 pub frames_processed: u64,
1381}
1382
1383#[derive(Debug, Clone, PartialEq)]
1384pub struct FrameAnalysis {
1386 pub position: FramePosition,
1388 pub cuts: Vec<Cut>,
1390 pub frames_processed: u64,
1392}
1393
1394#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
1395#[serde(rename_all = "camelCase")]
1396pub struct BoundingBox {
1398 pub x: u32,
1400 pub y: u32,
1402 pub width: u32,
1404 pub height: u32,
1406}
1407
1408impl BoundingBox {
1409 pub fn new(x: u32, y: u32, width: u32, height: u32) -> Result<Self> {
1411 if width == 0 || height == 0 {
1412 return Err(DetectError::InvalidArgument(
1413 "bounding box must have non-zero width and height".to_string(),
1414 ));
1415 }
1416 Ok(Self {
1417 x,
1418 y,
1419 width,
1420 height,
1421 })
1422 }
1423}
1424
1425impl From<BoundingBox> for math_geometry_2d::RectU32 {
1426 fn from(value: BoundingBox) -> Self {
1427 Self {
1428 x: value.x,
1429 y: value.y,
1430 width: value.width,
1431 height: value.height,
1432 }
1433 }
1434}
1435
1436impl TryFrom<math_geometry_2d::RectU32> for BoundingBox {
1437 type Error = DetectError;
1438
1439 fn try_from(value: math_geometry_2d::RectU32) -> std::result::Result<Self, Self::Error> {
1440 value.validate()?;
1441 Self::new(value.x, value.y, value.width, value.height)
1442 }
1443}
1444
1445impl From<math_geometry_2d::GeometryError> for DetectError {
1446 fn from(value: math_geometry_2d::GeometryError) -> Self {
1447 Self::InvalidArgument(value.to_string())
1448 }
1449}
1450
/// Category of an [`Observation`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ObservationKind {
    /// Text observation.
    Text,
    /// Face observation.
    Face,
    /// Object observation.
    Object,
    /// Scene-level observation.
    Scene,
    /// Any other category, identified by a caller-chosen string.
    Custom(String),
}
1465
1466#[derive(Debug, Clone, PartialEq)]
1467pub struct Observation {
1469 pub timestamp: Option<Timestamp>,
1471 pub frame: Option<FramePosition>,
1473 pub scene_index: Option<u64>,
1475 pub analyzer: String,
1477 pub kind: ObservationKind,
1479 pub label: Option<String>,
1481 pub text: Option<String>,
1483 pub score: Option<f32>,
1485 pub region: Option<BoundingBox>,
1487 pub track_id: Option<String>,
1489 pub attributes: BTreeMap<String, String>,
1491}
1492
1493impl Observation {
1494 pub fn new(analyzer: impl Into<String>, kind: ObservationKind) -> Self {
1496 Self {
1497 timestamp: None,
1498 frame: None,
1499 scene_index: None,
1500 analyzer: analyzer.into(),
1501 kind,
1502 label: None,
1503 text: None,
1504 score: None,
1505 region: None,
1506 track_id: None,
1507 attributes: BTreeMap::new(),
1508 }
1509 }
1510
1511 pub fn at_frame(mut self, position: FramePosition) -> Self {
1513 self.timestamp = Some(position.timestamp);
1514 self.frame = Some(position);
1515 self
1516 }
1517
1518 pub fn at_timestamp(mut self, timestamp: Timestamp) -> Self {
1520 self.timestamp = Some(timestamp);
1521 self
1522 }
1523
1524 pub fn in_scene(mut self, scene_index: u64) -> Self {
1526 self.scene_index = Some(scene_index);
1527 self
1528 }
1529
1530 pub fn label(mut self, label: impl Into<String>) -> Self {
1532 self.label = Some(label.into());
1533 self
1534 }
1535
1536 pub fn text(mut self, text: impl Into<String>) -> Self {
1538 self.text = Some(text.into());
1539 self
1540 }
1541
1542 pub fn score(mut self, score: f32) -> Self {
1544 self.score = Some(score);
1545 self
1546 }
1547
1548 pub fn region(mut self, region: BoundingBox) -> Self {
1550 self.region = Some(region);
1551 self
1552 }
1553
1554 pub fn track_id(mut self, track_id: impl Into<String>) -> Self {
1556 self.track_id = Some(track_id.into());
1557 self
1558 }
1559
1560 pub fn attribute(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
1562 self.attributes.insert(key.into(), value.into());
1563 self
1564 }
1565
1566 pub fn to_text_segment(&self, segment_index: u64) -> Option<OwnedTextSegment> {
1568 let text = self.text.as_ref()?;
1569 let mut segment = OwnedTextSegment::new(segment_index, text.clone());
1570 if let Some(timestamp) = self.timestamp {
1571 segment = segment.timestamp(timestamp);
1572 }
1573 if let Some(language) = self.attributes.get("language") {
1574 segment = segment.language(language.clone());
1575 }
1576 Some(segment)
1577 }
1578}
1579
/// Final output of a video analysis run, as returned by
/// `VideoAnalysisPipeline::finish_analysis`.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct VideoAnalysisResult {
    /// Every observation accumulated by the pipeline.
    pub observations: Vec<Observation>,
    /// Number of frames processed.
    pub frames_processed: u64,
}
1588
/// Per-frame output of video analysis.
#[derive(Debug, Clone, PartialEq)]
pub struct VideoFrameAnalysis {
    /// Position of the frame that was processed.
    pub position: FramePosition,
    /// Observations produced for this frame.
    pub observations: Vec<Observation>,
    /// Running count of frames processed, including this one.
    pub frames_processed: u64,
}
1599
/// A scene together with the observations attributed to it.
#[derive(Debug, Clone, PartialEq)]
pub struct SceneAnalysis {
    /// Index of the scene.
    pub scene_index: u64,
    /// Start and end of the scene.
    pub scene: Scene,
    /// Observations attributed to the scene.
    pub observations: Vec<Observation>,
}
1610
/// Per-frame output of the realtime pipeline, combining scene detection and
/// video analysis results.
#[derive(Debug, Clone, PartialEq)]
pub struct RealtimeVideoFrameAnalysis {
    /// Position of the frame that was processed.
    pub position: FramePosition,
    /// Scene-detection result for this frame.
    pub scene: FrameAnalysis,
    /// Observations produced for this frame.
    pub observations: Vec<Observation>,
    /// Scenes that were closed while processing this frame.
    pub completed_scenes: Vec<SceneAnalysis>,
    /// Running count of frames processed, including this one.
    pub frames_processed: u64,
}
1625
/// Final output of the realtime pipeline.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct RealtimeVideoAnalysisResult {
    /// Result of the underlying scene detection.
    pub detection: DetectionResult,
    /// Every observation accumulated by the pipeline.
    pub observations: Vec<Observation>,
    /// One entry per detected scene, with the observations assigned to it.
    pub scenes: Vec<SceneAnalysis>,
    /// Number of frames processed.
    pub frames_processed: u64,
}
1638
1639#[derive(Debug, Clone, PartialEq)]
1640pub struct AnalysisEvent {
1642 pub timestamp: Option<Timestamp>,
1644 pub analyzer: String,
1646 pub label: String,
1648 pub score: Option<f32>,
1650}
1651
1652impl AnalysisEvent {
1653 pub fn new(analyzer: impl Into<String>, label: impl Into<String>) -> Self {
1655 Self {
1656 timestamp: None,
1657 analyzer: analyzer.into(),
1658 label: label.into(),
1659 score: None,
1660 }
1661 }
1662
1663 pub fn at_timestamp(mut self, timestamp: Timestamp) -> Self {
1665 self.timestamp = Some(timestamp);
1666 self
1667 }
1668
1669 pub fn score(mut self, score: f32) -> Self {
1671 self.score = Some(score);
1672 self
1673 }
1674}
1675
/// Final output of an audio analysis run.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct AudioAnalysisResult {
    /// Every event accumulated by the pipeline.
    pub events: Vec<AnalysisEvent>,
    /// Number of audio frames processed.
    pub frames_processed: u64,
}
1684
/// Per-frame output of audio analysis.
#[derive(Debug, Clone, PartialEq)]
pub struct AudioAnalysis {
    /// Timestamp of the audio frame that was processed.
    pub timestamp: Timestamp,
    /// Events produced for this frame.
    pub events: Vec<AnalysisEvent>,
    /// Running count of frames processed, including this one.
    pub frames_processed: u64,
}
1695
/// Final output of a text analysis run.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct TextAnalysisResult {
    /// Every event accumulated by the pipeline.
    pub events: Vec<AnalysisEvent>,
    /// Number of text segments processed.
    pub segments_processed: u64,
}
1704
/// Per-segment output of text analysis.
#[derive(Debug, Clone, PartialEq)]
pub struct TextAnalysis {
    /// Index of the segment that was processed.
    pub segment_index: u64,
    /// Events produced for this segment.
    pub events: Vec<AnalysisEvent>,
    /// Running count of segments processed, including this one.
    pub segments_processed: u64,
}
1715
/// Scene-detection pipeline: runs a set of detectors over frames and collects cuts.
pub struct ScenePipeline {
    // Detectors run, in order, on every frame.
    detectors: Vec<Box<dyn SceneDetector>>,
    // Forwarded to `scenes_from_cuts` when the final scene list is built.
    start_in_scene: bool,
    // Optional crop region; frames are validated against its start corner.
    crop: Option<CropRegion>,
    // Stored configuration value; the code in this file only reads it as a placeholder.
    auto_downscale_min_width: Option<u32>,
    // Mutable run state, replaced wholesale by `reset`.
    state: ScenePipelineState,
}
1724
/// Mutable state accumulated by a [`ScenePipeline`] during one run.
#[derive(Debug, Default, Clone)]
struct ScenePipelineState {
    // Recorded cuts, kept sorted by frame index with one cut per frame index.
    cuts: Vec<Cut>,
    // Metrics store handed to detectors on every call.
    metrics: MetricsStore,
    // Position of the first frame processed.
    first_position: Option<FramePosition>,
    // Position of the most recent frame processed.
    last_position: Option<FramePosition>,
    // Number of frames processed so far.
    frames_processed: u64,
    // Set once detection has been finalized; further frames are rejected.
    finished: bool,
}
1734
1735#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1736pub struct CropRegion {
1738 pub x0: u32,
1740 pub y0: u32,
1742 pub x1: u32,
1744 pub y1: u32,
1746}
1747
1748impl CropRegion {
1749 pub fn new(x0: u32, y0: u32, x1: u32, y1: u32) -> Result<Self> {
1751 if x0 == x1 || y0 == y1 {
1752 return Err(DetectError::InvalidArgument(
1753 "crop region must have non-zero width and height".to_string(),
1754 ));
1755 }
1756 Ok(Self { x0, y0, x1, y1 })
1757 }
1758}
1759
1760impl ScenePipeline {
1761 pub fn builder() -> ScenePipelineBuilder {
1763 ScenePipelineBuilder::default()
1764 }
1765
1766 pub fn detect<S: VideoSource>(&mut self, source: &mut S) -> Result<DetectionResult> {
1768 self.reset();
1769 while let Some(frame) = source.next_frame()? {
1770 self.process_frame(frame)?;
1771 }
1772 self.finish_detection()
1773 }
1774
1775 pub fn process_frame(&mut self, frame: OwnedVideoFrame) -> Result<FrameAnalysis> {
1777 let frame = self.prepare_frame(frame)?;
1778 self.process_frame_ref(&frame.as_frame())
1779 }
1780
1781 pub fn process_frame_ref(&mut self, frame: &VideoFrame<'_>) -> Result<FrameAnalysis> {
1783 if self.state.finished {
1784 return Err(DetectError::InvalidArgument(
1785 "cannot process frames after finish_detection; call reset first".to_string(),
1786 ));
1787 }
1788 self.validate_frame_options(frame)?;
1789 self.state.first_position.get_or_insert(frame.position);
1790 self.state.last_position = Some(frame.position);
1791
1792 let mut cuts = Vec::new();
1793 for detector in &mut self.detectors {
1794 let mut new_cuts = detector.process_frame(frame, Some(&mut self.state.metrics))?;
1795 cuts.append(&mut new_cuts);
1796 }
1797 let cuts = self.record_cuts(cuts);
1798 self.state.frames_processed += 1;
1799
1800 Ok(FrameAnalysis {
1801 position: frame.position,
1802 cuts,
1803 frames_processed: self.state.frames_processed,
1804 })
1805 }
1806
1807 pub fn finish_detection(&mut self) -> Result<DetectionResult> {
1809 if !self.state.finished {
1810 if let Some(last) = self.state.last_position {
1811 let mut cuts = Vec::new();
1812 for detector in &mut self.detectors {
1813 let mut new_cuts = detector.finish(last, Some(&mut self.state.metrics))?;
1814 cuts.append(&mut new_cuts);
1815 }
1816 self.record_cuts(cuts);
1817 }
1818 self.state.finished = true;
1819 }
1820
1821 let scenes = if let (Some(start), Some(end)) =
1822 (self.state.first_position, self.state.last_position)
1823 {
1824 scenes_from_cuts(&self.state.cuts, start, end, self.start_in_scene)
1825 } else {
1826 Vec::new()
1827 };
1828
1829 Ok(DetectionResult {
1830 scenes,
1831 cuts: self.state.cuts.clone(),
1832 metrics: self.state.metrics.clone(),
1833 frames_processed: self.state.frames_processed,
1834 })
1835 }
1836
1837 pub fn reset(&mut self) {
1839 self.state = ScenePipelineState::default();
1840 }
1841
1842 pub fn metrics(&self) -> &MetricsStore {
1844 &self.state.metrics
1845 }
1846
1847 pub fn cuts(&self) -> &[Cut] {
1849 &self.state.cuts
1850 }
1851
1852 pub fn frames_processed(&self) -> u64 {
1854 self.state.frames_processed
1855 }
1856
1857 fn record_cuts(&mut self, mut cuts: Vec<Cut>) -> Vec<Cut> {
1858 cuts.sort_by_key(|cut| cut.position.frame_index);
1859 cuts.dedup_by_key(|cut| cut.position.frame_index);
1860 let mut accepted = Vec::new();
1861 for cut in cuts {
1862 if self
1863 .state
1864 .cuts
1865 .iter()
1866 .any(|existing| existing.position.frame_index == cut.position.frame_index)
1867 {
1868 continue;
1869 }
1870 self.state.cuts.push(cut.clone());
1871 accepted.push(cut);
1872 }
1873 self.state.cuts.sort_by_key(|cut| cut.position.frame_index);
1874 accepted
1875 }
1876
1877 fn prepare_frame(&self, frame: OwnedVideoFrame) -> Result<OwnedVideoFrame> {
1878 self.validate_frame_options(&frame.as_frame())?;
1879 Ok(frame)
1880 }
1881
1882 fn validate_frame_options(&self, frame: &VideoFrame<'_>) -> Result<()> {
1883 let _ = self.auto_downscale_min_width;
1884 if let Some(crop) = self.crop {
1885 if crop.x0 >= frame.width || crop.y0 >= frame.height {
1886 return Err(DetectError::InvalidArgument(
1887 "crop starts outside frame boundary".to_string(),
1888 ));
1889 }
1890 }
1891 Ok(())
1892 }
1893}
1894
1895#[derive(Default)]
1896pub struct ScenePipelineBuilder {
1898 detectors: Vec<Box<dyn SceneDetector>>,
1899 start_in_scene: bool,
1900 crop: Option<CropRegion>,
1901 auto_downscale_min_width: Option<u32>,
1902}
1903
1904impl ScenePipelineBuilder {
1905 pub fn detector<D: SceneDetector + 'static>(mut self, detector: D) -> Self {
1907 self.detectors.push(Box::new(detector));
1908 self
1909 }
1910
1911 pub fn start_in_scene(mut self, value: bool) -> Self {
1913 self.start_in_scene = value;
1914 self
1915 }
1916
1917 pub fn crop(mut self, value: Option<CropRegion>) -> Self {
1919 self.crop = value;
1920 self
1921 }
1922
1923 pub fn auto_downscale_min_width(mut self, value: u32) -> Self {
1925 self.auto_downscale_min_width = Some(value);
1926 self
1927 }
1928
1929 pub fn build(self) -> Result<ScenePipeline> {
1931 if self.detectors.is_empty() {
1932 return Err(DetectError::InvalidArgument(
1933 "at least one detector is required".to_string(),
1934 ));
1935 }
1936 Ok(ScenePipeline {
1937 detectors: self.detectors,
1938 start_in_scene: self.start_in_scene,
1939 crop: self.crop,
1940 auto_downscale_min_width: self.auto_downscale_min_width,
1941 state: ScenePipelineState::default(),
1942 })
1943 }
1944}
1945
/// Frame-level analyzer that turns video frames into [`Observation`]s.
pub trait VideoAnalyzer {
    /// Name of the analyzer.
    fn name(&self) -> &str;

    /// Analyzes one frame and returns the observations it produced.
    fn process_frame(&mut self, frame: &VideoFrame<'_>) -> Result<Vec<Observation>>;

    /// Hook invoked when analysis ends, with the last frame position seen (if any).
    ///
    /// The default implementation returns no observations.
    fn finish(&mut self, _last_position: Option<FramePosition>) -> Result<Vec<Observation>> {
        Ok(Vec::new())
    }
}
1959
/// Wrapper that pairs an inner analyzer with a sampling interval.
#[derive(Debug, Clone, PartialEq)]
pub struct SampledVideoAnalyzer<A> {
    // Wrapped analyzer.
    inner: A,
    // Sampling interval; never zero.
    every: u64,
}

impl<A> SampledVideoAnalyzer<A> {
    /// Wraps `inner` with the sampling interval `every`; an interval of `0` is raised to `1`.
    pub fn new(inner: A, every: u64) -> Self {
        let every = if every == 0 { 1 } else { every };
        Self { inner, every }
    }

    /// Shared access to the wrapped analyzer.
    pub fn inner(&self) -> &A {
        let Self { inner, .. } = self;
        inner
    }

    /// Exclusive access to the wrapped analyzer.
    pub fn inner_mut(&mut self) -> &mut A {
        let Self { inner, .. } = self;
        inner
    }

    /// The effective sampling interval (always at least `1`).
    pub fn every(&self) -> u64 {
        let Self { every, .. } = *self;
        every
    }
}
1991
1992impl<A: VideoAnalyzer> VideoAnalyzer for SampledVideoAnalyzer<A> {
1993 fn name(&self) -> &str {
1994 self.inner.name()
1995 }
1996
1997 fn process_frame(&mut self, frame: &VideoFrame<'_>) -> Result<Vec<Observation>> {
1998 if frame.position.frame_index.is_multiple_of(self.every) {
1999 self.inner.process_frame(frame)
2000 } else {
2001 Ok(Vec::new())
2002 }
2003 }
2004
2005 fn finish(&mut self, last_position: Option<FramePosition>) -> Result<Vec<Observation>> {
2006 self.inner.finish(last_position)
2007 }
2008}
2009
/// Pipeline that runs a set of [`VideoAnalyzer`]s over frames and accumulates
/// their observations.
pub struct VideoAnalysisPipeline {
    // Analyzers run, in order, on every frame.
    analyzers: Vec<Box<dyn VideoAnalyzer>>,
    // Mutable run state, replaced wholesale by `reset`.
    state: VideoAnalysisPipelineState,
}
2015
/// Mutable state accumulated by a [`VideoAnalysisPipeline`] during one run.
#[derive(Debug, Default, Clone)]
struct VideoAnalysisPipelineState {
    // Every observation produced so far.
    observations: Vec<Observation>,
    // Position of the most recent frame processed.
    last_position: Option<FramePosition>,
    // Number of frames processed so far.
    frames_processed: u64,
    // Set once analysis has been finalized; further frames are rejected.
    finished: bool,
}
2023
2024impl VideoAnalysisPipeline {
2025 pub fn builder() -> VideoAnalysisPipelineBuilder {
2027 VideoAnalysisPipelineBuilder::default()
2028 }
2029
2030 pub fn process_frame(&mut self, frame: OwnedVideoFrame) -> Result<VideoFrameAnalysis> {
2032 self.process_frame_ref(&frame.as_frame())
2033 }
2034
2035 pub fn process_frame_ref(&mut self, frame: &VideoFrame<'_>) -> Result<VideoFrameAnalysis> {
2037 if self.state.finished {
2038 return Err(DetectError::InvalidArgument(
2039 "cannot process video frames after finish_analysis; call reset first".to_string(),
2040 ));
2041 }
2042 self.state.last_position = Some(frame.position);
2043
2044 let mut observations = Vec::new();
2045 for analyzer in &mut self.analyzers {
2046 let mut new_observations = analyzer.process_frame(frame)?;
2047 for observation in &mut new_observations {
2048 if observation.timestamp.is_none() {
2049 observation.timestamp = Some(frame.position.timestamp);
2050 }
2051 if observation.frame.is_none() {
2052 observation.frame = Some(frame.position);
2053 }
2054 }
2055 observations.append(&mut new_observations);
2056 }
2057 self.state.observations.extend(observations.iter().cloned());
2058 self.state.frames_processed += 1;
2059
2060 Ok(VideoFrameAnalysis {
2061 position: frame.position,
2062 observations,
2063 frames_processed: self.state.frames_processed,
2064 })
2065 }
2066
2067 pub fn finish_analysis(&mut self) -> Result<VideoAnalysisResult> {
2069 if !self.state.finished {
2070 let mut observations = Vec::new();
2071 for analyzer in &mut self.analyzers {
2072 let mut new_observations = analyzer.finish(self.state.last_position)?;
2073 observations.append(&mut new_observations);
2074 }
2075 self.state.observations.extend(observations);
2076 self.state.finished = true;
2077 }
2078 Ok(VideoAnalysisResult {
2079 observations: self.state.observations.clone(),
2080 frames_processed: self.state.frames_processed,
2081 })
2082 }
2083
2084 pub fn reset(&mut self) {
2086 self.state = VideoAnalysisPipelineState::default();
2087 }
2088
2089 pub fn observations(&self) -> &[Observation] {
2091 &self.state.observations
2092 }
2093
2094 pub fn frames_processed(&self) -> u64 {
2096 self.state.frames_processed
2097 }
2098}
2099
2100#[derive(Default)]
2101pub struct VideoAnalysisPipelineBuilder {
2103 analyzers: Vec<Box<dyn VideoAnalyzer>>,
2104}
2105
2106impl VideoAnalysisPipelineBuilder {
2107 pub fn analyzer<A: VideoAnalyzer + 'static>(mut self, analyzer: A) -> Self {
2109 self.analyzers.push(Box::new(analyzer));
2110 self
2111 }
2112
2113 pub fn build(self) -> Result<VideoAnalysisPipeline> {
2115 if self.analyzers.is_empty() {
2116 return Err(DetectError::InvalidArgument(
2117 "at least one video analyzer is required".to_string(),
2118 ));
2119 }
2120 Ok(VideoAnalysisPipeline {
2121 analyzers: self.analyzers,
2122 state: VideoAnalysisPipelineState::default(),
2123 })
2124 }
2125}
2126
/// Pipeline that combines scene detection with optional video analysis and
/// groups observations by scene as frames arrive.
pub struct RealtimeVideoPipeline {
    // Scene detection run on every frame.
    scene_pipeline: ScenePipeline,
    // Optional video analysis run on every frame.
    video_pipeline: Option<VideoAnalysisPipeline>,
    // Mutable run state, replaced wholesale by `reset`.
    state: RealtimeVideoPipelineState,
}
2133
/// Mutable state accumulated by a [`RealtimeVideoPipeline`] during one run.
#[derive(Debug, Default, Clone)]
struct RealtimeVideoPipelineState {
    // Every observation produced so far.
    observations: Vec<Observation>,
    // Scenes that have been closed by a cut.
    completed_scenes: Vec<SceneAnalysis>,
    // Observations collected since the last scene was closed.
    open_scene_observations: Vec<Observation>,
    // Start position of the scene currently open.
    active_scene_start: Option<FramePosition>,
    // Index of the scene currently open.
    active_scene_index: u64,
    // Position of the first frame processed.
    first_position: Option<FramePosition>,
    // Position of the most recent frame processed.
    last_position: Option<FramePosition>,
    // Number of frames processed so far.
    frames_processed: u64,
    // Set once analysis has been finalized; further frames are rejected.
    finished: bool,
}
2146
2147impl RealtimeVideoPipeline {
2148 pub fn builder() -> RealtimeVideoPipelineBuilder {
2150 RealtimeVideoPipelineBuilder::default()
2151 }
2152
2153 pub fn new(
2155 scene_pipeline: ScenePipeline,
2156 video_pipeline: Option<VideoAnalysisPipeline>,
2157 ) -> Self {
2158 Self {
2159 scene_pipeline,
2160 video_pipeline,
2161 state: RealtimeVideoPipelineState::default(),
2162 }
2163 }
2164
2165 pub fn process_frame(&mut self, frame: OwnedVideoFrame) -> Result<RealtimeVideoFrameAnalysis> {
2167 self.process_frame_ref(&frame.as_frame())
2168 }
2169
2170 pub fn process_frame_ref(
2172 &mut self,
2173 frame: &VideoFrame<'_>,
2174 ) -> Result<RealtimeVideoFrameAnalysis> {
2175 if self.state.finished {
2176 return Err(DetectError::InvalidArgument(
2177 "cannot process video frames after finish_analysis; call reset first".to_string(),
2178 ));
2179 }
2180
2181 self.state.first_position.get_or_insert(frame.position);
2182 self.state.last_position = Some(frame.position);
2183 self.state.active_scene_start.get_or_insert(frame.position);
2184
2185 let scene = self.scene_pipeline.process_frame_ref(frame)?;
2186 let completed_scenes = self.close_scenes(&scene.cuts);
2187 let mut observations = if let Some(pipeline) = &mut self.video_pipeline {
2188 pipeline.process_frame_ref(frame)?.observations
2189 } else {
2190 Vec::new()
2191 };
2192 self.annotate_observations(&mut observations, frame.position);
2193 self.state
2194 .open_scene_observations
2195 .extend(observations.iter().cloned());
2196 self.state.observations.extend(observations.iter().cloned());
2197 self.state.frames_processed += 1;
2198
2199 Ok(RealtimeVideoFrameAnalysis {
2200 position: frame.position,
2201 scene,
2202 observations,
2203 completed_scenes,
2204 frames_processed: self.state.frames_processed,
2205 })
2206 }
2207
2208 pub fn finish_analysis(&mut self) -> Result<RealtimeVideoAnalysisResult> {
2210 let detection = self.scene_pipeline.finish_detection()?;
2211 if !self.state.finished {
2212 let pending_cuts = detection
2213 .cuts
2214 .iter()
2215 .filter(|cut| {
2216 self.state
2217 .active_scene_start
2218 .map(|start| cut.position.frame_index > start.frame_index)
2219 .unwrap_or(false)
2220 })
2221 .cloned()
2222 .collect::<Vec<_>>();
2223 self.close_scenes(&pending_cuts);
2224
2225 if let Some(pipeline) = &mut self.video_pipeline {
2226 let already_observed = self.state.observations.len();
2227 let result = pipeline.finish_analysis()?;
2228 let mut final_observations = result
2229 .observations
2230 .into_iter()
2231 .skip(already_observed)
2232 .collect::<Vec<_>>();
2233 self.annotate_observations_without_frame(&mut final_observations);
2234 self.state
2235 .open_scene_observations
2236 .extend(final_observations.iter().cloned());
2237 self.state
2238 .observations
2239 .extend(final_observations.iter().cloned());
2240 }
2241 self.state.finished = true;
2242 }
2243
2244 let scenes = self.scene_analyses_for(&detection.scenes);
2245 Ok(RealtimeVideoAnalysisResult {
2246 detection,
2247 observations: self.state.observations.clone(),
2248 scenes,
2249 frames_processed: self.state.frames_processed,
2250 })
2251 }
2252
2253 pub fn reset(&mut self) {
2255 self.scene_pipeline.reset();
2256 if let Some(pipeline) = &mut self.video_pipeline {
2257 pipeline.reset();
2258 }
2259 self.state = RealtimeVideoPipelineState::default();
2260 }
2261
2262 pub fn observations(&self) -> &[Observation] {
2264 &self.state.observations
2265 }
2266
2267 pub fn completed_scenes(&self) -> &[SceneAnalysis] {
2269 &self.state.completed_scenes
2270 }
2271
2272 pub fn frames_processed(&self) -> u64 {
2274 self.state.frames_processed
2275 }
2276
2277 fn close_scenes(&mut self, cuts: &[Cut]) -> Vec<SceneAnalysis> {
2278 let mut completed = Vec::new();
2279 for cut in cuts {
2280 let Some(start) = self.state.active_scene_start else {
2281 continue;
2282 };
2283 if cut.position.frame_index <= start.frame_index {
2284 continue;
2285 }
2286 let scene = Scene {
2287 start,
2288 end: cut.position,
2289 };
2290 let observations = std::mem::take(&mut self.state.open_scene_observations);
2291 let analysis = SceneAnalysis {
2292 scene_index: self.state.active_scene_index,
2293 scene,
2294 observations,
2295 };
2296 self.state.completed_scenes.push(analysis.clone());
2297 completed.push(analysis);
2298 self.state.active_scene_index += 1;
2299 self.state.active_scene_start = Some(cut.position);
2300 }
2301 completed
2302 }
2303
2304 fn annotate_observations(&self, observations: &mut [Observation], position: FramePosition) {
2305 for observation in observations {
2306 if observation.timestamp.is_none() {
2307 observation.timestamp = Some(position.timestamp);
2308 }
2309 if observation.frame.is_none() {
2310 observation.frame = Some(position);
2311 }
2312 if observation.scene_index.is_none() {
2313 observation.scene_index = Some(self.state.active_scene_index);
2314 }
2315 }
2316 }
2317
2318 fn annotate_observations_without_frame(&self, observations: &mut [Observation]) {
2319 for observation in observations {
2320 if observation.scene_index.is_none() {
2321 observation.scene_index = Some(self.state.active_scene_index);
2322 }
2323 }
2324 }
2325
2326 fn scene_analyses_for(&self, scenes: &[Scene]) -> Vec<SceneAnalysis> {
2327 scenes
2328 .iter()
2329 .enumerate()
2330 .map(|(index, scene)| {
2331 let scene_index = index as u64;
2332 let observations = self
2333 .state
2334 .observations
2335 .iter()
2336 .filter(|observation| observation.scene_index == Some(scene_index))
2337 .cloned()
2338 .collect();
2339 SceneAnalysis {
2340 scene_index,
2341 scene: scene.clone(),
2342 observations,
2343 }
2344 })
2345 .collect()
2346 }
2347}
2348
2349#[derive(Default)]
2350pub struct RealtimeVideoPipelineBuilder {
2352 scene_pipeline: Option<ScenePipeline>,
2353 scene_builder: ScenePipelineBuilder,
2354 video_analyzers: Vec<Box<dyn VideoAnalyzer>>,
2355}
2356
2357impl RealtimeVideoPipelineBuilder {
2358 pub fn scene_pipeline(mut self, pipeline: ScenePipeline) -> Self {
2360 self.scene_pipeline = Some(pipeline);
2361 self
2362 }
2363
2364 pub fn scene_detector<D: SceneDetector + 'static>(mut self, detector: D) -> Self {
2366 self.scene_builder = self.scene_builder.detector(detector);
2367 self
2368 }
2369
2370 pub fn video_analyzer<A: VideoAnalyzer + 'static>(mut self, analyzer: A) -> Self {
2372 self.video_analyzers.push(Box::new(analyzer));
2373 self
2374 }
2375
2376 pub fn start_in_scene(mut self, value: bool) -> Self {
2378 self.scene_builder = self.scene_builder.start_in_scene(value);
2379 self
2380 }
2381
2382 pub fn crop(mut self, value: Option<CropRegion>) -> Self {
2384 self.scene_builder = self.scene_builder.crop(value);
2385 self
2386 }
2387
2388 pub fn auto_downscale_min_width(mut self, value: u32) -> Self {
2390 self.scene_builder = self.scene_builder.auto_downscale_min_width(value);
2391 self
2392 }
2393
2394 pub fn build(self) -> Result<RealtimeVideoPipeline> {
2396 let scene_pipeline = match self.scene_pipeline {
2397 Some(pipeline) => pipeline,
2398 None => self.scene_builder.build()?,
2399 };
2400 let video_pipeline = if self.video_analyzers.is_empty() {
2401 None
2402 } else {
2403 Some(VideoAnalysisPipeline {
2404 analyzers: self.video_analyzers,
2405 state: VideoAnalysisPipelineState::default(),
2406 })
2407 };
2408 Ok(RealtimeVideoPipeline::new(scene_pipeline, video_pipeline))
2409 }
2410}
2411
/// Analyzer that turns audio frames into [`AnalysisEvent`]s.
pub trait AudioAnalyzer {
    /// Name of the analyzer.
    fn name(&self) -> &str;

    /// Analyzes one audio frame and returns the events it produced.
    fn process_frame(&mut self, frame: &AudioFrame<'_>) -> Result<Vec<AnalysisEvent>>;

    /// Hook invoked when analysis ends, with the last frame timestamp seen (if any).
    ///
    /// The default implementation returns no events.
    fn finish(&mut self, _last_timestamp: Option<Timestamp>) -> Result<Vec<AnalysisEvent>> {
        Ok(Vec::new())
    }
}
2425
/// Pipeline that runs a set of [`AudioAnalyzer`]s over audio frames and
/// accumulates their events.
pub struct AudioPipeline {
    // Analyzers run, in order, on every frame.
    analyzers: Vec<Box<dyn AudioAnalyzer>>,
    // Mutable run state, replaced wholesale by `reset`.
    state: AudioPipelineState,
}
2431
/// Mutable state accumulated by an [`AudioPipeline`] during one run.
#[derive(Debug, Default, Clone)]
struct AudioPipelineState {
    // Every event produced so far.
    events: Vec<AnalysisEvent>,
    // Timestamp of the most recent frame processed.
    last_timestamp: Option<Timestamp>,
    // Number of frames processed so far.
    frames_processed: u64,
    // Set once analysis has been finalized; further frames are rejected.
    finished: bool,
}
2439
2440impl AudioPipeline {
2441 pub fn builder() -> AudioPipelineBuilder {
2443 AudioPipelineBuilder::default()
2444 }
2445
2446 pub fn process_frame(&mut self, frame: OwnedAudioFrame) -> Result<AudioAnalysis> {
2448 if self.state.finished {
2449 return Err(DetectError::InvalidArgument(
2450 "cannot process audio frames after finish_analysis; call reset first".to_string(),
2451 ));
2452 }
2453 let frame_ref = frame.as_frame()?;
2454 self.state.last_timestamp = Some(frame_ref.timestamp);
2455
2456 let mut events = Vec::new();
2457 for analyzer in &mut self.analyzers {
2458 let mut new_events = analyzer.process_frame(&frame_ref)?;
2459 events.append(&mut new_events);
2460 }
2461 self.state.events.extend(events.iter().cloned());
2462 self.state.frames_processed += 1;
2463
2464 Ok(AudioAnalysis {
2465 timestamp: frame_ref.timestamp,
2466 events,
2467 frames_processed: self.state.frames_processed,
2468 })
2469 }
2470
2471 pub fn finish_analysis(&mut self) -> Result<AudioAnalysisResult> {
2473 if !self.state.finished {
2474 let mut events = Vec::new();
2475 for analyzer in &mut self.analyzers {
2476 let mut new_events = analyzer.finish(self.state.last_timestamp)?;
2477 events.append(&mut new_events);
2478 }
2479 self.state.events.extend(events);
2480 self.state.finished = true;
2481 }
2482 Ok(AudioAnalysisResult {
2483 events: self.state.events.clone(),
2484 frames_processed: self.state.frames_processed,
2485 })
2486 }
2487
2488 pub fn reset(&mut self) {
2490 self.state = AudioPipelineState::default();
2491 }
2492
2493 pub fn events(&self) -> &[AnalysisEvent] {
2495 &self.state.events
2496 }
2497
2498 pub fn frames_processed(&self) -> u64 {
2500 self.state.frames_processed
2501 }
2502}
2503
2504#[derive(Default)]
2505pub struct AudioPipelineBuilder {
2507 analyzers: Vec<Box<dyn AudioAnalyzer>>,
2508}
2509
2510impl AudioPipelineBuilder {
2511 pub fn analyzer<A: AudioAnalyzer + 'static>(mut self, analyzer: A) -> Self {
2513 self.analyzers.push(Box::new(analyzer));
2514 self
2515 }
2516
2517 pub fn build(self) -> Result<AudioPipeline> {
2519 if self.analyzers.is_empty() {
2520 return Err(DetectError::InvalidArgument(
2521 "at least one audio analyzer is required".to_string(),
2522 ));
2523 }
2524 Ok(AudioPipeline {
2525 analyzers: self.analyzers,
2526 state: AudioPipelineState::default(),
2527 })
2528 }
2529}
2530
/// Analyzer that turns text segments into [`AnalysisEvent`]s.
pub trait TextAnalyzer {
    /// Name of the analyzer.
    fn name(&self) -> &str;

    /// Analyzes one text segment and returns the events it produced.
    fn process_segment(&mut self, segment: &TextSegment<'_>) -> Result<Vec<AnalysisEvent>>;

    /// Hook invoked when analysis ends, with the last segment index seen (if any).
    ///
    /// The default implementation returns no events.
    fn finish(&mut self, _last_segment_index: Option<u64>) -> Result<Vec<AnalysisEvent>> {
        Ok(Vec::new())
    }
}
2544
/// Pipeline that runs a set of [`TextAnalyzer`]s over text segments and
/// accumulates their events.
pub struct TextPipeline {
    // Analyzers run, in order, on every segment.
    analyzers: Vec<Box<dyn TextAnalyzer>>,
    // Mutable run state, replaced wholesale by `reset`.
    state: TextPipelineState,
}
2550
/// Mutable state accumulated by a [`TextPipeline`] during one run.
#[derive(Debug, Default, Clone)]
struct TextPipelineState {
    // Every event produced so far.
    events: Vec<AnalysisEvent>,
    // Index of the most recent segment processed.
    last_segment_index: Option<u64>,
    // Number of segments processed so far.
    segments_processed: u64,
    // Set once analysis has been finalized; further segments are rejected.
    finished: bool,
}
2558
2559impl TextPipeline {
2560 pub fn builder() -> TextPipelineBuilder {
2562 TextPipelineBuilder::default()
2563 }
2564
2565 pub fn process_segment(&mut self, segment: OwnedTextSegment) -> Result<TextAnalysis> {
2567 if self.state.finished {
2568 return Err(DetectError::InvalidArgument(
2569 "cannot process text segments after finish_analysis; call reset first".to_string(),
2570 ));
2571 }
2572 let segment_ref = segment.as_segment();
2573 self.state.last_segment_index = Some(segment_ref.segment_index);
2574
2575 let mut events = Vec::new();
2576 for analyzer in &mut self.analyzers {
2577 let mut new_events = analyzer.process_segment(&segment_ref)?;
2578 events.append(&mut new_events);
2579 }
2580 self.state.events.extend(events.iter().cloned());
2581 self.state.segments_processed += 1;
2582
2583 Ok(TextAnalysis {
2584 segment_index: segment_ref.segment_index,
2585 events,
2586 segments_processed: self.state.segments_processed,
2587 })
2588 }
2589
2590 pub fn finish_analysis(&mut self) -> Result<TextAnalysisResult> {
2592 if !self.state.finished {
2593 let mut events = Vec::new();
2594 for analyzer in &mut self.analyzers {
2595 let mut new_events = analyzer.finish(self.state.last_segment_index)?;
2596 events.append(&mut new_events);
2597 }
2598 self.state.events.extend(events);
2599 self.state.finished = true;
2600 }
2601 Ok(TextAnalysisResult {
2602 events: self.state.events.clone(),
2603 segments_processed: self.state.segments_processed,
2604 })
2605 }
2606
2607 pub fn reset(&mut self) {
2609 self.state = TextPipelineState::default();
2610 }
2611
2612 pub fn events(&self) -> &[AnalysisEvent] {
2614 &self.state.events
2615 }
2616
2617 pub fn segments_processed(&self) -> u64 {
2619 self.state.segments_processed
2620 }
2621}
2622
2623#[derive(Default)]
2624pub struct TextPipelineBuilder {
2626 analyzers: Vec<Box<dyn TextAnalyzer>>,
2627}
2628
2629impl TextPipelineBuilder {
2630 pub fn analyzer<A: TextAnalyzer + 'static>(mut self, analyzer: A) -> Self {
2632 self.analyzers.push(Box::new(analyzer));
2633 self
2634 }
2635
2636 pub fn build(self) -> Result<TextPipeline> {
2638 if self.analyzers.is_empty() {
2639 return Err(DetectError::InvalidArgument(
2640 "at least one text analyzer is required".to_string(),
2641 ));
2642 }
2643 Ok(TextPipeline {
2644 analyzers: self.analyzers,
2645 state: TextPipelineState::default(),
2646 })
2647 }
2648}
2649
2650pub fn scenes_from_cuts(
2652 cuts: &[Cut],
2653 start: FramePosition,
2654 last_frame: FramePosition,
2655 start_in_scene: bool,
2656) -> Vec<Scene> {
2657 if cuts.is_empty() && !start_in_scene {
2658 return Vec::new();
2659 }
2660 let mut scenes = Vec::new();
2661 let mut scene_start = start;
2662 for cut in cuts {
2663 if cut.position.frame_index <= scene_start.frame_index {
2664 continue;
2665 }
2666 scenes.push(Scene {
2667 start: scene_start,
2668 end: cut.position,
2669 });
2670 scene_start = cut.position;
2671 }
2672 scenes.push(Scene {
2673 start: scene_start,
2674 end: FramePosition {
2675 frame_index: last_frame.frame_index + 1,
2676 timestamp: Timestamp::new(last_frame.timestamp.pts + 1, last_frame.timestamp.timebase),
2677 },
2678 });
2679 scenes
2680}
2681
#[cfg(test)]
mod tests {
    use super::*;

    /// Frame rate used by every test in this module: 30 frames per second.
    fn fps() -> Rational64 {
        Rational64::new(30, 1)
    }

    /// Frame position of `frame` at the shared test frame rate.
    fn pos(frame: u64) -> FramePosition {
        FramePosition::from_frame_index(frame, fps())
    }

    /// Smallest possible frame: a single black RGB24 pixel at `frame_index`.
    fn owned_frame(frame_index: u64) -> OwnedVideoFrame {
        OwnedVideoFrame {
            position: pos(frame_index),
            width: 1,
            height: 1,
            pixel_format: PixelFormat::Rgb24,
            data: vec![0, 0, 0],
            stride: 3,
        }
    }

    /// 4x4 frame in which every pixel holds the three bytes of `pixel`,
    /// written in the given order and tagged with `pixel_format`.
    fn solid_frame(
        frame_index: u64,
        pixel_format: PixelFormat,
        pixel: [u8; 3],
    ) -> OwnedVideoFrame {
        OwnedVideoFrame {
            position: pos(frame_index),
            width: 4,
            height: 4,
            pixel_format,
            // 16 pixels, 3 bytes each, tightly packed (stride 12 = 4 * 3).
            data: pixel.repeat(16),
            stride: 12,
        }
    }

    /// Solid-colour 4x4 RGB24 frame.
    fn content_frame(frame_index: u64, rgb: [u8; 3]) -> OwnedVideoFrame {
        solid_frame(frame_index, PixelFormat::Rgb24, rgb)
    }

    /// Solid-colour 4x4 BGR24 frame; `rgb` is given in RGB order and stored
    /// with the channels reversed.
    fn content_bgr_frame(frame_index: u64, rgb: [u8; 3]) -> OwnedVideoFrame {
        let [r, g, b] = rgb;
        solid_frame(frame_index, PixelFormat::Bgr24, [b, g, r])
    }

    /// Detector stub that emits exactly one cut, on the frame whose index
    /// equals the wrapped value. Shared by the pipeline tests.
    struct CutOnFrame(u64);

    impl SceneDetector for CutOnFrame {
        fn name(&self) -> &'static str {
            "test"
        }

        fn metric_keys(&self) -> &'static [&'static str] {
            &[]
        }

        fn process_frame(
            &mut self,
            frame: &VideoFrame<'_>,
            _metrics: Option<&mut dyn MetricsSink>,
        ) -> Result<Vec<Cut>> {
            Ok((frame.position.frame_index == self.0)
                .then(|| Cut {
                    position: frame.position,
                    detector: self.name(),
                    score: Some(1.0),
                })
                .into_iter()
                .collect())
        }
    }

    /// Ten seconds at 30 fps is frame 300, and subtracting more frames than
    /// are available clamps to frame 0.
    #[test]
    fn timecode_formats_and_clamps_subtraction() {
        let tc = FrameTimecode::from_seconds(10.0, fps()).unwrap();
        assert_eq!(tc.frame_index, 300);
        assert_eq!(tc.timecode(3), "00:00:10.000");
        assert_eq!((FrameTimecode::from_frames(5, fps()) - 10).frame_index, 0);
    }

    /// Parsing accepts a bare frame count, decimal seconds, and `HH:MM:SS.mmm`.
    #[test]
    fn timecode_parse_accepts_frames_seconds_and_hms() {
        assert_eq!(FrameTimecode::parse("42", fps()).unwrap().frame_index, 42);
        assert_eq!(FrameTimecode::parse("2.0", fps()).unwrap().frame_index, 60);
        assert_eq!(
            FrameTimecode::parse("00:01:00.000", fps())
                .unwrap()
                .frame_index,
            1800
        );
    }

    /// Without cuts, a scene is produced only when `start_in_scene` is set.
    #[test]
    fn scenes_are_empty_without_cuts_unless_start_in_scene() {
        assert!(scenes_from_cuts(&[], pos(0), pos(9), false).is_empty());
        let scenes = scenes_from_cuts(&[], pos(0), pos(9), true);
        assert_eq!(scenes.len(), 1);
        assert_eq!(scenes[0].end.frame_index, 10);
    }

    /// A single cut splits the range into two scenes that share the cut
    /// position as end/start.
    #[test]
    fn scenes_are_contiguous_from_unique_cuts() {
        let cuts = vec![Cut {
            position: pos(5),
            detector: "test",
            score: None,
        }];
        let scenes = scenes_from_cuts(&cuts, pos(0), pos(9), true);
        assert_eq!(scenes.len(), 2);
        assert_eq!(scenes[0].start.frame_index, 0);
        assert_eq!(scenes[0].end.frame_index, 5);
        assert_eq!(scenes[1].start.frame_index, 5);
        assert_eq!(scenes[1].end.frame_index, 10);
    }

    /// A stored metric can be read back and its key is listed.
    #[test]
    fn metrics_store_tracks_keys_and_values() {
        let mut metrics = MetricsStore::default();
        metrics.set_metric(7, "content_val", 12.5);
        assert_eq!(metrics.get(7, "content_val"), Some(12.5));
        assert_eq!(metrics.keys().collect::<Vec<_>>(), vec!["content_val"]);
    }

    /// Dotted, caller-chosen keys are stored and listed like any other key.
    #[test]
    fn metrics_store_accepts_dynamic_keys() {
        let mut metrics = MetricsStore::default();
        metrics.set_metric(7, "combined.content.raw", 12.5);
        assert_eq!(metrics.get(7, "combined.content.raw"), Some(12.5));
        assert_eq!(
            metrics.keys().collect::<Vec<_>>(),
            vec!["combined.content.raw"]
        );
    }

    /// A black-to-white transition yields one cut and a score above the
    /// configured threshold.
    #[test]
    fn content_detector_finds_hard_cut() {
        let mut detector = ContentDetector::new(10.0, 1);
        let mut metrics = MetricsStore::default();
        let first = content_frame(0, [0, 0, 0]);
        let second = content_frame(1, [255, 255, 255]);
        assert!(detector
            .process_frame(&first.as_frame(), Some(&mut metrics))
            .unwrap()
            .is_empty());
        let cuts = detector
            .process_frame(&second.as_frame(), Some(&mut metrics))
            .unwrap();
        assert_eq!(cuts.len(), 1);
        assert!(metrics.get(1, "content_val").unwrap() > 10.0);
    }

    /// The same colours delivered as RGB24 and as BGR24 must produce
    /// identical per-frame metrics.
    #[test]
    fn content_detector_scores_rgb24_and_bgr24_equivalent_frames_equally() {
        /// Runs a fresh detector over `frames` and returns the metrics it recorded.
        fn recorded_metrics(frames: [OwnedVideoFrame; 2]) -> MetricsStore {
            let mut detector = ContentDetector::new(10.0, 1);
            let mut metrics = MetricsStore::default();
            for frame in frames {
                detector
                    .process_frame(&frame.as_frame(), Some(&mut metrics))
                    .unwrap();
            }
            metrics
        }

        let rgb_metrics = recorded_metrics([
            content_frame(0, [24, 48, 96]),
            content_frame(1, [160, 32, 224]),
        ]);
        let bgr_metrics = recorded_metrics([
            content_bgr_frame(0, [24, 48, 96]),
            content_bgr_frame(1, [160, 32, 224]),
        ]);

        for key in ["content_val", "delta_hue", "delta_sat", "delta_lum"] {
            assert_eq!(rgb_metrics.get(1, key), bgr_metrics.get(1, key), "{key}");
        }
    }

    /// A one-frame white flash between black frames produces no cuts when
    /// the flash filter runs in `Suppress` mode.
    #[test]
    fn content_detector_suppresses_flash() {
        let mut detector = ContentDetector::new(10.0, 3).filter_mode(FlashFilterMode::Suppress, 3);
        let frames = [
            content_frame(0, [0, 0, 0]),
            content_frame(1, [255, 255, 255]),
            content_frame(2, [0, 0, 0]),
        ];
        let mut cuts = Vec::new();
        for frame in frames {
            cuts.extend(detector.process_frame(&frame.as_frame(), None).unwrap());
        }

        assert!(cuts.is_empty());
    }

    /// Cuts are reported per frame as they occur and are summarised again
    /// when detection is finished.
    #[test]
    fn pipeline_processes_frames_incrementally() {
        let mut pipeline = ScenePipeline::builder()
            .detector(CutOnFrame(1))
            .start_in_scene(true)
            .build()
            .unwrap();

        assert!(pipeline
            .process_frame(owned_frame(0))
            .unwrap()
            .cuts
            .is_empty());
        let analysis = pipeline.process_frame(owned_frame(1)).unwrap();
        assert_eq!(analysis.frames_processed, 2);
        assert_eq!(analysis.cuts[0].position.frame_index, 1);

        let result = pipeline.finish_detection().unwrap();
        assert_eq!(result.frames_processed, 2);
        assert_eq!(result.cuts.len(), 1);
        assert_eq!(result.scenes.len(), 2);
    }

    /// Observations from a video analyzer surface in the per-frame result,
    /// convert to text segments, and are kept for the final result.
    #[test]
    fn video_pipeline_emits_frame_observations() {
        /// Analyzer stub that reports the text "EXIT" on every frame.
        struct OcrAnalyzer;

        impl VideoAnalyzer for OcrAnalyzer {
            fn name(&self) -> &str {
                "ocr"
            }

            fn process_frame(&mut self, frame: &VideoFrame<'_>) -> Result<Vec<Observation>> {
                Ok(vec![Observation::new(self.name(), ObservationKind::Text)
                    .at_frame(frame.position)
                    .text("EXIT")
                    .score(0.9)])
            }
        }

        let mut pipeline = VideoAnalysisPipeline::builder()
            .analyzer(OcrAnalyzer)
            .build()
            .unwrap();

        let analysis = pipeline.process_frame(owned_frame(3)).unwrap();
        assert_eq!(analysis.frames_processed, 1);
        assert_eq!(analysis.observations[0].text.as_deref(), Some("EXIT"));
        assert_eq!(
            analysis.observations[0].to_text_segment(0).unwrap().text,
            "EXIT"
        );
        assert_eq!(pipeline.finish_analysis().unwrap().observations.len(), 1);
    }

    /// With a sampling interval of 3, only frames 0, 3 and 6 out of 0..7
    /// reach the wrapped analyzer.
    #[test]
    fn sampled_video_analyzer_skips_unsampled_frames() {
        /// Analyzer stub that records the index of every frame it is given.
        struct CountingVideoAnalyzer {
            frames: Vec<u64>,
        }

        impl VideoAnalyzer for CountingVideoAnalyzer {
            fn name(&self) -> &str {
                "counting"
            }

            fn process_frame(&mut self, frame: &VideoFrame<'_>) -> Result<Vec<Observation>> {
                self.frames.push(frame.position.frame_index);
                Ok(Vec::new())
            }
        }

        let mut analyzer =
            SampledVideoAnalyzer::new(CountingVideoAnalyzer { frames: Vec::new() }, 3);
        for index in 0..7 {
            analyzer
                .process_frame(&owned_frame(index).as_frame())
                .unwrap();
        }

        assert_eq!(analyzer.inner().frames, vec![0, 3, 6]);
    }

    /// A cut on frame 2 completes the scene covering frames 0 and 1 together
    /// with their observations; the frame-2 observation is assigned to
    /// scene index 1.
    #[test]
    fn realtime_video_pipeline_groups_observations_by_completed_scene() {
        /// Analyzer stub that emits one labelled object observation per frame.
        struct ObjectAnalyzer;

        impl VideoAnalyzer for ObjectAnalyzer {
            fn name(&self) -> &str {
                "objects"
            }

            fn process_frame(&mut self, frame: &VideoFrame<'_>) -> Result<Vec<Observation>> {
                Ok(vec![Observation::new(self.name(), ObservationKind::Object)
                    .at_frame(frame.position)
                    .label(format!("frame-{}", frame.position.frame_index))])
            }
        }

        let mut pipeline = RealtimeVideoPipeline::builder()
            .scene_detector(CutOnFrame(2))
            .video_analyzer(ObjectAnalyzer)
            .start_in_scene(true)
            .build()
            .unwrap();

        assert!(pipeline
            .process_frame(owned_frame(0))
            .unwrap()
            .completed_scenes
            .is_empty());
        pipeline.process_frame(owned_frame(1)).unwrap();
        let analysis = pipeline.process_frame(owned_frame(2)).unwrap();

        assert_eq!(analysis.completed_scenes.len(), 1);
        assert_eq!(analysis.completed_scenes[0].scene.start.frame_index, 0);
        assert_eq!(analysis.completed_scenes[0].scene.end.frame_index, 2);
        assert_eq!(analysis.completed_scenes[0].observations.len(), 2);
        assert_eq!(analysis.observations[0].scene_index, Some(1));

        let result = pipeline.finish_analysis().unwrap();
        assert_eq!(result.detection.scenes.len(), 2);
        assert_eq!(result.scenes[0].observations.len(), 2);
        assert_eq!(result.scenes[1].observations.len(), 1);
    }

    /// Events from an audio analyzer surface in the per-frame result and are
    /// kept for the final result.
    #[test]
    fn audio_pipeline_processes_frames_incrementally() {
        /// Analyzer stub that emits a "loud" event when the mean absolute
        /// sample value of an `F32` buffer exceeds 0.5.
        struct LoudnessAnalyzer;

        impl AudioAnalyzer for LoudnessAnalyzer {
            fn name(&self) -> &str {
                "loudness"
            }

            fn process_frame(&mut self, frame: &AudioFrame<'_>) -> Result<Vec<AnalysisEvent>> {
                let AudioBuffer::F32(samples) = frame.data else {
                    return Ok(Vec::new());
                };
                // `max(1)` keeps an empty buffer from dividing by zero.
                let mean = samples.iter().map(|sample| sample.abs()).sum::<f32>()
                    / samples.len().max(1) as f32;
                Ok((mean > 0.5)
                    .then(|| {
                        AnalysisEvent::new(self.name(), "loud")
                            .at_timestamp(frame.timestamp)
                            .score(mean)
                    })
                    .into_iter()
                    .collect())
            }
        }

        let mut pipeline = AudioPipeline::builder()
            .analyzer(LoudnessAnalyzer)
            .build()
            .unwrap();
        let frame = OwnedAudioFrame::new(
            Timestamp::new(0, Timebase::new(1, 48_000)),
            48_000,
            1,
            AudioBuffer::F32(vec![1.0, 0.5]),
        )
        .unwrap();

        let analysis = pipeline.process_frame(frame).unwrap();
        assert_eq!(analysis.frames_processed, 1);
        assert_eq!(analysis.events[0].label, "loud");
        assert_eq!(pipeline.finish_analysis().unwrap().events.len(), 1);
    }

    /// Events from a text analyzer surface in the per-segment result and the
    /// segment count is kept for the final result.
    #[test]
    fn text_pipeline_processes_segments_incrementally() {
        /// Analyzer stub that emits a "keyword" event for segments containing "cut".
        struct KeywordAnalyzer;

        impl TextAnalyzer for KeywordAnalyzer {
            fn name(&self) -> &str {
                "keyword"
            }

            fn process_segment(&mut self, segment: &TextSegment<'_>) -> Result<Vec<AnalysisEvent>> {
                Ok(segment
                    .text
                    .contains("cut")
                    .then(|| {
                        let mut event = AnalysisEvent::new(self.name(), "keyword").score(1.0);
                        if let Some(timestamp) = segment.timestamp {
                            event = event.at_timestamp(timestamp);
                        }
                        event
                    })
                    .into_iter()
                    .collect())
            }
        }

        let mut pipeline = TextPipeline::builder()
            .analyzer(KeywordAnalyzer)
            .build()
            .unwrap();
        let segment = OwnedTextSegment::new(0, "find this cut point");

        let analysis = pipeline.process_segment(segment).unwrap();
        assert_eq!(analysis.segments_processed, 1);
        assert_eq!(analysis.events[0].analyzer, "keyword");
        assert_eq!(pipeline.finish_analysis().unwrap().segments_processed, 1);
    }

    /// A crop region constructed with `(0, 0, 0, 10)` is rejected.
    #[test]
    fn crop_rejects_empty_regions() {
        assert!(CropRegion::new(0, 0, 0, 10).is_err());
    }
}