1#![doc = include_str!("../README.md")]
2
3pub mod runtime;
4pub mod surface;
5use std::collections::{BTreeMap, BTreeSet};
6use std::fmt;
7use std::ops::{Add, Sub};
8
9use num_rational::Rational64;
10use num_traits::ToPrimitive;
11use thiserror::Error;
12
/// Error type returned by the fallible APIs in this crate.
#[derive(Debug, Error)]
pub enum DetectError {
    /// The pixel format carried in the payload is not supported by the failing operation.
    #[error("unsupported pixel format: {0:?}")]
    UnsupportedPixelFormat(PixelFormat),
    /// The audio sample format carried in the payload is not supported by the failing operation.
    #[error("unsupported audio sample format: {0:?}")]
    UnsupportedAudioSampleFormat(AudioSampleFormat),
    /// A frame buffer holds fewer bytes than the frame geometry requires.
    #[error("invalid frame buffer: expected at least {expected} bytes, got {actual}")]
    InvalidFrameBuffer {
        /// Minimum number of bytes that was required.
        expected: usize,
        /// Number of bytes that was actually supplied.
        actual: usize,
    },
    /// Frame dimensions were rejected (for example a zero width or height).
    #[error("invalid dimensions: {width}x{height}")]
    InvalidDimensions {
        /// Rejected width.
        width: u32,
        /// Rejected height.
        height: u32,
    },
    /// Audio parameters were rejected (for example a zero sample rate or channel count).
    #[error("invalid audio format: sample_rate={sample_rate}, channels={channels}")]
    InvalidAudioFormat {
        /// Rejected sample rate.
        sample_rate: u32,
        /// Rejected channel count.
        channels: u16,
    },
    /// A video source reported a failure; the payload is its message.
    #[error("video source error: {0}")]
    Source(String),
    /// A caller-supplied argument was rejected; the payload explains why.
    #[error("invalid argument: {0}")]
    InvalidArgument(String),
    /// An underlying I/O error, converted automatically through `From<std::io::Error>`.
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),
}
56
/// Shorthand for `std::result::Result` with [`DetectError`] as the error type.
pub type Result<T> = std::result::Result<T, DetectError>;
59
/// Rational duration of one timestamp tick: `num / den` seconds.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Timebase {
    /// Numerator of the tick duration.
    pub num: i32,
    /// Denominator of the tick duration.
    pub den: i32,
}

impl Timebase {
    /// Builds a timebase from its numerator and denominator without any validation.
    pub const fn new(num: i32, den: i32) -> Self {
        Timebase { num, den }
    }

    /// Returns the length of a single tick in seconds.
    ///
    /// A zero denominator is not rejected; the division then yields an infinite or NaN value.
    pub fn seconds_per_tick(self) -> f64 {
        let numerator = f64::from(self.num);
        let denominator = f64::from(self.den);
        numerator / denominator
    }
}
80
81#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
82pub struct Timestamp {
84 pub pts: i64,
86 pub timebase: Timebase,
88}
89
90impl Timestamp {
91 pub const fn new(pts: i64, timebase: Timebase) -> Self {
93 Self { pts, timebase }
94 }
95
96 pub fn seconds(self) -> f64 {
98 self.pts as f64 * self.timebase.seconds_per_tick()
99 }
100}
101
102#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
103pub struct FramePosition {
105 pub frame_index: u64,
107 pub timestamp: Timestamp,
109}
110
111impl FramePosition {
112 pub fn from_frame_index(frame_index: u64, fps: Rational64) -> Self {
114 let den = *fps.numer() as i32;
115 let num = *fps.denom() as i32;
116 Self {
117 frame_index,
118 timestamp: Timestamp::new(frame_index as i64, Timebase::new(num, den)),
119 }
120 }
121}
122
/// A frame index paired with the frame rate needed to convert it to wall-clock time.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct FrameTimecode {
    /// Index of the frame.
    pub frame_index: u64,
    /// Frame rate in frames per second.
    pub fps: Rational64,
}
131
132impl FrameTimecode {
133 pub fn from_frames(frame_index: u64, fps: Rational64) -> Self {
135 Self { frame_index, fps }
136 }
137
138 pub fn from_seconds(seconds: f64, fps: Rational64) -> Result<Self> {
140 if seconds < 0.0 {
141 return Err(DetectError::InvalidArgument(
142 "seconds must be greater than or equal to zero".to_string(),
143 ));
144 }
145 let fps_float = fps.to_f64().ok_or_else(|| {
146 DetectError::InvalidArgument("frame rate cannot be represented".to_string())
147 })?;
148 Ok(Self {
149 frame_index: (seconds * fps_float).round() as u64,
150 fps,
151 })
152 }
153
154 pub fn parse(input: &str, fps: Rational64) -> Result<Self> {
156 if input.chars().all(|c| c.is_ascii_digit()) {
157 let frame_index = input.parse::<u64>().map_err(|err| {
158 DetectError::InvalidArgument(format!("invalid frame number `{input}`: {err}"))
159 })?;
160 return Ok(Self { frame_index, fps });
161 }
162 if !input.contains(':') {
163 let seconds = input.parse::<f64>().map_err(|err| {
164 DetectError::InvalidArgument(format!("invalid seconds `{input}`: {err}"))
165 })?;
166 return Self::from_seconds(seconds, fps);
167 }
168
169 let parts: Vec<&str> = input.split(':').collect();
170 if parts.len() != 3 {
171 return Err(DetectError::InvalidArgument(format!(
172 "invalid timecode `{input}`"
173 )));
174 }
175 let hours = parts[0].parse::<f64>().map_err(|err| {
176 DetectError::InvalidArgument(format!("invalid hours in `{input}`: {err}"))
177 })?;
178 let minutes = parts[1].parse::<f64>().map_err(|err| {
179 DetectError::InvalidArgument(format!("invalid minutes in `{input}`: {err}"))
180 })?;
181 let seconds = parts[2].parse::<f64>().map_err(|err| {
182 DetectError::InvalidArgument(format!("invalid seconds in `{input}`: {err}"))
183 })?;
184 Self::from_seconds((hours * 3600.0) + (minutes * 60.0) + seconds, fps)
185 }
186
187 pub fn seconds(self) -> f64 {
189 self.frame_index as f64 / self.fps.to_f64().unwrap_or(1.0)
190 }
191
192 pub fn timecode(self, precision: usize) -> String {
194 let factor = 10_f64.powi(precision as i32);
195 let total = (self.seconds() * factor).round() / factor;
196 let hours = (total / 3600.0).floor() as u64;
197 let minutes = ((total - hours as f64 * 3600.0) / 60.0).floor() as u64;
198 let seconds = total - hours as f64 * 3600.0 - minutes as f64 * 60.0;
199 if precision == 0 {
200 format!("{hours:02}:{minutes:02}:{:02}", seconds.round() as u64)
201 } else {
202 let whole = seconds.floor() as u64;
203 let frac = ((seconds - whole as f64) * factor).round() as u64;
204 format!("{hours:02}:{minutes:02}:{whole:02}.{frac:0precision$}")
205 }
206 }
207
208 pub fn position(self) -> FramePosition {
210 FramePosition::from_frame_index(self.frame_index, self.fps)
211 }
212}
213
214impl Add<u64> for FrameTimecode {
215 type Output = FrameTimecode;
216
217 fn add(self, rhs: u64) -> Self::Output {
218 Self {
219 frame_index: self.frame_index + rhs,
220 fps: self.fps,
221 }
222 }
223}
224
225impl Sub<u64> for FrameTimecode {
226 type Output = FrameTimecode;
227
228 fn sub(self, rhs: u64) -> Self::Output {
229 Self {
230 frame_index: self.frame_index.saturating_sub(rhs),
231 fps: self.fps,
232 }
233 }
234}
235
236impl fmt::Display for FrameTimecode {
237 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
238 f.write_str(&self.timecode(3))
239 }
240}
241
/// Byte layout of a packed three-byte pixel.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PixelFormat {
    /// Bytes are stored in red, green, blue order.
    Rgb24,
    /// Bytes are stored in blue, green, red order.
    Bgr24,
}
250
251#[derive(Debug, Clone, Copy)]
252pub struct VideoFrame<'a> {
254 pub position: FramePosition,
256 pub width: u32,
258 pub height: u32,
260 pub pixel_format: PixelFormat,
262 pub data: &'a [u8],
264 pub stride: usize,
266}
267
268impl<'a> VideoFrame<'a> {
269 pub fn rgb24(position: FramePosition, width: u32, height: u32, data: &'a [u8]) -> Result<Self> {
271 Self::packed(
272 position,
273 width,
274 height,
275 PixelFormat::Rgb24,
276 data,
277 width as usize * 3,
278 )
279 }
280
281 pub fn bgr24(position: FramePosition, width: u32, height: u32, data: &'a [u8]) -> Result<Self> {
283 Self::packed(
284 position,
285 width,
286 height,
287 PixelFormat::Bgr24,
288 data,
289 width as usize * 3,
290 )
291 }
292
293 pub fn packed(
295 position: FramePosition,
296 width: u32,
297 height: u32,
298 pixel_format: PixelFormat,
299 data: &'a [u8],
300 stride: usize,
301 ) -> Result<Self> {
302 if width == 0 || height == 0 {
303 return Err(DetectError::InvalidDimensions { width, height });
304 }
305 let expected = stride * height as usize;
306 if data.len() < expected {
307 return Err(DetectError::InvalidFrameBuffer {
308 expected,
309 actual: data.len(),
310 });
311 }
312 Ok(Self {
313 position,
314 width,
315 height,
316 pixel_format,
317 data,
318 stride,
319 })
320 }
321
322 pub fn pixel_rgb(&self, x: u32, y: u32) -> [u8; 3] {
324 let i = y as usize * self.stride + x as usize * 3;
325 match self.pixel_format {
326 PixelFormat::Rgb24 => [self.data[i], self.data[i + 1], self.data[i + 2]],
327 PixelFormat::Bgr24 => [self.data[i + 2], self.data[i + 1], self.data[i]],
328 }
329 }
330
331 pub fn pixel_count(&self) -> usize {
333 self.width as usize * self.height as usize
334 }
335}
336
/// A scene, described by the positions where it starts and ends.
#[derive(Debug, Clone, PartialEq)]
pub struct Scene {
    /// Position where the scene starts.
    pub start: FramePosition,
    /// Position where the scene ends.
    // NOTE(review): whether `end` is inclusive is decided by `scenes_from_cuts`, not shown here.
    pub end: FramePosition,
}
345
/// A scene cut reported by a detector.
#[derive(Debug, Clone, PartialEq)]
pub struct Cut {
    /// Position of the cut.
    pub position: FramePosition,
    /// Static name identifying the detector (presumably `SceneDetector::name`; confirm in detectors).
    pub detector: &'static str,
    /// Optional score supplied by the detector.
    pub score: Option<f32>,
}
356
/// Receiver for per-frame metric values.
pub trait MetricsSink {
    /// Records `value` for the metric named `key` on frame `frame_index`.
    fn set_metric(&mut self, frame_index: u64, key: &str, value: f64);
}
362
/// In-memory table of metric values, keyed by frame index and then by metric name.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct MetricsStore {
    /// Metric values per frame index.
    rows: BTreeMap<u64, BTreeMap<String, f64>>,
    /// Every metric name stored so far.
    keys: BTreeSet<String>,
}

impl MetricsStore {
    /// Returns all rows, ordered by frame index.
    pub fn rows(&self) -> &BTreeMap<u64, BTreeMap<String, f64>> {
        &self.rows
    }

    /// Iterates over the known metric names in sorted order.
    pub fn keys(&self) -> impl Iterator<Item = &str> + '_ {
        self.keys.iter().map(|name| name.as_str())
    }

    /// Looks up a metric for a frame; `None` when either the frame or the metric is absent.
    pub fn get(&self, frame_index: u64, key: &str) -> Option<f64> {
        let row = self.rows.get(&frame_index)?;
        row.get(key).copied()
    }
}
389
390impl MetricsSink for MetricsStore {
391 fn set_metric(&mut self, frame_index: u64, key: &str, value: f64) {
392 self.keys.insert(key.to_string());
393 self.rows
394 .entry(frame_index)
395 .or_default()
396 .insert(key.to_string(), value);
397 }
398}
399
/// A frame-by-frame scene cut detector.
pub trait SceneDetector {
    /// Static name identifying the detector.
    fn name(&self) -> &'static str;
    /// Names of the metrics this detector may report.
    fn metric_keys(&self) -> &'static [&'static str];
    /// Length of the detector's event buffer; defaults to 0.
    // NOTE(review): the unit and how callers use this value are not visible in this file section.
    fn event_buffer_len(&self) -> usize {
        0
    }
    /// Processes one frame and returns any cuts detected as a result.
    ///
    /// When `metrics` is provided the detector may record per-frame values into it.
    fn process_frame(
        &mut self,
        frame: &VideoFrame<'_>,
        metrics: Option<&mut dyn MetricsSink>,
    ) -> Result<Vec<Cut>>;
    /// Called once after the last frame so the detector can report pending cuts.
    ///
    /// The default implementation reports nothing.
    fn finish(
        &mut self,
        _last_position: FramePosition,
        _metrics: Option<&mut dyn MetricsSink>,
    ) -> Result<Vec<Cut>> {
        Ok(Vec::new())
    }
}
425
/// A pull-based supplier of video frames.
pub trait VideoSource {
    /// Returns the next frame, or `Ok(None)` once the source is exhausted.
    fn next_frame(&mut self) -> Result<Option<OwnedVideoFrame>>;
    /// Returns the frame rate of the source.
    fn frame_rate(&self) -> Rational64;
}
433
434#[derive(Debug, Clone, PartialEq, Eq)]
435pub struct OwnedVideoFrame {
437 pub position: FramePosition,
439 pub width: u32,
441 pub height: u32,
443 pub pixel_format: PixelFormat,
445 pub data: Vec<u8>,
447 pub stride: usize,
449}
450
451impl OwnedVideoFrame {
452 pub fn as_frame(&self) -> VideoFrame<'_> {
454 VideoFrame {
455 position: self.position,
456 width: self.width,
457 height: self.height,
458 pixel_format: self.pixel_format,
459 data: &self.data,
460 stride: self.stride,
461 }
462 }
463}
464
/// Numeric type of the samples in an audio buffer.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AudioSampleFormat {
    /// Unsigned 8-bit samples.
    U8,
    /// Signed 16-bit samples.
    I16,
    /// Signed 32-bit samples.
    I32,
    /// 32-bit floating point samples.
    F32,
}
477
478#[derive(Debug, Clone, PartialEq)]
479pub enum AudioBuffer {
481 U8(Vec<u8>),
483 I16(Vec<i16>),
485 I32(Vec<i32>),
487 F32(Vec<f32>),
489}
490
491impl AudioBuffer {
492 pub fn len(&self) -> usize {
494 match self {
495 Self::U8(values) => values.len(),
496 Self::I16(values) => values.len(),
497 Self::I32(values) => values.len(),
498 Self::F32(values) => values.len(),
499 }
500 }
501
502 pub fn is_empty(&self) -> bool {
504 self.len() == 0
505 }
506
507 pub fn sample_format(&self) -> AudioSampleFormat {
509 match self {
510 Self::U8(_) => AudioSampleFormat::U8,
511 Self::I16(_) => AudioSampleFormat::I16,
512 Self::I32(_) => AudioSampleFormat::I32,
513 Self::F32(_) => AudioSampleFormat::F32,
514 }
515 }
516}
517
518#[derive(Debug, Clone, Copy)]
519pub struct AudioFrame<'a> {
521 pub timestamp: Timestamp,
523 pub sample_rate: u32,
525 pub channels: u16,
527 pub data: &'a AudioBuffer,
529}
530
531impl<'a> AudioFrame<'a> {
532 pub fn new(
534 timestamp: Timestamp,
535 sample_rate: u32,
536 channels: u16,
537 data: &'a AudioBuffer,
538 ) -> Result<Self> {
539 if sample_rate == 0 || channels == 0 {
540 return Err(DetectError::InvalidAudioFormat {
541 sample_rate,
542 channels,
543 });
544 }
545 Ok(Self {
546 timestamp,
547 sample_rate,
548 channels,
549 data,
550 })
551 }
552
553 pub fn sample_format(&self) -> AudioSampleFormat {
555 self.data.sample_format()
556 }
557
558 pub fn sample_count(&self) -> usize {
560 self.data.len()
561 }
562
563 pub fn samples_per_channel(&self) -> usize {
565 self.sample_count() / self.channels as usize
566 }
567
568 pub fn duration_seconds(&self) -> f64 {
570 self.samples_per_channel() as f64 / self.sample_rate as f64
571 }
572}
573
574#[derive(Debug, Clone, PartialEq)]
575pub struct OwnedAudioFrame {
577 pub timestamp: Timestamp,
579 pub sample_rate: u32,
581 pub channels: u16,
583 pub data: AudioBuffer,
585}
586
587impl OwnedAudioFrame {
588 pub fn new(
590 timestamp: Timestamp,
591 sample_rate: u32,
592 channels: u16,
593 data: AudioBuffer,
594 ) -> Result<Self> {
595 AudioFrame::new(timestamp, sample_rate, channels, &data)?;
596 Ok(Self {
597 timestamp,
598 sample_rate,
599 channels,
600 data,
601 })
602 }
603
604 pub fn as_frame(&self) -> Result<AudioFrame<'_>> {
606 AudioFrame::new(self.timestamp, self.sample_rate, self.channels, &self.data)
607 }
608
609 pub fn sample_format(&self) -> AudioSampleFormat {
611 self.data.sample_format()
612 }
613
614 pub fn samples_per_channel(&self) -> usize {
616 if self.channels == 0 {
617 return 0;
618 }
619 self.data.len() / self.channels as usize
620 }
621
622 pub fn duration_seconds(&self) -> f64 {
624 if self.sample_rate == 0 {
625 return 0.0;
626 }
627 self.samples_per_channel() as f64 / self.sample_rate as f64
628 }
629}
630
/// Borrowed view of one segment of text.
#[derive(Debug, Clone, Copy)]
pub struct TextSegment<'a> {
    /// Index of the segment.
    pub segment_index: u64,
    /// Timestamp of the segment, when known.
    pub timestamp: Option<Timestamp>,
    /// The text itself.
    pub text: &'a str,
    /// Language of the text, when known.
    pub language: Option<&'a str>,
    /// Whether the segment is final.
    // NOTE(review): presumably distinguishes final from interim transcription results; confirm with producers.
    pub is_final: bool,
}
645
646#[derive(Debug, Clone, PartialEq, Eq)]
647pub struct OwnedTextSegment {
649 pub segment_index: u64,
651 pub timestamp: Option<Timestamp>,
653 pub text: String,
655 pub language: Option<String>,
657 pub is_final: bool,
659}
660
661impl OwnedTextSegment {
662 pub fn new(segment_index: u64, text: impl Into<String>) -> Self {
664 Self {
665 segment_index,
666 timestamp: None,
667 text: text.into(),
668 language: None,
669 is_final: true,
670 }
671 }
672
673 pub fn timestamp(mut self, timestamp: Timestamp) -> Self {
675 self.timestamp = Some(timestamp);
676 self
677 }
678
679 pub fn language(mut self, language: impl Into<String>) -> Self {
681 self.language = Some(language.into());
682 self
683 }
684
685 pub fn finality(mut self, is_final: bool) -> Self {
687 self.is_final = is_final;
688 self
689 }
690
691 pub fn as_segment(&self) -> TextSegment<'_> {
693 TextSegment {
694 segment_index: self.segment_index,
695 timestamp: self.timestamp,
696 text: &self.text,
697 language: self.language.as_deref(),
698 is_final: self.is_final,
699 }
700 }
701}
702
/// Outcome of a scene detection run.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct DetectionResult {
    /// Scenes derived from the recorded cuts.
    pub scenes: Vec<Scene>,
    /// All cuts recorded during the run.
    pub cuts: Vec<Cut>,
    /// Metrics collected from the detectors.
    pub metrics: MetricsStore,
    /// Number of frames that were processed.
    pub frames_processed: u64,
}
715
/// Result of feeding a single frame to a scene pipeline.
#[derive(Debug, Clone, PartialEq)]
pub struct FrameAnalysis {
    /// Position of the processed frame.
    pub position: FramePosition,
    /// Cuts newly accepted while processing this frame.
    pub cuts: Vec<Cut>,
    /// Running count of processed frames, including this one.
    pub frames_processed: u64,
}
726
727#[derive(Debug, Clone, Copy, PartialEq, Eq)]
728pub struct BoundingBox {
730 pub x: u32,
732 pub y: u32,
734 pub width: u32,
736 pub height: u32,
738}
739
740impl BoundingBox {
741 pub fn new(x: u32, y: u32, width: u32, height: u32) -> Result<Self> {
743 if width == 0 || height == 0 {
744 return Err(DetectError::InvalidArgument(
745 "bounding box must have non-zero width and height".to_string(),
746 ));
747 }
748 Ok(Self {
749 x,
750 y,
751 width,
752 height,
753 })
754 }
755}
756
/// Category of an [`Observation`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ObservationKind {
    /// A text observation.
    Text,
    /// A face observation.
    Face,
    /// An object observation.
    Object,
    /// A scene-level observation.
    Scene,
    /// A caller-defined category identified by the given string.
    Custom(String),
}
771
/// A single finding produced by an analyzer.
///
/// Only `analyzer` and `kind` are mandatory; every other field is optional metadata.
#[derive(Debug, Clone, PartialEq)]
pub struct Observation {
    /// Time the observation refers to, when known.
    pub timestamp: Option<Timestamp>,
    /// Frame the observation refers to, when known.
    pub frame: Option<FramePosition>,
    /// Index of the scene the observation belongs to, when known.
    pub scene_index: Option<u64>,
    /// Name of the analyzer that produced the observation.
    pub analyzer: String,
    /// Category of the observation.
    pub kind: ObservationKind,
    /// Optional label.
    pub label: Option<String>,
    /// Optional text payload.
    pub text: Option<String>,
    /// Optional score.
    pub score: Option<f32>,
    /// Optional region the observation refers to.
    pub region: Option<BoundingBox>,
    /// Optional track identifier.
    pub track_id: Option<String>,
    /// Free-form key/value attributes.
    pub attributes: BTreeMap<String, String>,
}
798
799impl Observation {
800 pub fn new(analyzer: impl Into<String>, kind: ObservationKind) -> Self {
802 Self {
803 timestamp: None,
804 frame: None,
805 scene_index: None,
806 analyzer: analyzer.into(),
807 kind,
808 label: None,
809 text: None,
810 score: None,
811 region: None,
812 track_id: None,
813 attributes: BTreeMap::new(),
814 }
815 }
816
817 pub fn at_frame(mut self, position: FramePosition) -> Self {
819 self.timestamp = Some(position.timestamp);
820 self.frame = Some(position);
821 self
822 }
823
824 pub fn at_timestamp(mut self, timestamp: Timestamp) -> Self {
826 self.timestamp = Some(timestamp);
827 self
828 }
829
830 pub fn in_scene(mut self, scene_index: u64) -> Self {
832 self.scene_index = Some(scene_index);
833 self
834 }
835
836 pub fn label(mut self, label: impl Into<String>) -> Self {
838 self.label = Some(label.into());
839 self
840 }
841
842 pub fn text(mut self, text: impl Into<String>) -> Self {
844 self.text = Some(text.into());
845 self
846 }
847
848 pub fn score(mut self, score: f32) -> Self {
850 self.score = Some(score);
851 self
852 }
853
854 pub fn region(mut self, region: BoundingBox) -> Self {
856 self.region = Some(region);
857 self
858 }
859
860 pub fn track_id(mut self, track_id: impl Into<String>) -> Self {
862 self.track_id = Some(track_id.into());
863 self
864 }
865
866 pub fn attribute(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
868 self.attributes.insert(key.into(), value.into());
869 self
870 }
871
872 pub fn to_text_segment(&self, segment_index: u64) -> Option<OwnedTextSegment> {
874 let text = self.text.as_ref()?;
875 let mut segment = OwnedTextSegment::new(segment_index, text.clone());
876 if let Some(timestamp) = self.timestamp {
877 segment = segment.timestamp(timestamp);
878 }
879 if let Some(language) = self.attributes.get("language") {
880 segment = segment.language(language.clone());
881 }
882 Some(segment)
883 }
884}
885
/// Outcome of a video analysis run.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct VideoAnalysisResult {
    /// All observations collected during the run.
    pub observations: Vec<Observation>,
    /// Number of frames that were processed.
    pub frames_processed: u64,
}
894
/// Result of feeding a single frame to a video analysis pipeline.
#[derive(Debug, Clone, PartialEq)]
pub struct VideoFrameAnalysis {
    /// Position of the processed frame.
    pub position: FramePosition,
    /// Observations produced for this frame.
    pub observations: Vec<Observation>,
    /// Running count of processed frames, including this one.
    pub frames_processed: u64,
}
905
/// A scene together with the observations attributed to it.
#[derive(Debug, Clone, PartialEq)]
pub struct SceneAnalysis {
    /// Index of the scene.
    pub scene_index: u64,
    /// The scene boundaries.
    pub scene: Scene,
    /// Observations attributed to the scene.
    pub observations: Vec<Observation>,
}
916
/// Result of feeding a single frame to a realtime video pipeline.
#[derive(Debug, Clone, PartialEq)]
pub struct RealtimeVideoFrameAnalysis {
    /// Position of the processed frame.
    pub position: FramePosition,
    /// Scene detection result for this frame.
    pub scene: FrameAnalysis,
    /// Observations produced for this frame.
    pub observations: Vec<Observation>,
    /// Scenes that were closed while processing this frame.
    pub completed_scenes: Vec<SceneAnalysis>,
    /// Running count of processed frames, including this one.
    pub frames_processed: u64,
}
931
/// Final outcome of a realtime video pipeline run.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct RealtimeVideoAnalysisResult {
    /// Result of the scene detection part of the pipeline.
    pub detection: DetectionResult,
    /// All observations collected during the run.
    pub observations: Vec<Observation>,
    /// Per-scene analyses.
    pub scenes: Vec<SceneAnalysis>,
    /// Number of frames that were processed.
    pub frames_processed: u64,
}
944
945#[derive(Debug, Clone, PartialEq)]
946pub struct AnalysisEvent {
948 pub timestamp: Option<Timestamp>,
950 pub analyzer: String,
952 pub label: String,
954 pub score: Option<f32>,
956}
957
958impl AnalysisEvent {
959 pub fn new(analyzer: impl Into<String>, label: impl Into<String>) -> Self {
961 Self {
962 timestamp: None,
963 analyzer: analyzer.into(),
964 label: label.into(),
965 score: None,
966 }
967 }
968
969 pub fn at_timestamp(mut self, timestamp: Timestamp) -> Self {
971 self.timestamp = Some(timestamp);
972 self
973 }
974
975 pub fn score(mut self, score: f32) -> Self {
977 self.score = Some(score);
978 self
979 }
980}
981
/// Outcome of an audio analysis run.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct AudioAnalysisResult {
    /// All events collected during the run.
    pub events: Vec<AnalysisEvent>,
    /// Number of audio frames that were processed.
    pub frames_processed: u64,
}
990
/// Result of analyzing a single audio frame.
#[derive(Debug, Clone, PartialEq)]
pub struct AudioAnalysis {
    /// Timestamp of the analyzed audio frame.
    pub timestamp: Timestamp,
    /// Events produced for this frame.
    pub events: Vec<AnalysisEvent>,
    /// Running count of processed audio frames.
    pub frames_processed: u64,
}
1001
/// Outcome of a text analysis run.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct TextAnalysisResult {
    /// All events collected during the run.
    pub events: Vec<AnalysisEvent>,
    /// Number of text segments that were processed.
    pub segments_processed: u64,
}
1010
/// Result of analyzing a single text segment.
#[derive(Debug, Clone, PartialEq)]
pub struct TextAnalysis {
    /// Index of the analyzed segment.
    pub segment_index: u64,
    /// Events produced for this segment.
    pub events: Vec<AnalysisEvent>,
    /// Running count of processed text segments.
    pub segments_processed: u64,
}
1021
/// Runs one or more [`SceneDetector`]s over a stream of frames and merges their cuts.
pub struct ScenePipeline {
    /// Detectors, invoked in order for every frame.
    detectors: Vec<Box<dyn SceneDetector>>,
    /// Forwarded to `scenes_from_cuts` when the scene list is built.
    start_in_scene: bool,
    /// Optional crop region; frames are checked against its starting corner.
    crop: Option<CropRegion>,
    /// Stored from the builder; the code visible here does not act on it yet.
    auto_downscale_min_width: Option<u32>,
    /// Mutable run state, replaced wholesale by `reset`.
    state: ScenePipelineState,
}
1030
/// Per-run state of a [`ScenePipeline`].
#[derive(Debug, Default, Clone)]
struct ScenePipelineState {
    /// Accepted cuts, kept sorted by frame index with one cut per index.
    cuts: Vec<Cut>,
    /// Metrics recorded by the detectors.
    metrics: MetricsStore,
    /// Position of the first processed frame.
    first_position: Option<FramePosition>,
    /// Position of the most recently processed frame.
    last_position: Option<FramePosition>,
    /// Number of frames processed so far.
    frames_processed: u64,
    /// Set once `finish_detection` has run; further frames are rejected until `reset`.
    finished: bool,
}
1040
1041#[derive(Debug, Clone, Copy, PartialEq, Eq)]
1042pub struct CropRegion {
1044 pub x0: u32,
1046 pub y0: u32,
1048 pub x1: u32,
1050 pub y1: u32,
1052}
1053
1054impl CropRegion {
1055 pub fn new(x0: u32, y0: u32, x1: u32, y1: u32) -> Result<Self> {
1057 if x0 == x1 || y0 == y1 {
1058 return Err(DetectError::InvalidArgument(
1059 "crop region must have non-zero width and height".to_string(),
1060 ));
1061 }
1062 Ok(Self { x0, y0, x1, y1 })
1063 }
1064}
1065
1066impl ScenePipeline {
1067 pub fn builder() -> ScenePipelineBuilder {
1069 ScenePipelineBuilder::default()
1070 }
1071
1072 pub fn detect<S: VideoSource>(&mut self, source: &mut S) -> Result<DetectionResult> {
1074 self.reset();
1075 while let Some(frame) = source.next_frame()? {
1076 self.process_frame(frame)?;
1077 }
1078 self.finish_detection()
1079 }
1080
1081 pub fn process_frame(&mut self, frame: OwnedVideoFrame) -> Result<FrameAnalysis> {
1083 let frame = self.prepare_frame(frame)?;
1084 self.process_frame_ref(&frame.as_frame())
1085 }
1086
1087 pub fn process_frame_ref(&mut self, frame: &VideoFrame<'_>) -> Result<FrameAnalysis> {
1089 if self.state.finished {
1090 return Err(DetectError::InvalidArgument(
1091 "cannot process frames after finish_detection; call reset first".to_string(),
1092 ));
1093 }
1094 self.validate_frame_options(frame)?;
1095 self.state.first_position.get_or_insert(frame.position);
1096 self.state.last_position = Some(frame.position);
1097
1098 let mut cuts = Vec::new();
1099 for detector in &mut self.detectors {
1100 let mut new_cuts = detector.process_frame(frame, Some(&mut self.state.metrics))?;
1101 cuts.append(&mut new_cuts);
1102 }
1103 let cuts = self.record_cuts(cuts);
1104 self.state.frames_processed += 1;
1105
1106 Ok(FrameAnalysis {
1107 position: frame.position,
1108 cuts,
1109 frames_processed: self.state.frames_processed,
1110 })
1111 }
1112
1113 pub fn finish_detection(&mut self) -> Result<DetectionResult> {
1115 if !self.state.finished {
1116 if let Some(last) = self.state.last_position {
1117 let mut cuts = Vec::new();
1118 for detector in &mut self.detectors {
1119 let mut new_cuts = detector.finish(last, Some(&mut self.state.metrics))?;
1120 cuts.append(&mut new_cuts);
1121 }
1122 self.record_cuts(cuts);
1123 }
1124 self.state.finished = true;
1125 }
1126
1127 let scenes = if let (Some(start), Some(end)) =
1128 (self.state.first_position, self.state.last_position)
1129 {
1130 scenes_from_cuts(&self.state.cuts, start, end, self.start_in_scene)
1131 } else {
1132 Vec::new()
1133 };
1134
1135 Ok(DetectionResult {
1136 scenes,
1137 cuts: self.state.cuts.clone(),
1138 metrics: self.state.metrics.clone(),
1139 frames_processed: self.state.frames_processed,
1140 })
1141 }
1142
1143 pub fn reset(&mut self) {
1145 self.state = ScenePipelineState::default();
1146 }
1147
1148 pub fn metrics(&self) -> &MetricsStore {
1150 &self.state.metrics
1151 }
1152
1153 pub fn cuts(&self) -> &[Cut] {
1155 &self.state.cuts
1156 }
1157
1158 pub fn frames_processed(&self) -> u64 {
1160 self.state.frames_processed
1161 }
1162
1163 fn record_cuts(&mut self, mut cuts: Vec<Cut>) -> Vec<Cut> {
1164 cuts.sort_by_key(|cut| cut.position.frame_index);
1165 cuts.dedup_by_key(|cut| cut.position.frame_index);
1166 let mut accepted = Vec::new();
1167 for cut in cuts {
1168 if self
1169 .state
1170 .cuts
1171 .iter()
1172 .any(|existing| existing.position.frame_index == cut.position.frame_index)
1173 {
1174 continue;
1175 }
1176 self.state.cuts.push(cut.clone());
1177 accepted.push(cut);
1178 }
1179 self.state.cuts.sort_by_key(|cut| cut.position.frame_index);
1180 accepted
1181 }
1182
1183 fn prepare_frame(&self, frame: OwnedVideoFrame) -> Result<OwnedVideoFrame> {
1184 self.validate_frame_options(&frame.as_frame())?;
1185 Ok(frame)
1186 }
1187
1188 fn validate_frame_options(&self, frame: &VideoFrame<'_>) -> Result<()> {
1189 let _ = self.auto_downscale_min_width;
1190 if let Some(crop) = self.crop {
1191 if crop.x0 >= frame.width || crop.y0 >= frame.height {
1192 return Err(DetectError::InvalidArgument(
1193 "crop starts outside frame boundary".to_string(),
1194 ));
1195 }
1196 }
1197 Ok(())
1198 }
1199}
1200
/// Builder for [`ScenePipeline`].
#[derive(Default)]
pub struct ScenePipelineBuilder {
    /// Detectors added so far, in insertion order.
    detectors: Vec<Box<dyn SceneDetector>>,
    /// Value for the pipeline's `start_in_scene` option (defaults to `false`).
    start_in_scene: bool,
    /// Optional crop region.
    crop: Option<CropRegion>,
    /// Optional minimum width for automatic downscaling.
    auto_downscale_min_width: Option<u32>,
}
1209
1210impl ScenePipelineBuilder {
1211 pub fn detector<D: SceneDetector + 'static>(mut self, detector: D) -> Self {
1213 self.detectors.push(Box::new(detector));
1214 self
1215 }
1216
1217 pub fn start_in_scene(mut self, value: bool) -> Self {
1219 self.start_in_scene = value;
1220 self
1221 }
1222
1223 pub fn crop(mut self, value: Option<CropRegion>) -> Self {
1225 self.crop = value;
1226 self
1227 }
1228
1229 pub fn auto_downscale_min_width(mut self, value: u32) -> Self {
1231 self.auto_downscale_min_width = Some(value);
1232 self
1233 }
1234
1235 pub fn build(self) -> Result<ScenePipeline> {
1237 if self.detectors.is_empty() {
1238 return Err(DetectError::InvalidArgument(
1239 "at least one detector is required".to_string(),
1240 ));
1241 }
1242 Ok(ScenePipeline {
1243 detectors: self.detectors,
1244 start_in_scene: self.start_in_scene,
1245 crop: self.crop,
1246 auto_downscale_min_width: self.auto_downscale_min_width,
1247 state: ScenePipelineState::default(),
1248 })
1249 }
1250}
1251
/// A frame-by-frame analyzer that produces [`Observation`]s.
pub trait VideoAnalyzer {
    /// Name identifying the analyzer.
    fn name(&self) -> &str;

    /// Analyzes one frame and returns the observations made on it.
    fn process_frame(&mut self, frame: &VideoFrame<'_>) -> Result<Vec<Observation>>;

    /// Called once after the last frame; `_last_position` is `None` when no frame was seen.
    ///
    /// The default implementation reports nothing.
    fn finish(&mut self, _last_position: Option<FramePosition>) -> Result<Vec<Observation>> {
        Ok(Vec::new())
    }
}
1265
/// Wrapper that pairs an inner value with a sampling interval.
#[derive(Debug, Clone, PartialEq)]
pub struct SampledVideoAnalyzer<A> {
    /// The wrapped value.
    inner: A,
    /// Sampling interval in frames; always at least 1 when built through `new`.
    every: u64,
}

impl<A> SampledVideoAnalyzer<A> {
    /// Wraps `inner` with a sampling interval of `every`; an interval of 0 is raised to 1.
    pub fn new(inner: A, every: u64) -> Self {
        let every = if every == 0 { 1 } else { every };
        SampledVideoAnalyzer { inner, every }
    }

    /// Shared access to the wrapped value.
    pub fn inner(&self) -> &A {
        let Self { inner, .. } = self;
        inner
    }

    /// Exclusive access to the wrapped value.
    pub fn inner_mut(&mut self) -> &mut A {
        let Self { inner, .. } = self;
        inner
    }

    /// The effective sampling interval.
    pub fn every(&self) -> u64 {
        let Self { every, .. } = self;
        *every
    }
}
1297
1298impl<A: VideoAnalyzer> VideoAnalyzer for SampledVideoAnalyzer<A> {
1299 fn name(&self) -> &str {
1300 self.inner.name()
1301 }
1302
1303 fn process_frame(&mut self, frame: &VideoFrame<'_>) -> Result<Vec<Observation>> {
1304 if frame.position.frame_index.is_multiple_of(self.every) {
1305 self.inner.process_frame(frame)
1306 } else {
1307 Ok(Vec::new())
1308 }
1309 }
1310
1311 fn finish(&mut self, last_position: Option<FramePosition>) -> Result<Vec<Observation>> {
1312 self.inner.finish(last_position)
1313 }
1314}
1315
/// Runs one or more [`VideoAnalyzer`]s over a stream of frames and collects their observations.
pub struct VideoAnalysisPipeline {
    /// Analyzers, invoked in order for every frame.
    analyzers: Vec<Box<dyn VideoAnalyzer>>,
    /// Mutable run state, replaced wholesale by `reset`.
    state: VideoAnalysisPipelineState,
}
1321
/// Per-run state of a [`VideoAnalysisPipeline`].
#[derive(Debug, Default, Clone)]
struct VideoAnalysisPipelineState {
    /// Observations collected so far.
    observations: Vec<Observation>,
    /// Position of the most recently processed frame.
    last_position: Option<FramePosition>,
    /// Number of frames processed so far.
    frames_processed: u64,
    /// Set once `finish_analysis` has run; further frames are rejected until `reset`.
    finished: bool,
}
1329
1330impl VideoAnalysisPipeline {
1331 pub fn builder() -> VideoAnalysisPipelineBuilder {
1333 VideoAnalysisPipelineBuilder::default()
1334 }
1335
1336 pub fn process_frame(&mut self, frame: OwnedVideoFrame) -> Result<VideoFrameAnalysis> {
1338 self.process_frame_ref(&frame.as_frame())
1339 }
1340
1341 pub fn process_frame_ref(&mut self, frame: &VideoFrame<'_>) -> Result<VideoFrameAnalysis> {
1343 if self.state.finished {
1344 return Err(DetectError::InvalidArgument(
1345 "cannot process video frames after finish_analysis; call reset first".to_string(),
1346 ));
1347 }
1348 self.state.last_position = Some(frame.position);
1349
1350 let mut observations = Vec::new();
1351 for analyzer in &mut self.analyzers {
1352 let mut new_observations = analyzer.process_frame(frame)?;
1353 for observation in &mut new_observations {
1354 if observation.timestamp.is_none() {
1355 observation.timestamp = Some(frame.position.timestamp);
1356 }
1357 if observation.frame.is_none() {
1358 observation.frame = Some(frame.position);
1359 }
1360 }
1361 observations.append(&mut new_observations);
1362 }
1363 self.state.observations.extend(observations.iter().cloned());
1364 self.state.frames_processed += 1;
1365
1366 Ok(VideoFrameAnalysis {
1367 position: frame.position,
1368 observations,
1369 frames_processed: self.state.frames_processed,
1370 })
1371 }
1372
1373 pub fn finish_analysis(&mut self) -> Result<VideoAnalysisResult> {
1375 if !self.state.finished {
1376 let mut observations = Vec::new();
1377 for analyzer in &mut self.analyzers {
1378 let mut new_observations = analyzer.finish(self.state.last_position)?;
1379 observations.append(&mut new_observations);
1380 }
1381 self.state.observations.extend(observations);
1382 self.state.finished = true;
1383 }
1384 Ok(VideoAnalysisResult {
1385 observations: self.state.observations.clone(),
1386 frames_processed: self.state.frames_processed,
1387 })
1388 }
1389
1390 pub fn reset(&mut self) {
1392 self.state = VideoAnalysisPipelineState::default();
1393 }
1394
1395 pub fn observations(&self) -> &[Observation] {
1397 &self.state.observations
1398 }
1399
1400 pub fn frames_processed(&self) -> u64 {
1402 self.state.frames_processed
1403 }
1404}
1405
/// Builder for [`VideoAnalysisPipeline`].
#[derive(Default)]
pub struct VideoAnalysisPipelineBuilder {
    /// Analyzers added so far, in insertion order.
    analyzers: Vec<Box<dyn VideoAnalyzer>>,
}
1411
1412impl VideoAnalysisPipelineBuilder {
1413 pub fn analyzer<A: VideoAnalyzer + 'static>(mut self, analyzer: A) -> Self {
1415 self.analyzers.push(Box::new(analyzer));
1416 self
1417 }
1418
1419 pub fn build(self) -> Result<VideoAnalysisPipeline> {
1421 if self.analyzers.is_empty() {
1422 return Err(DetectError::InvalidArgument(
1423 "at least one video analyzer is required".to_string(),
1424 ));
1425 }
1426 Ok(VideoAnalysisPipeline {
1427 analyzers: self.analyzers,
1428 state: VideoAnalysisPipelineState::default(),
1429 })
1430 }
1431}
1432
/// Combines scene detection with optional video analysis over the same stream of frames.
pub struct RealtimeVideoPipeline {
    /// Scene detection stage; every frame passes through it.
    scene_pipeline: ScenePipeline,
    /// Optional analysis stage; when absent no observations are produced.
    video_pipeline: Option<VideoAnalysisPipeline>,
    /// Mutable run state of the combined pipeline.
    state: RealtimeVideoPipelineState,
}
1439
/// Per-run state of a [`RealtimeVideoPipeline`].
#[derive(Debug, Default, Clone)]
struct RealtimeVideoPipelineState {
    /// All observations collected so far.
    observations: Vec<Observation>,
    /// Analyses of the scenes completed so far.
    // NOTE(review): populated by helpers outside this section (presumably `close_scenes`); confirm.
    completed_scenes: Vec<SceneAnalysis>,
    /// Observations collected for the scene that is still open.
    open_scene_observations: Vec<Observation>,
    /// Start of the scene that is still open; set from the first frame seen while unset.
    active_scene_start: Option<FramePosition>,
    /// Index of the scene that is still open.
    active_scene_index: u64,
    /// Position of the first processed frame.
    first_position: Option<FramePosition>,
    /// Position of the most recently processed frame.
    last_position: Option<FramePosition>,
    /// Number of frames processed so far.
    frames_processed: u64,
    /// Set once `finish_analysis` has run; further frames are rejected afterwards.
    finished: bool,
}
1452
1453impl RealtimeVideoPipeline {
1454 pub fn builder() -> RealtimeVideoPipelineBuilder {
1456 RealtimeVideoPipelineBuilder::default()
1457 }
1458
1459 pub fn new(
1461 scene_pipeline: ScenePipeline,
1462 video_pipeline: Option<VideoAnalysisPipeline>,
1463 ) -> Self {
1464 Self {
1465 scene_pipeline,
1466 video_pipeline,
1467 state: RealtimeVideoPipelineState::default(),
1468 }
1469 }
1470
1471 pub fn process_frame(&mut self, frame: OwnedVideoFrame) -> Result<RealtimeVideoFrameAnalysis> {
1473 self.process_frame_ref(&frame.as_frame())
1474 }
1475
1476 pub fn process_frame_ref(
1478 &mut self,
1479 frame: &VideoFrame<'_>,
1480 ) -> Result<RealtimeVideoFrameAnalysis> {
1481 if self.state.finished {
1482 return Err(DetectError::InvalidArgument(
1483 "cannot process video frames after finish_analysis; call reset first".to_string(),
1484 ));
1485 }
1486
1487 self.state.first_position.get_or_insert(frame.position);
1488 self.state.last_position = Some(frame.position);
1489 self.state.active_scene_start.get_or_insert(frame.position);
1490
1491 let scene = self.scene_pipeline.process_frame_ref(frame)?;
1492 let completed_scenes = self.close_scenes(&scene.cuts);
1493 let mut observations = if let Some(pipeline) = &mut self.video_pipeline {
1494 pipeline.process_frame_ref(frame)?.observations
1495 } else {
1496 Vec::new()
1497 };
1498 self.annotate_observations(&mut observations, frame.position);
1499 self.state
1500 .open_scene_observations
1501 .extend(observations.iter().cloned());
1502 self.state.observations.extend(observations.iter().cloned());
1503 self.state.frames_processed += 1;
1504
1505 Ok(RealtimeVideoFrameAnalysis {
1506 position: frame.position,
1507 scene,
1508 observations,
1509 completed_scenes,
1510 frames_processed: self.state.frames_processed,
1511 })
1512 }
1513
1514 pub fn finish_analysis(&mut self) -> Result<RealtimeVideoAnalysisResult> {
1516 let detection = self.scene_pipeline.finish_detection()?;
1517 if !self.state.finished {
1518 let pending_cuts = detection
1519 .cuts
1520 .iter()
1521 .filter(|cut| {
1522 self.state
1523 .active_scene_start
1524 .map(|start| cut.position.frame_index > start.frame_index)
1525 .unwrap_or(false)
1526 })
1527 .cloned()
1528 .collect::<Vec<_>>();
1529 self.close_scenes(&pending_cuts);
1530
1531 if let Some(pipeline) = &mut self.video_pipeline {
1532 let already_observed = self.state.observations.len();
1533 let result = pipeline.finish_analysis()?;
1534 let mut final_observations = result
1535 .observations
1536 .into_iter()
1537 .skip(already_observed)
1538 .collect::<Vec<_>>();
1539 self.annotate_observations_without_frame(&mut final_observations);
1540 self.state
1541 .open_scene_observations
1542 .extend(final_observations.iter().cloned());
1543 self.state
1544 .observations
1545 .extend(final_observations.iter().cloned());
1546 }
1547 self.state.finished = true;
1548 }
1549
1550 let scenes = self.scene_analyses_for(&detection.scenes);
1551 Ok(RealtimeVideoAnalysisResult {
1552 detection,
1553 observations: self.state.observations.clone(),
1554 scenes,
1555 frames_processed: self.state.frames_processed,
1556 })
1557 }
1558
1559 pub fn reset(&mut self) {
1561 self.scene_pipeline.reset();
1562 if let Some(pipeline) = &mut self.video_pipeline {
1563 pipeline.reset();
1564 }
1565 self.state = RealtimeVideoPipelineState::default();
1566 }
1567
1568 pub fn observations(&self) -> &[Observation] {
1570 &self.state.observations
1571 }
1572
1573 pub fn completed_scenes(&self) -> &[SceneAnalysis] {
1575 &self.state.completed_scenes
1576 }
1577
1578 pub fn frames_processed(&self) -> u64 {
1580 self.state.frames_processed
1581 }
1582
1583 fn close_scenes(&mut self, cuts: &[Cut]) -> Vec<SceneAnalysis> {
1584 let mut completed = Vec::new();
1585 for cut in cuts {
1586 let Some(start) = self.state.active_scene_start else {
1587 continue;
1588 };
1589 if cut.position.frame_index <= start.frame_index {
1590 continue;
1591 }
1592 let scene = Scene {
1593 start,
1594 end: cut.position,
1595 };
1596 let observations = std::mem::take(&mut self.state.open_scene_observations);
1597 let analysis = SceneAnalysis {
1598 scene_index: self.state.active_scene_index,
1599 scene,
1600 observations,
1601 };
1602 self.state.completed_scenes.push(analysis.clone());
1603 completed.push(analysis);
1604 self.state.active_scene_index += 1;
1605 self.state.active_scene_start = Some(cut.position);
1606 }
1607 completed
1608 }
1609
1610 fn annotate_observations(&self, observations: &mut [Observation], position: FramePosition) {
1611 for observation in observations {
1612 if observation.timestamp.is_none() {
1613 observation.timestamp = Some(position.timestamp);
1614 }
1615 if observation.frame.is_none() {
1616 observation.frame = Some(position);
1617 }
1618 if observation.scene_index.is_none() {
1619 observation.scene_index = Some(self.state.active_scene_index);
1620 }
1621 }
1622 }
1623
1624 fn annotate_observations_without_frame(&self, observations: &mut [Observation]) {
1625 for observation in observations {
1626 if observation.scene_index.is_none() {
1627 observation.scene_index = Some(self.state.active_scene_index);
1628 }
1629 }
1630 }
1631
1632 fn scene_analyses_for(&self, scenes: &[Scene]) -> Vec<SceneAnalysis> {
1633 scenes
1634 .iter()
1635 .enumerate()
1636 .map(|(index, scene)| {
1637 let scene_index = index as u64;
1638 let observations = self
1639 .state
1640 .observations
1641 .iter()
1642 .filter(|observation| observation.scene_index == Some(scene_index))
1643 .cloned()
1644 .collect();
1645 SceneAnalysis {
1646 scene_index,
1647 scene: scene.clone(),
1648 observations,
1649 }
1650 })
1651 .collect()
1652 }
1653}
1654
1655#[derive(Default)]
1656pub struct RealtimeVideoPipelineBuilder {
1658 scene_pipeline: Option<ScenePipeline>,
1659 scene_builder: ScenePipelineBuilder,
1660 video_analyzers: Vec<Box<dyn VideoAnalyzer>>,
1661}
1662
1663impl RealtimeVideoPipelineBuilder {
1664 pub fn scene_pipeline(mut self, pipeline: ScenePipeline) -> Self {
1666 self.scene_pipeline = Some(pipeline);
1667 self
1668 }
1669
1670 pub fn scene_detector<D: SceneDetector + 'static>(mut self, detector: D) -> Self {
1672 self.scene_builder = self.scene_builder.detector(detector);
1673 self
1674 }
1675
1676 pub fn video_analyzer<A: VideoAnalyzer + 'static>(mut self, analyzer: A) -> Self {
1678 self.video_analyzers.push(Box::new(analyzer));
1679 self
1680 }
1681
1682 pub fn start_in_scene(mut self, value: bool) -> Self {
1684 self.scene_builder = self.scene_builder.start_in_scene(value);
1685 self
1686 }
1687
1688 pub fn crop(mut self, value: Option<CropRegion>) -> Self {
1690 self.scene_builder = self.scene_builder.crop(value);
1691 self
1692 }
1693
1694 pub fn auto_downscale_min_width(mut self, value: u32) -> Self {
1696 self.scene_builder = self.scene_builder.auto_downscale_min_width(value);
1697 self
1698 }
1699
1700 pub fn build(self) -> Result<RealtimeVideoPipeline> {
1702 let scene_pipeline = match self.scene_pipeline {
1703 Some(pipeline) => pipeline,
1704 None => self.scene_builder.build()?,
1705 };
1706 let video_pipeline = if self.video_analyzers.is_empty() {
1707 None
1708 } else {
1709 Some(VideoAnalysisPipeline {
1710 analyzers: self.video_analyzers,
1711 state: VideoAnalysisPipelineState::default(),
1712 })
1713 };
1714 Ok(RealtimeVideoPipeline::new(scene_pipeline, video_pipeline))
1715 }
1716}
1717
1718pub trait AudioAnalyzer {
1720 fn name(&self) -> &str;
1722
1723 fn process_frame(&mut self, frame: &AudioFrame<'_>) -> Result<Vec<AnalysisEvent>>;
1725
1726 fn finish(&mut self, _last_timestamp: Option<Timestamp>) -> Result<Vec<AnalysisEvent>> {
1728 Ok(Vec::new())
1729 }
1730}
1731
1732pub struct AudioPipeline {
1734 analyzers: Vec<Box<dyn AudioAnalyzer>>,
1735 state: AudioPipelineState,
1736}
1737
1738#[derive(Debug, Default, Clone)]
1739struct AudioPipelineState {
1740 events: Vec<AnalysisEvent>,
1741 last_timestamp: Option<Timestamp>,
1742 frames_processed: u64,
1743 finished: bool,
1744}
1745
1746impl AudioPipeline {
1747 pub fn builder() -> AudioPipelineBuilder {
1749 AudioPipelineBuilder::default()
1750 }
1751
1752 pub fn process_frame(&mut self, frame: OwnedAudioFrame) -> Result<AudioAnalysis> {
1754 if self.state.finished {
1755 return Err(DetectError::InvalidArgument(
1756 "cannot process audio frames after finish_analysis; call reset first".to_string(),
1757 ));
1758 }
1759 let frame_ref = frame.as_frame()?;
1760 self.state.last_timestamp = Some(frame_ref.timestamp);
1761
1762 let mut events = Vec::new();
1763 for analyzer in &mut self.analyzers {
1764 let mut new_events = analyzer.process_frame(&frame_ref)?;
1765 events.append(&mut new_events);
1766 }
1767 self.state.events.extend(events.iter().cloned());
1768 self.state.frames_processed += 1;
1769
1770 Ok(AudioAnalysis {
1771 timestamp: frame_ref.timestamp,
1772 events,
1773 frames_processed: self.state.frames_processed,
1774 })
1775 }
1776
1777 pub fn finish_analysis(&mut self) -> Result<AudioAnalysisResult> {
1779 if !self.state.finished {
1780 let mut events = Vec::new();
1781 for analyzer in &mut self.analyzers {
1782 let mut new_events = analyzer.finish(self.state.last_timestamp)?;
1783 events.append(&mut new_events);
1784 }
1785 self.state.events.extend(events);
1786 self.state.finished = true;
1787 }
1788 Ok(AudioAnalysisResult {
1789 events: self.state.events.clone(),
1790 frames_processed: self.state.frames_processed,
1791 })
1792 }
1793
1794 pub fn reset(&mut self) {
1796 self.state = AudioPipelineState::default();
1797 }
1798
1799 pub fn events(&self) -> &[AnalysisEvent] {
1801 &self.state.events
1802 }
1803
1804 pub fn frames_processed(&self) -> u64 {
1806 self.state.frames_processed
1807 }
1808}
1809
1810#[derive(Default)]
1811pub struct AudioPipelineBuilder {
1813 analyzers: Vec<Box<dyn AudioAnalyzer>>,
1814}
1815
1816impl AudioPipelineBuilder {
1817 pub fn analyzer<A: AudioAnalyzer + 'static>(mut self, analyzer: A) -> Self {
1819 self.analyzers.push(Box::new(analyzer));
1820 self
1821 }
1822
1823 pub fn build(self) -> Result<AudioPipeline> {
1825 if self.analyzers.is_empty() {
1826 return Err(DetectError::InvalidArgument(
1827 "at least one audio analyzer is required".to_string(),
1828 ));
1829 }
1830 Ok(AudioPipeline {
1831 analyzers: self.analyzers,
1832 state: AudioPipelineState::default(),
1833 })
1834 }
1835}
1836
1837pub trait TextAnalyzer {
1839 fn name(&self) -> &str;
1841
1842 fn process_segment(&mut self, segment: &TextSegment<'_>) -> Result<Vec<AnalysisEvent>>;
1844
1845 fn finish(&mut self, _last_segment_index: Option<u64>) -> Result<Vec<AnalysisEvent>> {
1847 Ok(Vec::new())
1848 }
1849}
1850
1851pub struct TextPipeline {
1853 analyzers: Vec<Box<dyn TextAnalyzer>>,
1854 state: TextPipelineState,
1855}
1856
1857#[derive(Debug, Default, Clone)]
1858struct TextPipelineState {
1859 events: Vec<AnalysisEvent>,
1860 last_segment_index: Option<u64>,
1861 segments_processed: u64,
1862 finished: bool,
1863}
1864
1865impl TextPipeline {
1866 pub fn builder() -> TextPipelineBuilder {
1868 TextPipelineBuilder::default()
1869 }
1870
1871 pub fn process_segment(&mut self, segment: OwnedTextSegment) -> Result<TextAnalysis> {
1873 if self.state.finished {
1874 return Err(DetectError::InvalidArgument(
1875 "cannot process text segments after finish_analysis; call reset first".to_string(),
1876 ));
1877 }
1878 let segment_ref = segment.as_segment();
1879 self.state.last_segment_index = Some(segment_ref.segment_index);
1880
1881 let mut events = Vec::new();
1882 for analyzer in &mut self.analyzers {
1883 let mut new_events = analyzer.process_segment(&segment_ref)?;
1884 events.append(&mut new_events);
1885 }
1886 self.state.events.extend(events.iter().cloned());
1887 self.state.segments_processed += 1;
1888
1889 Ok(TextAnalysis {
1890 segment_index: segment_ref.segment_index,
1891 events,
1892 segments_processed: self.state.segments_processed,
1893 })
1894 }
1895
1896 pub fn finish_analysis(&mut self) -> Result<TextAnalysisResult> {
1898 if !self.state.finished {
1899 let mut events = Vec::new();
1900 for analyzer in &mut self.analyzers {
1901 let mut new_events = analyzer.finish(self.state.last_segment_index)?;
1902 events.append(&mut new_events);
1903 }
1904 self.state.events.extend(events);
1905 self.state.finished = true;
1906 }
1907 Ok(TextAnalysisResult {
1908 events: self.state.events.clone(),
1909 segments_processed: self.state.segments_processed,
1910 })
1911 }
1912
1913 pub fn reset(&mut self) {
1915 self.state = TextPipelineState::default();
1916 }
1917
1918 pub fn events(&self) -> &[AnalysisEvent] {
1920 &self.state.events
1921 }
1922
1923 pub fn segments_processed(&self) -> u64 {
1925 self.state.segments_processed
1926 }
1927}
1928
1929#[derive(Default)]
1930pub struct TextPipelineBuilder {
1932 analyzers: Vec<Box<dyn TextAnalyzer>>,
1933}
1934
1935impl TextPipelineBuilder {
1936 pub fn analyzer<A: TextAnalyzer + 'static>(mut self, analyzer: A) -> Self {
1938 self.analyzers.push(Box::new(analyzer));
1939 self
1940 }
1941
1942 pub fn build(self) -> Result<TextPipeline> {
1944 if self.analyzers.is_empty() {
1945 return Err(DetectError::InvalidArgument(
1946 "at least one text analyzer is required".to_string(),
1947 ));
1948 }
1949 Ok(TextPipeline {
1950 analyzers: self.analyzers,
1951 state: TextPipelineState::default(),
1952 })
1953 }
1954}
1955
1956pub fn scenes_from_cuts(
1958 cuts: &[Cut],
1959 start: FramePosition,
1960 last_frame: FramePosition,
1961 start_in_scene: bool,
1962) -> Vec<Scene> {
1963 if cuts.is_empty() && !start_in_scene {
1964 return Vec::new();
1965 }
1966 let mut scenes = Vec::new();
1967 let mut scene_start = start;
1968 for cut in cuts {
1969 if cut.position.frame_index <= scene_start.frame_index {
1970 continue;
1971 }
1972 scenes.push(Scene {
1973 start: scene_start,
1974 end: cut.position,
1975 });
1976 scene_start = cut.position;
1977 }
1978 scenes.push(Scene {
1979 start: scene_start,
1980 end: FramePosition {
1981 frame_index: last_frame.frame_index + 1,
1982 timestamp: Timestamp::new(last_frame.timestamp.pts + 1, last_frame.timestamp.timebase),
1983 },
1984 });
1985 scenes
1986}
1987
#[cfg(test)]
mod tests {
    use super::*;

    /// Frame rate shared by all tests: 30 frames per second.
    fn fps() -> Rational64 {
        Rational64::new(30, 1)
    }

    /// Position of frame number `frame` at the shared frame rate.
    fn pos(frame: u64) -> FramePosition {
        FramePosition::from_frame_index(frame, fps())
    }

    /// A 1x1 black RGB frame at the given frame index.
    fn owned_frame(frame_index: u64) -> OwnedVideoFrame {
        OwnedVideoFrame {
            position: pos(frame_index),
            width: 1,
            height: 1,
            pixel_format: PixelFormat::Rgb24,
            data: vec![0, 0, 0],
            stride: 3,
        }
    }

    /// Scene detector stub that reports a cut on exactly one frame index.
    struct CutOnFrame(u64);

    impl SceneDetector for CutOnFrame {
        fn name(&self) -> &'static str {
            "test"
        }

        fn metric_keys(&self) -> &'static [&'static str] {
            &[]
        }

        fn process_frame(
            &mut self,
            frame: &VideoFrame<'_>,
            _metrics: Option<&mut dyn MetricsSink>,
        ) -> Result<Vec<Cut>> {
            if frame.position.frame_index != self.0 {
                return Ok(Vec::new());
            }
            Ok(vec![Cut {
                position: frame.position,
                detector: self.name(),
                score: Some(1.0),
            }])
        }
    }

    #[test]
    fn timecode_formats_and_clamps_subtraction() {
        let ten_seconds = FrameTimecode::from_seconds(10.0, fps()).unwrap();
        assert_eq!(ten_seconds.frame_index, 300);
        assert_eq!(ten_seconds.timecode(3), "00:00:10.000");

        // Subtracting past zero clamps to the first frame.
        let clamped = FrameTimecode::from_frames(5, fps()) - 10;
        assert_eq!(clamped.frame_index, 0);
    }

    #[test]
    fn timecode_parse_accepts_frames_seconds_and_hms() {
        let from_frames = FrameTimecode::parse("42", fps()).unwrap();
        assert_eq!(from_frames.frame_index, 42);

        let from_seconds = FrameTimecode::parse("2.0", fps()).unwrap();
        assert_eq!(from_seconds.frame_index, 60);

        let from_hms = FrameTimecode::parse("00:01:00.000", fps()).unwrap();
        assert_eq!(from_hms.frame_index, 1800);
    }

    #[test]
    fn scenes_are_empty_without_cuts_unless_start_in_scene() {
        let outside_scene = scenes_from_cuts(&[], pos(0), pos(9), false);
        assert!(outside_scene.is_empty());

        let inside_scene = scenes_from_cuts(&[], pos(0), pos(9), true);
        assert_eq!(inside_scene.len(), 1);
        assert_eq!(inside_scene[0].end.frame_index, 10);
    }

    #[test]
    fn scenes_are_contiguous_from_unique_cuts() {
        let cuts = vec![Cut {
            position: pos(5),
            detector: "test",
            score: None,
        }];
        let scenes = scenes_from_cuts(&cuts, pos(0), pos(9), true);
        assert_eq!(scenes.len(), 2);

        let (first, second) = (&scenes[0], &scenes[1]);
        assert_eq!(first.start.frame_index, 0);
        assert_eq!(first.end.frame_index, 5);
        assert_eq!(second.start.frame_index, 5);
        assert_eq!(second.end.frame_index, 10);
    }

    #[test]
    fn metrics_store_tracks_keys_and_values() {
        let mut store = MetricsStore::default();
        store.set_metric(7, "content_val", 12.5);
        assert_eq!(store.get(7, "content_val"), Some(12.5));
        assert_eq!(store.keys().collect::<Vec<_>>(), vec!["content_val"]);
    }

    #[test]
    fn metrics_store_accepts_dynamic_keys() {
        let mut store = MetricsStore::default();
        store.set_metric(7, "combined.content.raw", 12.5);
        assert_eq!(store.get(7, "combined.content.raw"), Some(12.5));
        assert_eq!(
            store.keys().collect::<Vec<_>>(),
            vec!["combined.content.raw"]
        );
    }

    #[test]
    fn pipeline_processes_frames_incrementally() {
        let mut pipeline = ScenePipeline::builder()
            .detector(CutOnFrame(1))
            .start_in_scene(true)
            .build()
            .unwrap();

        let first = pipeline.process_frame(owned_frame(0)).unwrap();
        assert!(first.cuts.is_empty());

        let second = pipeline.process_frame(owned_frame(1)).unwrap();
        assert_eq!(second.frames_processed, 2);
        assert_eq!(second.cuts[0].position.frame_index, 1);

        let result = pipeline.finish_detection().unwrap();
        assert_eq!(result.frames_processed, 2);
        assert_eq!(result.cuts.len(), 1);
        assert_eq!(result.scenes.len(), 2);
    }

    #[test]
    fn video_pipeline_emits_frame_observations() {
        /// Analyzer stub that "reads" the same text on every frame.
        struct OcrAnalyzer;

        impl VideoAnalyzer for OcrAnalyzer {
            fn name(&self) -> &str {
                "ocr"
            }

            fn process_frame(&mut self, frame: &VideoFrame<'_>) -> Result<Vec<Observation>> {
                let observation = Observation::new(self.name(), ObservationKind::Text)
                    .at_frame(frame.position)
                    .text("EXIT")
                    .score(0.9);
                Ok(vec![observation])
            }
        }

        let mut pipeline = VideoAnalysisPipeline::builder()
            .analyzer(OcrAnalyzer)
            .build()
            .unwrap();

        let analysis = pipeline.process_frame(owned_frame(3)).unwrap();
        assert_eq!(analysis.frames_processed, 1);

        let observation = &analysis.observations[0];
        assert_eq!(observation.text.as_deref(), Some("EXIT"));
        assert_eq!(observation.to_text_segment(0).unwrap().text, "EXIT");

        let finished = pipeline.finish_analysis().unwrap();
        assert_eq!(finished.observations.len(), 1);
    }

    #[test]
    fn sampled_video_analyzer_skips_unsampled_frames() {
        /// Analyzer stub that records which frame indices it was given.
        struct CountingVideoAnalyzer {
            frames: Vec<u64>,
        }

        impl VideoAnalyzer for CountingVideoAnalyzer {
            fn name(&self) -> &str {
                "counting"
            }

            fn process_frame(&mut self, frame: &VideoFrame<'_>) -> Result<Vec<Observation>> {
                self.frames.push(frame.position.frame_index);
                Ok(Vec::new())
            }
        }

        let inner = CountingVideoAnalyzer { frames: Vec::new() };
        let mut sampled = SampledVideoAnalyzer::new(inner, 3);
        for index in 0..7 {
            let frame = owned_frame(index);
            sampled.process_frame(&frame.as_frame()).unwrap();
        }

        assert_eq!(sampled.inner().frames, vec![0, 3, 6]);
    }

    #[test]
    fn realtime_video_pipeline_groups_observations_by_completed_scene() {
        /// Analyzer stub that emits one labelled observation per frame.
        struct ObjectAnalyzer;

        impl VideoAnalyzer for ObjectAnalyzer {
            fn name(&self) -> &str {
                "objects"
            }

            fn process_frame(&mut self, frame: &VideoFrame<'_>) -> Result<Vec<Observation>> {
                let observation = Observation::new(self.name(), ObservationKind::Object)
                    .at_frame(frame.position)
                    .label(format!("frame-{}", frame.position.frame_index));
                Ok(vec![observation])
            }
        }

        let mut pipeline = RealtimeVideoPipeline::builder()
            .scene_detector(CutOnFrame(2))
            .video_analyzer(ObjectAnalyzer)
            .start_in_scene(true)
            .build()
            .unwrap();

        let first = pipeline.process_frame(owned_frame(0)).unwrap();
        assert!(first.completed_scenes.is_empty());
        pipeline.process_frame(owned_frame(1)).unwrap();
        let at_cut = pipeline.process_frame(owned_frame(2)).unwrap();

        assert_eq!(at_cut.completed_scenes.len(), 1);
        let closed = &at_cut.completed_scenes[0];
        assert_eq!(closed.scene.start.frame_index, 0);
        assert_eq!(closed.scene.end.frame_index, 2);
        assert_eq!(closed.observations.len(), 2);
        // The observation on the cut frame belongs to the newly opened scene.
        assert_eq!(at_cut.observations[0].scene_index, Some(1));

        let result = pipeline.finish_analysis().unwrap();
        assert_eq!(result.detection.scenes.len(), 2);
        assert_eq!(result.scenes[0].observations.len(), 2);
        assert_eq!(result.scenes[1].observations.len(), 1);
    }

    #[test]
    fn audio_pipeline_processes_frames_incrementally() {
        /// Analyzer stub that flags frames whose mean absolute sample exceeds 0.5.
        struct LoudnessAnalyzer;

        impl AudioAnalyzer for LoudnessAnalyzer {
            fn name(&self) -> &str {
                "loudness"
            }

            fn process_frame(&mut self, frame: &AudioFrame<'_>) -> Result<Vec<AnalysisEvent>> {
                let AudioBuffer::F32(samples) = frame.data else {
                    return Ok(Vec::new());
                };
                let mean = samples.iter().map(|sample| sample.abs()).sum::<f32>()
                    / samples.len().max(1) as f32;
                if mean > 0.5 {
                    let event = AnalysisEvent::new(self.name(), "loud")
                        .at_timestamp(frame.timestamp)
                        .score(mean);
                    Ok(vec![event])
                } else {
                    Ok(Vec::new())
                }
            }
        }

        let mut pipeline = AudioPipeline::builder()
            .analyzer(LoudnessAnalyzer)
            .build()
            .unwrap();
        let frame = OwnedAudioFrame::new(
            Timestamp::new(0, Timebase::new(1, 48_000)),
            48_000,
            1,
            AudioBuffer::F32(vec![1.0, 0.5]),
        )
        .unwrap();

        let analysis = pipeline.process_frame(frame).unwrap();
        assert_eq!(analysis.frames_processed, 1);
        assert_eq!(analysis.events[0].label, "loud");

        let finished = pipeline.finish_analysis().unwrap();
        assert_eq!(finished.events.len(), 1);
    }

    #[test]
    fn text_pipeline_processes_segments_incrementally() {
        /// Analyzer stub that flags segments containing the word "cut".
        struct KeywordAnalyzer;

        impl TextAnalyzer for KeywordAnalyzer {
            fn name(&self) -> &str {
                "keyword"
            }

            fn process_segment(&mut self, segment: &TextSegment<'_>) -> Result<Vec<AnalysisEvent>> {
                if !segment.text.contains("cut") {
                    return Ok(Vec::new());
                }
                let mut event = AnalysisEvent::new(self.name(), "keyword").score(1.0);
                if let Some(timestamp) = segment.timestamp {
                    event = event.at_timestamp(timestamp);
                }
                Ok(vec![event])
            }
        }

        let mut pipeline = TextPipeline::builder()
            .analyzer(KeywordAnalyzer)
            .build()
            .unwrap();
        let segment = OwnedTextSegment::new(0, "find this cut point");

        let analysis = pipeline.process_segment(segment).unwrap();
        assert_eq!(analysis.segments_processed, 1);
        assert_eq!(analysis.events[0].analyzer, "keyword");

        let finished = pipeline.finish_analysis().unwrap();
        assert_eq!(finished.segments_processed, 1);
    }

    #[test]
    fn crop_rejects_empty_regions() {
        let zero_width = CropRegion::new(0, 0, 0, 10);
        assert!(zero_width.is_err());
    }
}