Skip to main content

video_analysis_core/
lib.rs

1#![doc = include_str!("../README.md")]
2
3pub mod runtime;
4pub mod surface;
5use std::collections::{BTreeMap, BTreeSet};
6use std::fmt;
7use std::ops::{Add, Sub};
8
9use num_rational::Rational64;
10use num_traits::ToPrimitive;
11use thiserror::Error;
12
13#[derive(Debug, Error)]
14/// Variants describing detect error.
15pub enum DetectError {
16    #[error("unsupported pixel format: {0:?}")]
17    /// The unsupported pixel format variant.
18    UnsupportedPixelFormat(PixelFormat),
19    #[error("unsupported audio sample format: {0:?}")]
20    /// The unsupported audio sample format variant.
21    UnsupportedAudioSampleFormat(AudioSampleFormat),
22    #[error("invalid frame buffer: expected at least {expected} bytes, got {actual}")]
23    /// The invalid frame buffer variant.
24    InvalidFrameBuffer {
25        /// Expected value for this error.
26        expected: usize,
27        /// Actual value that triggered this error.
28        actual: usize,
29    },
30    #[error("invalid dimensions: {width}x{height}")]
31    /// The invalid dimensions variant.
32    InvalidDimensions {
33        /// Width in pixels.
34        width: u32,
35        /// Height in pixels.
36        height: u32,
37    },
38    #[error("invalid audio format: sample_rate={sample_rate}, channels={channels}")]
39    /// The invalid audio format variant.
40    InvalidAudioFormat {
41        /// Sample rate in hertz.
42        sample_rate: u32,
43        /// Number of audio channels.
44        channels: u16,
45    },
46    #[error("video source error: {0}")]
47    /// The source variant.
48    Source(String),
49    #[error("invalid argument: {0}")]
50    /// The invalid argument variant.
51    InvalidArgument(String),
52    #[error("I/O error: {0}")]
53    /// The I/O variant.
54    Io(#[from] std::io::Error),
55}
56
57/// Type alias for result.
58pub type Result<T> = std::result::Result<T, DetectError>;
59
/// Rational tick length: one tick lasts `num / den` seconds.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Timebase {
    /// Numerator of the seconds-per-tick ratio.
    pub num: i32,
    /// Denominator of the seconds-per-tick ratio.
    pub den: i32,
}

impl Timebase {
    /// Builds a timebase from a numerator and denominator.
    ///
    /// No validation is performed; a zero `den` is accepted as-is.
    pub const fn new(num: i32, den: i32) -> Self {
        Timebase { num, den }
    }

    /// Returns the length of one tick in seconds, i.e. `num / den` as `f64`.
    ///
    /// With `den == 0` the floating-point division yields an infinite value or
    /// NaN rather than an error.
    pub fn seconds_per_tick(self) -> f64 {
        let Timebase { num, den } = self;
        f64::from(num) / f64::from(den)
    }
}
80
81#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
82/// Data type for timestamp.
83pub struct Timestamp {
84    /// The PTS value.
85    pub pts: i64,
86    /// The timebase value.
87    pub timebase: Timebase,
88}
89
90impl Timestamp {
91    /// Creates a new value.
92    pub const fn new(pts: i64, timebase: Timebase) -> Self {
93        Self { pts, timebase }
94    }
95
96    /// Returns seconds.
97    pub fn seconds(self) -> f64 {
98        self.pts as f64 * self.timebase.seconds_per_tick()
99    }
100}
101
102#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
103/// Data type for frame position.
104pub struct FramePosition {
105    /// The frame index value.
106    pub frame_index: u64,
107    /// Timestamp associated with this value.
108    pub timestamp: Timestamp,
109}
110
111impl FramePosition {
112    /// Builds this value from frame index.
113    pub fn from_frame_index(frame_index: u64, fps: Rational64) -> Self {
114        let den = *fps.numer() as i32;
115        let num = *fps.denom() as i32;
116        Self {
117            frame_index,
118            timestamp: Timestamp::new(frame_index as i64, Timebase::new(num, den)),
119        }
120    }
121}
122
123#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
124/// Data type for frame timecode.
125pub struct FrameTimecode {
126    /// The frame index value.
127    pub frame_index: u64,
128    /// The FPS value.
129    pub fps: Rational64,
130}
131
132impl FrameTimecode {
133    /// Builds this value from frames.
134    pub fn from_frames(frame_index: u64, fps: Rational64) -> Self {
135        Self { frame_index, fps }
136    }
137
138    /// Builds a PTS-backed frame timecode without changing the legacy
139    /// `FrameTimecode` layout.
140    pub fn from_pts(
141        frame_index: u64,
142        fps: Rational64,
143        pts: i64,
144        timebase: Timebase,
145    ) -> TimestampedFrameTimecode {
146        TimestampedFrameTimecode {
147            timecode: Self::from_frames(frame_index, fps),
148            timestamp: Timestamp::new(pts, timebase),
149        }
150    }
151
152    /// Builds this value from seconds.
153    pub fn from_seconds(seconds: f64, fps: Rational64) -> Result<Self> {
154        if seconds < 0.0 {
155            return Err(DetectError::InvalidArgument(
156                "seconds must be greater than or equal to zero".to_string(),
157            ));
158        }
159        let fps_float = fps.to_f64().ok_or_else(|| {
160            DetectError::InvalidArgument("frame rate cannot be represented".to_string())
161        })?;
162        Ok(Self {
163            frame_index: (seconds * fps_float).round() as u64,
164            fps,
165        })
166    }
167
168    /// Parses parse.
169    pub fn parse(input: &str, fps: Rational64) -> Result<Self> {
170        if input.chars().all(|c| c.is_ascii_digit()) {
171            let frame_index = input.parse::<u64>().map_err(|err| {
172                DetectError::InvalidArgument(format!("invalid frame number `{input}`: {err}"))
173            })?;
174            return Ok(Self { frame_index, fps });
175        }
176        if !input.contains(':') {
177            let seconds = input.parse::<f64>().map_err(|err| {
178                DetectError::InvalidArgument(format!("invalid seconds `{input}`: {err}"))
179            })?;
180            return Self::from_seconds(seconds, fps);
181        }
182
183        let parts: Vec<&str> = input.split(':').collect();
184        if parts.len() != 3 {
185            return Err(DetectError::InvalidArgument(format!(
186                "invalid timecode `{input}`"
187            )));
188        }
189        let hours = parts[0].parse::<f64>().map_err(|err| {
190            DetectError::InvalidArgument(format!("invalid hours in `{input}`: {err}"))
191        })?;
192        let minutes = parts[1].parse::<f64>().map_err(|err| {
193            DetectError::InvalidArgument(format!("invalid minutes in `{input}`: {err}"))
194        })?;
195        let seconds = parts[2].parse::<f64>().map_err(|err| {
196            DetectError::InvalidArgument(format!("invalid seconds in `{input}`: {err}"))
197        })?;
198        Self::from_seconds((hours * 3600.0) + (minutes * 60.0) + seconds, fps)
199    }
200
201    /// Returns seconds.
202    pub fn seconds(self) -> f64 {
203        self.frame_index as f64 / self.fps.to_f64().unwrap_or(1.0)
204    }
205
206    /// Returns timecode.
207    pub fn timecode(self, precision: usize) -> String {
208        let factor = 10_f64.powi(precision as i32);
209        let total = (self.seconds() * factor).round() / factor;
210        let hours = (total / 3600.0).floor() as u64;
211        let minutes = ((total - hours as f64 * 3600.0) / 60.0).floor() as u64;
212        let seconds = total - hours as f64 * 3600.0 - minutes as f64 * 60.0;
213        if precision == 0 {
214            format!("{hours:02}:{minutes:02}:{:02}", seconds.round() as u64)
215        } else {
216            let whole = seconds.floor() as u64;
217            let frac = ((seconds - whole as f64) * factor).round() as u64;
218            format!("{hours:02}:{minutes:02}:{whole:02}.{frac:0precision$}")
219        }
220    }
221
222    /// Returns position.
223    pub fn position(self) -> FramePosition {
224        FramePosition::from_frame_index(self.frame_index, self.fps)
225    }
226
227    /// Returns PTS metadata when this value came from a timestamped wrapper.
228    ///
229    /// Plain `FrameTimecode` values remain frame-rate-derived and therefore do
230    /// not carry source PTS metadata.
231    pub const fn pts(self) -> Option<i64> {
232        None
233    }
234
235    /// Returns source timebase metadata when this value came from a timestamped
236    /// wrapper.
237    ///
238    /// Plain `FrameTimecode` values remain frame-rate-derived and therefore do
239    /// not carry source timebase metadata.
240    pub const fn time_base(self) -> Option<Timebase> {
241        None
242    }
243}
244
245#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
246/// Frame timecode paired with source PTS/timebase metadata for VFR-aware
247/// workflows.
248pub struct TimestampedFrameTimecode {
249    /// The legacy frame/fps timecode view.
250    pub timecode: FrameTimecode,
251    /// The source timestamp attached to this frame.
252    pub timestamp: Timestamp,
253}
254
255impl TimestampedFrameTimecode {
256    /// Returns the frame index.
257    pub const fn frame_index(self) -> u64 {
258        self.timecode.frame_index
259    }
260
261    /// Returns the display frame rate.
262    pub const fn fps(self) -> Rational64 {
263        self.timecode.fps
264    }
265
266    /// Returns source PTS metadata.
267    pub const fn pts(self) -> Option<i64> {
268        Some(self.timestamp.pts)
269    }
270
271    /// Returns source timebase metadata.
272    pub const fn time_base(self) -> Option<Timebase> {
273        Some(self.timestamp.timebase)
274    }
275
276    /// Returns source timestamp seconds.
277    pub fn seconds(self) -> f64 {
278        self.timestamp.seconds()
279    }
280
281    /// Returns display timecode using the legacy frame/fps conversion.
282    pub fn timecode(self, precision: usize) -> String {
283        self.timecode.timecode(precision)
284    }
285
286    /// Returns a frame position preserving the source timestamp.
287    pub const fn position(self) -> FramePosition {
288        FramePosition {
289            frame_index: self.timecode.frame_index,
290            timestamp: self.timestamp,
291        }
292    }
293}
294
295impl Add<u64> for FrameTimecode {
296    type Output = FrameTimecode;
297
298    fn add(self, rhs: u64) -> Self::Output {
299        Self {
300            frame_index: self.frame_index + rhs,
301            fps: self.fps,
302        }
303    }
304}
305
306impl Sub<u64> for FrameTimecode {
307    type Output = FrameTimecode;
308
309    fn sub(self, rhs: u64) -> Self::Output {
310        Self {
311            frame_index: self.frame_index.saturating_sub(rhs),
312            fps: self.fps,
313        }
314    }
315}
316
317impl fmt::Display for FrameTimecode {
318    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
319        f.write_str(&self.timecode(3))
320    }
321}
322
/// Packed 24-bit pixel layouts understood by this crate.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PixelFormat {
    /// Three bytes per pixel in red, green, blue order.
    Rgb24,
    /// Three bytes per pixel in blue, green, red order.
    Bgr24,
}
331
332#[derive(Debug, Clone, Copy)]
333/// Data type for video frame.
334pub struct VideoFrame<'a> {
335    /// The position value.
336    pub position: FramePosition,
337    /// Width in pixels.
338    pub width: u32,
339    /// Height in pixels.
340    pub height: u32,
341    /// The pixel format value.
342    pub pixel_format: PixelFormat,
343    /// Underlying data buffer.
344    pub data: &'a [u8],
345    /// The stride value.
346    pub stride: usize,
347}
348
349impl<'a> VideoFrame<'a> {
350    /// Returns rgb24.
351    pub fn rgb24(position: FramePosition, width: u32, height: u32, data: &'a [u8]) -> Result<Self> {
352        Self::packed(
353            position,
354            width,
355            height,
356            PixelFormat::Rgb24,
357            data,
358            width as usize * 3,
359        )
360    }
361
362    /// Returns bgr24.
363    pub fn bgr24(position: FramePosition, width: u32, height: u32, data: &'a [u8]) -> Result<Self> {
364        Self::packed(
365            position,
366            width,
367            height,
368            PixelFormat::Bgr24,
369            data,
370            width as usize * 3,
371        )
372    }
373
374    /// Returns packed.
375    pub fn packed(
376        position: FramePosition,
377        width: u32,
378        height: u32,
379        pixel_format: PixelFormat,
380        data: &'a [u8],
381        stride: usize,
382    ) -> Result<Self> {
383        if width == 0 || height == 0 {
384            return Err(DetectError::InvalidDimensions { width, height });
385        }
386        let expected = stride * height as usize;
387        if data.len() < expected {
388            return Err(DetectError::InvalidFrameBuffer {
389                expected,
390                actual: data.len(),
391            });
392        }
393        Ok(Self {
394            position,
395            width,
396            height,
397            pixel_format,
398            data,
399            stride,
400        })
401    }
402
403    /// Returns pixel RGB.
404    pub fn pixel_rgb(&self, x: u32, y: u32) -> [u8; 3] {
405        let i = y as usize * self.stride + x as usize * 3;
406        match self.pixel_format {
407            PixelFormat::Rgb24 => [self.data[i], self.data[i + 1], self.data[i + 2]],
408            PixelFormat::Bgr24 => [self.data[i + 2], self.data[i + 1], self.data[i]],
409        }
410    }
411
412    /// Returns pixel count.
413    pub fn pixel_count(&self) -> usize {
414        self.width as usize * self.height as usize
415    }
416}
417
418#[derive(Debug, Clone, PartialEq)]
419/// Data type for scene.
420pub struct Scene {
421    /// The start value.
422    pub start: FramePosition,
423    /// The end value.
424    pub end: FramePosition,
425}
426
427#[derive(Debug, Clone, PartialEq)]
428/// Data type for cut.
429pub struct Cut {
430    /// The position value.
431    pub position: FramePosition,
432    /// The detector value.
433    pub detector: &'static str,
434    /// Score assigned to this value.
435    pub score: Option<f32>,
436}
437
/// Destination for per-frame metric values produced during detection.
pub trait MetricsSink {
    /// Records `value` under the name `key` for the frame `frame_index`.
    fn set_metric(&mut self, frame_index: u64, key: &str, value: f64);
}
443
/// In-memory table of metric values, ordered by frame index and metric name.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct MetricsStore {
    /// Metric values per frame index, keyed by metric name.
    rows: BTreeMap<u64, BTreeMap<String, f64>>,
    /// Every metric name stored so far.
    keys: BTreeSet<String>,
}

impl MetricsStore {
    /// Returns all stored rows, keyed by frame index.
    pub fn rows(&self) -> &BTreeMap<u64, BTreeMap<String, f64>> {
        &self.rows
    }

    /// Iterates over the known metric names in sorted order.
    pub fn keys(&self) -> impl Iterator<Item = &str> + '_ {
        self.keys.iter().map(|name| name.as_str())
    }

    /// Looks up the value stored for `key` at `frame_index`.
    ///
    /// Returns `None` when the frame has no row or the row has no such key.
    pub fn get(&self, frame_index: u64, key: &str) -> Option<f64> {
        let row = self.rows.get(&frame_index)?;
        row.get(key).copied()
    }
}
470
471impl MetricsSink for MetricsStore {
472    fn set_metric(&mut self, frame_index: u64, key: &str, value: f64) {
473        self.keys.insert(key.to_string());
474        self.rows
475            .entry(frame_index)
476            .or_default()
477            .insert(key.to_string(), value);
478    }
479}
480
481/// Trait for scene detector implementations.
482pub trait SceneDetector {
483    /// Returns name.
484    fn name(&self) -> &'static str;
485    /// Returns metric keys.
486    fn metric_keys(&self) -> &'static [&'static str];
487    /// Returns event buffer len.
488    fn event_buffer_len(&self) -> usize {
489        0
490    }
491    /// Returns process frame.
492    fn process_frame(
493        &mut self,
494        frame: &VideoFrame<'_>,
495        metrics: Option<&mut dyn MetricsSink>,
496    ) -> Result<Vec<Cut>>;
497    /// Returns finish.
498    fn finish(
499        &mut self,
500        _last_position: FramePosition,
501        _metrics: Option<&mut dyn MetricsSink>,
502    ) -> Result<Vec<Cut>> {
503        Ok(Vec::new())
504    }
505}
506
/// How above-threshold frames that arrive too close together are handled.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FlashFilterMode {
    /// Closely spaced above-threshold frames are combined and may be reported
    /// later as a single cut.
    Merge,
    /// Above-threshold frames that arrive before the minimum length has
    /// elapsed are dropped.
    Suppress,
}
515
516#[derive(Debug, Clone)]
517struct FlashFilter {
518    mode: FlashFilterMode,
519    length: u64,
520    last_above: Option<u64>,
521    merge_enabled: bool,
522    merge_triggered: bool,
523    merge_start: Option<u64>,
524}
525
526impl FlashFilter {
527    fn new(mode: FlashFilterMode, length: u64) -> Self {
528        Self {
529            mode,
530            length,
531            last_above: None,
532            merge_enabled: false,
533            merge_triggered: false,
534            merge_start: None,
535        }
536    }
537
538    fn max_behind(&self) -> usize {
539        match self.mode {
540            FlashFilterMode::Suppress => 0,
541            FlashFilterMode::Merge => self.length as usize,
542        }
543    }
544
545    fn filter(&mut self, frame_index: u64, above_threshold: bool) -> Vec<u64> {
546        if self.length == 0 {
547            return above_threshold.then_some(frame_index).into_iter().collect();
548        }
549        if self.last_above.is_none() {
550            self.last_above = Some(frame_index);
551        }
552        match self.mode {
553            FlashFilterMode::Suppress => self.filter_suppress(frame_index, above_threshold),
554            FlashFilterMode::Merge => self.filter_merge(frame_index, above_threshold),
555        }
556    }
557
558    fn filter_suppress(&mut self, frame_index: u64, above_threshold: bool) -> Vec<u64> {
559        let last_above = self.last_above.unwrap_or(frame_index);
560        let min_length_met = frame_index.saturating_sub(last_above) >= self.length;
561        if above_threshold && min_length_met {
562            self.last_above = Some(frame_index);
563            vec![frame_index]
564        } else {
565            Vec::new()
566        }
567    }
568
569    fn filter_merge(&mut self, frame_index: u64, above_threshold: bool) -> Vec<u64> {
570        let last_above = self.last_above.unwrap_or(frame_index);
571        let min_length_met = frame_index.saturating_sub(last_above) >= self.length;
572        if above_threshold {
573            self.last_above = Some(frame_index);
574        }
575        let current_last_above = self.last_above.unwrap_or(frame_index);
576        if self.merge_triggered {
577            let merged = current_last_above.saturating_sub(self.merge_start.unwrap_or(frame_index));
578            if min_length_met && !above_threshold && merged >= self.length {
579                self.merge_triggered = false;
580                return vec![current_last_above];
581            }
582            return Vec::new();
583        }
584        if !above_threshold {
585            return Vec::new();
586        }
587        if min_length_met {
588            self.merge_enabled = true;
589            return vec![frame_index];
590        }
591        if self.merge_enabled {
592            self.merge_triggered = true;
593            self.merge_start = Some(frame_index);
594        }
595        Vec::new()
596    }
597}
598
/// Weights applied to the per-component frame differences when they are
/// combined into a single content score.
#[derive(Debug, Clone, Copy)]
pub struct ContentWeights {
    /// Weight of the hue difference.
    pub delta_hue: f32,
    /// Weight of the saturation difference.
    pub delta_sat: f32,
    /// Weight of the luma difference.
    pub delta_lum: f32,
    /// Weight of the edge difference.
    pub delta_edges: f32,
}

impl Default for ContentWeights {
    /// Hue, saturation and luma weighted equally (1.0); edges weighted 0.0.
    fn default() -> Self {
        ContentWeights {
            delta_hue: 1.0,
            delta_sat: 1.0,
            delta_lum: 1.0,
            delta_edges: 0.0,
        }
    }
}

impl ContentWeights {
    /// Weights that take only the luma difference into account.
    pub const LUMA_ONLY: Self = ContentWeights {
        delta_hue: 0.0,
        delta_sat: 0.0,
        delta_lum: 1.0,
        delta_edges: 0.0,
    };

    /// Returns the sum of the absolute values of all four weights.
    fn total(self) -> f32 {
        let Self {
            delta_hue,
            delta_sat,
            delta_lum,
            delta_edges,
        } = self;
        [delta_hue, delta_sat, delta_lum, delta_edges]
            .iter()
            .map(|weight| weight.abs())
            .sum::<f32>()
    }
}
636
637#[derive(Debug, Clone)]
638/// Deterministic content scene detector using HSV/luma/edge frame differences.
639pub struct ContentDetector {
640    threshold: f32,
641    scorer: ContentScorer,
642    flash_filter: FlashFilter,
643}
644
645impl Default for ContentDetector {
646    fn default() -> Self {
647        Self::new(27.0, 15)
648    }
649}
650
651impl ContentDetector {
652    /// Constant for metric keys.
653    pub const METRIC_KEYS: &'static [&'static str] = &[
654        "content_val",
655        "delta_hue",
656        "delta_sat",
657        "delta_lum",
658        "delta_edges",
659    ];
660
661    /// Creates a new value.
662    pub fn new(threshold: f32, min_scene_len: u64) -> Self {
663        Self {
664            threshold,
665            scorer: ContentScorer::new(ContentWeights::default(), None),
666            flash_filter: FlashFilter::new(FlashFilterMode::Merge, min_scene_len),
667        }
668    }
669
670    /// Returns this value with weights.
671    pub fn with_weights(mut self, weights: ContentWeights) -> Self {
672        self.scorer.weights = weights;
673        self
674    }
675
676    /// Returns luma only.
677    pub fn luma_only(mut self, value: bool) -> Self {
678        if value {
679            self.scorer.weights = ContentWeights::LUMA_ONLY;
680        }
681        self
682    }
683
684    /// Returns kernel size.
685    pub fn kernel_size(mut self, value: Option<usize>) -> Result<Self> {
686        if let Some(size) = value {
687            if size < 3 || size % 2 == 0 {
688                return Err(DetectError::InvalidArgument(
689                    "kernel_size must be an odd integer >= 3".to_string(),
690                ));
691            }
692        }
693        self.scorer.kernel_size = value;
694        Ok(self)
695    }
696
697    /// Returns filter mode.
698    pub fn filter_mode(mut self, mode: FlashFilterMode, min_scene_len: u64) -> Self {
699        self.flash_filter = FlashFilter::new(mode, min_scene_len);
700        self
701    }
702}
703
704impl SceneDetector for ContentDetector {
705    fn name(&self) -> &'static str {
706        "content"
707    }
708
709    fn metric_keys(&self) -> &'static [&'static str] {
710        Self::METRIC_KEYS
711    }
712
713    fn event_buffer_len(&self) -> usize {
714        self.flash_filter.max_behind()
715    }
716
717    fn process_frame(
718        &mut self,
719        frame: &VideoFrame<'_>,
720        metrics: Option<&mut dyn MetricsSink>,
721    ) -> Result<Vec<Cut>> {
722        let score = self.scorer.score(frame, metrics)?;
723        let cut_frames = self
724            .flash_filter
725            .filter(frame.position.frame_index, score >= self.threshold);
726        Ok(cut_frames
727            .into_iter()
728            .map(|frame_index| Cut {
729                position: position_like(frame.position, frame_index),
730                detector: self.name(),
731                score: Some(score),
732            })
733            .collect())
734    }
735}
736
737#[derive(Debug, Clone)]
738struct ContentScorer {
739    weights: ContentWeights,
740    kernel_size: Option<usize>,
741    last: Option<FrameData>,
742    current: FrameData,
743}
744
745impl ContentScorer {
746    fn new(weights: ContentWeights, kernel_size: Option<usize>) -> Self {
747        Self {
748            weights,
749            kernel_size,
750            last: None,
751            current: FrameData::default(),
752        }
753    }
754
755    fn score(
756        &mut self,
757        frame: &VideoFrame<'_>,
758        metrics: Option<&mut dyn MetricsSink>,
759    ) -> Result<f32> {
760        let calculate_edges = self.weights.delta_edges > 0.0;
761        let Some(previous) = self.last.as_ref() else {
762            self.current
763                .fill_from_frame(frame, calculate_edges, self.kernel_size)?;
764            self.last = Some(std::mem::take(&mut self.current));
765            if let Some(metrics) = metrics {
766                set_content_metrics(
767                    metrics,
768                    frame.position.frame_index,
769                    0.0,
770                    (0.0, 0.0, 0.0, 0.0),
771                );
772            }
773            return Ok(0.0);
774        };
775
776        let (dh, ds, dl) = if !calculate_edges && previous.pixel_count() == frame.pixel_count() {
777            self.current.fill_from_frame_and_diff(frame, previous)?
778        } else {
779            self.current
780                .fill_from_frame(frame, calculate_edges, self.kernel_size)?;
781            (
782                mean_abs_diff(&self.current.hue, &previous.hue),
783                mean_abs_diff(&self.current.sat, &previous.sat),
784                mean_abs_diff(&self.current.lum, &previous.lum),
785            )
786        };
787        let de = match (&self.current.edges, &previous.edges) {
788            (Some(left), Some(right)) => mean_abs_diff(left, right),
789            _ => 0.0,
790        };
791        let total_weight = self.weights.total();
792        let score = if total_weight == 0.0 {
793            0.0
794        } else {
795            ((dh * self.weights.delta_hue)
796                + (ds * self.weights.delta_sat)
797                + (dl * self.weights.delta_lum)
798                + (de * self.weights.delta_edges))
799                / total_weight
800        };
801        if let Some(metrics) = metrics {
802            set_content_metrics(metrics, frame.position.frame_index, score, (dh, ds, dl, de));
803        }
804        if let Some(previous) = self.last.as_mut() {
805            std::mem::swap(previous, &mut self.current);
806        }
807        Ok(score)
808    }
809}
810
811fn set_content_metrics(
812    metrics: &mut dyn MetricsSink,
813    frame_index: u64,
814    score: f32,
815    components: (f32, f32, f32, f32),
816) {
817    metrics.set_metric(frame_index, "content_val", score as f64);
818    metrics.set_metric(frame_index, "delta_hue", components.0 as f64);
819    metrics.set_metric(frame_index, "delta_sat", components.1 as f64);
820    metrics.set_metric(frame_index, "delta_lum", components.2 as f64);
821    metrics.set_metric(frame_index, "delta_edges", components.3 as f64);
822}
823
824#[derive(Debug, Default, Clone)]
825struct FrameData {
826    hue: Vec<u8>,
827    sat: Vec<u8>,
828    lum: Vec<u8>,
829    edges: Option<Vec<u8>>,
830}
831
832impl FrameData {
833    fn pixel_count(&self) -> usize {
834        self.lum.len()
835    }
836
837    fn fill_from_frame(
838        &mut self,
839        frame: &VideoFrame<'_>,
840        calculate_edges: bool,
841        kernel_size: Option<usize>,
842    ) -> Result<()> {
843        let pixels = frame.pixel_count();
844        self.prepare_component_buffers(pixels);
845        match frame.pixel_format {
846            PixelFormat::Rgb24 => self.fill_from_rgb24(frame)?,
847            PixelFormat::Bgr24 => self.fill_from_bgr24(frame)?,
848        }
849        self.edges = calculate_edges
850            .then(|| detect_edges(&self.lum, frame.width, frame.height, kernel_size));
851        Ok(())
852    }
853
854    fn fill_from_frame_and_diff(
855        &mut self,
856        frame: &VideoFrame<'_>,
857        previous: &FrameData,
858    ) -> Result<(f32, f32, f32)> {
859        let pixels = frame.pixel_count();
860        self.prepare_component_buffers(pixels);
861        self.edges = None;
862        let sums = match frame.pixel_format {
863            PixelFormat::Rgb24 => self.fill_from_rgb24_and_diff(frame, previous)?,
864            PixelFormat::Bgr24 => self.fill_from_bgr24_and_diff(frame, previous)?,
865        };
866        Ok((
867            sums.0 as f32 / pixels as f32,
868            sums.1 as f32 / pixels as f32,
869            sums.2 as f32 / pixels as f32,
870        ))
871    }
872
873    fn prepare_component_buffers(&mut self, pixels: usize) {
874        self.hue.clear();
875        self.sat.clear();
876        self.lum.clear();
877        if self.hue.capacity() < pixels {
878            self.hue.reserve(pixels);
879        }
880        if self.sat.capacity() < pixels {
881            self.sat.reserve(pixels);
882        }
883        if self.lum.capacity() < pixels {
884            self.lum.reserve(pixels);
885        }
886    }
887
888    fn fill_from_rgb24(&mut self, frame: &VideoFrame<'_>) -> Result<()> {
889        let row_len = frame.width as usize * 3;
890        for y in 0..frame.height {
891            let start = y as usize * frame.stride;
892            let row =
893                frame
894                    .data
895                    .get(start..start + row_len)
896                    .ok_or(DetectError::InvalidFrameBuffer {
897                        expected: start + row_len,
898                        actual: frame.data.len(),
899                    })?;
900            for pixel in row.chunks_exact(3) {
901                let (r, g, b) = (pixel[0], pixel[1], pixel[2]);
902                let (h, s, v) = rgb_to_hsv(r, g, b);
903                self.hue.push(h);
904                self.sat.push(s);
905                self.lum.push(v);
906            }
907        }
908        Ok(())
909    }
910
911    fn fill_from_rgb24_and_diff(
912        &mut self,
913        frame: &VideoFrame<'_>,
914        previous: &FrameData,
915    ) -> Result<(u64, u64, u64)> {
916        let row_len = frame.width as usize * 3;
917        let mut index = 0usize;
918        let mut dh = 0u64;
919        let mut ds = 0u64;
920        let mut dl = 0u64;
921        for y in 0..frame.height {
922            let start = y as usize * frame.stride;
923            let row =
924                frame
925                    .data
926                    .get(start..start + row_len)
927                    .ok_or(DetectError::InvalidFrameBuffer {
928                        expected: start + row_len,
929                        actual: frame.data.len(),
930                    })?;
931            for pixel in row.chunks_exact(3) {
932                let (h, s, v) = rgb_to_hsv(pixel[0], pixel[1], pixel[2]);
933                dh += abs_diff_u8(h, previous.hue[index]) as u64;
934                ds += abs_diff_u8(s, previous.sat[index]) as u64;
935                dl += abs_diff_u8(v, previous.lum[index]) as u64;
936                self.hue.push(h);
937                self.sat.push(s);
938                self.lum.push(v);
939                index += 1;
940            }
941        }
942        Ok((dh, ds, dl))
943    }
944
945    fn fill_from_bgr24(&mut self, frame: &VideoFrame<'_>) -> Result<()> {
946        let row_len = frame.width as usize * 3;
947        for y in 0..frame.height {
948            let start = y as usize * frame.stride;
949            let row =
950                frame
951                    .data
952                    .get(start..start + row_len)
953                    .ok_or(DetectError::InvalidFrameBuffer {
954                        expected: start + row_len,
955                        actual: frame.data.len(),
956                    })?;
957            for pixel in row.chunks_exact(3) {
958                let (r, g, b) = (pixel[2], pixel[1], pixel[0]);
959                let (h, s, v) = rgb_to_hsv(r, g, b);
960                self.hue.push(h);
961                self.sat.push(s);
962                self.lum.push(v);
963            }
964        }
965        Ok(())
966    }
967
968    fn fill_from_bgr24_and_diff(
969        &mut self,
970        frame: &VideoFrame<'_>,
971        previous: &FrameData,
972    ) -> Result<(u64, u64, u64)> {
973        let row_len = frame.width as usize * 3;
974        let mut index = 0usize;
975        let mut dh = 0u64;
976        let mut ds = 0u64;
977        let mut dl = 0u64;
978        for y in 0..frame.height {
979            let start = y as usize * frame.stride;
980            let row =
981                frame
982                    .data
983                    .get(start..start + row_len)
984                    .ok_or(DetectError::InvalidFrameBuffer {
985                        expected: start + row_len,
986                        actual: frame.data.len(),
987                    })?;
988            for pixel in row.chunks_exact(3) {
989                let (h, s, v) = rgb_to_hsv(pixel[2], pixel[1], pixel[0]);
990                dh += abs_diff_u8(h, previous.hue[index]) as u64;
991                ds += abs_diff_u8(s, previous.sat[index]) as u64;
992                dl += abs_diff_u8(v, previous.lum[index]) as u64;
993                self.hue.push(h);
994                self.sat.push(s);
995                self.lum.push(v);
996                index += 1;
997            }
998        }
999        Ok((dh, ds, dl))
1000    }
1001}
1002
/// Converts an 8-bit RGB triple into 8-bit `(hue, saturation, value)`.
///
/// Channels are normalised to `0.0..=1.0`. Hue is computed in degrees, wrapped
/// to be non-negative and rescaled so that 360 degrees maps to 255. Saturation
/// and value are rescaled from `0.0..=1.0` to `0..=255`. Pixels with no channel
/// spread get hue 0, and black gets saturation 0.
fn rgb_to_hsv(r: u8, g: u8, b: u8) -> (u8, u8, u8) {
    let [red, green, blue] = [r, g, b].map(|channel| channel as f32 / 255.0);
    let value = red.max(green).max(blue);
    let chroma = value - red.min(green).min(blue);

    // Position on the six-sector colour wheel; the dominant channel picks the sector.
    let sector = if chroma == 0.0 {
        0.0
    } else if value == red {
        ((green - blue) / chroma) % 6.0
    } else if value == green {
        ((blue - red) / chroma) + 2.0
    } else {
        ((red - green) / chroma) + 4.0
    };
    let degrees = 60.0 * sector;
    // The red sector can yield a negative angle; wrap it around the circle.
    let degrees = if degrees < 0.0 {
        degrees + 360.0
    } else {
        degrees
    };
    let saturation = if value == 0.0 { 0.0 } else { chroma / value };

    let hue_byte = (degrees / 360.0 * 255.0) as u8;
    let sat_byte = (saturation * 255.0) as u8;
    let value_byte = (value * 255.0) as u8;
    (hue_byte, sat_byte, value_byte)
}
1027
/// Returns the mean absolute difference between positionally paired bytes of
/// `left` and `right`.
///
/// Elements without a partner in the other slice contribute nothing to the
/// sum; the divisor is always `left.len()`. An empty `left` yields `0.0`
/// instead of the `NaN` that dividing zero by zero would produce.
fn mean_abs_diff(left: &[u8], right: &[u8]) -> f32 {
    if left.is_empty() {
        return 0.0;
    }
    let total: u64 = left
        .iter()
        .zip(right)
        .map(|(&a, &b)| u64::from(a.abs_diff(b)))
        .sum();
    total as f32 / left.len() as f32
}
1035
/// Returns `|left - right|` for two bytes, widened to `u16`.
fn abs_diff_u8(left: u8, right: u8) -> u16 {
    u16::from(left.abs_diff(right))
}
1039
1040fn detect_edges(lum: &[u8], width: u32, height: u32, kernel_size: Option<usize>) -> Vec<u8> {
1041    let width = width as usize;
1042    let height = height as usize;
1043    if width < 3 || height < 3 {
1044        return vec![0; width * height];
1045    }
1046    let mut edges = vec![0_u8; width * height];
1047    for y in 1..height - 1 {
1048        for x in 1..width - 1 {
1049            let i = |x: usize, y: usize| lum[y * width + x] as i32;
1050            let gx = -i(x - 1, y - 1) + i(x + 1, y - 1) - 2 * i(x - 1, y) + 2 * i(x + 1, y)
1051                - i(x - 1, y + 1)
1052                + i(x + 1, y + 1);
1053            let gy = -i(x - 1, y - 1) - 2 * i(x, y - 1) - i(x + 1, y - 1)
1054                + i(x - 1, y + 1)
1055                + 2 * i(x, y + 1)
1056                + i(x + 1, y + 1);
1057            let mag = ((gx * gx + gy * gy) as f32).sqrt();
1058            edges[y * width + x] = if mag > 64.0 { 255 } else { 0 };
1059        }
1060    }
1061    dilate(&edges, width, height, kernel_size.unwrap_or(3))
1062}
1063
/// Applies a square maximum filter to a row-major `width` x `height` plane.
///
/// Each output sample is the largest input sample within `kernel_size / 2`
/// pixels horizontally and vertically; the window is clamped at the plane
/// borders. A `kernel_size` of 0 or 1 therefore reproduces the input.
///
/// # Panics
///
/// Panics if `input` holds fewer than `width * height` samples.
fn dilate(input: &[u8], width: usize, height: usize, kernel_size: usize) -> Vec<u8> {
    let reach = kernel_size / 2;
    let mut dilated = vec![0_u8; input.len()];
    for row in 0..height {
        let window_rows = row.saturating_sub(reach)..=(row + reach).min(height - 1);
        for col in 0..width {
            let window_cols = col.saturating_sub(reach)..=(col + reach).min(width - 1);
            // The window always contains (col, row), so the fallback is never used.
            let brightest = window_rows
                .clone()
                .flat_map(|yy| window_cols.clone().map(move |xx| input[yy * width + xx]))
                .max()
                .unwrap_or(0);
            dilated[row * width + col] = brightest;
        }
    }
    dilated
}
1084
1085fn position_like(current: FramePosition, frame_index: u64) -> FramePosition {
1086    let delta = frame_index as i64 - current.frame_index as i64;
1087    FramePosition {
1088        frame_index,
1089        timestamp: Timestamp::new(current.timestamp.pts + delta, current.timestamp.timebase),
1090    }
1091}
1092
1093/// Trait for video source implementations.
1094pub trait VideoSource {
1095    /// Returns next frame.
1096    fn next_frame(&mut self) -> Result<Option<OwnedVideoFrame>>;
1097    /// Returns frame rate.
1098    fn frame_rate(&self) -> Rational64;
1099}
1100
1101#[derive(Debug, Clone, PartialEq, Eq)]
1102/// Data type for owned video frame.
1103pub struct OwnedVideoFrame {
1104    /// The position value.
1105    pub position: FramePosition,
1106    /// Width in pixels.
1107    pub width: u32,
1108    /// Height in pixels.
1109    pub height: u32,
1110    /// The pixel format value.
1111    pub pixel_format: PixelFormat,
1112    /// Underlying data buffer.
1113    pub data: Vec<u8>,
1114    /// The stride value.
1115    pub stride: usize,
1116}
1117
1118impl OwnedVideoFrame {
1119    /// Borrows this value as a frame.
1120    pub fn as_frame(&self) -> VideoFrame<'_> {
1121        VideoFrame {
1122            position: self.position,
1123            width: self.width,
1124            height: self.height,
1125            pixel_format: self.pixel_format,
1126            data: &self.data,
1127            stride: self.stride,
1128        }
1129    }
1130}
1131
/// Numeric representation of the samples held by an [`AudioBuffer`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AudioSampleFormat {
    /// Unsigned 8-bit integer samples.
    U8,
    /// Signed 16-bit integer samples.
    I16,
    /// Signed 32-bit integer samples.
    I32,
    /// 32-bit floating-point samples.
    F32,
}
1144
/// Owned audio samples, tagged with their sample format.
#[derive(Debug, Clone, PartialEq)]
pub enum AudioBuffer {
    /// Unsigned 8-bit integer samples.
    U8(Vec<u8>),
    /// Signed 16-bit integer samples.
    I16(Vec<i16>),
    /// Signed 32-bit integer samples.
    I32(Vec<i32>),
    /// 32-bit floating-point samples.
    F32(Vec<f32>),
}
1157
1158impl AudioBuffer {
1159    /// Returns len.
1160    pub fn len(&self) -> usize {
1161        match self {
1162            Self::U8(values) => values.len(),
1163            Self::I16(values) => values.len(),
1164            Self::I32(values) => values.len(),
1165            Self::F32(values) => values.len(),
1166        }
1167    }
1168
1169    /// Returns whether is empty.
1170    pub fn is_empty(&self) -> bool {
1171        self.len() == 0
1172    }
1173
1174    /// Returns sample format.
1175    pub fn sample_format(&self) -> AudioSampleFormat {
1176        match self {
1177            Self::U8(_) => AudioSampleFormat::U8,
1178            Self::I16(_) => AudioSampleFormat::I16,
1179            Self::I32(_) => AudioSampleFormat::I32,
1180            Self::F32(_) => AudioSampleFormat::F32,
1181        }
1182    }
1183}
1184
1185#[derive(Debug, Clone, Copy)]
1186/// Data type for audio frame.
1187pub struct AudioFrame<'a> {
1188    /// Timestamp associated with this value.
1189    pub timestamp: Timestamp,
1190    /// Sample rate in hertz.
1191    pub sample_rate: u32,
1192    /// Number of audio channels.
1193    pub channels: u16,
1194    /// Underlying data buffer.
1195    pub data: &'a AudioBuffer,
1196}
1197
1198impl<'a> AudioFrame<'a> {
1199    /// Creates a new value.
1200    pub fn new(
1201        timestamp: Timestamp,
1202        sample_rate: u32,
1203        channels: u16,
1204        data: &'a AudioBuffer,
1205    ) -> Result<Self> {
1206        if sample_rate == 0 || channels == 0 {
1207            return Err(DetectError::InvalidAudioFormat {
1208                sample_rate,
1209                channels,
1210            });
1211        }
1212        Ok(Self {
1213            timestamp,
1214            sample_rate,
1215            channels,
1216            data,
1217        })
1218    }
1219
1220    /// Returns sample format.
1221    pub fn sample_format(&self) -> AudioSampleFormat {
1222        self.data.sample_format()
1223    }
1224
1225    /// Returns sample count.
1226    pub fn sample_count(&self) -> usize {
1227        self.data.len()
1228    }
1229
1230    /// Returns samples per channel.
1231    pub fn samples_per_channel(&self) -> usize {
1232        self.sample_count() / self.channels as usize
1233    }
1234
1235    /// Returns duration seconds.
1236    pub fn duration_seconds(&self) -> f64 {
1237        self.samples_per_channel() as f64 / self.sample_rate as f64
1238    }
1239}
1240
1241#[derive(Debug, Clone, PartialEq)]
1242/// Data type for owned audio frame.
1243pub struct OwnedAudioFrame {
1244    /// Timestamp associated with this value.
1245    pub timestamp: Timestamp,
1246    /// Sample rate in hertz.
1247    pub sample_rate: u32,
1248    /// Number of audio channels.
1249    pub channels: u16,
1250    /// Underlying data buffer.
1251    pub data: AudioBuffer,
1252}
1253
1254impl OwnedAudioFrame {
1255    /// Creates a new value.
1256    pub fn new(
1257        timestamp: Timestamp,
1258        sample_rate: u32,
1259        channels: u16,
1260        data: AudioBuffer,
1261    ) -> Result<Self> {
1262        AudioFrame::new(timestamp, sample_rate, channels, &data)?;
1263        Ok(Self {
1264            timestamp,
1265            sample_rate,
1266            channels,
1267            data,
1268        })
1269    }
1270
1271    /// Borrows this value as a frame.
1272    pub fn as_frame(&self) -> Result<AudioFrame<'_>> {
1273        AudioFrame::new(self.timestamp, self.sample_rate, self.channels, &self.data)
1274    }
1275
1276    /// Returns sample format.
1277    pub fn sample_format(&self) -> AudioSampleFormat {
1278        self.data.sample_format()
1279    }
1280
1281    /// Returns samples per channel.
1282    pub fn samples_per_channel(&self) -> usize {
1283        if self.channels == 0 {
1284            return 0;
1285        }
1286        self.data.len() / self.channels as usize
1287    }
1288
1289    /// Returns duration seconds.
1290    pub fn duration_seconds(&self) -> f64 {
1291        if self.sample_rate == 0 {
1292            return 0.0;
1293        }
1294        self.samples_per_channel() as f64 / self.sample_rate as f64
1295    }
1296}
1297
1298#[derive(Debug, Clone, Copy)]
1299/// Data type for text segment.
1300pub struct TextSegment<'a> {
1301    /// The segment index value.
1302    pub segment_index: u64,
1303    /// Timestamp associated with this value.
1304    pub timestamp: Option<Timestamp>,
1305    /// Text content for this value.
1306    pub text: &'a str,
1307    /// Language tag for this value.
1308    pub language: Option<&'a str>,
1309    /// The is final value.
1310    pub is_final: bool,
1311}
1312
1313#[derive(Debug, Clone, PartialEq, Eq)]
1314/// Data type for owned text segment.
1315pub struct OwnedTextSegment {
1316    /// The segment index value.
1317    pub segment_index: u64,
1318    /// Timestamp associated with this value.
1319    pub timestamp: Option<Timestamp>,
1320    /// Text content for this value.
1321    pub text: String,
1322    /// Language tag for this value.
1323    pub language: Option<String>,
1324    /// The is final value.
1325    pub is_final: bool,
1326}
1327
1328impl OwnedTextSegment {
1329    /// Creates a new value.
1330    pub fn new(segment_index: u64, text: impl Into<String>) -> Self {
1331        Self {
1332            segment_index,
1333            timestamp: None,
1334            text: text.into(),
1335            language: None,
1336            is_final: true,
1337        }
1338    }
1339
1340    /// Returns timestamp.
1341    pub fn timestamp(mut self, timestamp: Timestamp) -> Self {
1342        self.timestamp = Some(timestamp);
1343        self
1344    }
1345
1346    /// Returns language.
1347    pub fn language(mut self, language: impl Into<String>) -> Self {
1348        self.language = Some(language.into());
1349        self
1350    }
1351
1352    /// Returns finality.
1353    pub fn finality(mut self, is_final: bool) -> Self {
1354        self.is_final = is_final;
1355        self
1356    }
1357
1358    /// Borrows this value as a segment.
1359    pub fn as_segment(&self) -> TextSegment<'_> {
1360        TextSegment {
1361            segment_index: self.segment_index,
1362            timestamp: self.timestamp,
1363            text: &self.text,
1364            language: self.language.as_deref(),
1365            is_final: self.is_final,
1366        }
1367    }
1368}
1369
1370#[derive(Debug, Default, Clone, PartialEq)]
1371/// Data type for detection result.
1372pub struct DetectionResult {
1373    /// The scenes value.
1374    pub scenes: Vec<Scene>,
1375    /// The cuts value.
1376    pub cuts: Vec<Cut>,
1377    /// The metrics value.
1378    pub metrics: MetricsStore,
1379    /// The frames processed value.
1380    pub frames_processed: u64,
1381}
1382
1383#[derive(Debug, Clone, PartialEq)]
1384/// Data type for frame analysis.
1385pub struct FrameAnalysis {
1386    /// The position value.
1387    pub position: FramePosition,
1388    /// The cuts value.
1389    pub cuts: Vec<Cut>,
1390    /// The frames processed value.
1391    pub frames_processed: u64,
1392}
1393
1394#[derive(Debug, Clone, Copy, PartialEq, Eq, serde::Serialize, serde::Deserialize)]
1395#[serde(rename_all = "camelCase")]
1396/// Data type for bounding box.
1397pub struct BoundingBox {
1398    /// The x value.
1399    pub x: u32,
1400    /// The y value.
1401    pub y: u32,
1402    /// Width in pixels.
1403    pub width: u32,
1404    /// Height in pixels.
1405    pub height: u32,
1406}
1407
1408impl BoundingBox {
1409    /// Creates a new value.
1410    pub fn new(x: u32, y: u32, width: u32, height: u32) -> Result<Self> {
1411        if width == 0 || height == 0 {
1412            return Err(DetectError::InvalidArgument(
1413                "bounding box must have non-zero width and height".to_string(),
1414            ));
1415        }
1416        Ok(Self {
1417            x,
1418            y,
1419            width,
1420            height,
1421        })
1422    }
1423}
1424
1425impl From<BoundingBox> for math_geometry_2d::RectU32 {
1426    fn from(value: BoundingBox) -> Self {
1427        Self {
1428            x: value.x,
1429            y: value.y,
1430            width: value.width,
1431            height: value.height,
1432        }
1433    }
1434}
1435
1436impl TryFrom<math_geometry_2d::RectU32> for BoundingBox {
1437    type Error = DetectError;
1438
1439    fn try_from(value: math_geometry_2d::RectU32) -> std::result::Result<Self, Self::Error> {
1440        value.validate()?;
1441        Self::new(value.x, value.y, value.width, value.height)
1442    }
1443}
1444
1445impl From<math_geometry_2d::GeometryError> for DetectError {
1446    fn from(value: math_geometry_2d::GeometryError) -> Self {
1447        Self::InvalidArgument(value.to_string())
1448    }
1449}
1450
/// Category of an [`Observation`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ObservationKind {
    /// A text observation.
    Text,
    /// A face observation.
    Face,
    /// An object observation.
    Object,
    /// A scene observation.
    Scene,
    /// An analyzer-defined category identified by name.
    Custom(String),
}
1465
1466#[derive(Debug, Clone, PartialEq)]
1467/// Data type for observation.
1468pub struct Observation {
1469    /// Timestamp associated with this value.
1470    pub timestamp: Option<Timestamp>,
1471    /// The frame value.
1472    pub frame: Option<FramePosition>,
1473    /// The scene index value.
1474    pub scene_index: Option<u64>,
1475    /// The analyzer value.
1476    pub analyzer: String,
1477    /// The kind value.
1478    pub kind: ObservationKind,
1479    /// Label assigned to this value.
1480    pub label: Option<String>,
1481    /// Text content for this value.
1482    pub text: Option<String>,
1483    /// Score assigned to this value.
1484    pub score: Option<f32>,
1485    /// The region value.
1486    pub region: Option<BoundingBox>,
1487    /// The track identifier value.
1488    pub track_id: Option<String>,
1489    /// The attributes value.
1490    pub attributes: BTreeMap<String, String>,
1491}
1492
1493impl Observation {
1494    /// Creates a new value.
1495    pub fn new(analyzer: impl Into<String>, kind: ObservationKind) -> Self {
1496        Self {
1497            timestamp: None,
1498            frame: None,
1499            scene_index: None,
1500            analyzer: analyzer.into(),
1501            kind,
1502            label: None,
1503            text: None,
1504            score: None,
1505            region: None,
1506            track_id: None,
1507            attributes: BTreeMap::new(),
1508        }
1509    }
1510
1511    /// Returns at frame.
1512    pub fn at_frame(mut self, position: FramePosition) -> Self {
1513        self.timestamp = Some(position.timestamp);
1514        self.frame = Some(position);
1515        self
1516    }
1517
1518    /// Returns at timestamp.
1519    pub fn at_timestamp(mut self, timestamp: Timestamp) -> Self {
1520        self.timestamp = Some(timestamp);
1521        self
1522    }
1523
1524    /// Returns in scene.
1525    pub fn in_scene(mut self, scene_index: u64) -> Self {
1526        self.scene_index = Some(scene_index);
1527        self
1528    }
1529
1530    /// Returns label.
1531    pub fn label(mut self, label: impl Into<String>) -> Self {
1532        self.label = Some(label.into());
1533        self
1534    }
1535
1536    /// Returns text.
1537    pub fn text(mut self, text: impl Into<String>) -> Self {
1538        self.text = Some(text.into());
1539        self
1540    }
1541
1542    /// Returns score.
1543    pub fn score(mut self, score: f32) -> Self {
1544        self.score = Some(score);
1545        self
1546    }
1547
1548    /// Returns region.
1549    pub fn region(mut self, region: BoundingBox) -> Self {
1550        self.region = Some(region);
1551        self
1552    }
1553
1554    /// Returns track identifier.
1555    pub fn track_id(mut self, track_id: impl Into<String>) -> Self {
1556        self.track_id = Some(track_id.into());
1557        self
1558    }
1559
1560    /// Returns attribute.
1561    pub fn attribute(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
1562        self.attributes.insert(key.into(), value.into());
1563        self
1564    }
1565
1566    /// Converts this value to text segment.
1567    pub fn to_text_segment(&self, segment_index: u64) -> Option<OwnedTextSegment> {
1568        let text = self.text.as_ref()?;
1569        let mut segment = OwnedTextSegment::new(segment_index, text.clone());
1570        if let Some(timestamp) = self.timestamp {
1571            segment = segment.timestamp(timestamp);
1572        }
1573        if let Some(language) = self.attributes.get("language") {
1574            segment = segment.language(language.clone());
1575        }
1576        Some(segment)
1577    }
1578}
1579
1580#[derive(Debug, Default, Clone, PartialEq)]
1581/// Data type for video analysis result.
1582pub struct VideoAnalysisResult {
1583    /// The observations value.
1584    pub observations: Vec<Observation>,
1585    /// The frames processed value.
1586    pub frames_processed: u64,
1587}
1588
1589#[derive(Debug, Clone, PartialEq)]
1590/// Data type for video frame analysis.
1591pub struct VideoFrameAnalysis {
1592    /// The position value.
1593    pub position: FramePosition,
1594    /// The observations value.
1595    pub observations: Vec<Observation>,
1596    /// The frames processed value.
1597    pub frames_processed: u64,
1598}
1599
1600#[derive(Debug, Clone, PartialEq)]
1601/// Data type for scene analysis.
1602pub struct SceneAnalysis {
1603    /// The scene index value.
1604    pub scene_index: u64,
1605    /// The scene value.
1606    pub scene: Scene,
1607    /// The observations value.
1608    pub observations: Vec<Observation>,
1609}
1610
1611#[derive(Debug, Clone, PartialEq)]
1612/// Data type for realtime video frame analysis.
1613pub struct RealtimeVideoFrameAnalysis {
1614    /// The position value.
1615    pub position: FramePosition,
1616    /// The scene value.
1617    pub scene: FrameAnalysis,
1618    /// The observations value.
1619    pub observations: Vec<Observation>,
1620    /// The completed scenes value.
1621    pub completed_scenes: Vec<SceneAnalysis>,
1622    /// The frames processed value.
1623    pub frames_processed: u64,
1624}
1625
1626#[derive(Debug, Default, Clone, PartialEq)]
1627/// Data type for realtime video analysis result.
1628pub struct RealtimeVideoAnalysisResult {
1629    /// The detection value.
1630    pub detection: DetectionResult,
1631    /// The observations value.
1632    pub observations: Vec<Observation>,
1633    /// The scenes value.
1634    pub scenes: Vec<SceneAnalysis>,
1635    /// The frames processed value.
1636    pub frames_processed: u64,
1637}
1638
1639#[derive(Debug, Clone, PartialEq)]
1640/// Data type for analysis event.
1641pub struct AnalysisEvent {
1642    /// Timestamp associated with this value.
1643    pub timestamp: Option<Timestamp>,
1644    /// The analyzer value.
1645    pub analyzer: String,
1646    /// Label assigned to this value.
1647    pub label: String,
1648    /// Score assigned to this value.
1649    pub score: Option<f32>,
1650}
1651
1652impl AnalysisEvent {
1653    /// Creates a new value.
1654    pub fn new(analyzer: impl Into<String>, label: impl Into<String>) -> Self {
1655        Self {
1656            timestamp: None,
1657            analyzer: analyzer.into(),
1658            label: label.into(),
1659            score: None,
1660        }
1661    }
1662
1663    /// Returns at timestamp.
1664    pub fn at_timestamp(mut self, timestamp: Timestamp) -> Self {
1665        self.timestamp = Some(timestamp);
1666        self
1667    }
1668
1669    /// Returns score.
1670    pub fn score(mut self, score: f32) -> Self {
1671        self.score = Some(score);
1672        self
1673    }
1674}
1675
1676#[derive(Debug, Default, Clone, PartialEq)]
1677/// Data type for audio analysis result.
1678pub struct AudioAnalysisResult {
1679    /// The events value.
1680    pub events: Vec<AnalysisEvent>,
1681    /// The frames processed value.
1682    pub frames_processed: u64,
1683}
1684
1685#[derive(Debug, Clone, PartialEq)]
1686/// Data type for audio analysis.
1687pub struct AudioAnalysis {
1688    /// Timestamp associated with this value.
1689    pub timestamp: Timestamp,
1690    /// The events value.
1691    pub events: Vec<AnalysisEvent>,
1692    /// The frames processed value.
1693    pub frames_processed: u64,
1694}
1695
1696#[derive(Debug, Default, Clone, PartialEq)]
1697/// Data type for text analysis result.
1698pub struct TextAnalysisResult {
1699    /// The events value.
1700    pub events: Vec<AnalysisEvent>,
1701    /// The segments processed value.
1702    pub segments_processed: u64,
1703}
1704
1705#[derive(Debug, Clone, PartialEq)]
1706/// Data type for text analysis.
1707pub struct TextAnalysis {
1708    /// The segment index value.
1709    pub segment_index: u64,
1710    /// The events value.
1711    pub events: Vec<AnalysisEvent>,
1712    /// The segments processed value.
1713    pub segments_processed: u64,
1714}
1715
1716/// Data type for scene pipeline.
1717pub struct ScenePipeline {
1718    detectors: Vec<Box<dyn SceneDetector>>,
1719    start_in_scene: bool,
1720    crop: Option<CropRegion>,
1721    auto_downscale_min_width: Option<u32>,
1722    state: ScenePipelineState,
1723}
1724
1725#[derive(Debug, Default, Clone)]
1726struct ScenePipelineState {
1727    cuts: Vec<Cut>,
1728    metrics: MetricsStore,
1729    first_position: Option<FramePosition>,
1730    last_position: Option<FramePosition>,
1731    frames_processed: u64,
1732    finished: bool,
1733}
1734
/// A rectangular crop described by two corner coordinates.
///
/// [`CropRegion::new`] rejects regions whose corners share an x or a y
/// coordinate; building the struct directly bypasses that check.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct CropRegion {
    /// X coordinate of the first corner.
    pub x0: u32,
    /// Y coordinate of the first corner.
    pub y0: u32,
    /// X coordinate of the second corner.
    pub x1: u32,
    /// Y coordinate of the second corner.
    pub y1: u32,
}
1747
1748impl CropRegion {
1749    /// Creates a new value.
1750    pub fn new(x0: u32, y0: u32, x1: u32, y1: u32) -> Result<Self> {
1751        if x0 == x1 || y0 == y1 {
1752            return Err(DetectError::InvalidArgument(
1753                "crop region must have non-zero width and height".to_string(),
1754            ));
1755        }
1756        Ok(Self { x0, y0, x1, y1 })
1757    }
1758}
1759
1760impl ScenePipeline {
1761    /// Returns builder.
1762    pub fn builder() -> ScenePipelineBuilder {
1763        ScenePipelineBuilder::default()
1764    }
1765
1766    /// Returns detect.
1767    pub fn detect<S: VideoSource>(&mut self, source: &mut S) -> Result<DetectionResult> {
1768        self.reset();
1769        while let Some(frame) = source.next_frame()? {
1770            self.process_frame(frame)?;
1771        }
1772        self.finish_detection()
1773    }
1774
1775    /// Returns process frame.
1776    pub fn process_frame(&mut self, frame: OwnedVideoFrame) -> Result<FrameAnalysis> {
1777        let frame = self.prepare_frame(frame)?;
1778        self.process_frame_ref(&frame.as_frame())
1779    }
1780
1781    /// Returns process frame ref.
1782    pub fn process_frame_ref(&mut self, frame: &VideoFrame<'_>) -> Result<FrameAnalysis> {
1783        if self.state.finished {
1784            return Err(DetectError::InvalidArgument(
1785                "cannot process frames after finish_detection; call reset first".to_string(),
1786            ));
1787        }
1788        self.validate_frame_options(frame)?;
1789        self.state.first_position.get_or_insert(frame.position);
1790        self.state.last_position = Some(frame.position);
1791
1792        let mut cuts = Vec::new();
1793        for detector in &mut self.detectors {
1794            let mut new_cuts = detector.process_frame(frame, Some(&mut self.state.metrics))?;
1795            cuts.append(&mut new_cuts);
1796        }
1797        let cuts = self.record_cuts(cuts);
1798        self.state.frames_processed += 1;
1799
1800        Ok(FrameAnalysis {
1801            position: frame.position,
1802            cuts,
1803            frames_processed: self.state.frames_processed,
1804        })
1805    }
1806
1807    /// Returns finish detection.
1808    pub fn finish_detection(&mut self) -> Result<DetectionResult> {
1809        if !self.state.finished {
1810            if let Some(last) = self.state.last_position {
1811                let mut cuts = Vec::new();
1812                for detector in &mut self.detectors {
1813                    let mut new_cuts = detector.finish(last, Some(&mut self.state.metrics))?;
1814                    cuts.append(&mut new_cuts);
1815                }
1816                self.record_cuts(cuts);
1817            }
1818            self.state.finished = true;
1819        }
1820
1821        let scenes = if let (Some(start), Some(end)) =
1822            (self.state.first_position, self.state.last_position)
1823        {
1824            scenes_from_cuts(&self.state.cuts, start, end, self.start_in_scene)
1825        } else {
1826            Vec::new()
1827        };
1828
1829        Ok(DetectionResult {
1830            scenes,
1831            cuts: self.state.cuts.clone(),
1832            metrics: self.state.metrics.clone(),
1833            frames_processed: self.state.frames_processed,
1834        })
1835    }
1836
1837    /// Returns reset.
1838    pub fn reset(&mut self) {
1839        self.state = ScenePipelineState::default();
1840    }
1841
1842    /// Returns metrics.
1843    pub fn metrics(&self) -> &MetricsStore {
1844        &self.state.metrics
1845    }
1846
1847    /// Returns cuts.
1848    pub fn cuts(&self) -> &[Cut] {
1849        &self.state.cuts
1850    }
1851
1852    /// Returns frames processed.
1853    pub fn frames_processed(&self) -> u64 {
1854        self.state.frames_processed
1855    }
1856
1857    fn record_cuts(&mut self, mut cuts: Vec<Cut>) -> Vec<Cut> {
1858        cuts.sort_by_key(|cut| cut.position.frame_index);
1859        cuts.dedup_by_key(|cut| cut.position.frame_index);
1860        let mut accepted = Vec::new();
1861        for cut in cuts {
1862            if self
1863                .state
1864                .cuts
1865                .iter()
1866                .any(|existing| existing.position.frame_index == cut.position.frame_index)
1867            {
1868                continue;
1869            }
1870            self.state.cuts.push(cut.clone());
1871            accepted.push(cut);
1872        }
1873        self.state.cuts.sort_by_key(|cut| cut.position.frame_index);
1874        accepted
1875    }
1876
1877    fn prepare_frame(&self, frame: OwnedVideoFrame) -> Result<OwnedVideoFrame> {
1878        self.validate_frame_options(&frame.as_frame())?;
1879        Ok(frame)
1880    }
1881
1882    fn validate_frame_options(&self, frame: &VideoFrame<'_>) -> Result<()> {
1883        let _ = self.auto_downscale_min_width;
1884        if let Some(crop) = self.crop {
1885            if crop.x0 >= frame.width || crop.y0 >= frame.height {
1886                return Err(DetectError::InvalidArgument(
1887                    "crop starts outside frame boundary".to_string(),
1888                ));
1889            }
1890        }
1891        Ok(())
1892    }
1893}
1894
1895#[derive(Default)]
1896/// Data type for scene pipeline builder.
1897pub struct ScenePipelineBuilder {
1898    detectors: Vec<Box<dyn SceneDetector>>,
1899    start_in_scene: bool,
1900    crop: Option<CropRegion>,
1901    auto_downscale_min_width: Option<u32>,
1902}
1903
1904impl ScenePipelineBuilder {
1905    /// Returns detector.
1906    pub fn detector<D: SceneDetector + 'static>(mut self, detector: D) -> Self {
1907        self.detectors.push(Box::new(detector));
1908        self
1909    }
1910
1911    /// Returns start in scene.
1912    pub fn start_in_scene(mut self, value: bool) -> Self {
1913        self.start_in_scene = value;
1914        self
1915    }
1916
1917    /// Returns crop.
1918    pub fn crop(mut self, value: Option<CropRegion>) -> Self {
1919        self.crop = value;
1920        self
1921    }
1922
1923    /// Returns auto downscale min width.
1924    pub fn auto_downscale_min_width(mut self, value: u32) -> Self {
1925        self.auto_downscale_min_width = Some(value);
1926        self
1927    }
1928
1929    /// Returns build.
1930    pub fn build(self) -> Result<ScenePipeline> {
1931        if self.detectors.is_empty() {
1932            return Err(DetectError::InvalidArgument(
1933                "at least one detector is required".to_string(),
1934            ));
1935        }
1936        Ok(ScenePipeline {
1937            detectors: self.detectors,
1938            start_in_scene: self.start_in_scene,
1939            crop: self.crop,
1940            auto_downscale_min_width: self.auto_downscale_min_width,
1941            state: ScenePipelineState::default(),
1942        })
1943    }
1944}
1945
1946/// Trait for video analyzer implementations.
1947pub trait VideoAnalyzer {
1948    /// Returns name.
1949    fn name(&self) -> &str;
1950
1951    /// Returns process frame.
1952    fn process_frame(&mut self, frame: &VideoFrame<'_>) -> Result<Vec<Observation>>;
1953
1954    /// Returns finish.
1955    fn finish(&mut self, _last_position: Option<FramePosition>) -> Result<Vec<Observation>> {
1956        Ok(Vec::new())
1957    }
1958}
1959
/// Wraps an analyzer so that it only sees frames whose index is a multiple of
/// a fixed stride.
#[derive(Debug, Clone, PartialEq)]
pub struct SampledVideoAnalyzer<A> {
    /// The wrapped analyzer.
    inner: A,
    /// Sampling stride; `new` clamps it to at least 1.
    every: u64,
}
1966
1967impl<A> SampledVideoAnalyzer<A> {
1968    /// Creates a new value.
1969    pub fn new(inner: A, every: u64) -> Self {
1970        Self {
1971            inner,
1972            every: every.max(1),
1973        }
1974    }
1975
1976    /// Returns inner.
1977    pub fn inner(&self) -> &A {
1978        &self.inner
1979    }
1980
1981    /// Returns inner mut.
1982    pub fn inner_mut(&mut self) -> &mut A {
1983        &mut self.inner
1984    }
1985
1986    /// Returns every.
1987    pub fn every(&self) -> u64 {
1988        self.every
1989    }
1990}
1991
1992impl<A: VideoAnalyzer> VideoAnalyzer for SampledVideoAnalyzer<A> {
1993    fn name(&self) -> &str {
1994        self.inner.name()
1995    }
1996
1997    fn process_frame(&mut self, frame: &VideoFrame<'_>) -> Result<Vec<Observation>> {
1998        if frame.position.frame_index.is_multiple_of(self.every) {
1999            self.inner.process_frame(frame)
2000        } else {
2001            Ok(Vec::new())
2002        }
2003    }
2004
2005    fn finish(&mut self, last_position: Option<FramePosition>) -> Result<Vec<Observation>> {
2006        self.inner.finish(last_position)
2007    }
2008}
2009
2010/// Data type for video analysis pipeline.
2011pub struct VideoAnalysisPipeline {
2012    analyzers: Vec<Box<dyn VideoAnalyzer>>,
2013    state: VideoAnalysisPipelineState,
2014}
2015
2016#[derive(Debug, Default, Clone)]
2017struct VideoAnalysisPipelineState {
2018    observations: Vec<Observation>,
2019    last_position: Option<FramePosition>,
2020    frames_processed: u64,
2021    finished: bool,
2022}
2023
2024impl VideoAnalysisPipeline {
2025    /// Returns builder.
2026    pub fn builder() -> VideoAnalysisPipelineBuilder {
2027        VideoAnalysisPipelineBuilder::default()
2028    }
2029
2030    /// Returns process frame.
2031    pub fn process_frame(&mut self, frame: OwnedVideoFrame) -> Result<VideoFrameAnalysis> {
2032        self.process_frame_ref(&frame.as_frame())
2033    }
2034
2035    /// Returns process frame ref.
2036    pub fn process_frame_ref(&mut self, frame: &VideoFrame<'_>) -> Result<VideoFrameAnalysis> {
2037        if self.state.finished {
2038            return Err(DetectError::InvalidArgument(
2039                "cannot process video frames after finish_analysis; call reset first".to_string(),
2040            ));
2041        }
2042        self.state.last_position = Some(frame.position);
2043
2044        let mut observations = Vec::new();
2045        for analyzer in &mut self.analyzers {
2046            let mut new_observations = analyzer.process_frame(frame)?;
2047            for observation in &mut new_observations {
2048                if observation.timestamp.is_none() {
2049                    observation.timestamp = Some(frame.position.timestamp);
2050                }
2051                if observation.frame.is_none() {
2052                    observation.frame = Some(frame.position);
2053                }
2054            }
2055            observations.append(&mut new_observations);
2056        }
2057        self.state.observations.extend(observations.iter().cloned());
2058        self.state.frames_processed += 1;
2059
2060        Ok(VideoFrameAnalysis {
2061            position: frame.position,
2062            observations,
2063            frames_processed: self.state.frames_processed,
2064        })
2065    }
2066
2067    /// Returns finish analysis.
2068    pub fn finish_analysis(&mut self) -> Result<VideoAnalysisResult> {
2069        if !self.state.finished {
2070            let mut observations = Vec::new();
2071            for analyzer in &mut self.analyzers {
2072                let mut new_observations = analyzer.finish(self.state.last_position)?;
2073                observations.append(&mut new_observations);
2074            }
2075            self.state.observations.extend(observations);
2076            self.state.finished = true;
2077        }
2078        Ok(VideoAnalysisResult {
2079            observations: self.state.observations.clone(),
2080            frames_processed: self.state.frames_processed,
2081        })
2082    }
2083
2084    /// Returns reset.
2085    pub fn reset(&mut self) {
2086        self.state = VideoAnalysisPipelineState::default();
2087    }
2088
2089    /// Returns observations.
2090    pub fn observations(&self) -> &[Observation] {
2091        &self.state.observations
2092    }
2093
2094    /// Returns frames processed.
2095    pub fn frames_processed(&self) -> u64 {
2096        self.state.frames_processed
2097    }
2098}
2099
2100#[derive(Default)]
2101/// Data type for video analysis pipeline builder.
2102pub struct VideoAnalysisPipelineBuilder {
2103    analyzers: Vec<Box<dyn VideoAnalyzer>>,
2104}
2105
2106impl VideoAnalysisPipelineBuilder {
2107    /// Returns analyzer.
2108    pub fn analyzer<A: VideoAnalyzer + 'static>(mut self, analyzer: A) -> Self {
2109        self.analyzers.push(Box::new(analyzer));
2110        self
2111    }
2112
2113    /// Returns build.
2114    pub fn build(self) -> Result<VideoAnalysisPipeline> {
2115        if self.analyzers.is_empty() {
2116            return Err(DetectError::InvalidArgument(
2117                "at least one video analyzer is required".to_string(),
2118            ));
2119        }
2120        Ok(VideoAnalysisPipeline {
2121            analyzers: self.analyzers,
2122            state: VideoAnalysisPipelineState::default(),
2123        })
2124    }
2125}
2126
2127/// Data type for realtime video pipeline.
2128pub struct RealtimeVideoPipeline {
2129    scene_pipeline: ScenePipeline,
2130    video_pipeline: Option<VideoAnalysisPipeline>,
2131    state: RealtimeVideoPipelineState,
2132}
2133
2134#[derive(Debug, Default, Clone)]
2135struct RealtimeVideoPipelineState {
2136    observations: Vec<Observation>,
2137    completed_scenes: Vec<SceneAnalysis>,
2138    open_scene_observations: Vec<Observation>,
2139    active_scene_start: Option<FramePosition>,
2140    active_scene_index: u64,
2141    first_position: Option<FramePosition>,
2142    last_position: Option<FramePosition>,
2143    frames_processed: u64,
2144    finished: bool,
2145}
2146
2147impl RealtimeVideoPipeline {
2148    /// Returns builder.
2149    pub fn builder() -> RealtimeVideoPipelineBuilder {
2150        RealtimeVideoPipelineBuilder::default()
2151    }
2152
2153    /// Creates a new value.
2154    pub fn new(
2155        scene_pipeline: ScenePipeline,
2156        video_pipeline: Option<VideoAnalysisPipeline>,
2157    ) -> Self {
2158        Self {
2159            scene_pipeline,
2160            video_pipeline,
2161            state: RealtimeVideoPipelineState::default(),
2162        }
2163    }
2164
2165    /// Returns process frame.
2166    pub fn process_frame(&mut self, frame: OwnedVideoFrame) -> Result<RealtimeVideoFrameAnalysis> {
2167        self.process_frame_ref(&frame.as_frame())
2168    }
2169
2170    /// Returns process frame ref.
2171    pub fn process_frame_ref(
2172        &mut self,
2173        frame: &VideoFrame<'_>,
2174    ) -> Result<RealtimeVideoFrameAnalysis> {
2175        if self.state.finished {
2176            return Err(DetectError::InvalidArgument(
2177                "cannot process video frames after finish_analysis; call reset first".to_string(),
2178            ));
2179        }
2180
2181        self.state.first_position.get_or_insert(frame.position);
2182        self.state.last_position = Some(frame.position);
2183        self.state.active_scene_start.get_or_insert(frame.position);
2184
2185        let scene = self.scene_pipeline.process_frame_ref(frame)?;
2186        let completed_scenes = self.close_scenes(&scene.cuts);
2187        let mut observations = if let Some(pipeline) = &mut self.video_pipeline {
2188            pipeline.process_frame_ref(frame)?.observations
2189        } else {
2190            Vec::new()
2191        };
2192        self.annotate_observations(&mut observations, frame.position);
2193        self.state
2194            .open_scene_observations
2195            .extend(observations.iter().cloned());
2196        self.state.observations.extend(observations.iter().cloned());
2197        self.state.frames_processed += 1;
2198
2199        Ok(RealtimeVideoFrameAnalysis {
2200            position: frame.position,
2201            scene,
2202            observations,
2203            completed_scenes,
2204            frames_processed: self.state.frames_processed,
2205        })
2206    }
2207
2208    /// Returns finish analysis.
2209    pub fn finish_analysis(&mut self) -> Result<RealtimeVideoAnalysisResult> {
2210        let detection = self.scene_pipeline.finish_detection()?;
2211        if !self.state.finished {
2212            let pending_cuts = detection
2213                .cuts
2214                .iter()
2215                .filter(|cut| {
2216                    self.state
2217                        .active_scene_start
2218                        .map(|start| cut.position.frame_index > start.frame_index)
2219                        .unwrap_or(false)
2220                })
2221                .cloned()
2222                .collect::<Vec<_>>();
2223            self.close_scenes(&pending_cuts);
2224
2225            if let Some(pipeline) = &mut self.video_pipeline {
2226                let already_observed = self.state.observations.len();
2227                let result = pipeline.finish_analysis()?;
2228                let mut final_observations = result
2229                    .observations
2230                    .into_iter()
2231                    .skip(already_observed)
2232                    .collect::<Vec<_>>();
2233                self.annotate_observations_without_frame(&mut final_observations);
2234                self.state
2235                    .open_scene_observations
2236                    .extend(final_observations.iter().cloned());
2237                self.state
2238                    .observations
2239                    .extend(final_observations.iter().cloned());
2240            }
2241            self.state.finished = true;
2242        }
2243
2244        let scenes = self.scene_analyses_for(&detection.scenes);
2245        Ok(RealtimeVideoAnalysisResult {
2246            detection,
2247            observations: self.state.observations.clone(),
2248            scenes,
2249            frames_processed: self.state.frames_processed,
2250        })
2251    }
2252
2253    /// Returns reset.
2254    pub fn reset(&mut self) {
2255        self.scene_pipeline.reset();
2256        if let Some(pipeline) = &mut self.video_pipeline {
2257            pipeline.reset();
2258        }
2259        self.state = RealtimeVideoPipelineState::default();
2260    }
2261
2262    /// Returns observations.
2263    pub fn observations(&self) -> &[Observation] {
2264        &self.state.observations
2265    }
2266
2267    /// Returns completed scenes.
2268    pub fn completed_scenes(&self) -> &[SceneAnalysis] {
2269        &self.state.completed_scenes
2270    }
2271
2272    /// Returns frames processed.
2273    pub fn frames_processed(&self) -> u64 {
2274        self.state.frames_processed
2275    }
2276
2277    fn close_scenes(&mut self, cuts: &[Cut]) -> Vec<SceneAnalysis> {
2278        let mut completed = Vec::new();
2279        for cut in cuts {
2280            let Some(start) = self.state.active_scene_start else {
2281                continue;
2282            };
2283            if cut.position.frame_index <= start.frame_index {
2284                continue;
2285            }
2286            let scene = Scene {
2287                start,
2288                end: cut.position,
2289            };
2290            let observations = std::mem::take(&mut self.state.open_scene_observations);
2291            let analysis = SceneAnalysis {
2292                scene_index: self.state.active_scene_index,
2293                scene,
2294                observations,
2295            };
2296            self.state.completed_scenes.push(analysis.clone());
2297            completed.push(analysis);
2298            self.state.active_scene_index += 1;
2299            self.state.active_scene_start = Some(cut.position);
2300        }
2301        completed
2302    }
2303
2304    fn annotate_observations(&self, observations: &mut [Observation], position: FramePosition) {
2305        for observation in observations {
2306            if observation.timestamp.is_none() {
2307                observation.timestamp = Some(position.timestamp);
2308            }
2309            if observation.frame.is_none() {
2310                observation.frame = Some(position);
2311            }
2312            if observation.scene_index.is_none() {
2313                observation.scene_index = Some(self.state.active_scene_index);
2314            }
2315        }
2316    }
2317
2318    fn annotate_observations_without_frame(&self, observations: &mut [Observation]) {
2319        for observation in observations {
2320            if observation.scene_index.is_none() {
2321                observation.scene_index = Some(self.state.active_scene_index);
2322            }
2323        }
2324    }
2325
2326    fn scene_analyses_for(&self, scenes: &[Scene]) -> Vec<SceneAnalysis> {
2327        scenes
2328            .iter()
2329            .enumerate()
2330            .map(|(index, scene)| {
2331                let scene_index = index as u64;
2332                let observations = self
2333                    .state
2334                    .observations
2335                    .iter()
2336                    .filter(|observation| observation.scene_index == Some(scene_index))
2337                    .cloned()
2338                    .collect();
2339                SceneAnalysis {
2340                    scene_index,
2341                    scene: scene.clone(),
2342                    observations,
2343                }
2344            })
2345            .collect()
2346    }
2347}
2348
2349#[derive(Default)]
2350/// Data type for realtime video pipeline builder.
2351pub struct RealtimeVideoPipelineBuilder {
2352    scene_pipeline: Option<ScenePipeline>,
2353    scene_builder: ScenePipelineBuilder,
2354    video_analyzers: Vec<Box<dyn VideoAnalyzer>>,
2355}
2356
2357impl RealtimeVideoPipelineBuilder {
2358    /// Returns scene pipeline.
2359    pub fn scene_pipeline(mut self, pipeline: ScenePipeline) -> Self {
2360        self.scene_pipeline = Some(pipeline);
2361        self
2362    }
2363
2364    /// Returns scene detector.
2365    pub fn scene_detector<D: SceneDetector + 'static>(mut self, detector: D) -> Self {
2366        self.scene_builder = self.scene_builder.detector(detector);
2367        self
2368    }
2369
2370    /// Returns video analyzer.
2371    pub fn video_analyzer<A: VideoAnalyzer + 'static>(mut self, analyzer: A) -> Self {
2372        self.video_analyzers.push(Box::new(analyzer));
2373        self
2374    }
2375
2376    /// Returns start in scene.
2377    pub fn start_in_scene(mut self, value: bool) -> Self {
2378        self.scene_builder = self.scene_builder.start_in_scene(value);
2379        self
2380    }
2381
2382    /// Returns crop.
2383    pub fn crop(mut self, value: Option<CropRegion>) -> Self {
2384        self.scene_builder = self.scene_builder.crop(value);
2385        self
2386    }
2387
2388    /// Returns auto downscale min width.
2389    pub fn auto_downscale_min_width(mut self, value: u32) -> Self {
2390        self.scene_builder = self.scene_builder.auto_downscale_min_width(value);
2391        self
2392    }
2393
2394    /// Returns build.
2395    pub fn build(self) -> Result<RealtimeVideoPipeline> {
2396        let scene_pipeline = match self.scene_pipeline {
2397            Some(pipeline) => pipeline,
2398            None => self.scene_builder.build()?,
2399        };
2400        let video_pipeline = if self.video_analyzers.is_empty() {
2401            None
2402        } else {
2403            Some(VideoAnalysisPipeline {
2404                analyzers: self.video_analyzers,
2405                state: VideoAnalysisPipelineState::default(),
2406            })
2407        };
2408        Ok(RealtimeVideoPipeline::new(scene_pipeline, video_pipeline))
2409    }
2410}
2411
2412/// Trait for audio analyzer implementations.
2413pub trait AudioAnalyzer {
2414    /// Returns name.
2415    fn name(&self) -> &str;
2416
2417    /// Returns process frame.
2418    fn process_frame(&mut self, frame: &AudioFrame<'_>) -> Result<Vec<AnalysisEvent>>;
2419
2420    /// Returns finish.
2421    fn finish(&mut self, _last_timestamp: Option<Timestamp>) -> Result<Vec<AnalysisEvent>> {
2422        Ok(Vec::new())
2423    }
2424}
2425
2426/// Data type for audio pipeline.
2427pub struct AudioPipeline {
2428    analyzers: Vec<Box<dyn AudioAnalyzer>>,
2429    state: AudioPipelineState,
2430}
2431
2432#[derive(Debug, Default, Clone)]
2433struct AudioPipelineState {
2434    events: Vec<AnalysisEvent>,
2435    last_timestamp: Option<Timestamp>,
2436    frames_processed: u64,
2437    finished: bool,
2438}
2439
2440impl AudioPipeline {
2441    /// Returns builder.
2442    pub fn builder() -> AudioPipelineBuilder {
2443        AudioPipelineBuilder::default()
2444    }
2445
2446    /// Returns process frame.
2447    pub fn process_frame(&mut self, frame: OwnedAudioFrame) -> Result<AudioAnalysis> {
2448        if self.state.finished {
2449            return Err(DetectError::InvalidArgument(
2450                "cannot process audio frames after finish_analysis; call reset first".to_string(),
2451            ));
2452        }
2453        let frame_ref = frame.as_frame()?;
2454        self.state.last_timestamp = Some(frame_ref.timestamp);
2455
2456        let mut events = Vec::new();
2457        for analyzer in &mut self.analyzers {
2458            let mut new_events = analyzer.process_frame(&frame_ref)?;
2459            events.append(&mut new_events);
2460        }
2461        self.state.events.extend(events.iter().cloned());
2462        self.state.frames_processed += 1;
2463
2464        Ok(AudioAnalysis {
2465            timestamp: frame_ref.timestamp,
2466            events,
2467            frames_processed: self.state.frames_processed,
2468        })
2469    }
2470
2471    /// Returns finish analysis.
2472    pub fn finish_analysis(&mut self) -> Result<AudioAnalysisResult> {
2473        if !self.state.finished {
2474            let mut events = Vec::new();
2475            for analyzer in &mut self.analyzers {
2476                let mut new_events = analyzer.finish(self.state.last_timestamp)?;
2477                events.append(&mut new_events);
2478            }
2479            self.state.events.extend(events);
2480            self.state.finished = true;
2481        }
2482        Ok(AudioAnalysisResult {
2483            events: self.state.events.clone(),
2484            frames_processed: self.state.frames_processed,
2485        })
2486    }
2487
2488    /// Returns reset.
2489    pub fn reset(&mut self) {
2490        self.state = AudioPipelineState::default();
2491    }
2492
2493    /// Returns events.
2494    pub fn events(&self) -> &[AnalysisEvent] {
2495        &self.state.events
2496    }
2497
2498    /// Returns frames processed.
2499    pub fn frames_processed(&self) -> u64 {
2500        self.state.frames_processed
2501    }
2502}
2503
2504#[derive(Default)]
2505/// Data type for audio pipeline builder.
2506pub struct AudioPipelineBuilder {
2507    analyzers: Vec<Box<dyn AudioAnalyzer>>,
2508}
2509
2510impl AudioPipelineBuilder {
2511    /// Returns analyzer.
2512    pub fn analyzer<A: AudioAnalyzer + 'static>(mut self, analyzer: A) -> Self {
2513        self.analyzers.push(Box::new(analyzer));
2514        self
2515    }
2516
2517    /// Returns build.
2518    pub fn build(self) -> Result<AudioPipeline> {
2519        if self.analyzers.is_empty() {
2520            return Err(DetectError::InvalidArgument(
2521                "at least one audio analyzer is required".to_string(),
2522            ));
2523        }
2524        Ok(AudioPipeline {
2525            analyzers: self.analyzers,
2526            state: AudioPipelineState::default(),
2527        })
2528    }
2529}
2530
2531/// Trait for text analyzer implementations.
2532pub trait TextAnalyzer {
2533    /// Returns name.
2534    fn name(&self) -> &str;
2535
2536    /// Returns process segment.
2537    fn process_segment(&mut self, segment: &TextSegment<'_>) -> Result<Vec<AnalysisEvent>>;
2538
2539    /// Returns finish.
2540    fn finish(&mut self, _last_segment_index: Option<u64>) -> Result<Vec<AnalysisEvent>> {
2541        Ok(Vec::new())
2542    }
2543}
2544
2545/// Data type for text pipeline.
2546pub struct TextPipeline {
2547    analyzers: Vec<Box<dyn TextAnalyzer>>,
2548    state: TextPipelineState,
2549}
2550
2551#[derive(Debug, Default, Clone)]
2552struct TextPipelineState {
2553    events: Vec<AnalysisEvent>,
2554    last_segment_index: Option<u64>,
2555    segments_processed: u64,
2556    finished: bool,
2557}
2558
2559impl TextPipeline {
2560    /// Returns builder.
2561    pub fn builder() -> TextPipelineBuilder {
2562        TextPipelineBuilder::default()
2563    }
2564
2565    /// Returns process segment.
2566    pub fn process_segment(&mut self, segment: OwnedTextSegment) -> Result<TextAnalysis> {
2567        if self.state.finished {
2568            return Err(DetectError::InvalidArgument(
2569                "cannot process text segments after finish_analysis; call reset first".to_string(),
2570            ));
2571        }
2572        let segment_ref = segment.as_segment();
2573        self.state.last_segment_index = Some(segment_ref.segment_index);
2574
2575        let mut events = Vec::new();
2576        for analyzer in &mut self.analyzers {
2577            let mut new_events = analyzer.process_segment(&segment_ref)?;
2578            events.append(&mut new_events);
2579        }
2580        self.state.events.extend(events.iter().cloned());
2581        self.state.segments_processed += 1;
2582
2583        Ok(TextAnalysis {
2584            segment_index: segment_ref.segment_index,
2585            events,
2586            segments_processed: self.state.segments_processed,
2587        })
2588    }
2589
2590    /// Returns finish analysis.
2591    pub fn finish_analysis(&mut self) -> Result<TextAnalysisResult> {
2592        if !self.state.finished {
2593            let mut events = Vec::new();
2594            for analyzer in &mut self.analyzers {
2595                let mut new_events = analyzer.finish(self.state.last_segment_index)?;
2596                events.append(&mut new_events);
2597            }
2598            self.state.events.extend(events);
2599            self.state.finished = true;
2600        }
2601        Ok(TextAnalysisResult {
2602            events: self.state.events.clone(),
2603            segments_processed: self.state.segments_processed,
2604        })
2605    }
2606
2607    /// Returns reset.
2608    pub fn reset(&mut self) {
2609        self.state = TextPipelineState::default();
2610    }
2611
2612    /// Returns events.
2613    pub fn events(&self) -> &[AnalysisEvent] {
2614        &self.state.events
2615    }
2616
2617    /// Returns segments processed.
2618    pub fn segments_processed(&self) -> u64 {
2619        self.state.segments_processed
2620    }
2621}
2622
2623#[derive(Default)]
2624/// Data type for text pipeline builder.
2625pub struct TextPipelineBuilder {
2626    analyzers: Vec<Box<dyn TextAnalyzer>>,
2627}
2628
2629impl TextPipelineBuilder {
2630    /// Returns analyzer.
2631    pub fn analyzer<A: TextAnalyzer + 'static>(mut self, analyzer: A) -> Self {
2632        self.analyzers.push(Box::new(analyzer));
2633        self
2634    }
2635
2636    /// Returns build.
2637    pub fn build(self) -> Result<TextPipeline> {
2638        if self.analyzers.is_empty() {
2639            return Err(DetectError::InvalidArgument(
2640                "at least one text analyzer is required".to_string(),
2641            ));
2642        }
2643        Ok(TextPipeline {
2644            analyzers: self.analyzers,
2645            state: TextPipelineState::default(),
2646        })
2647    }
2648}
2649
2650/// Returns scenes from cuts.
2651pub fn scenes_from_cuts(
2652    cuts: &[Cut],
2653    start: FramePosition,
2654    last_frame: FramePosition,
2655    start_in_scene: bool,
2656) -> Vec<Scene> {
2657    if cuts.is_empty() && !start_in_scene {
2658        return Vec::new();
2659    }
2660    let mut scenes = Vec::new();
2661    let mut scene_start = start;
2662    for cut in cuts {
2663        if cut.position.frame_index <= scene_start.frame_index {
2664            continue;
2665        }
2666        scenes.push(Scene {
2667            start: scene_start,
2668            end: cut.position,
2669        });
2670        scene_start = cut.position;
2671    }
2672    scenes.push(Scene {
2673        start: scene_start,
2674        end: FramePosition {
2675            frame_index: last_frame.frame_index + 1,
2676            timestamp: Timestamp::new(last_frame.timestamp.pts + 1, last_frame.timestamp.timebase),
2677        },
2678    });
2679    scenes
2680}
2681
2682#[cfg(test)]
2683mod tests {
2684    use super::*;
2685
2686    fn fps() -> Rational64 {
2687        Rational64::new(30, 1)
2688    }
2689
2690    fn pos(frame: u64) -> FramePosition {
2691        FramePosition::from_frame_index(frame, fps())
2692    }
2693
2694    fn owned_frame(frame_index: u64) -> OwnedVideoFrame {
2695        OwnedVideoFrame {
2696            position: pos(frame_index),
2697            width: 1,
2698            height: 1,
2699            pixel_format: PixelFormat::Rgb24,
2700            data: vec![0, 0, 0],
2701            stride: 3,
2702        }
2703    }
2704
2705    fn content_frame(frame_index: u64, rgb: [u8; 3]) -> OwnedVideoFrame {
2706        let mut data = Vec::new();
2707        for _ in 0..16 {
2708            data.extend_from_slice(&rgb);
2709        }
2710        OwnedVideoFrame {
2711            position: pos(frame_index),
2712            width: 4,
2713            height: 4,
2714            pixel_format: PixelFormat::Rgb24,
2715            data,
2716            stride: 12,
2717        }
2718    }
2719
2720    fn content_bgr_frame(frame_index: u64, rgb: [u8; 3]) -> OwnedVideoFrame {
2721        let mut data = Vec::new();
2722        for _ in 0..16 {
2723            data.extend_from_slice(&[rgb[2], rgb[1], rgb[0]]);
2724        }
2725        OwnedVideoFrame {
2726            position: pos(frame_index),
2727            width: 4,
2728            height: 4,
2729            pixel_format: PixelFormat::Bgr24,
2730            data,
2731            stride: 12,
2732        }
2733    }
2734
2735    #[test]
2736    fn timecode_formats_and_clamps_subtraction() {
2737        let tc = FrameTimecode::from_seconds(10.0, fps()).unwrap();
2738        assert_eq!(tc.frame_index, 300);
2739        assert_eq!(tc.timecode(3), "00:00:10.000");
2740        assert_eq!((FrameTimecode::from_frames(5, fps()) - 10).frame_index, 0);
2741    }
2742
2743    #[test]
2744    fn timecode_parse_accepts_frames_seconds_and_hms() {
2745        assert_eq!(FrameTimecode::parse("42", fps()).unwrap().frame_index, 42);
2746        assert_eq!(FrameTimecode::parse("2.0", fps()).unwrap().frame_index, 60);
2747        assert_eq!(
2748            FrameTimecode::parse("00:01:00.000", fps())
2749                .unwrap()
2750                .frame_index,
2751            1800
2752        );
2753    }
2754
2755    #[test]
2756    fn scenes_are_empty_without_cuts_unless_start_in_scene() {
2757        assert!(scenes_from_cuts(&[], pos(0), pos(9), false).is_empty());
2758        let scenes = scenes_from_cuts(&[], pos(0), pos(9), true);
2759        assert_eq!(scenes.len(), 1);
2760        assert_eq!(scenes[0].end.frame_index, 10);
2761    }
2762
2763    #[test]
2764    fn scenes_are_contiguous_from_unique_cuts() {
2765        let cuts = vec![Cut {
2766            position: pos(5),
2767            detector: "test",
2768            score: None,
2769        }];
2770        let scenes = scenes_from_cuts(&cuts, pos(0), pos(9), true);
2771        assert_eq!(scenes.len(), 2);
2772        assert_eq!(scenes[0].start.frame_index, 0);
2773        assert_eq!(scenes[0].end.frame_index, 5);
2774        assert_eq!(scenes[1].start.frame_index, 5);
2775        assert_eq!(scenes[1].end.frame_index, 10);
2776    }
2777
2778    #[test]
2779    fn metrics_store_tracks_keys_and_values() {
2780        let mut metrics = MetricsStore::default();
2781        metrics.set_metric(7, "content_val", 12.5);
2782        assert_eq!(metrics.get(7, "content_val"), Some(12.5));
2783        assert_eq!(metrics.keys().collect::<Vec<_>>(), vec!["content_val"]);
2784    }
2785
2786    #[test]
2787    fn metrics_store_accepts_dynamic_keys() {
2788        let mut metrics = MetricsStore::default();
2789        metrics.set_metric(7, "combined.content.raw", 12.5);
2790        assert_eq!(metrics.get(7, "combined.content.raw"), Some(12.5));
2791        assert_eq!(
2792            metrics.keys().collect::<Vec<_>>(),
2793            vec!["combined.content.raw"]
2794        );
2795    }
2796
2797    #[test]
2798    fn content_detector_finds_hard_cut() {
2799        let mut detector = ContentDetector::new(10.0, 1);
2800        let mut metrics = MetricsStore::default();
2801        let first = content_frame(0, [0, 0, 0]);
2802        let second = content_frame(1, [255, 255, 255]);
2803        assert!(detector
2804            .process_frame(&first.as_frame(), Some(&mut metrics))
2805            .unwrap()
2806            .is_empty());
2807        let cuts = detector
2808            .process_frame(&second.as_frame(), Some(&mut metrics))
2809            .unwrap();
2810        assert_eq!(cuts.len(), 1);
2811        assert!(metrics.get(1, "content_val").unwrap() > 10.0);
2812    }
2813
2814    #[test]
2815    fn content_detector_scores_rgb24_and_bgr24_equivalent_frames_equally() {
2816        let mut rgb_detector = ContentDetector::new(10.0, 1);
2817        let mut bgr_detector = ContentDetector::new(10.0, 1);
2818        let mut rgb_metrics = MetricsStore::default();
2819        let mut bgr_metrics = MetricsStore::default();
2820        let rgb_first = content_frame(0, [24, 48, 96]);
2821        let rgb_second = content_frame(1, [160, 32, 224]);
2822        let bgr_first = content_bgr_frame(0, [24, 48, 96]);
2823        let bgr_second = content_bgr_frame(1, [160, 32, 224]);
2824
2825        rgb_detector
2826            .process_frame(&rgb_first.as_frame(), Some(&mut rgb_metrics))
2827            .unwrap();
2828        rgb_detector
2829            .process_frame(&rgb_second.as_frame(), Some(&mut rgb_metrics))
2830            .unwrap();
2831        bgr_detector
2832            .process_frame(&bgr_first.as_frame(), Some(&mut bgr_metrics))
2833            .unwrap();
2834        bgr_detector
2835            .process_frame(&bgr_second.as_frame(), Some(&mut bgr_metrics))
2836            .unwrap();
2837
2838        for key in ["content_val", "delta_hue", "delta_sat", "delta_lum"] {
2839            assert_eq!(rgb_metrics.get(1, key), bgr_metrics.get(1, key), "{key}");
2840        }
2841    }
2842
2843    #[test]
2844    fn content_detector_suppresses_flash() {
2845        let mut detector = ContentDetector::new(10.0, 3).filter_mode(FlashFilterMode::Suppress, 3);
2846        let frames = [
2847            content_frame(0, [0, 0, 0]),
2848            content_frame(1, [255, 255, 255]),
2849            content_frame(2, [0, 0, 0]),
2850        ];
2851        let mut cuts = Vec::new();
2852        for frame in frames {
2853            cuts.extend(detector.process_frame(&frame.as_frame(), None).unwrap());
2854        }
2855
2856        assert!(cuts.is_empty());
2857    }
2858
2859    #[test]
2860    fn pipeline_processes_frames_incrementally() {
2861        struct CutOnFrame(u64);
2862
2863        impl SceneDetector for CutOnFrame {
2864            fn name(&self) -> &'static str {
2865                "test"
2866            }
2867
2868            fn metric_keys(&self) -> &'static [&'static str] {
2869                &[]
2870            }
2871
2872            fn process_frame(
2873                &mut self,
2874                frame: &VideoFrame<'_>,
2875                _metrics: Option<&mut dyn MetricsSink>,
2876            ) -> Result<Vec<Cut>> {
2877                Ok((frame.position.frame_index == self.0)
2878                    .then(|| Cut {
2879                        position: frame.position,
2880                        detector: self.name(),
2881                        score: Some(1.0),
2882                    })
2883                    .into_iter()
2884                    .collect())
2885            }
2886        }
2887
2888        let mut pipeline = ScenePipeline::builder()
2889            .detector(CutOnFrame(1))
2890            .start_in_scene(true)
2891            .build()
2892            .unwrap();
2893
2894        assert!(pipeline
2895            .process_frame(owned_frame(0))
2896            .unwrap()
2897            .cuts
2898            .is_empty());
2899        let analysis = pipeline.process_frame(owned_frame(1)).unwrap();
2900        assert_eq!(analysis.frames_processed, 2);
2901        assert_eq!(analysis.cuts[0].position.frame_index, 1);
2902
2903        let result = pipeline.finish_detection().unwrap();
2904        assert_eq!(result.frames_processed, 2);
2905        assert_eq!(result.cuts.len(), 1);
2906        assert_eq!(result.scenes.len(), 2);
2907    }
2908
2909    #[test]
2910    fn video_pipeline_emits_frame_observations() {
2911        struct OcrAnalyzer;
2912
2913        impl VideoAnalyzer for OcrAnalyzer {
2914            fn name(&self) -> &str {
2915                "ocr"
2916            }
2917
2918            fn process_frame(&mut self, frame: &VideoFrame<'_>) -> Result<Vec<Observation>> {
2919                Ok(vec![Observation::new(self.name(), ObservationKind::Text)
2920                    .at_frame(frame.position)
2921                    .text("EXIT")
2922                    .score(0.9)])
2923            }
2924        }
2925
2926        let mut pipeline = VideoAnalysisPipeline::builder()
2927            .analyzer(OcrAnalyzer)
2928            .build()
2929            .unwrap();
2930
2931        let analysis = pipeline.process_frame(owned_frame(3)).unwrap();
2932        assert_eq!(analysis.frames_processed, 1);
2933        assert_eq!(analysis.observations[0].text.as_deref(), Some("EXIT"));
2934        assert_eq!(
2935            analysis.observations[0].to_text_segment(0).unwrap().text,
2936            "EXIT"
2937        );
2938        assert_eq!(pipeline.finish_analysis().unwrap().observations.len(), 1);
2939    }
2940
2941    #[test]
2942    fn sampled_video_analyzer_skips_unsampled_frames() {
2943        struct CountingVideoAnalyzer {
2944            frames: Vec<u64>,
2945        }
2946
2947        impl VideoAnalyzer for CountingVideoAnalyzer {
2948            fn name(&self) -> &str {
2949                "counting"
2950            }
2951
2952            fn process_frame(&mut self, frame: &VideoFrame<'_>) -> Result<Vec<Observation>> {
2953                self.frames.push(frame.position.frame_index);
2954                Ok(Vec::new())
2955            }
2956        }
2957
2958        let mut analyzer =
2959            SampledVideoAnalyzer::new(CountingVideoAnalyzer { frames: Vec::new() }, 3);
2960        for index in 0..7 {
2961            analyzer
2962                .process_frame(&owned_frame(index).as_frame())
2963                .unwrap();
2964        }
2965
2966        assert_eq!(analyzer.inner().frames, vec![0, 3, 6]);
2967    }
2968
2969    #[test]
2970    fn realtime_video_pipeline_groups_observations_by_completed_scene() {
2971        struct CutOnFrame(u64);
2972
2973        impl SceneDetector for CutOnFrame {
2974            fn name(&self) -> &'static str {
2975                "test"
2976            }
2977
2978            fn metric_keys(&self) -> &'static [&'static str] {
2979                &[]
2980            }
2981
2982            fn process_frame(
2983                &mut self,
2984                frame: &VideoFrame<'_>,
2985                _metrics: Option<&mut dyn MetricsSink>,
2986            ) -> Result<Vec<Cut>> {
2987                Ok((frame.position.frame_index == self.0)
2988                    .then(|| Cut {
2989                        position: frame.position,
2990                        detector: self.name(),
2991                        score: Some(1.0),
2992                    })
2993                    .into_iter()
2994                    .collect())
2995            }
2996        }
2997
2998        struct ObjectAnalyzer;
2999
3000        impl VideoAnalyzer for ObjectAnalyzer {
3001            fn name(&self) -> &str {
3002                "objects"
3003            }
3004
3005            fn process_frame(&mut self, frame: &VideoFrame<'_>) -> Result<Vec<Observation>> {
3006                Ok(vec![Observation::new(self.name(), ObservationKind::Object)
3007                    .at_frame(frame.position)
3008                    .label(format!("frame-{}", frame.position.frame_index))])
3009            }
3010        }
3011
3012        let mut pipeline = RealtimeVideoPipeline::builder()
3013            .scene_detector(CutOnFrame(2))
3014            .video_analyzer(ObjectAnalyzer)
3015            .start_in_scene(true)
3016            .build()
3017            .unwrap();
3018
3019        assert!(pipeline
3020            .process_frame(owned_frame(0))
3021            .unwrap()
3022            .completed_scenes
3023            .is_empty());
3024        pipeline.process_frame(owned_frame(1)).unwrap();
3025        let analysis = pipeline.process_frame(owned_frame(2)).unwrap();
3026
3027        assert_eq!(analysis.completed_scenes.len(), 1);
3028        assert_eq!(analysis.completed_scenes[0].scene.start.frame_index, 0);
3029        assert_eq!(analysis.completed_scenes[0].scene.end.frame_index, 2);
3030        assert_eq!(analysis.completed_scenes[0].observations.len(), 2);
3031        assert_eq!(analysis.observations[0].scene_index, Some(1));
3032
3033        let result = pipeline.finish_analysis().unwrap();
3034        assert_eq!(result.detection.scenes.len(), 2);
3035        assert_eq!(result.scenes[0].observations.len(), 2);
3036        assert_eq!(result.scenes[1].observations.len(), 1);
3037    }
3038
3039    #[test]
3040    fn audio_pipeline_processes_frames_incrementally() {
3041        struct LoudnessAnalyzer;
3042
3043        impl AudioAnalyzer for LoudnessAnalyzer {
3044            fn name(&self) -> &str {
3045                "loudness"
3046            }
3047
3048            fn process_frame(&mut self, frame: &AudioFrame<'_>) -> Result<Vec<AnalysisEvent>> {
3049                let AudioBuffer::F32(samples) = frame.data else {
3050                    return Ok(Vec::new());
3051                };
3052                let mean = samples.iter().map(|sample| sample.abs()).sum::<f32>()
3053                    / samples.len().max(1) as f32;
3054                Ok((mean > 0.5)
3055                    .then(|| {
3056                        AnalysisEvent::new(self.name(), "loud")
3057                            .at_timestamp(frame.timestamp)
3058                            .score(mean)
3059                    })
3060                    .into_iter()
3061                    .collect())
3062            }
3063        }
3064
3065        let mut pipeline = AudioPipeline::builder()
3066            .analyzer(LoudnessAnalyzer)
3067            .build()
3068            .unwrap();
3069        let frame = OwnedAudioFrame::new(
3070            Timestamp::new(0, Timebase::new(1, 48_000)),
3071            48_000,
3072            1,
3073            AudioBuffer::F32(vec![1.0, 0.5]),
3074        )
3075        .unwrap();
3076
3077        let analysis = pipeline.process_frame(frame).unwrap();
3078        assert_eq!(analysis.frames_processed, 1);
3079        assert_eq!(analysis.events[0].label, "loud");
3080        assert_eq!(pipeline.finish_analysis().unwrap().events.len(), 1);
3081    }
3082
3083    #[test]
3084    fn text_pipeline_processes_segments_incrementally() {
3085        struct KeywordAnalyzer;
3086
3087        impl TextAnalyzer for KeywordAnalyzer {
3088            fn name(&self) -> &str {
3089                "keyword"
3090            }
3091
3092            fn process_segment(&mut self, segment: &TextSegment<'_>) -> Result<Vec<AnalysisEvent>> {
3093                Ok(segment
3094                    .text
3095                    .contains("cut")
3096                    .then(|| {
3097                        let mut event = AnalysisEvent::new(self.name(), "keyword").score(1.0);
3098                        if let Some(timestamp) = segment.timestamp {
3099                            event = event.at_timestamp(timestamp);
3100                        }
3101                        event
3102                    })
3103                    .into_iter()
3104                    .collect())
3105            }
3106        }
3107
3108        let mut pipeline = TextPipeline::builder()
3109            .analyzer(KeywordAnalyzer)
3110            .build()
3111            .unwrap();
3112        let segment = OwnedTextSegment::new(0, "find this cut point");
3113
3114        let analysis = pipeline.process_segment(segment).unwrap();
3115        assert_eq!(analysis.segments_processed, 1);
3116        assert_eq!(analysis.events[0].analyzer, "keyword");
3117        assert_eq!(pipeline.finish_analysis().unwrap().segments_processed, 1);
3118    }
3119
3120    #[test]
3121    fn crop_rejects_empty_regions() {
3122        assert!(CropRegion::new(0, 0, 0, 10).is_err());
3123    }
3124}