Skip to main content

video_analysis_ingest/
lib.rs

1#![doc = include_str!("../README.md")]
2
3pub mod surface;
4use std::fs::File;
5use std::io::{BufRead, BufReader};
6use std::path::Path;
7
8use num_rational::Rational64;
9use video_analysis_core::{
10    AudioAnalysis, AudioAnalysisResult, AudioPipeline, AudioSampleFormat, DetectionResult,
11    FrameAnalysis, OwnedAudioFrame, OwnedTextSegment, OwnedVideoFrame, PixelFormat,
12    RealtimeVideoAnalysisResult, RealtimeVideoFrameAnalysis, RealtimeVideoPipeline, Result,
13    ScenePipeline, TextAnalysis, TextAnalysisResult, TextPipeline, VideoAnalysisPipeline,
14    VideoAnalysisResult, VideoFrameAnalysis,
15};
16
/// Tags a media source as either recorded or live.
///
/// The `is_live` helpers on the source traits compare against [`SourceMode::Live`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SourceMode {
    /// Mode assigned by the `recorded` constructors.
    Recorded,
    /// Mode assigned by the `live` constructors.
    Live,
}
25
26#[derive(Debug, Clone, PartialEq)]
27/// Data type for media source info.
28pub struct MediaSourceInfo {
29    /// The input value.
30    pub input: String,
31    /// The mode value.
32    pub mode: SourceMode,
33    /// The video value.
34    pub video: Option<VideoStreamInfo>,
35    /// The audio value.
36    pub audio: Vec<AudioStreamInfo>,
37    /// Text content for this value.
38    pub text: Vec<TextStreamInfo>,
39}
40
41impl MediaSourceInfo {
42    /// Returns recorded.
43    pub fn recorded(input: impl Into<String>) -> Self {
44        Self {
45            input: input.into(),
46            mode: SourceMode::Recorded,
47            video: None,
48            audio: Vec::new(),
49            text: Vec::new(),
50        }
51    }
52
53    /// Returns live.
54    pub fn live(input: impl Into<String>) -> Self {
55        Self {
56            input: input.into(),
57            mode: SourceMode::Live,
58            video: None,
59            audio: Vec::new(),
60            text: Vec::new(),
61        }
62    }
63
64    /// Returns this value with video.
65    pub fn with_video(mut self, video: VideoStreamInfo) -> Self {
66        self.video = Some(video);
67        self
68    }
69
70    /// Returns this value with audio.
71    pub fn with_audio(mut self, audio: AudioStreamInfo) -> Self {
72        self.audio.push(audio);
73        self
74    }
75
76    /// Returns this value with text.
77    pub fn with_text(mut self, text: TextStreamInfo) -> Self {
78        self.text.push(text);
79        self
80    }
81}
82
83#[derive(Debug, Clone, PartialEq)]
84/// Data type for video stream info.
85pub struct VideoStreamInfo {
86    /// Width in pixels.
87    pub width: u32,
88    /// Height in pixels.
89    pub height: u32,
90    /// The frame rate value.
91    pub frame_rate: Option<Rational64>,
92    /// The pixel format value.
93    pub pixel_format: PixelFormat,
94}
95
96#[derive(Debug, Clone, PartialEq, Eq)]
97/// Data type for audio stream info.
98pub struct AudioStreamInfo {
99    /// Sample rate in hertz.
100    pub sample_rate: u32,
101    /// Number of audio channels.
102    pub channels: u16,
103    /// The sample format value.
104    pub sample_format: AudioSampleFormat,
105}
106
107#[derive(Debug, Clone, PartialEq, Eq)]
108/// Data type for text stream info.
109pub struct TextStreamInfo {
110    /// The format value.
111    pub format: TextFormat,
112    /// Language tag for this value.
113    pub language: Option<String>,
114}
115
/// Tags the format of a text stream.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TextFormat {
    /// Plain text.
    Plain,
    /// Line-oriented text; this is the format `TextLineSource` reports.
    Lines,
    /// Transcript text.
    Transcript,
    /// Subtitle text.
    Subtitles,
}
128
129#[derive(Debug, Clone, PartialEq)]
130/// Variants describing media sample.
131pub enum MediaSample {
132    /// The video variant.
133    Video(OwnedVideoFrame),
134    /// The audio variant.
135    Audio(OwnedAudioFrame),
136    /// The text variant.
137    Text(OwnedTextSegment),
138}
139
140/// Trait for media source implementations.
141pub trait MediaSource {
142    /// Returns source info.
143    fn source_info(&self) -> &MediaSourceInfo;
144    /// Returns next sample.
145    fn next_sample(&mut self) -> Result<Option<MediaSample>>;
146
147    /// Returns mode.
148    fn mode(&self) -> SourceMode {
149        self.source_info().mode
150    }
151
152    /// Returns whether is live.
153    fn is_live(&self) -> bool {
154        self.mode() == SourceMode::Live
155    }
156}
157
158/// Trait for video frame source implementations.
159pub trait VideoFrameSource {
160    /// Returns source info.
161    fn source_info(&self) -> &MediaSourceInfo;
162    /// Returns next video frame.
163    fn next_video_frame(&mut self) -> Result<Option<OwnedVideoFrame>>;
164
165    /// Returns frame rate.
166    fn frame_rate(&self) -> Option<Rational64> {
167        self.source_info()
168            .video
169            .as_ref()
170            .and_then(|video| video.frame_rate)
171    }
172
173    /// Returns whether is live.
174    fn is_live(&self) -> bool {
175        self.source_info().mode == SourceMode::Live
176    }
177}
178
179/// Trait for audio frame source implementations.
180pub trait AudioFrameSource {
181    /// Returns source info.
182    fn source_info(&self) -> &MediaSourceInfo;
183    /// Returns next audio frame.
184    fn next_audio_frame(&mut self) -> Result<Option<OwnedAudioFrame>>;
185
186    /// Returns whether is live.
187    fn is_live(&self) -> bool {
188        self.source_info().mode == SourceMode::Live
189    }
190}
191
192/// Trait for text segment source implementations.
193pub trait TextSegmentSource {
194    /// Returns source info.
195    fn source_info(&self) -> &MediaSourceInfo;
196    /// Returns next text segment.
197    fn next_text_segment(&mut self) -> Result<Option<OwnedTextSegment>>;
198
199    /// Returns whether is live.
200    fn is_live(&self) -> bool {
201        self.source_info().mode == SourceMode::Live
202    }
203}
204
205/// Returns analyze video source.
206pub fn analyze_video_source<S, F>(
207    source: &mut S,
208    pipeline: &mut ScenePipeline,
209    mut on_frame: F,
210) -> Result<DetectionResult>
211where
212    S: VideoFrameSource,
213    F: FnMut(&FrameAnalysis) -> Result<()>,
214{
215    pipeline.reset();
216    while let Some(frame) = source.next_video_frame()? {
217        let analysis = pipeline.process_frame(frame)?;
218        on_frame(&analysis)?;
219    }
220    pipeline.finish_detection()
221}
222
223/// Returns analyze video frames.
224pub fn analyze_video_frames<S, F>(
225    source: &mut S,
226    pipeline: &mut VideoAnalysisPipeline,
227    mut on_frame: F,
228) -> Result<VideoAnalysisResult>
229where
230    S: VideoFrameSource,
231    F: FnMut(&VideoFrameAnalysis) -> Result<()>,
232{
233    pipeline.reset();
234    while let Some(frame) = source.next_video_frame()? {
235        let analysis = pipeline.process_frame(frame)?;
236        on_frame(&analysis)?;
237    }
238    pipeline.finish_analysis()
239}
240
241/// Returns analyze realtime video source.
242pub fn analyze_realtime_video_source<S, F>(
243    source: &mut S,
244    pipeline: &mut RealtimeVideoPipeline,
245    mut on_frame: F,
246) -> Result<RealtimeVideoAnalysisResult>
247where
248    S: VideoFrameSource,
249    F: FnMut(&RealtimeVideoFrameAnalysis) -> Result<()>,
250{
251    pipeline.reset();
252    while let Some(frame) = source.next_video_frame()? {
253        let analysis = pipeline.process_frame(frame)?;
254        on_frame(&analysis)?;
255    }
256    pipeline.finish_analysis()
257}
258
259/// Returns analyze audio source.
260pub fn analyze_audio_source<S, F>(
261    source: &mut S,
262    pipeline: &mut AudioPipeline,
263    mut on_frame: F,
264) -> Result<AudioAnalysisResult>
265where
266    S: AudioFrameSource,
267    F: FnMut(&AudioAnalysis) -> Result<()>,
268{
269    pipeline.reset();
270    while let Some(frame) = source.next_audio_frame()? {
271        let analysis = pipeline.process_frame(frame)?;
272        on_frame(&analysis)?;
273    }
274    pipeline.finish_analysis()
275}
276
277/// Returns analyze text source.
278pub fn analyze_text_source<S, F>(
279    source: &mut S,
280    pipeline: &mut TextPipeline,
281    mut on_segment: F,
282) -> Result<TextAnalysisResult>
283where
284    S: TextSegmentSource,
285    F: FnMut(&TextAnalysis) -> Result<()>,
286{
287    pipeline.reset();
288    while let Some(segment) = source.next_text_segment()? {
289        let analysis = pipeline.process_segment(segment)?;
290        on_segment(&analysis)?;
291    }
292    pipeline.finish_analysis()
293}
294
295/// Data type for text line source.
296pub struct TextLineSource<R> {
297    source_info: MediaSourceInfo,
298    reader: R,
299    next_segment_index: u64,
300    language: Option<String>,
301}
302
303impl<R: BufRead> TextLineSource<R> {
304    /// Returns recorded.
305    pub fn recorded(input: impl Into<String>, reader: R) -> Self {
306        Self::new(SourceMode::Recorded, input, reader)
307    }
308
309    /// Returns live.
310    pub fn live(input: impl Into<String>, reader: R) -> Self {
311        Self::new(SourceMode::Live, input, reader)
312    }
313
314    /// Returns this value with language.
315    pub fn with_language(mut self, language: impl Into<String>) -> Self {
316        let language = language.into();
317        self.language = Some(language.clone());
318        if let Some(text) = self.source_info.text.first_mut() {
319            text.language = Some(language);
320        }
321        self
322    }
323
324    fn new(mode: SourceMode, input: impl Into<String>, reader: R) -> Self {
325        let input = input.into();
326        let source_info = MediaSourceInfo {
327            input,
328            mode,
329            video: None,
330            audio: Vec::new(),
331            text: vec![TextStreamInfo {
332                format: TextFormat::Lines,
333                language: None,
334            }],
335        };
336        Self {
337            source_info,
338            reader,
339            next_segment_index: 0,
340            language: None,
341        }
342    }
343}
344
345impl TextLineSource<BufReader<File>> {
346    /// Returns open.
347    pub fn open(path: impl AsRef<Path>) -> Result<Self> {
348        let path = path.as_ref();
349        let file = File::open(path)?;
350        Ok(Self::recorded(
351            path.to_string_lossy().into_owned(),
352            BufReader::new(file),
353        ))
354    }
355}
356
357impl<R: BufRead> TextSegmentSource for TextLineSource<R> {
358    fn source_info(&self) -> &MediaSourceInfo {
359        &self.source_info
360    }
361
362    fn next_text_segment(&mut self) -> Result<Option<OwnedTextSegment>> {
363        let mut line = String::new();
364        let bytes = self.reader.read_line(&mut line)?;
365        if bytes == 0 {
366            return Ok(None);
367        }
368        let text = line.trim_end_matches(['\r', '\n']).to_string();
369        let segment_index = self.next_segment_index;
370        self.next_segment_index += 1;
371        let mut segment = OwnedTextSegment::new(segment_index, text);
372        if let Some(language) = &self.language {
373            segment = segment.language(language.clone());
374        }
375        Ok(Some(segment))
376    }
377}
378
379impl<R: BufRead> MediaSource for TextLineSource<R> {
380    fn source_info(&self) -> &MediaSourceInfo {
381        &self.source_info
382    }
383
384    fn next_sample(&mut self) -> Result<Option<MediaSample>> {
385        self.next_text_segment()
386            .map(|segment| segment.map(MediaSample::Text))
387    }
388}
389
#[cfg(test)]
mod tests {
    use super::*;
    use video_analysis_core::{AudioBuffer, FramePosition, PixelFormat, Timebase, Timestamp};

    /// 960 samples over 2 channels at 48 kHz should report 480 samples per
    /// channel and a duration of 10 ms.
    #[test]
    fn audio_frame_reports_samples_per_channel_and_duration() {
        let frame = OwnedAudioFrame {
            timestamp: Timestamp::new(0, Timebase::new(1, 48_000)),
            sample_rate: 48_000,
            channels: 2,
            data: AudioBuffer::F32(vec![0.0; 960]),
        };

        assert_eq!(frame.sample_format(), AudioSampleFormat::F32);
        assert_eq!(frame.samples_per_channel(), 480);

        // Compare with a tolerance: exact equality on a computed float breaks as
        // soon as the implementation changes how it rounds.
        let duration = frame.duration_seconds();
        assert!(
            (duration - 0.01).abs() < 1e-6,
            "unexpected duration: {}",
            duration
        );
    }

    /// The builder keeps the requested mode and video stream and leaves the
    /// stream lists it was not asked to touch empty.
    #[test]
    fn media_source_info_builders_track_mode_and_streams() {
        let info = MediaSourceInfo::live("rtsp://example/stream").with_video(VideoStreamInfo {
            width: 1920,
            height: 1080,
            frame_rate: Some(Rational64::new(30, 1)),
            pixel_format: PixelFormat::Rgb24,
        });

        assert_eq!(info.mode, SourceMode::Live);
        assert!(info.audio.is_empty());
        assert!(info.text.is_empty());
        assert_eq!(info.video.unwrap().width, 1920);
    }

    /// A video frame can be wrapped in `MediaSample::Video`.
    #[test]
    fn media_sample_can_hold_video_frames() {
        let frame = OwnedVideoFrame {
            position: FramePosition::from_frame_index(0, Rational64::new(30, 1)),
            width: 1,
            height: 1,
            pixel_format: PixelFormat::Rgb24,
            data: vec![0, 0, 0],
            stride: 3,
        };

        assert!(matches!(MediaSample::Video(frame), MediaSample::Video(_)));
    }

    /// Each input line becomes one segment with a sequential index, stripped
    /// line ending, and the configured language; the source then reports the end.
    #[test]
    fn text_line_source_yields_segments() {
        let input = std::io::Cursor::new("first\nsecond\n");
        let mut source = TextLineSource::recorded("memory", input).with_language("en");

        let first = source.next_text_segment().unwrap().unwrap();
        let second = source.next_text_segment().unwrap().unwrap();

        assert_eq!(first.segment_index, 0);
        assert_eq!(first.text, "first");
        assert_eq!(first.language.as_deref(), Some("en"));
        assert_eq!(second.segment_index, 1);
        assert_eq!(second.text, "second");
        assert_eq!(second.language.as_deref(), Some("en"));
        assert!(source.next_text_segment().unwrap().is_none());
    }
}