1#![doc = include_str!("../README.md")]
2
3pub mod surface;
4use std::fs::File;
5use std::io::{BufRead, BufReader};
6use std::path::Path;
7
8use num_rational::Rational64;
9use video_analysis_core::{
10 AudioAnalysis, AudioAnalysisResult, AudioPipeline, AudioSampleFormat, DetectionResult,
11 FrameAnalysis, OwnedAudioFrame, OwnedTextSegment, OwnedVideoFrame, PixelFormat,
12 RealtimeVideoAnalysisResult, RealtimeVideoFrameAnalysis, RealtimeVideoPipeline, Result,
13 ScenePipeline, TextAnalysis, TextAnalysisResult, TextPipeline, VideoAnalysisPipeline,
14 VideoAnalysisResult, VideoFrameAnalysis,
15};
16
/// Mode tag stored in [`MediaSourceInfo::mode`].
///
/// The `is_live` helpers on the source traits in this file report `true`
/// exactly when the mode is [`SourceMode::Live`].
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum SourceMode {
    /// Mode assigned by the `recorded` constructors in this file.
    Recorded,
    /// Mode assigned by the `live` constructors in this file.
    Live,
}
25
26#[derive(Debug, Clone, PartialEq)]
27pub struct MediaSourceInfo {
29 pub input: String,
31 pub mode: SourceMode,
33 pub video: Option<VideoStreamInfo>,
35 pub audio: Vec<AudioStreamInfo>,
37 pub text: Vec<TextStreamInfo>,
39}
40
41impl MediaSourceInfo {
42 pub fn recorded(input: impl Into<String>) -> Self {
44 Self {
45 input: input.into(),
46 mode: SourceMode::Recorded,
47 video: None,
48 audio: Vec::new(),
49 text: Vec::new(),
50 }
51 }
52
53 pub fn live(input: impl Into<String>) -> Self {
55 Self {
56 input: input.into(),
57 mode: SourceMode::Live,
58 video: None,
59 audio: Vec::new(),
60 text: Vec::new(),
61 }
62 }
63
64 pub fn with_video(mut self, video: VideoStreamInfo) -> Self {
66 self.video = Some(video);
67 self
68 }
69
70 pub fn with_audio(mut self, audio: AudioStreamInfo) -> Self {
72 self.audio.push(audio);
73 self
74 }
75
76 pub fn with_text(mut self, text: TextStreamInfo) -> Self {
78 self.text.push(text);
79 self
80 }
81}
82
83#[derive(Debug, Clone, PartialEq)]
84pub struct VideoStreamInfo {
86 pub width: u32,
88 pub height: u32,
90 pub frame_rate: Option<Rational64>,
92 pub pixel_format: PixelFormat,
94}
95
96#[derive(Debug, Clone, PartialEq, Eq)]
97pub struct AudioStreamInfo {
99 pub sample_rate: u32,
101 pub channels: u16,
103 pub sample_format: AudioSampleFormat,
105}
106
107#[derive(Debug, Clone, PartialEq, Eq)]
108pub struct TextStreamInfo {
110 pub format: TextFormat,
112 pub language: Option<String>,
114}
115
/// Kind of text carried by a stream, as stored in `TextStreamInfo::format`.
///
/// Only [`TextFormat::Lines`] is produced in this file; the exact meaning of
/// the other variants is defined by their consumers (NOTE(review): confirm).
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum TextFormat {
    /// Plain text.
    Plain,
    /// Line-oriented text. `TextLineSource` declares this format and emits
    /// one segment per input line.
    Lines,
    /// Transcript text.
    Transcript,
    /// Subtitle text.
    Subtitles,
}
128
129#[derive(Debug, Clone, PartialEq)]
130pub enum MediaSample {
132 Video(OwnedVideoFrame),
134 Audio(OwnedAudioFrame),
136 Text(OwnedTextSegment),
138}
139
140pub trait MediaSource {
142 fn source_info(&self) -> &MediaSourceInfo;
144 fn next_sample(&mut self) -> Result<Option<MediaSample>>;
146
147 fn mode(&self) -> SourceMode {
149 self.source_info().mode
150 }
151
152 fn is_live(&self) -> bool {
154 self.mode() == SourceMode::Live
155 }
156}
157
158pub trait VideoFrameSource {
160 fn source_info(&self) -> &MediaSourceInfo;
162 fn next_video_frame(&mut self) -> Result<Option<OwnedVideoFrame>>;
164
165 fn frame_rate(&self) -> Option<Rational64> {
167 self.source_info()
168 .video
169 .as_ref()
170 .and_then(|video| video.frame_rate)
171 }
172
173 fn is_live(&self) -> bool {
175 self.source_info().mode == SourceMode::Live
176 }
177}
178
179pub trait AudioFrameSource {
181 fn source_info(&self) -> &MediaSourceInfo;
183 fn next_audio_frame(&mut self) -> Result<Option<OwnedAudioFrame>>;
185
186 fn is_live(&self) -> bool {
188 self.source_info().mode == SourceMode::Live
189 }
190}
191
192pub trait TextSegmentSource {
194 fn source_info(&self) -> &MediaSourceInfo;
196 fn next_text_segment(&mut self) -> Result<Option<OwnedTextSegment>>;
198
199 fn is_live(&self) -> bool {
201 self.source_info().mode == SourceMode::Live
202 }
203}
204
205pub fn analyze_video_source<S, F>(
207 source: &mut S,
208 pipeline: &mut ScenePipeline,
209 mut on_frame: F,
210) -> Result<DetectionResult>
211where
212 S: VideoFrameSource,
213 F: FnMut(&FrameAnalysis) -> Result<()>,
214{
215 pipeline.reset();
216 while let Some(frame) = source.next_video_frame()? {
217 let analysis = pipeline.process_frame(frame)?;
218 on_frame(&analysis)?;
219 }
220 pipeline.finish_detection()
221}
222
223pub fn analyze_video_frames<S, F>(
225 source: &mut S,
226 pipeline: &mut VideoAnalysisPipeline,
227 mut on_frame: F,
228) -> Result<VideoAnalysisResult>
229where
230 S: VideoFrameSource,
231 F: FnMut(&VideoFrameAnalysis) -> Result<()>,
232{
233 pipeline.reset();
234 while let Some(frame) = source.next_video_frame()? {
235 let analysis = pipeline.process_frame(frame)?;
236 on_frame(&analysis)?;
237 }
238 pipeline.finish_analysis()
239}
240
241pub fn analyze_realtime_video_source<S, F>(
243 source: &mut S,
244 pipeline: &mut RealtimeVideoPipeline,
245 mut on_frame: F,
246) -> Result<RealtimeVideoAnalysisResult>
247where
248 S: VideoFrameSource,
249 F: FnMut(&RealtimeVideoFrameAnalysis) -> Result<()>,
250{
251 pipeline.reset();
252 while let Some(frame) = source.next_video_frame()? {
253 let analysis = pipeline.process_frame(frame)?;
254 on_frame(&analysis)?;
255 }
256 pipeline.finish_analysis()
257}
258
259pub fn analyze_audio_source<S, F>(
261 source: &mut S,
262 pipeline: &mut AudioPipeline,
263 mut on_frame: F,
264) -> Result<AudioAnalysisResult>
265where
266 S: AudioFrameSource,
267 F: FnMut(&AudioAnalysis) -> Result<()>,
268{
269 pipeline.reset();
270 while let Some(frame) = source.next_audio_frame()? {
271 let analysis = pipeline.process_frame(frame)?;
272 on_frame(&analysis)?;
273 }
274 pipeline.finish_analysis()
275}
276
277pub fn analyze_text_source<S, F>(
279 source: &mut S,
280 pipeline: &mut TextPipeline,
281 mut on_segment: F,
282) -> Result<TextAnalysisResult>
283where
284 S: TextSegmentSource,
285 F: FnMut(&TextAnalysis) -> Result<()>,
286{
287 pipeline.reset();
288 while let Some(segment) = source.next_text_segment()? {
289 let analysis = pipeline.process_segment(segment)?;
290 on_segment(&analysis)?;
291 }
292 pipeline.finish_analysis()
293}
294
295pub struct TextLineSource<R> {
297 source_info: MediaSourceInfo,
298 reader: R,
299 next_segment_index: u64,
300 language: Option<String>,
301}
302
303impl<R: BufRead> TextLineSource<R> {
304 pub fn recorded(input: impl Into<String>, reader: R) -> Self {
306 Self::new(SourceMode::Recorded, input, reader)
307 }
308
309 pub fn live(input: impl Into<String>, reader: R) -> Self {
311 Self::new(SourceMode::Live, input, reader)
312 }
313
314 pub fn with_language(mut self, language: impl Into<String>) -> Self {
316 let language = language.into();
317 self.language = Some(language.clone());
318 if let Some(text) = self.source_info.text.first_mut() {
319 text.language = Some(language);
320 }
321 self
322 }
323
324 fn new(mode: SourceMode, input: impl Into<String>, reader: R) -> Self {
325 let input = input.into();
326 let source_info = MediaSourceInfo {
327 input,
328 mode,
329 video: None,
330 audio: Vec::new(),
331 text: vec![TextStreamInfo {
332 format: TextFormat::Lines,
333 language: None,
334 }],
335 };
336 Self {
337 source_info,
338 reader,
339 next_segment_index: 0,
340 language: None,
341 }
342 }
343}
344
345impl TextLineSource<BufReader<File>> {
346 pub fn open(path: impl AsRef<Path>) -> Result<Self> {
348 let path = path.as_ref();
349 let file = File::open(path)?;
350 Ok(Self::recorded(
351 path.to_string_lossy().into_owned(),
352 BufReader::new(file),
353 ))
354 }
355}
356
357impl<R: BufRead> TextSegmentSource for TextLineSource<R> {
358 fn source_info(&self) -> &MediaSourceInfo {
359 &self.source_info
360 }
361
362 fn next_text_segment(&mut self) -> Result<Option<OwnedTextSegment>> {
363 let mut line = String::new();
364 let bytes = self.reader.read_line(&mut line)?;
365 if bytes == 0 {
366 return Ok(None);
367 }
368 let text = line.trim_end_matches(['\r', '\n']).to_string();
369 let segment_index = self.next_segment_index;
370 self.next_segment_index += 1;
371 let mut segment = OwnedTextSegment::new(segment_index, text);
372 if let Some(language) = &self.language {
373 segment = segment.language(language.clone());
374 }
375 Ok(Some(segment))
376 }
377}
378
379impl<R: BufRead> MediaSource for TextLineSource<R> {
380 fn source_info(&self) -> &MediaSourceInfo {
381 &self.source_info
382 }
383
384 fn next_sample(&mut self) -> Result<Option<MediaSample>> {
385 self.next_text_segment()
386 .map(|segment| segment.map(MediaSample::Text))
387 }
388}
389
#[cfg(test)]
mod tests {
    use super::*;
    use video_analysis_core::{AudioBuffer, FramePosition, PixelFormat, Timebase, Timestamp};

    /// 960 interleaved samples over 2 channels at 48 kHz is 480 samples per
    /// channel, i.e. 10 ms of audio.
    #[test]
    fn audio_frame_reports_samples_per_channel_and_duration() {
        let frame = OwnedAudioFrame {
            timestamp: Timestamp::new(0, Timebase::new(1, 48_000)),
            sample_rate: 48_000,
            channels: 2,
            data: AudioBuffer::F32(vec![0.0; 960]),
        };

        assert_eq!(frame.sample_format(), AudioSampleFormat::F32);
        assert_eq!(frame.samples_per_channel(), 480);
        // Compare with a tolerance: exact float equality would tie the test
        // to the precise arithmetic used to derive the duration.
        let duration = frame.duration_seconds();
        assert!(
            (duration - 0.01).abs() < 1e-9,
            "unexpected duration: {}",
            duration
        );
    }

    /// The `live` constructor plus `with_video` sets the mode and the video
    /// description and leaves the other stream lists empty.
    #[test]
    fn media_source_info_builders_track_mode_and_streams() {
        let info = MediaSourceInfo::live("rtsp://example/stream").with_video(VideoStreamInfo {
            width: 1920,
            height: 1080,
            frame_rate: Some(Rational64::new(30, 1)),
            pixel_format: PixelFormat::Rgb24,
        });

        assert_eq!(info.mode, SourceMode::Live);
        assert_eq!(info.input, "rtsp://example/stream");
        assert!(info.audio.is_empty());
        assert!(info.text.is_empty());
        assert_eq!(info.video.unwrap().width, 1920);
    }

    /// A video frame can be wrapped in `MediaSample::Video`.
    #[test]
    fn media_sample_can_hold_video_frames() {
        let frame = OwnedVideoFrame {
            position: FramePosition::from_frame_index(0, Rational64::new(30, 1)),
            width: 1,
            height: 1,
            pixel_format: PixelFormat::Rgb24,
            data: vec![0, 0, 0],
            stride: 3,
        };

        assert!(matches!(MediaSample::Video(frame), MediaSample::Video(_)));
    }

    /// Each input line becomes one segment, numbered from zero, with the
    /// terminator stripped and the configured language attached.
    #[test]
    fn text_line_source_yields_segments() {
        let input = std::io::Cursor::new("first\nsecond\n");
        let mut source = TextLineSource::recorded("memory", input).with_language("en");

        // Both source traits define `source_info`, so name the trait explicitly.
        let info = TextSegmentSource::source_info(&source);
        assert_eq!(info.mode, SourceMode::Recorded);
        assert_eq!(info.text[0].language.as_deref(), Some("en"));

        let first = source.next_text_segment().unwrap().unwrap();
        let second = source.next_text_segment().unwrap().unwrap();

        assert_eq!(first.segment_index, 0);
        assert_eq!(first.text, "first");
        assert_eq!(first.language.as_deref(), Some("en"));
        assert_eq!(second.segment_index, 1);
        assert_eq!(second.text, "second");
        assert_eq!(second.language.as_deref(), Some("en"));
        assert!(source.next_text_segment().unwrap().is_none());
    }
}