subx_cli/services/vad/audio_processor.rs
1use crate::Result;
2use crate::services::vad::audio_loader::DirectAudioLoader;
3use crate::services::vad::detector::AudioInfo;
4use std::path::Path;
5
/// Audio processor for VAD operations.
///
/// Loads audio files for voice activity detection. Audio is kept at the
/// sample rate reported by the loader and, for multi-channel input, only the
/// first channel is used; no resampling or channel averaging is performed.
///
/// The type carries no state, so it is cheap to create, clone, and share.
#[derive(Debug, Clone, Default)]
pub struct VadAudioProcessor {}
12
13/// Processed audio data ready for VAD analysis.
14///
15/// Contains the audio samples and metadata after processing
16/// and format conversion.
17#[derive(Debug)]
18pub struct ProcessedAudioData {
19 /// Audio samples as 16-bit integers
20 pub samples: Vec<i16>,
21 /// Audio metadata and properties
22 pub info: AudioInfo,
23}
24
25impl VadAudioProcessor {
26 /// Create a new VAD audio processor.
27 ///
28 /// # Arguments
29 ///
30 /// * `target_sample_rate` - Desired sample rate for processing
31 /// * `target_channels` - Desired number of audio channels
32 ///
33 /// # Returns
34 ///
35 /// A new `VadAudioProcessor` instance
36 /// Create a new VAD audio processor.
37 pub fn new() -> Result<Self> {
38 Ok(Self {})
39 }
40
41 /// Load and prepare audio file for VAD processing.
42 ///
43 /// Performs all necessary audio processing steps including loading,
44 /// resampling, and format conversion to prepare the audio for
45 /// voice activity detection.
46 ///
47 /// # Arguments
48 ///
49 /// * `audio_path` - Path to the audio file to process
50 ///
51 /// # Returns
52 ///
53 /// Processed audio data ready for VAD analysis
54 ///
55 /// # Errors
56 ///
57 /// Returns an error if:
58 /// - Audio file cannot be loaded
59 /// - Audio format is unsupported
60 /// - Resampling fails
61 /// - Format conversion fails
62 ///
63 /// Directly loads and prepares audio files for VAD processing, supporting multiple formats.
64 /// Load and prepare audio file for VAD processing.
65 ///
66 /// Uses original sample rate and first channel only.
67 pub async fn load_and_prepare_audio_direct(
68 &self,
69 audio_path: &Path,
70 ) -> Result<ProcessedAudioData> {
71 // 1. Load with DirectAudioLoader
72 let loader = DirectAudioLoader::new()?;
73 let (samples, info) = loader.load_audio_samples(audio_path)?;
74
75 // 2. Extract first channel if multi-channel, retain original sample rate
76 let mono_samples = if info.channels == 1 {
77 samples
78 } else {
79 self.extract_first_channel(&samples, info.channels as usize)
80 };
81 let mono_info = AudioInfo {
82 sample_rate: info.sample_rate,
83 channels: 1,
84 duration_seconds: mono_samples.len() as f64 / info.sample_rate as f64,
85 total_samples: mono_samples.len(),
86 };
87 Ok(ProcessedAudioData {
88 samples: mono_samples,
89 info: mono_info,
90 })
91 }
92
93 // Removed resampling and multi-channel averaging methods
94
95 /// Extract the first channel samples from interleaved multi-channel data.
96 fn extract_first_channel(&self, samples: &[i16], channels: usize) -> Vec<i16> {
97 samples.iter().step_by(channels).copied().collect()
98 }
99}