subx_cli/services/vad/
audio_processor.rs

1use crate::Result;
2use crate::services::vad::audio_loader::DirectAudioLoader;
3use crate::services::vad::detector::AudioInfo;
4use std::path::Path;
5
/// Audio processor for VAD operations.
///
/// Loads audio files and prepares them for voice activity detection.
/// The audio is kept at its original sample rate and only the first
/// channel is used; no resampling or channel mixing is performed.
///
/// The type carries no state, so instances are free to create.
#[derive(Debug)]
pub struct VadAudioProcessor {}
12
13/// Processed audio data ready for VAD analysis.
14///
15/// Contains the audio samples and metadata after processing
16/// and format conversion.
17#[derive(Debug)]
18pub struct ProcessedAudioData {
19    /// Audio samples as 16-bit integers
20    pub samples: Vec<i16>,
21    /// Audio metadata and properties
22    pub info: AudioInfo,
23}
24
25impl VadAudioProcessor {
26    /// Create a new VAD audio processor.
27    ///
28    /// # Arguments
29    ///
30    /// * `target_sample_rate` - Desired sample rate for processing
31    /// * `target_channels` - Desired number of audio channels
32    ///
33    /// # Returns
34    ///
35    /// A new `VadAudioProcessor` instance
36    /// Create a new VAD audio processor.
37    pub fn new() -> Result<Self> {
38        Ok(Self {})
39    }
40
41    /// Load and prepare audio file for VAD processing.
42    ///
43    /// Performs all necessary audio processing steps including loading,
44    /// resampling, and format conversion to prepare the audio for
45    /// voice activity detection.
46    ///
47    /// # Arguments
48    ///
49    /// * `audio_path` - Path to the audio file to process
50    ///
51    /// # Returns
52    ///
53    /// Processed audio data ready for VAD analysis
54    ///
55    /// # Errors
56    ///
57    /// Returns an error if:
58    /// - Audio file cannot be loaded
59    /// - Audio format is unsupported
60    /// - Resampling fails
61    /// - Format conversion fails
62    ///
63    /// Directly loads and prepares audio files for VAD processing, supporting multiple formats.
64    /// Load and prepare audio file for VAD processing.
65    ///
66    /// Uses original sample rate and first channel only.
67    pub async fn load_and_prepare_audio_direct(
68        &self,
69        audio_path: &Path,
70    ) -> Result<ProcessedAudioData> {
71        // 1. Load with DirectAudioLoader
72        let loader = DirectAudioLoader::new()?;
73        let (samples, info) = loader.load_audio_samples(audio_path)?;
74
75        // 2. Extract first channel if multi-channel, retain original sample rate
76        let mono_samples = if info.channels == 1 {
77            samples
78        } else {
79            self.extract_first_channel(&samples, info.channels as usize)
80        };
81        let mono_info = AudioInfo {
82            sample_rate: info.sample_rate,
83            channels: 1,
84            duration_seconds: mono_samples.len() as f64 / info.sample_rate as f64,
85            total_samples: mono_samples.len(),
86        };
87        Ok(ProcessedAudioData {
88            samples: mono_samples,
89            info: mono_info,
90        })
91    }
92
93    // Removed resampling and multi-channel averaging methods
94
95    /// Extract the first channel samples from interleaved multi-channel data.
96    fn extract_first_channel(&self, samples: &[i16], channels: usize) -> Vec<i16> {
97        samples.iter().step_by(channels).copied().collect()
98    }
99}