// subx_cli/services/vad/audio_processor.rs

1use crate::Result;
2use crate::services::vad::audio_loader::DirectAudioLoader;
3use crate::services::vad::detector::AudioInfo;
4use std::path::Path;
5
/// Audio processor for VAD operations.
///
/// Prepares audio files for voice activity detection. The processor is
/// stateless: it keeps the original sample rate of the input and uses only
/// the first channel of multi-channel audio (no resampling, no channel
/// averaging).
#[derive(Debug, Clone, Default)]
pub struct VadAudioProcessor {}
12
/// Processed audio data ready for VAD analysis.
///
/// Bundles a sample buffer with the metadata describing it. As produced by
/// `VadAudioProcessor::load_and_prepare_audio_direct`, the samples are
/// single-channel and `info.total_samples` equals `samples.len()`.
#[derive(Debug, Clone)]
pub struct ProcessedAudioData {
    /// Audio samples as 16-bit signed integers.
    pub samples: Vec<i16>,
    /// Metadata describing `samples` (sample rate, channel count, duration,
    /// total sample count).
    pub info: AudioInfo,
}
24
25impl VadAudioProcessor {
26    /// Create a new VAD audio processor.
27    ///
28    /// # Arguments
29    ///
30    /// * `target_sample_rate` - Desired sample rate for processing
31    /// * `target_channels` - Desired number of audio channels
32    ///
33    /// # Returns
34    ///
35    /// A new `VadAudioProcessor` instance
36    /// Create a new VAD audio processor.
37    pub fn new() -> Result<Self> {
38        Ok(Self {})
39    }
40
41    /// Load and prepare audio file for VAD processing.
42    ///
43    /// Performs all necessary audio processing steps including loading,
44    /// resampling, and format conversion to prepare the audio for
45    /// voice activity detection.
46    ///
47    /// # Arguments
48    ///
49    /// * `audio_path` - Path to the audio file to process
50    ///
51    /// # Returns
52    ///
53    /// Processed audio data ready for VAD analysis
54    ///
55    /// # Errors
56    ///
57    /// Returns an error if:
58    /// - Audio file cannot be loaded
59    /// - Audio format is unsupported
60    /// - Resampling fails
61    /// - Format conversion fails
62    ///
63    /// Directly loads and prepares audio files for VAD processing, supporting multiple formats.
64    /// Load and prepare audio file for VAD processing.
65    ///
66    /// Uses original sample rate and first channel only.
67    pub async fn load_and_prepare_audio_direct(
68        &self,
69        audio_path: &Path,
70    ) -> Result<ProcessedAudioData> {
71        // 1. Load with DirectAudioLoader
72        let loader = DirectAudioLoader::new()?;
73        let (samples, info) = match loader.load_audio_samples(audio_path) {
74            Ok((samples, info)) => (samples, info),
75            Err(e) => {
76                // If the file is empty, return empty samples
77                if let Ok(metadata) = std::fs::metadata(audio_path) {
78                    if metadata.len() == 0 {
79                        return Ok(ProcessedAudioData {
80                            samples: vec![],
81                            info: AudioInfo {
82                                sample_rate: 16000, // Default value
83                                channels: 1,
84                                duration_seconds: 0.0,
85                                total_samples: 0,
86                            },
87                        });
88                    }
89                }
90                return Err(e);
91            }
92        };
93
94        // 2. Extract first channel if multi-channel, retain original sample rate
95        let mono_samples = if info.channels == 1 {
96            samples
97        } else {
98            self.extract_first_channel(&samples, info.channels as usize)
99        };
100        let mono_info = AudioInfo {
101            sample_rate: info.sample_rate,
102            channels: 1,
103            duration_seconds: info.duration_seconds,
104            total_samples: mono_samples.len(),
105        };
106        Ok(ProcessedAudioData {
107            samples: mono_samples,
108            info: mono_info,
109        })
110    }
111
112    // Removed resampling and multi-channel averaging methods
113
114    /// Extract the first channel samples from interleaved multi-channel data.
115    fn extract_first_channel(&self, samples: &[i16], channels: usize) -> Vec<i16> {
116        samples.iter().step_by(channels).copied().collect()
117    }
118}