subx_cli/services/vad/audio_processor.rs
1use crate::Result;
2use crate::services::vad::audio_loader::DirectAudioLoader;
3use crate::services::vad::detector::AudioInfo;
4use std::path::Path;
5
/// Audio processor for VAD operations.
///
/// Loads audio files for voice activity detection, keeping the original
/// sample rate and using only the first channel of multi-channel input.
/// No resampling or channel mixing is performed.
///
/// The processor carries no state, so it is cheap to create, clone, or
/// build through `Default`.
#[derive(Debug, Clone, Default)]
pub struct VadAudioProcessor {}
12
13/// Processed audio data ready for VAD analysis.
14///
15/// Contains the audio samples and metadata after processing
16/// and format conversion.
17#[derive(Debug, Clone)]
18pub struct ProcessedAudioData {
19 /// Audio samples as 16-bit integers
20 pub samples: Vec<i16>,
21 /// Audio metadata and properties
22 pub info: AudioInfo,
23}
24
25impl VadAudioProcessor {
26 /// Create a new VAD audio processor.
27 ///
28 /// # Arguments
29 ///
30 /// * `target_sample_rate` - Desired sample rate for processing
31 /// * `target_channels` - Desired number of audio channels
32 ///
33 /// # Returns
34 ///
35 /// A new `VadAudioProcessor` instance
36 /// Create a new VAD audio processor.
37 pub fn new() -> Result<Self> {
38 Ok(Self {})
39 }
40
41 /// Load and prepare audio file for VAD processing.
42 ///
43 /// Performs all necessary audio processing steps including loading,
44 /// resampling, and format conversion to prepare the audio for
45 /// voice activity detection.
46 ///
47 /// # Arguments
48 ///
49 /// * `audio_path` - Path to the audio file to process
50 ///
51 /// # Returns
52 ///
53 /// Processed audio data ready for VAD analysis
54 ///
55 /// # Errors
56 ///
57 /// Returns an error if:
58 /// - Audio file cannot be loaded
59 /// - Audio format is unsupported
60 /// - Resampling fails
61 /// - Format conversion fails
62 ///
63 /// Directly loads and prepares audio files for VAD processing, supporting multiple formats.
64 /// Load and prepare audio file for VAD processing.
65 ///
66 /// Uses original sample rate and first channel only.
67 pub async fn load_and_prepare_audio_direct(
68 &self,
69 audio_path: &Path,
70 ) -> Result<ProcessedAudioData> {
71 // 1. Load with DirectAudioLoader
72 let loader = DirectAudioLoader::new()?;
73 let (samples, info) = match loader.load_audio_samples(audio_path) {
74 Ok((samples, info)) => (samples, info),
75 Err(e) => {
76 // If the file is empty, return empty samples
77 if let Ok(metadata) = std::fs::metadata(audio_path) {
78 if metadata.len() == 0 {
79 return Ok(ProcessedAudioData {
80 samples: vec![],
81 info: AudioInfo {
82 sample_rate: 16000, // Default value
83 channels: 1,
84 duration_seconds: 0.0,
85 total_samples: 0,
86 },
87 });
88 }
89 }
90 return Err(e);
91 }
92 };
93
94 // 2. Extract first channel if multi-channel, retain original sample rate
95 let mono_samples = if info.channels == 1 {
96 samples
97 } else {
98 self.extract_first_channel(&samples, info.channels as usize)
99 };
100 let mono_info = AudioInfo {
101 sample_rate: info.sample_rate,
102 channels: 1,
103 duration_seconds: info.duration_seconds,
104 total_samples: mono_samples.len(),
105 };
106 Ok(ProcessedAudioData {
107 samples: mono_samples,
108 info: mono_info,
109 })
110 }
111
112 // Removed resampling and multi-channel averaging methods
113
114 /// Extract the first channel samples from interleaved multi-channel data.
115 fn extract_first_channel(&self, samples: &[i16], channels: usize) -> Vec<i16> {
116 samples.iter().step_by(channels).copied().collect()
117 }
118}