1use crate::services::audio::{AudioData, AudioEnvelope};
4use crate::{Result, error::SubXError};
5use aus::{AudioFile, WindowType, analysis, operations, spectrum};
6use std::path::Path;
7
/// Audio analyzer that delegates decoding and signal analysis to the `aus` crate.
///
/// Derives `Debug` and `Clone` like the other public types in this file.
#[derive(Debug, Clone)]
pub struct AusAudioAnalyzer {
    /// Sample rate supplied at construction; copied onto every extracted envelope.
    sample_rate: u32,
    /// Window size handed to the STFT and to the FFT frequency-bin computation.
    window_size: usize,
    /// Step, in samples, between consecutive analysis frames and energy chunks.
    hop_size: usize,
}
14
15impl AusAudioAnalyzer {
16 pub fn new(sample_rate: u32) -> Self {
18 Self {
19 sample_rate,
20 window_size: 1024,
21 hop_size: 512,
22 }
23 }
24
25 pub async fn load_audio_file<P: AsRef<Path>>(&self, audio_path: P) -> Result<AudioFile> {
27 let path = audio_path.as_ref();
28 let path_str = path
29 .to_str()
30 .ok_or_else(|| SubXError::audio_processing("Failed to convert path to UTF-8 string"))?;
31 let mut audio_file = aus::read(path_str)?;
32 if audio_file.num_channels > 1 {
33 aus::mixdown(&mut audio_file);
34 }
35
36 if audio_file.duration == 0.0 && !audio_file.samples[0].is_empty() {
38 audio_file.duration =
39 audio_file.samples[0].len() as f64 / audio_file.sample_rate as f64;
40 }
41
42 Ok(audio_file)
43 }
44
45 pub async fn load_audio_data<P: AsRef<Path>>(&self, audio_path: P) -> Result<AudioData> {
47 let audio_file = self.load_audio_file(audio_path).await?;
48 let samples: Vec<f32> = audio_file.samples[0].iter().map(|&x| x as f32).collect();
49 Ok(AudioData {
50 samples,
51 sample_rate: audio_file.sample_rate,
52 channels: audio_file.num_channels,
53 duration: audio_file.duration as f32,
54 })
55 }
56
57 pub async fn extract_envelope<P: AsRef<Path>>(&self, audio_path: P) -> Result<AudioEnvelope> {
59 let audio_file = self.load_audio_file(audio_path).await?;
60 let samples = &audio_file.samples[0];
61 let mut energy_samples = Vec::new();
62 for chunk in samples.chunks(self.hop_size) {
63 let rms_energy = operations::rms(chunk);
64 energy_samples.push(rms_energy as f32);
65 }
66
67 let duration = if audio_file.duration > 0.0 {
69 audio_file.duration as f32
70 } else {
71 samples.len() as f32 / audio_file.sample_rate as f32
72 };
73
74 Ok(AudioEnvelope {
75 samples: energy_samples,
76 sample_rate: self.sample_rate,
77 duration,
78 })
79 }
80
81 pub fn detect_dialogue(
83 &self,
84 envelope: &AudioEnvelope,
85 threshold: f32,
86 ) -> Vec<crate::services::audio::DialogueSegment> {
87 let mut segments = Vec::new();
88 let mut in_dialogue = false;
89 let mut start = 0.0;
90 let time_per_sample = envelope.duration / envelope.samples.len() as f32;
91
92 for (i, &e) in envelope.samples.iter().enumerate() {
93 let t = i as f32 * time_per_sample;
94 if e > threshold && !in_dialogue {
95 in_dialogue = true;
96 start = t;
97 } else if e <= threshold && in_dialogue {
98 in_dialogue = false;
99 if t - start > 0.5 {
100 segments.push(crate::services::audio::DialogueSegment {
101 start_time: start,
102 end_time: t,
103 intensity: e,
104 });
105 }
106 }
107 }
108
109 segments
110 }
111
112 pub async fn analyze_audio_features(&self, audio_file: &AudioFile) -> Result<AudioFeatures> {
114 let samples = &audio_file.samples[0];
115 let stft_result = spectrum::rstft(
116 samples,
117 self.window_size,
118 self.hop_size,
119 WindowType::Hanning,
120 );
121
122 let mut features = Vec::new();
123 for frame in stft_result.iter() {
124 let (magnitude_spectrum, _) = spectrum::complex_to_polar_rfft(frame);
125 let frequencies = spectrum::rfftfreq(self.window_size, audio_file.sample_rate);
126
127 let spectral_centroid = analysis::spectral_centroid(&magnitude_spectrum, &frequencies);
128 let spectral_entropy = analysis::spectral_entropy(&magnitude_spectrum);
129 let zero_crossing_rate = analysis::zero_crossing_rate(samples, audio_file.sample_rate);
130
131 features.push(FrameFeatures {
132 spectral_centroid: spectral_centroid as f32,
133 spectral_entropy: spectral_entropy as f32,
134 zero_crossing_rate: zero_crossing_rate as f32,
135 });
136 }
137
138 Ok(AudioFeatures { frames: features })
139 }
140}
141
142#[derive(Debug, Clone)]
147pub struct AudioFeatures {
148 pub frames: Vec<FrameFeatures>,
150}
151
/// Features describing a single analysis frame.
///
/// `PartialEq` allows frames to be compared directly; `Default` yields an
/// all-zero frame.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct FrameFeatures {
    /// Spectral centroid of the frame's magnitude spectrum.
    pub spectral_centroid: f32,
    /// Spectral entropy of the frame's magnitude spectrum.
    pub spectral_entropy: f32,
    /// Zero-crossing rate associated with the frame.
    pub zero_crossing_rate: f32,
}
165
166#[cfg(test)]
167mod tests {
168 use super::*;
169 use std::fs;
170 use tempfile::TempDir;
171
172 #[ignore]
174 #[tokio::test]
175 async fn test_load_audio_file_success() {
176 let analyzer = AusAudioAnalyzer::new(44100);
177 let temp_dir = TempDir::new().unwrap();
178 let wav_data = create_minimal_wav_file(44100, 1, 1.0);
180 let wav_path = temp_dir.path().join("test.wav");
181 fs::write(&wav_path, wav_data).unwrap();
182
183 let result = analyzer.load_audio_file(&wav_path).await;
184 assert!(result.is_ok());
185
186 let audio_file = result.unwrap();
187 assert_eq!(audio_file.sample_rate, 44100);
188 assert!(audio_file.duration > 0.0);
189 assert_eq!(audio_file.num_channels, 1);
190 }
191
192 #[ignore]
194 #[tokio::test]
195 async fn test_load_audio_file_not_exists() {
196 let analyzer = AusAudioAnalyzer::new(44100);
197 let result = analyzer.load_audio_file("non_existent.wav").await;
198 assert!(result.is_err());
199 }
200
201 #[ignore]
203 #[tokio::test]
204 async fn test_load_audio_data_conversion() {
205 let analyzer = AusAudioAnalyzer::new(16000);
206 let temp_dir = TempDir::new().unwrap();
207
208 let wav_data = create_minimal_wav_file(16000, 1, 2.0);
209 let wav_path = temp_dir.path().join("test.wav");
210 fs::write(&wav_path, wav_data).unwrap();
211
212 let audio_data = analyzer.load_audio_data(&wav_path).await.unwrap();
213
214 assert_eq!(audio_data.sample_rate, 16000);
215 assert_eq!(audio_data.channels, 1);
216 assert!(audio_data.duration > 1.9 && audio_data.duration < 2.1);
217 assert!(!audio_data.samples.is_empty());
218 }
219
220 #[ignore]
222 #[tokio::test]
223 async fn test_extract_envelope_features() {
224 let sample_rate = 44100;
225 let analyzer = AusAudioAnalyzer::new(sample_rate);
226 let temp_dir = TempDir::new().unwrap();
227
228 let wav_data = create_varying_energy_wav(44100, 2.0);
230 let wav_path = temp_dir.path().join("varying.wav");
231 fs::write(&wav_path, wav_data).unwrap();
232
233 let envelope = analyzer.extract_envelope(&wav_path).await.unwrap();
234
235 assert!(!envelope.samples.is_empty());
236 assert_eq!(envelope.sample_rate, sample_rate);
237 assert!(envelope.duration > 1.9);
238
239 for &energy in &envelope.samples {
241 assert!(energy >= 0.0);
242 assert!(energy <= 1.0);
243 }
244 }
245
246 #[ignore]
248 #[tokio::test]
249 async fn test_detect_dialogue_segments() {
250 let analyzer = AusAudioAnalyzer::new(16000);
251
252 let envelope = AudioEnvelope {
254 samples: vec![
255 0.1, 0.8, 0.9, 0.7, 0.2, 0.05, 0.03, 0.02, 0.04, 0.6, 0.8, 0.7, 0.9, 0.5, ],
259 sample_rate: 16000,
260 duration: 2.0,
261 };
262
263 let segments = analyzer.detect_dialogue(&envelope, 0.3);
264
265 assert!(!segments.is_empty());
266
267 let speech_segments: Vec<_> = segments.iter().filter(|s| s.intensity > 0.3).collect();
269 assert!(speech_segments.len() >= 2);
270 }
271
272 #[ignore]
274 #[tokio::test]
275 async fn test_audio_features_analysis() {
276 let analyzer = AusAudioAnalyzer::new(44100);
277 let temp_dir = TempDir::new().unwrap();
278
279 let wav_data = create_spectral_rich_wav(44100, 1.0);
280 let wav_path = temp_dir.path().join("rich.wav");
281 fs::write(&wav_path, wav_data).unwrap();
282
283 let audio_file = analyzer.load_audio_file(&wav_path).await.unwrap();
284 let features = analyzer.analyze_audio_features(&audio_file).await.unwrap();
285
286 assert!(!features.frames.is_empty());
287
288 for frame in &features.frames {
289 assert!(frame.spectral_centroid >= 0.0);
291 assert!(frame.spectral_centroid <= 22050.0);
292
293 assert!(frame.spectral_entropy >= 0.0);
295 assert!(frame.spectral_entropy <= 1.0);
296
297 assert!(frame.zero_crossing_rate >= 0.0);
299 assert!(frame.zero_crossing_rate <= 1.0);
300 }
301 }
302
303 #[ignore]
305 #[tokio::test]
306 async fn test_invalid_audio_format() {
307 let analyzer = AusAudioAnalyzer::new(44100);
308 let temp_dir = TempDir::new().unwrap();
309
310 let invalid_path = temp_dir.path().join("invalid.wav");
312 fs::write(&invalid_path, b"This is not audio data").unwrap();
313
314 let result = analyzer.load_audio_file(&invalid_path).await;
315 assert!(result.is_err());
316 }
317
318 #[ignore]
320 #[tokio::test]
321 async fn test_large_file_memory_management() {
322 let analyzer = AusAudioAnalyzer::new(44100);
323 let temp_dir = TempDir::new().unwrap();
324
325 let wav_data = create_minimal_wav_file(44100, 1, 10.0);
327 let wav_path = temp_dir.path().join("large.wav");
328 fs::write(&wav_path, wav_data).unwrap();
329
330 let start_memory = get_memory_usage();
331 let _audio_data = analyzer.load_audio_data(&wav_path).await.unwrap();
332 let end_memory = get_memory_usage();
333
334 assert!((end_memory - start_memory) < 100_000_000);
336 }
337
338 fn create_minimal_wav_file(sample_rate: u32, channels: u16, duration: f32) -> Vec<u8> {
340 let samples_per_channel = (sample_rate as f32 * duration) as u32;
341 let total_samples = samples_per_channel * channels as u32;
342 let data_size = total_samples * 2; let mut wav_data = Vec::new();
344 wav_data.extend_from_slice(b"RIFF");
346 wav_data.extend_from_slice(&(36 + data_size).to_le_bytes());
347 wav_data.extend_from_slice(b"WAVE");
348 wav_data.extend_from_slice(b"fmt ");
349 wav_data.extend_from_slice(&16u32.to_le_bytes());
350 wav_data.extend_from_slice(&1u16.to_le_bytes()); wav_data.extend_from_slice(&channels.to_le_bytes());
352 wav_data.extend_from_slice(&sample_rate.to_le_bytes());
353 wav_data.extend_from_slice(&(sample_rate * channels as u32 * 2).to_le_bytes());
354 wav_data.extend_from_slice(&(channels * 2).to_le_bytes());
355 wav_data.extend_from_slice(&16u16.to_le_bytes());
356 wav_data.extend_from_slice(b"data");
357 wav_data.extend_from_slice(&data_size.to_le_bytes());
358 for i in 0..total_samples {
360 let t = i as f32 / sample_rate as f32;
361 let amplitude = (2.0 * std::f32::consts::PI * 440.0 * t).sin();
362 let sample = (amplitude * 32767.0) as i16;
363 wav_data.extend_from_slice(&sample.to_le_bytes());
364 }
365 wav_data
366 }
367
368 fn create_varying_energy_wav(sample_rate: u32, duration: f32) -> Vec<u8> {
369 create_minimal_wav_file(sample_rate, 1, duration)
371 }
372
373 fn create_spectral_rich_wav(sample_rate: u32, duration: f32) -> Vec<u8> {
374 create_minimal_wav_file(sample_rate, 1, duration)
376 }
377
378 fn get_memory_usage() -> usize {
379 0 }
382}