1use crate::error::{Error, Result};
34use crate::time::{AudioDuration, AudioInstant};
35
36use crate::decoder::{ChannelMixer, SampleRateConverter, WavDecoder};
37use crate::format::{AudioFormat, FormatDetector};
38
39#[derive(Debug, Clone, PartialEq)]
43pub struct StandardAudio {
44 pub samples: Vec<f32>,
46 pub metadata: ConversionMetadata,
48}
49
50impl StandardAudio {
51 #[must_use]
53 pub fn sample_count(&self) -> usize {
54 self.samples.len()
55 }
56
57 #[must_use]
59 pub fn duration_sec(&self) -> f64 {
60 self.samples.len() as f64 / 16000.0
61 }
62
63 #[must_use]
65 pub fn is_silent(&self) -> bool {
66 self.samples.iter().all(|&s| s.abs() < 1e-4)
67 }
68}
69
70#[derive(Debug, Clone, Copy, PartialEq)]
74pub struct ConversionMetadata {
75 pub original_format: AudioFormat,
77 pub original_sample_rate: u32,
79 pub original_channels: u8,
81 pub original_bit_depth: Option<u16>,
83 pub peak_before: f32,
85 pub peak_after: f32,
87 pub conversion_time_ms: f64,
89 pub detection_time_ms: f64,
91 pub decode_time_ms: f64,
93 pub resample_time_ms: f64,
95 pub mix_time_ms: f64,
97}
98
99impl ConversionMetadata {
100 #[must_use]
109 pub fn has_performance_issue(&self) -> bool {
110 self.conversion_time_ms > 10.0
111 || self.detection_time_ms > 1.0
112 || self.decode_time_ms > 3.0
113 || self.resample_time_ms > 5.0
114 || self.mix_time_ms > 1.0
115 }
116
117 #[must_use]
124 pub fn peak_ratio(&self) -> f32 {
125 if self.peak_before.abs() < f32::EPSILON {
126 1.0 } else {
128 self.peak_after / self.peak_before
129 }
130 }
131}
132
133#[derive(Debug, Default, Clone, Copy)]
175pub struct AudioFormatConverter;
176
177impl AudioFormatConverter {
178 #[must_use]
180 pub const fn new() -> Self {
181 Self
182 }
183
184 #[allow(clippy::cognitive_complexity)] pub fn convert_to_standard(audio_bytes: &[u8]) -> Result<StandardAudio> {
227 let pipeline_start = AudioInstant::now();
228
229 tracing::debug!(
230 audio_bytes_len = audio_bytes.len(),
231 "Starting audio format conversion pipeline"
232 );
233
234 let detection_start = AudioInstant::now();
235 let format_metadata = FormatDetector::detect(audio_bytes)?;
236 let detection_duration = elapsed_since(detection_start);
237 let detection_time_ms = detection_duration.as_secs_f64() * 1000.0;
238
239 tracing::debug!(
240 format = %format_metadata.format,
241 detection_time_ms,
242 "Format detection complete"
243 );
244
245 if format_metadata.format != AudioFormat::WavPcm {
246 return Err(Error::InvalidInput(format!(
247 "unsupported format for decoding: {} (only WAV supported)",
248 format_metadata.format.as_str()
249 )));
250 }
251
252 let decode_start = AudioInstant::now();
253 let decoded = WavDecoder::decode(audio_bytes)?;
254 let decode_duration = elapsed_since(decode_start);
255 let decode_time_ms = decode_duration.as_secs_f64() * 1000.0;
256
257 tracing::debug!(
258 sample_rate = decoded.sample_rate,
259 channels = decoded.channels,
260 bit_depth = decoded.bit_depth,
261 sample_count = decoded.samples.len(),
262 decode_time_ms,
263 "WAV decoding complete"
264 );
265
266 let peak_before = decoded
267 .samples
268 .iter()
269 .map(|s| s.abs())
270 .fold(0.0f32, f32::max);
271
272 let resample_start = AudioInstant::now();
273 let resampled = SampleRateConverter::resample_to_16khz(
274 &decoded.samples,
275 decoded.channels,
276 decoded.sample_rate,
277 )?;
278 let resample_duration = elapsed_since(resample_start);
279 let resample_time_ms = resample_duration.as_secs_f64() * 1000.0;
280
281 tracing::debug!(
282 input_rate = decoded.sample_rate,
283 output_rate = SampleRateConverter::TARGET_SAMPLE_RATE,
284 output_samples = resampled.len(),
285 resample_time_ms,
286 "Sample rate conversion complete"
287 );
288
289 let mix_start = AudioInstant::now();
290 let mixed = ChannelMixer::mix_to_mono(&resampled, decoded.channels)?;
291 let mix_duration = elapsed_since(mix_start);
292 let mix_time_ms = mix_duration.as_secs_f64() * 1000.0;
293
294 tracing::debug!(
295 input_channels = decoded.channels,
296 output_samples = mixed.samples.len(),
297 peak_before_mix = mixed.peak_before_mix,
298 peak_after_mix = mixed.peak_after_mix,
299 mix_time_ms,
300 "Channel mixing complete"
301 );
302
303 let conversion_duration = elapsed_since(pipeline_start);
304 let conversion_time_ms = conversion_duration.as_secs_f64() * 1000.0;
305
306 if conversion_time_ms > 10.0 {
307 tracing::warn!(
308 conversion_time_ms,
309 detection_time_ms,
310 decode_time_ms,
311 resample_time_ms,
312 mix_time_ms,
313 "Audio conversion exceeded 10ms target latency"
314 );
315 } else {
316 tracing::debug!(conversion_time_ms, "Audio conversion pipeline complete");
317 }
318
319 let metadata = ConversionMetadata {
320 original_format: format_metadata.format,
321 original_sample_rate: decoded.sample_rate,
322 original_channels: decoded.channels,
323 original_bit_depth: Some(decoded.bit_depth),
324 peak_before,
325 peak_after: mixed.peak_after_mix,
326 conversion_time_ms,
327 detection_time_ms,
328 decode_time_ms,
329 resample_time_ms,
330 mix_time_ms,
331 };
332
333 Ok(StandardAudio {
334 samples: mixed.samples,
335 metadata,
336 })
337 }
338}
339
340fn elapsed_since(start: AudioInstant) -> AudioDuration {
341 AudioInstant::now().duration_since(start)
342}
343
#[cfg(test)]
mod tests {
    use super::*;

    /// Test result type: failures are carried as strings so `?` works for
    /// both WAV-writing errors and conversion errors.
    type TestResult<T> = std::result::Result<T, String>;

    /// Encodes `samples` as an in-memory 16-bit integer WAV file with the
    /// given sample rate and channel count.
    fn create_test_wav(sample_rate: u32, channels: u16, samples: &[i16]) -> TestResult<Vec<u8>> {
        let spec = hound::WavSpec {
            sample_rate,
            channels,
            bits_per_sample: 16,
            sample_format: hound::SampleFormat::Int,
        };

        let mut buffer = std::io::Cursor::new(Vec::new());
        let mut writer = hound::WavWriter::new(&mut buffer, spec)
            .map_err(|e| format!("failed to create WAV writer: {e}"))?;

        samples.iter().try_for_each(|&sample| {
            writer
                .write_sample(sample)
                .map_err(|e| format!("failed to write sample: {e}"))
        })?;

        writer
            .finalize()
            .map_err(|e| format!("failed to finalize WAV: {e}"))?;

        Ok(buffer.into_inner())
    }

    /// Runs the converter and turns its error into a string for `TestResult`.
    fn convert(wav: &[u8]) -> TestResult<StandardAudio> {
        AudioFormatConverter::convert_to_standard(wav).map_err(|e| e.to_string())
    }

    #[test]
    fn test_convert_mono_16khz_identity() -> TestResult<()> {
        let wav = create_test_wav(16000, 1, &[100, 200, -100, -200])?;

        let standard = convert(&wav)?;
        let meta = standard.metadata;

        // Input already at 16 kHz mono: the sample count must be unchanged.
        assert_eq!(standard.samples.len(), 4);
        assert_eq!(meta.original_sample_rate, 16000);
        assert_eq!(meta.original_channels, 1);
        assert_eq!(meta.original_format, AudioFormat::WavPcm);

        Ok(())
    }

    #[test]
    fn test_convert_stereo_44100_to_standard() -> TestResult<()> {
        // Four interleaved values, i.e. two stereo frames.
        let wav = create_test_wav(44100, 2, &[1000, -1000, 2000, -2000])?;

        let standard = convert(&wav)?;

        assert!(!standard.samples.is_empty());
        assert_eq!(standard.metadata.original_sample_rate, 44100);
        assert_eq!(standard.metadata.original_channels, 2);

        Ok(())
    }

    #[test]
    fn test_convert_tracks_timing() -> TestResult<()> {
        let wav = create_test_wav(16000, 1, &[0i16; 1000])?;

        let meta = convert(&wav)?.metadata;

        assert!(meta.detection_time_ms >= 0.0);
        assert!(meta.decode_time_ms >= 0.0);
        assert!(meta.resample_time_ms >= 0.0);
        assert!(meta.mix_time_ms >= 0.0);
        assert!(meta.conversion_time_ms >= 0.0);

        let stage_sum = meta.detection_time_ms
            + meta.decode_time_ms
            + meta.resample_time_ms
            + meta.mix_time_ms;

        // The total also covers work between stages, so allow up to 1 ms of slack.
        assert!(
            (meta.conversion_time_ms - stage_sum).abs() < 1.0,
            "total time {} should approximately equal stage sum {}",
            meta.conversion_time_ms,
            stage_sum
        );

        Ok(())
    }

    #[test]
    fn test_convert_tracks_peaks() -> TestResult<()> {
        let wav = create_test_wav(16000, 1, &[10000, -10000, 5000, -5000])?;

        let meta = convert(&wav)?.metadata;

        assert!(meta.peak_before > 0.0);
        assert!(meta.peak_after > 0.0);

        // 0.305 is consistent with 10000 / 32768, i.e. samples scaled by the
        // i16 full-scale value (the decoder itself is not shown in this file).
        assert!(
            (meta.peak_before - 0.305).abs() < 0.01,
            "expected peak ~0.305, got {}",
            meta.peak_before
        );

        Ok(())
    }

    #[test]
    fn test_convert_rejects_non_wav() {
        // Four bytes chosen to look like non-WAV (MP3-style) input.
        let mp3_bytes = [0xFF_u8, 0xFB, 0x90, 0x00];
        let outcome = AudioFormatConverter::convert_to_standard(&mp3_bytes);

        assert!(outcome.is_err());
        if let Err(failure) = outcome {
            let text = failure.to_string();
            assert!(text.contains("MP3") || text.contains("unsupported"));
        }
    }

    #[test]
    fn test_standard_audio_duration_calculation() -> TestResult<()> {
        // 16 000 samples at 16 kHz is one second of audio.
        let wav = create_test_wav(16000, 1, &[0i16; 16000])?;

        let standard = convert(&wav)?;

        assert!((standard.duration_sec() - 1.0).abs() < 0.01);

        Ok(())
    }

    #[test]
    fn test_standard_audio_is_silent_detection() -> TestResult<()> {
        let wav = create_test_wav(16000, 1, &[0i16; 100])?;

        let standard = convert(&wav)?;

        assert!(standard.is_silent());

        Ok(())
    }

    #[test]
    fn test_conversion_metadata_peak_ratio() -> TestResult<()> {
        let wav = create_test_wav(16000, 1, &[10000, -10000])?;

        let ratio = convert(&wav)?.metadata.peak_ratio();

        // Mono 16 kHz input should come through with its peak essentially unchanged.
        assert!(
            (ratio - 1.0).abs() < 0.1,
            "expected peak ratio ~1.0, got {}",
            ratio
        );

        Ok(())
    }
}