1use crate::error::{Error, Result};
37use crate::time::{AudioDuration, AudioInstant};
38
39use crate::decoder::{ChannelMixer, SampleRateConverter, WavDecoder};
40use crate::format::{AudioFormat, FormatDetector};
41
42#[derive(Debug, Clone, PartialEq)]
48pub struct StandardAudio {
49 pub samples: Vec<f32>,
51 pub metadata: ConversionMetadata,
53}
54
55impl StandardAudio {
56 #[must_use]
58 pub fn sample_count(&self) -> usize {
59 self.samples.len()
60 }
61
62 #[must_use]
64 pub fn duration_sec(&self) -> f64 {
65 self.samples.len() as f64 / 16000.0
66 }
67
68 #[must_use]
70 pub fn is_silent(&self) -> bool {
71 self.samples.iter().all(|&s| s.abs() < 1e-4)
72 }
73}
74
75#[derive(Debug, Clone, Copy, PartialEq)]
80pub struct ConversionMetadata {
81 pub original_format: AudioFormat,
83 pub original_sample_rate: u32,
85 pub original_channels: u8,
87 pub original_bit_depth: Option<u16>,
89 pub peak_before: f32,
91 pub peak_after: f32,
93 pub conversion_time_ms: f64,
95 pub detection_time_ms: f64,
97 pub decode_time_ms: f64,
99 pub resample_time_ms: f64,
101 pub mix_time_ms: f64,
103}
104
105impl ConversionMetadata {
106 #[must_use]
115 pub fn has_performance_issue(&self) -> bool {
116 self.conversion_time_ms > 10.0
117 || self.detection_time_ms > 1.0
118 || self.decode_time_ms > 3.0
119 || self.resample_time_ms > 5.0
120 || self.mix_time_ms > 1.0
121 }
122
123 #[must_use]
130 pub fn peak_ratio(&self) -> f32 {
131 if self.peak_before.abs() < f32::EPSILON {
132 1.0 } else {
134 self.peak_after / self.peak_before
135 }
136 }
137}
138
139#[derive(Debug, Default, Clone, Copy)]
185pub struct AudioFormatConverter;
186
187impl AudioFormatConverter {
188 #[must_use]
190 pub const fn new() -> Self {
191 Self
192 }
193
194 #[allow(clippy::cognitive_complexity)] pub fn convert_to_standard(audio_bytes: &[u8]) -> Result<StandardAudio> {
237 let pipeline_start = AudioInstant::now();
238
239 tracing::debug!(
240 audio_bytes_len = audio_bytes.len(),
241 "Starting audio format conversion pipeline"
242 );
243
244 let detection_start = AudioInstant::now();
245 let format_metadata = FormatDetector::detect(audio_bytes)?;
246 let detection_duration = elapsed_since(detection_start);
247 let detection_time_ms = detection_duration.as_secs_f64() * 1000.0;
248
249 tracing::debug!(
250 format = %format_metadata.format,
251 detection_time_ms,
252 "Format detection complete"
253 );
254
255 if format_metadata.format != AudioFormat::WavPcm {
256 return Err(Error::InvalidInput(format!(
257 "unsupported format for decoding: {} (only WAV supported)",
258 format_metadata.format.as_str()
259 )));
260 }
261
262 let decode_start = AudioInstant::now();
263 let decoded = WavDecoder::decode(audio_bytes)?;
264 let decode_duration = elapsed_since(decode_start);
265 let decode_time_ms = decode_duration.as_secs_f64() * 1000.0;
266
267 tracing::debug!(
268 sample_rate = decoded.sample_rate,
269 channels = decoded.channels,
270 bit_depth = decoded.bit_depth,
271 sample_count = decoded.samples.len(),
272 decode_time_ms,
273 "WAV decoding complete"
274 );
275
276 let peak_before = decoded
277 .samples
278 .iter()
279 .map(|s| s.abs())
280 .fold(0.0f32, f32::max);
281
282 let resample_start = AudioInstant::now();
283 let resampled = SampleRateConverter::resample(
284 &decoded.samples,
285 decoded.channels,
286 decoded.sample_rate,
287 16000,
288 )?;
289 let resample_duration = elapsed_since(resample_start);
290 let resample_time_ms = resample_duration.as_secs_f64() * 1000.0;
291
292 tracing::debug!(
293 input_rate = decoded.sample_rate,
294 output_rate = 16000,
295 output_samples = resampled.len(),
296 resample_time_ms,
297 "Sample rate conversion complete"
298 );
299
300 let mix_start = AudioInstant::now();
301 let mixed = ChannelMixer::mix_to_mono(&resampled, decoded.channels)?;
302 let mix_duration = elapsed_since(mix_start);
303 let mix_time_ms = mix_duration.as_secs_f64() * 1000.0;
304
305 tracing::debug!(
306 input_channels = decoded.channels,
307 output_samples = mixed.samples.len(),
308 peak_before_mix = mixed.peak_before_mix,
309 peak_after_mix = mixed.peak_after_mix,
310 mix_time_ms,
311 "Channel mixing complete"
312 );
313
314 let conversion_duration = elapsed_since(pipeline_start);
315 let conversion_time_ms = conversion_duration.as_secs_f64() * 1000.0;
316
317 if conversion_time_ms > 10.0 {
318 tracing::warn!(
319 conversion_time_ms,
320 detection_time_ms,
321 decode_time_ms,
322 resample_time_ms,
323 mix_time_ms,
324 "Audio conversion exceeded 10ms target latency"
325 );
326 } else {
327 tracing::debug!(conversion_time_ms, "Audio conversion pipeline complete");
328 }
329
330 let metadata = ConversionMetadata {
331 original_format: format_metadata.format,
332 original_sample_rate: decoded.sample_rate,
333 original_channels: decoded.channels,
334 original_bit_depth: Some(decoded.bit_depth),
335 peak_before,
336 peak_after: mixed.peak_after_mix,
337 conversion_time_ms,
338 detection_time_ms,
339 decode_time_ms,
340 resample_time_ms,
341 mix_time_ms,
342 };
343
344 Ok(StandardAudio {
345 samples: mixed.samples,
346 metadata,
347 })
348 }
349}
350
351fn elapsed_since(start: AudioInstant) -> AudioDuration {
352 AudioInstant::now().duration_since(start)
353}
354
#[cfg(test)]
mod tests {
    use super::*;

    /// Test result whose error is a plain message, so failures from `hound`
    /// and from the converter can both be reported through `?`.
    type TestResult<T> = std::result::Result<T, String>;

    /// Encodes `samples` as an in-memory 16-bit integer PCM WAV file with the
    /// given sample rate and channel count.
    fn create_test_wav(sample_rate: u32, channels: u16, samples: &[i16]) -> TestResult<Vec<u8>> {
        let spec = hound::WavSpec {
            sample_rate,
            channels,
            bits_per_sample: 16,
            sample_format: hound::SampleFormat::Int,
        };

        let mut cursor = std::io::Cursor::new(Vec::new());
        let mut writer = hound::WavWriter::new(&mut cursor, spec)
            .map_err(|e| format!("failed to create WAV writer: {e}"))?;

        for &sample in samples {
            writer
                .write_sample(sample)
                .map_err(|e| format!("failed to write sample: {e}"))?;
        }

        // Finalizing writes the header sizes and releases the borrow on `cursor`.
        writer
            .finalize()
            .map_err(|e| format!("failed to finalize WAV: {e}"))?;

        Ok(cursor.into_inner())
    }

    /// Mono 16 kHz input keeps its sample count and reports its source properties.
    #[test]
    fn test_convert_mono_16khz_identity() -> TestResult<()> {
        let samples = vec![100i16, 200, -100, -200];
        let wav = create_test_wav(16000, 1, &samples)?;

        let standard =
            AudioFormatConverter::convert_to_standard(&wav).map_err(|e| e.to_string())?;

        assert_eq!(standard.samples.len(), 4);
        assert_eq!(standard.metadata.original_sample_rate, 16000);
        assert_eq!(standard.metadata.original_channels, 1);
        assert_eq!(standard.metadata.original_format, AudioFormat::WavPcm);

        Ok(())
    }

    /// Stereo 44.1 kHz input converts successfully and records the source layout.
    #[test]
    fn test_convert_stereo_44100_to_standard() -> TestResult<()> {
        let samples = vec![1000i16, -1000, 2000, -2000];
        let wav = create_test_wav(44100, 2, &samples)?;

        let standard =
            AudioFormatConverter::convert_to_standard(&wav).map_err(|e| e.to_string())?;

        assert!(!standard.samples.is_empty());
        assert_eq!(standard.metadata.original_sample_rate, 44100);
        assert_eq!(standard.metadata.original_channels, 2);

        Ok(())
    }

    /// Every stage timing is non-negative and the stages never add up to more
    /// than the end-to-end time.
    #[test]
    fn test_convert_tracks_timing() -> TestResult<()> {
        // Slack for floating-point rounding when four independently converted
        // durations are summed and compared with a fifth.
        const ROUNDING_SLACK_MS: f64 = 1e-6;

        let samples = vec![0i16; 1000];
        let wav = create_test_wav(16000, 1, &samples)?;

        let standard =
            AudioFormatConverter::convert_to_standard(&wav).map_err(|e| e.to_string())?;

        assert!(standard.metadata.detection_time_ms >= 0.0);
        assert!(standard.metadata.decode_time_ms >= 0.0);
        assert!(standard.metadata.resample_time_ms >= 0.0);
        assert!(standard.metadata.mix_time_ms >= 0.0);
        assert!(standard.metadata.conversion_time_ms >= 0.0);

        let stage_sum = standard.metadata.detection_time_ms
            + standard.metadata.decode_time_ms
            + standard.metadata.resample_time_ms
            + standard.metadata.mix_time_ms;

        // The stages are disjoint intervals inside the pipeline interval, so
        // with a monotonic clock their sum cannot exceed the total. No upper
        // bound on the gap is asserted: untimed work between stages (peak
        // scan, logging) and scheduler preemption make any wall-clock
        // tolerance a source of spurious failures on a busy machine.
        assert!(
            standard.metadata.conversion_time_ms + ROUNDING_SLACK_MS >= stage_sum,
            "total time {} should be at least the stage sum {}",
            standard.metadata.conversion_time_ms,
            stage_sum
        );

        Ok(())
    }

    /// Peak levels are recorded before and after conversion.
    #[test]
    fn test_convert_tracks_peaks() -> TestResult<()> {
        let samples = vec![10000i16, -10000, 5000, -5000];
        let wav = create_test_wav(16000, 1, &samples)?;

        let standard =
            AudioFormatConverter::convert_to_standard(&wav).map_err(|e| e.to_string())?;

        assert!(standard.metadata.peak_before > 0.0);
        assert!(standard.metadata.peak_after > 0.0);

        // Presumably the decoder scales i16 samples by 1/32768, in which case
        // 10000 / 32768 ≈ 0.305.
        assert!(
            (standard.metadata.peak_before - 0.305).abs() < 0.01,
            "expected peak ~0.305, got {}",
            standard.metadata.peak_before
        );

        Ok(())
    }

    /// Input that is not WAV is rejected with a descriptive error.
    #[test]
    fn test_convert_rejects_non_wav() {
        // 0xFF 0xFB is the start of an MPEG audio frame header.
        let mp3_bytes = vec![0xFF, 0xFB, 0x90, 0x00];
        let result = AudioFormatConverter::convert_to_standard(&mp3_bytes);

        assert!(result.is_err());
        if let Err(err) = result {
            let err_msg = err.to_string();
            assert!(err_msg.contains("MP3") || err_msg.contains("unsupported"));
        }
    }

    /// 16000 samples of 16 kHz mono audio last one second.
    #[test]
    fn test_standard_audio_duration_calculation() -> TestResult<()> {
        let samples = vec![0i16; 16000];
        let wav = create_test_wav(16000, 1, &samples)?;

        let standard =
            AudioFormatConverter::convert_to_standard(&wav).map_err(|e| e.to_string())?;

        assert!((standard.duration_sec() - 1.0).abs() < 0.01);

        Ok(())
    }

    /// An all-zero signal is reported as silent.
    #[test]
    fn test_standard_audio_is_silent_detection() -> TestResult<()> {
        let silent_samples = vec![0i16; 100];
        let wav = create_test_wav(16000, 1, &silent_samples)?;

        let standard =
            AudioFormatConverter::convert_to_standard(&wav).map_err(|e| e.to_string())?;

        assert!(standard.is_silent());

        Ok(())
    }

    /// Mono input at the target rate should leave the peak level essentially
    /// unchanged, giving a ratio close to 1.
    #[test]
    fn test_conversion_metadata_peak_ratio() -> TestResult<()> {
        let samples = vec![10000i16, -10000];
        let wav = create_test_wav(16000, 1, &samples)?;

        let standard =
            AudioFormatConverter::convert_to_standard(&wav).map_err(|e| e.to_string())?;

        assert!(
            (standard.metadata.peak_ratio() - 1.0).abs() < 0.1,
            "expected peak ratio ~1.0, got {}",
            standard.metadata.peak_ratio()
        );

        Ok(())
    }
}