use crate::error::{Error, Result};
use crate::time::{AudioDuration, AudioInstant};
use crate::decoder::{ChannelMixer, SampleRateConverter, WavDecoder};
use crate::format::{AudioFormat, FormatDetector};
/// Converted audio together with a record of how the conversion went.
///
/// `AudioFormatConverter::convert_to_standard` is the constructor visible in this file; both
/// fields are public, so other code may also build values directly.
#[derive(Debug, Clone, PartialEq)]
pub struct StandardAudio {
/// Output samples. When produced by `convert_to_standard` these are the samples returned by
/// `ChannelMixer::mix_to_mono` after `SampleRateConverter::resample_to_16khz` has run.
pub samples: Vec<f32>,
/// Properties of the original input plus peak and timing measurements for the run.
pub metadata: ConversionMetadata,
}
impl StandardAudio {
    /// Returns the number of samples held in [`StandardAudio::samples`].
    #[must_use]
    pub fn sample_count(&self) -> usize {
        self.samples.len()
    }

    /// Returns the length of the audio in seconds.
    ///
    /// The buffer is treated as 16 000 samples per second; the actual rate of the data is not
    /// inspected.
    #[must_use]
    pub fn duration_sec(&self) -> f64 {
        const SAMPLES_PER_SECOND: f64 = 16000.0;

        let total_samples = self.sample_count() as f64;
        total_samples / SAMPLES_PER_SECOND
    }

    /// Returns `true` when every sample has a magnitude strictly below `1e-4`.
    ///
    /// An empty buffer counts as silent. A NaN sample fails the comparison, so a buffer
    /// containing NaN is reported as not silent.
    #[must_use]
    pub fn is_silent(&self) -> bool {
        const SILENCE_FLOOR: f32 = 1e-4;

        self.samples
            .iter()
            .copied()
            .map(f32::abs)
            .all(|magnitude| magnitude < SILENCE_FLOOR)
    }
}
/// Provenance and diagnostics recorded for one conversion run.
///
/// The per-field notes below describe how `AudioFormatConverter::convert_to_standard` fills the
/// struct. Every `*_time_ms` value is an elapsed duration expressed in milliseconds.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct ConversionMetadata {
/// Format reported by `FormatDetector::detect` for the input bytes.
pub original_format: AudioFormat,
/// Sample rate reported by the decoder for the input, before resampling.
pub original_sample_rate: u32,
/// Channel count reported by the decoder for the input, before mixing.
pub original_channels: u8,
/// Bit depth reported by the decoder; `convert_to_standard` always stores `Some`.
pub original_bit_depth: Option<u16>,
/// Largest absolute sample value in the decoded input, measured before resampling.
pub peak_before: f32,
/// Peak reported by the channel mixer for its output (`peak_after_mix`).
pub peak_after: f32,
/// Time for the whole pipeline, measured from before detection until after mixing.
pub conversion_time_ms: f64,
/// Time spent in format detection.
pub detection_time_ms: f64,
/// Time spent decoding the input.
pub decode_time_ms: f64,
/// Time spent in sample-rate conversion.
pub resample_time_ms: f64,
/// Time spent in channel mixing.
pub mix_time_ms: f64,
}
impl ConversionMetadata {
    /// Returns `true` when at least one recorded timing is strictly above its budget.
    ///
    /// Budgets in milliseconds: whole pipeline 10, detection 1, decode 3, resample 5, mix 1.
    /// A timing exactly equal to its budget does not count as an issue.
    #[must_use]
    pub fn has_performance_issue(&self) -> bool {
        // (measured, budget) pairs, both in milliseconds.
        let budgets = [
            (self.conversion_time_ms, 10.0),
            (self.detection_time_ms, 1.0),
            (self.decode_time_ms, 3.0),
            (self.resample_time_ms, 5.0),
            (self.mix_time_ms, 1.0),
        ];

        budgets
            .iter()
            .any(|&(elapsed_ms, limit_ms)| elapsed_ms > limit_ms)
    }

    /// Returns `peak_after / peak_before`.
    ///
    /// When the magnitude of `peak_before` is below `f32::EPSILON` the division is skipped and
    /// `1.0` is returned instead.
    #[must_use]
    pub fn peak_ratio(&self) -> f32 {
        let input_peak = self.peak_before;
        if input_peak.abs() < f32::EPSILON {
            return 1.0;
        }
        self.peak_after / input_peak
    }
}
/// Namespace type for the audio conversion pipeline.
///
/// The struct has no fields; the conversion itself is exposed as the associated function
/// `AudioFormatConverter::convert_to_standard`.
#[derive(Debug, Default, Clone, Copy)]
pub struct AudioFormatConverter;
impl AudioFormatConverter {
#[must_use]
pub const fn new() -> Self {
Self
}
#[allow(clippy::cognitive_complexity)] pub fn convert_to_standard(audio_bytes: &[u8]) -> Result<StandardAudio> {
let pipeline_start = AudioInstant::now();
tracing::debug!(
audio_bytes_len = audio_bytes.len(),
"Starting audio format conversion pipeline"
);
let detection_start = AudioInstant::now();
let format_metadata = FormatDetector::detect(audio_bytes)?;
let detection_duration = elapsed_since(detection_start);
let detection_time_ms = detection_duration.as_secs_f64() * 1000.0;
tracing::debug!(
format = %format_metadata.format,
detection_time_ms,
"Format detection complete"
);
if format_metadata.format != AudioFormat::WavPcm {
return Err(Error::InvalidInput(format!(
"unsupported format for decoding: {} (only WAV supported)",
format_metadata.format.as_str()
)));
}
let decode_start = AudioInstant::now();
let decoded = WavDecoder::decode(audio_bytes)?;
let decode_duration = elapsed_since(decode_start);
let decode_time_ms = decode_duration.as_secs_f64() * 1000.0;
tracing::debug!(
sample_rate = decoded.sample_rate,
channels = decoded.channels,
bit_depth = decoded.bit_depth,
sample_count = decoded.samples.len(),
decode_time_ms,
"WAV decoding complete"
);
let peak_before = decoded
.samples
.iter()
.map(|s| s.abs())
.fold(0.0f32, f32::max);
let resample_start = AudioInstant::now();
let resampled = SampleRateConverter::resample_to_16khz(
&decoded.samples,
decoded.channels,
decoded.sample_rate,
)?;
let resample_duration = elapsed_since(resample_start);
let resample_time_ms = resample_duration.as_secs_f64() * 1000.0;
tracing::debug!(
input_rate = decoded.sample_rate,
output_rate = SampleRateConverter::TARGET_SAMPLE_RATE,
output_samples = resampled.len(),
resample_time_ms,
"Sample rate conversion complete"
);
let mix_start = AudioInstant::now();
let mixed = ChannelMixer::mix_to_mono(&resampled, decoded.channels)?;
let mix_duration = elapsed_since(mix_start);
let mix_time_ms = mix_duration.as_secs_f64() * 1000.0;
tracing::debug!(
input_channels = decoded.channels,
output_samples = mixed.samples.len(),
peak_before_mix = mixed.peak_before_mix,
peak_after_mix = mixed.peak_after_mix,
mix_time_ms,
"Channel mixing complete"
);
let conversion_duration = elapsed_since(pipeline_start);
let conversion_time_ms = conversion_duration.as_secs_f64() * 1000.0;
if conversion_time_ms > 10.0 {
tracing::warn!(
conversion_time_ms,
detection_time_ms,
decode_time_ms,
resample_time_ms,
mix_time_ms,
"Audio conversion exceeded 10ms target latency"
);
} else {
tracing::debug!(conversion_time_ms, "Audio conversion pipeline complete");
}
let metadata = ConversionMetadata {
original_format: format_metadata.format,
original_sample_rate: decoded.sample_rate,
original_channels: decoded.channels,
original_bit_depth: Some(decoded.bit_depth),
peak_before,
peak_after: mixed.peak_after_mix,
conversion_time_ms,
detection_time_ms,
decode_time_ms,
resample_time_ms,
mix_time_ms,
};
Ok(StandardAudio {
samples: mixed.samples,
metadata,
})
}
}
/// Returns the duration between `start` and the instant at which this function is called.
fn elapsed_since(start: AudioInstant) -> AudioDuration {
    let now = AudioInstant::now();
    now.duration_since(start)
}
#[cfg(test)]
mod tests {
    use super::*;

    type TestResult<T> = std::result::Result<T, String>;

    /// Encodes `samples` as an in-memory 16-bit integer WAV file with the given rate and
    /// channel count.
    fn create_test_wav(sample_rate: u32, channels: u16, samples: &[i16]) -> TestResult<Vec<u8>> {
        let mut buffer = std::io::Cursor::new(Vec::new());
        let mut writer = hound::WavWriter::new(
            &mut buffer,
            hound::WavSpec {
                sample_rate,
                channels,
                bits_per_sample: 16,
                sample_format: hound::SampleFormat::Int,
            },
        )
        .map_err(|e| format!("failed to create WAV writer: {e}"))?;

        samples
            .iter()
            .try_for_each(|&value| writer.write_sample(value))
            .map_err(|e| format!("failed to write sample: {e}"))?;

        writer
            .finalize()
            .map_err(|e| format!("failed to finalize WAV: {e}"))?;
        Ok(buffer.into_inner())
    }

    /// Runs the converter under test and flattens its error into the string error the tests use.
    fn convert(wav: &[u8]) -> TestResult<StandardAudio> {
        AudioFormatConverter::convert_to_standard(wav).map_err(|e| e.to_string())
    }

    /// Mono input already at 16 kHz keeps its sample count and reports its original properties.
    #[test]
    fn test_convert_mono_16khz_identity() -> TestResult<()> {
        let wav = create_test_wav(16000, 1, &[100, 200, -100, -200])?;
        let audio = convert(&wav)?;

        assert_eq!(audio.samples.len(), 4);
        let meta = audio.metadata;
        assert_eq!(meta.original_sample_rate, 16000);
        assert_eq!(meta.original_channels, 1);
        assert_eq!(meta.original_format, AudioFormat::WavPcm);
        Ok(())
    }

    /// Stereo 44.1 kHz input converts to non-empty output and records the source properties.
    #[test]
    fn test_convert_stereo_44100_to_standard() -> TestResult<()> {
        let wav = create_test_wav(44100, 2, &[1000, -1000, 2000, -2000])?;
        let audio = convert(&wav)?;

        assert!(!audio.samples.is_empty());
        assert_eq!(audio.metadata.original_sample_rate, 44100);
        assert_eq!(audio.metadata.original_channels, 2);
        Ok(())
    }

    /// Every timing is non-negative and the stage timings roughly add up to the total.
    #[test]
    fn test_convert_tracks_timing() -> TestResult<()> {
        let wav = create_test_wav(16000, 1, &[0i16; 1000])?;
        let meta = convert(&wav)?.metadata;

        let stage_timings = [
            meta.detection_time_ms,
            meta.decode_time_ms,
            meta.resample_time_ms,
            meta.mix_time_ms,
        ];
        for stage_ms in stage_timings.iter() {
            assert!(*stage_ms >= 0.0);
        }
        assert!(meta.conversion_time_ms >= 0.0);

        let stage_sum: f64 = stage_timings.iter().sum();
        assert!(
            (meta.conversion_time_ms - stage_sum).abs() < 1.0,
            "total time {} should approximately equal stage sum {}",
            meta.conversion_time_ms,
            stage_sum
        );
        Ok(())
    }

    /// Peaks are recorded, and the input peak matches 10000 / 32768 (about 0.305).
    #[test]
    fn test_convert_tracks_peaks() -> TestResult<()> {
        let wav = create_test_wav(16000, 1, &[10000, -10000, 5000, -5000])?;
        let meta = convert(&wav)?.metadata;

        assert!(meta.peak_before > 0.0);
        assert!(meta.peak_after > 0.0);
        assert!(
            (meta.peak_before - 0.305).abs() < 0.01,
            "expected peak ~0.305, got {}",
            meta.peak_before
        );
        Ok(())
    }

    /// Bytes that look like an MP3 frame header are rejected with a descriptive error.
    #[test]
    fn test_convert_rejects_non_wav() {
        let mp3_bytes = [0xFF_u8, 0xFB, 0x90, 0x00];
        let outcome = AudioFormatConverter::convert_to_standard(&mp3_bytes);

        assert!(outcome.is_err());
        if let Some(failure) = outcome.err() {
            let text = failure.to_string();
            assert!(text.contains("MP3") || text.contains("unsupported"));
        }
    }

    /// 16 000 samples at 16 kHz are reported as one second.
    #[test]
    fn test_standard_audio_duration_calculation() -> TestResult<()> {
        let wav = create_test_wav(16000, 1, &[0i16; 16000])?;
        let audio = convert(&wav)?;

        let error_sec = (audio.duration_sec() - 1.0).abs();
        assert!(error_sec < 0.01);
        Ok(())
    }

    /// All-zero input is classified as silent.
    #[test]
    fn test_standard_audio_is_silent_detection() -> TestResult<()> {
        let wav = create_test_wav(16000, 1, &[0i16; 100])?;
        let audio = convert(&wav)?;

        assert!(audio.is_silent());
        Ok(())
    }

    /// Mono input at the target rate keeps its peak, so the ratio stays close to 1.
    #[test]
    fn test_conversion_metadata_peak_ratio() -> TestResult<()> {
        let wav = create_test_wav(16000, 1, &[10000, -10000])?;
        let meta = convert(&wav)?.metadata;

        assert!(
            (meta.peak_ratio() - 1.0).abs() < 0.1,
            "expected peak ratio ~1.0, got {}",
            meta.peak_ratio()
        );
        Ok(())
    }
}