use std::borrow::Cow;
/// A buffer of `f32` audio samples together with its sample rate.
///
/// The sample data lives in a [`Cow`], so a frame may either borrow a
/// caller-provided slice or own a `Vec<f32>` of its own.
#[derive(Debug, Clone)]
pub struct AudioFrame<'a> {
/// Sample data; borrowed or owned depending on how the frame was built.
samples: Cow<'a, [f32]>,
/// Samples per second (`duration_secs` divides the sample count by this).
sample_rate: u32,
}
impl<'a> AudioFrame<'a> {
    /// Builds a frame from any [`IntoSamples`] source and a sample rate.
    ///
    /// Whether the resulting frame borrows or owns its data is decided by the
    /// `IntoSamples` implementation of the argument.
    pub fn new(samples: impl IntoSamples<'a>, sample_rate: u32) -> Self {
        let samples = samples.into_samples();
        Self {
            samples,
            sample_rate,
        }
    }

    /// Returns the samples as a plain slice.
    pub fn samples(&self) -> &[f32] {
        self.samples.as_ref()
    }

    /// Returns the sample rate this frame was created with.
    pub fn sample_rate(&self) -> u32 {
        self.sample_rate
    }

    /// Returns the number of samples in the frame.
    pub fn len(&self) -> usize {
        self.samples().len()
    }

    /// Returns `true` when the frame holds no samples.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns the frame length in seconds: sample count divided by sample rate.
    ///
    /// The division is done in `f64`, so a sample rate of zero yields a
    /// non-finite result (infinity, or NaN for an empty frame) rather than a
    /// panic.
    pub fn duration_secs(&self) -> f64 {
        let sample_count = self.len() as f64;
        let rate = f64::from(self.sample_rate);
        sample_count / rate
    }

    /// Converts the frame into one that owns its samples, copying them only
    /// if they were borrowed.
    pub fn into_owned(self) -> AudioFrame<'static> {
        let Self {
            samples,
            sample_rate,
        } = self;
        AudioFrame {
            samples: Cow::Owned(samples.into_owned()),
            sample_rate,
        }
    }
}
impl AudioFrame<'static> {
    /// Wraps an owned sample vector in a frame.
    ///
    /// The vector is moved into the frame as-is, so no samples are copied.
    pub fn from_vec(samples: Vec<f32>, sample_rate: u32) -> Self {
        let samples = Cow::Owned(samples);
        Self {
            samples,
            sample_rate,
        }
    }
}
#[cfg(feature = "resample")]
impl AudioFrame<'_> {
    /// Resamples the frame to `target_rate` and returns a new owned frame.
    ///
    /// Two shortcuts are taken before any resampler is built:
    /// - if `target_rate` equals the current rate, an owned copy is returned;
    /// - if the frame is empty, an empty frame tagged with `target_rate` is
    ///   returned.
    ///
    /// Otherwise the samples are pushed through the sinc resampler from
    /// `build_sinc_resampler` as a single channel, in one call.
    ///
    /// # Errors
    ///
    /// Returns `CoreError::Audio` when the resampler cannot be built, when a
    /// buffer adapter cannot be created, or when processing fails.
    pub fn resample(&self, target_rate: u32) -> Result<AudioFrame<'static>, crate::CoreError> {
        use rubato::audioadapter_buffers::direct::InterleavedSlice;
        use rubato::Resampler;

        // Every failure below is reported as `CoreError::Audio` carrying the
        // underlying error's text.
        fn audio_err<E: ToString>(err: E) -> crate::CoreError {
            crate::CoreError::Audio(err.to_string())
        }

        if target_rate == self.sample_rate {
            return Ok(self.clone().into_owned());
        }
        if self.is_empty() {
            return Ok(AudioFrame::from_vec(Vec::new(), target_rate));
        }

        let input: &[f32] = self.samples.as_ref();
        let frames_in = input.len();

        // The resampler's chunk size is the input length, capped at 1024.
        let mut resampler =
            build_sinc_resampler(self.sample_rate, target_rate, frames_in.min(1024))?;

        // Allocate the output length the resampler reports it needs for this
        // input; it is trimmed to the produced length afterwards.
        let capacity = resampler.process_all_needed_output_len(frames_in);
        let mut resampled = vec![0.0f32; capacity];

        let source = InterleavedSlice::new(input, 1, frames_in).map_err(audio_err)?;
        let mut sink =
            InterleavedSlice::new_mut(&mut resampled, 1, capacity).map_err(audio_err)?;
        let (_consumed, produced) = resampler
            .process_all_into_buffer(&source, &mut sink, frames_in, None)
            .map_err(audio_err)?;

        resampled.truncate(produced);
        Ok(AudioFrame::from_vec(resampled, target_rate))
    }
}
/// Builds the single-channel sinc resampler used by `AudioFrame::resample`
/// and `StreamingResampler`.
///
/// # Parameters
///
/// - `source_rate`, `target_rate`: input and output sample rates; both must
///   be non-zero. Their quotient `target_rate / source_rate` is the
///   resampling ratio.
/// - `chunk_size`: passed to rubato together with `FixedAsync::Input`; must
///   be non-zero.
///
/// # Errors
///
/// Returns `CoreError::Audio` when a rate or the chunk size is zero, or when
/// rubato rejects the configuration.
#[cfg(feature = "resample")]
fn build_sinc_resampler(
    source_rate: u32,
    target_rate: u32,
    chunk_size: usize,
) -> Result<rubato::Async<f32>, crate::CoreError> {
    use rubato::{
        Async, FixedAsync, SincInterpolationParameters, SincInterpolationType, WindowFunction,
    };

    // Reject zero rates up front: the ratio below would otherwise be a
    // division by zero or a zero ratio.
    if source_rate == 0 || target_rate == 0 {
        return Err(crate::CoreError::Audio(
            "sample rate must be non-zero".into(),
        ));
    }
    if chunk_size == 0 {
        return Err(crate::CoreError::Audio(
            "chunk_size must be non-zero".into(),
        ));
    }

    let params = SincInterpolationParameters {
        sinc_len: 256,
        f_cutoff: 0.95,
        interpolation: SincInterpolationType::Cubic,
        oversampling_factor: 128,
        window: WindowFunction::BlackmanHarris2,
    };
    let ratio = target_rate as f64 / source_rate as f64;

    // The parameters are passed by reference, and the channel count is 1.
    // NOTE(review): the literal `1.0` is presumably rubato's maximum relative
    // ratio change (i.e. a fixed ratio) — confirm against the rubato docs.
    Async::<f32>::new_sinc(ratio, 1.0, &params, chunk_size, 1, FixedAsync::Input)
        .map_err(|e| crate::CoreError::Audio(e.to_string()))
}
/// Resampler that carries its state across successive `process` calls.
///
/// Input is accumulated until a full chunk of `chunk_size` samples is
/// available and is then resampled one chunk at a time. When the source and
/// target rates are equal no resampler is built and input is copied through.
#[cfg(feature = "resample")]
pub struct StreamingResampler {
/// Underlying rubato resampler; `None` when the rates match (passthrough).
inner: Option<rubato::Async<f32>>,
/// Sample rate of the incoming audio, as given to `new`.
source_rate: u32,
/// Sample rate of the produced audio, as given to `new`.
target_rate: u32,
/// Number of input samples handed to the resampler per processing step.
chunk_size: usize,
/// Input samples collected so far that do not yet fill a whole chunk.
input_buf: Vec<f32>,
/// Scratch buffer the resampler writes each chunk's output into.
output_buf: Vec<f32>,
}
#[cfg(feature = "resample")]
impl StreamingResampler {
    /// Creates a streaming resampler from `source_rate` to `target_rate`
    /// that consumes input in chunks of `chunk_size` samples.
    ///
    /// If the two rates are equal, no resampler is built and `process`
    /// simply copies its input. In that case only the rate is validated;
    /// `chunk_size` is stored unchecked.
    ///
    /// # Errors
    ///
    /// Returns `CoreError::Audio` if a rate is zero, or, when the rates
    /// differ, if `build_sinc_resampler` fails (which includes a zero
    /// `chunk_size`).
    pub fn new(
        source_rate: u32,
        target_rate: u32,
        chunk_size: usize,
    ) -> Result<Self, crate::CoreError> {
        if source_rate != target_rate {
            use rubato::Resampler;

            let resampler = build_sinc_resampler(source_rate, target_rate, chunk_size)?;
            // Size the scratch buffer once for the largest output a single
            // chunk can yield.
            let scratch_len = resampler.output_frames_max();
            return Ok(Self {
                inner: Some(resampler),
                source_rate,
                target_rate,
                chunk_size,
                input_buf: Vec::with_capacity(chunk_size),
                output_buf: vec![0.0; scratch_len],
            });
        }

        // Equal rates: passthrough mode. A shared rate of zero is still an error.
        if source_rate == 0 {
            return Err(crate::CoreError::Audio(
                "sample rate must be non-zero".into(),
            ));
        }
        Ok(Self {
            inner: None,
            source_rate,
            target_rate,
            chunk_size,
            input_buf: Vec::new(),
            output_buf: Vec::new(),
        })
    }

    /// Returns the input sample rate given at construction.
    pub fn source_rate(&self) -> u32 {
        self.source_rate
    }

    /// Returns the output sample rate given at construction.
    pub fn target_rate(&self) -> u32 {
        self.target_rate
    }

    /// Returns the chunk size given at construction.
    pub fn chunk_size(&self) -> usize {
        self.chunk_size
    }

    /// Feeds `input` into the resampler and appends any produced samples to
    /// `out`.
    ///
    /// Input is buffered until a full chunk is available. Each complete
    /// chunk is resampled and its output appended to `out`; a trailing
    /// partial chunk stays buffered for the next call. In passthrough mode
    /// `input` is appended to `out` unchanged.
    ///
    /// # Errors
    ///
    /// Returns `CoreError::Audio` if a buffer adapter cannot be created or
    /// the resampler reports a failure.
    pub fn process(&mut self, input: &[f32], out: &mut Vec<f32>) -> Result<(), crate::CoreError> {
        use rubato::audioadapter_buffers::direct::InterleavedSlice;
        use rubato::Resampler;

        // Every failure below is reported as `CoreError::Audio` carrying the
        // underlying error's text.
        fn audio_err<E: ToString>(err: E) -> crate::CoreError {
            crate::CoreError::Audio(err.to_string())
        }

        let resampler = match self.inner.as_mut() {
            Some(resampler) => resampler,
            None => {
                out.extend_from_slice(input);
                return Ok(());
            }
        };

        let chunk = self.chunk_size;
        let mut pending = input;
        while !pending.is_empty() {
            // Top the staging buffer up to exactly one chunk.
            let room = chunk - self.input_buf.len();
            let (head, rest) = pending.split_at(room.min(pending.len()));
            self.input_buf.extend_from_slice(head);
            pending = rest;

            // Not enough for a full chunk yet: keep what we have for later.
            if self.input_buf.len() < chunk {
                break;
            }

            let source =
                InterleavedSlice::new(&self.input_buf[..], 1, chunk).map_err(audio_err)?;
            let scratch_len = self.output_buf.len();
            let mut sink = InterleavedSlice::new_mut(&mut self.output_buf[..], 1, scratch_len)
                .map_err(audio_err)?;
            let (_consumed, produced) = resampler
                .process_into_buffer(&source, &mut sink, None)
                .map_err(audio_err)?;

            out.extend_from_slice(&self.output_buf[..produced]);
            self.input_buf.clear();
        }
        Ok(())
    }
}
#[cfg(feature = "wav")]
impl AudioFrame<'_> {
    /// Writes the frame to `path` as a single-channel, 32-bit float WAV file
    /// at the frame's sample rate.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be created, a sample cannot be
    /// written, or the writer cannot be finalized.
    pub fn write_wav(&self, path: impl AsRef<std::path::Path>) -> Result<(), crate::CoreError> {
        let mut writer = hound::WavWriter::create(
            path,
            hound::WavSpec {
                channels: 1,
                sample_rate: self.sample_rate,
                bits_per_sample: 32,
                sample_format: hound::SampleFormat::Float,
            },
        )?;
        // Stop at the first sample that fails to write.
        self.samples()
            .iter()
            .try_for_each(|&sample| writer.write_sample(sample))?;
        // Finalize explicitly so that a failure here is returned to the caller.
        writer.finalize()?;
        Ok(())
    }
}
#[cfg(feature = "wav")]
impl AudioFrame<'static> {
    /// Reads a WAV file into an owned frame of `f32` samples.
    ///
    /// Float files are read as stored. Integer files are decoded as `i32`
    /// and divided by `2^(bits_per_sample - 1)`, so full scale maps to
    /// roughly `[-1.0, 1.0)` at every bit depth hound can decode; for 16-bit
    /// files the divisor is 32768.
    ///
    /// The frame's sample rate is taken from the file header. The header's
    /// channel count is not inspected: samples are stored in the order the
    /// reader yields them.
    /// NOTE(review): for multi-channel files this presumably leaves the
    /// channels interleaved in one buffer — confirm whether such files
    /// should be rejected or downmixed.
    ///
    /// # Errors
    ///
    /// Returns an error if the file cannot be opened or a sample cannot be
    /// decoded.
    pub fn from_wav(path: impl AsRef<std::path::Path>) -> Result<Self, crate::CoreError> {
        let mut reader = hound::WavReader::open(path)?;
        let spec = reader.spec();
        let samples: Vec<f32> = match spec.sample_format {
            hound::SampleFormat::Float => reader.samples::<f32>().collect::<Result<_, _>>()?,
            hound::SampleFormat::Int => {
                // Scale by the full-scale magnitude of the file's actual bit
                // depth rather than assuming 16-bit.
                let full_scale = 2.0_f32.powi(i32::from(spec.bits_per_sample) - 1);
                reader
                    .samples::<i32>()
                    .map(|s| s.map(|v| v as f32 / full_scale))
                    .collect::<Result<_, _>>()?
            }
        };
        Ok(AudioFrame::from_vec(samples, spec.sample_rate))
    }
}
/// Conversion of a sample source into the `Cow<'a, [f32]>` stored by
/// `AudioFrame`.
///
/// The implementations in this file borrow `f32` inputs and convert `i16`
/// inputs into a newly allocated, scaled `Vec<f32>`.
pub trait IntoSamples<'a> {
/// Consumes `self` and returns the samples as borrowed or owned `f32` data.
fn into_samples(self) -> Cow<'a, [f32]>;
}
/// `f32` slices are used as-is: the result borrows the slice.
impl<'a> IntoSamples<'a> for &'a [f32] {
    #[inline]
    fn into_samples(self) -> Cow<'a, [f32]> {
        // `Cow: From<&[T]>` produces the borrowed variant.
        Cow::from(self)
    }
}
/// A borrowed `Vec<f32>` is used as-is: the result borrows the vector's slice.
impl<'a> IntoSamples<'a> for &'a Vec<f32> {
    #[inline]
    fn into_samples(self) -> Cow<'a, [f32]> {
        Cow::Borrowed(&self[..])
    }
}
/// A borrowed `f32` array is used as-is: the result borrows the array as a slice.
impl<'a, const N: usize> IntoSamples<'a> for &'a [f32; N] {
    #[inline]
    fn into_samples(self) -> Cow<'a, [f32]> {
        Cow::Borrowed(&self[..])
    }
}
/// `i16` samples are converted to `f32` and divided by 32768, so the result
/// is always a newly allocated, owned buffer.
impl<'a> IntoSamples<'a> for &'a [i16] {
    #[inline]
    fn into_samples(self) -> Cow<'a, [f32]> {
        let scaled: Vec<f32> = self
            .iter()
            .copied()
            .map(|sample| f32::from(sample) / 32768.0)
            .collect();
        Cow::Owned(scaled)
    }
}
/// A borrowed `Vec<i16>` is converted exactly like an `i16` slice: each
/// sample becomes an `f32` divided by 32768, in an owned buffer.
impl<'a> IntoSamples<'a> for &'a Vec<i16> {
    #[inline]
    fn into_samples(self) -> Cow<'a, [f32]> {
        // Reuse the `&[i16]` implementation instead of repeating the scaling.
        self.as_slice().into_samples()
    }
}
/// A borrowed `i16` array is converted exactly like an `i16` slice: each
/// sample becomes an `f32` divided by 32768, in an owned buffer.
impl<'a, const N: usize> IntoSamples<'a> for &'a [i16; N] {
    #[inline]
    fn into_samples(self) -> Cow<'a, [f32]> {
        // Reuse the `&[i16]` implementation instead of repeating the scaling.
        self.as_slice().into_samples()
    }
}
/// Unit tests for `AudioFrame`, the `IntoSamples` conversions, WAV I/O
/// (feature `wav`) and resampling (feature `resample`).
#[cfg(test)]
mod tests {
    use super::*;

    /// A frame built from an `f32` slice borrows it instead of copying.
    #[test]
    fn f32_is_zero_copy() {
        let samples = vec![0.1f32, -0.2, 0.3];
        let frame = AudioFrame::new(samples.as_slice(), 16000);
        assert!(matches!(frame.samples, Cow::Borrowed(_)));
        assert_eq!(frame.samples(), &[0.1, -0.2, 0.3]);
    }

    /// `i16` input is scaled by 1/32768 into an owned buffer.
    #[test]
    fn i16_normalizes_to_f32() {
        let samples: Vec<i16> = vec![0, 16384, -16384, i16::MAX, i16::MIN];
        let frame = AudioFrame::new(samples.as_slice(), 16000);
        assert!(matches!(frame.samples, Cow::Owned(_)));
        let s = frame.samples();
        assert!((s[0] - 0.0).abs() < f32::EPSILON);
        assert!((s[1] - 0.5).abs() < 0.001);
        assert!((s[2] - -0.5).abs() < 0.001);
        assert!((s[3] - (i16::MAX as f32 / 32768.0)).abs() < f32::EPSILON);
        assert!((s[4] - -1.0).abs() < f32::EPSILON);
    }

    /// Rate, length, emptiness and duration accessors report consistent values.
    #[test]
    fn metadata() {
        let samples = vec![0.0f32; 160];
        let frame = AudioFrame::new(samples.as_slice(), 16000);
        assert_eq!(frame.sample_rate(), 16000);
        assert_eq!(frame.len(), 160);
        assert!(!frame.is_empty());
        assert!((frame.duration_secs() - 0.01).abs() < 1e-9);
    }

    /// An empty slice produces an empty frame.
    #[test]
    fn empty_frame() {
        let samples: &[f32] = &[];
        let frame = AudioFrame::new(samples, 16000);
        assert!(frame.is_empty());
        assert_eq!(frame.len(), 0);
    }

    /// `into_owned` keeps samples and rate while detaching from the borrow.
    #[test]
    fn into_owned() {
        let samples = vec![0.5f32, -0.5];
        let frame = AudioFrame::new(samples.as_slice(), 16000);
        let owned: AudioFrame<'static> = frame.into_owned();
        assert_eq!(owned.samples(), &[0.5, -0.5]);
        assert_eq!(owned.sample_rate(), 16000);
    }

    /// A 16-bit integer WAV is read back normalized to `f32`.
    #[cfg(feature = "wav")]
    #[test]
    fn wav_read_i16() {
        let path = std::env::temp_dir().join("wavekat_test_i16.wav");
        let spec = hound::WavSpec {
            channels: 1,
            sample_rate: 16000,
            bits_per_sample: 16,
            sample_format: hound::SampleFormat::Int,
        };
        let i16_samples: &[i16] = &[0, i16::MAX, i16::MIN, 16384];
        let mut writer = hound::WavWriter::create(&path, spec).unwrap();
        for &s in i16_samples {
            writer.write_sample(s).unwrap();
        }
        writer.finalize().unwrap();

        let loaded = AudioFrame::from_wav(&path);
        // Best-effort cleanup before asserting, so a failing assertion does
        // not leave the temp file behind.
        let _ = std::fs::remove_file(&path);
        let frame = loaded.unwrap();

        assert_eq!(frame.sample_rate(), 16000);
        assert_eq!(frame.len(), 4);
        let s = frame.samples();
        assert!((s[0] - 0.0).abs() < 1e-6);
        assert!((s[1] - (i16::MAX as f32 / 32768.0)).abs() < 1e-6);
        assert!((s[2] - -1.0).abs() < 1e-6);
        assert!((s[3] - 0.5).abs() < 1e-4);
    }

    /// Writing then reading a float WAV reproduces rate, length and samples.
    #[cfg(feature = "wav")]
    #[test]
    fn wav_round_trip() {
        let original = AudioFrame::from_vec(vec![0.5f32, -0.5, 0.0, 1.0], 16000);
        let path = std::env::temp_dir().join("wavekat_test.wav");
        original.write_wav(&path).unwrap();

        let loaded = AudioFrame::from_wav(&path);
        // Best-effort cleanup before asserting, so a failing assertion does
        // not leave the temp file behind.
        let _ = std::fs::remove_file(&path);
        let loaded = loaded.unwrap();

        assert_eq!(loaded.sample_rate(), 16000);
        // `zip` stops at the shorter side, so check the length explicitly;
        // otherwise a truncated or empty read would pass.
        assert_eq!(loaded.len(), original.len());
        for (a, b) in original.samples().iter().zip(loaded.samples()) {
            assert!((a - b).abs() < 1e-6, "sample mismatch: {a} vs {b}");
        }
    }

    /// `from_vec` keeps the vector's allocation.
    #[test]
    fn from_vec_is_zero_copy() {
        let samples = vec![0.5f32, -0.5];
        let ptr = samples.as_ptr();
        let frame = AudioFrame::from_vec(samples, 24000);
        assert_eq!(frame.samples().as_ptr(), ptr);
        assert_eq!(frame.sample_rate(), 24000);
    }

    /// `&Vec<f32>` is borrowed.
    #[test]
    fn into_samples_vec_f32() {
        let samples = vec![0.1f32, -0.2, 0.3];
        let frame = AudioFrame::new(&samples, 16000);
        assert!(matches!(frame.samples, Cow::Borrowed(_)));
        assert_eq!(frame.samples(), &[0.1, -0.2, 0.3]);
    }

    /// `&[f32; N]` is borrowed.
    #[test]
    fn into_samples_array_f32() {
        let samples = [0.1f32, -0.2, 0.3];
        let frame = AudioFrame::new(&samples, 16000);
        assert!(matches!(frame.samples, Cow::Borrowed(_)));
        assert_eq!(frame.samples(), &[0.1, -0.2, 0.3]);
    }

    /// `&Vec<i16>` is converted into an owned, normalized buffer.
    #[test]
    fn into_samples_vec_i16() {
        let samples: Vec<i16> = vec![0, 16384, i16::MIN];
        let frame = AudioFrame::new(&samples, 16000);
        assert!(matches!(frame.samples, Cow::Owned(_)));
        let s = frame.samples();
        assert!((s[0] - 0.0).abs() < f32::EPSILON);
        assert!((s[1] - 0.5).abs() < 0.001);
        assert!((s[2] - -1.0).abs() < f32::EPSILON);
    }

    /// `&[i16; N]` is converted into an owned, normalized buffer.
    #[test]
    fn into_samples_array_i16() {
        let samples: [i16; 3] = [0, 16384, i16::MIN];
        let frame = AudioFrame::new(&samples, 16000);
        assert!(matches!(frame.samples, Cow::Owned(_)));
        let s = frame.samples();
        assert!((s[0] - 0.0).abs() < f32::EPSILON);
        assert!((s[1] - 0.5).abs() < 0.001);
        assert!((s[2] - -1.0).abs() < f32::EPSILON);
    }

    /// Resampling to the same rate returns the samples unchanged.
    #[cfg(feature = "resample")]
    #[test]
    fn resample_noop_same_rate() {
        let samples = vec![0.1f32, -0.2, 0.3, 0.4, 0.5];
        let frame = AudioFrame::from_vec(samples.clone(), 16000);
        let resampled = frame.resample(16000).unwrap();
        assert_eq!(resampled.sample_rate(), 16000);
        assert_eq!(resampled.samples(), &samples[..]);
    }

    /// Resampling an empty frame yields an empty frame at the target rate.
    #[cfg(feature = "resample")]
    #[test]
    fn resample_empty_frame() {
        let frame = AudioFrame::from_vec(Vec::new(), 44100);
        let resampled = frame.resample(16000).unwrap();
        assert_eq!(resampled.sample_rate(), 16000);
        assert!(resampled.is_empty());
    }

    /// One second at 48 kHz becomes roughly 16000 samples at 16 kHz.
    #[cfg(feature = "resample")]
    #[test]
    fn resample_downsample() {
        let frame = AudioFrame::from_vec(vec![0.0f32; 48000], 48000);
        let resampled = frame.resample(16000).unwrap();
        assert_eq!(resampled.sample_rate(), 16000);
        let expected = 16000;
        let tolerance = 50;
        assert!(
            (resampled.len() as i64 - expected as i64).unsigned_abs() < tolerance,
            "expected ~{expected} samples, got {}",
            resampled.len()
        );
    }

    /// One second at 16 kHz becomes roughly 24000 samples at 24 kHz.
    #[cfg(feature = "resample")]
    #[test]
    fn resample_upsample() {
        let frame = AudioFrame::from_vec(vec![0.0f32; 16000], 16000);
        let resampled = frame.resample(24000).unwrap();
        assert_eq!(resampled.sample_rate(), 24000);
        let expected = 24000;
        let tolerance = 50;
        assert!(
            (resampled.len() as i64 - expected as i64).unsigned_abs() < tolerance,
            "expected ~{expected} samples, got {}",
            resampled.len()
        );
    }

    /// A short 160-sample frame upsampled 8 kHz -> 44.1 kHz has the expected length.
    #[cfg(feature = "resample")]
    #[test]
    fn resample_short_input_upsample_large_ratio() {
        let frame = AudioFrame::from_vec(vec![0.0f32; 160], 8000);
        let resampled = frame.resample(44_100).unwrap();
        assert_eq!(resampled.sample_rate(), 44_100);
        let expected = (160.0 * 44_100.0 / 8_000.0) as i64;
        let actual = resampled.len() as i64;
        assert!(
            (actual - expected).unsigned_abs() < 50,
            "expected ~{expected} samples, got {actual}"
        );
    }

    /// A short 160-sample frame upsampled 8 kHz -> 16 kHz has the expected length.
    #[cfg(feature = "resample")]
    #[test]
    fn resample_short_input_upsample_small_ratio() {
        let frame = AudioFrame::from_vec(vec![0.0f32; 160], 8000);
        let resampled = frame.resample(16_000).unwrap();
        assert_eq!(resampled.sample_rate(), 16_000);
        let expected: i64 = 320;
        let actual = resampled.len() as i64;
        assert!(
            (actual - expected).unsigned_abs() < 50,
            "expected ~{expected} samples, got {actual}"
        );
    }

    /// A 160-sample frame at 8 kHz upsampled to 48 kHz has roughly 960 samples.
    #[cfg(feature = "resample")]
    #[test]
    fn resample_single_g711_frame_to_48k() {
        let frame = AudioFrame::from_vec(vec![0.0f32; 160], 8000);
        let resampled = frame.resample(48_000).unwrap();
        assert_eq!(resampled.sample_rate(), 48_000);
        let expected: i64 = 960;
        let actual = resampled.len() as i64;
        assert!(
            (actual - expected).unsigned_abs() < 50,
            "expected ~{expected} samples, got {actual}"
        );
    }

    /// A 440 Hz sine keeps its frequency (measured by zero crossings) after
    /// resampling 44.1 kHz -> 16 kHz.
    #[cfg(feature = "resample")]
    #[test]
    fn resample_preserves_sine_frequency() {
        let sr_in: u32 = 44100;
        let sr_out: u32 = 16000;
        let duration_secs = 1.0;
        let freq = 440.0;
        let n = (sr_in as f64 * duration_secs) as usize;
        let samples: Vec<f32> = (0..n)
            .map(|i| (2.0 * std::f64::consts::PI * freq * i as f64 / sr_in as f64).sin() as f32)
            .collect();
        let frame = AudioFrame::from_vec(samples, sr_in);
        let resampled = frame.resample(sr_out).unwrap();
        let s = resampled.samples();
        // Each full period contributes two sign changes.
        let crossings: usize = s
            .windows(2)
            .filter(|w| w[0].signum() != w[1].signum())
            .count();
        let measured_freq = crossings as f64 / (2.0 * duration_secs);
        assert!(
            (measured_freq - freq).abs() < 5.0,
            "expected ~{freq} Hz, measured {measured_freq} Hz"
        );
    }

    /// With equal rates the streaming resampler copies input to output.
    #[cfg(feature = "resample")]
    #[test]
    fn streaming_resampler_same_rate_is_passthrough() {
        use crate::StreamingResampler;
        let mut r = StreamingResampler::new(16000, 16000, 160).unwrap();
        let input = vec![0.1, -0.2, 0.3, -0.4];
        let mut out = Vec::new();
        r.process(&input, &mut out).unwrap();
        assert_eq!(out, input);
    }

    /// Accessors echo the constructor arguments.
    #[cfg(feature = "resample")]
    #[test]
    fn streaming_resampler_accessors_report_construction_args() {
        use crate::StreamingResampler;
        let r = StreamingResampler::new(8000, 44100, 160).unwrap();
        assert_eq!(r.source_rate(), 8000);
        assert_eq!(r.target_rate(), 44100);
        assert_eq!(r.chunk_size(), 160);
    }

    /// Ten chunk-sized calls produce roughly the expected total output length.
    #[cfg(feature = "resample")]
    #[test]
    fn streaming_resampler_short_input_chunked_calls() {
        use crate::StreamingResampler;
        let mut r = StreamingResampler::new(8000, 44100, 160).unwrap();
        let mut out = Vec::new();
        for _ in 0..10 {
            let input = vec![0.0f32; 160];
            r.process(&input, &mut out).unwrap();
        }
        let expected = (10 * 160 * 44100 / 8000) as i64;
        let actual = out.len() as i64;
        assert!(
            (actual - expected).unsigned_abs() < 2000,
            "expected ~{expected} samples, got {actual}"
        );
    }

    /// Splitting the input across calls gives the same output as one call.
    #[cfg(feature = "resample")]
    #[test]
    fn streaming_resampler_buffers_across_partial_calls() {
        use crate::StreamingResampler;
        let input: Vec<f32> = (0..320).map(|i| (i as f32) * 0.01).collect();

        let mut split_out = Vec::new();
        let mut r1 = StreamingResampler::new(8000, 16000, 160).unwrap();
        r1.process(&input[..50], &mut split_out).unwrap();
        assert!(split_out.is_empty(), "no output before a full chunk");
        r1.process(&input[50..], &mut split_out).unwrap();

        let mut whole_out = Vec::new();
        let mut r2 = StreamingResampler::new(8000, 16000, 160).unwrap();
        r2.process(&input, &mut whole_out).unwrap();

        assert_eq!(
            split_out.len(),
            whole_out.len(),
            "split call must produce same number of samples as one-shot"
        );
        for (i, (a, b)) in split_out.iter().zip(whole_out.iter()).enumerate() {
            assert!(
                (a - b).abs() < 1e-6,
                "split vs whole differ at {i}: {a} vs {b}"
            );
        }
    }

    /// Streaming output of a sine is smooth, whereas resampling each chunk
    /// independently produces many more sample-to-sample spikes.
    #[cfg(feature = "resample")]
    #[test]
    fn streaming_resampler_avoids_per_frame_edge_artifacts() {
        use crate::StreamingResampler;
        let sr_in: u32 = 8000;
        let sr_out: u32 = 44100;
        let chunks = 30;
        let chunk_size = 160;
        let freq = 600.0_f32;
        let signal: Vec<f32> = (0..chunks * chunk_size)
            .map(|i| (2.0 * std::f32::consts::PI * freq * i as f32 / sr_in as f32).sin())
            .collect();

        let mut streaming = StreamingResampler::new(sr_in, sr_out, chunk_size).unwrap();
        let mut streaming_out: Vec<f32> = Vec::new();
        for c in 0..chunks {
            streaming
                .process(
                    &signal[c * chunk_size..(c + 1) * chunk_size],
                    &mut streaming_out,
                )
                .unwrap();
        }

        let mut stateless_out: Vec<f32> = Vec::new();
        for c in 0..chunks {
            let chunk =
                AudioFrame::from_vec(signal[c * chunk_size..(c + 1) * chunk_size].to_vec(), sr_in);
            let resampled = chunk.resample(sr_out).unwrap();
            stateless_out.extend_from_slice(resampled.samples());
        }

        // Ignore the start and end of each output when counting spikes.
        let skip = 1500;
        let tail = 500;
        // Largest per-sample step of a unit sine at `freq`, with 4x headroom.
        let expected_max_delta = 2.0 * std::f32::consts::PI * freq / sr_out as f32;
        let spike_threshold = expected_max_delta * 4.0;
        let count_spikes = |samples: &[f32], skip: usize, tail: usize| -> usize {
            samples[skip..samples.len() - tail]
                .windows(2)
                .filter(|w| (w[1] - w[0]).abs() > spike_threshold)
                .count()
        };
        let streaming_spikes = count_spikes(&streaming_out, skip, tail);
        let stateless_spikes = count_spikes(&stateless_out, skip, tail);
        assert!(
            streaming_spikes < 10,
            "streaming output should be smooth, found {streaming_spikes} sample-delta spikes (threshold {spike_threshold})"
        );
        assert!(
            stateless_spikes > streaming_spikes * 5,
            "stateless per-chunk should have far more spikes than streaming; got stateless={stateless_spikes}, streaming={streaming_spikes}"
        );
    }

    /// A zero source or target rate is rejected, including the equal-rate case.
    #[cfg(feature = "resample")]
    #[test]
    fn streaming_resampler_rejects_zero_rate() {
        use crate::StreamingResampler;
        assert!(StreamingResampler::new(0, 16000, 160).is_err());
        assert!(StreamingResampler::new(16000, 0, 160).is_err());
        assert!(StreamingResampler::new(0, 0, 160).is_err());
    }

    /// A zero chunk size is rejected when the rates differ.
    #[cfg(feature = "resample")]
    #[test]
    fn streaming_resampler_rejects_zero_chunk_size() {
        use crate::StreamingResampler;
        assert!(StreamingResampler::new(8000, 16000, 0).is_err());
    }
}