// wavekat_core/audio.rs

use std::borrow::Cow;
2
/// A frame of audio samples together with the rate they were sampled at.
///
/// `AudioFrame` is the standard audio input type across the WaveKat ecosystem.
/// Samples are held as `f32` values normalized to `[-1.0, 1.0]`, regardless of
/// the original input format.
///
/// Construct via [`AudioFrame::new`], which accepts both `&[f32]` (zero-copy)
/// and `&[i16]` (converts once) through the [`IntoSamples`] trait.
///
/// Two frames compare equal when their sample rates match and their samples
/// are element-wise equal; whether the samples are borrowed or owned does not
/// affect the comparison.
///
/// # Examples
///
/// ```
/// use wavekat_core::AudioFrame;
///
/// // f32 input — zero-copy via Cow::Borrowed
/// let samples = [0.1f32, -0.2, 0.3];
/// let frame = AudioFrame::new(&samples, 16000);
/// assert_eq!(frame.samples(), &[0.1, -0.2, 0.3]);
///
/// // i16 input — normalized to f32 [-1.0, 1.0]
/// let samples = [i16::MAX, 0, i16::MIN];
/// let frame = AudioFrame::new(&samples, 16000);
/// assert!((frame.samples()[0] - 1.0).abs() < 0.001);
/// ```
#[derive(Debug, Clone, PartialEq)]
pub struct AudioFrame<'a> {
    /// Sample data: borrowed when built from `f32` input, owned otherwise.
    samples: Cow<'a, [f32]>,
    /// Sample rate in Hz.
    sample_rate: u32,
}
32
33impl<'a> AudioFrame<'a> {
34    /// Create a new audio frame from any supported sample type.
35    ///
36    /// Accepts `&[f32]` (zero-copy) or `&[i16]` (converts to normalized f32).
37    pub fn new(samples: impl IntoSamples<'a>, sample_rate: u32) -> Self {
38        Self {
39            samples: samples.into_samples(),
40            sample_rate,
41        }
42    }
43
44    /// The audio samples as f32 normalized to `[-1.0, 1.0]`.
45    pub fn samples(&self) -> &[f32] {
46        &self.samples
47    }
48
49    /// Sample rate in Hz (e.g. 16000).
50    pub fn sample_rate(&self) -> u32 {
51        self.sample_rate
52    }
53
54    /// Number of samples in the frame.
55    pub fn len(&self) -> usize {
56        self.samples.len()
57    }
58
59    /// Returns `true` if the frame contains no samples.
60    pub fn is_empty(&self) -> bool {
61        self.samples.is_empty()
62    }
63
64    /// Duration of this frame in seconds.
65    pub fn duration_secs(&self) -> f64 {
66        self.samples.len() as f64 / self.sample_rate as f64
67    }
68
69    /// Consume the frame and return the owned samples.
70    pub fn into_owned(self) -> AudioFrame<'static> {
71        AudioFrame {
72            samples: Cow::Owned(self.samples.into_owned()),
73            sample_rate: self.sample_rate,
74        }
75    }
76}
77
78impl AudioFrame<'static> {
79    /// Construct an owned frame directly from a `Vec<f32>`.
80    ///
81    /// Zero-copy — wraps the vec as `Cow::Owned` without cloning.
82    /// Intended for audio producers (TTS, ASR) that generate owned data.
83    ///
84    /// # Example
85    ///
86    /// ```
87    /// use wavekat_core::AudioFrame;
88    ///
89    /// let samples = vec![0.5f32, -0.5, 0.3];
90    /// let frame = AudioFrame::from_vec(samples, 24000);
91    /// assert_eq!(frame.sample_rate(), 24000);
92    /// assert_eq!(frame.len(), 3);
93    /// ```
94    pub fn from_vec(samples: Vec<f32>, sample_rate: u32) -> Self {
95        Self {
96            samples: Cow::Owned(samples),
97            sample_rate,
98        }
99    }
100}
101
#[cfg(feature = "resample")]
impl AudioFrame<'_> {
    /// Resample this frame to a different sample rate.
    ///
    /// Returns a new owned `AudioFrame` at `target_rate`. Two cases return
    /// early without touching the resampler: a frame that is already at
    /// `target_rate` is returned as an owned copy, and an empty frame yields
    /// an empty frame tagged with `target_rate`.
    ///
    /// Uses high-quality sinc interpolation via [`rubato`].
    ///
    /// # Errors
    ///
    /// Returns [`CoreError::Audio`] if resampling is actually required and
    /// either sample rate is zero, if the resampler cannot be constructed,
    /// or if processing fails.
    ///
    /// # Example
    ///
    /// ```
    /// use wavekat_core::AudioFrame;
    ///
    /// let frame = AudioFrame::from_vec(vec![0.0f32; 4410], 44100);
    /// let resampled = frame.resample(16000).unwrap();
    /// assert_eq!(resampled.sample_rate(), 16000);
    /// ```
    pub fn resample(&self, target_rate: u32) -> Result<AudioFrame<'static>, crate::CoreError> {
        use rubato::audioadapter_buffers::direct::InterleavedSlice;
        use rubato::{
            Async, FixedAsync, Resampler, SincInterpolationParameters, SincInterpolationType,
            WindowFunction,
        };

        /// Wrap any displayable error in `CoreError::Audio`.
        fn audio_err<E: std::fmt::Display>(err: E) -> crate::CoreError {
            crate::CoreError::Audio(err.to_string())
        }

        if self.sample_rate == target_rate {
            return Ok(self.clone().into_owned());
        }

        if self.is_empty() {
            return Ok(AudioFrame::from_vec(Vec::new(), target_rate));
        }

        // A zero rate on either side makes the ratio below 0 or infinite.
        // Reject it here with a clear message instead of handing a
        // degenerate ratio to the resampler.
        if self.sample_rate == 0 || target_rate == 0 {
            return Err(crate::CoreError::Audio(format!(
                "cannot resample from {} Hz to {} Hz: sample rates must be non-zero",
                self.sample_rate, target_rate
            )));
        }

        let ratio = target_rate as f64 / self.sample_rate as f64;
        let nbr_input_frames = self.samples.len();

        let params = SincInterpolationParameters {
            sinc_len: 256,
            f_cutoff: 0.95,
            interpolation: SincInterpolationType::Cubic,
            oversampling_factor: 128,
            window: WindowFunction::BlackmanHarris2,
        };

        // Match chunk size to input when shorter than the default — avoids
        // wasting work padding a 160-sample G.711 frame up to 1024 samples.
        let chunk_size = nbr_input_frames.min(1024);

        let mut resampler =
            Async::<f32>::new_sinc(ratio, 1.0, &params, chunk_size, 1, FixedAsync::Input)
                .map_err(audio_err)?;

        // Ask rubato exactly how much output space `process_all_into_buffer`
        // needs — it accounts for the per-chunk pad-up, the resampler's
        // internal delay, and the input-length-times-ratio expected output.
        let out_len = resampler.process_all_needed_output_len(nbr_input_frames);
        let mut outdata = vec![0.0f32; out_len];

        // Single-channel adapters over the input samples and output buffer.
        let input_adapter = InterleavedSlice::new(self.samples.as_ref(), 1, nbr_input_frames)
            .map_err(audio_err)?;
        let mut output_adapter =
            InterleavedSlice::new_mut(&mut outdata, 1, out_len).map_err(audio_err)?;

        let (_in_consumed, out_produced) = resampler
            .process_all_into_buffer(&input_adapter, &mut output_adapter, nbr_input_frames, None)
            .map_err(audio_err)?;

        // Keep only the samples the resampler reported as produced.
        outdata.truncate(out_produced);
        Ok(AudioFrame::from_vec(outdata, target_rate))
    }
}
179
#[cfg(feature = "wav")]
impl AudioFrame<'_> {
    /// Save this frame as a WAV file at `path`.
    ///
    /// The file is always written as mono 32-bit float PCM at the frame's
    /// own sample rate.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use wavekat_core::AudioFrame;
    ///
    /// let frame = AudioFrame::from_vec(vec![0.0f32; 16000], 16000);
    /// frame.write_wav("output.wav").unwrap();
    /// ```
    pub fn write_wav(&self, path: impl AsRef<std::path::Path>) -> Result<(), crate::CoreError> {
        let mut wav = hound::WavWriter::create(
            path,
            hound::WavSpec {
                channels: 1,
                sample_rate: self.sample_rate,
                bits_per_sample: 32,
                sample_format: hound::SampleFormat::Float,
            },
        )?;

        // Stops at the first sample that fails to write.
        self.samples()
            .iter()
            .try_for_each(|&value| wav.write_sample(value))?;

        // Finalize explicitly so that any error is returned to the caller.
        wav.finalize()?;
        Ok(())
    }
}
209
#[cfg(feature = "wav")]
impl AudioFrame<'static> {
    /// Read a WAV file and return an owned `AudioFrame`.
    ///
    /// Float files are read as-is. Integer PCM files are decoded as `i32`
    /// and normalised by dividing by `2^(bits_per_sample - 1)` — 32768 for
    /// 16-bit audio — so every integer bit depth the decoder supports maps
    /// onto the same `[-1.0, 1.0]` range.
    ///
    /// The file is expected to be mono. The channel count is not inspected,
    /// so the samples of a multi-channel file would all land in one frame.
    ///
    /// # Errors
    ///
    /// Propagates any error from opening the file or decoding a sample,
    /// converted into [`CoreError`](crate::CoreError).
    ///
    /// # Example
    ///
    /// ```no_run
    /// use wavekat_core::AudioFrame;
    ///
    /// let frame = AudioFrame::from_wav("input.wav").unwrap();
    /// println!("{} Hz, {} samples", frame.sample_rate(), frame.len());
    /// ```
    pub fn from_wav(path: impl AsRef<std::path::Path>) -> Result<Self, crate::CoreError> {
        let mut reader = hound::WavReader::open(path)?;
        let spec = reader.spec();
        let sample_rate = spec.sample_rate;
        let samples: Vec<f32> = match spec.sample_format {
            hound::SampleFormat::Float => reader.samples::<f32>().collect::<Result<_, _>>()?,
            hound::SampleFormat::Int => {
                // Full-scale magnitude for this bit depth (32768.0 at 16 bits).
                let full_scale = 2.0f32.powi(i32::from(spec.bits_per_sample) - 1);
                reader
                    .samples::<i32>()
                    .map(|s| s.map(|v| v as f32 / full_scale))
                    .collect::<Result<_, _>>()?
            }
        };
        Ok(AudioFrame::from_vec(samples, sample_rate))
    }
}
239
/// Conversion into normalized `f32` audio samples.
///
/// Implemented for borrowed `f32` data (`&[f32]`, `&Vec<f32>`, `&[f32; N]`),
/// which is passed through without copying, and for borrowed `i16` data
/// (`&[i16]`, `&Vec<i16>`, `&[i16; N]`), which is converted into a new buffer.
pub trait IntoSamples<'a> {
    /// Convert into f32 samples normalized to `[-1.0, 1.0]`.
    fn into_samples(self) -> Cow<'a, [f32]>;
}

/// Divisor that maps the `i16` range onto `[-1.0, 1.0)`.
const I16_FULL_SCALE: f32 = 32768.0;

/// Convert `i16` samples to normalized `f32` in a freshly allocated buffer.
fn normalize_i16(pcm: &[i16]) -> Vec<f32> {
    pcm.iter()
        .copied()
        .map(|sample| f32::from(sample) / I16_FULL_SCALE)
        .collect()
}

impl<'a> IntoSamples<'a> for &'a [f32] {
    #[inline]
    fn into_samples(self) -> Cow<'a, [f32]> {
        // `Cow::from(&[T])` is the borrowed variant: no copy is made.
        Cow::from(self)
    }
}

impl<'a> IntoSamples<'a> for &'a Vec<f32> {
    #[inline]
    fn into_samples(self) -> Cow<'a, [f32]> {
        IntoSamples::into_samples(self.as_slice())
    }
}

impl<'a, const N: usize> IntoSamples<'a> for &'a [f32; N] {
    #[inline]
    fn into_samples(self) -> Cow<'a, [f32]> {
        IntoSamples::into_samples(self.as_slice())
    }
}

impl<'a> IntoSamples<'a> for &'a [i16] {
    #[inline]
    fn into_samples(self) -> Cow<'a, [f32]> {
        Cow::Owned(normalize_i16(self))
    }
}

impl<'a> IntoSamples<'a> for &'a Vec<i16> {
    #[inline]
    fn into_samples(self) -> Cow<'a, [f32]> {
        Cow::Owned(normalize_i16(self.as_slice()))
    }
}

impl<'a, const N: usize> IntoSamples<'a> for &'a [i16; N] {
    #[inline]
    fn into_samples(self) -> Cow<'a, [f32]> {
        Cow::Owned(normalize_i16(self.as_slice()))
    }
}
289
#[cfg(test)]
mod tests {
    use super::*;

    /// Per-process temp path for a WAV fixture, so concurrent test runs do
    /// not overwrite each other's files.
    #[cfg(feature = "wav")]
    fn temp_wav_path(tag: &str) -> std::path::PathBuf {
        std::env::temp_dir().join(format!("wavekat_test_{tag}_{}.wav", std::process::id()))
    }

    /// Assert that `frame` holds `expected` samples to within the tolerance
    /// the resampling tests allow (a difference of fewer than 50 samples).
    #[cfg(feature = "resample")]
    fn assert_len_near(frame: &AudioFrame<'_>, expected: usize) {
        let actual = frame.len();
        assert!(
            actual.abs_diff(expected) < 50,
            "expected ~{expected} samples, got {actual}"
        );
    }

    #[test]
    fn f32_is_zero_copy() {
        let samples = vec![0.1f32, -0.2, 0.3];
        let frame = AudioFrame::new(samples.as_slice(), 16000);
        // Cow::Borrowed — the pointer should be the same
        assert!(matches!(frame.samples, Cow::Borrowed(_)));
        assert_eq!(frame.samples().as_ptr(), samples.as_ptr());
        assert_eq!(frame.samples(), &[0.1, -0.2, 0.3]);
    }

    #[test]
    fn i16_normalizes_to_f32() {
        let samples: Vec<i16> = vec![0, 16384, -16384, i16::MAX, i16::MIN];
        let frame = AudioFrame::new(samples.as_slice(), 16000);
        assert!(matches!(frame.samples, Cow::Owned(_)));

        let s = frame.samples();
        assert!((s[0] - 0.0).abs() < f32::EPSILON);
        assert!((s[1] - 0.5).abs() < 0.001);
        assert!((s[2] - -0.5).abs() < 0.001);
        assert!((s[3] - (i16::MAX as f32 / 32768.0)).abs() < f32::EPSILON);
        assert!((s[4] - -1.0).abs() < f32::EPSILON);
    }

    #[test]
    fn metadata() {
        let samples = vec![0.0f32; 160];
        let frame = AudioFrame::new(samples.as_slice(), 16000);
        assert_eq!(frame.sample_rate(), 16000);
        assert_eq!(frame.len(), 160);
        assert!(!frame.is_empty());
        assert!((frame.duration_secs() - 0.01).abs() < 1e-9);
    }

    #[test]
    fn empty_frame() {
        let samples: &[f32] = &[];
        let frame = AudioFrame::new(samples, 16000);
        assert!(frame.is_empty());
        assert_eq!(frame.len(), 0);
    }

    #[test]
    fn into_owned() {
        let samples = vec![0.5f32, -0.5];
        let frame = AudioFrame::new(samples.as_slice(), 16000);
        let owned: AudioFrame<'static> = frame.into_owned();
        assert_eq!(owned.samples(), &[0.5, -0.5]);
        assert_eq!(owned.sample_rate(), 16000);
    }

    #[cfg(feature = "wav")]
    #[test]
    fn wav_read_i16() {
        // Write an i16 WAV directly via hound, then read it with from_wav.
        let path = temp_wav_path("i16");
        let spec = hound::WavSpec {
            channels: 1,
            sample_rate: 16000,
            bits_per_sample: 16,
            sample_format: hound::SampleFormat::Int,
        };
        let i16_samples: &[i16] = &[0, i16::MAX, i16::MIN, 16384];
        let mut writer = hound::WavWriter::create(&path, spec).unwrap();
        for &s in i16_samples {
            writer.write_sample(s).unwrap();
        }
        writer.finalize().unwrap();

        // Remove the fixture before asserting so a failure leaves nothing behind.
        let loaded = AudioFrame::from_wav(&path);
        let _ = std::fs::remove_file(&path);
        let frame = loaded.unwrap();

        assert_eq!(frame.sample_rate(), 16000);
        assert_eq!(frame.len(), 4);
        let s = frame.samples();
        assert!((s[0] - 0.0).abs() < 1e-6);
        assert!((s[1] - (i16::MAX as f32 / 32768.0)).abs() < 1e-6);
        assert!((s[2] - -1.0).abs() < 1e-6);
        assert!((s[3] - 0.5).abs() < 1e-4);
    }

    #[cfg(feature = "wav")]
    #[test]
    fn wav_round_trip() {
        let original = AudioFrame::from_vec(vec![0.5f32, -0.5, 0.0, 1.0], 16000);
        let path = temp_wav_path("round_trip");
        original.write_wav(&path).unwrap();

        // Remove the fixture before asserting so a failure leaves nothing behind.
        let loaded = AudioFrame::from_wav(&path);
        let _ = std::fs::remove_file(&path);
        let loaded = loaded.unwrap();

        assert_eq!(loaded.sample_rate(), 16000);
        // `zip` stops at the shorter side, so check the lengths explicitly;
        // otherwise a truncated or empty file would pass the loop below.
        assert_eq!(loaded.len(), original.len());
        for (a, b) in original.samples().iter().zip(loaded.samples()) {
            assert!((a - b).abs() < 1e-6, "sample mismatch: {a} vs {b}");
        }
    }

    #[test]
    fn from_vec_is_zero_copy() {
        let samples = vec![0.5f32, -0.5];
        let ptr = samples.as_ptr();
        let frame = AudioFrame::from_vec(samples, 24000);
        assert_eq!(frame.samples().as_ptr(), ptr);
        assert_eq!(frame.sample_rate(), 24000);
    }

    #[test]
    fn into_samples_vec_f32() {
        let samples = vec![0.1f32, -0.2, 0.3];
        let frame = AudioFrame::new(&samples, 16000);
        assert!(matches!(frame.samples, Cow::Borrowed(_)));
        assert_eq!(frame.samples(), &[0.1, -0.2, 0.3]);
    }

    #[test]
    fn into_samples_array_f32() {
        let samples = [0.1f32, -0.2, 0.3];
        let frame = AudioFrame::new(&samples, 16000);
        assert!(matches!(frame.samples, Cow::Borrowed(_)));
        assert_eq!(frame.samples(), &[0.1, -0.2, 0.3]);
    }

    #[test]
    fn into_samples_vec_i16() {
        let samples: Vec<i16> = vec![0, 16384, i16::MIN];
        let frame = AudioFrame::new(&samples, 16000);
        assert!(matches!(frame.samples, Cow::Owned(_)));
        let s = frame.samples();
        assert!((s[0] - 0.0).abs() < f32::EPSILON);
        assert!((s[1] - 0.5).abs() < 0.001);
        assert!((s[2] - -1.0).abs() < f32::EPSILON);
    }

    #[test]
    fn into_samples_array_i16() {
        let samples: [i16; 3] = [0, 16384, i16::MIN];
        let frame = AudioFrame::new(&samples, 16000);
        assert!(matches!(frame.samples, Cow::Owned(_)));
        let s = frame.samples();
        assert!((s[0] - 0.0).abs() < f32::EPSILON);
        assert!((s[1] - 0.5).abs() < 0.001);
        assert!((s[2] - -1.0).abs() < f32::EPSILON);
    }

    #[cfg(feature = "resample")]
    #[test]
    fn resample_noop_same_rate() {
        let samples = vec![0.1f32, -0.2, 0.3, 0.4, 0.5];
        let frame = AudioFrame::from_vec(samples.clone(), 16000);
        let resampled = frame.resample(16000).unwrap();
        assert_eq!(resampled.sample_rate(), 16000);
        assert_eq!(resampled.samples(), &samples[..]);
    }

    #[cfg(feature = "resample")]
    #[test]
    fn resample_empty_frame() {
        let frame = AudioFrame::from_vec(Vec::new(), 44100);
        let resampled = frame.resample(16000).unwrap();
        assert_eq!(resampled.sample_rate(), 16000);
        assert!(resampled.is_empty());
    }

    #[cfg(feature = "resample")]
    #[test]
    fn resample_downsample() {
        // 1 second of silence at 48 kHz → 16 kHz
        let frame = AudioFrame::from_vec(vec![0.0f32; 48000], 48000);
        let resampled = frame.resample(16000).unwrap();
        assert_eq!(resampled.sample_rate(), 16000);
        // Should produce ~16000 samples (allow small tolerance from resampler)
        assert_len_near(&resampled, 16000);
    }

    #[cfg(feature = "resample")]
    #[test]
    fn resample_upsample() {
        // 1 second at 16 kHz → 24 kHz
        let frame = AudioFrame::from_vec(vec![0.0f32; 16000], 16000);
        let resampled = frame.resample(24000).unwrap();
        assert_eq!(resampled.sample_rate(), 24000);
        assert_len_near(&resampled, 24000);
    }

    #[cfg(feature = "resample")]
    #[test]
    fn resample_short_input_upsample_large_ratio() {
        // The exact case from the wavekat-voice RTP path: a 20 ms G.711 frame
        // (160 samples @ 8 kHz) upsampled to 44.1 kHz. Before the fix this
        // returned `InsufficientOutputBufferSize`.
        let frame = AudioFrame::from_vec(vec![0.0f32; 160], 8000);
        let resampled = frame.resample(44_100).unwrap();
        assert_eq!(resampled.sample_rate(), 44_100);
        // 160 * 44_100 / 8_000 = 882
        assert_len_near(&resampled, 882);
    }

    #[cfg(feature = "resample")]
    #[test]
    fn resample_short_input_upsample_small_ratio() {
        // 160 samples @ 8 kHz → 16 kHz. Also failed before the fix even
        // though the ratio is modest, because nbr_input_frames < chunk_size.
        let frame = AudioFrame::from_vec(vec![0.0f32; 160], 8000);
        let resampled = frame.resample(16_000).unwrap();
        assert_eq!(resampled.sample_rate(), 16_000);
        assert_len_near(&resampled, 320);
    }

    #[cfg(feature = "resample")]
    #[test]
    fn resample_single_g711_frame_to_48k() {
        // The other common device rate: 160 @ 8 kHz → 48 kHz.
        let frame = AudioFrame::from_vec(vec![0.0f32; 160], 8000);
        let resampled = frame.resample(48_000).unwrap();
        assert_eq!(resampled.sample_rate(), 48_000);
        assert_len_near(&resampled, 960);
    }

    #[cfg(feature = "resample")]
    #[test]
    fn resample_preserves_sine_frequency() {
        // Generate a 440 Hz sine at 44100 Hz, resample to 16000 Hz,
        // then verify the dominant frequency is still ~440 Hz by
        // checking zero-crossing rate.
        let sr_in: u32 = 44100;
        let sr_out: u32 = 16000;
        let duration_secs = 1.0;
        let freq = 440.0;
        let n = (sr_in as f64 * duration_secs) as usize;
        let samples: Vec<f32> = (0..n)
            .map(|i| (2.0 * std::f64::consts::PI * freq * i as f64 / sr_in as f64).sin() as f32)
            .collect();

        let frame = AudioFrame::from_vec(samples, sr_in);
        let resampled = frame.resample(sr_out).unwrap();

        // Count zero crossings (sign changes)
        let s = resampled.samples();
        let crossings: usize = s
            .windows(2)
            .filter(|w| w[0].signum() != w[1].signum())
            .count();
        // A pure sine at f Hz has 2*f zero crossings per second
        let measured_freq = crossings as f64 / (2.0 * duration_secs);
        assert!(
            (measured_freq - freq).abs() < 5.0,
            "expected ~{freq} Hz, measured {measured_freq} Hz"
        );
    }
}