wavekat_core/
audio.rs

1use std::borrow::Cow;
2
/// A frame of audio samples with associated sample rate.
///
/// `AudioFrame` is the standard audio input type across the WaveKat ecosystem.
/// It stores samples as f32 normalized to `[-1.0, 1.0]`, regardless of the
/// original input format.
///
/// Construct via [`AudioFrame::new`], which accepts both `&[f32]` (zero-copy)
/// and `&[i16]` (converts once) through the [`IntoSamples`] trait.
///
/// The lifetime `'a` is the lifetime of the borrowed sample buffer when the
/// frame wraps caller-owned `f32` data.
///
/// # Examples
///
/// ```
/// use wavekat_core::AudioFrame;
///
/// // f32 input — zero-copy via Cow::Borrowed
/// let samples = [0.1f32, -0.2, 0.3];
/// let frame = AudioFrame::new(&samples, 16000);
/// assert_eq!(frame.samples(), &[0.1, -0.2, 0.3]);
///
/// // i16 input — normalized to f32 [-1.0, 1.0]
/// let samples = [i16::MAX, 0, i16::MIN];
/// let frame = AudioFrame::new(&samples, 16000);
/// assert!((frame.samples()[0] - 1.0).abs() < 0.001);
/// ```
#[derive(Debug, Clone)]
pub struct AudioFrame<'a> {
    // Sample data: borrowed when built from `f32` input, owned when the
    // input had to be converted (e.g. from `i16`).
    samples: Cow<'a, [f32]>,
    // Sample rate in Hz, stored exactly as supplied by the constructor.
    sample_rate: u32,
}
32
33impl<'a> AudioFrame<'a> {
34    /// Create a new audio frame from any supported sample type.
35    ///
36    /// Accepts `&[f32]` (zero-copy) or `&[i16]` (converts to normalized f32).
37    pub fn new(samples: impl IntoSamples<'a>, sample_rate: u32) -> Self {
38        Self {
39            samples: samples.into_samples(),
40            sample_rate,
41        }
42    }
43
44    /// The audio samples as f32 normalized to `[-1.0, 1.0]`.
45    pub fn samples(&self) -> &[f32] {
46        &self.samples
47    }
48
49    /// Sample rate in Hz (e.g. 16000).
50    pub fn sample_rate(&self) -> u32 {
51        self.sample_rate
52    }
53
54    /// Number of samples in the frame.
55    pub fn len(&self) -> usize {
56        self.samples.len()
57    }
58
59    /// Returns `true` if the frame contains no samples.
60    pub fn is_empty(&self) -> bool {
61        self.samples.is_empty()
62    }
63
64    /// Duration of this frame in seconds.
65    pub fn duration_secs(&self) -> f64 {
66        self.samples.len() as f64 / self.sample_rate as f64
67    }
68
69    /// Consume the frame and return the owned samples.
70    pub fn into_owned(self) -> AudioFrame<'static> {
71        AudioFrame {
72            samples: Cow::Owned(self.samples.into_owned()),
73            sample_rate: self.sample_rate,
74        }
75    }
76}
77
78impl AudioFrame<'static> {
79    /// Construct an owned frame directly from a `Vec<f32>`.
80    ///
81    /// Zero-copy — wraps the vec as `Cow::Owned` without cloning.
82    /// Intended for audio producers (TTS, ASR) that generate owned data.
83    ///
84    /// # Example
85    ///
86    /// ```
87    /// use wavekat_core::AudioFrame;
88    ///
89    /// let samples = vec![0.5f32, -0.5, 0.3];
90    /// let frame = AudioFrame::from_vec(samples, 24000);
91    /// assert_eq!(frame.sample_rate(), 24000);
92    /// assert_eq!(frame.len(), 3);
93    /// ```
94    pub fn from_vec(samples: Vec<f32>, sample_rate: u32) -> Self {
95        Self {
96            samples: Cow::Owned(samples),
97            sample_rate,
98        }
99    }
100}
101
#[cfg(feature = "resample")]
impl AudioFrame<'_> {
    /// Resample this frame to a different sample rate.
    ///
    /// Returns a new owned `AudioFrame` at `target_rate`. If the frame is
    /// already at the target rate, returns an owned copy without touching
    /// the resampler. An empty frame yields an empty frame at `target_rate`.
    ///
    /// Uses high-quality sinc interpolation via [`rubato`].
    ///
    /// # Errors
    ///
    /// Returns [`CoreError::Audio`] if either the frame's sample rate or
    /// `target_rate` is zero (checked up front, so the same-rate and
    /// empty-frame shortcuts cannot hand back a 0 Hz frame), if the
    /// resampler cannot be constructed, or if processing fails.
    ///
    /// # Example
    ///
    /// ```
    /// use wavekat_core::AudioFrame;
    ///
    /// let frame = AudioFrame::from_vec(vec![0.0f32; 4410], 44100);
    /// let resampled = frame.resample(16000).unwrap();
    /// assert_eq!(resampled.sample_rate(), 16000);
    /// ```
    pub fn resample(&self, target_rate: u32) -> Result<AudioFrame<'static>, crate::CoreError> {
        use rubato::audioadapter_buffers::direct::InterleavedSlice;
        use rubato::Resampler;

        // Validate before the fast paths: otherwise an empty frame (or a
        // 0 Hz → 0 Hz "no-op") would succeed and produce a frame with a
        // zero sample rate, while the same call on real audio fails.
        if self.sample_rate == 0 || target_rate == 0 {
            return Err(crate::CoreError::Audio(
                "sample rate must be non-zero".into(),
            ));
        }

        if self.sample_rate == target_rate {
            // Single copy of the samples into an owned frame.
            return Ok(AudioFrame::from_vec(self.samples.to_vec(), target_rate));
        }

        if self.is_empty() {
            return Ok(AudioFrame::from_vec(Vec::new(), target_rate));
        }

        let nbr_input_frames = self.samples.len();
        // Match chunk size to input when shorter than the default — avoids
        // wasting work padding a 160-sample G.711 frame up to 1024 samples.
        let chunk_size = nbr_input_frames.min(1024);
        let mut resampler = build_sinc_resampler(self.sample_rate, target_rate, chunk_size)?;

        // Ask rubato exactly how much output space `process_all_into_buffer`
        // needs — it accounts for the per-chunk pad-up, the resampler's
        // internal delay, and the input-length-times-ratio expected output.
        let out_len = resampler.process_all_needed_output_len(nbr_input_frames);
        let mut outdata = vec![0.0f32; out_len];

        // Mono: one channel, `nbr_input_frames` / `out_len` frames.
        let input_adapter = InterleavedSlice::new(self.samples.as_ref(), 1, nbr_input_frames)
            .map_err(|e| crate::CoreError::Audio(e.to_string()))?;
        let mut output_adapter = InterleavedSlice::new_mut(&mut outdata, 1, out_len)
            .map_err(|e| crate::CoreError::Audio(e.to_string()))?;

        let (_in_consumed, out_produced) = resampler
            .process_all_into_buffer(&input_adapter, &mut output_adapter, nbr_input_frames, None)
            .map_err(|e| crate::CoreError::Audio(e.to_string()))?;

        // Drop the unused tail of the scratch allocation.
        outdata.truncate(out_produced);
        Ok(AudioFrame::from_vec(outdata, target_rate))
    }
}
163
/// Single construction point for the rubato sinc resampler used by
/// [`AudioFrame::resample`] and [`StreamingResampler`], so the filter
/// parameters (and any rubato API adjustments) live in one place.
///
/// Rejects a zero `source_rate`/`target_rate` and a zero `chunk_size` with
/// [`CoreError::Audio`]; rubato construction errors are mapped to the same
/// variant.
#[cfg(feature = "resample")]
fn build_sinc_resampler(
    source_rate: u32,
    target_rate: u32,
    chunk_size: usize,
) -> Result<rubato::Async<f32>, crate::CoreError> {
    use rubato::{
        Async, FixedAsync, SincInterpolationParameters, SincInterpolationType, WindowFunction,
    };

    // Rates are checked before the chunk size, so a call that violates both
    // reports the rate problem.
    let rejection = match (source_rate, target_rate, chunk_size) {
        (0, _, _) | (_, 0, _) => Some("sample rate must be non-zero"),
        (_, _, 0) => Some("chunk_size must be non-zero"),
        _ => None,
    };
    if let Some(message) = rejection {
        return Err(crate::CoreError::Audio(message.into()));
    }

    let resample_ratio = f64::from(target_rate) / f64::from(source_rate);
    let sinc_params = SincInterpolationParameters {
        sinc_len: 256,
        f_cutoff: 0.95,
        interpolation: SincInterpolationType::Cubic,
        oversampling_factor: 128,
        window: WindowFunction::BlackmanHarris2,
    };

    // Mono, fixed input chunk size, ratio fixed at construction (max
    // relative ratio 1.0).
    match Async::<f32>::new_sinc(
        resample_ratio,
        1.0,
        &sinc_params,
        chunk_size,
        1,
        FixedAsync::Input,
    ) {
        Ok(resampler) => Ok(resampler),
        Err(e) => Err(crate::CoreError::Audio(e.to_string())),
    }
}
199
/// Stateful streaming resampler.
///
/// [`AudioFrame::resample`] is convenient but constructs a fresh rubato
/// resampler per call. For real-time pipelines that hand the resampler
/// short frames (e.g. 20 ms G.711 packets off an RTP socket) the per-call
/// resampler has no state to carry across frame boundaries, and sinc
/// reconstruction produces audible edge artifacts at the frame rate —
/// 50 Hz for 20 ms packets, perceived as continuous noise/buzz over the
/// voice. `StreamingResampler` builds rubato once at stream open and
/// reuses its internal filter state for every call, so output samples
/// stitch together cleanly.
///
/// Build it with [`StreamingResampler::new`], then call
/// [`process`](Self::process) for each arriving block of audio. Samples
/// accumulate inside the resampler until a full `chunk_size` is ready,
/// then a chunk's worth of output is appended to the caller's buffer.
/// A trailing partial chunk (fewer than `chunk_size` samples) stays
/// buffered and produces no output until later input completes it.
///
/// If `source_rate == target_rate`, `process` becomes a pure copy and
/// `chunk_size` is ignored.
///
/// # Example
///
/// ```
/// use wavekat_core::StreamingResampler;
///
/// // 8 kHz → 44.1 kHz, 160-sample input chunks (matches 20 ms G.711).
/// let mut resampler = StreamingResampler::new(8000, 44100, 160).unwrap();
///
/// let mut out = Vec::new();
/// for _packet in 0..5 {
///     let input = vec![0.0f32; 160]; // 20 ms of silence per packet
///     resampler.process(&input, &mut out).unwrap();
/// }
/// // Five 160-sample inputs at 8 kHz expand to roughly 5 × 882 samples
/// // at 44.1 kHz (the exact count depends on rubato's edge handling).
/// assert!(out.len() > 4000);
/// ```
#[cfg(feature = "resample")]
pub struct StreamingResampler {
    // `None` when source_rate == target_rate (pass-through fast path).
    inner: Option<rubato::Async<f32>>,
    // Input rate in Hz, as passed to the constructor.
    source_rate: u32,
    // Output rate in Hz, as passed to the constructor.
    target_rate: u32,
    // Number of input samples consumed per rubato step.
    chunk_size: usize,
    // Accumulates partial input across calls until we have `chunk_size`
    // samples for the next rubato step.
    input_buf: Vec<f32>,
    // Reusable scratch sized to `output_frames_max()` so we don't
    // re-allocate on every chunk. Left empty in pass-through mode.
    output_buf: Vec<f32>,
}
251
#[cfg(feature = "resample")]
impl StreamingResampler {
    /// Create a streaming resampler from `source_rate` to `target_rate`.
    ///
    /// `chunk_size` is the number of input samples fed to rubato per
    /// internal step. Pick the natural arrival size of your input — e.g.
    /// 160 for 20 ms G.711 frames at 8 kHz. Smaller chunks lower latency;
    /// larger chunks are marginally more efficient.
    ///
    /// When both rates are equal no rubato instance is built and
    /// `chunk_size` is not validated.
    ///
    /// Returns [`CoreError::Audio`] if the resampler cannot be built
    /// (zero rate, zero chunk size, or rubato rejects the ratio).
    pub fn new(
        source_rate: u32,
        target_rate: u32,
        chunk_size: usize,
    ) -> Result<Self, crate::CoreError> {
        use rubato::Resampler;

        let passthrough = source_rate == target_rate;
        // Even the pass-through path refuses a zero rate, so callers cannot
        // slip one through by making both rates equal.
        if passthrough && source_rate == 0 {
            return Err(crate::CoreError::Audio(
                "sample rate must be non-zero".into(),
            ));
        }

        let inner = if passthrough {
            None
        } else {
            Some(build_sinc_resampler(source_rate, target_rate, chunk_size)?)
        };

        // Buffers are only needed when a real resampler exists.
        let (input_buf, output_buf) = match inner.as_ref() {
            Some(resampler) => (
                Vec::with_capacity(chunk_size),
                vec![0.0; resampler.output_frames_max()],
            ),
            None => (Vec::new(), Vec::new()),
        };

        Ok(Self {
            inner,
            source_rate,
            target_rate,
            chunk_size,
            input_buf,
            output_buf,
        })
    }

    /// Sample rate of the audio this resampler accepts.
    pub fn source_rate(&self) -> u32 {
        self.source_rate
    }

    /// Sample rate of the audio this resampler produces.
    pub fn target_rate(&self) -> u32 {
        self.target_rate
    }

    /// Number of input samples consumed per internal step.
    pub fn chunk_size(&self) -> usize {
        self.chunk_size
    }

    /// Resample `input`, appending whatever output is ready to `out`.
    ///
    /// Input is collected internally until `chunk_size` samples are
    /// available; each full chunk is run through rubato and its output
    /// appended. Leftover samples wait for the next call. Filter state
    /// persists between calls, so two adjacent 160-sample calls behave like
    /// one 320-sample call (apart from the resampler's group delay, which
    /// is paid once at the start of the stream).
    ///
    /// In pass-through mode (equal rates) `input` is copied to `out`
    /// unchanged.
    pub fn process(&mut self, input: &[f32], out: &mut Vec<f32>) -> Result<(), crate::CoreError> {
        use rubato::audioadapter_buffers::direct::InterleavedSlice;
        use rubato::Resampler;

        // Uniform mapping of adapter/resampler errors into `CoreError`.
        fn audio_err<E: ToString>(err: E) -> crate::CoreError {
            crate::CoreError::Audio(err.to_string())
        }

        let resampler = match self.inner.as_mut() {
            Some(resampler) => resampler,
            None => {
                out.extend_from_slice(input);
                return Ok(());
            }
        };

        let chunk = self.chunk_size;
        let mut pending = input;
        while !pending.is_empty() {
            // Top the accumulator up to one full chunk (or as far as the
            // remaining input reaches).
            let room = chunk - self.input_buf.len();
            let (head, tail) = pending.split_at(room.min(pending.len()));
            self.input_buf.extend_from_slice(head);
            pending = tail;

            if self.input_buf.len() < chunk {
                // Partial chunk: keep it buffered for the next call.
                break;
            }

            let source = InterleavedSlice::new(&self.input_buf[..], 1, chunk).map_err(audio_err)?;
            let scratch_len = self.output_buf.len();
            let mut sink = InterleavedSlice::new_mut(&mut self.output_buf[..], 1, scratch_len)
                .map_err(audio_err)?;
            let (_consumed, produced) = resampler
                .process_into_buffer(&source, &mut sink, None)
                .map_err(audio_err)?;

            out.extend_from_slice(&self.output_buf[..produced]);
            self.input_buf.clear();
        }
        Ok(())
    }
}
358
#[cfg(feature = "wav")]
impl AudioFrame<'_> {
    /// Save this frame as a WAV file at `path`.
    ///
    /// The file is always mono, 32-bit float PCM, at the frame's own
    /// sample rate.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use wavekat_core::AudioFrame;
    ///
    /// let frame = AudioFrame::from_vec(vec![0.0f32; 16000], 16000);
    /// frame.write_wav("output.wav").unwrap();
    /// ```
    pub fn write_wav(&self, path: impl AsRef<std::path::Path>) -> Result<(), crate::CoreError> {
        use hound::{SampleFormat, WavSpec, WavWriter};

        let spec = WavSpec {
            channels: 1,
            sample_rate: self.sample_rate,
            bits_per_sample: 32,
            sample_format: SampleFormat::Float,
        };
        let mut writer = WavWriter::create(path, spec)?;
        // Stops at the first failed write and propagates that error.
        self.samples()
            .iter()
            .try_for_each(|&sample| writer.write_sample(sample))?;
        // Finalize explicitly so header-update errors are reported.
        writer.finalize()?;
        Ok(())
    }
}
388
#[cfg(feature = "wav")]
impl AudioFrame<'static> {
    /// Read a mono WAV file and return an owned `AudioFrame`.
    ///
    /// Accepts float WAV files (read as f32) and integer PCM WAV files.
    /// Integer samples are normalised to `[-1.0, 1.0]` by dividing by the
    /// full-scale value for the file's bit depth, `2^(bits_per_sample - 1)`
    /// — i.e. 32768 for 16-bit audio.
    ///
    /// The channel count is not inspected: a multi-channel file is returned
    /// with its samples in the order the reader yields them.
    ///
    /// # Errors
    ///
    /// Propagates any error from opening the file or decoding a sample.
    ///
    /// # Example
    ///
    /// ```no_run
    /// use wavekat_core::AudioFrame;
    ///
    /// let frame = AudioFrame::from_wav("input.wav").unwrap();
    /// println!("{} Hz, {} samples", frame.sample_rate(), frame.len());
    /// ```
    pub fn from_wav(path: impl AsRef<std::path::Path>) -> Result<Self, crate::CoreError> {
        let mut reader = hound::WavReader::open(path)?;
        let spec = reader.spec();
        let sample_rate = spec.sample_rate;
        let samples: Vec<f32> = match spec.sample_format {
            hound::SampleFormat::Float => reader.samples::<f32>().collect::<Result<_, _>>()?,
            hound::SampleFormat::Int => {
                // Scale by the bit depth actually stored in the file rather
                // than assuming 16-bit. Reading as i32 covers every integer
                // width up to 32 bits; the clamp keeps the shift in range
                // even for a nonsensical header value.
                let bits = u32::from(spec.bits_per_sample).clamp(1, 32);
                let full_scale = (1u64 << (bits - 1)) as f32;
                reader
                    .samples::<i32>()
                    .map(|s| s.map(|v| v as f32 / full_scale))
                    .collect::<Result<_, _>>()?
            }
        };
        Ok(AudioFrame::from_vec(samples, sample_rate))
    }
}
418
/// Conversion of a sample buffer into the f32 representation used by
/// audio frames.
///
/// `f32` inputs (`&[f32]`, `&Vec<f32>`, `&[f32; N]`) are borrowed without
/// copying. `i16` inputs (`&[i16]`, `&Vec<i16>`, `&[i16; N]`) are converted
/// into a freshly allocated buffer, each sample divided by 32768.
pub trait IntoSamples<'a> {
    /// Convert into f32 samples normalized to `[-1.0, 1.0]`.
    fn into_samples(self) -> Cow<'a, [f32]>;
}

// ---- f32: zero-copy borrows -------------------------------------------

impl<'a> IntoSamples<'a> for &'a [f32] {
    #[inline]
    fn into_samples(self) -> Cow<'a, [f32]> {
        Cow::Borrowed(self)
    }
}

impl<'a> IntoSamples<'a> for &'a Vec<f32> {
    /// Borrows the vector's contents; same as passing the slice.
    #[inline]
    fn into_samples(self) -> Cow<'a, [f32]> {
        self.as_slice().into_samples()
    }
}

impl<'a, const N: usize> IntoSamples<'a> for &'a [f32; N] {
    /// Borrows the array's contents; same as passing the slice.
    #[inline]
    fn into_samples(self) -> Cow<'a, [f32]> {
        self.as_slice().into_samples()
    }
}

// ---- i16: one-time normalizing conversion -----------------------------

impl<'a> IntoSamples<'a> for &'a [i16] {
    #[inline]
    fn into_samples(self) -> Cow<'a, [f32]> {
        // i16 → f32 is exact, so `f32::from` matches an `as` cast.
        let normalized: Vec<f32> = self
            .iter()
            .copied()
            .map(|sample| f32::from(sample) / 32768.0)
            .collect();
        Cow::Owned(normalized)
    }
}

impl<'a> IntoSamples<'a> for &'a Vec<i16> {
    /// Converts the vector's contents; same as passing the slice.
    #[inline]
    fn into_samples(self) -> Cow<'a, [f32]> {
        self.as_slice().into_samples()
    }
}

impl<'a, const N: usize> IntoSamples<'a> for &'a [i16; N] {
    /// Converts the array's contents; same as passing the slice.
    #[inline]
    fn into_samples(self) -> Cow<'a, [f32]> {
        self.as_slice().into_samples()
    }
}
468
469#[cfg(test)]
470mod tests {
471    use super::*;
472
473    #[test]
474    fn f32_is_zero_copy() {
475        let samples = vec![0.1f32, -0.2, 0.3];
476        let frame = AudioFrame::new(samples.as_slice(), 16000);
477        // Cow::Borrowed — the pointer should be the same
478        assert!(matches!(frame.samples, Cow::Borrowed(_)));
479        assert_eq!(frame.samples(), &[0.1, -0.2, 0.3]);
480    }
481
482    #[test]
483    fn i16_normalizes_to_f32() {
484        let samples: Vec<i16> = vec![0, 16384, -16384, i16::MAX, i16::MIN];
485        let frame = AudioFrame::new(samples.as_slice(), 16000);
486        assert!(matches!(frame.samples, Cow::Owned(_)));
487
488        let s = frame.samples();
489        assert!((s[0] - 0.0).abs() < f32::EPSILON);
490        assert!((s[1] - 0.5).abs() < 0.001);
491        assert!((s[2] - -0.5).abs() < 0.001);
492        assert!((s[3] - (i16::MAX as f32 / 32768.0)).abs() < f32::EPSILON);
493        assert!((s[4] - -1.0).abs() < f32::EPSILON);
494    }
495
496    #[test]
497    fn metadata() {
498        let samples = vec![0.0f32; 160];
499        let frame = AudioFrame::new(samples.as_slice(), 16000);
500        assert_eq!(frame.sample_rate(), 16000);
501        assert_eq!(frame.len(), 160);
502        assert!(!frame.is_empty());
503        assert!((frame.duration_secs() - 0.01).abs() < 1e-9);
504    }
505
506    #[test]
507    fn empty_frame() {
508        let samples: &[f32] = &[];
509        let frame = AudioFrame::new(samples, 16000);
510        assert!(frame.is_empty());
511        assert_eq!(frame.len(), 0);
512    }
513
514    #[test]
515    fn into_owned() {
516        let samples = vec![0.5f32, -0.5];
517        let frame = AudioFrame::new(samples.as_slice(), 16000);
518        let owned: AudioFrame<'static> = frame.into_owned();
519        assert_eq!(owned.samples(), &[0.5, -0.5]);
520        assert_eq!(owned.sample_rate(), 16000);
521    }
522
523    #[cfg(feature = "wav")]
524    #[test]
525    fn wav_read_i16() {
526        // Write an i16 WAV directly via hound, then read it with from_wav.
527        let path = std::env::temp_dir().join("wavekat_test_i16.wav");
528        let spec = hound::WavSpec {
529            channels: 1,
530            sample_rate: 16000,
531            bits_per_sample: 16,
532            sample_format: hound::SampleFormat::Int,
533        };
534        let i16_samples: &[i16] = &[0, i16::MAX, i16::MIN, 16384];
535        let mut writer = hound::WavWriter::create(&path, spec).unwrap();
536        for &s in i16_samples {
537            writer.write_sample(s).unwrap();
538        }
539        writer.finalize().unwrap();
540
541        let frame = AudioFrame::from_wav(&path).unwrap();
542        assert_eq!(frame.sample_rate(), 16000);
543        assert_eq!(frame.len(), 4);
544        let s = frame.samples();
545        assert!((s[0] - 0.0).abs() < 1e-6);
546        assert!((s[1] - (i16::MAX as f32 / 32768.0)).abs() < 1e-6);
547        assert!((s[2] - -1.0).abs() < 1e-6);
548        assert!((s[3] - 0.5).abs() < 1e-4);
549    }
550
551    #[cfg(feature = "wav")]
552    #[test]
553    fn wav_round_trip() {
554        let original = AudioFrame::from_vec(vec![0.5f32, -0.5, 0.0, 1.0], 16000);
555        let path = std::env::temp_dir().join("wavekat_test.wav");
556        original.write_wav(&path).unwrap();
557        let loaded = AudioFrame::from_wav(&path).unwrap();
558        assert_eq!(loaded.sample_rate(), 16000);
559        for (a, b) in original.samples().iter().zip(loaded.samples()) {
560            assert!((a - b).abs() < 1e-6, "sample mismatch: {a} vs {b}");
561        }
562    }
563
564    #[test]
565    fn from_vec_is_zero_copy() {
566        let samples = vec![0.5f32, -0.5];
567        let ptr = samples.as_ptr();
568        let frame = AudioFrame::from_vec(samples, 24000);
569        assert_eq!(frame.samples().as_ptr(), ptr);
570        assert_eq!(frame.sample_rate(), 24000);
571    }
572
573    #[test]
574    fn into_samples_vec_f32() {
575        let samples = vec![0.1f32, -0.2, 0.3];
576        let frame = AudioFrame::new(&samples, 16000);
577        assert!(matches!(frame.samples, Cow::Borrowed(_)));
578        assert_eq!(frame.samples(), &[0.1, -0.2, 0.3]);
579    }
580
581    #[test]
582    fn into_samples_array_f32() {
583        let samples = [0.1f32, -0.2, 0.3];
584        let frame = AudioFrame::new(&samples, 16000);
585        assert!(matches!(frame.samples, Cow::Borrowed(_)));
586        assert_eq!(frame.samples(), &[0.1, -0.2, 0.3]);
587    }
588
589    #[test]
590    fn into_samples_vec_i16() {
591        let samples: Vec<i16> = vec![0, 16384, i16::MIN];
592        let frame = AudioFrame::new(&samples, 16000);
593        assert!(matches!(frame.samples, Cow::Owned(_)));
594        let s = frame.samples();
595        assert!((s[0] - 0.0).abs() < f32::EPSILON);
596        assert!((s[1] - 0.5).abs() < 0.001);
597        assert!((s[2] - -1.0).abs() < f32::EPSILON);
598    }
599
600    #[test]
601    fn into_samples_array_i16() {
602        let samples: [i16; 3] = [0, 16384, i16::MIN];
603        let frame = AudioFrame::new(&samples, 16000);
604        assert!(matches!(frame.samples, Cow::Owned(_)));
605        let s = frame.samples();
606        assert!((s[0] - 0.0).abs() < f32::EPSILON);
607        assert!((s[1] - 0.5).abs() < 0.001);
608        assert!((s[2] - -1.0).abs() < f32::EPSILON);
609    }
610
611    #[cfg(feature = "resample")]
612    #[test]
613    fn resample_noop_same_rate() {
614        let samples = vec![0.1f32, -0.2, 0.3, 0.4, 0.5];
615        let frame = AudioFrame::from_vec(samples.clone(), 16000);
616        let resampled = frame.resample(16000).unwrap();
617        assert_eq!(resampled.sample_rate(), 16000);
618        assert_eq!(resampled.samples(), &samples[..]);
619    }
620
621    #[cfg(feature = "resample")]
622    #[test]
623    fn resample_empty_frame() {
624        let frame = AudioFrame::from_vec(Vec::new(), 44100);
625        let resampled = frame.resample(16000).unwrap();
626        assert_eq!(resampled.sample_rate(), 16000);
627        assert!(resampled.is_empty());
628    }
629
630    #[cfg(feature = "resample")]
631    #[test]
632    fn resample_downsample() {
633        // 1 second of silence at 48 kHz → 16 kHz
634        let frame = AudioFrame::from_vec(vec![0.0f32; 48000], 48000);
635        let resampled = frame.resample(16000).unwrap();
636        assert_eq!(resampled.sample_rate(), 16000);
637        // Should produce ~16000 samples (allow small tolerance from resampler)
638        let expected = 16000;
639        let tolerance = 50;
640        assert!(
641            (resampled.len() as i64 - expected as i64).unsigned_abs() < tolerance,
642            "expected ~{expected} samples, got {}",
643            resampled.len()
644        );
645    }
646
647    #[cfg(feature = "resample")]
648    #[test]
649    fn resample_upsample() {
650        // 1 second at 16 kHz → 24 kHz
651        let frame = AudioFrame::from_vec(vec![0.0f32; 16000], 16000);
652        let resampled = frame.resample(24000).unwrap();
653        assert_eq!(resampled.sample_rate(), 24000);
654        let expected = 24000;
655        let tolerance = 50;
656        assert!(
657            (resampled.len() as i64 - expected as i64).unsigned_abs() < tolerance,
658            "expected ~{expected} samples, got {}",
659            resampled.len()
660        );
661    }
662
663    #[cfg(feature = "resample")]
664    #[test]
665    fn resample_short_input_upsample_large_ratio() {
666        // The exact case from the wavekat-voice RTP path: a 20 ms G.711 frame
667        // (160 samples @ 8 kHz) upsampled to 44.1 kHz. Before the fix this
668        // returned `InsufficientOutputBufferSize`.
669        let frame = AudioFrame::from_vec(vec![0.0f32; 160], 8000);
670        let resampled = frame.resample(44_100).unwrap();
671        assert_eq!(resampled.sample_rate(), 44_100);
672        let expected = (160.0 * 44_100.0 / 8_000.0) as i64; // 882
673        let actual = resampled.len() as i64;
674        assert!(
675            (actual - expected).unsigned_abs() < 50,
676            "expected ~{expected} samples, got {actual}"
677        );
678    }
679
680    #[cfg(feature = "resample")]
681    #[test]
682    fn resample_short_input_upsample_small_ratio() {
683        // 160 samples @ 8 kHz → 16 kHz. Also failed before the fix even
684        // though the ratio is modest, because nbr_input_frames < chunk_size.
685        let frame = AudioFrame::from_vec(vec![0.0f32; 160], 8000);
686        let resampled = frame.resample(16_000).unwrap();
687        assert_eq!(resampled.sample_rate(), 16_000);
688        let expected: i64 = 320;
689        let actual = resampled.len() as i64;
690        assert!(
691            (actual - expected).unsigned_abs() < 50,
692            "expected ~{expected} samples, got {actual}"
693        );
694    }
695
696    #[cfg(feature = "resample")]
697    #[test]
698    fn resample_single_g711_frame_to_48k() {
699        // The other common device rate: 160 @ 8 kHz → 48 kHz.
700        let frame = AudioFrame::from_vec(vec![0.0f32; 160], 8000);
701        let resampled = frame.resample(48_000).unwrap();
702        assert_eq!(resampled.sample_rate(), 48_000);
703        let expected: i64 = 960;
704        let actual = resampled.len() as i64;
705        assert!(
706            (actual - expected).unsigned_abs() < 50,
707            "expected ~{expected} samples, got {actual}"
708        );
709    }
710
711    #[cfg(feature = "resample")]
712    #[test]
713    fn resample_preserves_sine_frequency() {
714        // Generate a 440 Hz sine at 44100 Hz, resample to 16000 Hz,
715        // then verify the dominant frequency is still ~440 Hz by
716        // checking zero-crossing rate.
717        let sr_in: u32 = 44100;
718        let sr_out: u32 = 16000;
719        let duration_secs = 1.0;
720        let freq = 440.0;
721        let n = (sr_in as f64 * duration_secs) as usize;
722        let samples: Vec<f32> = (0..n)
723            .map(|i| (2.0 * std::f64::consts::PI * freq * i as f64 / sr_in as f64).sin() as f32)
724            .collect();
725
726        let frame = AudioFrame::from_vec(samples, sr_in);
727        let resampled = frame.resample(sr_out).unwrap();
728
729        // Count zero crossings (sign changes)
730        let s = resampled.samples();
731        let crossings: usize = s
732            .windows(2)
733            .filter(|w| w[0].signum() != w[1].signum())
734            .count();
735        // A pure sine at f Hz has 2*f zero crossings per second
736        let measured_freq = crossings as f64 / (2.0 * duration_secs);
737        assert!(
738            (measured_freq - freq).abs() < 5.0,
739            "expected ~{freq} Hz, measured {measured_freq} Hz"
740        );
741    }
742
743    #[cfg(feature = "resample")]
744    #[test]
745    fn streaming_resampler_same_rate_is_passthrough() {
746        // No-op short-circuit: no resampler is built, no work is done,
747        // samples pass through verbatim. Guards against accidentally
748        // putting a same-rate stream through rubato (which adds group
749        // delay we don't want).
750        use crate::StreamingResampler;
751        let mut r = StreamingResampler::new(16000, 16000, 160).unwrap();
752        let input = vec![0.1, -0.2, 0.3, -0.4];
753        let mut out = Vec::new();
754        r.process(&input, &mut out).unwrap();
755        assert_eq!(out, input);
756    }
757
758    #[cfg(feature = "resample")]
759    #[test]
760    fn streaming_resampler_accessors_report_construction_args() {
761        use crate::StreamingResampler;
762        let r = StreamingResampler::new(8000, 44100, 160).unwrap();
763        assert_eq!(r.source_rate(), 8000);
764        assert_eq!(r.target_rate(), 44100);
765        assert_eq!(r.chunk_size(), 160);
766    }
767
768    #[cfg(feature = "resample")]
769    #[test]
770    fn streaming_resampler_short_input_chunked_calls() {
771        // The exact shape `wavekat-voice`'s RTP receive path drives:
772        // repeated 160-sample inputs at 8 kHz → 44.1 kHz. Each call
773        // produces ~882 output samples; total over N calls is ~N × 882
774        // (the first chunk may emit slightly less while rubato fills
775        // its filter delay).
776        use crate::StreamingResampler;
777        let mut r = StreamingResampler::new(8000, 44100, 160).unwrap();
778        let mut out = Vec::new();
779        for _ in 0..10 {
780            let input = vec![0.0f32; 160];
781            r.process(&input, &mut out).unwrap();
782        }
783        // 10 × 160 input @ 8k = 200 ms; @ 44.1k that's ~8820 samples.
784        // Allow generous tolerance for rubato's initial transient.
785        let expected = (10 * 160 * 44100 / 8000) as i64;
786        let actual = out.len() as i64;
787        assert!(
788            (actual - expected).unsigned_abs() < 2000,
789            "expected ~{expected} samples, got {actual}"
790        );
791    }
792
793    #[cfg(feature = "resample")]
794    #[test]
795    fn streaming_resampler_buffers_across_partial_calls() {
796        // Splitting an input across two `process` calls must produce
797        // the same output as one big call. Catches a regression where
798        // partial input is dropped on the floor instead of buffered.
799        use crate::StreamingResampler;
800        let input: Vec<f32> = (0..320).map(|i| (i as f32) * 0.01).collect();
801
802        let mut split_out = Vec::new();
803        let mut r1 = StreamingResampler::new(8000, 16000, 160).unwrap();
804        r1.process(&input[..50], &mut split_out).unwrap();
805        // No full chunk yet — buffered.
806        assert!(split_out.is_empty(), "no output before a full chunk");
807        r1.process(&input[50..], &mut split_out).unwrap();
808
809        let mut whole_out = Vec::new();
810        let mut r2 = StreamingResampler::new(8000, 16000, 160).unwrap();
811        r2.process(&input, &mut whole_out).unwrap();
812
813        assert_eq!(
814            split_out.len(),
815            whole_out.len(),
816            "split call must produce same number of samples as one-shot"
817        );
818        // The samples themselves must match too — same rubato state
819        // either way.
820        for (i, (a, b)) in split_out.iter().zip(whole_out.iter()).enumerate() {
821            assert!(
822                (a - b).abs() < 1e-6,
823                "split vs whole differ at {i}: {a} vs {b}"
824            );
825        }
826    }
827
828    #[cfg(feature = "resample")]
829    #[test]
830    fn streaming_resampler_avoids_per_frame_edge_artifacts() {
831        // The motivating regression: a stateless per-call resampler
832        // (i.e. `AudioFrame::resample` invoked on each 160-sample
833        // chunk) produces edge artifacts at every chunk boundary,
834        // because rubato assumes silence before t=0 and after t=N for
835        // each isolated chunk — sinc reconstruction near the edges
836        // sees an abrupt step.
837        //
838        // We don't compare against a reference signal (group-delay
839        // offsets across approaches make sample-index alignment
840        // unreliable). Instead we check the output's own *smoothness*:
841        // a band-limited signal at the input rate, upsampled, produces
842        // a band-limited output, so consecutive-sample deltas are
843        // bounded by `2π × freq / sr_out`. Edge artifacts show up as
844        // spikes in that consecutive delta — much larger than the
845        // smooth bound.
846        use crate::StreamingResampler;
847        let sr_in: u32 = 8000;
848        let sr_out: u32 = 44100;
849        let chunks = 30;
850        let chunk_size = 160;
851
852        // Mid-band sine that exercises the sinc filter without
853        // touching the anti-aliasing edge.
854        let freq = 600.0_f32;
855        let signal: Vec<f32> = (0..chunks * chunk_size)
856            .map(|i| (2.0 * std::f32::consts::PI * freq * i as f32 / sr_in as f32).sin())
857            .collect();
858
859        // Streaming: state carried across calls.
860        let mut streaming = StreamingResampler::new(sr_in, sr_out, chunk_size).unwrap();
861        let mut streaming_out: Vec<f32> = Vec::new();
862        for c in 0..chunks {
863            streaming
864                .process(
865                    &signal[c * chunk_size..(c + 1) * chunk_size],
866                    &mut streaming_out,
867                )
868                .unwrap();
869        }
870
871        // Stateless per-chunk: fresh resampler every call (the bug).
872        let mut stateless_out: Vec<f32> = Vec::new();
873        for c in 0..chunks {
874            let chunk =
875                AudioFrame::from_vec(signal[c * chunk_size..(c + 1) * chunk_size].to_vec(), sr_in);
876            let resampled = chunk.resample(sr_out).unwrap();
877            stateless_out.extend_from_slice(resampled.samples());
878        }
879
880        // Skip the initial group-delay transient and the trailing
881        // tail; we want steady-state behavior.
882        let skip = 1500;
883        let tail = 500;
884
885        // Smooth bound: for a 600 Hz sine sampled at 44.1 kHz, the
886        // maximum delta between adjacent samples is ~ 2π × 600 / 44100
887        // ≈ 0.085. Allow generous headroom (4×) before we call a delta
888        // "spiky."
889        let expected_max_delta = 2.0 * std::f32::consts::PI * freq / sr_out as f32;
890        let spike_threshold = expected_max_delta * 4.0;
891
892        let count_spikes = |samples: &[f32], skip: usize, tail: usize| -> usize {
893            samples[skip..samples.len() - tail]
894                .windows(2)
895                .filter(|w| (w[1] - w[0]).abs() > spike_threshold)
896                .count()
897        };
898
899        let streaming_spikes = count_spikes(&streaming_out, skip, tail);
900        let stateless_spikes = count_spikes(&stateless_out, skip, tail);
901
902        // Streaming output should be smooth: essentially zero spikes
903        // in steady state.
904        assert!(
905            streaming_spikes < 10,
906            "streaming output should be smooth, found {streaming_spikes} sample-delta spikes (threshold {spike_threshold})"
907        );
908        // Stateless per-chunk output should have many spikes — one
909        // per chunk boundary, at minimum. We have ~25 chunks in the
910        // compared range, so expect at least 25 spikes.
911        assert!(
912            stateless_spikes > streaming_spikes * 5,
913            "stateless per-chunk should have far more spikes than streaming; got stateless={stateless_spikes}, streaming={streaming_spikes}"
914        );
915    }
916
917    #[cfg(feature = "resample")]
918    #[test]
919    fn streaming_resampler_rejects_zero_rate() {
920        use crate::StreamingResampler;
921        assert!(StreamingResampler::new(0, 16000, 160).is_err());
922        assert!(StreamingResampler::new(16000, 0, 160).is_err());
923        assert!(StreamingResampler::new(0, 0, 160).is_err());
924    }
925
926    #[cfg(feature = "resample")]
927    #[test]
928    fn streaming_resampler_rejects_zero_chunk_size() {
929        use crate::StreamingResampler;
930        assert!(StreamingResampler::new(8000, 16000, 0).is_err());
931    }
932}
wavekat_core/audio.rs

wavekat_core/
audio.rs