Skip to main content

axon/ots/native/
resample.rs

//! Linear resampler for PCM16 audio.
//!
//! Polyphase FIR would preserve spectral content better; linear
//! interpolation is what telephony-tier audio (μ-law, 8 kHz) can
//! tolerate, and that is the 11.e use case. Adopters needing
//! studio-quality rate conversion register a higher-quality
//! transformer at startup; our native path gives way to any
//! transformer they declare with `cost_hint() < self.cost_hint()`.
//!
//! The kind tags `pcm16_8k`, `pcm16_16k`, `pcm16_48k` encode both
//! byte layout AND sample rate. This is deliberate: a Whisper-class
//! consumer that wants 16 kHz PCM16 declares `pcm16_16k`, and OTS
//! resolves the resample step automatically.
14
15use crate::buffer::{BufferKind, ZeroCopyBuffer};
16use crate::ots::pipeline::{OtsError, Transformer, TransformerBackend};
17
/// Linear-interpolation resampler between two PCM16 sample rates.
///
/// Prefer [`Resample::new`], which rejects zero rates. Both fields are
/// public, so a struct literal can bypass that check.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Resample {
    /// Sample rate of the incoming audio, in Hz.
    pub from_hz: u32,
    /// Sample rate of the produced audio, in Hz.
    pub to_hz: u32,
}
22
23impl Resample {
24    pub fn new(from_hz: u32, to_hz: u32) -> Self {
25        assert!(from_hz > 0 && to_hz > 0, "rates must be positive");
26        Resample { from_hz, to_hz }
27    }
28
29    fn source_slug(&self) -> String {
30        format!("pcm16_{}k", self.from_hz / 1000)
31    }
32
33    fn sink_slug(&self) -> String {
34        format!("pcm16_{}k", self.to_hz / 1000)
35    }
36
37    fn resample_linear(samples: &[i16], from_hz: u32, to_hz: u32) -> Vec<i16> {
38        if samples.is_empty() || from_hz == to_hz {
39            return samples.to_vec();
40        }
41        let output_len =
42            ((samples.len() as u64 * to_hz as u64) / from_hz as u64)
43                .max(1) as usize;
44        let mut out = Vec::with_capacity(output_len);
45        for i in 0..output_len {
46            // Map the output index back into the input timeline.
47            let src_pos =
48                (i as u64 * from_hz as u64) as f64 / to_hz as f64;
49            let src_idx = src_pos.floor() as usize;
50            let frac = src_pos - src_idx as f64;
51            if src_idx + 1 >= samples.len() {
52                out.push(samples[samples.len() - 1]);
53            } else {
54                let a = samples[src_idx] as f64;
55                let b = samples[src_idx + 1] as f64;
56                out.push((a + (b - a) * frac).round() as i16);
57            }
58        }
59        out
60    }
61}
62
63impl Transformer for Resample {
64    fn source_kind(&self) -> BufferKind {
65        BufferKind::new(self.source_slug())
66    }
67
68    fn sink_kind(&self) -> BufferKind {
69        BufferKind::new(self.sink_slug())
70    }
71
72    fn backend(&self) -> TransformerBackend {
73        TransformerBackend::Native
74    }
75
76    fn cost_hint(&self) -> u32 {
77        // Resample is cheaper than the μ-law/PCM codecs because
78        // the per-sample work is just a multiply-add; we still
79        // bias toward shorter paths when an adopter declares a
80        // higher-quality alternative.
81        1
82    }
83
84    fn transform(
85        &self,
86        input: &ZeroCopyBuffer,
87    ) -> Result<ZeroCopyBuffer, OtsError> {
88        let src = input.as_slice();
89        if src.len() % 2 != 0 {
90            return Err(OtsError::TransformFailed(format!(
91                "PCM16 input must be even-length, got {}",
92                src.len()
93            )));
94        }
95        let samples: Vec<i16> = src
96            .chunks_exact(2)
97            .map(|c| i16::from_le_bytes([c[0], c[1]]))
98            .collect();
99        let resampled =
100            Resample::resample_linear(&samples, self.from_hz, self.to_hz);
101        let mut out = Vec::with_capacity(resampled.len() * 2);
102        for sample in resampled {
103            out.extend_from_slice(&sample.to_le_bytes());
104        }
105        let mut buf = ZeroCopyBuffer::from_bytes(out, self.sink_kind());
106        if let Some(tenant) = input.tenant_id() {
107            buf = buf.with_tenant(tenant.to_string());
108        }
109        Ok(buf)
110    }
111}
112
#[cfg(test)]
mod tests {
    use super::*;

    /// Equal source and sink rates must hand the samples back unchanged.
    #[test]
    fn identity_resample_returns_same_samples() {
        let samples = vec![100i16, 200, 300, -100, -200];
        let result = Resample::resample_linear(&samples, 16_000, 16_000);
        assert_eq!(result, samples);
    }

    /// Doubling the rate should roughly double the sample count.
    #[test]
    fn upsample_doubles_length_approximately() {
        let samples = vec![0i16; 100];
        let result = Resample::resample_linear(&samples, 8_000, 16_000);
        // 100 samples at 8 kHz → 200 samples at 16 kHz.
        assert!(result.len() >= 199 && result.len() <= 201);
    }

    /// Halving the rate should roughly halve the sample count.
    #[test]
    fn downsample_halves_length_approximately() {
        let samples = vec![0i16; 200];
        let result = Resample::resample_linear(&samples, 16_000, 8_000);
        assert!(result.len() >= 99 && result.len() <= 101);
    }

    /// The length tests use all-zero input, so check real values too:
    /// midpoints are interpolated and the tail sample is held.
    #[test]
    fn upsample_interpolates_between_neighbours() {
        let result = Resample::resample_linear(&[0, 100], 8_000, 16_000);
        assert_eq!(result, vec![0, 50, 100, 100]);
    }

    /// Kind slugs are `pcm16_<kHz>k` for both ends of the resampler.
    #[test]
    fn kind_tags_follow_rate_convention() {
        let r = Resample::new(8_000, 16_000);
        assert_eq!(r.source_kind().slug(), "pcm16_8k");
        assert_eq!(r.sink_kind().slug(), "pcm16_16k");
    }

    /// An odd byte count cannot be whole PCM16 samples and must be refused.
    #[test]
    fn transform_rejects_odd_length() {
        let r = Resample::new(8_000, 16_000);
        let odd = ZeroCopyBuffer::from_bytes(vec![0u8, 1, 2], r.source_kind());
        let err = r.transform(&odd).unwrap_err();
        // `matches!` only evaluates to a bool; it must be wrapped in
        // `assert!` or the variant check never fails the test.
        assert!(matches!(err, OtsError::TransformFailed(_)));
    }

    /// The tenant id on the input must be carried over to the output.
    #[test]
    fn transform_preserves_tenant_tag() {
        let r = Resample::new(8_000, 16_000);
        let input =
            ZeroCopyBuffer::from_bytes(vec![0u8; 16], r.source_kind()).with_tenant("alpha");
        let out = r.transform(&input).unwrap();
        assert_eq!(out.tenant_id(), Some("alpha"));
    }
}