resampler/
resampler.rs

1
2use std::{cmp::min, sync::Arc, fmt::{self, Debug, Formatter}};
3use rustfft::{FftPlanner, Fft, num_complex::Complex};
4
/// Errors reported by the resampler.
#[derive(Debug, Clone)]
pub enum ResamplerError {
    /// A sample buffer or a requested length is outside what the resampler can process.
    /// The payload is a human-readable description of the violated size constraint.
    SizeError(String),
}

impl fmt::Display for ResamplerError {
    /// Writes a short, user-facing description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::SizeError(msg) => write!(f, "size error: {msg}"),
        }
    }
}

// Lets callers propagate the error with `?` into `Box<dyn std::error::Error>` and similar.
impl std::error::Error for ResamplerError {}
9
10/// How the Resampler works
11/// For audio stretching:
12///   1. The input audio remains its original length, and zero-padding is applied at the end to reach the target length.
13///   2. Perform FFT transformation to obtain the frequency domain.
14///   3. In the frequency domain, scale down the frequency values proportionally (shift them lower).
15///   4. Perform inverse FFT to obtain the stretched audio.
16/// 
17/// For audio compression:
18///   1. Take the input audio.
19///   2. Perform FFT transformation.
20///   3. In the frequency domain, scale up the frequency values proportionally (shift them higher).
21///   4. Perform inverse FFT to obtain audio with increased pitch but unchanged length.
22///   5. Truncate the audio to shorten its duration.
23/// 
24/// This implies: the FFT length must be chosen as the longest possible length involved.
25#[derive(Clone)]
26pub struct Resampler {
27    fft_forward: Arc<dyn Fft<f64>>,
28    fft_inverse: Arc<dyn Fft<f64>>,
29    fft_size: usize,
30    normalize_scaler: f64,
31}
32
33fn get_average(complexes: &[Complex<f64>]) -> Complex<f64> {
34    let sum: Complex<f64> = complexes.iter().copied().sum();
35    let scaler = 1.0 / complexes.len() as f64;
36    Complex::<f64> {
37        re: sum.re * scaler,
38        im: sum.im * scaler,
39    }
40}
41
42fn interpolate(c1: Complex<f64>, c2: Complex<f64>, s: f64) -> Complex<f64> {
43    c1 + (c2 - c1) * s
44}
45
46impl Resampler {
47    pub fn new(fft_size: usize) -> Self {
48        let mut planner = FftPlanner::new();
49        if fft_size & 1 != 0 {
50            panic!("The input size and the output size must be times of 2, got {fft_size}");
51        }
52        Self {
53            fft_forward: planner.plan_fft_forward(fft_size),
54            fft_inverse: planner.plan_fft_inverse(fft_size),
55            fft_size,
56            normalize_scaler: 1.0 / fft_size as f64,
57        }
58    }
59
60    /// * The fft size can be any number greater than the sample rate of the encoder or the decoder.
61    /// * It is for the resampler. A greater number results in better resample quality, but the process could be slower.
62    /// * In most cases, the audio sampling rate is about `11025` to `48000`, so `65536` is the best number for the resampler.
63    pub fn get_rounded_up_fft_size(sample_rate: u32) -> usize {
64        for i in 0..31 {
65            let fft_size = 1usize << i;
66            if fft_size >= sample_rate as usize {
67                return fft_size;
68            }
69        }
70        0x1_00000000_usize
71    }
72
73    /// `desired_length`: The target audio length to achieve, which must not exceed the FFT size.
74    /// When samples.len() < desired_length, it indicates audio stretching to desired_length.
75    /// When samples.len() > desired_length, it indicates audio compression to desired_length.
76    pub fn resample_core(&self, samples: &[f32], desired_length: usize) -> Result<Vec<f32>, ResamplerError> {
77        const INTERPOLATE_UPSCALE: bool = true;
78        const INTERPOLATE_DNSCALE: bool = true;
79
80        let input_size = samples.len();
81        if input_size == desired_length {
82            return Ok(samples.to_vec());
83        }
84
85        if desired_length > self.fft_size {
86            return Err(ResamplerError::SizeError(format!("The desired size {desired_length} must not exceed the FFT size {}", self.fft_size)));
87        }
88
89        let mut fftbuf: Vec<Complex<f64>> = samples.iter().map(|sample: &f32| -> Complex<f64> {Complex{re: *sample as f64, im: 0.0}}).collect();
90
91        if fftbuf.len() <= self.fft_size {
92            fftbuf.resize(self.fft_size, Complex{re: 0.0, im: 0.0});
93        } else {
94            return Err(ResamplerError::SizeError(format!("The input size {} must not exceed the FFT size {}", fftbuf.len(), self.fft_size)));
95        }
96
97        // 进行 FFT 正向变换
98        self.fft_forward.process(&mut fftbuf);
99
100        // 准备进行插值
101        let mut fftdst = vec![Complex::<f64>{re: 0.0, im: 0.0}; self.fft_size];
102
103        let half = self.fft_size / 2;
104        let back = self.fft_size - 1;
105        let scaling = desired_length as f64 / input_size as f64;
106        if input_size > desired_length {
107            // Input size exceeds output size, indicating audio compression.
108            // This implies stretching in the frequency domain (scaling up).
109            for i in 0..half {
110                let scaled = i as f64 * scaling;
111                let i1 = scaled.trunc() as usize;
112                let i2 = i1 + 1;
113                let s = scaled.fract();
114                if INTERPOLATE_DNSCALE {
115                    fftdst[i] = interpolate(fftbuf[i1], fftbuf[i2], s);
116                    fftdst[back - i] = interpolate(fftbuf[back - i1], fftbuf[back - i2], s);
117                } else {
118                    fftdst[i] = fftbuf[i1];
119                    fftdst[back - i] = fftbuf[back - i1];
120                }
121            }
122        } else {
123            // Input size is smaller than the output size, indicating audio stretching.
124            // This implies compression in the frequency domain (scaling down).
125            for i in 0..half {
126                let i1 = (i as f64 * scaling).trunc() as usize;
127                let i2 = ((i + 1) as f64 * scaling).trunc() as usize;
128                if i2 >= half {break;}
129                let j1 = back - i2;
130                let j2 = back - i1;
131                if INTERPOLATE_UPSCALE {
132                    fftdst[i] = get_average(&fftbuf[i1..i2]);
133                    fftdst[back - i] = get_average(&fftbuf[j1..j2]);
134                } else {
135                    fftdst[i] = fftbuf[i1];
136                    fftdst[back - i] = fftbuf[back - i1];
137                }
138            }
139        }
140
141        self.fft_inverse.process(&mut fftdst);
142
143        fftdst.truncate(desired_length);
144
145        Ok(fftdst.into_iter().map(|c| -> f32 {(c.re * self.normalize_scaler) as f32}).collect())
146    }
147
148    /// The processing unit size should be adjusted to work in "chunks per second", 
149    /// and artifacts will vanish when the chunk count aligns with the maximum infrasonic frequency.
150    /// Calling `self.get_desired_length()` determines the processed chunk size calculated based on the target sample rate.
151    pub fn get_process_size(&self, orig_size: usize, src_sample_rate: u32, dst_sample_rate: u32) -> usize {
152        const MAX_INFRASOUND_FREQ: usize = 20;
153        if src_sample_rate == dst_sample_rate {
154            min(self.fft_size, orig_size)
155        } else {
156            min(self.fft_size, src_sample_rate as usize / MAX_INFRASOUND_FREQ)
157        }
158    }
159
160    /// Get the processed chunk size calculated based on the target sample rate.
161    pub fn get_desired_length(&self, proc_size: usize, src_sample_rate: u32, dst_sample_rate: u32) -> usize {
162        min(self.fft_size, proc_size * dst_sample_rate as usize / src_sample_rate as usize)
163    }
164
165    pub fn resample(&self, input: &[f32], src_sample_rate: u32, dst_sample_rate: u32) -> Result<Vec<f32>, ResamplerError> {
166        if src_sample_rate == dst_sample_rate {
167            Ok(input.to_vec())
168        } else {
169            let proc_size = self.get_process_size(self.fft_size, src_sample_rate, dst_sample_rate);
170            let desired_length = self.get_desired_length(proc_size, src_sample_rate, dst_sample_rate);
171            if input.len() > proc_size {
172                Err(ResamplerError::SizeError(format!("To resize the waveform, the input size should be {proc_size}, not {}", input.len())))
173            } else if src_sample_rate > dst_sample_rate {
174                // Source sample rate is higher than the target, indicating waveform compression.
175                self.resample_core(input, desired_length)
176            } else {
177                // Source sample rate is lower than the target, indicating waveform stretching.
178                // When the input length is less than the desired length, zero-padding is applied to the end.
179                input.to_vec().resize(proc_size, 0.0);
180                self.resample_core(input, desired_length)
181            }
182        }
183    }
184
185    pub fn get_fft_size(&self) -> usize {
186        self.fft_size
187    }
188}
189
190impl Debug for Resampler {
191    fn fmt(&self, fmt: &mut Formatter) -> fmt::Result {
192        fmt.debug_struct("Resampler")
193            .field("fft_forward", &format_args!("..."))
194            .field("fft_inverse", &format_args!("..."))
195            .field("fft_size", &self.fft_size)
196            .field("normalize_scaler", &self.normalize_scaler)
197            .finish()
198    }
199}