use rustfft::{num_complex::Complex, FftPlanner};
/// Number of samples in each analysis window; also used as the FFT length.
const N_FFT: usize = 400;
/// Number of samples the analysis window advances between consecutive frames.
const HOP_LENGTH: usize = 160;
/// Converts PCM samples into a normalised log-mel spectrogram.
///
/// The signal is reflect-padded by `N_FFT / 2` samples on each side, turned
/// into a power spectrogram, projected through `mel_filters`, converted to
/// `log10` (with a floor of `1e-10`), clamped to at most 8.0 below the global
/// maximum and finally rescaled as `(x + 4) / 4`.
///
/// # Arguments
///
/// * `pcm` - input samples.
/// * `n_mels` - number of mel bands (rows of the filter bank and of the output).
/// * `mel_filters` - filter bank stored row-major as
///   `n_mels x (N_FFT / 2 + 1)`.
///
/// # Returns
///
/// A row-major `n_mels x n_frames` buffer, where `n_frames` is one less than
/// the number of frames produced by the STFT (the final frame is dropped).
///
/// # Panics
///
/// Panics if `mel_filters` is too short for the requested `n_mels` while at
/// least one frame is produced.
pub fn log_mel_spectrogram(pcm: &[f32], n_mels: usize, mel_filters: &[f32]) -> Vec<f32> {
    let bins = N_FFT / 2 + 1;
    let power = stft_magnitudes_squared(&reflect_pad(pcm, N_FFT / 2));
    let total_frames = power.len() / bins;
    // The trailing STFT frame is intentionally left out of the result.
    let kept_frames = total_frames - 1;

    // Mel projection and log compression in one pass, emitted band by band.
    let mut mel: Vec<f32> = (0..n_mels)
        .flat_map(|band| (0..kept_frames).map(move |frame| (band, frame)))
        .map(|(band, frame)| {
            let energy = (0..bins).fold(0f32, |acc, bin| {
                acc + mel_filters[band * bins + bin] * power[bin * total_frames + frame]
            });
            energy.max(1e-10).log10()
        })
        .collect();

    // Limit the dynamic range relative to the loudest cell, then rescale.
    let peak = mel.iter().copied().fold(f32::NEG_INFINITY, f32::max);
    let floor = peak - 8.0;
    for value in mel.iter_mut() {
        *value = (value.max(floor) + 4.0) / 4.0;
    }
    mel
}
/// Pads `signal` with `pad` mirrored samples on each side.
///
/// The mirror excludes the edge sample itself, so `[1, 2, 3]` padded by 1
/// becomes `[2, 1, 2, 3, 2]`. When `pad` reaches past the far end of the
/// signal, the mirrored index is clamped to the signal bounds and the
/// farthest available sample is repeated.
///
/// The result always has `signal.len() + 2 * pad` elements. An empty
/// `signal` has nothing to mirror, so it yields `2 * pad` zeros instead of
/// panicking on the `len - 1` underflow.
fn reflect_pad(signal: &[f32], pad: usize) -> Vec<f32> {
    let n = signal.len();
    if n == 0 {
        // Nothing to reflect: keep the length contract with silence.
        return vec![0.0; 2 * pad];
    }
    let last = n - 1;
    let mut out = Vec::with_capacity(n + 2 * pad);
    // Left side: samples pad, pad-1, ..., 1 (clamped to the last index).
    out.extend((1..=pad).rev().map(|i| signal[i.min(last)]));
    out.extend_from_slice(signal);
    // Right side: samples last-1, last-2, ... (clamped to index 0).
    out.extend((1..=pad).map(|i| signal[last.saturating_sub(i)]));
    out
}
/// Computes the power spectrogram (squared magnitude of the STFT) of `padded`.
///
/// Frames of `N_FFT` samples are taken every `HOP_LENGTH` samples, multiplied
/// by a Hann window of the periodic form `0.5 * (1 - cos(2*pi*i / N_FFT))`
/// and transformed with a forward FFT. Only the first `N_FFT / 2 + 1` bins of
/// each frame are kept.
///
/// # Returns
///
/// A frequency-major buffer of `(N_FFT / 2 + 1) x n_frames` values, i.e. the
/// value for bin `f` of frame `t` lives at `f * n_frames + t`. If `padded`
/// holds fewer than `N_FFT` samples no frame fits and an empty vector is
/// returned.
fn stft_magnitudes_squared(padded: &[f32]) -> Vec<f32> {
    let n_fft_half = N_FFT / 2 + 1;
    // `checked_sub` guards against usize underflow for inputs shorter than one window.
    let n_frames = match padded.len().checked_sub(N_FFT) {
        Some(span) => span / HOP_LENGTH + 1,
        None => return Vec::new(),
    };
    let hann: Vec<f32> = (0..N_FFT)
        .map(|i| 0.5 * (1.0 - (2.0 * std::f32::consts::PI * i as f32 / N_FFT as f32).cos()))
        .collect();
    let mut planner = FftPlanner::<f32>::new();
    let fft = planner.plan_fft_forward(N_FFT);
    let mut magnitudes = vec![0f32; n_fft_half * n_frames];
    // Scratch buffer reused for every frame; the FFT is computed in place.
    let mut buffer = vec![Complex::new(0f32, 0f32); N_FFT];
    for t in 0..n_frames {
        let start = t * HOP_LENGTH;
        let frame = &padded[start..start + N_FFT];
        for ((slot, &sample), &weight) in buffer.iter_mut().zip(frame).zip(&hann) {
            *slot = Complex::new(sample * weight, 0.0);
        }
        fft.process(&mut buffer);
        for (f, bin) in buffer[..n_fft_half].iter().enumerate() {
            magnitudes[f * n_frames + t] = bin.norm_sqr();
        }
    }
    magnitudes
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Padding by two samples mirrors the interior samples around both edges.
    #[test]
    fn test_reflect_pad() {
        let input: [f32; 5] = [1.0, 2.0, 3.0, 4.0, 5.0];
        let expected: Vec<f32> = vec![3.0, 2.0, 1.0, 2.0, 3.0, 4.0, 5.0, 4.0, 3.0];
        assert_eq!(reflect_pad(&input, 2), expected);
    }

    /// 16000 samples with an 80-band filter bank produce 100 output frames.
    #[test]
    fn test_mel_shape() {
        const MEL_BANDS: usize = 80;
        let silence = vec![0.0f32; 16000];
        let zero_bank = vec![0.0f32; MEL_BANDS * 201];
        let spectrogram = log_mel_spectrogram(&silence, MEL_BANDS, &zero_bank);
        let frame_count = spectrogram.len() / MEL_BANDS;
        assert_eq!(frame_count, 100);
    }
}