use crate::util;
use rustdct::DctPlanner;
use super::make_log_spectrum;
#[derive(Clone)]
struct Triangle {
pub x1: f64,
pub x2: f64,
pub x3: f64,
pub y1: f64,
pub y2: f64,
slope_ascending: f64,
slope_descending: f64
}
impl Triangle {
pub fn new(x1: f64, x2: f64, x3: f64, y1: f64, y2: f64) -> Triangle {
Triangle {
x1: x1,
x2: x2,
x3: x3,
y1: y1,
y2: y2,
slope_ascending: (y2 - y1) / (x2 - x1),
slope_descending: (y1 - y2) / (x3 - x2)
}
}
pub fn compute(&self, pos: f64) -> f64 {
if pos <= self.x1 || pos >= self.x3 {
return self.y1;
} else if pos <= self.x2 {
return self.slope_ascending * (pos - self.x1) + self.y1;
} else {
return self.slope_descending * (pos - self.x2) + self.y2;
}
}
}
#[derive(Clone)]
struct TriangleFilter {
start_idx: usize,
end_idx: usize,
triangle_filter: Vec<f64>
}
impl TriangleFilter {
pub fn new(start_idx: usize, end_idx: usize, triangle: Triangle, fft_freqs: &[f64], normalize: bool) -> TriangleFilter {
let length = end_idx - start_idx + 1;
let mut filter: Vec<f64> = Vec::with_capacity(length);
if normalize {
let coef = 2.0 / (fft_freqs[end_idx] - fft_freqs[start_idx]);
for i in 0..length {
filter.push(triangle.compute(fft_freqs[i + start_idx]) * coef);
}
} else {
for i in 0..length {
filter.push(triangle.compute(fft_freqs[i + start_idx]));
}
}
TriangleFilter {
start_idx: start_idx,
end_idx: end_idx,
triangle_filter: filter
}
}
pub fn filter(&self, vec: &[f64]) -> f64 {
let mut result: f64 = 0.0;
let mut i: usize = 0;
let mut j: usize = self.start_idx;
while j <= self.end_idx {
result += self.triangle_filter[i] * vec[j];
i += 1;
j += 1;
}
result
}
}
#[derive(Clone)]
pub struct MelFilterbank {
freq_low: f64,
freq_high: f64,
filters: Vec<TriangleFilter>,
num_filters: usize
}
impl MelFilterbank {
pub fn new(freq_low: f64, freq_high: f64, num_filters: usize, fft_freqs: &[f64], normalize: bool) -> MelFilterbank {
let arr_len = num_filters + 2;
let mel_low = freq_to_mel(freq_low, true);
let mel_high = freq_to_mel(freq_high, true);
let mel_center_freqs: Vec<f64> = util::linspace(mel_low, mel_high, arr_len, true);
let freq_center_freqs: Vec<f64> = mel_center_freqs.iter().map(|x| mel_to_freq(*x, true)).collect();
let mut filterbank: Vec<TriangleFilter> = Vec::with_capacity(num_filters);
for i in 1..num_filters+1 {
let low_freq: f64;
let mid_freq: f64;
let high_freq: f64;
low_freq = freq_center_freqs[i-1];
mid_freq = freq_center_freqs[i];
high_freq = freq_center_freqs[i+1];
let tri = Triangle::new(low_freq, mid_freq, high_freq, 0.0, 1.0);
let start_idx = util::ordered_search_le(&fft_freqs, freq_center_freqs[i-1]).unwrap();
let mut end_idx = util::ordered_search_le(&fft_freqs, freq_center_freqs[i+1]).unwrap() + 2;
if end_idx >= fft_freqs.len() {
end_idx = fft_freqs.len() - 1;
}
let tri_filter = TriangleFilter::new(start_idx, end_idx, tri, fft_freqs, normalize);
filterbank.push(tri_filter);
}
MelFilterbank {
freq_low: freq_low,
freq_high: freq_high,
filters: filterbank,
num_filters: num_filters
}
}
pub fn filter(&self, vec: &[f64]) -> Vec<f64> {
let mut filtered: Vec<f64> = Vec::with_capacity(self.num_filters);
for i in 0..self.num_filters {
filtered.push(self.filters[i].filter(&vec))
}
filtered
}
pub fn get_freq_low(&self) -> f64 {
self.freq_low
}
pub fn get_freq_high(&self) -> f64 {
self.freq_high
}
pub fn len(&self) -> usize {
self.num_filters
}
}
#[inline]
pub fn freq_to_mel(freq: f64, slaney: bool) -> f64 {
if slaney {
if freq < 1000.0 {
3.0 * freq / 200.0
} else {
15.0 + 27.0 * (f64::log10(freq / 1000.0) / f64::log10(6.4))
}
} else {
2595.0 * f64::log10(1.0 + freq / 700.0)
}
}
#[inline]
pub fn mel_to_freq(mel: f64, slaney: bool) -> f64 {
if slaney {
if mel < 15.0 {
200.0 * mel / 3.0
} else {
1000.0 * f64::powf(10.0, f64::log10(6.4) * (mel - 15.0) / 27.0)
}
} else {
700.0 * (f64::powf(10.0, mel / 2595.0) - 1.0)
}
}
pub fn melscale(freqs: &[f64]) -> Vec<f64> {
let mut scale: Vec<f64> = Vec::with_capacity(freqs.len());
for freq in freqs {
scale.push(freq_to_mel(*freq, true));
}
scale
}
pub fn make_mel_spectrogram(spectrogram: &[Vec<f64>], filterbank: &MelFilterbank) -> Vec<Vec<f64>> {
let mut mel_spectrogram: Vec<Vec<f64>> = Vec::with_capacity(spectrogram.len());
for i in 0..spectrogram.len() {
mel_spectrogram.push(filterbank.filter(&spectrogram[i]));
}
mel_spectrogram
}
pub fn mfcc_spectrum(mel_spectrum: &[f64], num_mfccs: usize, lifter: Option<f64>) -> Vec<f64> {
let mut mfccs: Vec<f64> = make_log_spectrum(mel_spectrum, 10.0, -10e8, Some(-80.0));
if mel_spectrum.len() > 0 {
let mut planner: DctPlanner<f64> = DctPlanner::new();
let dct2 = planner.plan_dct2(mel_spectrum.len());
dct2.process_dct2(&mut mfccs);
let norm_0 = 2.0 * f64::sqrt(1.0 / (4 * mel_spectrum.len()) as f64);
let norm_1 = 2.0 * f64::sqrt(1.0 / (2 * mel_spectrum.len()) as f64);
mfccs[0] *= norm_0;
for k in 1..mfccs.len() {
mfccs[k] *= norm_1;
}
if let Some(lif) = lifter {
for k in 0..mfccs.len() {
mfccs[k] *= 1.0 + (lif / 2.0) * f64::sin(std::f64::consts::PI * (k + 1) as f64 / lif);
}
}
}
mfccs[..num_mfccs].to_vec()
}
pub fn mfcc_spectrogram(mel_spectrogram: &[Vec<f64>], num_mfccs: usize, lifter: Option<f64>) -> Vec<Vec<f64>> {
let mut mfccs: Vec<Vec<f64>> = Vec::new();
let mut planner = DctPlanner::new();
let dct2 = planner.plan_dct2(mel_spectrogram[0].len());
if mel_spectrogram.len() > 0 {
if mel_spectrogram[0].len() > 0 {
let norm_0 = 2.0 * f64::sqrt(1.0 / (4 * mel_spectrogram[0].len()) as f64);
let norm_1 = 2.0 * f64::sqrt(1.0 / (2 * mel_spectrogram[0].len()) as f64);
for i in 0..mel_spectrogram.len() {
let mut mfccs_vec: Vec<f64> = make_log_spectrum(&mel_spectrogram[i], 10.0, -10e8, Some(-80.0));
dct2.process_dct2(&mut mfccs_vec);
mfccs_vec[0] *= norm_0;
for k in 1..mfccs_vec.len() {
mfccs_vec[k] *= norm_1;
}
if let Some(lif) = lifter {
for k in 0..mfccs_vec.len() {
mfccs_vec[k] *= 1.0 + (lif / 2.0) * f64::sin(std::f64::consts::PI * (k + 1) as f64 / lif);
}
}
mfccs.push(mfccs_vec[..num_mfccs].to_vec());
}
}
}
mfccs
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{spectrum, analysis};
#[test]
fn test_freq_to_mel() {
const EPSILON: f64 = 1e-6;
assert!(f64::abs(freq_to_mel(-1.004, true) - -0.015059999999999999) < EPSILON);
assert!(f64::abs(freq_to_mel(0.0, true) - 0.0) < EPSILON);
assert!(f64::abs(freq_to_mel(1.0, true) - 0.015) < EPSILON);
assert!(f64::abs(freq_to_mel(50.0, true) - 0.75) < EPSILON);
assert!(f64::abs(freq_to_mel(142.429, true) - 2.136435) < EPSILON);
assert!(f64::abs(freq_to_mel(451.987, true) - 6.779805) < EPSILON);
assert!(f64::abs(freq_to_mel(1000.0, true) - 14.999999999999998) < EPSILON);
assert!(f64::abs(freq_to_mel(1002.428, true) - 15.035272646925904) < EPSILON);
assert!(f64::abs(freq_to_mel(5304.53, true) - 39.26935324537849) < EPSILON);
assert!(f64::abs(freq_to_mel(12042.233, true) - 51.194262623415284) < EPSILON);
assert!(f64::abs(freq_to_mel(-1.004, false) - -1.617591975265908) < EPSILON);
assert!(f64::abs(freq_to_mel(0.0, false) - 0.0) < EPSILON);
assert!(f64::abs(freq_to_mel(1.0, false) - 1.6088427864826338) < EPSILON);
assert!(f64::abs(freq_to_mel(50.0, false) - 77.75456466446511) < EPSILON);
assert!(f64::abs(freq_to_mel(142.429, false) - 208.72952224868627) < EPSILON);
assert!(f64::abs(freq_to_mel(451.987, false) - 561.4270515062376) < EPSILON);
assert!(f64::abs(freq_to_mel(586.1, false) - 685.5385318706192) < EPSILON);
assert!(f64::abs(freq_to_mel(1002.428, false) - 1001.5940016448719) < EPSILON);
assert!(f64::abs(freq_to_mel(5304.53, false) - 2422.1236404690194) < EPSILON);
assert!(f64::abs(freq_to_mel(12042.233, false) - 3270.0827681073483) < EPSILON);
}
#[test]
fn test_mel_to_freq() {
const EPSILON: f64 = 1e-6;
assert!(f64::abs(mel_to_freq(-1.01, true) - -67.33333333333334) < EPSILON);
assert!(f64::abs(mel_to_freq(0.0, true) - 0.0) < EPSILON);
assert!(f64::abs(mel_to_freq(0.32, true) - 21.333333333333336) < EPSILON);
assert!(f64::abs(mel_to_freq(1.45, true) - 96.66666666666667) < EPSILON);
assert!(f64::abs(mel_to_freq(5.923, true) - 394.8666666666667) < EPSILON);
assert!(f64::abs(mel_to_freq(11.483, true) - 765.5333333333334) < EPSILON);
assert!(f64::abs(mel_to_freq(15.0, true) - 1000.0000000000002) < EPSILON);
assert!(f64::abs(mel_to_freq(15.03, true) - 1002.0646818488809) < EPSILON);
assert!(f64::abs(mel_to_freq(18.4922, true) - 1271.3698701976102) < EPSILON);
assert!(f64::abs(mel_to_freq(23.5809, true) - 1803.9020548956844) < EPSILON);
assert!(f64::abs(mel_to_freq(-43.0, false) - -26.205110846993996) < EPSILON);
assert!(f64::abs(mel_to_freq(0.0, false) - 0.0) < EPSILON);
assert!(f64::abs(mel_to_freq(1.23, false) - 0.7643961548333467) < EPSILON);
assert!(f64::abs(mel_to_freq(43.45, false) - 27.51470832169547) < EPSILON);
assert!(f64::abs(mel_to_freq(120.4335, false) - 78.94692390028159) < EPSILON);
assert!(f64::abs(mel_to_freq(435.239, false) - 329.9596192156131) < EPSILON);
assert!(f64::abs(mel_to_freq(801.43, false) - 725.3918244975519) < EPSILON);
assert!(f64::abs(mel_to_freq(1009.87, false) - 1014.975669072667) < EPSILON);
assert!(f64::abs(mel_to_freq(2003.49, false) - 3441.482623133689) < EPSILON);
assert!(f64::abs(mel_to_freq(3210.49, false) - 11385.958101160506) < EPSILON);
}
#[test]
fn test_triangle_computation() {
const EPSILON: f64 = 1e-6;
let tri = Triangle::new(5.0, 20.0, 82.0, 1.1, 9.44);
assert!({f64::abs(tri.compute(2.0) - 1.1) < EPSILON});
assert!({f64::abs(tri.compute(4.0) - 1.1) < EPSILON});
assert!({f64::abs(tri.compute(5.0) - 1.1) < EPSILON});
assert!({f64::abs(tri.compute(11.0) - 4.436) < EPSILON});
assert!({f64::abs(tri.compute(20.0) - 9.44) < EPSILON});
assert!({f64::abs(tri.compute(21.0) - 9.305483870967741) < EPSILON});
assert!({f64::abs(tri.compute(43.0) - 6.346129032258064) < EPSILON});
assert!({f64::abs(tri.compute(82.0) - 1.1) < EPSILON});
assert!({f64::abs(tri.compute(98.0) - 1.1) < EPSILON});
}
#[test]
fn test_mel_filterbank() {
const EPSILON: f64 = 1e-6;
const FFT_SIZE: usize = 256;
const SAMPLE_RATE: u32 = 22050;
const NUM_MELS: usize = 20;
const LOW_FREQ: f64 = 0.0;
const HIGH_FREQ: f64 = 11025.0;
let rfreqs = crate::spectrum::rfftfreq(FFT_SIZE, SAMPLE_RATE);
let fb = MelFilterbank::new(LOW_FREQ, HIGH_FREQ, NUM_MELS, &rfreqs, true);
let fake_fft_power_spectrum: Vec<f64> = vec![
1e-2, 2e-5, 15e-4, 21e-5, 1e-4, 2e-5, 11e-5, 31e-4, 132e-9, 22e-3, 152e-3, 201e-5, 123e-5, 221e-6, 11e-5, 32e-4,
12e-5, 2e-5, 15e-4, 21e-5, 1e-4, 2e-5, 11e-5, 31e-4, 132e-9, 22e-3, 152e-3, 201e-5, 123e-5, 221e-6, 11e-5, 32e-4,
3e-4, 2e-5, 15e-4, 21e-5, 1e-4, 2e-5, 11e-5, 31e-4, 132e-9, 22e-3, 152e-3, 201e-5, 123e-5, 221e-6, 11e-5, 32e-4,
2e-3, 2e-5, 15e-4, 21e-5, 1e-4, 2e-5, 11e-5, 31e-4, 132e-9, 22e-3, 152e-3, 201e-5, 123e-5, 221e-6, 11e-5, 32e-4,
43e-6, 2e-5, 15e-4, 21e-5, 1e-4, 2e-5, 11e-5, 31e-4, 132e-9, 22e-3, 152e-3, 201e-5, 123e-5, 221e-6, 11e-5, 32e-4,
2e-3, 2e-5, 15e-4, 21e-5, 1e-4, 2e-5, 11e-5, 31e-4, 132e-9, 22e-3, 152e-3, 201e-5, 123e-5, 221e-6, 11e-5, 32e-4,
6e-5, 2e-5, 15e-4, 21e-5, 1e-4, 2e-5, 11e-5, 31e-4, 132e-9, 22e-3, 152e-3, 201e-5, 123e-5, 221e-6, 11e-5, 32e-4,
4e-3, 2e-5, 15e-4, 21e-5, 1e-4, 2e-5, 11e-5, 31e-4, 132e-9, 22e-3, 152e-3, 201e-5, 123e-5, 221e-6, 11e-5, 32e-4,
2e-5
];
let mel_spec = fb.filter(&fake_fft_power_spectrum);
let out_vec: Vec<f64> = vec![6.76895304e-06, 1.36028451e-06, 2.77367273e-06, 1.89267587e-05,
4.07858842e-04, 3.19873457e-04, 5.04253261e-06, 8.52845397e-06,
4.99160938e-06, 5.40207921e-06, 3.29421796e-04, 7.69145971e-05,
6.61126227e-06, 2.21605018e-04, 4.42958155e-05, 1.83195179e-04,
9.42967421e-05, 1.23944028e-04, 1.34227360e-04, 1.39157067e-04];
for i in 0..out_vec.len() {
assert!({f64::abs(out_vec[i] - mel_spec[i]) < EPSILON})
}
}
#[test]
fn test_mel_spec() {
let fft_size = 2048;
let audio = crate::read("myfile.wav").unwrap();
let rfft_freqs = spectrum::rfftfreq(fft_size, audio.sample_rate);
let mel_filterbank = analysis::mel::MelFilterbank::new(20.0, 8000.0, 40, &rfft_freqs, false);
let imaginary_spectrogram = spectrum::rstft(&audio.samples[0], fft_size, fft_size / 2, crate::WindowType::Hanning);
let (magnitude_spectrogram, _) = spectrum::complex_to_polar_rstft(&imaginary_spectrogram);
let power_spectrogram = analysis::make_power_spectrogram(&magnitude_spectrogram);
let _ = analysis::mel::make_mel_spectrogram(&power_spectrogram, &mel_filterbank);
}
#[test]
fn test_mfcc_spectrum() {
let fft_size = 2048;
let audio = crate::read("myfile.wav").unwrap();
let rfft_freqs = spectrum::rfftfreq(fft_size, audio.sample_rate);
let mel_filterbank = analysis::mel::MelFilterbank::new(20.0, 8000.0, 40, &rfft_freqs, false);
let imaginary_spectrogram = spectrum::rstft(&audio.samples[0], fft_size, fft_size / 2, crate::WindowType::Hanning);
let (magnitude_spectrogram, _) = spectrum::complex_to_polar_rstft(&imaginary_spectrogram);
let power_spectrogram = analysis::make_power_spectrogram(&magnitude_spectrogram);
let mel_spectrogram = analysis::mel::make_mel_spectrogram(&power_spectrogram, &mel_filterbank);
let _ = analysis::mel::mfcc_spectrogram(&mel_spectrogram, 20, None); }
#[test]
fn test_mfccs() {
const EPSILON: f64 = 1e-4;
const FFT_SIZE: usize = 256;
const SAMPLE_RATE: u32 = 22050;
const NUM_MELS: usize = 20;
const LOW_FREQ: f64 = 0.0;
const HIGH_FREQ: f64 = 11025.0;
let rfreqs = crate::spectrum::rfftfreq(FFT_SIZE, SAMPLE_RATE);
let fb = MelFilterbank::new(LOW_FREQ, HIGH_FREQ, NUM_MELS, &rfreqs, true);
let fake_fft_power_spectrum: Vec<f64> = vec![
1e-2, 2e-5, 15e-4, 21e-5, 1e-4, 2e-5, 11e-5, 31e-4, 132e-9, 22e-3, 152e-3, 201e-5, 123e-5, 221e-6, 11e-5, 32e-4,
12e-5, 2e-5, 15e-4, 21e-5, 1e-4, 2e-5, 11e-5, 31e-4, 132e-9, 22e-3, 152e-3, 201e-5, 123e-5, 221e-6, 11e-5, 32e-4,
3e-4, 2e-5, 15e-4, 21e-5, 1e-4, 2e-5, 11e-5, 31e-4, 132e-9, 22e-3, 152e-3, 201e-5, 123e-5, 221e-6, 11e-5, 32e-4,
2e-3, 2e-5, 15e-4, 21e-5, 1e-4, 2e-5, 11e-5, 31e-4, 132e-9, 22e-3, 152e-3, 201e-5, 123e-5, 221e-6, 11e-5, 32e-4,
43e-6, 2e-5, 15e-4, 21e-5, 1e-4, 2e-5, 11e-5, 31e-4, 132e-9, 22e-3, 152e-3, 201e-5, 123e-5, 221e-6, 11e-5, 32e-4,
2e-3, 2e-5, 15e-4, 21e-5, 1e-4, 2e-5, 11e-5, 31e-4, 132e-9, 22e-3, 152e-3, 201e-5, 123e-5, 221e-6, 11e-5, 32e-4,
6e-5, 2e-5, 15e-4, 21e-5, 1e-4, 2e-5, 11e-5, 31e-4, 132e-9, 22e-3, 152e-3, 201e-5, 123e-5, 221e-6, 11e-5, 32e-4,
4e-3, 2e-5, 15e-4, 21e-5, 1e-4, 2e-5, 11e-5, 31e-4, 132e-9, 22e-3, 152e-3, 201e-5, 123e-5, 221e-6, 11e-5, 32e-4,
2e-5
];
let mel_spec = fb.filter(&fake_fft_power_spectrum);
let mfccs = mfcc_spectrum(&mel_spec, 20, None);
let python_output = vec![
-4.05909252e+02, -1.82740679e+01, 6.32330435e-01, -4.93239988e+00,
-1.11250498e+01, -1.15649299e+01, -4.36550765e+00, 9.53562101e+00,
1.45629020e+01, 3.80200863e+00, 4.17683131e-01, 5.27779813e+00,
-8.32106253e-01, -9.12864518e+00, 4.33139092e-01, 1.10599927e+01,
-5.45434448e-02, -2.88455542e+00, 3.76468289e+00, -3.82628473e+00];
for i in 0..mfccs.len() {
assert!(f64::abs(mfccs[i] - python_output[i]) < EPSILON);
}
}
#[test]
fn test_dct() {
const EPSILON: f64 = 1e-6;
let mut planner: DctPlanner<f64> = rustdct::DctPlanner::new();
let dct = planner.plan_dct2(16);
let mut myvec = vec![0.1, -0.3, 0.43, -0.2, 0.1, 0.22, -0.432, -0.01, 0.1, -0.3, 0.43, 0.422, 0.1, -0.22, -0.43223, -0.231];
dct.process_dct2(&mut myvec);
let python_output = vec![
-0.44646, 0.88984127, -1.16548374, 2.46539874, -2.63440108, -0.2669338,
1.18680059, -1.30619017, 1.39332563, -0.61721852, -1.45113856, 1.15916989,
3.94025219, 1.42283727, 0.40242927, 0.82046698];
for i in 0..myvec.len() {
assert!(f64::abs(2.0 * myvec[i] - python_output[i]) < EPSILON);
}
let mut myvec2 = vec![-3002.48488, -40.384, -958.38506, -1002.4035506, -877.2312, -321.12101, -558.9853, -20.5983, -505.9681, -120.2489, -6590.843, -10.48729, -583.86948, -59.834972, -212.12, -89.348];
let python_output2 = vec![
-29908.6260852, -3049.72757743, 1953.89015, -15692.81401219,
3241.78874056, 5128.05286375, -15217.1865778, -955.11239568,
3916.70892585, -15803.26515976, -2549.46171482, 5938.48155781,
-13807.33352441, -1109.48122982, 9696.12754217, -15191.45693698];
dct.process_dct2(&mut myvec2);
for i in 0..myvec2.len() {
assert!(f64::abs(2.0 * myvec2[i] - python_output2[i]) < EPSILON);
}
}
}