use crate::signal_processing::{fft, spectral};
#[cfg(feature = "no-std")]
use core::f32::consts::PI;
#[cfg(not(feature = "no-std"))]
use std::f32::consts::PI;
#[cfg(feature = "no-std")]
extern crate alloc;
#[cfg(feature = "no-std")]
use alloc::{vec, vec::Vec};
pub mod mfcc {
use super::*;
pub struct MfccExtractor {
#[allow(dead_code)] sample_rate: f32,
n_mfcc: usize,
n_mels: usize,
n_fft: usize,
hop_length: usize,
mel_filters: Vec<Vec<f32>>,
dct_matrix: Vec<Vec<f32>>,
}
impl MfccExtractor {
pub fn new(
sample_rate: f32,
n_mfcc: usize,
n_mels: usize,
n_fft: usize,
hop_length: usize,
) -> Self {
let mel_filters = create_mel_filter_bank(n_mels, n_fft, sample_rate);
let dct_matrix = create_dct_matrix(n_mfcc, n_mels);
Self {
sample_rate,
n_mfcc,
n_mels,
n_fft,
hop_length,
mel_filters,
dct_matrix,
}
}
pub fn extract(&self, audio: &[f32]) -> Vec<Vec<f32>> {
let mut mfcc_features = Vec::new();
let window = spectral::hamming_window(self.n_fft);
for start in (0..audio.len()).step_by(self.hop_length) {
let end = (start + self.n_fft).min(audio.len());
if end - start < self.n_fft {
break;
}
let mut windowed: Vec<f32> = audio[start..end]
.iter()
.zip(window.iter())
.map(|(&a, &w)| a * w)
.collect();
windowed.resize(self.n_fft, 0.0);
let fft_result = fft::rfft(&windowed);
let power_spectrum: Vec<f32> =
fft_result.iter().map(|c| c.magnitude().powi(2)).collect();
let mel_spectrum = self.apply_mel_filters(&power_spectrum);
let log_mel: Vec<f32> = mel_spectrum
.iter()
.map(|&x| (x + 1e-10).ln()) .collect();
let mfcc = self.apply_dct(&log_mel);
mfcc_features.push(mfcc);
}
mfcc_features
}
fn apply_mel_filters(&self, power_spectrum: &[f32]) -> Vec<f32> {
let mut mel_spectrum = vec![0.0; self.n_mels];
for (i, filter) in self.mel_filters.iter().enumerate() {
let mut energy = 0.0;
for (j, &filter_val) in filter.iter().enumerate() {
if j < power_spectrum.len() {
energy += power_spectrum[j] * filter_val;
}
}
mel_spectrum[i] = energy;
}
mel_spectrum
}
fn apply_dct(&self, log_mel: &[f32]) -> Vec<f32> {
let mut mfcc = vec![0.0; self.n_mfcc];
for (i, mfcc_i) in mfcc.iter_mut().enumerate() {
for (j, &lm) in log_mel.iter().enumerate() {
*mfcc_i += lm * self.dct_matrix[i][j];
}
}
mfcc
}
}
fn create_mel_filter_bank(n_mels: usize, n_fft: usize, sample_rate: f32) -> Vec<Vec<f32>> {
let n_freqs = n_fft / 2 + 1;
let mut filters = vec![vec![0.0; n_freqs]; n_mels];
let hz_to_mel = |hz: f32| 2595.0 * (1.0 + hz / 700.0).log10();
let mel_to_hz = |mel: f32| 700.0 * (10.0_f32.powf(mel / 2595.0) - 1.0);
let mel_min = hz_to_mel(0.0);
let mel_max = hz_to_mel(sample_rate / 2.0);
let mel_points: Vec<f32> = (0..=n_mels + 1)
.map(|i| mel_min + (mel_max - mel_min) * i as f32 / (n_mels + 1) as f32)
.collect();
let hz_points: Vec<f32> = mel_points.iter().map(|&mel| mel_to_hz(mel)).collect();
let bin_points: Vec<usize> = hz_points
.iter()
.map(|&hz| ((n_fft + 1) as f32 * hz / sample_rate).floor() as usize)
.collect();
for m in 0..n_mels {
let left = bin_points[m];
let center = bin_points[m + 1];
let right = bin_points[m + 2];
#[allow(clippy::needless_range_loop)] for k in left..=right {
if k < n_freqs {
if k <= center {
if center > left {
filters[m][k] = (k - left) as f32 / (center - left) as f32;
}
} else if right > center {
filters[m][k] = (right - k) as f32 / (right - center) as f32;
}
}
}
}
filters
}
fn create_dct_matrix(n_mfcc: usize, n_mels: usize) -> Vec<Vec<f32>> {
let mut dct_matrix = vec![vec![0.0; n_mels]; n_mfcc];
for (i, row) in dct_matrix.iter_mut().enumerate() {
for (j, cell) in row.iter_mut().enumerate() {
*cell = (PI * i as f32 * (j as f32 + 0.5) / n_mels as f32).cos()
* (2.0 / n_mels as f32).sqrt();
}
}
dct_matrix
}
}
pub mod features {
use super::*;
pub fn zero_crossing_rate(audio: &[f32], frame_length: usize, hop_length: usize) -> Vec<f32> {
let mut zcr = Vec::new();
for start in (0..audio.len()).step_by(hop_length) {
let end = (start + frame_length).min(audio.len());
if end - start < frame_length {
break;
}
let frame = &audio[start..end];
let mut crossings = 0;
for i in 1..frame.len() {
if (frame[i] >= 0.0) != (frame[i - 1] >= 0.0) {
crossings += 1;
}
}
zcr.push(crossings as f32 / frame.len() as f32);
}
zcr
}
pub fn spectral_centroid_frames(
audio: &[f32],
sample_rate: f32,
frame_length: usize,
hop_length: usize,
) -> Vec<f32> {
let mut centroids = Vec::new();
let window = spectral::hamming_window(frame_length);
for start in (0..audio.len()).step_by(hop_length) {
let end = (start + frame_length).min(audio.len());
if end - start < frame_length {
break;
}
let windowed: Vec<f32> = audio[start..end]
.iter()
.zip(window.iter())
.map(|(&a, &w)| a * w)
.collect();
let fft_result = fft::rfft(&windowed);
let power_spectrum: Vec<f32> =
fft_result.iter().map(|c| c.magnitude().powi(2)).collect();
let centroid = spectral::spectral_centroid(&power_spectrum, sample_rate);
centroids.push(centroid);
}
centroids
}
pub fn spectral_rolloff_frames(
audio: &[f32],
sample_rate: f32,
frame_length: usize,
hop_length: usize,
rolloff_percent: f32,
) -> Vec<f32> {
let mut rolloffs = Vec::new();
let window = spectral::hamming_window(frame_length);
for start in (0..audio.len()).step_by(hop_length) {
let end = (start + frame_length).min(audio.len());
if end - start < frame_length {
break;
}
let windowed: Vec<f32> = audio[start..end]
.iter()
.zip(window.iter())
.map(|(&a, &w)| a * w)
.collect();
let fft_result = fft::rfft(&windowed);
let power_spectrum: Vec<f32> =
fft_result.iter().map(|c| c.magnitude().powi(2)).collect();
let rolloff = spectral::spectral_rolloff(&power_spectrum, sample_rate, rolloff_percent);
rolloffs.push(rolloff);
}
rolloffs
}
pub fn rms_energy(audio: &[f32], frame_length: usize, hop_length: usize) -> Vec<f32> {
let mut rms = Vec::new();
for start in (0..audio.len()).step_by(hop_length) {
let end = (start + frame_length).min(audio.len());
if end - start < frame_length {
break;
}
let frame = &audio[start..end];
let mean_square: f32 = frame.iter().map(|&x| x * x).sum::<f32>() / frame.len() as f32;
rms.push(mean_square.sqrt());
}
rms
}
pub fn estimate_tempo(audio: &[f32], sample_rate: f32) -> f32 {
let hop_length = 512;
let frame_length = 2048;
let mut onset_strength = Vec::new();
let mut prev_spectrum: Option<Vec<f32>> = None;
for start in (0..audio.len()).step_by(hop_length) {
let end = (start + frame_length).min(audio.len());
if end - start < frame_length {
break;
}
let frame = &audio[start..end];
let fft_result = fft::rfft(frame);
let spectrum: Vec<f32> = fft_result.iter().map(|c| c.magnitude()).collect();
if let Some(ref prev) = prev_spectrum {
let flux: f32 = spectrum
.iter()
.zip(prev.iter())
.map(|(&curr, &prev)| (curr - prev).max(0.0))
.sum();
onset_strength.push(flux);
}
prev_spectrum = Some(spectrum);
}
if onset_strength.len() < 2 {
return 120.0; }
let autocorr = crate::signal_processing::convolution::autocorrelation(&onset_strength);
let min_period = (60.0 * sample_rate / (200.0 * hop_length as f32)) as usize; let max_period = (60.0 * sample_rate / (60.0 * hop_length as f32)) as usize;
let mut max_autocorr = 0.0;
let mut best_period = min_period;
for (i, &val) in autocorr
.iter()
.enumerate()
.take(max_period.min(autocorr.len() / 2))
.skip(min_period)
{
if val > max_autocorr {
max_autocorr = val;
best_period = i;
}
}
60.0 * sample_rate / (best_period as f32 * hop_length as f32)
}
}
pub mod effects {
use super::*;
pub fn reverb(audio: &[f32], impulse_response: &[f32]) -> Vec<f32> {
crate::signal_processing::convolution::convolve_1d(audio, impulse_response)
}
pub fn delay(audio: &[f32], delay_samples: usize, feedback: f32, mix: f32) -> Vec<f32> {
let mut output = vec![0.0; audio.len()];
for i in 0..audio.len() {
output[i] = audio[i];
if i >= delay_samples {
output[i] += feedback * output[i - delay_samples];
}
output[i] = (1.0 - mix) * audio[i] + mix * output[i];
}
output
}
pub fn chorus(audio: &[f32], sample_rate: f32, rate: f32, depth: f32, delay: f32) -> Vec<f32> {
let delay_samples = (delay * sample_rate / 1000.0) as usize;
let depth_samples = depth * sample_rate / 1000.0;
let mut output = vec![0.0; audio.len()];
for i in 0..audio.len() {
let time = i as f32 / sample_rate;
let lfo = (2.0 * PI * rate * time).sin();
let variable_delay = delay_samples as f32 + depth_samples * lfo;
let delay_int = variable_delay.floor() as usize;
let delay_frac = variable_delay - delay_int as f32;
let mut delayed_sample = 0.0;
if i > delay_int {
let sample1 = audio[i - delay_int];
let sample2 = audio[i - delay_int - 1];
delayed_sample = sample1 * (1.0 - delay_frac) + sample2 * delay_frac;
}
output[i] = (audio[i] + delayed_sample) * 0.5;
}
output
}
pub fn distortion(audio: &[f32], gain: f32, threshold: f32) -> Vec<f32> {
audio
.iter()
.map(|&sample| {
let amplified = sample * gain;
if amplified.abs() > threshold {
threshold * amplified.signum()
} else {
amplified
}
})
.collect()
}
pub fn compressor(
audio: &[f32],
threshold: f32,
ratio: f32,
attack_time: f32,
release_time: f32,
sample_rate: f32,
) -> Vec<f32> {
let attack_coeff = (-1.0 / (attack_time * sample_rate)).exp();
let release_coeff = (-1.0 / (release_time * sample_rate)).exp();
let mut output = vec![0.0; audio.len()];
let mut envelope = 0.0;
for (i, &sample) in audio.iter().enumerate() {
let input_level = sample.abs();
if input_level > envelope {
envelope = attack_coeff * envelope + (1.0 - attack_coeff) * input_level;
} else {
envelope = release_coeff * envelope + (1.0 - release_coeff) * input_level;
}
let gain_reduction = if envelope > threshold {
threshold + (envelope - threshold) / ratio
} else {
envelope
};
let gain = if envelope > 0.0 {
gain_reduction / envelope
} else {
1.0
};
output[i] = sample * gain;
}
output
}
}
pub mod pitch {
#[cfg(feature = "no-std")]
use alloc::vec;
pub fn autocorrelation_pitch(
audio: &[f32],
sample_rate: f32,
min_freq: f32,
max_freq: f32,
) -> Option<f32> {
if audio.len() < 2 {
return None;
}
let autocorr = crate::signal_processing::convolution::autocorrelation(audio);
let min_period = (sample_rate / max_freq) as usize;
let max_period = (sample_rate / min_freq) as usize;
let search_range = min_period..max_period.min(autocorr.len() / 2);
let best_period = search_range.max_by(|&a, &b| {
autocorr[a]
.partial_cmp(&autocorr[b])
.unwrap_or(core::cmp::Ordering::Equal)
})?;
Some(sample_rate / best_period as f32)
}
pub fn yin_pitch(
audio: &[f32],
sample_rate: f32,
min_freq: f32,
max_freq: f32,
threshold: f32,
) -> Option<f32> {
let min_period = (sample_rate / max_freq) as usize;
let max_period = (sample_rate / min_freq) as usize;
let w = audio.len() / 2;
let mut d = vec![0.0; max_period + 1];
for tau in 1..=max_period.min(w) {
for j in 0..(w - tau) {
let diff = audio[j] - audio[j + tau];
d[tau] += diff * diff;
}
}
let mut cmnd = vec![0.0; d.len()];
cmnd[0] = 1.0;
let mut running_sum = 0.0;
for tau in 1..d.len() {
running_sum += d[tau];
if running_sum == 0.0 {
cmnd[tau] = 1.0;
} else {
cmnd[tau] = d[tau] * tau as f32 / running_sum;
}
}
for tau in min_period..cmnd.len() {
if cmnd[tau] < threshold {
if tau > 0 && tau < cmnd.len() - 1 {
let x0 = cmnd[tau - 1];
let x1 = cmnd[tau];
let x2 = cmnd[tau + 1];
let a = (x0 - 2.0 * x1 + x2) / 2.0;
let b = (x2 - x0) / 2.0;
let tau_fractional = if a != 0.0 {
tau as f32 - b / (2.0 * a)
} else {
tau as f32
};
return Some(sample_rate / tau_fractional);
} else {
return Some(sample_rate / tau as f32);
}
}
}
None
}
}
#[allow(non_snake_case)]
#[cfg(all(test, not(feature = "no-std")))]
mod tests {
use super::*;
use approx::assert_abs_diff_eq;
#[test]
fn test_mfcc_extractor() {
let extractor = mfcc::MfccExtractor::new(16000.0, 13, 26, 512, 256);
let sample_rate = 16000.0;
let duration = 1.0; let frequency = 440.0; let samples = (sample_rate * duration) as usize;
let audio: Vec<f32> = (0..samples)
.map(|i| (2.0 * PI * frequency * i as f32 / sample_rate).sin() * 0.5)
.collect();
let mfcc_features = extractor.extract(&audio);
assert!(!mfcc_features.is_empty());
assert_eq!(mfcc_features[0].len(), 13);
}
#[test]
fn test_zero_crossing_rate() {
let audio = vec![1.0, -1.0, 1.0, -1.0, 1.0, -1.0]; let zcr = features::zero_crossing_rate(&audio, 4, 2);
assert!(!zcr.is_empty());
assert!(zcr[0] > 0.5); }
#[test]
fn test_rms_energy() {
let audio = vec![0.5, -0.5, 0.5, -0.5];
let rms = features::rms_energy(&audio, 4, 4);
assert_eq!(rms.len(), 1);
assert_abs_diff_eq!(rms[0], 0.5, epsilon = 1e-6);
}
#[test]
fn test_delay_effect() {
let audio = vec![1.0, 0.0, 0.0, 0.0, 0.0];
let delayed = effects::delay(&audio, 2, 0.5, 0.5);
assert!(delayed[2] > 0.0);
assert_eq!(delayed.len(), audio.len());
}
#[test]
fn test_distortion_effect() {
let audio = vec![0.1, 0.5, 1.0, 2.0];
let distorted = effects::distortion(&audio, 2.0, 1.0);
assert_abs_diff_eq!(distorted[0], 0.2, epsilon = 1e-6);
assert_abs_diff_eq!(distorted[1], 1.0, epsilon = 1e-6);
assert_abs_diff_eq!(distorted[2], 1.0, epsilon = 1e-6); assert_abs_diff_eq!(distorted[3], 1.0, epsilon = 1e-6); }
#[test]
fn test_compressor_effect() {
let audio = vec![0.1, 0.5, 0.8, 1.0, 0.2];
let compressed = effects::compressor(&audio, 0.5, 2.0, 0.001, 0.1, 44100.0);
assert_eq!(compressed.len(), audio.len());
let input_range = audio
.iter()
.max_by(|a, b| a.partial_cmp(b).expect("operation should succeed"))
.expect("operation should succeed")
- audio
.iter()
.min_by(|a, b| a.partial_cmp(b).expect("operation should succeed"))
.expect("operation should succeed");
let output_range = compressed
.iter()
.max_by(|a, b| a.partial_cmp(b).expect("operation should succeed"))
.expect("operation should succeed")
- compressed
.iter()
.min_by(|a, b| a.partial_cmp(b).expect("operation should succeed"))
.expect("operation should succeed");
assert!(output_range <= input_range * 1.1); }
#[test]
#[ignore] fn test_autocorrelation_pitch() {
let sample_rate = 44100.0;
let frequency = 440.0; let duration = 0.1; let samples = (sample_rate * duration) as usize;
let audio: Vec<f32> = (0..samples)
.map(|i| (2.0 * PI * frequency * i as f32 / sample_rate).sin())
.collect();
let detected_pitch = pitch::autocorrelation_pitch(&audio, sample_rate, 80.0, 2000.0);
if let Some(pitch) = detected_pitch {
assert!((pitch - frequency).abs() < 200.0);
}
}
#[test]
fn test_yin_pitch() {
let sample_rate = 44100.0;
let frequency = 220.0; let duration = 0.1;
let samples = (sample_rate * duration) as usize;
let audio: Vec<f32> = (0..samples)
.map(|i| (2.0 * PI * frequency * i as f32 / sample_rate).sin())
.collect();
let detected_pitch = pitch::yin_pitch(&audio, sample_rate, 80.0, 1000.0, 0.1);
if let Some(pitch) = detected_pitch {
assert!((pitch - frequency).abs() < 50.0);
}
}
#[test]
#[ignore] fn test_spectral_centroid_frames() {
let sample_rate = 44100.0;
let frequency = 1000.0;
let duration = 0.1;
let samples = (sample_rate * duration) as usize;
let audio: Vec<f32> = (0..samples)
.map(|i| (2.0 * PI * frequency * i as f32 / sample_rate).sin())
.collect();
let centroids = features::spectral_centroid_frames(&audio, sample_rate, 1024, 512);
assert!(!centroids.is_empty());
if !centroids.is_empty() {
assert!(centroids[0] > 100.0 && centroids[0] < 5000.0); }
}
}