1use crate::signal_processing::{fft, spectral};
7
8#[cfg(feature = "no-std")]
9use core::f32::consts::PI;
10#[cfg(not(feature = "no-std"))]
11use std::f32::consts::PI;
12
13#[cfg(feature = "no-std")]
14extern crate alloc;
15#[cfg(feature = "no-std")]
16use alloc::{vec, vec::Vec};
17
18pub mod mfcc {
20 use super::*;
21
22 pub struct MfccExtractor {
24 #[allow(dead_code)] sample_rate: f32,
26 n_mfcc: usize,
27 n_mels: usize,
28 n_fft: usize,
29 hop_length: usize,
30 mel_filters: Vec<Vec<f32>>,
31 dct_matrix: Vec<Vec<f32>>,
32 }
33
34 impl MfccExtractor {
35 pub fn new(
36 sample_rate: f32,
37 n_mfcc: usize,
38 n_mels: usize,
39 n_fft: usize,
40 hop_length: usize,
41 ) -> Self {
42 let mel_filters = create_mel_filter_bank(n_mels, n_fft, sample_rate);
43 let dct_matrix = create_dct_matrix(n_mfcc, n_mels);
44
45 Self {
46 sample_rate,
47 n_mfcc,
48 n_mels,
49 n_fft,
50 hop_length,
51 mel_filters,
52 dct_matrix,
53 }
54 }
55
56 pub fn extract(&self, audio: &[f32]) -> Vec<Vec<f32>> {
58 let mut mfcc_features = Vec::new();
59
60 let window = spectral::hamming_window(self.n_fft);
62
63 for start in (0..audio.len()).step_by(self.hop_length) {
64 let end = (start + self.n_fft).min(audio.len());
65 if end - start < self.n_fft {
66 break;
67 }
68
69 let mut windowed: Vec<f32> = audio[start..end]
71 .iter()
72 .zip(window.iter())
73 .map(|(&a, &w)| a * w)
74 .collect();
75
76 windowed.resize(self.n_fft, 0.0);
78
79 let fft_result = fft::rfft(&windowed);
81 let power_spectrum: Vec<f32> =
82 fft_result.iter().map(|c| c.magnitude().powi(2)).collect();
83
84 let mel_spectrum = self.apply_mel_filters(&power_spectrum);
86
87 let log_mel: Vec<f32> = mel_spectrum
89 .iter()
90 .map(|&x| (x + 1e-10).ln()) .collect();
92
93 let mfcc = self.apply_dct(&log_mel);
94 mfcc_features.push(mfcc);
95 }
96
97 mfcc_features
98 }
99
100 fn apply_mel_filters(&self, power_spectrum: &[f32]) -> Vec<f32> {
101 let mut mel_spectrum = vec![0.0; self.n_mels];
102
103 for (i, filter) in self.mel_filters.iter().enumerate() {
104 let mut energy = 0.0;
105 for (j, &filter_val) in filter.iter().enumerate() {
106 if j < power_spectrum.len() {
107 energy += power_spectrum[j] * filter_val;
108 }
109 }
110 mel_spectrum[i] = energy;
111 }
112
113 mel_spectrum
114 }
115
116 fn apply_dct(&self, log_mel: &[f32]) -> Vec<f32> {
117 let mut mfcc = vec![0.0; self.n_mfcc];
118
119 for (i, mfcc_i) in mfcc.iter_mut().enumerate() {
120 for (j, &lm) in log_mel.iter().enumerate() {
121 *mfcc_i += lm * self.dct_matrix[i][j];
122 }
123 }
124
125 mfcc
126 }
127 }
128
129 fn create_mel_filter_bank(n_mels: usize, n_fft: usize, sample_rate: f32) -> Vec<Vec<f32>> {
131 let n_freqs = n_fft / 2 + 1;
132 let mut filters = vec![vec![0.0; n_freqs]; n_mels];
133
134 let hz_to_mel = |hz: f32| 2595.0 * (1.0 + hz / 700.0).log10();
136 let mel_to_hz = |mel: f32| 700.0 * (10.0_f32.powf(mel / 2595.0) - 1.0);
137
138 let mel_min = hz_to_mel(0.0);
139 let mel_max = hz_to_mel(sample_rate / 2.0);
140
141 let mel_points: Vec<f32> = (0..=n_mels + 1)
143 .map(|i| mel_min + (mel_max - mel_min) * i as f32 / (n_mels + 1) as f32)
144 .collect();
145
146 let hz_points: Vec<f32> = mel_points.iter().map(|&mel| mel_to_hz(mel)).collect();
147 let bin_points: Vec<usize> = hz_points
148 .iter()
149 .map(|&hz| ((n_fft + 1) as f32 * hz / sample_rate).floor() as usize)
150 .collect();
151
152 for m in 0..n_mels {
154 let left = bin_points[m];
155 let center = bin_points[m + 1];
156 let right = bin_points[m + 2];
157
158 #[allow(clippy::needless_range_loop)] for k in left..=right {
160 if k < n_freqs {
161 if k <= center {
162 if center > left {
163 filters[m][k] = (k - left) as f32 / (center - left) as f32;
164 }
165 } else if right > center {
166 filters[m][k] = (right - k) as f32 / (right - center) as f32;
167 }
168 }
169 }
170 }
171
172 filters
173 }
174
175 fn create_dct_matrix(n_mfcc: usize, n_mels: usize) -> Vec<Vec<f32>> {
177 let mut dct_matrix = vec![vec![0.0; n_mels]; n_mfcc];
178
179 for (i, row) in dct_matrix.iter_mut().enumerate() {
180 for (j, cell) in row.iter_mut().enumerate() {
181 *cell = (PI * i as f32 * (j as f32 + 0.5) / n_mels as f32).cos()
182 * (2.0 / n_mels as f32).sqrt();
183 }
184 }
185
186 dct_matrix
187 }
188}
189
190pub mod features {
192 use super::*;
193
194 pub fn zero_crossing_rate(audio: &[f32], frame_length: usize, hop_length: usize) -> Vec<f32> {
196 let mut zcr = Vec::new();
197
198 for start in (0..audio.len()).step_by(hop_length) {
199 let end = (start + frame_length).min(audio.len());
200 if end - start < frame_length {
201 break;
202 }
203
204 let frame = &audio[start..end];
205 let mut crossings = 0;
206
207 for i in 1..frame.len() {
208 if (frame[i] >= 0.0) != (frame[i - 1] >= 0.0) {
209 crossings += 1;
210 }
211 }
212
213 zcr.push(crossings as f32 / frame.len() as f32);
214 }
215
216 zcr
217 }
218
219 pub fn spectral_centroid_frames(
221 audio: &[f32],
222 sample_rate: f32,
223 frame_length: usize,
224 hop_length: usize,
225 ) -> Vec<f32> {
226 let mut centroids = Vec::new();
227 let window = spectral::hamming_window(frame_length);
228
229 for start in (0..audio.len()).step_by(hop_length) {
230 let end = (start + frame_length).min(audio.len());
231 if end - start < frame_length {
232 break;
233 }
234
235 let windowed: Vec<f32> = audio[start..end]
237 .iter()
238 .zip(window.iter())
239 .map(|(&a, &w)| a * w)
240 .collect();
241
242 let fft_result = fft::rfft(&windowed);
244 let power_spectrum: Vec<f32> =
245 fft_result.iter().map(|c| c.magnitude().powi(2)).collect();
246
247 let centroid = spectral::spectral_centroid(&power_spectrum, sample_rate);
248 centroids.push(centroid);
249 }
250
251 centroids
252 }
253
254 pub fn spectral_rolloff_frames(
256 audio: &[f32],
257 sample_rate: f32,
258 frame_length: usize,
259 hop_length: usize,
260 rolloff_percent: f32,
261 ) -> Vec<f32> {
262 let mut rolloffs = Vec::new();
263 let window = spectral::hamming_window(frame_length);
264
265 for start in (0..audio.len()).step_by(hop_length) {
266 let end = (start + frame_length).min(audio.len());
267 if end - start < frame_length {
268 break;
269 }
270
271 let windowed: Vec<f32> = audio[start..end]
273 .iter()
274 .zip(window.iter())
275 .map(|(&a, &w)| a * w)
276 .collect();
277
278 let fft_result = fft::rfft(&windowed);
280 let power_spectrum: Vec<f32> =
281 fft_result.iter().map(|c| c.magnitude().powi(2)).collect();
282
283 let rolloff = spectral::spectral_rolloff(&power_spectrum, sample_rate, rolloff_percent);
284 rolloffs.push(rolloff);
285 }
286
287 rolloffs
288 }
289
290 pub fn rms_energy(audio: &[f32], frame_length: usize, hop_length: usize) -> Vec<f32> {
292 let mut rms = Vec::new();
293
294 for start in (0..audio.len()).step_by(hop_length) {
295 let end = (start + frame_length).min(audio.len());
296 if end - start < frame_length {
297 break;
298 }
299
300 let frame = &audio[start..end];
301 let mean_square: f32 = frame.iter().map(|&x| x * x).sum::<f32>() / frame.len() as f32;
302 rms.push(mean_square.sqrt());
303 }
304
305 rms
306 }
307
308 pub fn estimate_tempo(audio: &[f32], sample_rate: f32) -> f32 {
310 let hop_length = 512;
311 let frame_length = 2048;
312
313 let mut onset_strength = Vec::new();
315 let mut prev_spectrum: Option<Vec<f32>> = None;
316
317 for start in (0..audio.len()).step_by(hop_length) {
318 let end = (start + frame_length).min(audio.len());
319 if end - start < frame_length {
320 break;
321 }
322
323 let frame = &audio[start..end];
324 let fft_result = fft::rfft(frame);
325 let spectrum: Vec<f32> = fft_result.iter().map(|c| c.magnitude()).collect();
326
327 if let Some(ref prev) = prev_spectrum {
328 let flux: f32 = spectrum
329 .iter()
330 .zip(prev.iter())
331 .map(|(&curr, &prev)| (curr - prev).max(0.0))
332 .sum();
333 onset_strength.push(flux);
334 }
335
336 prev_spectrum = Some(spectrum);
337 }
338
339 if onset_strength.len() < 2 {
341 return 120.0; }
343
344 let autocorr = crate::signal_processing::convolution::autocorrelation(&onset_strength);
345
346 let min_period = (60.0 * sample_rate / (200.0 * hop_length as f32)) as usize; let max_period = (60.0 * sample_rate / (60.0 * hop_length as f32)) as usize; let mut max_autocorr = 0.0;
351 let mut best_period = min_period;
352
353 for (i, &val) in autocorr
354 .iter()
355 .enumerate()
356 .take(max_period.min(autocorr.len() / 2))
357 .skip(min_period)
358 {
359 if val > max_autocorr {
360 max_autocorr = val;
361 best_period = i;
362 }
363 }
364
365 60.0 * sample_rate / (best_period as f32 * hop_length as f32)
367 }
368}
369
370pub mod effects {
372 use super::*;
373
374 pub fn reverb(audio: &[f32], impulse_response: &[f32]) -> Vec<f32> {
376 crate::signal_processing::convolution::convolve_1d(audio, impulse_response)
377 }
378
379 pub fn delay(audio: &[f32], delay_samples: usize, feedback: f32, mix: f32) -> Vec<f32> {
381 let mut output = vec![0.0; audio.len()];
382
383 for i in 0..audio.len() {
384 output[i] = audio[i];
385
386 if i >= delay_samples {
387 output[i] += feedback * output[i - delay_samples];
388 }
389
390 output[i] = (1.0 - mix) * audio[i] + mix * output[i];
391 }
392
393 output
394 }
395
396 pub fn chorus(audio: &[f32], sample_rate: f32, rate: f32, depth: f32, delay: f32) -> Vec<f32> {
398 let delay_samples = (delay * sample_rate / 1000.0) as usize;
399 let depth_samples = depth * sample_rate / 1000.0;
400 let mut output = vec![0.0; audio.len()];
401
402 for i in 0..audio.len() {
403 let time = i as f32 / sample_rate;
404 let lfo = (2.0 * PI * rate * time).sin();
405 let variable_delay = delay_samples as f32 + depth_samples * lfo;
406
407 let delay_int = variable_delay.floor() as usize;
409 let delay_frac = variable_delay - delay_int as f32;
410
411 let mut delayed_sample = 0.0;
412 if i > delay_int {
413 let sample1 = audio[i - delay_int];
414 let sample2 = audio[i - delay_int - 1];
415 delayed_sample = sample1 * (1.0 - delay_frac) + sample2 * delay_frac;
416 }
417
418 output[i] = (audio[i] + delayed_sample) * 0.5;
419 }
420
421 output
422 }
423
424 pub fn distortion(audio: &[f32], gain: f32, threshold: f32) -> Vec<f32> {
426 audio
427 .iter()
428 .map(|&sample| {
429 let amplified = sample * gain;
430 if amplified.abs() > threshold {
431 threshold * amplified.signum()
432 } else {
433 amplified
434 }
435 })
436 .collect()
437 }
438
439 pub fn compressor(
441 audio: &[f32],
442 threshold: f32,
443 ratio: f32,
444 attack_time: f32,
445 release_time: f32,
446 sample_rate: f32,
447 ) -> Vec<f32> {
448 let attack_coeff = (-1.0 / (attack_time * sample_rate)).exp();
449 let release_coeff = (-1.0 / (release_time * sample_rate)).exp();
450
451 let mut output = vec![0.0; audio.len()];
452 let mut envelope = 0.0;
453
454 for (i, &sample) in audio.iter().enumerate() {
455 let input_level = sample.abs();
456
457 if input_level > envelope {
459 envelope = attack_coeff * envelope + (1.0 - attack_coeff) * input_level;
460 } else {
461 envelope = release_coeff * envelope + (1.0 - release_coeff) * input_level;
462 }
463
464 let gain_reduction = if envelope > threshold {
466 threshold + (envelope - threshold) / ratio
467 } else {
468 envelope
469 };
470
471 let gain = if envelope > 0.0 {
472 gain_reduction / envelope
473 } else {
474 1.0
475 };
476
477 output[i] = sample * gain;
478 }
479
480 output
481 }
482}
483
484pub mod pitch {
486 #[cfg(feature = "no-std")]
487 use alloc::vec;
488
489 pub fn autocorrelation_pitch(
491 audio: &[f32],
492 sample_rate: f32,
493 min_freq: f32,
494 max_freq: f32,
495 ) -> Option<f32> {
496 if audio.len() < 2 {
497 return None;
498 }
499
500 let autocorr = crate::signal_processing::convolution::autocorrelation(audio);
501
502 let min_period = (sample_rate / max_freq) as usize;
503 let max_period = (sample_rate / min_freq) as usize;
504
505 let search_range = min_period..max_period.min(autocorr.len() / 2);
506
507 let best_period = search_range.max_by(|&a, &b| {
508 autocorr[a]
509 .partial_cmp(&autocorr[b])
510 .unwrap_or(core::cmp::Ordering::Equal)
511 })?;
512
513 Some(sample_rate / best_period as f32)
514 }
515
516 pub fn yin_pitch(
518 audio: &[f32],
519 sample_rate: f32,
520 min_freq: f32,
521 max_freq: f32,
522 threshold: f32,
523 ) -> Option<f32> {
524 let min_period = (sample_rate / max_freq) as usize;
525 let max_period = (sample_rate / min_freq) as usize;
526 let w = audio.len() / 2;
527
528 let mut d = vec![0.0; max_period + 1];
529
530 for tau in 1..=max_period.min(w) {
532 for j in 0..(w - tau) {
533 let diff = audio[j] - audio[j + tau];
534 d[tau] += diff * diff;
535 }
536 }
537
538 let mut cmnd = vec![0.0; d.len()];
540 cmnd[0] = 1.0;
541
542 let mut running_sum = 0.0;
543 for tau in 1..d.len() {
544 running_sum += d[tau];
545 if running_sum == 0.0 {
546 cmnd[tau] = 1.0;
547 } else {
548 cmnd[tau] = d[tau] * tau as f32 / running_sum;
549 }
550 }
551
552 for tau in min_period..cmnd.len() {
554 if cmnd[tau] < threshold {
555 if tau > 0 && tau < cmnd.len() - 1 {
557 let x0 = cmnd[tau - 1];
558 let x1 = cmnd[tau];
559 let x2 = cmnd[tau + 1];
560
561 let a = (x0 - 2.0 * x1 + x2) / 2.0;
562 let b = (x2 - x0) / 2.0;
563
564 let tau_fractional = if a != 0.0 {
565 tau as f32 - b / (2.0 * a)
566 } else {
567 tau as f32
568 };
569
570 return Some(sample_rate / tau_fractional);
571 } else {
572 return Some(sample_rate / tau as f32);
573 }
574 }
575 }
576
577 None
578 }
579}
580
581#[allow(non_snake_case)]
582#[cfg(all(test, not(feature = "no-std")))]
583mod tests {
584 use super::*;
585 use approx::assert_abs_diff_eq;
586
587 #[test]
588 fn test_mfcc_extractor() {
589 let extractor = mfcc::MfccExtractor::new(16000.0, 13, 26, 512, 256);
590
591 let sample_rate = 16000.0;
593 let duration = 1.0; let frequency = 440.0; let samples = (sample_rate * duration) as usize;
596
597 let audio: Vec<f32> = (0..samples)
598 .map(|i| (2.0 * PI * frequency * i as f32 / sample_rate).sin() * 0.5)
599 .collect();
600
601 let mfcc_features = extractor.extract(&audio);
602
603 assert!(!mfcc_features.is_empty());
604 assert_eq!(mfcc_features[0].len(), 13);
605 }
606
607 #[test]
608 fn test_zero_crossing_rate() {
609 let audio = vec![1.0, -1.0, 1.0, -1.0, 1.0, -1.0]; let zcr = features::zero_crossing_rate(&audio, 4, 2);
611
612 assert!(!zcr.is_empty());
613 assert!(zcr[0] > 0.5); }
615
616 #[test]
617 fn test_rms_energy() {
618 let audio = vec![0.5, -0.5, 0.5, -0.5];
619 let rms = features::rms_energy(&audio, 4, 4);
620
621 assert_eq!(rms.len(), 1);
622 assert_abs_diff_eq!(rms[0], 0.5, epsilon = 1e-6);
623 }
624
625 #[test]
626 fn test_delay_effect() {
627 let audio = vec![1.0, 0.0, 0.0, 0.0, 0.0];
628 let delayed = effects::delay(&audio, 2, 0.5, 0.5);
629
630 assert!(delayed[2] > 0.0);
632 assert_eq!(delayed.len(), audio.len());
633 }
634
635 #[test]
636 fn test_distortion_effect() {
637 let audio = vec![0.1, 0.5, 1.0, 2.0];
638 let distorted = effects::distortion(&audio, 2.0, 1.0);
639
640 assert_abs_diff_eq!(distorted[0], 0.2, epsilon = 1e-6);
642 assert_abs_diff_eq!(distorted[1], 1.0, epsilon = 1e-6);
643 assert_abs_diff_eq!(distorted[2], 1.0, epsilon = 1e-6); assert_abs_diff_eq!(distorted[3], 1.0, epsilon = 1e-6); }
646
647 #[test]
648 fn test_compressor_effect() {
649 let audio = vec![0.1, 0.5, 0.8, 1.0, 0.2];
650 let compressed = effects::compressor(&audio, 0.5, 2.0, 0.001, 0.1, 44100.0);
651
652 assert_eq!(compressed.len(), audio.len());
653 let input_range = audio
655 .iter()
656 .max_by(|a, b| a.partial_cmp(b).expect("operation should succeed"))
657 .expect("operation should succeed")
658 - audio
659 .iter()
660 .min_by(|a, b| a.partial_cmp(b).expect("operation should succeed"))
661 .expect("operation should succeed");
662 let output_range = compressed
663 .iter()
664 .max_by(|a, b| a.partial_cmp(b).expect("operation should succeed"))
665 .expect("operation should succeed")
666 - compressed
667 .iter()
668 .min_by(|a, b| a.partial_cmp(b).expect("operation should succeed"))
669 .expect("operation should succeed");
670
671 assert!(output_range <= input_range * 1.1); }
674
675 #[test]
676 #[ignore] fn test_autocorrelation_pitch() {
678 let sample_rate = 44100.0;
679 let frequency = 440.0; let duration = 0.1; let samples = (sample_rate * duration) as usize;
682
683 let audio: Vec<f32> = (0..samples)
685 .map(|i| (2.0 * PI * frequency * i as f32 / sample_rate).sin())
686 .collect();
687
688 let detected_pitch = pitch::autocorrelation_pitch(&audio, sample_rate, 80.0, 2000.0);
689
690 if let Some(pitch) = detected_pitch {
691 assert!((pitch - frequency).abs() < 200.0);
693 }
694 }
695
696 #[test]
697 fn test_yin_pitch() {
698 let sample_rate = 44100.0;
699 let frequency = 220.0; let duration = 0.1;
701 let samples = (sample_rate * duration) as usize;
702
703 let audio: Vec<f32> = (0..samples)
705 .map(|i| (2.0 * PI * frequency * i as f32 / sample_rate).sin())
706 .collect();
707
708 let detected_pitch = pitch::yin_pitch(&audio, sample_rate, 80.0, 1000.0, 0.1);
709
710 if let Some(pitch) = detected_pitch {
711 assert!((pitch - frequency).abs() < 50.0);
713 }
714 }
715
716 #[test]
717 #[ignore] fn test_spectral_centroid_frames() {
719 let sample_rate = 44100.0;
720 let frequency = 1000.0;
721 let duration = 0.1;
722 let samples = (sample_rate * duration) as usize;
723
724 let audio: Vec<f32> = (0..samples)
725 .map(|i| (2.0 * PI * frequency * i as f32 / sample_rate).sin())
726 .collect();
727
728 let centroids = features::spectral_centroid_frames(&audio, sample_rate, 1024, 512);
729
730 assert!(!centroids.is_empty());
731 if !centroids.is_empty() {
733 assert!(centroids[0] > 100.0 && centroids[0] < 5000.0); }
735 }
736}