1extern crate noisy_float;
7
8use crate::Feature;
9
10use super::errors::{AnalysisError, AnalysisResult};
11use super::utils::{Normalize, hz_to_octs_inplace, stft};
12use ndarray::{Array, Array1, Array2, Axis, Zip, arr1, arr2, concatenate, s};
13use ndarray_stats::QuantileExt;
14use ndarray_stats::interpolate::Midpoint;
15use noisy_float::prelude::*;
16
17#[derive(Debug, Clone)]
28#[allow(clippy::module_name_repetitions)]
29pub struct ChromaDesc {
30 sample_rate: u32,
31 n_chroma: u32,
32 values_chroma: Array2<f64>,
33}
34
35impl Normalize for ChromaDesc {
36 const MAX_VALUE: Feature = 0.12;
37 const MIN_VALUE: Feature = 0.;
38}
39
40impl ChromaDesc {
41 pub const WINDOW_SIZE: usize = 8192;
42
43 #[must_use]
44 #[inline]
45 pub fn new(sample_rate: u32, n_chroma: u32) -> Self {
46 Self {
47 sample_rate,
48 n_chroma,
49 values_chroma: Array2::zeros((n_chroma as usize, 0)),
50 }
51 }
52
53 #[allow(clippy::missing_errors_doc, clippy::missing_panics_doc)]
60 #[inline]
61 pub fn do_(&mut self, signal: &[f32]) -> AnalysisResult<()> {
62 let mut stft = stft(signal, Self::WINDOW_SIZE, 2205);
63 let tuning = estimate_tuning(self.sample_rate, &stft, Self::WINDOW_SIZE, 0.01, 12)?;
64 let chroma = chroma_stft(
65 self.sample_rate,
66 &mut stft,
67 Self::WINDOW_SIZE,
68 self.n_chroma,
69 tuning,
70 )?;
71 self.values_chroma = concatenate![Axis(1), self.values_chroma, chroma];
72 Ok(())
73 }
74
75 #[inline]
86 pub fn get_value(&mut self) -> Vec<Feature> {
87 #[allow(clippy::cast_possible_truncation)]
88 chroma_interval_features(&self.values_chroma)
89 .mapv(|x| self.normalize(x as Feature))
90 .to_vec()
91 }
92}
93
94#[allow(
97 clippy::missing_errors_doc,
98 clippy::missing_panics_doc,
99 clippy::module_name_repetitions
100)]
101#[must_use]
102#[inline]
103pub fn chroma_interval_features(chroma: &Array2<f64>) -> Array1<f64> {
104 let chroma = normalize_feature_sequence(&chroma.mapv(|x| (x * 15.).exp()));
105 let templates = arr2(&[
106 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
107 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
108 [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
109 [0, 0, 1, 0, 0, 0, 0, 1, 1, 0],
110 [0, 0, 0, 1, 0, 0, 1, 0, 0, 1],
111 [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
112 [0, 0, 0, 0, 0, 1, 0, 0, 1, 0],
113 [0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
114 [0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
115 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
116 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
117 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
118 ]);
119 let interval_feature_matrix = extract_interval_features(&chroma, &templates);
120 interval_feature_matrix.mean_axis(Axis(1)).unwrap()
121}
122
123#[must_use]
124#[inline]
125pub fn extract_interval_features(chroma: &Array2<f64>, templates: &Array2<i32>) -> Array2<f64> {
126 let mut f_intervals: Array2<f64> = Array::zeros((chroma.shape()[1], templates.shape()[1]));
127 for (template, mut f_interval) in templates
128 .axis_iter(Axis(1))
129 .zip(f_intervals.axis_iter_mut(Axis(1)))
130 {
131 for shift in 0..12 {
132 let mut vec: Vec<i32> = template.to_vec();
133 vec.rotate_right(shift);
134 let rolled = arr1(&vec);
135 let power = Zip::from(chroma.t())
136 .and_broadcast(&rolled)
137 .map_collect(|&f, &s| f.powi(s))
138 .map_axis_mut(Axis(1), |x| x.product());
139 f_interval += &power;
140 }
141 }
142 f_intervals.t().to_owned()
143}
144
145#[inline]
146pub fn normalize_feature_sequence(feature: &Array2<f64>) -> Array2<f64> {
147 let mut normalized_sequence = feature.to_owned();
148 for mut column in normalized_sequence.columns_mut() {
149 let mut sum = column.mapv(f64::abs).sum();
150 if sum < 0.0001 {
151 sum = 1.;
152 }
153 column /= sum;
154 }
155
156 normalized_sequence
157}
158
159#[allow(
167 clippy::missing_errors_doc,
168 clippy::missing_panics_doc,
169 clippy::module_name_repetitions,
170 clippy::missing_inline_in_public_items
171)]
172pub fn chroma_filter(
173 sample_rate: u32,
174 n_fft: usize,
175 n_chroma: u32,
176 tuning: f64,
177) -> AnalysisResult<Array2<f64>> {
178 let ctroct = 5.0;
179 let octwidth = 2.;
180 let n_chroma_float = f64::from(n_chroma);
181 let n_chroma2 = (n_chroma_float / 2.0).round();
182
183 let frequencies = Array::linspace(0., f64::from(sample_rate), n_fft + 1);
184
185 let mut freq_bins = frequencies;
186 hz_to_octs_inplace(&mut freq_bins, tuning, n_chroma);
187 freq_bins.mapv_inplace(|x| x * n_chroma_float);
188 freq_bins[0] = 1.5f64.mul_add(-n_chroma_float, freq_bins[1]);
189
190 let mut binwidth_bins = Array::ones(freq_bins.raw_dim());
191 binwidth_bins.slice_mut(s![0..freq_bins.len() - 1]).assign(
192 &(&freq_bins.slice(s![1..]) - &freq_bins.slice(s![..-1]))
193 .mapv(|x| if x <= 1. { 1. } else { x }),
194 );
195
196 let mut d: Array2<f64> = Array::zeros((n_chroma as usize, (freq_bins).len()));
197 for (idx, mut row) in d.rows_mut().into_iter().enumerate() {
198 #[allow(clippy::cast_precision_loss)]
199 row.fill(idx as f64);
200 }
201 d = -d + &freq_bins;
202
203 d.mapv_inplace(|x| 10f64.mul_add(n_chroma_float, x + n_chroma2) % n_chroma_float - n_chroma2);
204 d = d / binwidth_bins;
205 d.mapv_inplace(|x| (-0.5 * (2. * x) * (2. * x)).exp());
206
207 let mut wts = d;
208 for mut col in wts.columns_mut() {
210 let mut sum = col.mapv(|x| x * x).sum().sqrt();
211 if sum < f64::MIN_POSITIVE {
212 sum = 1.;
213 }
214 col /= sum;
215 }
216
217 freq_bins.mapv_inplace(|x| (-0.5 * ((x / n_chroma_float - ctroct) / octwidth).powi(2)).exp());
218
219 wts *= &freq_bins;
220
221 let mut b = Array2::zeros(wts.dim());
223 b.slice_mut(s![-3.., ..]).assign(&wts.slice(s![..3, ..]));
224 b.slice_mut(s![..-3, ..]).assign(&wts.slice(s![3.., ..]));
225
226 wts = b;
227 let non_aliased = 1 + n_fft / 2;
228 Ok(wts.slice_move(s![.., ..non_aliased]))
229}
230
231#[allow(clippy::missing_errors_doc, clippy::missing_panics_doc)]
232#[allow(clippy::missing_inline_in_public_items)]
233pub fn pip_track(
234 sample_rate: u32,
235 spectrum: &Array2<f64>,
236 n_fft: usize,
237) -> AnalysisResult<(Vec<f64>, Vec<f64>)> {
238 let sample_rate_float = f64::from(sample_rate);
239 let fmin = 150.0_f64;
240 let fmax = 4000.0_f64.min(sample_rate_float / 2.0);
241 let threshold = 0.1;
242
243 let fft_freqs = Array::linspace(0., sample_rate_float / 2., 1 + n_fft / 2);
244
245 let length = spectrum.len_of(Axis(0));
246
247 let freq_mask = fft_freqs
249 .iter()
250 .map(|&f| (fmin <= f) && (f < fmax))
251 .collect::<Vec<bool>>();
252
253 let ref_value = spectrum.map_axis(Axis(0), |x| {
254 let first: f64 = *x.first().expect("empty spectrum axis");
255 let max = x.fold(first, |acc, &elem| acc.max(elem));
256 threshold * max
257 });
258
259 let taken_columns = freq_mask
261 .iter()
262 .fold(0, |acc, &x| if x { acc + 1 } else { acc });
263 let mut pitches = Vec::with_capacity(taken_columns * length);
264 let mut mags = Vec::with_capacity(taken_columns * length);
265
266 let beginning = freq_mask
267 .iter()
268 .position(|&b| b)
269 .ok_or_else(|| AnalysisError::AnalysisError(String::from("in chroma")))?;
270 let end = freq_mask
271 .iter()
272 .rposition(|&b| b)
273 .ok_or_else(|| AnalysisError::AnalysisError(String::from("in chroma")))?;
274
275 let zipped = Zip::indexed(spectrum.slice(s![beginning..end - 3, ..]))
276 .and(spectrum.slice(s![beginning + 1..end - 2, ..]))
277 .and(spectrum.slice(s![beginning + 2..end - 1, ..]));
278
279 zipped.for_each(|(i, j), &before_elem, &elem, &after_elem| {
282 if elem > ref_value[j] && after_elem <= elem && before_elem < elem {
283 let avg = 0.5 * (after_elem - before_elem);
284 let mut shift = 2f64.mul_add(elem, -after_elem) - before_elem;
285 if shift.abs() < f64::MIN_POSITIVE {
286 shift += 1.;
287 }
288 shift = avg / shift;
289 #[allow(clippy::cast_precision_loss)]
290 pitches.push(((i + beginning + 1) as f64 + shift) * sample_rate_float / n_fft as f64);
291 mags.push((0.5 * avg).mul_add(shift, elem));
292 }
293 });
294
295 Ok((pitches, mags))
296}
297
298#[allow(clippy::missing_errors_doc, clippy::missing_panics_doc)]
300#[inline]
301pub fn pitch_tuning(
302 frequencies: &mut Array1<f64>,
303 resolution: f64,
304 bins_per_octave: u32,
305) -> AnalysisResult<f64> {
306 if frequencies.is_empty() {
307 return Ok(0.0);
308 }
309 hz_to_octs_inplace(frequencies, 0.0, 12);
310 frequencies.mapv_inplace(|x| f64::from(bins_per_octave) * x % 1.0);
311
312 frequencies.mapv_inplace(|x| if x >= 0.5 { x - 1. } else { x });
314
315 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
316 let indexes = ((frequencies.to_owned() - -0.5) / resolution).mapv(|x| x as usize);
317 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
318 let mut counts: Array1<usize> = Array::zeros(((0.5 - -0.5) / resolution) as usize);
319 for &idx in &indexes {
320 counts[idx] += 1;
321 }
322 let max_index = counts
323 .argmax()
324 .map_err(|e| AnalysisError::AnalysisError(format!("in chroma: {e}")))?;
325
326 #[allow(clippy::cast_precision_loss)]
328 Ok((100. * resolution).mul_add(max_index as f64, -50.) / 100.)
329}
330
331#[allow(clippy::missing_errors_doc, clippy::missing_panics_doc)]
332#[inline]
333pub fn estimate_tuning(
334 sample_rate: u32,
335 spectrum: &Array2<f64>,
336 n_fft: usize,
337 resolution: f64,
338 bins_per_octave: u32,
339) -> AnalysisResult<f64> {
340 let (pitch, mag) = pip_track(sample_rate, spectrum, n_fft)?;
341
342 let (filtered_pitch, filtered_mag): (Vec<N64>, Vec<N64>) = pitch
343 .iter()
344 .zip(&mag)
345 .filter(|&(&p, _)| p > 0.)
346 .map(|(x, y)| (n64(*x), n64(*y)))
347 .unzip();
348
349 if pitch.is_empty() {
350 return Ok(0.);
351 }
352
353 let threshold: N64 = Array::from(filtered_mag.clone())
354 .quantile_axis_mut(Axis(0), n64(0.5), &Midpoint)
355 .map_err(|e| AnalysisError::AnalysisError(format!("in chroma: {e}")))?
356 .into_scalar();
357 let mut pitch = filtered_pitch
358 .iter()
359 .zip(&filtered_mag)
360 .filter_map(|(&p, &m)| if m >= threshold { Some(p.into()) } else { None })
361 .collect::<Array1<f64>>();
362 pitch_tuning(&mut pitch, resolution, bins_per_octave)
363}
364
365#[allow(
366 clippy::missing_errors_doc,
367 clippy::missing_panics_doc,
368 clippy::module_name_repetitions
369)]
370#[inline]
371pub fn chroma_stft(
372 sample_rate: u32,
373 spectrum: &mut Array2<f64>,
374 n_fft: usize,
375 n_chroma: u32,
376 tuning: f64,
377) -> AnalysisResult<Array2<f64>> {
378 spectrum.mapv_inplace(|x| x * x);
379 let mut raw_chroma = chroma_filter(sample_rate, n_fft, n_chroma, tuning)?;
380
381 raw_chroma = raw_chroma.dot(spectrum);
382 for mut row in raw_chroma.columns_mut() {
383 let mut sum = row.mapv(f64::abs).sum();
384 if sum < f64::MIN_POSITIVE {
385 sum = 1.;
386 }
387 row /= sum;
388 }
389 Ok(raw_chroma)
390}
391
/// Regression tests comparing each stage against reference arrays stored
/// under `data/`.
#[cfg(test)]
mod test {
    use super::*;
    use crate::{
        SAMPLE_RATE,
        decoder::{Decoder as _, MecompDecoder as Decoder},
        utils::stft,
    };
    use ndarray::{Array2, arr1, arr2};
    use ndarray_npy::ReadNpyExt as _;
    use std::{fs::File, path::Path};

    #[test]
    fn test_chroma_interval_features() {
        let file = File::open("data/chroma.npy").unwrap();
        let chroma = Array2::<f64>::read_npy(file).unwrap();
        let features = chroma_interval_features(&chroma);
        let expected_features = arr1(&[
            0.038_602_84,
            0.021_852_81,
            0.042_243_79,
            0.063_852_78,
            0.073_111_48,
            0.025_125_66,
            0.003_198_99,
            0.003_113_08,
            0.001_074_33,
            0.002_418_61,
        ]);
        for (expected, actual) in expected_features.iter().zip(&features) {
            // Take the absolute value of the difference, not of `actual`
            // alone; otherwise any too-large `actual` would pass.
            assert!(
                0.000_000_01 > (expected - actual).abs(),
                "{expected} !~= {actual}"
            );
        }
    }

    #[test]
    fn test_extract_interval_features() {
        let file = File::open("data/chroma-interval.npy").unwrap();
        let chroma = Array2::<f64>::read_npy(file).unwrap();
        let templates = arr2(&[
            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 1, 0, 0, 0, 0, 1, 1, 0],
            [0, 0, 0, 1, 0, 0, 1, 0, 0, 1],
            [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 1, 0, 0, 1, 0],
            [0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        ]);

        let file = File::open("data/interval-feature-matrix.npy").unwrap();
        let expected_interval_features = Array2::<f64>::read_npy(file).unwrap();

        let interval_features = extract_interval_features(&chroma, &templates);
        for (expected, actual) in expected_interval_features
            .iter()
            .zip(interval_features.iter())
        {
            assert!(
                0.000_000_1 > (expected - actual).abs(),
                "{expected} !~= {actual}"
            );
        }
    }

    #[test]
    fn test_normalize_feature_sequence() {
        let array = arr2(&[[0.1, 0.3, 0.4], [1.1, 0.53, 1.01]]);
        let expected_array = arr2(&[
            [0.083_333_33, 0.361_445_78, 0.283_687_94],
            [0.916_666_67, 0.638_554_22, 0.716_312_06],
        ]);

        let normalized_array = normalize_feature_sequence(&array);

        assert!(!array.is_empty() && !expected_array.is_empty());

        for (expected, actual) in expected_array.iter().zip(normalized_array.iter()) {
            assert!(
                0.000_000_1 > (expected - actual).abs(),
                "{expected} !~= {actual}"
            );
        }
    }

    #[test]
    fn test_chroma_desc() {
        let song = Decoder::new()
            .unwrap()
            .decode(Path::new("data/s16_mono_22_5kHz.flac"))
            .unwrap();
        let mut chroma_desc = ChromaDesc::new(SAMPLE_RATE, 12);
        chroma_desc.do_(&song.samples).unwrap();
        let expected_values = [
            -0.356_619_36,
            -0.635_786_53,
            -0.295_936_82,
            0.064_213_04,
            0.218_524_58,
            -0.581_239,
            -0.946_683_5,
            -0.948_115_3,
            -0.982_094_5,
            -0.959_689_74,
        ];
        for (expected, actual) in expected_values.iter().zip(chroma_desc.get_value().iter()) {
            let relative_error = (expected - actual).abs() / expected.abs();
            assert!(
                relative_error < 0.01,
                "relative error: {relative_error}, expected: {expected}, actual: {actual}"
            );
        }
    }

    #[test]
    fn test_chroma_stft_decode() {
        let signal = Decoder::new()
            .unwrap()
            .decode(Path::new("data/s16_mono_22_5kHz.flac"))
            .unwrap()
            .samples;
        let mut stft = stft(&signal, 8192, 2205);

        let file = File::open("data/chroma.npy").unwrap();
        let expected_chroma = Array2::<f64>::read_npy(file).unwrap();

        let chroma = chroma_stft(22050, &mut stft, 8192, 12, -0.049_999_999_999_999_99).unwrap();

        assert!(!chroma.is_empty() && !expected_chroma.is_empty());

        for (expected, actual) in expected_chroma.iter().zip(chroma.iter()) {
            let relative_error = (expected - actual).abs() / expected.abs();
            assert!(
                relative_error < 0.01,
                "relative error: {relative_error}, expected: {expected}, actual: {actual}"
            );
        }
    }

    #[test]
    fn test_estimate_tuning() {
        let file = File::open("data/spectrum-chroma.npy").unwrap();
        let arr = Array2::<f64>::read_npy(file).unwrap();

        let tuning = estimate_tuning(22050, &arr, 2048, 0.01, 12).unwrap();
        assert!(
            0.000_001 > (-0.099_999_999_999_999_98 - tuning).abs(),
            "{tuning} !~= -0.09999999999999998"
        );
    }

    #[test]
    fn test_chroma_estimate_tuning_empty_fix() {
        // An all-zero spectrum has no peaks; the estimate must fall back to 0.
        assert!(0. == estimate_tuning(22050, &Array2::zeros((8192, 1)), 8192, 0.01, 12).unwrap());
    }

    #[test]
    fn test_estimate_tuning_decode() {
        let signal = Decoder::new()
            .unwrap()
            .decode(Path::new("data/s16_mono_22_5kHz.flac"))
            .unwrap()
            .samples;
        let stft = stft(&signal, 8192, 2205);

        let tuning = estimate_tuning(22050, &stft, 8192, 0.01, 12).unwrap();
        assert!(
            0.000_001 > (-0.049_999_999_999_999_99 - tuning).abs(),
            "{tuning} !~= -0.04999999999999999"
        );
    }

    #[test]
    fn test_pitch_tuning() {
        let file = File::open("data/pitch-tuning.npy").unwrap();
        let mut pitch = Array1::<f64>::read_npy(file).unwrap();
        let tuned = pitch_tuning(&mut pitch, 0.05, 12).unwrap();
        assert!(f64::EPSILON > (tuned + 0.1).abs(), "{tuned} != -0.1");
    }

    #[test]
    fn test_pitch_tuning_no_frequencies() {
        let mut frequencies = arr1(&[]);
        let tuned = pitch_tuning(&mut frequencies, 0.05, 12).unwrap();
        assert!(f64::EPSILON > tuned.abs(), "{tuned} != 0");
    }

    #[test]
    fn test_pip_track() {
        let file = File::open("data/spectrum-chroma.npy").unwrap();
        let spectrum = Array2::<f64>::read_npy(file).unwrap();

        let mags_file = File::open("data/spectrum-chroma-mags.npy").unwrap();
        let expected_mags = Array1::<f64>::read_npy(mags_file).unwrap();

        let pitches_file = File::open("data/spectrum-chroma-pitches.npy").unwrap();
        let expected_pitches = Array1::<f64>::read_npy(pitches_file).unwrap();

        // The reference arrays are compared in sorted order.
        let (mut pitches, mut mags) = pip_track(22050, &spectrum, 2048).unwrap();
        pitches.sort_by(|a, b| a.partial_cmp(b).unwrap());
        mags.sort_by(|a, b| a.partial_cmp(b).unwrap());

        for (expected_pitches, actual_pitches) in expected_pitches.iter().zip(pitches.iter()) {
            assert!(
                0.000_000_01 > (expected_pitches - actual_pitches).abs(),
                "{expected_pitches} !~= {actual_pitches}"
            );
        }
        for (expected_mags, actual_mags) in expected_mags.iter().zip(mags.iter()) {
            assert!(
                0.000_000_01 > (expected_mags - actual_mags).abs(),
                "{expected_mags} !~= {actual_mags}"
            );
        }
    }

    #[test]
    fn test_chroma_filter() {
        let file = File::open("data/chroma-filter.npy").unwrap();
        let expected_filter = Array2::<f64>::read_npy(file).unwrap();

        let filter = chroma_filter(22050, 2048, 12, -0.1).unwrap();

        for (expected, actual) in expected_filter.iter().zip(filter.iter()) {
            assert!(
                0.000_000_001 > (expected - actual).abs(),
                "{expected} !~= {actual}"
            );
        }
    }
}