1extern crate noisy_float;
7
8use crate::Feature;
9
10use super::errors::{AnalysisError, AnalysisResult};
11use super::utils::{hz_to_octs_inplace, stft, Normalize};
12use ndarray::{arr1, arr2, concatenate, s, Array, Array1, Array2, Axis, Zip};
13use ndarray_stats::interpolate::Midpoint;
14use ndarray_stats::QuantileExt;
15use noisy_float::prelude::*;
16
17#[derive(Debug, Clone)]
28#[allow(clippy::module_name_repetitions)]
29pub struct ChromaDesc {
30 sample_rate: u32,
31 n_chroma: u32,
32 values_chroma: Array2<f64>,
33}
34
35impl Normalize for ChromaDesc {
36 const MAX_VALUE: Feature = 0.12;
37 const MIN_VALUE: Feature = 0.;
38}
39
40impl ChromaDesc {
41 pub const WINDOW_SIZE: usize = 8192;
42
43 #[must_use]
44 pub fn new(sample_rate: u32, n_chroma: u32) -> Self {
45 Self {
46 sample_rate,
47 n_chroma,
48 values_chroma: Array2::zeros((n_chroma as usize, 0)),
49 }
50 }
51
52 #[allow(clippy::missing_errors_doc, clippy::missing_panics_doc)]
59 pub fn do_(&mut self, signal: &[f32]) -> AnalysisResult<()> {
60 let mut stft = stft(signal, Self::WINDOW_SIZE, 2205);
61 let tuning = estimate_tuning(self.sample_rate, &stft, Self::WINDOW_SIZE, 0.01, 12)?;
62 let chroma = chroma_stft(
63 self.sample_rate,
64 &mut stft,
65 Self::WINDOW_SIZE,
66 self.n_chroma,
67 tuning,
68 )?;
69 self.values_chroma = concatenate![Axis(1), self.values_chroma, chroma];
70 Ok(())
71 }
72
73 pub fn get_value(&mut self) -> Vec<Feature> {
84 #[allow(clippy::cast_possible_truncation)]
85 chroma_interval_features(&self.values_chroma)
86 .mapv(|x| self.normalize(x as Feature))
87 .to_vec()
88 }
89}
90
91#[allow(
94 clippy::missing_errors_doc,
95 clippy::missing_panics_doc,
96 clippy::module_name_repetitions
97)]
98#[must_use]
99pub fn chroma_interval_features(chroma: &Array2<f64>) -> Array1<f64> {
100 let chroma = normalize_feature_sequence(&chroma.mapv(|x| (x * 15.).exp()));
101 let templates = arr2(&[
102 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
103 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
104 [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
105 [0, 0, 1, 0, 0, 0, 0, 1, 1, 0],
106 [0, 0, 0, 1, 0, 0, 1, 0, 0, 1],
107 [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
108 [0, 0, 0, 0, 0, 1, 0, 0, 1, 0],
109 [0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
110 [0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
111 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
112 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
113 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
114 ]);
115 let interval_feature_matrix = extract_interval_features(&chroma, &templates);
116 interval_feature_matrix.mean_axis(Axis(1)).unwrap()
117}
118
119#[must_use]
120pub fn extract_interval_features(chroma: &Array2<f64>, templates: &Array2<i32>) -> Array2<f64> {
121 let mut f_intervals: Array2<f64> = Array::zeros((chroma.shape()[1], templates.shape()[1]));
122 for (template, mut f_interval) in templates
123 .axis_iter(Axis(1))
124 .zip(f_intervals.axis_iter_mut(Axis(1)))
125 {
126 for shift in 0..12 {
127 let mut vec: Vec<i32> = template.to_vec();
128 vec.rotate_right(shift);
129 let rolled = arr1(&vec);
130 let power = Zip::from(chroma.t())
131 .and_broadcast(&rolled)
132 .map_collect(|&f, &s| f.powi(s))
133 .map_axis_mut(Axis(1), |x| x.product());
134 f_interval += &power;
135 }
136 }
137 f_intervals.t().to_owned()
138}
139
140pub fn normalize_feature_sequence(feature: &Array2<f64>) -> Array2<f64> {
141 let mut normalized_sequence = feature.to_owned();
142 for mut column in normalized_sequence.columns_mut() {
143 let mut sum = column.mapv(f64::abs).sum();
144 if sum < 0.0001 {
145 sum = 1.;
146 }
147 column /= sum;
148 }
149
150 normalized_sequence
151}
152
153#[allow(
161 clippy::missing_errors_doc,
162 clippy::missing_panics_doc,
163 clippy::module_name_repetitions
164)]
165pub fn chroma_filter(
166 sample_rate: u32,
167 n_fft: usize,
168 n_chroma: u32,
169 tuning: f64,
170) -> AnalysisResult<Array2<f64>> {
171 let ctroct = 5.0;
172 let octwidth = 2.;
173 let n_chroma_float = f64::from(n_chroma);
174 #[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
175 let n_chroma2 = (n_chroma_float / 2.0).round(); let frequencies = Array::linspace(0., f64::from(sample_rate), n_fft + 1);
178
179 let mut freq_bins = frequencies;
180 hz_to_octs_inplace(&mut freq_bins, tuning, n_chroma);
181 freq_bins.mapv_inplace(|x| x * n_chroma_float);
182 freq_bins[0] = 1.5f64.mul_add(-n_chroma_float, freq_bins[1]);
183
184 let mut binwidth_bins = Array::ones(freq_bins.raw_dim());
185 binwidth_bins.slice_mut(s![0..freq_bins.len() - 1]).assign(
186 &(&freq_bins.slice(s![1..]) - &freq_bins.slice(s![..-1])).mapv(|x| {
187 if x <= 1. {
188 1.
189 } else {
190 x
191 }
192 }),
193 );
194
195 let mut d: Array2<f64> = Array::zeros((n_chroma as usize, (freq_bins).len()));
196 for (idx, mut row) in d.rows_mut().into_iter().enumerate() {
197 #[allow(clippy::cast_precision_loss)]
198 row.fill(idx as f64);
199 }
200 d = -d + &freq_bins;
201
202 d.mapv_inplace(|x| 10f64.mul_add(n_chroma_float, x + n_chroma2) % n_chroma_float - n_chroma2);
203 d = d / binwidth_bins;
204 d.mapv_inplace(|x| (-0.5 * (2. * x) * (2. * x)).exp());
205
206 let mut wts = d;
207 for mut col in wts.columns_mut() {
209 let mut sum = col.mapv(|x| x * x).sum().sqrt();
210 if sum < f64::MIN_POSITIVE {
211 sum = 1.;
212 }
213 col /= sum;
214 }
215
216 freq_bins.mapv_inplace(|x| (-0.5 * ((x / n_chroma_float - ctroct) / octwidth).powi(2)).exp());
217
218 wts *= &freq_bins;
219
220 let mut uninit: Vec<f64> = vec![0.; (wts).len()];
222 unsafe {
223 uninit.set_len(wts.len());
224 }
225 let mut b = Array::from(uninit)
226 .to_shape(wts.dim())
227 .map_err(|e| AnalysisError::AnalysisError(format!("in chroma: {e}")))?
228 .to_owned();
229 b.slice_mut(s![-3.., ..]).assign(&wts.slice(s![..3, ..]));
230 b.slice_mut(s![..-3, ..]).assign(&wts.slice(s![3.., ..]));
231
232 wts = b;
233 let non_aliased = 1 + n_fft / 2;
234 Ok(wts.slice_move(s![.., ..non_aliased]))
235}
236
237#[allow(clippy::missing_errors_doc, clippy::missing_panics_doc)]
238pub fn pip_track(
239 sample_rate: u32,
240 spectrum: &Array2<f64>,
241 n_fft: usize,
242) -> AnalysisResult<(Vec<f64>, Vec<f64>)> {
243 let sample_rate_float = f64::from(sample_rate);
244 let fmin = 150.0_f64;
245 let fmax = 4000.0_f64.min(sample_rate_float / 2.0);
246 let threshold = 0.1;
247
248 let fft_freqs = Array::linspace(0., sample_rate_float / 2., 1 + n_fft / 2);
249
250 let length = spectrum.len_of(Axis(0));
251
252 let freq_mask = fft_freqs
254 .iter()
255 .map(|&f| (fmin <= f) && (f < fmax))
256 .collect::<Vec<bool>>();
257
258 let ref_value = spectrum.map_axis(Axis(0), |x| {
259 let first: f64 = *x.first().expect("empty spectrum axis");
260 let max = x.fold(first, |acc, &elem| if acc > elem { acc } else { elem });
261 threshold * max
262 });
263
264 let taken_columns = freq_mask
266 .iter()
267 .fold(0, |acc, &x| if x { acc + 1 } else { acc });
268 let mut pitches = Vec::with_capacity(taken_columns * length);
269 let mut mags = Vec::with_capacity(taken_columns * length);
270
271 let beginning = freq_mask
272 .iter()
273 .position(|&b| b)
274 .ok_or_else(|| AnalysisError::AnalysisError(String::from("in chroma")))?;
275 let end = freq_mask
276 .iter()
277 .rposition(|&b| b)
278 .ok_or_else(|| AnalysisError::AnalysisError(String::from("in chroma")))?;
279
280 let zipped = Zip::indexed(spectrum.slice(s![beginning..end - 3, ..]))
281 .and(spectrum.slice(s![beginning + 1..end - 2, ..]))
282 .and(spectrum.slice(s![beginning + 2..end - 1, ..]));
283
284 zipped.for_each(|(i, j), &before_elem, &elem, &after_elem| {
287 if elem > ref_value[j] && after_elem <= elem && before_elem < elem {
288 let avg = 0.5 * (after_elem - before_elem);
289 let mut shift = 2f64.mul_add(elem, -after_elem) - before_elem;
290 if shift.abs() < f64::MIN_POSITIVE {
291 shift += 1.;
292 }
293 shift = avg / shift;
294 #[allow(clippy::cast_precision_loss)]
295 pitches.push(((i + beginning + 1) as f64 + shift) * sample_rate_float / n_fft as f64);
296 mags.push((0.5 * avg).mul_add(shift, elem));
297 }
298 });
299
300 Ok((pitches, mags))
301}
302
303#[allow(clippy::missing_errors_doc, clippy::missing_panics_doc)]
305pub fn pitch_tuning(
306 frequencies: &mut Array1<f64>,
307 resolution: f64,
308 bins_per_octave: u32,
309) -> AnalysisResult<f64> {
310 if frequencies.is_empty() {
311 return Ok(0.0);
312 }
313 hz_to_octs_inplace(frequencies, 0.0, 12);
314 frequencies.mapv_inplace(|x| f64::from(bins_per_octave) * x % 1.0);
315
316 frequencies.mapv_inplace(|x| if x >= 0.5 { x - 1. } else { x });
318
319 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
320 let indexes = ((frequencies.to_owned() - -0.5) / resolution).mapv(|x| x as usize);
321 #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
322 let mut counts: Array1<usize> = Array::zeros(((0.5 - -0.5) / resolution) as usize);
323 for &idx in &indexes {
324 counts[idx] += 1;
325 }
326 let max_index = counts
327 .argmax()
328 .map_err(|e| AnalysisError::AnalysisError(format!("in chroma: {e}")))?;
329
330 #[allow(clippy::cast_precision_loss)]
332 Ok((100. * resolution).mul_add(max_index as f64, -50.) / 100.)
333}
334
335#[allow(clippy::missing_errors_doc, clippy::missing_panics_doc)]
336pub fn estimate_tuning(
337 sample_rate: u32,
338 spectrum: &Array2<f64>,
339 n_fft: usize,
340 resolution: f64,
341 bins_per_octave: u32,
342) -> AnalysisResult<f64> {
343 let (pitch, mag) = pip_track(sample_rate, spectrum, n_fft)?;
344
345 let (filtered_pitch, filtered_mag): (Vec<N64>, Vec<N64>) = pitch
346 .iter()
347 .zip(&mag)
348 .filter(|(&p, _)| p > 0.)
349 .map(|(x, y)| (n64(*x), n64(*y)))
350 .unzip();
351
352 if pitch.is_empty() {
353 return Ok(0.);
354 }
355
356 let threshold: N64 = Array::from(filtered_mag.clone())
357 .quantile_axis_mut(Axis(0), n64(0.5), &Midpoint)
358 .map_err(|e| AnalysisError::AnalysisError(format!("in chroma: {e}")))?
359 .into_scalar();
360 let mut pitch = filtered_pitch
361 .iter()
362 .zip(&filtered_mag)
363 .filter_map(|(&p, &m)| if m >= threshold { Some(p.into()) } else { None })
364 .collect::<Array1<f64>>();
365 pitch_tuning(&mut pitch, resolution, bins_per_octave)
366}
367
368#[allow(
369 clippy::missing_errors_doc,
370 clippy::missing_panics_doc,
371 clippy::module_name_repetitions
372)]
373pub fn chroma_stft(
374 sample_rate: u32,
375 spectrum: &mut Array2<f64>,
376 n_fft: usize,
377 n_chroma: u32,
378 tuning: f64,
379) -> AnalysisResult<Array2<f64>> {
380 spectrum.par_mapv_inplace(|x| x * x);
381 let mut raw_chroma = chroma_filter(sample_rate, n_fft, n_chroma, tuning)?;
382
383 raw_chroma = raw_chroma.dot(spectrum);
384 for mut row in raw_chroma.columns_mut() {
385 let mut sum = row.mapv(f64::abs).sum();
386 if sum < f64::MIN_POSITIVE {
387 sum = 1.;
388 }
389 row /= sum;
390 }
391 Ok(raw_chroma)
392}
393
394#[cfg(test)]
395mod test {
396 use super::*;
397 use crate::{
398 decoder::{Decoder as _, MecompDecoder as Decoder},
399 utils::stft,
400 SAMPLE_RATE,
401 };
402 use ndarray::{arr1, arr2, Array2};
403 use ndarray_npy::ReadNpyExt as _;
404 use std::{fs::File, path::Path};
405
406 #[test]
407 fn test_chroma_interval_features() {
408 let file = File::open("data/chroma.npy").unwrap();
409 let chroma = Array2::<f64>::read_npy(file).unwrap();
410 let features = chroma_interval_features(&chroma);
411 let expected_features = arr1(&[
412 0.038_602_84,
413 0.021_852_81,
414 0.042_243_79,
415 0.063_852_78,
416 0.073_111_48,
417 0.025_125_66,
418 0.003_198_99,
419 0.003_113_08,
420 0.001_074_33,
421 0.002_418_61,
422 ]);
423 for (expected, actual) in expected_features.iter().zip(&features) {
424 assert!(
425 0.000_000_01 > (expected - actual.abs()),
426 "{expected} !~= {actual}"
427 );
428 }
429 }
430
431 #[test]
432 fn test_extract_interval_features() {
433 let file = File::open("data/chroma-interval.npy").unwrap();
434 let chroma = Array2::<f64>::read_npy(file).unwrap();
435 let templates = arr2(&[
436 [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
437 [1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
438 [0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
439 [0, 0, 1, 0, 0, 0, 0, 1, 1, 0],
440 [0, 0, 0, 1, 0, 0, 1, 0, 0, 1],
441 [0, 0, 0, 0, 1, 0, 0, 0, 0, 0],
442 [0, 0, 0, 0, 0, 1, 0, 0, 1, 0],
443 [0, 0, 0, 0, 0, 0, 1, 1, 0, 0],
444 [0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
445 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
446 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
447 [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
448 ]);
449
450 let file = File::open("data/interval-feature-matrix.npy").unwrap();
451 let expected_interval_features = Array2::<f64>::read_npy(file).unwrap();
452
453 let interval_features = extract_interval_features(&chroma, &templates);
454 for (expected, actual) in expected_interval_features
455 .iter()
456 .zip(interval_features.iter())
457 {
458 assert!(
459 0.000_000_1 > (expected - actual).abs(),
460 "{expected} !~= {actual}"
461 );
462 }
463 }
464
465 #[test]
466 fn test_normalize_feature_sequence() {
467 let array = arr2(&[[0.1, 0.3, 0.4], [1.1, 0.53, 1.01]]);
468 let expected_array = arr2(&[
469 [0.083_333_33, 0.361_445_78, 0.283_687_94],
470 [0.916_666_67, 0.638_554_22, 0.716_312_06],
471 ]);
472
473 let normalized_array = normalize_feature_sequence(&array);
474
475 assert!(!array.is_empty() && !expected_array.is_empty());
476
477 for (expected, actual) in normalized_array.iter().zip(expected_array.iter()) {
478 assert!(
479 0.000_000_1 > (expected - actual).abs(),
480 "{expected} !~= {actual}"
481 );
482 }
483 }
484
485 #[test]
486 fn test_chroma_desc() {
487 let song = Decoder::decode(Path::new("data/s16_mono_22_5kHz.flac")).unwrap();
488 let mut chroma_desc = ChromaDesc::new(SAMPLE_RATE, 12);
489 chroma_desc.do_(&song.samples).unwrap();
490 let expected_values = [
491 -0.356_619_36,
492 -0.635_786_53,
493 -0.295_936_82,
494 0.064_213_04,
495 0.218_524_58,
496 -0.581_239,
497 -0.946_683_5,
498 -0.948_115_3,
499 -0.982_094_5,
500 -0.959_689_74,
501 ];
502 for (expected, actual) in expected_values.iter().zip(chroma_desc.get_value().iter()) {
503 let relative_error = (expected - actual).abs() / expected.abs();
505 assert!(
506 relative_error < 0.01,
507 "relative error: {relative_error}, expected: {expected}, actual: {actual}"
508 );
509 }
510 }
511
512 #[test]
513 fn test_chroma_stft_decode() {
514 let signal = Decoder::decode(Path::new("data/s16_mono_22_5kHz.flac"))
515 .unwrap()
516 .samples;
517 let mut stft = stft(&signal, 8192, 2205);
518
519 let file = File::open("data/chroma.npy").unwrap();
520 let expected_chroma = Array2::<f64>::read_npy(file).unwrap();
521
522 let chroma = chroma_stft(22050, &mut stft, 8192, 12, -0.049_999_999_999_999_99).unwrap();
523
524 assert!(!chroma.is_empty() && !expected_chroma.is_empty());
525
526 for (expected, actual) in expected_chroma.iter().zip(chroma.iter()) {
527 let relative_error = (expected - actual).abs() / expected.abs();
529 assert!(
530 relative_error < 0.01,
531 "relative error: {relative_error}, expected: {expected}, actual: {actual}"
532 );
533 }
534 }
535
536 #[test]
537 fn test_estimate_tuning() {
538 let file = File::open("data/spectrum-chroma.npy").unwrap();
539 let arr = Array2::<f64>::read_npy(file).unwrap();
540
541 let tuning = estimate_tuning(22050, &arr, 2048, 0.01, 12).unwrap();
542 assert!(
543 0.000_001 > (-0.099_999_999_999_999_98 - tuning).abs(),
544 "{tuning} !~= -0.09999999999999998"
545 );
546 }
547
548 #[test]
549 fn test_chroma_estimate_tuning_empty_fix() {
550 assert!(0. == estimate_tuning(22050, &Array2::zeros((8192, 1)), 8192, 0.01, 12).unwrap());
551 }
552
553 #[test]
554 fn test_estimate_tuning_decode() {
555 let signal = Decoder::decode(Path::new("data/s16_mono_22_5kHz.flac"))
556 .unwrap()
557 .samples;
558 let stft = stft(&signal, 8192, 2205);
559
560 let tuning = estimate_tuning(22050, &stft, 8192, 0.01, 12).unwrap();
561 assert!(
562 0.000_001 > (-0.049_999_999_999_999_99 - tuning).abs(),
563 "{tuning} !~= -0.04999999999999999"
564 );
565 }
566
567 #[test]
568 fn test_pitch_tuning() {
569 let file = File::open("data/pitch-tuning.npy").unwrap();
570 let mut pitch = Array1::<f64>::read_npy(file).unwrap();
571
572 assert_eq!(-0.1, pitch_tuning(&mut pitch, 0.05, 12).unwrap());
573 }
574
575 #[test]
576 fn test_pitch_tuning_no_frequencies() {
577 let mut frequencies = arr1(&[]);
578 assert_eq!(0.0, pitch_tuning(&mut frequencies, 0.05, 12).unwrap());
579 }
580
581 #[test]
582 fn test_pip_track() {
583 let file = File::open("data/spectrum-chroma.npy").unwrap();
584 let spectrum = Array2::<f64>::read_npy(file).unwrap();
585
586 let mags_file = File::open("data/spectrum-chroma-mags.npy").unwrap();
587 let expected_mags = Array1::<f64>::read_npy(mags_file).unwrap();
588
589 let pitches_file = File::open("data/spectrum-chroma-pitches.npy").unwrap();
590 let expected_pitches = Array1::<f64>::read_npy(pitches_file).unwrap();
591
592 let (mut pitches, mut mags) = pip_track(22050, &spectrum, 2048).unwrap();
593 pitches.sort_by(|a, b| a.partial_cmp(b).unwrap());
594 mags.sort_by(|a, b| a.partial_cmp(b).unwrap());
595
596 for (expected_pitches, actual_pitches) in expected_pitches.iter().zip(pitches.iter()) {
597 assert!(
598 0.000_000_01 > (expected_pitches - actual_pitches).abs(),
599 "{expected_pitches} !~= {actual_pitches}"
600 );
601 }
602 for (expected_mags, actual_mags) in expected_mags.iter().zip(mags.iter()) {
603 assert!(
604 0.000_000_01 > (expected_mags - actual_mags).abs(),
605 "{expected_mags} !~= {actual_mags}"
606 );
607 }
608 }
609
610 #[test]
611 fn test_chroma_filter() {
612 let file = File::open("data/chroma-filter.npy").unwrap();
613 let expected_filter = Array2::<f64>::read_npy(file).unwrap();
614
615 let filter = chroma_filter(22050, 2048, 12, -0.1).unwrap();
616
617 for (expected, actual) in expected_filter.iter().zip(filter.iter()) {
618 assert!(
619 0.000_000_001 > (expected - actual).abs(),
620 "{expected} !~= {actual}"
621 );
622 }
623 }
624}