songrec/fingerprinting/
algorithm.rs1use chfft::RFft1D;
2use std::error::Error;
3use std::io::BufReader;
4use std::collections::HashMap;
5
6use crate::fingerprinting::hanning::HANNING_WINDOW_2048_MULTIPLIERS;
7use crate::fingerprinting::signature_format::{DecodedSignature, FrequencyBand, FrequencyPeak};
8
9
10pub struct SignatureGenerator {
11
12 ring_buffer_of_samples: Vec<i16>,
15 ring_buffer_of_samples_index: usize,
17
18 reordered_ring_buffer_of_samples: Vec<f32>,
19 fft_outputs: Vec<Vec<f32>>,
22 fft_outputs_index: usize,
24
25 fft_object: RFft1D<f32>,
26
27 spread_fft_outputs: Vec<Vec<f32>>,
28 spread_fft_outputs_index: usize,
30
31 num_spread_ffts_done: u32,
32
33 signature: DecodedSignature,
34}
35
36impl SignatureGenerator {
37 pub fn make_signature_from_file(file_path: &str) -> Result<DecodedSignature, Box<dyn Error>> {
38 if !std::path::Path::new(file_path).exists() {
40 return Err(format!("File not found: {}", file_path).into());
41 }
42
43 let file = std::fs::File::open(file_path)
45 .map_err(|e| format!("Failed to open file '{}': {}", file_path, e))?;
46
47 let decoder = rodio::Decoder::new(BufReader::new(file))
48 .map_err(|e| format!("Failed to decode audio file '{}': {}. Note: M4A/AAC format may not be fully supported on all platforms.", file_path, e))?;
49
50 let converted_file = rodio::source::UniformSourceIterator::new(decoder, 1, 16000);
54
55 let raw_pcm_samples: Vec<i16> = converted_file.collect();
56
57 if raw_pcm_samples.is_empty() {
59 return Err(format!("No audio samples could be extracted from file '{}'. The file may be corrupted or in an unsupported format.", file_path).into());
60 }
61
62 let mut raw_pcm_samples_slice: &[i16] = &raw_pcm_samples;
63
64 let slice_len = raw_pcm_samples_slice.len().min(12 * 16000);
65
66 if slice_len < 3 * 16000 {
68 return Err(format!("Audio file '{}' is too short for fingerprinting. Need at least 3 seconds of audio, but only got {:.2} seconds.",
69 file_path, slice_len as f32 / 16000.0).into());
70 }
71
72 if raw_pcm_samples_slice.len() > 12 * 16000 {
73 let middle = raw_pcm_samples.len() / 2;
74
75 raw_pcm_samples_slice = &raw_pcm_samples_slice[middle - (6 * 16000)..middle + (6 * 16000)];
76 }
77
78 Ok(SignatureGenerator::make_signature_from_buffer(&raw_pcm_samples_slice[..slice_len]))
79 }
80
81 pub fn make_signature_from_buffer(s16_mono_16khz_buffer: &[i16]) -> DecodedSignature {
82 let mut this = SignatureGenerator {
83 ring_buffer_of_samples: vec![0i16; 2048],
84 ring_buffer_of_samples_index: 0,
85
86 reordered_ring_buffer_of_samples: vec![0.0f32; 2048],
87
88 fft_outputs: vec![vec![0.0f32; 1025]; 256],
89 fft_outputs_index: 0,
90
91 fft_object: RFft1D::<f32>::new(2048),
92
93 spread_fft_outputs: vec![vec![0.0f32; 1025]; 256],
94 spread_fft_outputs_index: 0,
95
96 num_spread_ffts_done: 0,
97
98 signature: DecodedSignature {
99 sample_rate_hz: 16000,
100 number_samples: s16_mono_16khz_buffer.len() as u32,
101 frequency_band_to_sound_peaks: HashMap::new(),
102 },
103 }; for chunk in s16_mono_16khz_buffer.chunks_exact(128) {
104 this.do_fft_internal(chunk);
105
106 this.do_peak_spreading();
107
108 this.num_spread_ffts_done += 1;
109
110 if this.num_spread_ffts_done >= 46 {
111 this.do_peak_recognition();
112 }
113 }
114
115 this.signature
116 }
117
118 pub fn new() -> Self {
120 Self {
121 ring_buffer_of_samples: vec![0i16; 2048],
122 ring_buffer_of_samples_index: 0,
123 reordered_ring_buffer_of_samples: vec![0.0f32; 2048],
124 fft_outputs: vec![vec![0.0f32; 1025]; 256],
125 fft_outputs_index: 0,
126 fft_object: RFft1D::<f32>::new(2048),
127 spread_fft_outputs: vec![vec![0.0f32; 1025]; 256],
128 spread_fft_outputs_index: 0,
129 num_spread_ffts_done: 0,
130 signature: DecodedSignature {
131 sample_rate_hz: 16000,
132 number_samples: 0,
133 frequency_band_to_sound_peaks: HashMap::new(),
134 },
135 }
136 }
137
138 pub fn do_fft(&mut self, s16_mono_16khz_buffer: &[i16], sample_rate: u32) {
141 self.signature.number_samples += s16_mono_16khz_buffer.len() as u32;
143 self.signature.sample_rate_hz = sample_rate;
144
145 self.do_fft_internal(s16_mono_16khz_buffer);
147
148 self.do_peak_spreading();
149 self.num_spread_ffts_done += 1;
150
151 if self.num_spread_ffts_done >= 46 {
152 self.do_peak_recognition();
153 }
154 }
155
156 pub fn get_signature(&self) -> DecodedSignature {
158 self.signature.clone()
159 }
160
161 fn do_fft_internal(&mut self, s16_mono_16khz_buffer: &[i16]) {
162
163 self.ring_buffer_of_samples[self.ring_buffer_of_samples_index..self.ring_buffer_of_samples_index + 128].copy_from_slice(s16_mono_16khz_buffer);
166
167 self.ring_buffer_of_samples_index += 128;
168 self.ring_buffer_of_samples_index &= 2047;
169
170 for (index, multiplier) in HANNING_WINDOW_2048_MULTIPLIERS.iter().enumerate() {
173 self.reordered_ring_buffer_of_samples[index] =
174 self.ring_buffer_of_samples[(index + self.ring_buffer_of_samples_index) & 2047] as f32 *
175 multiplier;
176 }
177
178 let complex_fft_results = self.fft_object.forward(&self.reordered_ring_buffer_of_samples);
181
182 assert_eq!(complex_fft_results.len(), 1025);
183
184 let real_fft_results = &mut self.fft_outputs[self.fft_outputs_index];
187
188 for index in 0..=1024 {
189 real_fft_results[index] = (
190 (
191 complex_fft_results[index].re.powi(2) +
192 complex_fft_results[index].im.powi(2)
193 ) / ((1 << 17) as f32)
194 ).max(0.0000000001);
195 }
196
197 self.fft_outputs_index += 1;
198 self.fft_outputs_index &= 255;
199 }
200
201 fn do_peak_spreading(&mut self) {
202 let real_fft_results = &self.fft_outputs[((self.fft_outputs_index as i32 - 1) & 255) as usize];
203
204 let spread_fft_results = &mut self.spread_fft_outputs[self.spread_fft_outputs_index];
205
206 spread_fft_results.copy_from_slice(real_fft_results);
209
210 for position in 0..=1022 {
211 spread_fft_results[position] = spread_fft_results[position]
212 .max(spread_fft_results[position + 1])
213 .max(spread_fft_results[position + 2]);
214 }
215
216 let spread_fft_results_copy = spread_fft_results.clone(); for position in 0..=1024 {
221 for former_fft_number in &[1, 3, 6] {
222 let former_fft_output = &mut self.spread_fft_outputs[((self.spread_fft_outputs_index as i32 - *former_fft_number) & 255) as usize];
223
224 former_fft_output[position] = former_fft_output[position]
225 .max(spread_fft_results_copy[position]);
226 }
227 }
228
229 self.spread_fft_outputs_index += 1;
230 self.spread_fft_outputs_index &= 255;
231 }
232
233 fn do_peak_recognition(&mut self) {
234
235 let fft_minus_46 = &self.fft_outputs[((self.fft_outputs_index as i32 - 46) & 255) as usize];
239 let fft_minus_49 = &self.spread_fft_outputs[((self.spread_fft_outputs_index as i32 - 49) & 255) as usize];
240
241 for bin_position in 10..=1014 {
242
243 if fft_minus_46[bin_position] >= 1.0 / 64.0 &&
246 fft_minus_46[bin_position] >= fft_minus_49[bin_position - 1] {
247
248 let mut max_neighbor_in_fft_minus_49: f32 = 0.0;
251
252 for neighbor_offset in &[-10, -7, -4, -3, 1, 2, 5, 8] {
253 max_neighbor_in_fft_minus_49 = max_neighbor_in_fft_minus_49
254 .max(fft_minus_49[(bin_position as i32 + *neighbor_offset) as usize]);
255 }
256
257 if fft_minus_46[bin_position] > max_neighbor_in_fft_minus_49 {
258
259 let mut max_neighbor_in_other_adjacent_ffts = max_neighbor_in_fft_minus_49;
262
263 for other_offset in &[-53, -45,
264 165, 172, 179, 186, 193, 200,
265 214, 221, 228, 235, 242, 249] {
266 let other_fft = &self.spread_fft_outputs[((self.spread_fft_outputs_index as i32 + other_offset) & 255) as usize];
267
268 max_neighbor_in_other_adjacent_ffts = max_neighbor_in_other_adjacent_ffts
269 .max(other_fft[bin_position - 1]);
270 }
271
272 if fft_minus_46[bin_position] > max_neighbor_in_other_adjacent_ffts {
273
274 let fft_pass_number = self.num_spread_ffts_done - 46;
277
278 let peak_magnitude: f32 = fft_minus_46[bin_position].ln().max(1.0 / 64.0) * 1477.3 + 6144.0;
279 let peak_magnitude_before: f32 = fft_minus_46[bin_position - 1].ln().max(1.0 / 64.0) * 1477.3 + 6144.0;
280 let peak_magnitude_after: f32 = fft_minus_46[bin_position + 1].ln().max(1.0 / 64.0) * 1477.3 + 6144.0;
281
282 let peak_variation_1: f32 = peak_magnitude * 2.0 - peak_magnitude_before - peak_magnitude_after;
283 let peak_variation_2: f32 = (peak_magnitude_after - peak_magnitude_before) * 32.0 / peak_variation_1;
284
285 let corrected_peak_frequency_bin: u16 = ((bin_position as i32 * 64) + (peak_variation_2 as i32)) as u16;
286
287 assert!(peak_variation_1 >= 0.0);
288
289 let frequency_hz: f32 = corrected_peak_frequency_bin as f32 * (16000.0 / 2.0 / 1024.0 / 64.0);
294
295 let frequency_band = match frequency_hz as i32 {
300 250..=519 => FrequencyBand::_250_520,
301 520..=1449 => FrequencyBand::_520_1450,
302 1450..=3499 => FrequencyBand::_1450_3500,
303 3500..=5500 => FrequencyBand::_3500_5500,
304 _ => { continue; }
305 };
306
307 self.signature.frequency_band_to_sound_peaks
312 .entry(frequency_band)
313 .or_default();
314
315 self.signature.frequency_band_to_sound_peaks.get_mut(&frequency_band).unwrap().push(
316 FrequencyPeak {
317 fft_pass_number,
318 peak_magnitude: peak_magnitude as u16,
319 corrected_peak_frequency_bin
320 }
321 );
322 }
323 }
324 }
325 }
326 }
327}