dywapitchtrack/
lib.rs

1/* This code is a Rust port of dywapitchtrack by Antoine Schmitt.
2   It implements a wavelet algorithm, described in a paper by Eric Larson and Ross Maddox:
3   “Real-Time Time-Domain Pitch Tracking Using Wavelets” of UIUC Physics.
4
5   Note that the original implementation by Schmitt uses double instead of float data type.
6 -------
7 Dynamic Wavelet Algorithm Pitch Tracking library
8 Released under the MIT open source licence
9
10 Copyright (c) 2010 Antoine Schmitt
11
12 Permission is hereby granted, free of charge, to any person obtaining a copy
13 of this software and associated documentation files (the "Software"), to deal
14 in the Software without restriction, including without limitation the rights
15 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
16 copies of the Software, and to permit persons to whom the Software is
17 furnished to do so, subject to the following conditions:
18
19 The above copyright notice and this permission notice shall be included in
20 all copies or substantial portions of the Software.
21
22 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
23 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
24 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
25 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
26 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
27 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
28 THE SOFTWARE.
29*/
30
31#[derive(Clone)]
32pub struct DywaPitchTracker {
33    prev_pitch: f32,
34    pitch_confidence: i32,
35
36    pub max_flwt_levels: i32,
37    pub max_frequency: f32,
38    pub difference_levels_n: i32,
39    pub maxima_threshold_ratio: f32,
40    pub sample_rate_hz: i32,
41}
42
43impl DywaPitchTracker {
44    pub fn new() -> Self {
45        Self {
46            prev_pitch: -1.0,
47            pitch_confidence: -1,
48            max_flwt_levels: 6,
49            max_frequency: 3000.0,
50            difference_levels_n: 3,
51            maxima_threshold_ratio: 0.75,
52            sample_rate_hz: 44100,
53        }
54    }
55
56    pub fn compute_pitch(
57        &mut self,
58        samples: &Vec<f32>,
59        start_sample: usize,
60        sample_count: usize,
61    ) -> f32 {
62        let mut raw_pitch = self.compute_wavelet_pitch(samples, start_sample, sample_count);
63        if self.sample_rate_hz != 44100 {
64            raw_pitch *= self.sample_rate_hz as f32 / 44100.0
65        }
66
67        return self.dynamic_post_processing(raw_pitch);
68    }
69
70    pub fn clear_pitch_history(&mut self) {
71        self.prev_pitch = -1.0;
72        self.pitch_confidence = -1;
73    }
74
75    pub fn needed_sample_count(min_freq: i32) -> i32 {
76        let mut nb_sam: i32 = 3 * 44100 / min_freq;
77        nb_sam = DywaPitchTracker::is_power_of_2(nb_sam);
78        return nb_sam;
79    }
80
81    fn float_abs(a: f32) -> f32 {
82        return if a < 0.0 { -a } else { a };
83    }
84
85    fn is_power_of_2(value: i32) -> i32 {
86        if value == 0 {
87            return 1;
88        }
89        if value == 2 {
90            return 1;
91        }
92        if (value & 0x1) != 0 {
93            return 0;
94        }
95        return DywaPitchTracker::is_power_of_2(value >> 1);
96    }
97
98    fn bit_count(value: i32) -> i32 {
99        if value == 0 {
100            return 0;
101        }
102        if value == 1 {
103            return 1;
104        }
105        if value == 2 {
106            return 2;
107        }
108        return DywaPitchTracker::bit_count(value >> 1) + 1;
109    }
110
111    fn ceil_power_of_2(value: i32) -> i32 {
112        if DywaPitchTracker::is_power_of_2(value) != 0 {
113            return value;
114        }
115
116        if value == 1 {
117            return 2;
118        }
119
120        let i = DywaPitchTracker::bit_count(value);
121        let mut res = 1;
122        let mut j = 0;
123
124        while j < i {
125            res <<= 1;
126            j += 1;
127        }
128        return res;
129    }
130
131    fn floor_power_of_2(value: i32) -> i32 {
132        if DywaPitchTracker::is_power_of_2(value) != 0 {
133            return value;
134        }
135        return DywaPitchTracker::ceil_power_of_2(value) / 2;
136    }
137
138    fn int_max(a: i32, b: i32) -> i32 {
139        let max = if a > b { a } else { b };
140        return max;
141    }
142
143    fn int_min(a: i32, b: i32) -> i32 {
144        let max = if a < b { a } else { b };
145        return max;
146    }
147
148    fn int_abs(x: i32) -> i32 {
149        if x > 0 {
150            return x;
151        }
152        return -x;
153    }
154
155    fn power_of_2(n: i32) -> i32 {
156        let mut res = 1;
157        let mut j = 0;
158        while j < n {
159            res <<= 1;
160            j += 1
161        }
162        return res;
163    }
164
165    fn compute_wavelet_pitch(
166        &mut self,
167        samples: &Vec<f32>,
168        start_sample: usize,
169        sample_count: usize,
170    ) -> f32 {
171        let mut pitch_f: f32 = 0.0;
172
173        let mut si: f32;
174        let mut si1: f32;
175
176        let sample_count = DywaPitchTracker::floor_power_of_2(sample_count as i32) as usize;
177
178        let mut sam = samples[start_sample..start_sample+sample_count].to_vec();
179        let mut cur_sam_nb = sample_count;
180
181        let mut distances: Vec<i32> = vec![0; sample_count];
182        let mut mins: Vec<i32> = vec![0; sample_count];
183        let mut maxs: Vec<i32> = vec![0; sample_count];
184        let mut nb_mins: i32;
185        let mut nb_maxs: i32;
186
187        let amplitude_treshold: f32;
188        let mut the_dc: f32 = 0.0;
189
190        {
191            let mut max_value = f32::MIN;
192            let mut min_value = f32::MAX;
193
194            let mut i = 0;
195            while i < sample_count {
196                si = sam[i];
197                the_dc = the_dc + si;
198
199                if si > max_value {
200                    max_value = si;
201                }
202                if si < min_value {
203                    min_value = si;
204                }
205                i += 1;
206            }
207
208            the_dc /= sample_count as f32;
209
210            max_value -= the_dc;
211            min_value -= the_dc;
212
213            let amplitude_max = if max_value > -min_value {
214                max_value
215            } else {
216                -min_value
217            };
218
219            amplitude_treshold = amplitude_max * self.maxima_threshold_ratio;
220        }
221
222        let mut cur_level = 0;
223        let mut cur_mode_distance = -1.0;
224        let mut delta: i32;
225
226        loop {
227            delta = (44100.0
228                / (DywaPitchTracker::power_of_2(cur_level) as f32 * self.max_frequency))
229                as i32;
230
231            if cur_sam_nb < 2 {
232                return pitch_f;
233            }
234
235            let mut dv: f32;
236            let mut previous_dv: f32 = -1000.0;
237
238            nb_maxs = 0;
239            nb_mins = 0;
240
241            let mut last_min_index = -1000000;
242            let mut last_max_index = -1000000;
243
244            let mut find_max = 0;
245            let mut find_min = 0;
246
247            let mut i = 1;
248            while i < cur_sam_nb {
249                si = sam[i as usize] - the_dc;
250                si1 = sam[(i - 1) as usize] - the_dc;
251
252                if si1 <= 0.0 && si > 0.0 {
253                    find_max = 1;
254                    find_min = 0;
255                }
256                if si1 >= 0.0 && si < 0.0 {
257                    find_min = 1;
258                    find_max = 0;
259                }
260
261                dv = si - si1;
262
263                if previous_dv > -1000.0 {
264                    if find_min != 0 && previous_dv < 0.0 && dv >= 0.0 {
265                        if DywaPitchTracker::float_abs(si1) >= amplitude_treshold {
266                            if i as i32 - 1 > last_min_index + delta {
267                                mins[nb_mins as usize] = i as i32 - 1;
268                                nb_mins += 1;
269                                last_min_index = i as i32 - 1;
270                                find_min = 0;
271                            } else {
272                            }
273                        } else {
274                        }
275                    }
276
277                    if find_max != 0 && previous_dv > 0.0 && dv <= 0.0 {
278                        if DywaPitchTracker::float_abs(si1) >= amplitude_treshold {
279                            if i as i32 - 1 > last_max_index + delta {
280                                maxs[nb_maxs as usize] = i as i32 - 1;
281                                nb_maxs += 1;
282                                last_max_index = i as i32 - 1;
283                                find_max = 0;
284                            } else {
285                            }
286                        } else {
287                        }
288                    }
289                }
290                previous_dv = dv;
291
292                i += 1;
293            }
294
295            if nb_mins == 0 && nb_maxs == 0 {
296                return pitch_f;
297            }
298
299            let mut d: i32;
300
301            let mut index = 0;
302
303            while index < distances.len() {
304                distances[index] = 0;
305                index += 1
306            }
307
308            {
309                let mut i = 0;
310                while i < nb_mins {
311                    let mut j = 1;
312                    while j < self.difference_levels_n {
313                        if i + j < nb_mins {
314                            d = DywaPitchTracker::int_abs(
315                                mins[i as usize] - mins[(i + j) as usize],
316                            );
317                            distances[d as usize] = distances[d as usize] + 1;
318                        }
319                        j += 1;
320                    }
321                    i += 1;
322                }
323            }
324
325            {
326                let mut i = 0;
327                while i < nb_maxs {
328                    let mut j = 1;
329                    while j < self.difference_levels_n {
330                        if i + j < nb_maxs {
331                            d = DywaPitchTracker::int_abs(
332                                maxs[i as usize] - maxs[(i + j) as usize],
333                            );
334                            distances[d as usize] = distances[d as usize] + 1;
335                        }
336                        j += 1;
337                    }
338                    i += 1;
339                }
340            }
341
342            let mut best_distance: i32 = -1;
343            let mut best_value = -1;
344
345            {
346                let mut i: i32 = 0;
347                while i < cur_sam_nb as i32 {
348                    let mut summed = 0;
349                    let mut j = -delta;
350                    while j <= delta {
351                        if i + j > 0 && i + j < cur_sam_nb as i32 {
352                            summed += distances[(i + j) as usize];
353                        }
354                        j += 1;
355                    }
356                    if summed == best_value {
357                        if i == 2 * best_value {
358                            best_distance = i;
359                        }
360                    } else if summed > best_value {
361                        best_value = summed;
362                        best_distance = i;
363                    }
364                    i += 1;
365                }
366            }
367
368            let mut dist_avg: f32 = 0.0;
369            let mut nb_dists: f32 = 0.0;
370
371            {
372                let mut j = -delta;
373                while j <= delta {
374                    if best_distance + j >= 0 && best_distance + j < sample_count as i32 {
375                        let nb_dist = distances[(best_distance + j) as usize];
376                        if nb_dist > 0 {
377                            nb_dists += nb_dist as f32;
378                            dist_avg += ((best_distance + j) * nb_dist) as f32;
379                        }
380                    }
381                    j += 1;
382                }
383            }
384
385            dist_avg /= nb_dists;
386
387            if cur_mode_distance > -1.0 {
388                let similarity = DywaPitchTracker::float_abs(dist_avg * 2.0 - cur_mode_distance);
389                if similarity <= 2.0 * delta as f32 {
390                    pitch_f = 44100.0
391                        / (DywaPitchTracker::power_of_2(cur_level - 1) as f32 * cur_mode_distance);
392                    return pitch_f;
393                }
394            }
395
396            cur_mode_distance = dist_avg;
397
398            cur_level += 1;
399            if cur_level >= self.max_flwt_levels {
400                return pitch_f;
401            }
402
403            if cur_sam_nb < 2 {
404                return pitch_f;
405            }
406
407            {
408                let mut i = 0;
409                while i < cur_sam_nb / 2 {
410                    sam[i as usize] = (sam[(2 * i) as usize] + sam[(2 * i + 1) as usize]) / 2.0;
411                    i += 1;
412                }
413            }
414
415            cur_sam_nb /= 2;
416        }
417    }
418
419    /*
420        - a pitch cannot change much all of a sudden (20%) (impossible humanly,
421        so if such a situation happens, consider that it is a mistake and drop it.
422        - a pitch cannot float or be divided by 2 all of a sudden : it is an
423        algorithm side-effect : divide it or float it by 2.
424        - a lonely voiced pitch cannot happen, nor can a sudden drop in the middle
425        of a voiced segment. Smooth the plot.
426    */
427
428    fn dynamic_post_processing(&mut self, mut pitch: f32) -> f32 {
429        if pitch == 0.0 {
430            pitch = -1.0
431        }
432
433        let mut estimated_pitch: f32 = -1.0;
434        let accepted_error: f32 = 0.2;
435        let max_confidence = 5;
436
437        if pitch != -1.0 {
438            if self.prev_pitch == -1.0 {
439                estimated_pitch = pitch;
440                self.prev_pitch = pitch;
441                self.pitch_confidence = 1;
442            } else if DywaPitchTracker::float_abs(self.prev_pitch - pitch) / pitch < accepted_error
443            {
444                self.prev_pitch = pitch;
445                estimated_pitch = pitch;
446                self.pitch_confidence =
447                    DywaPitchTracker::int_min(max_confidence, self.pitch_confidence + 1);
448            } else if (self.pitch_confidence >= max_confidence - 2)
449                && DywaPitchTracker::float_abs(self.prev_pitch - 2.0 * pitch) / (2.0 * pitch)
450                    < accepted_error
451            {
452                estimated_pitch = 2.0 * pitch;
453                self.prev_pitch = estimated_pitch;
454            } else if (self.pitch_confidence >= max_confidence - 2)
455                && DywaPitchTracker::float_abs(self.prev_pitch - 0.5 * pitch) / (0.5 * pitch)
456                    < accepted_error
457            {
458                estimated_pitch = 0.5 * pitch;
459                self.prev_pitch = estimated_pitch;
460            } else {
461                if self.pitch_confidence >= 1 {
462                    estimated_pitch = self.prev_pitch;
463                    self.pitch_confidence = DywaPitchTracker::int_max(0, self.pitch_confidence - 1);
464                } else {
465                    estimated_pitch = pitch;
466                    self.prev_pitch = pitch;
467                    self.pitch_confidence = 1;
468                }
469            }
470        } else {
471            if self.prev_pitch != -1.0 {
472                if self.pitch_confidence >= 1 {
473                    estimated_pitch = self.prev_pitch;
474                    self.pitch_confidence = DywaPitchTracker::int_max(0, self.pitch_confidence - 1);
475                } else {
476                    self.prev_pitch = -1.0;
477                    estimated_pitch = -1.0;
478                    self.pitch_confidence = 0;
479                }
480            }
481        }
482
483        if self.pitch_confidence >= 1 {
484            pitch = estimated_pitch;
485        } else {
486            pitch = -1.0;
487        }
488
489        if pitch == -1.0 {
490            pitch = 0.0;
491        }
492
493        return pitch;
494    }
495}