irapt/
lib.rs

1#![no_std]
2#![warn(missing_docs)]
3
4//! `irapt` is an implementation of the IRAPT pitch estimation algorithm.
5//!
6//! IRAPT is an "instantaneous" version of the Robust Algorithm for Pitch Tracking (RAPT).
7//!
8//! # Usage
9//!
10//! Currently, the [parameters](Parameters) to [`Irapt`] are technical and may be difficult to tune, but the
11//! [`Parameters::default`] provides a sensible set of defaults for ordinary human speech which is computationally
12//! efficient, given the input can be resampled to the default [`Parameters::sample_rate`].
13//!
14//! The input must be given as a [`VecDeque`] to [`Irapt::process`] which is to facilitate the sliding analysis window.
15//! The number of samples removed from the buffer by `process` can be calculated on each invocation in order to track
16//! the global sample index at which each pitch is estimated:
17//!
18//! ```
19//! use irapt::{Irapt, Parameters};
20//! use std::collections::VecDeque;
21//! use std::f64::consts::PI;
22//!
23//! let parameters = Parameters::default();
24//! let mut irapt = Irapt::new(parameters.clone()).expect("the default parameters should be valid");
25//!
26//! let mut sample_buffer = (0..parameters.sample_rate as usize)
27//!     .map(|sample_index| f64::sin(sample_index as f64 / parameters.sample_rate * 2.0 * PI * 100.0))
28//!     .collect::<VecDeque<_>>();
29//!
30//! let mut sample_index = 0;
31//! while let (initial_sample_buffer_len, Some(output)) = (
32//!     sample_buffer.len(),
33//!     irapt.process(&mut sample_buffer),
34//! ) {
35//!     let estimated_pitch = output.pitch_estimates().final_estimate();
36//!     let estimated_pitch_index = (sample_index as isize + estimated_pitch.offset) as usize;
37//!     let estimated_pitch_time = estimated_pitch_index as f64 / parameters.sample_rate;
38//!     println!("estimated pitch at {:0.3}: {}Hz with energy {}",
39//!              estimated_pitch_time, estimated_pitch.frequency, estimated_pitch.energy);
40//!     sample_index += initial_sample_buffer_len - sample_buffer.len();
41//! }
42//! ```
43
44extern crate alloc;
45
46#[macro_use]
47mod util;
48
49#[doc(hidden)]
50pub mod candidates;
51pub mod error;
52#[doc(hidden)]
53pub mod fir_filter;
54#[doc(hidden)]
55pub mod harmonics;
56#[doc(hidden)]
57pub mod interpolate;
58#[doc(hidden)]
59pub mod polyphase_filter;
60
61use self::candidates::{CandidateFrequencyIter, CandidateGenerator, CandidateSelectionStepIter, CandidateSelector};
62use self::error::InvalidParameterError;
63use self::harmonics::HarmonicParametersEstimator;
64
65use alloc::collections::VecDeque;
66use core::iter::Enumerate;
67use core::ops::RangeInclusive;
68
69/// Implementation of the IRAPT pitch estimation algorithm.
70///
71/// IRAPT is an "instantaneous" version of the Robust Algorithm for Pitch Tracking. Though pitch estimates are provided
72/// every [`harmonics_estimation_interval`] (`0.005` seconds by default), a larger sliding window of
73/// [`candidate_selection_window_duration`] (`0.3` seconds by default) is used to improve accuracy at the cost of a
74/// small delay.
75///
76/// [`harmonics_estimation_interval`]: Parameters::harmonics_estimation_interval
77/// [`candidate_selection_window_duration`]: Parameters::candidate_selection_window_duration
78pub struct Irapt {
79    parameters:          Parameters,
80    estimator:           HarmonicParametersEstimator,
81    candidate_generator: CandidateGenerator,
82    candidate_selector:  CandidateSelector,
83}
84
85/// Various tunable parameters for [`Irapt`].
86///
87/// The [`Default`] implementation provides suggested defaults for all parameters, given that the input is resampled
88/// near to the suggested default sample rate.
89#[derive(Clone, Debug)]
90pub struct Parameters {
91    /// The constant sample rate, in Hz, the input was sampled with.
92    ///
93    /// The suggested default is `6000.0`.
94    pub sample_rate: f64,
95
96    /// Interval, in seconds, at which harmonics of the input are estimated.
97    ///
98    /// The suggested default is `0.005`.
99    pub harmonics_estimation_interval: f64,
100
101    /// Duration, in seconds, of the sliding window upon which harmonics of the input are estimated.
102    ///
103    /// The suggested default is `0.05`.
104    pub harmonics_estimation_window_duration: f64,
105
106    /// Duration, in seconds, of the sliding window upon which pitches are estimated.
107    ///
108    /// A shorter candidate selection window will be more responsive to fluctuations in input, but less accurate. The
109    /// suggested default is `0.3`.
110    pub candidate_selection_window_duration: f64,
111
112    /// Frequency range, in Hz, within which to detect pitch.
113    ///
114    /// Wider frequency ranges require a larger [`candidate_generator_fft_len`] or [`sample_rate`] to maintain adequate
115    /// frequency resolution of pitch detection. The suggested default for any human speech is `50.0..=450.0`.
116    ///
117    /// [`candidate_generator_fft_len`]: Self::candidate_generator_fft_len
118    /// [`sample_rate`]: Self::sample_rate
119    pub pitch_range: RangeInclusive<f64>,
120
121    /// Size of the FFT used for candidate generation.
122    ///
123    /// The candidate generation FFT size affects the frequency resolution of pitch detection. Larger FFT sizes result
124    /// in a higher resolution. The suggested default is `16384`.
125    ///
126    /// Certain FFT sizes, e.g. powers of two, are more computationally efficient than others. See the
127    /// [`rustfft`] crate for the supported optimizations based on FFT size.
128    ///
129    /// [`rustfft`]: https://docs.rs/rustfft
130    pub candidate_generator_fft_len: usize,
131
132    /// Half-length of the window of the interpolator used on generated pitch candidates.
133    ///
134    /// A window too short for the given [`candidate_generator_fft_len`] will suffer from artifacts resulting from poor
135    /// interpolation. The suggested default is `12`.
136    ///
137    /// The window half-length must be less than or equal to both:
138    ///
139    ///  * `(sample_rate / pitch_range.end()).floor()`, and
140    ///  * `candidate_generator_fft_len - (sample_rate / pitch_range.start()).ceil()`
141    ///
142    /// [`candidate_generator_fft_len`]: Self::candidate_generator_fft_len
143    pub half_interpolation_window_len: u32,
144
145    /// Number of pitch candidates to interpolate in between each generated pitch candidate.
146    ///
147    /// The suggested default in `2`.
148    pub interpolation_factor: u8,
149
150    /// Taper factor applied to candidates within a time step.
151    ///
152    /// Candidates within a single time step will be weighted from `1.0 - candidate_taper..=1.0` linearly proportional
153    /// to their frequencies. The suggested default is `0.25`.
154    pub candidate_taper: f64,
155
156    /// Decay factor applied to candidates at each time step within the given [`candidate_selection_window_duration`].
157    ///
158    /// The suggested default is `0.95`.
159    ///
160    /// [`candidate_selection_window_duration`]: Self::candidate_selection_window_duration
161    pub candidate_step_decay: f64,
162
163    /// Assumed maximum distance a valid pitch will change within the [`harmonics_estimation_interval`].
164    ///
165    /// The unit of distance is in candidates, which is an arbitrary logarithmic frequency scale.
166    ///
167    /// [`harmonics_estimation_interval`]: Self::harmonics_estimation_interval
168    pub candidate_max_jump: usize,
169}
170
171/// The output of [`Irapt::process`].
172///
173/// This `struct` holds the output, including estimated pitches, of the IRAPT algorithm after processing a single time step, and is created
174/// by the [`process`](Irapt::process) method on [`Irapt`]. See its documentation for more.
175pub struct Output<'a> {
176    estimated_pitches: EstimatedPitchIter<'a>,
177    more_output: bool,
178}
179
180/// An estimate of the pitch in the input at a specific sample offset.
181#[derive(Clone, Copy, Debug, PartialEq)]
182pub struct EstimatedPitch {
183    /// Frequency, in Hz, of the estimated pitch.
184    pub frequency: f64,
185    /// Arbitrary measure, from `0.0..`, of the energy associated with the estimated pitch.
186    pub energy:    f64,
187    /// The offset in samples within the input buffer (_before_ removal of consumed samples) at which the pitch was estimated. This may be
188    /// negative, since estimates can be returned for samples which have already been removed from the input buffer.
189    pub offset:    isize,
190}
191
192/// An iterator over pitches estimated over time in the input, in reverse chronological order.
193///
194/// This `struct` is created by the [`pitch_estimates`](Output::pitch_estimates) method on [`Output`]. See its documentation for more.
195#[derive(Clone)]
196pub struct EstimatedPitchIter<'a> {
197    selected_candidates: Enumerate<CandidateSelectionStepIter<'a>>,
198    candidate_frequencies: CandidateFrequencyIter,
199    last_step_sample_index: usize,
200    step_len: usize,
201}
202
203impl Irapt {
204    /// Constructs a new `Irapt`.
205    ///
206    /// # Errors
207    ///
208    /// If any of the supplied parameters are invalid or conflict with each other, then an error is returned.
209    ///
210    /// # Examples
211    ///
212    /// ```
213    /// use irapt::{Irapt, Parameters};
214    ///
215    /// let mut irapt = Irapt::new(Parameters::default()).expect("the default parameters should be valid");
216    /// ```
217    pub fn new(parameters: Parameters) -> Result<Self, InvalidParameterError> {
218        let estimator = HarmonicParametersEstimator::new(parameters.harmonics_window_len());
219
220        let candidate_generator = CandidateGenerator::new(
221            parameters.candidate_generator_fft_len,
222            parameters.half_interpolation_window_len,
223            parameters.interpolation_factor,
224            parameters.sample_rate,
225            parameters.pitch_range.clone(),
226        )?;
227        let candidate_selector = CandidateSelector::new(
228            parameters.candidate_selection_window_len(),
229            parameters.candidate_taper,
230            candidate_generator.normalized_candidate_frequencies(parameters.sample_rate, parameters.pitch_range.clone()),
231        );
232        Ok(Self {
233            parameters,
234            estimator,
235            candidate_generator,
236            candidate_selector,
237        })
238    }
239
240    /// Returns the `Parameters` specified during construction.
241    pub fn parameters(&self) -> &Parameters {
242        &self.parameters
243    }
244
245    /// Process input from a queue of samples in a [`VecDeque`].
246    ///
247    /// As many samples as necessary to calculate the next pitch estimate are read from the [`VecDeque`], otherwise
248    /// [`None`] is returned if more are required. To process as many samples as possible from the [`VecDeque`],
249    /// `process` should be called repeatedly until [`None`] is returned.
250    ///
251    /// Input samples consumed are eventually removed from the front of the [`VecDeque`] by `process`, but a fixed-size
252    /// window of past samples are left remaining in the [`VecDeque`] for access by later calls to `process` and are
253    /// only removed when they are no longer needed.
254    ///
255    /// # Examples
256    ///
257    /// ```
258    /// use irapt::{Irapt, Parameters};
259    /// use std::collections::VecDeque;
260    /// use std::f64::consts::PI;
261    ///
262    /// let parameters = Parameters::default();
263    /// let mut irapt = Irapt::new(parameters.clone()).expect("the default parameters should be valid");
264    ///
265    /// // Use a 100Hz sine wave as an example input signal
266    /// let mut samples = (0..).map(|sample_index| f64::sin(sample_index as f64 / parameters.sample_rate * 2.0 * PI * 100.0));
267    ///
268    /// // Collect half of a second of input
269    /// let mut sample_buffer = VecDeque::new();
270    /// sample_buffer.extend(samples.by_ref().take(parameters.sample_rate as usize / 2));
271    ///
272    /// // Process as many samples as possible
273    /// while let Some(output) = irapt.process(&mut sample_buffer) {
274    ///     let estimated_pitch = output.pitch_estimates().final_estimate();
275    ///     println!("estimated pitch: {}Hz with energy {}", estimated_pitch.frequency, estimated_pitch.energy);
276    /// }
277    ///
278    /// // Simulate that half of a second more samples have become availoble and process them
279    /// sample_buffer.extend(samples.by_ref().take(parameters.sample_rate as usize / 2));
280    ///
281    /// while let Some(output) = irapt.process(&mut sample_buffer) {
282    ///     let estimated_pitch = output.pitch_estimates().final_estimate();
283    ///     println!("estimated pitch: {}Hz with energy {}", estimated_pitch.frequency, estimated_pitch.energy);
284    /// }
285    /// ```
286    pub fn process<S: Into<f64> + Copy>(&mut self, samples: &mut VecDeque<S>) -> Option<Output<'_>> {
287        let initial_samples_len = samples.len();
288        let step_len = self.parameters.step_len();
289
290        let mut processed_step_sample_index = None;
291        while let (step_sample_index, Some(harmonics)) = (
292            initial_samples_len - samples.len() + self.estimator.next_step_samples_len(),
293            self.estimator.process_step(samples, step_len, self.parameters.sample_rate),
294        ) {
295            let mut energy = 0.0;
296            let harmonics = harmonics.inspect(|harmonic| {
297                energy += harmonic.amplitude * harmonic.amplitude;
298            });
299
300            self.candidate_generator.process_step_harmonics(harmonics, self.parameters.sample_rate);
301            let candidates = self.candidate_generator.generate_step_candidates();
302
303            self.candidate_selector.process_step(
304                candidates,
305                energy,
306                self.parameters.candidate_selection_window_len(),
307                self.parameters.candidate_max_jump,
308                self.parameters.candidate_step_decay,
309            );
310
311            if self.candidate_selector.initialized(self.parameters.candidate_selection_window_len()) {
312                processed_step_sample_index = Some(step_sample_index);
313                break;
314            }
315        }
316
317        let last_step_sample_index = processed_step_sample_index?;
318        let more_output = samples.len() >= self.estimator.next_step_samples_len();
319
320        let selected_candidates = self.candidate_selector.best_candidate_steps(
321            self.parameters.candidate_selection_window_len(),
322            self.parameters.candidate_max_jump,
323        );
324        Some(Output {
325            estimated_pitches: EstimatedPitchIter {
326                selected_candidates: selected_candidates?.enumerate(),
327                candidate_frequencies: self.candidate_generator.candidate_frequencies(self.parameters.sample_rate),
328                last_step_sample_index,
329                step_len: self.parameters.step_len(),
330            },
331            more_output,
332        })
333    }
334
335    /// Resets all internal state associated with the sliding analysis window.
336    ///
337    /// The internal state after a reset is equivalent to that of a newly constructed [`Irapt`]. Resetting can be useful
338    /// to avoid causing artifacts in the analysis when skipping a number of samples in the input without processing
339    /// them.
340    ///
341    /// # Examples
342    ///
343    /// ```
344    /// use irapt::{Irapt, Parameters};
345    /// use std::collections::VecDeque;
346    /// use std::f64::consts::PI;
347    ///
348    /// let parameters = Parameters::default();
349    /// let mut irapt = Irapt::new(parameters.clone()).expect("the default parameters should be valid");
350    ///
351    /// // Use a 100Hz sine wave as an example input signal
352    /// let mut samples = (0..).map(|sample_index| f64::sin(sample_index as f64 / parameters.sample_rate * 2.0 * PI * 100.0));
353    ///
354    /// // Collect half of a second of input
355    /// let mut sample_buffer = VecDeque::new();
356    /// sample_buffer.extend(samples.by_ref().take(parameters.sample_rate as usize / 2));
357    ///
358    /// while let Some(output) = irapt.process(&mut sample_buffer) {
359    ///     let estimated_pitch = output.pitch_estimates().final_estimate();
360    ///     println!("estimated pitch: {}Hz with energy {}", estimated_pitch.frequency, estimated_pitch.energy);
361    /// }
362    ///
363    /// // Simulate that many more samples have become available
364    /// let more_samples = samples.by_ref().take(parameters.sample_rate as usize * 10);
365    ///
366    /// // Reset irapt, clear the input buffer, skip all but half a second of input samples, and process the rest
367    /// irapt.reset();
368    /// sample_buffer.clear();
369    /// sample_buffer.extend(more_samples.skip(parameters.sample_rate as usize * 19 / 2));
370    ///
371    /// while let Some(output) = irapt.process(&mut sample_buffer) {
372    ///     let estimated_pitch = output.pitch_estimates().final_estimate();
373    ///     println!("estimated pitch: {}Hz with energy {}", estimated_pitch.frequency, estimated_pitch.energy);
374    /// }
375    /// ```
376    pub fn reset(&mut self) {
377        self.estimator.reset();
378        self.candidate_selector.reset();
379    }
380}
381
382//
383// Parameters impls
384//
385
386impl Parameters {
387    /// Suggested default parameters.
388    pub const DEFAULT: Self = Self {
389        sample_rate: 6000.0,
390
391        harmonics_estimation_interval:        0.005,
392        harmonics_estimation_window_duration: 0.05,
393        candidate_selection_window_duration:  0.3,
394
395        pitch_range: 50.0..=450.0,
396
397        candidate_generator_fft_len:   16384,
398        half_interpolation_window_len: 12,
399        interpolation_factor:          2,
400
401        candidate_taper:      0.25,
402        candidate_step_decay: 0.95,
403        candidate_max_jump:   23,
404    };
405
406    fn candidate_selection_window_len(&self) -> usize {
407        (self.candidate_selection_window_duration / self.harmonics_estimation_interval + 0.5) as usize
408    }
409
410    fn harmonics_window_len(&self) -> u32 {
411        (self.harmonics_estimation_window_duration * self.sample_rate / 2.0).round() as u32 * 2 + 1
412    }
413
414    fn step_len(&self) -> usize {
415        (self.harmonics_estimation_interval * self.sample_rate).round() as usize
416    }
417}
418
419impl Default for Parameters {
420    fn default() -> Self {
421        Self::DEFAULT
422    }
423}
424
425//
426// Output impls
427//
428
429impl Output<'_> {
430    /// Returns whether further output can be produced given the input samples.
431    ///
432    /// More output can be produced by calling [`Irapt::process`].
433    pub fn more_output(&self) -> bool {
434        self.more_output
435    }
436
437    /// Returns all pitch estimates for the given input, including both those tentative and final, in reverse chronological order.
438    ///
439    /// All but the last of the yielded pitches are tentative estimates calculated up to [`candidate_selection_window_duration`] seconds in
440    /// the past. The estimates are returned in reverse chronological order. The exact sample offsets for the estimates are returned in
441    /// [`EstimatedPitch::offset`].
442    ///
443    /// The last estimate yielded is final for the given time offset. It can also be retrieved by calling [`final_estimate`] on the returned
444    /// iterator.
445    ///
446    /// [`candidate_selection_window_duration`]: Parameters::candidate_selection_window_duration
447    /// [`final_estimate`]: EstimatedPitchIter::final_estimate
448    pub fn pitch_estimates(&self) -> EstimatedPitchIter<'_> {
449        self.estimated_pitches.clone()
450    }
451}
452
453//
454// EstimatedPitchIter impls
455//
456
457impl EstimatedPitchIter<'_> {
458    /// Returns a final pitch estimate for the given input, at a time delay.
459    ///
460    /// The returned pitch is the final estimate calculated at approximately [`candidate_selection_window_duration`] seconds in the past.
461    /// The exact sample offset for the estimate is returned in [`EstimatedPitch::offset`].
462    ///
463    /// [`candidate_selection_window_duration`]: Parameters::candidate_selection_window_duration
464    pub fn final_estimate(self) -> EstimatedPitch {
465        self.last().unwrap_or_else(|| unreachable!())
466    }
467}
468
469impl Iterator for EstimatedPitchIter<'_> {
470    type Item = EstimatedPitch;
471
472    fn next(&mut self) -> Option<Self::Item> {
473        let (step_index, candidate_selection) = self.selected_candidates.next()?;
474        let frequency = (self.candidate_frequencies.clone())
475            .nth(candidate_selection.selected_candidate_index)
476            .unwrap_or_else(|| panic!("candidate index out of bounds"));
477        Some(EstimatedPitch {
478            frequency,
479            energy: candidate_selection.energy,
480            offset: self.last_step_sample_index.wrapping_sub((1 + step_index) * self.step_len) as isize
481        })
482    }
483
484    fn size_hint(&self) -> (usize, Option<usize>) {
485        self.selected_candidates.size_hint()
486    }
487}
488
489impl ExactSizeIterator for EstimatedPitchIter<'_> {}