irapt/lib.rs
1#![no_std]
2#![warn(missing_docs)]
3
4//! `irapt` is an implementation of the IRAPT pitch estimation algorithm.
5//!
6//! IRAPT is an "instantaneous" version of the Robust Algorithm for Pitch Tracking (RAPT).
7//!
8//! # Usage
9//!
10//! Currently, the [parameters](Parameters) to [`Irapt`] are technical and may be difficult to tune, but the
11//! [`Parameters::default`] provides a sensible set of defaults for ordinary human speech which is computationally
12//! efficient, given the input can be resampled to the default [`Parameters::sample_rate`].
13//!
14//! The input must be given as a [`VecDeque`] to [`Irapt::process`] which is to facilitate the sliding analysis window.
15//! The number of samples removed from the buffer by `process` can be calculated on each invocation in order to track
16//! the global sample index at which each pitch is estimated:
17//!
18//! ```
19//! use irapt::{Irapt, Parameters};
20//! use std::collections::VecDeque;
21//! use std::f64::consts::PI;
22//!
23//! let parameters = Parameters::default();
24//! let mut irapt = Irapt::new(parameters.clone()).expect("the default parameters should be valid");
25//!
26//! let mut sample_buffer = (0..parameters.sample_rate as usize)
27//! .map(|sample_index| f64::sin(sample_index as f64 / parameters.sample_rate * 2.0 * PI * 100.0))
28//! .collect::<VecDeque<_>>();
29//!
30//! let mut sample_index = 0;
31//! while let (initial_sample_buffer_len, Some(output)) = (
32//! sample_buffer.len(),
33//! irapt.process(&mut sample_buffer),
34//! ) {
35//! let estimated_pitch = output.pitch_estimates().final_estimate();
36//! let estimated_pitch_index = (sample_index as isize + estimated_pitch.offset) as usize;
37//! let estimated_pitch_time = estimated_pitch_index as f64 / parameters.sample_rate;
38//! println!("estimated pitch at {:0.3}: {}Hz with energy {}",
39//! estimated_pitch_time, estimated_pitch.frequency, estimated_pitch.energy);
40//! sample_index += initial_sample_buffer_len - sample_buffer.len();
41//! }
42//! ```
43
44extern crate alloc;
45
46#[macro_use]
47mod util;
48
49#[doc(hidden)]
50pub mod candidates;
51pub mod error;
52#[doc(hidden)]
53pub mod fir_filter;
54#[doc(hidden)]
55pub mod harmonics;
56#[doc(hidden)]
57pub mod interpolate;
58#[doc(hidden)]
59pub mod polyphase_filter;
60
61use self::candidates::{CandidateFrequencyIter, CandidateGenerator, CandidateSelectionStepIter, CandidateSelector};
62use self::error::InvalidParameterError;
63use self::harmonics::HarmonicParametersEstimator;
64
65use alloc::collections::VecDeque;
66use core::iter::Enumerate;
67use core::ops::RangeInclusive;
68
69/// Implementation of the IRAPT pitch estimation algorithm.
70///
71/// IRAPT is an "instantaneous" version of the Robust Algorithm for Pitch Tracking. Though pitch estimates are provided
72/// every [`harmonics_estimation_interval`] (`0.005` seconds by default), a larger sliding window of
73/// [`candidate_selection_window_duration`] (`0.3` seconds by default) is used to improve accuracy at the cost of a
74/// small delay.
75///
76/// [`harmonics_estimation_interval`]: Parameters::harmonics_estimation_interval
77/// [`candidate_selection_window_duration`]: Parameters::candidate_selection_window_duration
78pub struct Irapt {
79 parameters: Parameters,
80 estimator: HarmonicParametersEstimator,
81 candidate_generator: CandidateGenerator,
82 candidate_selector: CandidateSelector,
83}
84
85/// Various tunable parameters for [`Irapt`].
86///
87/// The [`Default`] implementation provides suggested defaults for all parameters, given that the input is resampled
88/// near to the suggested default sample rate.
89#[derive(Clone, Debug)]
90pub struct Parameters {
91 /// The constant sample rate, in Hz, the input was sampled with.
92 ///
93 /// The suggested default is `6000.0`.
94 pub sample_rate: f64,
95
96 /// Interval, in seconds, at which harmonics of the input are estimated.
97 ///
98 /// The suggested default is `0.005`.
99 pub harmonics_estimation_interval: f64,
100
101 /// Duration, in seconds, of the sliding window upon which harmonics of the input are estimated.
102 ///
103 /// The suggested default is `0.05`.
104 pub harmonics_estimation_window_duration: f64,
105
106 /// Duration, in seconds, of the sliding window upon which pitches are estimated.
107 ///
108 /// A shorter candidate selection window will be more responsive to fluctuations in input, but less accurate. The
109 /// suggested default is `0.3`.
110 pub candidate_selection_window_duration: f64,
111
112 /// Frequency range, in Hz, within which to detect pitch.
113 ///
114 /// Wider frequency ranges require a larger [`candidate_generator_fft_len`] or [`sample_rate`] to maintain adequate
115 /// frequency resolution of pitch detection. The suggested default for any human speech is `50.0..=450.0`.
116 ///
117 /// [`candidate_generator_fft_len`]: Self::candidate_generator_fft_len
118 /// [`sample_rate`]: Self::sample_rate
119 pub pitch_range: RangeInclusive<f64>,
120
121 /// Size of the FFT used for candidate generation.
122 ///
123 /// The candidate generation FFT size affects the frequency resolution of pitch detection. Larger FFT sizes result
124 /// in a higher resolution. The suggested default is `16384`.
125 ///
126 /// Certain FFT sizes, e.g. powers of two, are more computationally efficient than others. See the
127 /// [`rustfft`] crate for the supported optimizations based on FFT size.
128 ///
129 /// [`rustfft`]: https://docs.rs/rustfft
130 pub candidate_generator_fft_len: usize,
131
132 /// Half-length of the window of the interpolator used on generated pitch candidates.
133 ///
134 /// A window too short for the given [`candidate_generator_fft_len`] will suffer from artifacts resulting from poor
135 /// interpolation. The suggested default is `12`.
136 ///
137 /// The window half-length must be less than or equal to both:
138 ///
139 /// * `(sample_rate / pitch_range.end()).floor()`, and
140 /// * `candidate_generator_fft_len - (sample_rate / pitch_range.start()).ceil()`
141 ///
142 /// [`candidate_generator_fft_len`]: Self::candidate_generator_fft_len
143 pub half_interpolation_window_len: u32,
144
145 /// Number of pitch candidates to interpolate in between each generated pitch candidate.
146 ///
147 /// The suggested default in `2`.
148 pub interpolation_factor: u8,
149
150 /// Taper factor applied to candidates within a time step.
151 ///
152 /// Candidates within a single time step will be weighted from `1.0 - candidate_taper..=1.0` linearly proportional
153 /// to their frequencies. The suggested default is `0.25`.
154 pub candidate_taper: f64,
155
156 /// Decay factor applied to candidates at each time step within the given [`candidate_selection_window_duration`].
157 ///
158 /// The suggested default is `0.95`.
159 ///
160 /// [`candidate_selection_window_duration`]: Self::candidate_selection_window_duration
161 pub candidate_step_decay: f64,
162
163 /// Assumed maximum distance a valid pitch will change within the [`harmonics_estimation_interval`].
164 ///
165 /// The unit of distance is in candidates, which is an arbitrary logarithmic frequency scale.
166 ///
167 /// [`harmonics_estimation_interval`]: Self::harmonics_estimation_interval
168 pub candidate_max_jump: usize,
169}
170
171/// The output of [`Irapt::process`].
172///
173/// This `struct` holds the output, including estimated pitches, of the IRAPT algorithm after processing a single time step, and is created
174/// by the [`process`](Irapt::process) method on [`Irapt`]. See its documentation for more.
175pub struct Output<'a> {
176 estimated_pitches: EstimatedPitchIter<'a>,
177 more_output: bool,
178}
179
180/// An estimate of the pitch in the input at a specific sample offset.
181#[derive(Clone, Copy, Debug, PartialEq)]
182pub struct EstimatedPitch {
183 /// Frequency, in Hz, of the estimated pitch.
184 pub frequency: f64,
185 /// Arbitrary measure, from `0.0..`, of the energy associated with the estimated pitch.
186 pub energy: f64,
187 /// The offset in samples within the input buffer (_before_ removal of consumed samples) at which the pitch was estimated. This may be
188 /// negative, since estimates can be returned for samples which have already been removed from the input buffer.
189 pub offset: isize,
190}
191
192/// An iterator over pitches estimated over time in the input, in reverse chronological order.
193///
194/// This `struct` is created by the [`pitch_estimates`](Output::pitch_estimates) method on [`Output`]. See its documentation for more.
195#[derive(Clone)]
196pub struct EstimatedPitchIter<'a> {
197 selected_candidates: Enumerate<CandidateSelectionStepIter<'a>>,
198 candidate_frequencies: CandidateFrequencyIter,
199 last_step_sample_index: usize,
200 step_len: usize,
201}
202
203impl Irapt {
204 /// Constructs a new `Irapt`.
205 ///
206 /// # Errors
207 ///
208 /// If any of the supplied parameters are invalid or conflict with each other, then an error is returned.
209 ///
210 /// # Examples
211 ///
212 /// ```
213 /// use irapt::{Irapt, Parameters};
214 ///
215 /// let mut irapt = Irapt::new(Parameters::default()).expect("the default parameters should be valid");
216 /// ```
217 pub fn new(parameters: Parameters) -> Result<Self, InvalidParameterError> {
218 let estimator = HarmonicParametersEstimator::new(parameters.harmonics_window_len());
219
220 let candidate_generator = CandidateGenerator::new(
221 parameters.candidate_generator_fft_len,
222 parameters.half_interpolation_window_len,
223 parameters.interpolation_factor,
224 parameters.sample_rate,
225 parameters.pitch_range.clone(),
226 )?;
227 let candidate_selector = CandidateSelector::new(
228 parameters.candidate_selection_window_len(),
229 parameters.candidate_taper,
230 candidate_generator.normalized_candidate_frequencies(parameters.sample_rate, parameters.pitch_range.clone()),
231 );
232 Ok(Self {
233 parameters,
234 estimator,
235 candidate_generator,
236 candidate_selector,
237 })
238 }
239
240 /// Returns the `Parameters` specified during construction.
241 pub fn parameters(&self) -> &Parameters {
242 &self.parameters
243 }
244
245 /// Process input from a queue of samples in a [`VecDeque`].
246 ///
247 /// As many samples as necessary to calculate the next pitch estimate are read from the [`VecDeque`], otherwise
248 /// [`None`] is returned if more are required. To process as many samples as possible from the [`VecDeque`],
249 /// `process` should be called repeatedly until [`None`] is returned.
250 ///
251 /// Input samples consumed are eventually removed from the front of the [`VecDeque`] by `process`, but a fixed-size
252 /// window of past samples are left remaining in the [`VecDeque`] for access by later calls to `process` and are
253 /// only removed when they are no longer needed.
254 ///
255 /// # Examples
256 ///
257 /// ```
258 /// use irapt::{Irapt, Parameters};
259 /// use std::collections::VecDeque;
260 /// use std::f64::consts::PI;
261 ///
262 /// let parameters = Parameters::default();
263 /// let mut irapt = Irapt::new(parameters.clone()).expect("the default parameters should be valid");
264 ///
265 /// // Use a 100Hz sine wave as an example input signal
266 /// let mut samples = (0..).map(|sample_index| f64::sin(sample_index as f64 / parameters.sample_rate * 2.0 * PI * 100.0));
267 ///
268 /// // Collect half of a second of input
269 /// let mut sample_buffer = VecDeque::new();
270 /// sample_buffer.extend(samples.by_ref().take(parameters.sample_rate as usize / 2));
271 ///
272 /// // Process as many samples as possible
273 /// while let Some(output) = irapt.process(&mut sample_buffer) {
274 /// let estimated_pitch = output.pitch_estimates().final_estimate();
275 /// println!("estimated pitch: {}Hz with energy {}", estimated_pitch.frequency, estimated_pitch.energy);
276 /// }
277 ///
278 /// // Simulate that half of a second more samples have become availoble and process them
279 /// sample_buffer.extend(samples.by_ref().take(parameters.sample_rate as usize / 2));
280 ///
281 /// while let Some(output) = irapt.process(&mut sample_buffer) {
282 /// let estimated_pitch = output.pitch_estimates().final_estimate();
283 /// println!("estimated pitch: {}Hz with energy {}", estimated_pitch.frequency, estimated_pitch.energy);
284 /// }
285 /// ```
286 pub fn process<S: Into<f64> + Copy>(&mut self, samples: &mut VecDeque<S>) -> Option<Output<'_>> {
287 let initial_samples_len = samples.len();
288 let step_len = self.parameters.step_len();
289
290 let mut processed_step_sample_index = None;
291 while let (step_sample_index, Some(harmonics)) = (
292 initial_samples_len - samples.len() + self.estimator.next_step_samples_len(),
293 self.estimator.process_step(samples, step_len, self.parameters.sample_rate),
294 ) {
295 let mut energy = 0.0;
296 let harmonics = harmonics.inspect(|harmonic| {
297 energy += harmonic.amplitude * harmonic.amplitude;
298 });
299
300 self.candidate_generator.process_step_harmonics(harmonics, self.parameters.sample_rate);
301 let candidates = self.candidate_generator.generate_step_candidates();
302
303 self.candidate_selector.process_step(
304 candidates,
305 energy,
306 self.parameters.candidate_selection_window_len(),
307 self.parameters.candidate_max_jump,
308 self.parameters.candidate_step_decay,
309 );
310
311 if self.candidate_selector.initialized(self.parameters.candidate_selection_window_len()) {
312 processed_step_sample_index = Some(step_sample_index);
313 break;
314 }
315 }
316
317 let last_step_sample_index = processed_step_sample_index?;
318 let more_output = samples.len() >= self.estimator.next_step_samples_len();
319
320 let selected_candidates = self.candidate_selector.best_candidate_steps(
321 self.parameters.candidate_selection_window_len(),
322 self.parameters.candidate_max_jump,
323 );
324 Some(Output {
325 estimated_pitches: EstimatedPitchIter {
326 selected_candidates: selected_candidates?.enumerate(),
327 candidate_frequencies: self.candidate_generator.candidate_frequencies(self.parameters.sample_rate),
328 last_step_sample_index,
329 step_len: self.parameters.step_len(),
330 },
331 more_output,
332 })
333 }
334
335 /// Resets all internal state associated with the sliding analysis window.
336 ///
337 /// The internal state after a reset is equivalent to that of a newly constructed [`Irapt`]. Resetting can be useful
338 /// to avoid causing artifacts in the analysis when skipping a number of samples in the input without processing
339 /// them.
340 ///
341 /// # Examples
342 ///
343 /// ```
344 /// use irapt::{Irapt, Parameters};
345 /// use std::collections::VecDeque;
346 /// use std::f64::consts::PI;
347 ///
348 /// let parameters = Parameters::default();
349 /// let mut irapt = Irapt::new(parameters.clone()).expect("the default parameters should be valid");
350 ///
351 /// // Use a 100Hz sine wave as an example input signal
352 /// let mut samples = (0..).map(|sample_index| f64::sin(sample_index as f64 / parameters.sample_rate * 2.0 * PI * 100.0));
353 ///
354 /// // Collect half of a second of input
355 /// let mut sample_buffer = VecDeque::new();
356 /// sample_buffer.extend(samples.by_ref().take(parameters.sample_rate as usize / 2));
357 ///
358 /// while let Some(output) = irapt.process(&mut sample_buffer) {
359 /// let estimated_pitch = output.pitch_estimates().final_estimate();
360 /// println!("estimated pitch: {}Hz with energy {}", estimated_pitch.frequency, estimated_pitch.energy);
361 /// }
362 ///
363 /// // Simulate that many more samples have become available
364 /// let more_samples = samples.by_ref().take(parameters.sample_rate as usize * 10);
365 ///
366 /// // Reset irapt, clear the input buffer, skip all but half a second of input samples, and process the rest
367 /// irapt.reset();
368 /// sample_buffer.clear();
369 /// sample_buffer.extend(more_samples.skip(parameters.sample_rate as usize * 19 / 2));
370 ///
371 /// while let Some(output) = irapt.process(&mut sample_buffer) {
372 /// let estimated_pitch = output.pitch_estimates().final_estimate();
373 /// println!("estimated pitch: {}Hz with energy {}", estimated_pitch.frequency, estimated_pitch.energy);
374 /// }
375 /// ```
376 pub fn reset(&mut self) {
377 self.estimator.reset();
378 self.candidate_selector.reset();
379 }
380}
381
382//
383// Parameters impls
384//
385
386impl Parameters {
387 /// Suggested default parameters.
388 pub const DEFAULT: Self = Self {
389 sample_rate: 6000.0,
390
391 harmonics_estimation_interval: 0.005,
392 harmonics_estimation_window_duration: 0.05,
393 candidate_selection_window_duration: 0.3,
394
395 pitch_range: 50.0..=450.0,
396
397 candidate_generator_fft_len: 16384,
398 half_interpolation_window_len: 12,
399 interpolation_factor: 2,
400
401 candidate_taper: 0.25,
402 candidate_step_decay: 0.95,
403 candidate_max_jump: 23,
404 };
405
406 fn candidate_selection_window_len(&self) -> usize {
407 (self.candidate_selection_window_duration / self.harmonics_estimation_interval + 0.5) as usize
408 }
409
410 fn harmonics_window_len(&self) -> u32 {
411 (self.harmonics_estimation_window_duration * self.sample_rate / 2.0).round() as u32 * 2 + 1
412 }
413
414 fn step_len(&self) -> usize {
415 (self.harmonics_estimation_interval * self.sample_rate).round() as usize
416 }
417}
418
419impl Default for Parameters {
420 fn default() -> Self {
421 Self::DEFAULT
422 }
423}
424
425//
426// Output impls
427//
428
429impl Output<'_> {
430 /// Returns whether further output can be produced given the input samples.
431 ///
432 /// More output can be produced by calling [`Irapt::process`].
433 pub fn more_output(&self) -> bool {
434 self.more_output
435 }
436
437 /// Returns all pitch estimates for the given input, including both those tentative and final, in reverse chronological order.
438 ///
439 /// All but the last of the yielded pitches are tentative estimates calculated up to [`candidate_selection_window_duration`] seconds in
440 /// the past. The estimates are returned in reverse chronological order. The exact sample offsets for the estimates are returned in
441 /// [`EstimatedPitch::offset`].
442 ///
443 /// The last estimate yielded is final for the given time offset. It can also be retrieved by calling [`final_estimate`] on the returned
444 /// iterator.
445 ///
446 /// [`candidate_selection_window_duration`]: Parameters::candidate_selection_window_duration
447 /// [`final_estimate`]: EstimatedPitchIter::final_estimate
448 pub fn pitch_estimates(&self) -> EstimatedPitchIter<'_> {
449 self.estimated_pitches.clone()
450 }
451}
452
453//
454// EstimatedPitchIter impls
455//
456
457impl EstimatedPitchIter<'_> {
458 /// Returns a final pitch estimate for the given input, at a time delay.
459 ///
460 /// The returned pitch is the final estimate calculated at approximately [`candidate_selection_window_duration`] seconds in the past.
461 /// The exact sample offset for the estimate is returned in [`EstimatedPitch::offset`].
462 ///
463 /// [`candidate_selection_window_duration`]: Parameters::candidate_selection_window_duration
464 pub fn final_estimate(self) -> EstimatedPitch {
465 self.last().unwrap_or_else(|| unreachable!())
466 }
467}
468
469impl Iterator for EstimatedPitchIter<'_> {
470 type Item = EstimatedPitch;
471
472 fn next(&mut self) -> Option<Self::Item> {
473 let (step_index, candidate_selection) = self.selected_candidates.next()?;
474 let frequency = (self.candidate_frequencies.clone())
475 .nth(candidate_selection.selected_candidate_index)
476 .unwrap_or_else(|| panic!("candidate index out of bounds"));
477 Some(EstimatedPitch {
478 frequency,
479 energy: candidate_selection.energy,
480 offset: self.last_step_sample_index.wrapping_sub((1 + step_index) * self.step_len) as isize
481 })
482 }
483
484 fn size_hint(&self) -> (usize, Option<usize>) {
485 self.selected_candidates.size_hint()
486 }
487}
488
489impl ExactSizeIterator for EstimatedPitchIter<'_> {}