use crate::estimator::{PitchEstimator, PitchFrame, Result};
use pyin::{Framing, PYINExecutor, PadMode};
/// Sample rate handed to the pYIN executor and reported by
/// `target_sample_rate`; samples per second, as it converts the hop length
/// (in samples) into `hop_s`.
const PYIN_SR: u32 = 48_000;
/// Analysis frame length passed to the executor (presumably in samples — confirm
/// against the `pyin` crate documentation).
const FRAME_LENGTH: usize = 2_048;
/// Hop between consecutive analysis frames, in samples.
const HOP_LENGTH: usize = 960;
/// Lower bound of the pitch search range passed to the executor (presumably Hz).
const FMIN: f64 = 65.0;
/// Upper bound of the pitch search range passed to the executor (presumably Hz).
const FMAX: f64 = 1_100.0;
/// Number of buffered samples analysed per executor call.
///
/// NOTE(review): 16 000 is not a whole multiple of `HOP_LENGTH` (960), so a
/// batch does not contain an integral number of hops.
const BATCH_SAMPLES: usize = 16_000;
/// Streaming pitch estimator that accumulates audio and analyses it in
/// fixed-size batches of `BATCH_SAMPLES` samples with a pYIN executor.
pub struct PyinEstimator {
/// pYIN executor built by `make_exec`; replaced with a fresh one on `reset`.
executor: PYINExecutor<f64>,
/// Samples received by `process` (widened to `f64`) that have not yet been
/// consumed as part of a full batch.
buffer: Vec<f64>,
/// Total number of samples already analysed; advances by `BATCH_SAMPLES`
/// after every batch and is zeroed by `reset`.
samples_processed: u64,
}
/// Builds a pYIN executor configured from this file's constants.
///
/// Only the hop length is supplied explicitly among the optional arguments;
/// the other two are left as `None` so the `pyin` crate picks its defaults.
fn make_exec() -> PYINExecutor<f64> {
    // Local names follow the argument order documented by the `pyin` crate
    // (win_length, hop_length, resolution) — confirm against the pinned version.
    let win_length = None;
    let hop_length = Some(HOP_LENGTH);
    let resolution = None;

    PYINExecutor::new(
        FMIN,
        FMAX,
        PYIN_SR,
        FRAME_LENGTH,
        win_length,
        hop_length,
        resolution,
    )
}
impl PyinEstimator {
    /// Creates an estimator with a fresh executor, an empty sample buffer and
    /// a zeroed sample counter.
    ///
    /// The buffer is pre-sized to hold two batches' worth of samples. As
    /// written, construction always returns `Ok`.
    pub fn new() -> Result<Self> {
        let buffer = Vec::with_capacity(2 * BATCH_SAMPLES);
        let estimator = Self {
            executor: make_exec(),
            buffer,
            samples_processed: 0,
        };
        Ok(estimator)
    }
}
/// Batched pitch estimation on top of the `pyin` executor.
impl PitchEstimator for PyinEstimator {
    /// Short identifier of this estimator.
    fn name(&self) -> &str {
        "pyin"
    }

    /// Sample rate the audio passed to [`process`](Self::process) is expected
    /// to have; this is the rate the executor was configured with.
    fn target_sample_rate(&self) -> u32 {
        PYIN_SR
    }

    /// Discards buffered audio, zeroes the sample counter and rebuilds the
    /// executor so no state carries over to the next stream.
    fn reset(&mut self) {
        self.buffer.clear();
        self.samples_processed = 0;
        self.executor = make_exec();
    }

    /// Appends `audio_target_sr` to the internal buffer and analyses every
    /// complete batch of `BATCH_SAMPLES` samples now available.
    ///
    /// Returns one [`PitchFrame`] per analysis frame produced by the executor,
    /// in chronological order. Samples that do not fill a whole batch stay
    /// buffered for a later call, so the returned vector may be empty.
    ///
    /// Frames whose pitch estimate is NaN are reported with `pitch_hz == 0.0`;
    /// `confidence` carries the executor's voiced probability.
    fn process(&mut self, audio_target_sr: &[f32]) -> Result<Vec<PitchFrame>> {
        self.buffer
            .extend(audio_target_sr.iter().map(|&s| f64::from(s)));

        let sample_rate = f64::from(PYIN_SR);
        let mut frames = Vec::new();
        // Number of leading buffer samples already analysed in this call; they
        // are removed in one `drain` after the loop instead of once per batch.
        let mut consumed = 0;

        while self.buffer.len() - consumed >= BATCH_SAMPLES {
            // Borrowing the slice (rather than collecting into a new Vec)
            // avoids an allocation per batch; `buffer` and `executor` are
            // distinct fields, so the two borrows do not conflict.
            let chunk = &self.buffer[consumed..consumed + BATCH_SAMPLES];
            let (_timestamps, f0s, _voiced, voiced_prob) =
                self.executor
                    .pyin(chunk, f64::NAN, Framing::Center(PadMode::Reflect));
            consumed += BATCH_SAMPLES;

            let batch_start = self.samples_processed;
            // `samples_processed` only ever grows by BATCH_SAMPLES, so this
            // division is exact. Every batch has the same length and therefore
            // yields the same number of frames, which makes
            // `batch_index * frames_per_batch` a gap-free running frame count.
            // Deriving the offset from `samples_processed / HOP_LENGTH` instead
            // would truncate (BATCH_SAMPLES is not a multiple of HOP_LENGTH)
            // and could hand out the same index in consecutive batches.
            let batch_index = batch_start / BATCH_SAMPLES as u64;
            let frame_idx_offset = batch_index * f0s.len() as u64;

            frames.extend(f0s.iter().zip(voiced_prob.iter()).enumerate().map(
                |(i, (&f0, &vp))| {
                    // Compute the timestamp from the integer sample position in
                    // f64 and narrow once, rather than accumulating f32 terms.
                    let sample_pos = batch_start + i as u64 * HOP_LENGTH as u64;
                    PitchFrame {
                        frame_index: frame_idx_offset + i as u64,
                        time_s: (sample_pos as f64 / sample_rate) as f32,
                        pitch_hz: if f0.is_nan() { 0.0 } else { f0 as f32 },
                        confidence: vp as f32,
                        is_preliminary: false,
                    }
                },
            ));

            self.samples_processed += BATCH_SAMPLES as u64;
        }

        // Drop everything that was analysed, keeping the partial batch (if any).
        self.buffer.drain(..consumed);
        Ok(frames)
    }
}