use pyin;
/// Magnitude threshold that `dbfs_max` hands to `dbfs`: a peak whose absolute
/// value is below this is reported as negative infinity instead of a finite level.
const DBFS_EPSILON: f64 = 1e-20;
/// Returns the DC bias of `audio`, i.e. the arithmetic mean of its samples.
///
/// An empty slice has no bias and yields `0.0` (rather than the `NaN` that a
/// `0.0 / 0.0` division would produce), matching how `energy` treats empty input.
#[inline]
pub fn dc_bias(audio: &[f64]) -> f64 {
    if audio.is_empty() {
        return 0.0;
    }
    audio.iter().sum::<f64>() / audio.len() as f64
}
/// Converts a linear amplitude to decibels: `20 * log10(|val|)`.
///
/// * `val` - linear amplitude; only its magnitude matters, so negative
///   samples map to the same level as their positive counterparts.
/// * `epsilon` - magnitudes strictly below this are treated as silence.
///
/// Returns `f64::NEG_INFINITY` when `|val| < epsilon`, otherwise the level in dB.
#[inline]
pub fn dbfs(val: f64, epsilon: f64) -> f64 {
    let magnitude = val.abs();
    if magnitude < epsilon {
        f64::NEG_INFINITY
    } else {
        // Take the log of the magnitude: log10 of a negative number is NaN.
        20.0 * magnitude.log10()
    }
}
/// Returns the level, as computed by `dbfs`, of the largest absolute sample in `audio`.
///
/// The peak search starts from `0.0`, so an empty slice is evaluated as a
/// peak of zero. NaN samples never replace the running peak.
pub fn dbfs_max(audio: &[f64]) -> f64 {
    let peak = audio
        .iter()
        .map(|sample| sample.abs())
        // `f64::max` returns the non-NaN operand, so NaN samples are skipped.
        .fold(0.0_f64, f64::max);
    dbfs(peak, DBFS_EPSILON)
}
/// Returns the root-mean-square level of `audio`: the square root of the
/// mean of the squared samples.
///
/// An empty slice yields `0.0`.
pub fn energy(audio: &[f64]) -> f64 {
    if audio.is_empty() {
        return 0.0;
    }
    let sum_of_squares = audio
        .iter()
        .fold(0.0_f64, |acc, sample| acc + sample.powf(2.0));
    (1.0 / audio.len() as f64 * sum_of_squares).sqrt()
}
/// Returns the zero-crossing rate of `audio` in crossings per second.
///
/// A crossing is counted whenever a non-zero sample has the opposite sign of
/// the most recent non-zero sample before it. Samples that are exactly zero
/// (or NaN) carry no sign and are skipped, so a signal passing through zero is
/// counted once in either direction (`[1, 0, -1]` and `[-1, 0, 1]` each contain
/// one crossing), while a signal that merely touches zero (`[-1, 0, -1]`)
/// contains none.
///
/// * `audio` - the samples to analyse.
/// * `sample_rate` - sampling frequency, used to scale the count to a rate.
///
/// The count is multiplied by `sample_rate` and divided by the number of
/// samples. An empty slice yields `0.0`.
pub fn zero_crossing_rate(audio: &[f64], sample_rate: u32) -> f64 {
    if audio.is_empty() {
        return 0.0;
    }

    let mut crossings: usize = 0;
    // Sign of the last non-zero sample seen; `None` until one is encountered.
    let mut previous_positive: Option<bool> = None;
    for &sample in audio {
        if sample == 0.0 || sample.is_nan() {
            continue;
        }
        let positive = sample > 0.0;
        if previous_positive == Some(!positive) {
            crossings += 1;
        }
        previous_positive = Some(positive);
    }

    crossings as f64 * f64::from(sample_rate) / audio.len() as f64
}
/// Reduces a pYIN analysis of `audio` to a single frequency value.
///
/// The analysis uses one frame length for the whole call: the length of
/// `audio`, capped at 14000 samples. Frames are centred with zero padding and
/// NaN is passed as the fill value for unvoiced frames. The second element of
/// the executor's result tuple is then stripped of NaN entries, sorted, and
/// the element at index `len / 2` is returned (for an even count this is the
/// upper of the two middle values).
///
/// * `audio` - samples to analyse.
/// * `sample_rate` - sampling frequency of `audio`.
/// * `f_min`, `f_max` - frequency bounds forwarded to the pYIN executor.
///
/// Returns `f64::NAN` when no non-NaN estimate remains.
pub fn pyin_pitch_estimator_single(audio: &[f64], sample_rate: u32, f_min: f64, f_max: f64) -> f64 {
    let frame_length = audio.len().min(14000);
    let framing = pyin::Framing::Center::<f64>(pyin::PadMode::<f64>::Constant(0.0));
    let mut executor = pyin::PYINExecutor::<f64>::new(
        f_min,
        f_max,
        sample_rate,
        frame_length,
        None,
        None,
        Some(0.1),
    );
    let (_, frequencies, _, _) = executor.pyin(audio, f64::NAN, framing);

    let mut estimates: Vec<f64> = frequencies
        .iter()
        .copied()
        .filter(|frequency| !frequency.is_nan())
        .collect();
    if estimates.is_empty() {
        return f64::NAN;
    }

    // NaNs were removed above, so the `Equal` fallback is only a safety net.
    estimates.sort_unstable_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    estimates[estimates.len() / 2]
}
/// Runs a pYIN analysis over `audio` and returns the executor's result unchanged.
///
/// Frames are centred with zero padding, NaN is passed as the fill value for
/// unvoiced frames, and a resolution of 0.1 is requested; the two remaining
/// optional constructor arguments are left as `None`.
///
/// * `audio` - samples to analyse.
/// * `sample_rate` - sampling frequency of `audio`.
/// * `f_min`, `f_max` - frequency bounds forwarded to the pYIN executor.
/// * `frame_length` - analysis frame length forwarded to the executor.
///
/// The test in this file labels the four returned vectors, in order, as
/// timestamps, frequencies, voiced flags and probabilities.
pub fn pyin_pitch_estimator(audio: &[f64], sample_rate: u32, f_min: f64, f_max: f64, frame_length: usize) -> (Vec<f64>, Vec<f64>, Vec<bool>, Vec<f64>) {
    let zero_padded_center = pyin::Framing::Center::<f64>(pyin::PadMode::<f64>::Constant(0.0));
    let mut pyin_executor = pyin::PYINExecutor::<f64>::new(
        f_min,
        f_max,
        sample_rate,
        frame_length,
        None,
        None,
        Some(0.1),
    );
    pyin_executor.pyin(audio, f64::NAN, zero_padded_center)
}
#[cfg(test)]
mod test {
    use super::*;
    use crate::read;

    // Recording analysed by both tests. This is an absolute path on one
    // particular machine, so the tests panic wherever the file is absent.
    const SAMPLE_PATH: &str = "D:\\Recording\\Samples\\Iowa\\Bass.arco.mono.2444.1\\samples\\Bass.arco.sulD.ff.C3B3.mono.19.wav";

    /// Runs the frame-by-frame estimator on the first channel and prints every output vector.
    #[test]
    fn test_pyin() {
        let audio_path = String::from(SAMPLE_PATH);
        let audio = read(&audio_path).unwrap_or_else(|_| panic!("could not read audio"));
        let frame_length: usize = 2048;
        let (timestamps, frequencies, voiced, probabilities) =
            pyin_pitch_estimator(&audio.samples[0], audio.sample_rate, 50.0, 500.0, frame_length);
        println!("Timestamps: {:?}\nFrequencies: {:?}\nVoiced: {:?}\nProbabilities: {:?}", timestamps, frequencies, voiced, probabilities);
    }

    /// Runs the single-value estimator on the first channel and prints the result.
    #[test]
    fn test_pyin_single() {
        let audio_path = String::from(SAMPLE_PATH);
        let audio = read(&audio_path).unwrap_or_else(|_| panic!("could not read audio"));
        let pitch = pyin_pitch_estimator_single(&audio.samples[0], audio.sample_rate, 50.0, 500.0);
        println!("Result: {}", pitch);
    }
}