use crate::silk::ltp::pitch_lag_bounds;
use crate::toc::OpusBandwidth;
/// Outcome of the pitch analysis performed by `analyze_pitch` on one frame.
///
/// The `Default` value (`voiced: false`, lag `0`, correlation `0.0`) is what
/// `analyze_pitch` returns when it bails out before searching for a lag.
#[derive(Clone, Copy, Debug, Default)]
pub struct PitchEstimate {
    /// `true` when the best correlation found reached the voicing threshold.
    pub voiced: bool,
    /// Pitch lag in samples at the internal sample rate; `0` for unvoiced frames.
    pub lag_internal: i32,
    /// Normalized autocorrelation at the selected lag (never negative for
    /// unvoiced results produced by `analyze_pitch`).
    pub correlation: f32,
}
/// Minimum normalized autocorrelation the selected lag must reach for
/// `analyze_pitch` to report the frame as voiced.
const VOICING_CORR_THRESHOLD: f32 = 0.4;
/// Frame RMS below which `analyze_pitch` skips the lag search entirely and
/// returns the default (unvoiced) estimate.
const VOICING_ENERGY_THRESHOLD: f32 = 1e-4;
/// Produces the reduced-rate signal on which the pitch search runs.
///
/// Returns the (possibly decimated) samples together with the sample rate in
/// Hz that this function reports for them:
///
/// * narrowband – samples copied unchanged, reported as 8 kHz;
/// * mediumband – decimated by 2, reported as 6 kHz;
/// * wideband and every other bandwidth – decimated by 2, reported as 8 kHz.
fn downsample_for_pitch(pcm: &[f32], bw: OpusBandwidth) -> (Vec<f32>, u32) {
    // (decimation ratio, reported rate in Hz) for each bandwidth.
    let (ratio, rate_hz): (usize, u32) = match bw {
        OpusBandwidth::Narrowband => (1, 8_000),
        OpusBandwidth::Mediumband => (2, 6_000),
        OpusBandwidth::Wideband => (2, 8_000),
        _ => (2, 8_000),
    };
    // A ratio of 1 makes `box_downsample` hand back a plain copy of `pcm`.
    (box_downsample(pcm, ratio), rate_hz)
}
/// Decimates `pcm` by `ratio` using a box (moving-average) filter.
///
/// Each output sample is the mean of `ratio` consecutive input samples;
/// trailing samples that do not fill a whole group are dropped. A `ratio` of
/// 0 or 1 returns an unmodified copy of the input.
fn box_downsample(pcm: &[f32], ratio: usize) -> Vec<f32> {
    if ratio <= 1 {
        return pcm.to_vec();
    }
    let divisor = ratio as f32;
    pcm.chunks_exact(ratio)
        .map(|group| {
            // Explicit left-to-right fold from +0.0 keeps the rounding order fixed.
            let total = group.iter().fold(0f32, |acc, &sample| acc + sample);
            total / divisor
        })
        .collect()
}
/// Normalized cross-correlation between `x` and itself delayed by `lag` samples.
///
/// The sums run over the overlapping region only (`x[lag..]` against
/// `x[..len - lag]`) and are accumulated in `f64`. Returns `0.0` when `lag`
/// is zero, when `lag` is not smaller than the signal length, or when the
/// energy product is too small (below `1e-12` after the square root) to
/// normalize by.
fn normalized_autocorr(x: &[f32], lag: usize) -> f32 {
    if lag == 0 || lag >= x.len() {
        return 0.0;
    }
    // Pair each sample from `lag` onwards with the one `lag` positions earlier;
    // `zip` stops once the shifted slice is exhausted.
    let (cross, energy_now, energy_past) = x[lag..].iter().zip(x.iter()).fold(
        (0f64, 0f64, 0f64),
        |(cross, energy_now, energy_past), (&now, &past)| {
            let now = f64::from(now);
            let past = f64::from(past);
            (
                cross + now * past,
                energy_now + now * now,
                energy_past + past * past,
            )
        },
    );
    let norm = (energy_now * energy_past).sqrt();
    if norm < 1e-12 {
        0.0
    } else {
        (cross / norm) as f32
    }
}
/// Estimates the pitch lag of one frame and decides whether the frame is voiced.
///
/// `pcm_internal` holds the frame at the internal sample rate implied by `bw`
/// (8 kHz for narrowband, 12 kHz for mediumband, 16 kHz otherwise).
///
/// The analysis proceeds in four steps:
/// 1. an RMS gate against `VOICING_ENERGY_THRESHOLD`;
/// 2. decimation via `downsample_for_pitch`;
/// 3. an exhaustive normalized-autocorrelation search over the lag range from
///    `pitch_lag_bounds`, rescaled to the decimated rate;
/// 4. a sub-multiple check that prefers a shorter lag `lag / k` when it
///    correlates almost as well as the discovered peak.
///
/// Returns `PitchEstimate::default()` when the frame is too quiet, when fewer
/// than 32 decimated samples are available, or when the rescaled lag range is
/// empty. Otherwise the result is unvoiced (lag `0`, correlation clamped to be
/// non-negative) if the final correlation is below `VOICING_CORR_THRESHOLD`,
/// and voiced with the lag converted back to the internal rate and clamped to
/// the `pitch_lag_bounds` range if not.
pub fn analyze_pitch(pcm_internal: &[f32], bw: OpusBandwidth) -> PitchEstimate {
    // Energy gate: skip the search for (near-)silent frames. `max(1)` keeps the
    // division defined for an empty frame.
    let energy: f64 = pcm_internal
        .iter()
        .map(|v| (*v as f64) * (*v as f64))
        .sum();
    let rms = (energy / pcm_internal.len().max(1) as f64).sqrt() as f32;
    if rms < VOICING_ENERGY_THRESHOLD {
        return PitchEstimate::default();
    }

    let (ds, ds_rate) = downsample_for_pitch(pcm_internal, bw);
    if ds.len() < 32 {
        return PitchEstimate::default();
    }

    // Translate the lag bounds from the internal rate to the decimated rate.
    let (min_lag_int, max_lag_int) = pitch_lag_bounds(bw);
    let internal_rate = match bw {
        OpusBandwidth::Narrowband => 8_000.0_f32,
        OpusBandwidth::Mediumband => 12_000.0,
        OpusBandwidth::Wideband => 16_000.0,
        _ => 16_000.0,
    };
    let scale = ds_rate as f32 / internal_rate;
    // Widen outwards (floor/ceil), but never below lag 2 and never beyond what
    // the decimated frame can support.
    let min_lag_ds = ((min_lag_int as f32 * scale).floor() as i32).max(2);
    let max_lag_ds = ((max_lag_int as f32 * scale).ceil() as i32).min(ds.len() as i32 - 1);
    if max_lag_ds <= min_lag_ds {
        return PitchEstimate::default();
    }

    // Exhaustive search; every correlation is kept so the sub-multiple check
    // below can look values up instead of recomputing them.
    let search_len = (max_lag_ds - min_lag_ds + 1) as usize;
    let mut corrs = Vec::with_capacity(search_len);
    let mut best_lag_ds = min_lag_ds;
    let mut best_corr = -1f32;
    for lag in min_lag_ds..=max_lag_ds {
        let c = normalized_autocorr(&ds, lag as usize);
        corrs.push(c);
        if c > best_corr {
            best_corr = c;
            best_lag_ds = lag;
        }
    }

    // Sub-multiple (octave-error) correction. A periodic signal correlates just
    // as well at 2x, 3x, ... its true period, so the peak found above may be a
    // multiple of the real lag. Probe `lag / k` (within one sample either way)
    // for k = 2..=12 and accept it when it reaches 85 % of the discovered peak.
    //
    // Both reference values are frozen at discovery. Dividing the *updated*
    // lag instead would compound the divisors (lag/2, then lag/6, lag/24, ...)
    // and skip lag/3, lag/4, ... as soon as one candidate had been accepted.
    let discovered_lag = best_lag_ds;
    let discovered_corr = best_corr;
    for k in 2..=12i32 {
        let cand = discovered_lag / k;
        if cand < min_lag_ds {
            // Candidates only get shorter as k grows; nothing left in range.
            break;
        }
        let lo = (cand - 1).max(min_lag_ds);
        let hi = (cand + 1).min(max_lag_ds);
        let mut cand_best_lag = cand;
        let mut cand_best_corr = -1f32;
        for l in lo..=hi {
            let c = corrs[(l - min_lag_ds) as usize];
            if c > cand_best_corr {
                cand_best_corr = c;
                cand_best_lag = l;
            }
        }
        // Later (shorter) candidates override earlier ones, so the shortest
        // qualifying sub-multiple wins.
        if cand_best_corr >= 0.85 * discovered_corr {
            best_lag_ds = cand_best_lag;
            best_corr = cand_best_corr;
        }
    }

    if best_corr < VOICING_CORR_THRESHOLD {
        return PitchEstimate {
            voiced: false,
            lag_internal: 0,
            correlation: best_corr.max(0.0),
        };
    }

    // Convert the lag back to the internal rate and keep it inside the bounds.
    let lag_internal_f = best_lag_ds as f32 / scale;
    let lag_internal = (lag_internal_f.round() as i32).clamp(min_lag_int, max_lag_int);
    PitchEstimate {
        voiced: true,
        lag_internal,
        correlation: best_corr,
    }
}
// Unit tests for `analyze_pitch` using synthetic signals.
#[cfg(test)]
mod tests {
use super::*;
use core::f32::consts::PI;
// Generates `n` samples of a sine wave at `freq` Hz, sampled at `rate` Hz and
// scaled by `amp`.
fn synth_sine(freq: f32, rate: u32, n: usize, amp: f32) -> Vec<f32> {
(0..n)
.map(|i| (2.0 * PI * freq * i as f32 / rate as f32).sin() * amp)
.collect()
}
// Generates `n` samples of a harmonic tone: fundamental `f0` plus its second
// harmonic at half amplitude and third harmonic at quarter amplitude, all
// scaled by `amp`.
fn synth_harmonic(f0: f32, rate: u32, n: usize, amp: f32) -> Vec<f32> {
(0..n)
.map(|i| {
let t = i as f32 / rate as f32;
((2.0 * PI * f0 * t).sin()
+ 0.5 * (2.0 * PI * 2.0 * f0 * t).sin()
+ 0.25 * (2.0 * PI * 3.0 * f0 * t).sin())
* amp
})
.collect()
}
// An all-zero frame must be reported as unvoiced with lag 0.
#[test]
fn silence_is_unvoiced() {
let pcm = vec![0f32; 160];
let p = analyze_pitch(&pcm, OpusBandwidth::Narrowband);
assert!(!p.voiced);
assert_eq!(p.lag_internal, 0);
}
// 200 Hz at 8 kHz has a period of 8000 / 200 = 40 samples.
#[test]
fn pure_sine_200hz_voiced_at_nb() {
let pcm = synth_sine(200.0, 8_000, 160, 0.3);
let p = analyze_pitch(&pcm, OpusBandwidth::Narrowband);
assert!(
p.voiced,
"200 Hz sine should be voiced (corr={})",
p.correlation
);
assert!(
(p.lag_internal - 40).abs() <= 2,
"expected lag ≈ 40 samples, got {}",
p.lag_internal
);
assert!(p.correlation > 0.8);
}
// 150 Hz at 16 kHz has a period of 16000 / 150 ≈ 106.7 samples; the wider
// tolerance allows for the search running on the decimated signal.
#[test]
fn harmonic_150hz_voiced_at_wb() {
let pcm = synth_harmonic(150.0, 16_000, 320, 0.3);
let p = analyze_pitch(&pcm, OpusBandwidth::Wideband);
assert!(
p.voiced,
"harmonic @ 150 Hz should be voiced (corr={})",
p.correlation
);
assert!(
(p.lag_internal - 107).abs() <= 4,
"expected lag ≈ 107, got {}",
p.lag_internal
);
}
// Pseudo-random noise from a fixed-seed linear congruential generator (so the
// test is deterministic) must not be classified as voiced.
#[test]
fn white_noise_is_unvoiced() {
let mut s = 0x1234_5678u32;
let pcm: Vec<f32> = (0..320)
.map(|_| {
s = s.wrapping_mul(1103515245).wrapping_add(12345);
(s >> 16) as i16 as f32 / 32768.0 * 0.3
})
.collect();
let p = analyze_pitch(&pcm, OpusBandwidth::Wideband);
assert!(
!p.voiced,
"white noise should be unvoiced (corr={})",
p.correlation
);
}
// The same 200 Hz tone rendered at 8, 12 and 16 kHz must yield lags of about
// 40, 60 and 80 samples respectively, i.e. the lag is reported at the
// internal rate of each bandwidth.
#[test]
fn lag_scales_across_bandwidths() {
let nb = synth_sine(200.0, 8_000, 160, 0.3);
let mb = synth_sine(200.0, 12_000, 240, 0.3);
let wb = synth_sine(200.0, 16_000, 320, 0.3);
let pn = analyze_pitch(&nb, OpusBandwidth::Narrowband);
let pm = analyze_pitch(&mb, OpusBandwidth::Mediumband);
let pw = analyze_pitch(&wb, OpusBandwidth::Wideband);
assert!(pn.voiced && pm.voiced && pw.voiced);
assert!((pn.lag_internal - 40).abs() <= 2);
assert!((pm.lag_internal - 60).abs() <= 4);
assert!((pw.lag_internal - 80).abs() <= 4);
}
}