use crate::packet::Bandwidth;
/// Per-frame signal statistics produced by `analyze_frame`.
#[derive(Debug, Clone, Copy)]
pub(crate) struct FrameAnalysis {
/// Heuristic music-vs-speech score. `analyze_frame` clamps it to
/// `0.0..=1.0` and reports `0.5` when the frame holds no samples.
pub music_probability: f32,
/// Audio bandwidth estimated from how the frame's energy is spread
/// across the analysis bands.
pub detected_bandwidth: Bandwidth,
/// Mean squared value of the mono-downmixed samples of the frame
/// (`0.0` when the frame holds no samples).
pub energy: f32,
}
/// Computes a cheap time-domain classification of one PCM frame.
///
/// `pcm` holds interleaved samples for `channels` channels (a `channels`
/// value of 0 is treated as 1). Multi-channel input is averaged down to mono
/// per sample frame; trailing samples that do not form a complete sample
/// frame are ignored. Filter coefficients are derived for a 48 kHz sample
/// rate.
///
/// The mono signal is run through four one-pole low-pass filters (cut-offs
/// at 4, 6, 8 and 12 kHz). Differences between the filter output energies
/// approximate the energy in each band, which drives both the bandwidth
/// decision and the music/speech score. Lag-1 autocorrelation and the
/// zero-crossing rate nudge the score further.
///
/// Returns a neutral result (`music_probability == 0.5`, full band) when the
/// frame contains no complete sample frame, when its mean energy is below
/// `1e-12` (silence), or when its energy is not finite (NaN/inf samples).
/// In those cases the band fractions are meaningless, and silence would
/// otherwise be scored as pure high-band content.
pub(crate) fn analyze_frame(pcm: &[f32], channels: usize) -> FrameAnalysis {
    /// Sample rate the low-pass coefficients are computed for.
    const FS: f32 = 48_000.0;
    /// Mean energy below which a frame is treated as silence.
    const ENERGY_FLOOR: f32 = 1e-12;
    /// Fraction of total energy a band must hold to count as occupied.
    const BAND_THRESHOLD: f32 = 0.02;

    // Result used whenever there is nothing meaningful to classify.
    let neutral = |energy: f32| FrameAnalysis {
        music_probability: 0.5,
        detected_bandwidth: Bandwidth::FullBand,
        energy,
    };

    let ch = channels.max(1);
    let n = pcm.len() / ch;
    if n == 0 {
        return neutral(0.0);
    }

    // Smoothing factor of a discretised RC low-pass with cut-off `f_hz`.
    let lp_coef = |f_hz: f32| -> f32 {
        let rc = 1.0 / (core::f32::consts::TAU * f_hz);
        let dt = 1.0 / FS;
        dt / (rc + dt)
    };
    let coefs = [
        lp_coef(4_000.0),
        lp_coef(6_000.0),
        lp_coef(8_000.0),
        lp_coef(12_000.0),
    ];

    let mut energy_acc = 0.0f32;
    let mut lp_state = [0.0f32; 4];
    let mut lp_acc = [0.0f32; 4];
    let (mut r0, mut r1) = (0.0f32, 0.0f32);
    let mut zc = 0usize;
    let mut prev: Option<f32> = None;

    for frame in pcm.chunks_exact(ch) {
        // Downmix to mono by averaging the channels of this sample frame.
        let x = if ch == 1 {
            frame[0]
        } else {
            frame.iter().fold(0.0f32, |acc, &s| acc + s) / ch as f32
        };
        energy_acc += x * x;

        for ((y, e), &a) in lp_state
            .iter_mut()
            .zip(lp_acc.iter_mut())
            .zip(coefs.iter())
        {
            *y += a * (x - *y);
            *e += *y * *y;
        }

        // Pairwise statistics need a predecessor, so they start at the
        // second sample.
        if let Some(last) = prev {
            r0 += x * x;
            r1 += x * last;
            if (x >= 0.0) != (last >= 0.0) {
                zc += 1;
            }
        }
        prev = Some(x);
    }

    let frames = n as f32;
    let energy = energy_acc / frames;
    // Silence or non-finite input: dividing by a floored total would turn
    // the floor itself into "top band" energy (or propagate NaN), so report
    // the neutral classification instead.
    if !energy.is_finite() || energy < ENERGY_FLOOR {
        return neutral(energy);
    }

    let lp_energy = [
        lp_acc[0] / frames,
        lp_acc[1] / frames,
        lp_acc[2] / frames,
        lp_acc[3] / frames,
    ];
    let e_total = energy;

    // Band energies from differences of successive low-pass energies;
    // clamped because the estimates are not guaranteed to be monotonic.
    let b0 = lp_energy[0];
    let b1 = (lp_energy[1] - lp_energy[0]).max(0.0);
    let b2 = (lp_energy[2] - lp_energy[1]).max(0.0);
    let b3 = (lp_energy[3] - lp_energy[2]).max(0.0);
    let b4 = (e_total - lp_energy[3]).max(0.0);

    let high_frac = (b3 + b4) / e_total;
    let top_frac = b4 / e_total;
    let low_frac = b0 / e_total;
    let lag1_corr = if r0 > 1e-12 {
        (r1 / r0).clamp(-1.0, 1.0)
    } else {
        0.0
    };
    let zcr = zc as f32 / frames;

    // Pick the widest band that still holds a noticeable share of energy.
    let detected_bandwidth = if top_frac > BAND_THRESHOLD {
        Bandwidth::FullBand
    } else if b3 / e_total > BAND_THRESHOLD {
        Bandwidth::SuperWideBand
    } else if b2 / e_total > BAND_THRESHOLD {
        Bandwidth::WideBand
    } else if b1 / e_total > BAND_THRESHOLD {
        Bandwidth::MediumBand
    } else {
        Bandwidth::NarrowBand
    };

    // Start undecided; high-band content pushes towards music, low-band
    // dominance towards speech.
    let mut p = 0.5f32;
    p += 1.6 * high_frac;
    p += 2.0 * top_frac;
    p -= 0.5 * low_frac;
    // Strongly correlated, slowly varying, low-band-only signal: lean speech.
    if lag1_corr > 0.9 && zcr < 0.1 && high_frac < 0.05 {
        p -= 0.2;
    }
    // Many zero crossings together with real high-band energy: lean music.
    if zcr > 0.25 && high_frac > 0.1 {
        p += 0.2;
    }

    FrameAnalysis {
        music_probability: p.clamp(0.0, 1.0),
        detected_bandwidth,
        energy,
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec::Vec;

    /// Sample rate used to synthesise the test signals.
    const SAMPLE_RATE: f32 = 48_000.0;
    /// Number of samples in each synthesised test frame.
    const FRAME_LEN: usize = 960;

    /// Builds `n` samples of a sum of sinusoids, given as
    /// `(frequency_hz, amplitude)` pairs.
    fn tone(freqs: &[(f32, f32)], n: usize) -> Vec<f32> {
        let mut samples = Vec::with_capacity(n);
        for i in 0..n {
            let t = i as f32 / SAMPLE_RATE;
            let sample = freqs
                .iter()
                .map(|&(f, a)| a * (core::f32::consts::TAU * f * t).sin())
                .sum();
            samples.push(sample);
        }
        samples
    }

    #[test]
    fn speech_like_low_band_reads_as_speech() {
        let partials = [(180.0, 0.5), (900.0, 0.25), (2400.0, 0.1)];
        let a = analyze_frame(&tone(&partials, FRAME_LEN), 1);

        assert!(
            a.music_probability < 0.45,
            "expected speech, got p={}",
            a.music_probability
        );

        let is_speech_band = matches!(
            a.detected_bandwidth,
            Bandwidth::NarrowBand | Bandwidth::MediumBand | Bandwidth::WideBand
        );
        assert!(is_speech_band, "speech bandwidth {:?}", a.detected_bandwidth);
    }

    #[test]
    fn bright_broadband_reads_as_music() {
        let partials = [(220.0, 0.3), (3000.0, 0.3), (9000.0, 0.35), (15000.0, 0.3)];
        let a = analyze_frame(&tone(&partials, FRAME_LEN), 1);

        assert!(
            a.music_probability > 0.55,
            "expected music, got p={}",
            a.music_probability
        );
        assert_eq!(a.detected_bandwidth, Bandwidth::FullBand);
    }

    #[test]
    fn silence_is_handled() {
        let silent = [0.0; FRAME_LEN];
        let a = analyze_frame(&silent, 1);
        assert!(a.energy < 1e-9);
    }
}