use crate::{CodecError, CodecResult};
use super::packet::OpusBandwidth;
use super::range_decoder::RangeDecoder;
const MAX_LPC_ORDER: usize = 16;
const MAX_PITCH_LAG: usize = 320;
const LSF_STAGES: usize = 3;
const LSF_COUNT: usize = 16;
const SUBFRAMES: usize = 4;
const LSF_CODEBOOK: [[f32; LSF_COUNT]; 8] = [
[
0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5,
],
[
0.05, 0.15, 0.25, 0.35, 0.45, 0.55, 0.65, 0.75, 0.85, 0.95, 1.05, 1.15, 1.25, 1.35, 1.45,
1.55,
],
[
0.02, 0.12, 0.22, 0.32, 0.42, 0.52, 0.62, 0.72, 0.82, 0.92, 1.02, 1.12, 1.22, 1.32, 1.42,
1.52,
],
[
0.08, 0.18, 0.28, 0.38, 0.48, 0.58, 0.68, 0.78, 0.88, 0.98, 1.08, 1.18, 1.28, 1.38, 1.48,
1.58,
],
[
0.03, 0.13, 0.23, 0.33, 0.43, 0.53, 0.63, 0.73, 0.83, 0.93, 1.03, 1.13, 1.23, 1.33, 1.43,
1.53,
],
[
0.07, 0.17, 0.27, 0.37, 0.47, 0.57, 0.67, 0.77, 0.87, 0.97, 1.07, 1.17, 1.27, 1.37, 1.47,
1.57,
],
[
0.04, 0.14, 0.24, 0.34, 0.44, 0.54, 0.64, 0.74, 0.84, 0.94, 1.04, 1.14, 1.24, 1.34, 1.44,
1.54,
],
[
0.06, 0.16, 0.26, 0.36, 0.46, 0.56, 0.66, 0.76, 0.86, 0.96, 1.06, 1.16, 1.26, 1.36, 1.46,
1.56,
],
];
const GAIN_CODEBOOK: [f32; 32] = [
0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8,
1.9, 2.0, 2.2, 2.4, 2.6, 2.8, 3.0, 3.5, 4.0, 4.5, 5.0, 6.0, 7.0,
];
#[derive(Debug)]
pub struct SilkDecoder {
sample_rate: u32,
channels: usize,
#[allow(dead_code)]
bandwidth: OpusBandwidth,
lpc_coeffs: Vec<f32>,
pitch_lag: usize,
pitch_gain: f32,
prev_samples: Vec<f32>,
excitation_history: Vec<f32>,
consecutive_losses: usize,
last_lsf: Vec<f32>,
}
impl SilkDecoder {
pub fn new(sample_rate: u32, channels: usize, bandwidth: OpusBandwidth) -> Self {
let max_frame_size = (sample_rate / 50) as usize;
Self {
sample_rate,
channels,
bandwidth,
lpc_coeffs: vec![0.0; MAX_LPC_ORDER],
pitch_lag: 100,
pitch_gain: 0.0,
prev_samples: vec![0.0; max_frame_size * channels],
excitation_history: vec![0.0; MAX_PITCH_LAG],
consecutive_losses: 0,
last_lsf: Self::initialize_lsf(),
}
}
fn initialize_lsf() -> Vec<f32> {
let mut lsf = Vec::with_capacity(LSF_COUNT);
for i in 0..LSF_COUNT {
lsf.push((i as f32 + 0.5) * std::f32::consts::PI / (LSF_COUNT as f32 + 1.0));
}
lsf
}
pub fn decode(
&mut self,
data: &[u8],
output: &mut [f32],
frame_size: usize,
) -> CodecResult<()> {
if output.len() < frame_size * self.channels {
return Err(CodecError::InvalidData(
"Output buffer too small".to_string(),
));
}
if data.is_empty() {
return self.decode_plc(output, frame_size);
}
let mut decoder = RangeDecoder::new(data)?;
self.consecutive_losses = 0;
let frame_params = self.decode_frame_params(&mut decoder)?;
let lsf = self.decode_lsf(&mut decoder)?;
self.last_lsf.copy_from_slice(&lsf);
Self::lsf_to_lpc(&lsf, &mut self.lpc_coeffs);
let subframe_size = frame_size / SUBFRAMES;
for subframe_idx in 0..SUBFRAMES {
let subframe_params = self.decode_subframe_params(&mut decoder, &frame_params)?;
let offset = subframe_idx * subframe_size * self.channels;
let subframe_output = &mut output[offset..offset + subframe_size * self.channels];
self.decode_subframe(
&mut decoder,
subframe_output,
subframe_size,
&subframe_params,
)?;
}
let sample_count = frame_size * self.channels;
if sample_count <= self.prev_samples.len() {
self.prev_samples[..sample_count].copy_from_slice(&output[..sample_count]);
}
Ok(())
}
fn decode_frame_params(&mut self, decoder: &mut RangeDecoder) -> CodecResult<FrameParams> {
let vad_flag = decoder.decode_bit(16384)?;
let ltpf_flag = decoder.decode_bit(16384)?;
let gain_index = decoder.decode_uniform(32)? as usize;
let gain = if gain_index < GAIN_CODEBOOK.len() {
GAIN_CODEBOOK[gain_index]
} else {
1.0
};
Ok(FrameParams {
vad_flag,
ltpf_flag,
gain,
})
}
fn decode_lsf(&mut self, decoder: &mut RangeDecoder) -> CodecResult<Vec<f32>> {
let mut lsf = vec![0.0f32; LSF_COUNT];
for stage in 0..LSF_STAGES {
let codebook_index = decoder.decode_uniform(8)? as usize;
if codebook_index < LSF_CODEBOOK.len() {
let codebook_entry = &LSF_CODEBOOK[codebook_index];
for i in 0..LSF_COUNT {
let weight = 1.0 / (stage + 1) as f32;
lsf[i] += codebook_entry[i] * weight;
}
}
}
self.stabilize_lsf(&mut lsf);
Ok(lsf)
}
fn stabilize_lsf(&self, lsf: &mut [f32]) {
const MIN_DISTANCE: f32 = 0.01;
for i in 1..lsf.len() {
if lsf[i] <= lsf[i - 1] + MIN_DISTANCE {
lsf[i] = lsf[i - 1] + MIN_DISTANCE;
}
}
for coeff in lsf.iter_mut() {
*coeff = coeff.clamp(0.0, std::f32::consts::PI);
}
}
fn lsf_to_lpc(lsf: &[f32], lpc: &mut [f32]) {
lpc.fill(0.0);
for (i, &freq) in lsf.iter().enumerate().take(lpc.len()) {
let cos_freq = freq.cos();
lpc[i] = -2.0 * cos_freq;
}
for (i, coeff) in lpc.iter_mut().enumerate() {
let gamma = 0.99_f32.powi(i as i32 + 1);
*coeff *= gamma;
}
}
fn decode_subframe_params(
&mut self,
decoder: &mut RangeDecoder,
frame_params: &FrameParams,
) -> CodecResult<SubframeParams> {
let pitch_lag_delta = decoder.decode_int(5)? as i32;
let pitch_lag =
(self.pitch_lag as i32 + pitch_lag_delta).clamp(20, MAX_PITCH_LAG as i32) as usize;
let pitch_gain_index = decoder.decode_uniform(16)?;
let pitch_gain = (pitch_gain_index as f32 / 15.0).clamp(0.0, 1.0);
let mut ltp_taps = [0.0f32; 5];
for tap in &mut ltp_taps {
let tap_index = decoder.decode_uniform(8)?;
*tap = (tap_index as f32 / 7.0 - 0.5) * 0.5;
}
let tap_sum: f32 = ltp_taps.iter().sum();
if tap_sum.abs() > 0.001 {
for tap in &mut ltp_taps {
*tap /= tap_sum;
}
}
self.pitch_lag = pitch_lag;
self.pitch_gain = pitch_gain;
Ok(SubframeParams {
pitch_lag,
pitch_gain,
ltp_taps,
subframe_gain: frame_params.gain,
})
}
#[allow(clippy::too_many_arguments)]
fn decode_subframe(
&mut self,
decoder: &mut RangeDecoder,
output: &mut [f32],
subframe_size: usize,
params: &SubframeParams,
) -> CodecResult<()> {
let mut excitation = vec![0.0f32; subframe_size];
self.decode_excitation(decoder, &mut excitation, params.subframe_gain)?;
self.apply_ltp(&mut excitation, params);
let mut synthesis = vec![0.0f32; subframe_size];
self.apply_lpc_synthesis(&excitation, &mut synthesis);
self.apply_noise_shaping(&mut synthesis, params.subframe_gain);
for (i, &sample) in synthesis.iter().enumerate() {
for ch in 0..self.channels {
let idx = i * self.channels + ch;
if idx < output.len() {
output[idx] = sample;
}
}
}
self.update_excitation_history(&excitation);
Ok(())
}
fn decode_excitation(
&self,
decoder: &mut RangeDecoder,
excitation: &mut [f32],
gain: f32,
) -> CodecResult<()> {
let pulse_count = decoder.decode_uniform(32)? as usize;
for _ in 0..pulse_count {
let position = decoder.decode_uniform(excitation.len() as u32)? as usize;
let amplitude_index = decoder.decode_uniform(8)?;
let amplitude = (amplitude_index as f32 / 7.0 - 0.5) * gain * 2.0;
if position < excitation.len() {
excitation[position] += amplitude;
}
}
Ok(())
}
fn apply_ltp(&mut self, excitation: &mut [f32], params: &SubframeParams) {
for i in 0..excitation.len() {
let mut ltp_contribution = 0.0;
for (tap_idx, &tap_weight) in params.ltp_taps.iter().enumerate() {
let lag_idx = i + self.excitation_history.len() - params.pitch_lag - 2 + tap_idx;
if lag_idx < self.excitation_history.len() {
ltp_contribution += self.excitation_history[lag_idx] * tap_weight;
}
}
excitation[i] += ltp_contribution * params.pitch_gain;
}
}
fn apply_lpc_synthesis(&self, excitation: &[f32], output: &mut [f32]) {
let mut state = vec![0.0f32; MAX_LPC_ORDER];
for (i, &exc) in excitation.iter().enumerate() {
let mut sum = exc;
for (j, &coeff) in self.lpc_coeffs.iter().enumerate() {
sum -= coeff * state[j];
}
state.rotate_right(1);
state[0] = sum;
output[i] = sum;
}
}
fn apply_noise_shaping(&self, samples: &mut [f32], gain: f32) {
const SHAPING_COEFF: f32 = 0.5;
let mut prev_sample = 0.0;
for sample in samples.iter_mut() {
let shaped = *sample + SHAPING_COEFF * prev_sample;
*sample = shaped * gain.sqrt();
prev_sample = *sample;
}
}
fn update_excitation_history(&mut self, excitation: &[f32]) {
let history_len = self.excitation_history.len();
let exc_len = excitation.len();
if exc_len >= history_len {
self.excitation_history
.copy_from_slice(&excitation[exc_len - history_len..]);
} else {
self.excitation_history.rotate_left(exc_len);
let start = history_len - exc_len;
self.excitation_history[start..].copy_from_slice(excitation);
}
}
fn decode_plc(&mut self, output: &mut [f32], frame_size: usize) -> CodecResult<()> {
self.consecutive_losses += 1;
let attenuation = 0.95_f32.powi(self.consecutive_losses as i32);
for i in 0..frame_size {
for ch in 0..self.channels {
let idx = i * self.channels + ch;
if idx < output.len() {
let pitch_idx = if i >= self.pitch_lag {
(i - self.pitch_lag) * self.channels + ch
} else {
idx
};
if pitch_idx < self.prev_samples.len() {
output[idx] = self.prev_samples[pitch_idx] * attenuation * self.pitch_gain;
} else {
output[idx] = 0.0;
}
}
}
}
self.add_comfort_noise(output, attenuation * 0.01);
Ok(())
}
fn add_comfort_noise(&self, output: &mut [f32], amplitude: f32) {
let mut seed = self.consecutive_losses as u32 * 1_103_515_245 + 12345;
for sample in output.iter_mut() {
seed = seed.wrapping_mul(1_103_515_245).wrapping_add(12345);
let noise = ((seed / 65536) % 32768) as f32 / 32768.0 - 0.5;
*sample += noise * amplitude;
}
}
pub fn reset(&mut self) {
self.lpc_coeffs.fill(0.0);
self.pitch_lag = 100;
self.pitch_gain = 0.0;
self.prev_samples.fill(0.0);
self.excitation_history.fill(0.0);
self.consecutive_losses = 0;
self.last_lsf = Self::initialize_lsf();
}
#[must_use]
pub const fn sample_rate(&self) -> u32 {
self.sample_rate
}
#[must_use]
pub const fn channels(&self) -> usize {
self.channels
}
}
#[derive(Debug, Clone)]
struct FrameParams {
#[allow(dead_code)]
vad_flag: bool,
#[allow(dead_code)]
ltpf_flag: bool,
gain: f32,
}
#[derive(Debug, Clone)]
struct SubframeParams {
pitch_lag: usize,
pitch_gain: f32,
ltp_taps: [f32; 5],
subframe_gain: f32,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum VadDecision {
Active,
Inactive,
}
#[derive(Clone, Debug)]
pub struct VoiceActivityDetector {
sample_rate: u32,
signal_energy: [f32; 4],
noise_floor: [f32; 4],
signal_ema: f32,
noise_ema: f32,
hangover_counter: u32,
hangover_max: u32,
threshold_db: f32,
prev_band_energy: [f32; 4],
frame_count: u64,
}
impl VoiceActivityDetector {
#[must_use]
pub fn new(sample_rate: u32) -> Self {
Self {
sample_rate,
signal_energy: [1e-6f32; 4],
noise_floor: [1e-6f32; 4],
signal_ema: 0.3,
noise_ema: 0.02,
hangover_counter: 0,
hangover_max: 8, threshold_db: 12.0,
prev_band_energy: [0.0f32; 4],
frame_count: 0,
}
}
pub fn set_hangover(&mut self, frames: u32) {
self.hangover_max = frames;
}
pub fn set_threshold_db(&mut self, db: f32) {
self.threshold_db = db.clamp(0.0, 40.0);
}
pub fn process(&mut self, samples: &[f32]) -> VadDecision {
if samples.is_empty() {
self.frame_count += 1;
return VadDecision::Inactive;
}
let band_energy = self.compute_band_energy(samples);
self.update_signal_energy(&band_energy);
let flux: f32 = band_energy
.iter()
.zip(self.prev_band_energy.iter())
.map(|(&b, &p)| (b - p).abs())
.sum::<f32>();
self.prev_band_energy = band_energy;
let is_likely_noise = self.is_likely_noise(&band_energy);
if is_likely_noise {
for i in 0..4 {
self.noise_floor[i] =
self.noise_floor[i] * (1.0 - self.noise_ema) + band_energy[i] * self.noise_ema;
self.noise_floor[i] = self.noise_floor[i].min(self.signal_energy[i]);
}
}
let mut voice_bands = 0u32;
for i in 0..4 {
let noise = self.noise_floor[i].max(1e-10);
let snr_db = 10.0 * (self.signal_energy[i] / noise).log10();
if snr_db >= self.threshold_db {
voice_bands += 1;
}
}
let flux_boost = flux > 0.01;
let speech_active = voice_bands >= 2 || (voice_bands >= 1 && flux_boost);
self.frame_count += 1;
if speech_active {
self.hangover_counter = self.hangover_max;
VadDecision::Active
} else if self.hangover_counter > 0 {
self.hangover_counter -= 1;
VadDecision::Active
} else {
VadDecision::Inactive
}
}
fn compute_band_energy(&self, samples: &[f32]) -> [f32; 4] {
let n = samples.len() as f32;
let alpha2 = {
let fc = 500.0f32 / self.sample_rate as f32;
(-2.0 * std::f32::consts::PI * fc).exp()
};
let mut lp2 = 0.0f32;
let mut band0_e = 0.0f32;
let mut band1_e = 0.0f32;
for &s in samples {
lp2 = lp2 * alpha2 + s * (1.0 - alpha2);
let hp2 = s - lp2;
band0_e += lp2 * lp2;
band1_e += hp2 * hp2;
}
let alpha3 = {
let fc = 3000.0f32 / self.sample_rate as f32;
(-2.0 * std::f32::consts::PI * fc).exp()
};
let mut lp3 = 0.0f32;
let mut band2_e = 0.0f32;
let mut band3_e = 0.0f32;
for &s in samples {
lp3 = lp3 * alpha3 + s * (1.0 - alpha3);
let hp3 = s - lp3;
band2_e += lp3 * lp3;
band3_e += hp3 * hp3;
}
let inv_n = if n > 0.0 { 1.0 / n } else { 1.0 };
[
band0_e * inv_n,
band1_e * inv_n,
band2_e * inv_n,
band3_e * inv_n,
]
}
fn update_signal_energy(&mut self, band_energy: &[f32; 4]) {
for i in 0..4 {
self.signal_energy[i] =
self.signal_energy[i] * (1.0 - self.signal_ema) + band_energy[i] * self.signal_ema;
}
}
fn is_likely_noise(&self, band_energy: &[f32; 4]) -> bool {
let total: f32 = band_energy.iter().sum();
let running: f32 = self.signal_energy.iter().sum();
total < running * 0.5
}
pub fn reset(&mut self) {
self.signal_energy = [1e-6f32; 4];
self.noise_floor = [1e-6f32; 4];
self.hangover_counter = 0;
self.prev_band_energy = [0.0f32; 4];
self.frame_count = 0;
}
#[must_use]
pub const fn hangover_counter(&self) -> u32 {
self.hangover_counter
}
#[must_use]
pub const fn frame_count(&self) -> u64 {
self.frame_count
}
}
#[derive(Debug)]
pub struct SilkEncoder {
sample_rate: u32,
channels: usize,
#[allow(dead_code)]
bandwidth: OpusBandwidth,
vad: VoiceActivityDetector,
last_vad: VadDecision,
pub dtx_enabled: bool,
inactive_frame_count: u32,
}
impl SilkEncoder {
pub fn new(sample_rate: u32, channels: usize, bandwidth: OpusBandwidth) -> Self {
Self {
sample_rate,
channels,
bandwidth,
vad: VoiceActivityDetector::new(sample_rate),
last_vad: VadDecision::Inactive,
dtx_enabled: false,
inactive_frame_count: 0,
}
}
#[must_use]
pub fn run_vad(&mut self, input: &[f32], frame_size: usize) -> VadDecision {
let ch = self.channels;
let mono: Vec<f32> = if ch == 1 {
input[..frame_size.min(input.len())].to_vec()
} else {
(0..frame_size.min(input.len() / ch))
.map(|i| input[i * ch])
.collect()
};
self.last_vad = self.vad.process(&mono);
self.last_vad
}
#[must_use]
pub const fn last_vad_decision(&self) -> VadDecision {
self.last_vad
}
pub fn encode(
&mut self,
input: &[f32],
output: &mut [u8],
frame_size: usize,
) -> CodecResult<usize> {
if output.is_empty() {
return Err(CodecError::InvalidData("Output buffer empty".to_string()));
}
let vad = self.run_vad(input, frame_size);
if vad == VadDecision::Inactive {
self.inactive_frame_count += 1;
if self.dtx_enabled && self.inactive_frame_count > 1 {
return Ok(0);
}
} else {
self.inactive_frame_count = 0;
}
output[0] = if vad == VadDecision::Active {
0x01
} else {
0x00
};
Ok(1)
}
pub fn reset(&mut self) {
self.vad.reset();
self.last_vad = VadDecision::Inactive;
self.inactive_frame_count = 0;
}
#[must_use]
pub const fn vad(&self) -> &VoiceActivityDetector {
&self.vad
}
#[must_use]
pub const fn sample_rate(&self) -> u32 {
self.sample_rate
}
#[must_use]
pub const fn channels(&self) -> usize {
self.channels
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_silk_decoder_creation() {
let decoder = SilkDecoder::new(48000, 2, OpusBandwidth::Wideband);
assert_eq!(decoder.sample_rate(), 48000);
assert_eq!(decoder.channels(), 2);
}
#[test]
fn test_silk_decoder_plc() {
let mut decoder = SilkDecoder::new(48000, 1, OpusBandwidth::Wideband);
let mut output = vec![0.0f32; 480];
let result = decoder.decode_plc(&mut output, 480);
assert!(result.is_ok());
}
#[test]
fn test_lsf_initialization() {
let lsf = SilkDecoder::initialize_lsf();
assert_eq!(lsf.len(), LSF_COUNT);
for i in 1..lsf.len() {
assert!(lsf[i] > lsf[i - 1]);
}
}
#[test]
fn test_silk_encoder_creation() {
let encoder = SilkEncoder::new(48000, 2, OpusBandwidth::Wideband);
assert_eq!(encoder.sample_rate(), 48000);
assert_eq!(encoder.channels(), 2);
}
#[test]
fn test_silk_encoder_encode_active() {
let mut encoder = SilkEncoder::new(16000, 1, OpusBandwidth::Wideband);
let freq = 440.0f32;
let sr = 16000.0f32;
let input: Vec<f32> = (0..320)
.map(|i| (2.0 * std::f32::consts::PI * freq * i as f32 / sr).sin() * 0.5)
.collect();
let mut output = vec![0u8; 1024];
let result = encoder.encode(&input, &mut output, 320);
assert!(result.is_ok());
assert!(
result.expect("encode should succeed") >= 1,
"Active frame must emit at least 1 byte"
);
}
#[test]
fn test_silk_encoder_dtx_silence() {
let mut encoder = SilkEncoder::new(16000, 1, OpusBandwidth::Wideband);
encoder.dtx_enabled = true;
let silence = vec![0.0f32; 320];
let mut output = vec![0u8; 1024];
let _ = encoder.encode(&silence, &mut output, 320);
for _ in 0..5 {
let _ = encoder.encode(&silence, &mut output, 320);
}
let result = encoder.encode(&silence, &mut output, 320);
assert!(result.is_ok());
assert_eq!(
result.expect("encode should succeed"),
0,
"DTX must suppress silent frames"
);
}
#[test]
fn test_vad_creation() {
let vad = VoiceActivityDetector::new(16000);
assert_eq!(vad.frame_count(), 0);
assert_eq!(vad.hangover_counter(), 0);
}
#[test]
fn test_vad_silence_returns_inactive() {
let mut vad = VoiceActivityDetector::new(16000);
let silence = vec![0.0f32; 320];
for _ in 0..20 {
let _ = vad.process(&silence);
}
let decision = vad.process(&silence);
assert_eq!(
decision,
VadDecision::Inactive,
"Prolonged silence must be inactive"
);
}
#[test]
fn test_vad_sine_wave_returns_active() {
let mut vad = VoiceActivityDetector::new(16000);
let silence = vec![0.0f32; 320];
for _ in 0..10 {
let _ = vad.process(&silence);
}
let sine: Vec<f32> = (0..320)
.map(|i| (2.0 * std::f32::consts::PI * 300.0 * i as f32 / 16000.0).sin() * 0.8)
.collect();
let decision = vad.process(&sine);
assert_eq!(
decision,
VadDecision::Active,
"Loud sine wave must be active"
);
}
#[test]
fn test_vad_frame_count_increments() {
let mut vad = VoiceActivityDetector::new(16000);
let frame = vec![0.0f32; 160];
for i in 1..=5 {
vad.process(&frame);
assert_eq!(vad.frame_count(), i);
}
}
#[test]
fn test_vad_empty_frame_returns_inactive() {
let mut vad = VoiceActivityDetector::new(16000);
let decision = vad.process(&[]);
assert_eq!(decision, VadDecision::Inactive);
}
#[test]
fn test_vad_hangover_maintains_active() {
let mut vad = VoiceActivityDetector::new(16000);
vad.set_hangover(4);
let silence = vec![0.0f32; 160];
for _ in 0..5 {
let _ = vad.process(&silence);
}
let tone: Vec<f32> = (0..160)
.map(|i| (2.0 * std::f32::consts::PI * 400.0 * i as f32 / 16000.0).sin() * 0.9)
.collect();
let d1 = vad.process(&tone);
let d2 = vad.process(&silence);
assert_eq!(d1, VadDecision::Active);
assert_eq!(d2, VadDecision::Active, "Hang-over should keep active flag");
}
#[test]
fn test_vad_reset_clears_state() {
let mut vad = VoiceActivityDetector::new(16000);
let frame = vec![0.5f32; 320];
for _ in 0..10 {
vad.process(&frame);
}
vad.reset();
assert_eq!(vad.frame_count(), 0);
assert_eq!(vad.hangover_counter(), 0);
}
#[test]
fn test_vad_set_threshold() {
let mut vad = VoiceActivityDetector::new(16000);
vad.set_threshold_db(20.0);
let low: Vec<f32> = vec![0.0001f32; 320];
for _ in 0..5 {
let _ = vad.process(&low);
}
let d = vad.process(&low);
assert_eq!(d, VadDecision::Inactive);
}
#[test]
fn test_encoder_vad_method() {
let mut encoder = SilkEncoder::new(16000, 1, OpusBandwidth::Narrowband);
let sine: Vec<f32> = (0..320)
.map(|i| (2.0 * std::f32::consts::PI * 250.0 * i as f32 / 16000.0).sin() * 0.7)
.collect();
let silence = vec![0.0f32; 320];
for _ in 0..5 {
let _ = encoder.run_vad(&silence, 320);
}
let decision = encoder.run_vad(&sine, 320);
assert_eq!(decision, VadDecision::Active);
assert_eq!(encoder.last_vad_decision(), VadDecision::Active);
}
}