use oxideav_celt::range_encoder::RangeEncoder;
use oxideav_core::Result;
use crate::silk::lsf;
use crate::silk::ltp;
use crate::silk::pitch_analysis::{analyze_pitch, PitchEstimate};
use crate::silk::tables;
use crate::toc::OpusBandwidth;
/// Stage-1 NLSF codebook index that both encode paths use and write for every frame.
const NLSF_STAGE1_IDX: usize = 0;
/// Gain index coded (split into MSB/LSB symbols) at the start of every unvoiced frame.
const GAIN_INDEX_UNVOICED: i32 = 0;
/// Gain index coded (split into MSB/LSB symbols) at the start of every voiced frame.
const GAIN_INDEX_VOICED: i32 = 0;
/// LTP scaling factor in Q14 applied and written for voiced frames (15565 / 16384 ≈ 0.95).
const LTP_SCALE_Q14_VOICED: i32 = 15565;
/// LTP periodicity index used for every voiced frame; it is passed to the LTP filter helpers.
const LTP_PERIODICITY_VOICED: usize = 2;
/// Upper bound applied to the excitation magnitude (in quantiser steps) before shell quantisation.
const CARRIER_FULL_SCALE: f32 = 120.0;
/// Per-bandwidth configuration of the SILK frame encoder.
#[derive(Copy, Clone, Debug)]
pub struct BandwidthParams {
/// Opus bandwidth this configuration describes.
pub bandwidth: OpusBandwidth,
/// Number of LPC coefficients used by the short-term predictor.
pub lpc_order: usize,
/// Samples per sub-frame; the frame length is this value times the sub-frame count.
pub subframe_len: usize,
}
impl BandwidthParams {
pub const fn nb() -> Self {
Self {
bandwidth: OpusBandwidth::Narrowband,
lpc_order: 10,
subframe_len: 40, }
}
pub const fn mb() -> Self {
Self {
bandwidth: OpusBandwidth::Mediumband,
lpc_order: 10,
subframe_len: 60, }
}
pub const fn wb() -> Self {
Self {
bandwidth: OpusBandwidth::Wideband,
lpc_order: 16,
subframe_len: 80, }
}
}
/// Stateful encoder for SILK frame bodies at the internal sampling rate.
pub struct SilkFrameEncoder {
// Bandwidth-dependent configuration (LPC order, sub-frame length).
params: BandwidthParams,
// Sub-frames per frame; the constructor only accepts 2 or 4.
n_subframes: usize,
// Tail of the previous frame's synthesised output, used as LPC filter memory.
prev_synth: Vec<f32>,
// Primary pitch lag of the last voiced frame; set to 0 by unvoiced frames and by `reset`.
prev_pitch_lag: i32,
// Most recent synthesised samples (480 of them), read by the voiced path's LTP predictor.
ltp_history: Vec<f32>,
// When set, every frame takes the unvoiced path and pitch analysis is skipped.
force_unvoiced: bool,
}
impl SilkFrameEncoder {
/// Creates an encoder for frames of 4 sub-frames (20 ms) with the given parameters.
pub fn new(params: BandwidthParams) -> Self {
Self::new_with_subframes(params, 4)
}
/// Creates an encoder with an explicit number of sub-frames per frame.
///
/// All history buffers start zeroed and the forced-unvoiced switch is off.
///
/// # Panics
///
/// Panics unless `n_subframes` is 2 (10 ms) or 4 (20 ms).
pub fn new_with_subframes(params: BandwidthParams, n_subframes: usize) -> Self {
    assert!(
        matches!(n_subframes, 2 | 4),
        "SILK frame encoder only supports 2 (10 ms) or 4 (20 ms) sub-frames, got {n_subframes}"
    );
    // One LPC-memory slot per predictor tap.
    let prev_synth = vec![0.0; params.lpc_order];
    let ltp_history = vec![0.0; 480];
    Self {
        params,
        n_subframes,
        prev_synth,
        prev_pitch_lag: 0,
        ltp_history,
        force_unvoiced: false,
    }
}
/// Forces every subsequent frame through the unvoiced path when `f` is true.
///
/// While set, `encode_frame_body` does not run pitch analysis at all.
#[doc(hidden)]
pub fn set_force_unvoiced(&mut self, f: bool) {
self.force_unvoiced = f;
}
/// Narrowband encoder with 4 sub-frames per frame (20 ms).
pub fn new_nb_20ms() -> Self {
Self::new(BandwidthParams::nb())
}
/// Mediumband encoder with 4 sub-frames per frame (20 ms).
pub fn new_mb_20ms() -> Self {
Self::new(BandwidthParams::mb())
}
/// Wideband encoder with 4 sub-frames per frame (20 ms).
pub fn new_wb_20ms() -> Self {
Self::new(BandwidthParams::wb())
}
/// Narrowband encoder with 2 sub-frames per frame (10 ms).
pub fn new_nb_10ms() -> Self {
Self::new_with_subframes(BandwidthParams::nb(), 2)
}
/// Mediumband encoder with 2 sub-frames per frame (10 ms).
pub fn new_mb_10ms() -> Self {
Self::new_with_subframes(BandwidthParams::mb(), 2)
}
/// Wideband encoder with 2 sub-frames per frame (10 ms).
pub fn new_wb_10ms() -> Self {
Self::new_with_subframes(BandwidthParams::wb(), 2)
}
/// Number of samples in one frame: sub-frame length times sub-frame count.
pub fn frame_len(&self) -> usize {
self.params.subframe_len * self.n_subframes
}
/// Internal sampling rate in Hz for the configured bandwidth, as reported by
/// `super::internal_rate_hz`.
pub fn internal_rate_hz(&self) -> u32 {
super::internal_rate_hz(self.params.bandwidth)
}
/// LPC order of the configured bandwidth.
pub fn lpc_order(&self) -> usize {
self.params.lpc_order
}
/// Samples per sub-frame for the configured bandwidth.
pub fn subframe_len(&self) -> usize {
self.params.subframe_len
}
/// Sub-frames per frame (2 or 4).
pub fn n_subframes(&self) -> usize {
self.n_subframes
}
/// Clears all inter-frame state: LPC memory, LTP history and the previous pitch lag.
///
/// The forced-unvoiced switch is a setting, not state, and is left untouched.
pub fn reset(&mut self) {
    let order = self.params.lpc_order;
    self.prev_synth.clear();
    self.prev_synth.resize(order, 0.0);
    self.ltp_history.clear();
    self.ltp_history.resize(480, 0.0);
    self.prev_pitch_lag = 0;
}
/// Encodes one frame body into `enc`, choosing between the voiced and unvoiced paths.
///
/// Pitch analysis runs on `pcm_internal` unless the forced-unvoiced switch is
/// set; a frame the analysis reports as voiced takes the voiced path, every
/// other frame takes the unvoiced path.
pub fn encode_frame_body(
    &mut self,
    pcm_internal: &[f32],
    enc: &mut RangeEncoder,
) -> Result<()> {
    if !self.force_unvoiced {
        let pitch = analyze_pitch(pcm_internal, self.params.bandwidth);
        if pitch.voiced {
            return self.encode_frame_body_voiced(pcm_internal, enc, pitch);
        }
    }
    self.encode_frame_body_unvoiced(pcm_internal, enc)
}
/// Encodes one frame through the unvoiced path.
///
/// The LPC filter is derived from a fixed NLSF template (stage-1 index
/// `NLSF_STAGE1_IDX`, all-zero residuals) and the gain from
/// `GAIN_INDEX_UNVOICED`; only the excitation depends on `pcm_internal`.
/// Symbols are appended to `enc`, and `prev_synth`, `ltp_history` and
/// `prev_pitch_lag` are updated from the re-synthesised frame.
///
/// `pcm_internal` must hold `frame_len()` samples. This is checked only in
/// debug builds; a shorter slice panics on indexing.
fn encode_frame_body_unvoiced(
&mut self,
pcm_internal: &[f32],
enc: &mut RangeEncoder,
) -> Result<()> {
debug_assert_eq!(pcm_internal.len(), self.frame_len());
let order = self.params.lpc_order;
let frame_len = self.frame_len();
let subframe_len = self.params.subframe_len;
// Frame-type symbol written for this path.
enc.encode_icdf(2, &tables::FRAME_TYPE_ACTIVE_ICDF, 8);
let signal_type: u8 = 1;
// All-zero residuals: the NLSF vector, and therefore the LPC filter, does not depend on the input.
let residuals = vec![0i32; order];
let nlsf_q15 = synthesize_nlsf_like_decoder(NLSF_STAGE1_IDX, false, order, &residuals);
let nlsf_q15 = lsf::stabilize(&nlsf_q15, order);
let lpc = lsf::nlsf_to_lpc(&nlsf_q15, self.params.bandwidth);
let gain_index: i32 = GAIN_INDEX_UNVOICED;
let gain_q16 = super::gain_index_to_q16(gain_index);
// Q16 gain as a float; `.max(1)` keeps the division below away from zero.
let g = gain_q16.max(1) as f32 / 65536.0;
// One quantiser step corresponds to g / 128 in the signal domain.
let scale = 128.0 / g;
let synth_hist = self.prev_synth.clone();
let mut out = vec![0f32; frame_len];
let mut signed_mags = vec![0i32; frame_len];
// Pass 1: closed-loop quantisation. Each sample is predicted from the already
// quantised output (falling back to the previous frame's tail for n < order),
// and the prediction error is rounded and limited to ±CARRIER_FULL_SCALE steps.
for n in 0..frame_len {
let mut pred = 0f32;
for k in 1..=order {
let idx = n as i32 - k as i32;
let past = if idx >= 0 {
out[idx as usize]
} else {
synth_hist[(synth_hist.len() as i32 + idx) as usize]
};
pred += lpc[k - 1] * past;
}
let e_desired = pcm_internal[n] - pred;
let signed_mag_f = (e_desired * scale).round();
let mag_i = signed_mag_f.abs().clamp(0.0, CARRIER_FULL_SCALE) as i32;
let neg = signed_mag_f < 0.0;
let signed = if neg { -mag_i } else { mag_i };
signed_mags[n] = signed;
let e_quant = (signed as f32 / 128.0) * g;
out[n] = (e_quant + pred).clamp(-1.0, 1.0);
}
// Zero-pad the excitation to a multiple of 16 samples before shell quantisation.
// NOTE(review): `quantize_to_shell` presumably maps the values onto what the
// shell coder can represent — confirm in `shell`.
let aligned = signed_mags.len().div_ceil(16) * 16;
signed_mags.resize(aligned, 0);
let recon = super::shell::quantize_to_shell(&signed_mags);
// Pass 2: re-run the synthesis with the shell-quantised excitation so that the
// history saved below is built from the values that are actually coded.
out.fill(0.0);
for n in 0..frame_len {
let mut pred = 0f32;
for k in 1..=order {
let idx = n as i32 - k as i32;
let past = if idx >= 0 {
out[idx as usize]
} else {
synth_hist[(synth_hist.len() as i32 + idx) as usize]
};
pred += lpc[k - 1] * past;
}
let e_quant = (recon[n] as f32 / 128.0) * g;
out[n] = (e_quant + pred).clamp(-1.0, 1.0);
}
signed_mags = recon;
// Gain for the first sub-frame: 3-bit MSB and 3-bit LSB of the gain index.
let msb = ((gain_index >> 3) & 0x7) as usize;
let lsb = (gain_index & 0x7) as usize;
let msb_icdf = match signal_type {
0 => &tables::GAIN_MSB_INACTIVE_ICDF,
1 => &tables::GAIN_MSB_UNVOICED_ICDF,
_ => &tables::GAIN_MSB_VOICED_ICDF,
};
enc.encode_icdf(msb, msb_icdf, 8);
enc.encode_icdf(lsb, &tables::GAIN_LSB_ICDF, 8);
// Every remaining sub-frame gets the same gain-delta symbol (4).
for _ in 1..self.n_subframes {
enc.encode_icdf(4, &tables::GAIN_DELTA_ICDF, 8);
}
// The wideband stage-1 table is used only for Wideband; every other bandwidth uses the NB table.
let stage1_icdf: &[u8] = match self.params.bandwidth {
OpusBandwidth::Wideband => &tables::NLSF_WB_STAGE1_UNVOICED_ICDF,
_ => &tables::NLSF_NB_STAGE1_UNVOICED_ICDF,
};
enc.encode_icdf(NLSF_STAGE1_IDX, stage1_icdf, 8);
// One residual symbol per LPC coefficient, offset by +4 into the 11-symbol table.
let uniform_11 = &tables::NLSF_RESIDUAL_UNIFORM_11_ICDF;
for &r in &residuals {
let mag = (r + 4).clamp(0, 10) as usize;
enc.encode_icdf(mag, uniform_11, 8);
}
// NOTE(review): symbol 3 from an inline 4-symbol uniform table; its meaning is
// not visible in this file — confirm against the decoder.
enc.encode_icdf(3, &[192, 128, 64, 0], 8);
enc.encode_icdf(0, &tables::LCG_SEED_ICDF, 8);
// `subframe_len` is not needed on this path; the binding is discarded explicitly.
let _ = subframe_len;
super::shell::encode_excitation(enc, &signed_mags, signal_type, 0);
// Keep the last `order` output samples as LPC memory for the next frame.
let start = out.len().saturating_sub(order);
self.prev_synth.clear();
self.prev_synth.extend_from_slice(&out[start..]);
shift_ltp_history(&mut self.ltp_history, &out);
// An unvoiced frame leaves no previous pitch lag behind.
self.prev_pitch_lag = 0;
Ok(())
}
/// Encodes one frame through the voiced path.
///
/// Works like the unvoiced path (fixed NLSF template, fixed gain index,
/// closed-loop quantisation followed by a re-synthesis pass) but adds a 5-tap
/// long-term predictor centred on `pitch.lag_internal`, and writes the pitch
/// and LTP parameters to `enc` ahead of the excitation.
///
/// `pcm_internal` must hold `frame_len()` samples. This is checked only in
/// debug builds; a shorter slice panics on indexing.
fn encode_frame_body_voiced(
&mut self,
pcm_internal: &[f32],
enc: &mut RangeEncoder,
pitch: PitchEstimate,
) -> Result<()> {
debug_assert_eq!(pcm_internal.len(), self.frame_len());
let order = self.params.lpc_order;
let frame_len = self.frame_len();
let subframe_len = self.params.subframe_len;
// Frame-type symbol written for this path.
enc.encode_icdf(4, &tables::FRAME_TYPE_ACTIVE_ICDF, 8);
let signal_type: u8 = 2;
// All-zero residuals: the NLSF vector, and therefore the LPC filter, does not depend on the input.
let residuals = vec![0i32; order];
let nlsf_q15 = synthesize_nlsf_like_decoder(NLSF_STAGE1_IDX, true, order, &residuals);
let nlsf_q15 = lsf::stabilize(&nlsf_q15, order);
let lpc = lsf::nlsf_to_lpc(&nlsf_q15, self.params.bandwidth);
let gain_index: i32 = GAIN_INDEX_VOICED;
let gain_q16 = super::gain_index_to_q16(gain_index);
// Q16 gain as a float; `.max(1)` keeps the division below away from zero.
let g = gain_q16.max(1) as f32 / 65536.0;
// One quantiser step corresponds to g / 128 in the signal domain.
let scale = 128.0 / g;
// LTP filter chosen from the pitch correlation; the same index is written for every sub-frame.
let periodicity = LTP_PERIODICITY_VOICED;
let ltp_filter_idx = ltp::pick_ltp_filter_index(pitch.correlation, periodicity);
let ltp_taps = ltp::ltp_filter_from_index(ltp_filter_idx, periodicity);
let primary_lag = pitch.lag_internal;
// One lag per sub-frame, all equal to the primary lag; the vector is only discarded below.
let pitch_lags = vec![primary_lag; self.n_subframes];
let ltp_scale_q14 = LTP_SCALE_Q14_VOICED;
let ltp_scale = ltp_scale_q14 as f32 / 16384.0;
// Extra attenuation applied to the LTP contribution on top of the coded scale.
// NOTE(review): presumably mirrors a matching factor in the decoder — confirm.
let ltp_attn = 0.25_f32;
let synth_hist = self.prev_synth.clone();
let ltp_hist_len = self.ltp_history.len();
let mut out = vec![0f32; frame_len];
let mut signed_mags = vec![0i32; frame_len];
// Pass 1: closed-loop quantisation of what remains after LPC and LTP prediction.
for n in 0..frame_len {
let mut lpc_pred = 0f32;
for k in 1..=order {
let idx = n as i32 - k as i32;
let past = if idx >= 0 {
out[idx as usize]
} else {
synth_hist[(synth_hist.len() as i32 + idx) as usize]
};
lpc_pred += lpc[k - 1] * past;
}
// Five taps at lags primary_lag-2 ..= primary_lag+2. Samples before the frame
// start come from `ltp_history`; positions outside the history read as 0.
let mut ltp_sum = 0f32;
for k in 0..5 {
let lag_k = primary_lag + (k as i32 - 2);
let idx = n as i32 - lag_k;
let past = if idx >= 0 {
out[idx as usize]
} else {
let hi = (ltp_hist_len as i32 + idx) as usize;
self.ltp_history.get(hi).copied().unwrap_or(0.0)
};
ltp_sum += ltp_taps[k] * past;
}
let ltp_pred = ltp_sum * ltp_scale * ltp_attn;
let e_desired = pcm_internal[n] - lpc_pred - ltp_pred;
let signed_mag_f = (e_desired * scale).round();
let mag_i = signed_mag_f.abs().clamp(0.0, CARRIER_FULL_SCALE) as i32;
let neg = signed_mag_f < 0.0;
let signed = if neg { -mag_i } else { mag_i };
signed_mags[n] = signed;
let e_quant = (signed as f32 / 128.0) * g;
out[n] = (e_quant + lpc_pred + ltp_pred).clamp(-1.0, 1.0);
}
// Zero-pad the excitation to a multiple of 16 samples before shell quantisation.
// NOTE(review): `quantize_to_shell` presumably maps the values onto what the
// shell coder can represent — confirm in `shell`.
let aligned = signed_mags.len().div_ceil(16) * 16;
signed_mags.resize(aligned, 0);
let recon = super::shell::quantize_to_shell(&signed_mags);
// Pass 2: re-run the synthesis with the shell-quantised excitation so that the
// history saved below is built from the values that are actually coded.
out.fill(0.0);
for n in 0..frame_len {
let mut lpc_pred = 0f32;
for k in 1..=order {
let idx = n as i32 - k as i32;
let past = if idx >= 0 {
out[idx as usize]
} else {
synth_hist[(synth_hist.len() as i32 + idx) as usize]
};
lpc_pred += lpc[k - 1] * past;
}
let mut ltp_sum = 0f32;
for k in 0..5 {
let lag_k = primary_lag + (k as i32 - 2);
let idx = n as i32 - lag_k;
let past = if idx >= 0 {
out[idx as usize]
} else {
let hi = (ltp_hist_len as i32 + idx) as usize;
self.ltp_history.get(hi).copied().unwrap_or(0.0)
};
ltp_sum += ltp_taps[k] * past;
}
let ltp_pred = ltp_sum * ltp_scale * ltp_attn;
let e_quant = (recon[n] as f32 / 128.0) * g;
out[n] = (e_quant + lpc_pred + ltp_pred).clamp(-1.0, 1.0);
}
signed_mags = recon;
// Gain for the first sub-frame: 3-bit MSB and 3-bit LSB of the gain index.
let msb = ((gain_index >> 3) & 0x7) as usize;
let lsb = (gain_index & 0x7) as usize;
enc.encode_icdf(msb, &tables::GAIN_MSB_VOICED_ICDF, 8);
enc.encode_icdf(lsb, &tables::GAIN_LSB_ICDF, 8);
// Every remaining sub-frame gets the same gain-delta symbol (4).
for _ in 1..self.n_subframes {
enc.encode_icdf(4, &tables::GAIN_DELTA_ICDF, 8);
}
// The wideband stage-1 table is used only for Wideband; every other bandwidth uses the NB table.
let stage1_icdf: &[u8] = match self.params.bandwidth {
OpusBandwidth::Wideband => &tables::NLSF_WB_STAGE1_VOICED_ICDF,
_ => &tables::NLSF_NB_STAGE1_VOICED_ICDF,
};
enc.encode_icdf(NLSF_STAGE1_IDX, stage1_icdf, 8);
// One residual symbol per LPC coefficient, offset by +4 into the 11-symbol table.
let uniform_11 = &tables::NLSF_RESIDUAL_UNIFORM_11_ICDF;
for &r in &residuals {
let mag = (r + 4).clamp(0, 10) as usize;
enc.encode_icdf(mag, uniform_11, 8);
}
// NOTE(review): symbol 3 from an inline 4-symbol uniform table; its meaning is
// not visible in this file — confirm against the decoder.
enc.encode_icdf(3, &[192, 128, 64, 0], 8);
// Pitch and LTP side information: primary lag (given the previous frame's lag),
// contour, periodicity, one filter index per sub-frame, then the LTP scaling.
ltp::encode_primary_pitch_lag(enc, self.params.bandwidth, primary_lag, self.prev_pitch_lag);
ltp::encode_pitch_contour(enc, self.params.bandwidth);
ltp::encode_ltp_periodicity(enc, periodicity);
for _ in 0..self.n_subframes {
ltp::encode_ltp_filter_index(enc, periodicity, ltp_filter_idx);
}
ltp::encode_ltp_scaling(enc, ltp_scale_q14);
enc.encode_icdf(0, &tables::LCG_SEED_ICDF, 8);
// `subframe_len` and `pitch_lags` are not needed on this path; both bindings are discarded explicitly.
let _ = subframe_len;
let _ = pitch_lags; super::shell::encode_excitation(enc, &signed_mags, signal_type, 0);
// Keep the last `order` output samples as LPC memory for the next frame.
let start = out.len().saturating_sub(order);
self.prev_synth.clear();
self.prev_synth.extend_from_slice(&out[start..]);
shift_ltp_history(&mut self.ltp_history, &out);
// Stored so the next voiced frame can pass it to the primary-lag encoder.
self.prev_pitch_lag = primary_lag;
Ok(())
}
}
/// Slides `new_samples` into the fixed-length `history` buffer.
///
/// The buffer keeps its length: the oldest samples are dropped from the front
/// and the new ones are appended at the back. When `new_samples` is at least
/// as long as the buffer, only its trailing `history.len()` samples are kept.
/// The shift is done in place, without allocating.
fn shift_ltp_history(history: &mut Vec<f32>, new_samples: &[f32]) {
    let hist_len = history.len();
    let incoming = new_samples.len();
    if incoming >= hist_len {
        // The new frame alone fills the buffer; keep only its newest samples.
        history.copy_from_slice(&new_samples[incoming - hist_len..]);
    } else {
        // Move the surviving samples to the front, then append the new frame.
        history.copy_within(incoming.., 0);
        history[hist_len - incoming..].copy_from_slice(new_samples);
    }
}
/// Converts left/right channels into un-normalised mid (`l + r`) and side (`l - r`) signals.
///
/// Both outputs have `l.len()` samples. The channels are expected to have equal
/// length (checked in debug builds); a right channel shorter than the left panics.
pub fn stereo_mid_side(l: &[f32], r: &[f32]) -> (Vec<f32>, Vec<f32>) {
    debug_assert_eq!(l.len(), r.len());
    // Bound the right channel to the left channel's length up front.
    let right = &r[..l.len()];
    l.iter()
        .zip(right)
        .map(|(&left, &right)| (left + right, left - right))
        .unzip()
}
/// Finds the stereo-predictor quantisation level closest to `weight_q13`.
///
/// Each of the 15 cells between neighbouring entries of
/// `STEREO_PRED_QUANT_Q13` is split into 5 sub-steps. The search is exhaustive
/// and the first level with the smallest absolute error wins.
///
/// Returns `[cell % 3, sub_step, cell / 3]` for the winning level.
fn quantise_pred_weight_q13(weight_q13: i32) -> [i32; 3] {
    let table = &tables::STEREO_PRED_QUANT_Q13;
    // (absolute error, cell, sub-step) of the closest level seen so far.
    let mut closest: (i32, i32, i32) = (i32::MAX, 0, 0);
    for cell in 0..15usize {
        let lo = table[cell] as i32;
        let hi = table[cell + 1] as i32;
        // A tenth of the cell width (6554 / 65536 ≈ 0.1); levels sit at odd multiples of it.
        let step = ((hi - lo) * 6554) >> 16;
        for sub in 0..5i32 {
            let err = (lo + step * (2 * sub + 1) - weight_q13).abs();
            if err < closest.0 {
                closest = (err, cell as i32, sub);
            }
        }
    }
    let (_, cell, sub) = closest;
    let coarse = cell / 3;
    [cell - 3 * coarse, sub, coarse]
}
/// Quantises and writes the two stereo prediction weights to `enc`.
///
/// The first coded value is the sum of both weights, the second is the second
/// weight alone. The coarse indices of both are written jointly
/// (`5 * first + second`), followed by each value's 3-way and 5-way fine indices.
pub fn encode_stereo_pred_weights(enc: &mut RangeEncoder, pred_q13: [i32; 2]) {
    let [w0, w1] = pred_q13;
    let first = quantise_pred_weight_q13(w0 + w1);
    let second = quantise_pred_weight_q13(w1);

    let joint = (5 * first[2] + second[2]) as usize;
    enc.encode_icdf(joint, &tables::STEREO_PRED_JOINT_ICDF, 8);

    for [fine3, fine5, _] in [first, second] {
        enc.encode_icdf(fine3 as usize, &tables::STEREO_UNIFORM3_ICDF, 8);
        enc.encode_icdf(fine5 as usize, &tables::STEREO_UNIFORM5_ICDF, 8);
    }
}
/// Least-squares weights, in Q13, for predicting `side[i]` from `mid[i]` and `mid[i - 1]`.
///
/// Solves the 2×2 normal equations accumulated over `i = 1..len`. Returns
/// `[0, 0]` when fewer than two samples are given or when the system is
/// (numerically) singular. Each weight is rounded to Q13 and limited to ±13500.
pub fn stereo_predict_weights_q13(mid: &[f32], side: &[f32]) -> [i32; 2] {
    debug_assert_eq!(mid.len(), side.len());
    let len = mid.len();
    if len < 2 {
        return [0, 0];
    }

    // Energies of the two regressors and their cross term.
    let (mut e_cur, mut e_cross, mut e_prev) = (0f64, 0f64, 0f64);
    // Correlations of the side signal with each regressor.
    let (mut c_cur, mut c_prev) = (0f64, 0f64);
    for (pair, &s) in mid.windows(2).zip(&side[1..len]) {
        let prev = f64::from(pair[0]);
        let cur = f64::from(pair[1]);
        let s = f64::from(s);
        e_cur += cur * cur;
        e_cross += cur * prev;
        e_prev += prev * prev;
        c_cur += s * cur;
        c_prev += s * prev;
    }

    let det = e_cur * e_prev - e_cross * e_cross;
    if det.abs() < 1e-12 {
        return [0, 0];
    }
    // Cramer's rule on the 2×2 system.
    let w0 = (c_cur * e_prev - c_prev * e_cross) / det;
    let w1 = (e_cur * c_prev - e_cross * c_cur) / det;

    let to_q13 = |w: f64| (w * 8192.0).round().clamp(-13500.0, 13500.0) as i32;
    [to_q13(w0), to_q13(w1)]
}
/// Builds the Q15 NLSF vector for a stage-1 index plus per-coefficient residuals.
///
/// Coefficient `k` starts from `((k + 1) / (order + 1)) ^ (1 + tilt)`, where
/// the tilt grows with `stage1` and is 0.15 larger for unvoiced frames. Each
/// residual is limited to ±7 and adds 128 Q15 units per step; the result is
/// clamped to `1..=32767`.
///
/// Panics if `residuals` holds fewer than `order` entries.
fn synthesize_nlsf_like_decoder(
    stage1: usize,
    voiced: bool,
    order: usize,
    residuals: &[i32],
) -> Vec<i16> {
    let voicing_tilt = if voiced { 0.0_f32 } else { 0.15_f32 };
    let exponent = 1.0 + ((stage1 as f32 / 32.0) * 0.25 + voicing_tilt);
    let denom = order as f32 + 1.0;
    (0..order)
        .map(|k| {
            let base = (k as f32 + 1.0) / denom;
            let template_q15 = (base.powf(exponent) * 32768.0) as i32;
            let offset = residuals[k].clamp(-7, 7) * 128;
            (template_q15 + offset).clamp(1, 32767) as i16
        })
        .collect()
}
#[cfg(test)]
mod tests {
use super::*;
/// The zero-residual NLSF template must have one entry per coefficient and be
/// non-decreasing after stabilisation.
#[test]
fn nlsf_template_mirrors_decoder() {
    const ORDER: usize = 10;
    let template = synthesize_nlsf_like_decoder(0, false, ORDER, &[0; ORDER]);
    assert_eq!(template.len(), ORDER);

    let stabilised = crate::silk::lsf::stabilize(&template, ORDER);
    for pair in stabilised.windows(2) {
        assert!(
            pair[1] >= pair[0],
            "stabilised NLSF should be non-decreasing ({} → {})",
            pair[0],
            pair[1]
        );
    }
}
/// Wideband 20 ms encoder: LPC order 16, 80-sample sub-frames, 320-sample frames at 16 kHz.
#[test]
fn wb_frame_params_match_expectations() {
let wb = SilkFrameEncoder::new_wb_20ms();
assert_eq!(wb.lpc_order(), 16);
assert_eq!(wb.subframe_len(), 80);
assert_eq!(wb.frame_len(), 320);
assert_eq!(wb.internal_rate_hz(), 16_000);
}
/// Mediumband 20 ms encoder: LPC order 10, 60-sample sub-frames, 240-sample frames at 12 kHz.
#[test]
fn mb_frame_params_match_expectations() {
let mb = SilkFrameEncoder::new_mb_20ms();
assert_eq!(mb.lpc_order(), 10);
assert_eq!(mb.subframe_len(), 60);
assert_eq!(mb.frame_len(), 240);
assert_eq!(mb.internal_rate_hz(), 12_000);
}
/// An all-zero narrowband frame must decode back to (near) silence.
#[test]
fn encode_decode_zero_frame_matches() {
use oxideav_celt::range_decoder::RangeDecoder;
let mut enc = SilkFrameEncoder::new_nb_20ms();
let pcm = vec![0.0f32; 160];
let mut re = RangeEncoder::new(512);
// Two header bits precede the frame body; the decoder side reads them back as VAD and LBRR flags.
re.encode_bit_logp(true, 1);
re.encode_bit_logp(false, 1);
enc.encode_frame_body(&pcm, &mut re).unwrap();
let buf = re.done().expect("done");
let mut rc = RangeDecoder::new(&buf);
let _vad = rc.decode_bit_logp(1);
let _lbrr = rc.decode_bit_logp(1);
let mut s = crate::silk::SilkChannelState::new();
// Decoder arguments mirror the narrowband preset: LPC order 10, 40-sample sub-frames, 4 sub-frames.
let decoded = crate::silk::decode_frame_body_pub(
&mut rc,
true,
OpusBandwidth::Narrowband,
10,
40,
4,
&mut s,
)
.expect("decode");
// Largest absolute sample of the decoded frame.
let peak = decoded.iter().copied().fold(0f32, |a, b| a.max(b.abs()));
println!("zero-frame roundtrip peak = {peak:.6}");
assert!(
peak < 0.001,
"zero-frame decode should be ~0, got peak {peak}"
);
}
/// Smoke test: encoding an all-zero frame succeeds and `done()` returns a
/// non-empty buffer of the 512 bytes the range encoder was created with.
///
/// Despite the name, nothing is decoded here.
#[test]
fn encode_decode_zero_frame_produces_finite_output() {
let mut enc = SilkFrameEncoder::new_nb_20ms();
let pcm = vec![0.0f32; 160];
let mut re = RangeEncoder::new(512);
enc.encode_frame_body(&pcm, &mut re).expect("encode");
let buf = re.done().expect("done");
assert!(!buf.is_empty());
assert_eq!(buf.len(), 512);
}
/// Narrowband 20 ms round trip at 8 kHz must exceed 25 dB SNR.
#[test]
fn encode_decode_nb_one_frame_internal_rate_snr() {
run_internal_rate_roundtrip(BandwidthParams::nb(), 8_000, 25.0);
}
/// Narrowband 10 ms round trip at 8 kHz must exceed 20 dB SNR.
#[test]
fn encode_decode_nb_10ms_internal_rate_snr() {
run_internal_rate_roundtrip_10ms(BandwidthParams::nb(), 8_000, 20.0);
}
/// Mediumband 10 ms round trip at 12 kHz must exceed 20 dB SNR.
#[test]
fn encode_decode_mb_10ms_internal_rate_snr() {
run_internal_rate_roundtrip_10ms(BandwidthParams::mb(), 12_000, 20.0);
}
/// Wideband 10 ms round trip at 16 kHz must exceed 20 dB SNR.
#[test]
fn encode_decode_wb_10ms_internal_rate_snr() {
run_internal_rate_roundtrip_10ms(BandwidthParams::wb(), 16_000, 20.0);
}
fn run_internal_rate_roundtrip_10ms(params: BandwidthParams, rate: u32, snr_bar: f64) {
use oxideav_celt::range_decoder::RangeDecoder;
let mut enc = SilkFrameEncoder::new_with_subframes(params, 2);
let frame_len = enc.frame_len();
let freq = 300.0f32;
let pcm: Vec<f32> = (0..frame_len)
.map(|i| (2.0 * std::f32::consts::PI * freq * i as f32 / rate as f32).sin() * 0.3)
.collect();
let mut re = RangeEncoder::new(1024);
re.encode_bit_logp(true, 1);
re.encode_bit_logp(false, 1);
enc.encode_frame_body(&pcm, &mut re).expect("encode");
let buf = re.done().expect("done");
let mut dec_state = crate::silk::SilkChannelState::new();
let mut rc = RangeDecoder::new(&buf);
let _vad = rc.decode_bit_logp(1);
let _lbrr = rc.decode_bit_logp(1);
let decoded = crate::silk::decode_frame_body_pub(
&mut rc,
true,
params.bandwidth,
params.lpc_order,
params.subframe_len,
2,
&mut dec_state,
)
.expect("decode");
assert_eq!(decoded.len(), frame_len);
let sig: f64 = pcm.iter().map(|v| (*v as f64) * (*v as f64)).sum();
let err: f64 = pcm
.iter()
.zip(decoded.iter())
.map(|(a, b)| {
let e = (*a - *b) as f64;
e * e
})
.sum();
let snr = 10.0 * (sig / err.max(1e-30)).log10();
println!(
"{:?} 10 ms internal-rate SNR: {snr:.2} dB (bar {snr_bar})",
params.bandwidth
);
assert!(
snr > snr_bar,
"10 ms internal-rate SNR {snr:.2} dB below {snr_bar} dB bar"
);
}
/// Mediumband 20 ms round trip at 12 kHz must exceed 25 dB SNR.
#[test]
fn encode_decode_mb_one_frame_internal_rate_snr() {
run_internal_rate_roundtrip(BandwidthParams::mb(), 12_000, 25.0);
}
/// Wideband 20 ms round trip at 16 kHz must exceed 25 dB SNR.
#[test]
fn encode_decode_wb_one_frame_internal_rate_snr() {
run_internal_rate_roundtrip(BandwidthParams::wb(), 16_000, 25.0);
}
/// With an all-zero side signal (what identical left/right channels produce),
/// both predictor weights must be zero.
#[test]
fn stereo_pred_weights_zero_for_identical_channels() {
let m: Vec<f32> = (0..100)
.map(|i| (2.0 * std::f32::consts::PI * 300.0 * i as f32 / 8_000.0).sin() * 0.3)
.collect();
let s = vec![0.0f32; m.len()];
let w = stereo_predict_weights_q13(&m, &s);
assert_eq!(w, [0, 0]);
}
/// Mid/side must invert back to left/right.
#[test]
fn stereo_mid_side_reconstructs_lr() {
let l = vec![0.1f32, 0.2, 0.3, 0.4];
let r = vec![0.0f32, 0.1, 0.2, 0.3];
let (m, s) = stereo_mid_side(&l, &r);
for i in 0..l.len() {
// Mid and side are un-normalised sums/differences, hence the factor 0.5 on the way back.
let rec_l = (m[i] + s[i]) * 0.5;
let rec_r = (m[i] - s[i]) * 0.5;
assert!((rec_l - l[i]).abs() < 1e-6);
assert!((rec_r - r[i]).abs() < 1e-6);
}
}
/// Compares the normal encoder against the forced-unvoiced encoder on a
/// harmonic (150 Hz fundamental plus three overtones) wideband signal.
///
/// Despite the name, the voiced run only has to stay within 1 dB of the
/// unvoiced run, and to exceed 15 dB SNR in absolute terms.
#[test]
fn voiced_path_beats_unvoiced_on_speech_like_input() {
use oxideav_celt::range_decoder::RangeDecoder;
let params = BandwidthParams::wb();
let rate = 16_000u32;
let n_frames = 5;
let frame_len = params.subframe_len * 4; let total = frame_len * n_frames;
let f0 = 150.0f32;
let pcm: Vec<f32> = (0..total)
.map(|i| {
let t = i as f32 / rate as f32;
((2.0 * std::f32::consts::PI * f0 * t).sin()
+ 0.6 * (2.0 * std::f32::consts::PI * 2.0 * f0 * t).sin()
+ 0.3 * (2.0 * std::f32::consts::PI * 3.0 * f0 * t).sin()
+ 0.15 * (2.0 * std::f32::consts::PI * 4.0 * f0 * t).sin())
* 0.25
})
.collect();
// Encodes and decodes `n_frames` consecutive frames with one encoder and one
// decoder state, each frame in its own range-coder buffer, and returns the
// concatenated decoder output.
fn encode_decode_all(
params: BandwidthParams,
pcm: &[f32],
n_frames: usize,
frame_len: usize,
force_unvoiced: bool,
) -> Vec<f32> {
let mut enc = SilkFrameEncoder::new(params);
enc.set_force_unvoiced(force_unvoiced);
let mut dec_state = crate::silk::SilkChannelState::new();
let mut decoded_all = Vec::with_capacity(pcm.len());
for i in 0..n_frames {
let slice = &pcm[i * frame_len..(i + 1) * frame_len];
let mut re = RangeEncoder::new(2048);
re.encode_bit_logp(true, 1);
re.encode_bit_logp(false, 1);
enc.encode_frame_body(slice, &mut re).expect("encode");
let buf = re.done().expect("done");
let mut rc = RangeDecoder::new(&buf);
let _vad = rc.decode_bit_logp(1);
let _lbrr = rc.decode_bit_logp(1);
let frame = crate::silk::decode_frame_body_pub(
&mut rc,
true,
params.bandwidth,
params.lpc_order,
params.subframe_len,
4,
&mut dec_state,
)
.expect("decode");
decoded_all.extend_from_slice(&frame);
}
decoded_all
}
let dec_voiced = encode_decode_all(params, &pcm, n_frames, frame_len, false);
let dec_unvoiced = encode_decode_all(params, &pcm, n_frames, frame_len, true);
// The first frame is excluded from both SNR measurements.
let skip = frame_len;
let snr_voiced = snr_db_range(&pcm, &dec_voiced, skip);
let snr_unvoiced = snr_db_range(&pcm, &dec_unvoiced, skip);
println!(
"voiced_vs_unvoiced WB harmonic: voiced={:.2} dB, unvoiced={:.2} dB, delta={:.2} dB",
snr_voiced,
snr_unvoiced,
snr_voiced - snr_unvoiced
);
assert!(
snr_voiced > snr_unvoiced - 1.0,
"voiced SNR {snr_voiced:.2} dB should be within 1 dB of unvoiced {snr_unvoiced:.2} dB"
);
assert!(snr_voiced > 15.0, "voiced SNR {snr_voiced:.2} dB too low");
}
/// Checks that the 5-tap LTP filter picked for a harmonic signal, applied at the
/// detected lag to the clean input, has an RMS of more than half the signal RMS.
#[test]
fn ltp_raw_sum_captures_periodicity() {
let params = BandwidthParams::wb();
let rate = 16_000u32;
let frame_len = params.subframe_len * 4;
let f0 = 180.0f32;
// Two frames of a 180 Hz fundamental plus two overtones.
let pcm: Vec<f32> = (0..frame_len * 2)
.map(|i| {
let t = i as f32 / rate as f32;
((2.0 * std::f32::consts::PI * f0 * t).sin()
+ 0.6 * (2.0 * std::f32::consts::PI * 2.0 * f0 * t).sin()
+ 0.3 * (2.0 * std::f32::consts::PI * 3.0 * f0 * t).sin())
* 0.25
})
.collect();
// Pitch analysis runs on the second frame only.
let pitch = analyze_pitch(&pcm[frame_len..frame_len * 2], OpusBandwidth::Wideband);
assert!(pitch.voiced, "harmonic signal should be voiced");
let lag = pitch.lag_internal;
let periodicity = LTP_PERIODICITY_VOICED;
let idx = ltp::pick_ltp_filter_index(pitch.correlation, periodicity);
let taps = ltp::ltp_filter_from_index(idx, periodicity);
let start = frame_len;
let end = start + frame_len;
let mut ltp_energy = 0f64;
let mut sig_energy = 0f64;
// Accumulate the energy of the tap-weighted sum and of the signal over the second frame.
for n in start..end {
let mut s = 0f32;
for k in 0..5 {
// Same tap layout as the encoder: lags lag-2 ..= lag+2; samples before the buffer start count as 0.
let lag_k = lag + (k as i32 - 2);
let j = n as i32 - lag_k;
let past = if j >= 0 { pcm[j as usize] } else { 0.0 };
s += taps[k] * past;
}
ltp_energy += (s as f64) * (s as f64);
let v = pcm[n] as f64;
sig_energy += v * v;
}
let ratio = (ltp_energy / sig_energy.max(1e-30)).sqrt();
println!(
"LTP raw-sum RMS / signal RMS on voiced frame: {ratio:.3} \
(lag={lag}, corr={:.3})",
pitch.correlation
);
assert!(
ratio > 0.5,
"LTP sum RMS ratio {ratio:.3} too small — pitch or taps wrong"
);
}
/// Signal-to-noise ratio in dB of `dec` against `ref_pcm`, ignoring the first
/// `skip` samples.
///
/// Only the overlapping prefix of the two slices is compared. The error energy
/// is floored at `1e-30`, so a perfect match yields a large finite value. When
/// `skip` reaches or exceeds the overlap there is nothing to compare and the
/// result is negative infinity (zero signal energy) rather than a slice panic.
fn snr_db_range(ref_pcm: &[f32], dec: &[f32], skip: usize) -> f64 {
    let overlap = ref_pcm.len().min(dec.len());
    // Clamp so that an oversized `skip` yields empty ranges instead of panicking.
    let start = skip.min(overlap);
    let reference = &ref_pcm[start..overlap];
    let decoded = &dec[start..overlap];

    let sig: f64 = reference
        .iter()
        .map(|&v| f64::from(v) * f64::from(v))
        .sum();
    let err: f64 = reference
        .iter()
        .zip(decoded)
        .map(|(&a, &b)| {
            let e = f64::from(a - b);
            e * e
        })
        .sum();
    10.0 * (sig / err.max(1e-30)).log10()
}
fn run_internal_rate_roundtrip(params: BandwidthParams, rate: u32, snr_bar: f64) {
use oxideav_celt::range_decoder::RangeDecoder;
let mut enc = SilkFrameEncoder::new(params);
let frame_len = enc.frame_len();
let freq = 300.0f32;
let pcm: Vec<f32> = (0..frame_len)
.map(|i| (2.0 * std::f32::consts::PI * freq * i as f32 / rate as f32).sin() * 0.3)
.collect();
let mut re = RangeEncoder::new(1024);
re.encode_bit_logp(true, 1);
re.encode_bit_logp(false, 1);
enc.encode_frame_body(&pcm, &mut re).expect("encode");
let buf = re.done().expect("done");
let mut dec_state = crate::silk::SilkChannelState::new();
let mut rc = RangeDecoder::new(&buf);
let _vad = rc.decode_bit_logp(1);
let _lbrr = rc.decode_bit_logp(1);
let decoded = crate::silk::decode_frame_body_pub(
&mut rc,
true,
params.bandwidth,
params.lpc_order,
params.subframe_len,
4,
&mut dec_state,
)
.expect("decode");
assert_eq!(decoded.len(), frame_len);
let sig: f64 = pcm.iter().map(|v| (*v as f64) * (*v as f64)).sum();
let err: f64 = pcm
.iter()
.zip(decoded.iter())
.map(|(a, b)| {
let e = (*a - *b) as f64;
e * e
})
.sum();
let snr = 10.0 * (sig / err.max(1e-30)).log10();
println!(
"{:?} internal-rate SNR: {snr:.2} dB (bar {snr_bar})",
params.bandwidth
);
assert!(
snr > snr_bar,
"internal-rate SNR {snr:.2} dB below {snr_bar} dB bar"
);
}
/// Checks that the shell excitation coder needs fewer bits than a plain
/// two-nibbles-plus-sign coding of the same excitation, and that the live
/// narrowband round trip on a 300 Hz sine still exceeds 25 dB SNR.
#[test]
fn shell_coder_beats_mvp_on_sine_bitrate() {
    use crate::silk::excitation::MAG_NIBBLE_ICDF;
    use crate::silk::shell;
    use crate::silk::tables;
    use oxideav_celt::range_decoder::RangeDecoder;

    let params = BandwidthParams::nb();
    let rate = 8_000u32;
    let mut enc = SilkFrameEncoder::new(params);
    let frame_len = enc.frame_len();
    let freq = 300.0f32;
    let pcm: Vec<f32> = (0..frame_len)
        .map(|i| (2.0 * std::f32::consts::PI * freq * i as f32 / rate as f32).sin() * 0.3)
        .collect();

    // Live round trip; the frame body's bit cost is measured with `tell()`.
    let mut re = RangeEncoder::new(2048);
    re.encode_bit_logp(true, 1);
    re.encode_bit_logp(false, 1);
    let tell_before = re.tell();
    enc.encode_frame_body(&pcm, &mut re).expect("encode");
    let tell_after = re.tell();
    let live_frame_bits = tell_after - tell_before;
    let buf = re.done().expect("done");
    let mut dec_state = crate::silk::SilkChannelState::new();
    let mut rc = RangeDecoder::new(&buf);
    let _vad = rc.decode_bit_logp(1);
    let _lbrr = rc.decode_bit_logp(1);
    let decoded = crate::silk::decode_frame_body_pub(
        &mut rc,
        true,
        params.bandwidth,
        params.lpc_order,
        params.subframe_len,
        4,
        &mut dec_state,
    )
    .expect("decode");
    let snr = snr_db_range(&pcm, &decoded, 0);
    enc.reset();

    // Forced-unvoiced round trip with a fresh encoder; it only has to encode
    // and decode without error, the decoded samples are not inspected.
    let mut enc2 = SilkFrameEncoder::new(params);
    enc2.set_force_unvoiced(true);
    let mut re_unv = RangeEncoder::new(2048);
    re_unv.encode_bit_logp(true, 1);
    re_unv.encode_bit_logp(false, 1);
    enc2.encode_frame_body(&pcm, &mut re_unv).expect("encode");
    let buf_unv = re_unv.done().expect("done");
    let mut dec_state2 = crate::silk::SilkChannelState::new();
    let mut rc2 = RangeDecoder::new(&buf_unv);
    let _v = rc2.decode_bit_logp(1);
    let _l = rc2.decode_bit_logp(1);
    let _decoded2 = crate::silk::decode_frame_body_pub(
        &mut rc2,
        true,
        params.bandwidth,
        params.lpc_order,
        params.subframe_len,
        4,
        &mut dec_state2,
    )
    .expect("decode2");

    // Synthetic excitation: scaled first differences of the sine, zero-padded
    // to a multiple of 16 and limited to ±120.
    let mut signed_mags: Vec<i32> = pcm
        .windows(2)
        .map(|w| ((w[1] - w[0]) * 120.0).round() as i32)
        .collect();
    signed_mags.push(0);
    let aligned = signed_mags.len().div_ceil(16) * 16;
    signed_mags.resize(aligned, 0);
    for v in signed_mags.iter_mut() {
        *v = (*v).clamp(-120, 120);
    }

    // Bit cost with the shell coder.
    let mut re_shell = RangeEncoder::new(2048);
    let t0 = re_shell.tell();
    shell::encode_excitation(&mut re_shell, &signed_mags, 1, 0);
    let shell_bits = re_shell.tell() - t0;

    // Bit cost with the baseline: a rate level, one pulse-count symbol per
    // 16-sample block, then two magnitude nibbles and an optional sign bit per sample.
    let mut re_mvp = RangeEncoder::new(2048);
    let t0 = re_mvp.tell();
    re_mvp.encode_icdf(0, &tables::RATE_LEVEL_INACTIVE_ICDF, 8);
    let n_shells = signed_mags.len() / 16;
    for _ in 0..n_shells {
        re_mvp.encode_icdf(0, &tables::PULSE_COUNT_ICDF[0], 8);
    }
    for &s in &signed_mags {
        let m = s.unsigned_abs() as i32;
        let hi = ((m >> 4) & 0xf) as usize;
        let lo = (m & 0xf) as usize;
        re_mvp.encode_icdf(hi, &MAG_NIBBLE_ICDF, 8);
        re_mvp.encode_icdf(lo, &MAG_NIBBLE_ICDF, 8);
        if m != 0 {
            re_mvp.encode_bit_logp(s < 0, 1);
        }
    }
    let mvp_bits = re_mvp.tell() - t0;

    // The difference is taken in f32: with an unsigned bit counter, an integer
    // `mvp_bits - shell_bits` would overflow exactly when the assertion below
    // is about to fail, hiding its message behind an arithmetic panic.
    println!(
        "sine bitrate — shell={shell_bits} bits mvp={mvp_bits} bits \
         savings={:.1}% live_frame={live_frame_bits} bits snr={snr:.2} dB",
        100.0 * (mvp_bits as f32 - shell_bits as f32) / mvp_bits as f32
    );
    assert!(
        shell_bits < mvp_bits,
        "shell coder did not save bits on sine: shell={shell_bits} mvp={mvp_bits}"
    );
    assert!(snr > 25.0, "round-trip SNR dropped: {snr:.2} dB");
}
}