use oxideav_celt::range_encoder::RangeEncoder;
use oxideav_core::Result;
use crate::silk::lsf;
use crate::silk::ltp;
use crate::silk::pitch_analysis::{analyze_pitch, PitchEstimate};
use crate::silk::tables;
use crate::toc::OpusBandwidth;
/// Stage-1 NLSF codebook index used for every frame; both the voiced and the
/// unvoiced path synthesise their LPC filter from it and write it to the stream.
const NLSF_STAGE1_IDX: usize = 0;
/// Gain index used by the unvoiced path (converted with `gain_index_to_q16`
/// and written as MSB/LSB symbols).
const GAIN_INDEX_UNVOICED: i32 = 35;
/// Gain index used by the voiced path (same handling as the unvoiced one).
const GAIN_INDEX_VOICED: i32 = 35;
/// LTP scaling factor in Q14 for voiced frames; divided by 16384 (≈ 0.95)
/// before use and passed to `ltp::encode_ltp_scaling`.
const LTP_SCALE_Q14_VOICED: i32 = 15565;
/// Periodicity index handed to the LTP filter selection and to
/// `ltp::encode_ltp_periodicity` for voiced frames.
const LTP_PERIODICITY_VOICED: usize = 2;
/// Upper bound applied to the absolute value of each rounded excitation
/// sample before it is converted to `i32`.
const CARRIER_FULL_SCALE: f32 = 16384.0;
/// Initial LCG state for the excitation sign generator. The encoder always
/// writes symbol 0 from `LCG_SEED_ICDF` — presumably the matching seed on the
/// decoder side; confirm against the decoder.
const ENCODER_LCG_SEED: u32 = 0;
/// Runs one step of the excitation LCG and reconstructs the Q23 excitation
/// value for a single raw (signed integer) excitation sample.
///
/// The magnitude is `(e_raw << 8) - sign(e_raw) * 20 + offset_q23`; it is
/// negated when the top bit of the advanced LCG state is set. The returned
/// state is the advanced state plus `e_raw` (wrapping).
///
/// Returns `(e_q23, next_state)`.
#[inline]
fn lcg_step_q23(state: u32, e_raw: i32, offset_q23: i32) -> (i32, u32) {
    let advanced = state.wrapping_mul(196_314_165).wrapping_add(907_633_515);
    let unsigned_q23 = (e_raw << 8) - e_raw.signum() * 20 + offset_q23;
    let e_q23 = if advanced & 0x8000_0000 == 0 {
        unsigned_q23
    } else {
        -unsigned_q23
    };
    (e_q23, advanced.wrapping_add(e_raw as u32))
}
/// Predicts whether the next LCG step from `state` will negate the
/// excitation sample, i.e. whether the advanced state has its top bit set.
#[inline]
fn lcg_pre_flip(state: u32) -> bool {
    let advanced = state.wrapping_mul(196_314_165).wrapping_add(907_633_515);
    (advanced >> 31) == 1
}
/// Writes the stage-2 NLSF residual indices to the range encoder.
///
/// Each residual is clamped to `[-10, 10]`. Values in `[-3, 3]` are coded as
/// symbol `value + 4` with the per-coefficient codebook selected by stage-1
/// index `i1`. Values beyond that are coded as edge symbol 0 (negative) or
/// 8 (positive), followed by an extension symbol `min(|value| - 4, 6)`.
///
/// `is_wb` selects the wideband tables instead of the NB/MB ones.
fn encode_nlsf_stage2(enc: &mut RangeEncoder, i1: usize, residuals: &[i32], is_wb: bool) {
    for (coef, &raw) in residuals.iter().enumerate() {
        let value = raw.clamp(-10, 10);
        let icdf: &[u8] = if is_wb {
            let codebook = tables::NLSF_WB_STAGE2_SELECT[i1][coef] as usize;
            &tables::NLSF_WB_STAGE2_ICDF[codebook]
        } else {
            let codebook = tables::NLSF_NBMB_STAGE2_SELECT[i1][coef] as usize;
            &tables::NLSF_NBMB_STAGE2_ICDF[codebook]
        };
        // How far the value reaches past the +/-4 edge symbols (negative: not at all).
        let overshoot = value.abs() - 4;
        if overshoot >= 0 {
            let edge_symbol: usize = if value > 0 { 8 } else { 0 };
            enc.encode_icdf(edge_symbol, icdf, 8);
            enc.encode_icdf(
                (overshoot as usize).min(6),
                &tables::NLSF_STAGE2_EXTENSION_ICDF,
                8,
            );
        } else {
            enc.encode_icdf((value + 4) as usize, icdf, 8);
        }
    }
}
/// Per-bandwidth configuration of the SILK frame encoder.
#[derive(Copy, Clone, Debug)]
pub struct BandwidthParams {
/// Opus bandwidth this parameter set describes.
pub bandwidth: OpusBandwidth,
/// Order of the LPC filter (number of NLSF coefficients).
pub lpc_order: usize,
/// Number of samples in one sub-frame.
pub subframe_len: usize,
}
impl BandwidthParams {
pub const fn nb() -> Self {
Self {
bandwidth: OpusBandwidth::Narrowband,
lpc_order: 10,
subframe_len: 40, }
}
pub const fn mb() -> Self {
Self {
bandwidth: OpusBandwidth::Mediumband,
lpc_order: 10,
subframe_len: 60, }
}
pub const fn wb() -> Self {
Self {
bandwidth: OpusBandwidth::Wideband,
lpc_order: 16,
subframe_len: 80, }
}
}
/// Stateful encoder for the body of a single-channel SILK frame.
///
/// It carries the synthesis state from one frame to the next so that the
/// encoder-side reconstruction can continue across frame boundaries.
pub struct SilkFrameEncoder {
// Bandwidth, LPC order and sub-frame length in use.
params: BandwidthParams,
// Sub-frames per frame; the constructor only accepts 2 or 4.
n_subframes: usize,
// Last `lpc_order` reconstructed samples of the previous frame (LPC history).
prev_synth: Vec<f32>,
// Pitch lag of the previous frame; set to 0 after an unvoiced frame or a reset.
prev_pitch_lag: i32,
// 480-sample history consulted by the long-term predictor.
ltp_history: Vec<f32>,
// When set, `encode_frame_body` skips pitch analysis and always uses the unvoiced path.
force_unvoiced: bool,
}
impl SilkFrameEncoder {
/// Creates an encoder with 4 sub-frames per frame (20 ms, per the
/// assertion message in `new_with_subframes`).
pub fn new(params: BandwidthParams) -> Self {
Self::new_with_subframes(params, 4)
}
/// Creates an encoder for the given bandwidth parameters and sub-frame count.
///
/// All history buffers start zeroed and the forced-unvoiced switch is off.
///
/// # Panics
///
/// Panics if `n_subframes` is neither 2 nor 4.
pub fn new_with_subframes(params: BandwidthParams, n_subframes: usize) -> Self {
    assert!(
        matches!(n_subframes, 2 | 4),
        "SILK frame encoder only supports 2 (10 ms) or 4 (20 ms) sub-frames, got {n_subframes}"
    );
    let prev_synth = vec![0.0; params.lpc_order];
    let ltp_history = vec![0.0; 480];
    Self {
        params,
        n_subframes,
        prev_synth,
        prev_pitch_lag: 0,
        ltp_history,
        force_unvoiced: false,
    }
}
/// Forces (`true`) or stops forcing (`false`) the unvoiced encoding path.
/// While set, `encode_frame_body` bypasses pitch analysis entirely.
#[doc(hidden)]
pub fn set_force_unvoiced(&mut self, f: bool) {
self.force_unvoiced = f;
}
/// Narrowband encoder with the default 4 sub-frames (20 ms frames).
pub fn new_nb_20ms() -> Self {
Self::new(BandwidthParams::nb())
}
/// Mediumband encoder with the default 4 sub-frames (20 ms frames).
pub fn new_mb_20ms() -> Self {
Self::new(BandwidthParams::mb())
}
/// Wideband encoder with the default 4 sub-frames (20 ms frames).
pub fn new_wb_20ms() -> Self {
Self::new(BandwidthParams::wb())
}
/// Narrowband encoder with 2 sub-frames (10 ms frames).
pub fn new_nb_10ms() -> Self {
Self::new_with_subframes(BandwidthParams::nb(), 2)
}
/// Mediumband encoder with 2 sub-frames (10 ms frames).
pub fn new_mb_10ms() -> Self {
Self::new_with_subframes(BandwidthParams::mb(), 2)
}
/// Wideband encoder with 2 sub-frames (10 ms frames).
pub fn new_wb_10ms() -> Self {
Self::new_with_subframes(BandwidthParams::wb(), 2)
}
/// Number of samples in one frame: sub-frame length times sub-frame count.
pub fn frame_len(&self) -> usize {
self.params.subframe_len * self.n_subframes
}
/// Internal sample rate in Hz for the configured bandwidth, as reported by
/// the parent module's `internal_rate_hz`.
pub fn internal_rate_hz(&self) -> u32 {
super::internal_rate_hz(self.params.bandwidth)
}
/// LPC filter order of the configured bandwidth.
pub fn lpc_order(&self) -> usize {
self.params.lpc_order
}
/// Number of samples in one sub-frame.
pub fn subframe_len(&self) -> usize {
self.params.subframe_len
}
/// Number of sub-frames per frame (2 or 4).
pub fn n_subframes(&self) -> usize {
self.n_subframes
}
/// Clears all inter-frame state: the LPC history becomes `lpc_order` zeros,
/// the LTP history becomes 480 zeros and the previous pitch lag becomes 0.
/// The forced-unvoiced switch is left as it is.
pub fn reset(&mut self) {
    let order = self.params.lpc_order;
    self.prev_synth.clear();
    self.prev_synth.resize(order, 0.0);
    self.ltp_history.clear();
    self.ltp_history.resize(480, 0.0);
    self.prev_pitch_lag = 0;
}
/// Encodes the body of one frame into `enc`.
///
/// Pitch analysis decides between the voiced and unvoiced paths, unless the
/// forced-unvoiced switch is set, in which case no analysis is run and the
/// unvoiced path is always taken. `pcm_internal` is expected to hold exactly
/// `frame_len()` samples (checked with a debug assertion in both paths).
pub fn encode_frame_body(
    &mut self,
    pcm_internal: &[f32],
    enc: &mut RangeEncoder,
) -> Result<()> {
    let voiced_estimate = if self.force_unvoiced {
        None
    } else {
        Some(analyze_pitch(pcm_internal, self.params.bandwidth)).filter(|est| est.voiced)
    };
    match voiced_estimate {
        Some(pitch) => self.encode_frame_body_voiced(pcm_internal, enc, pitch),
        None => self.encode_frame_body_unvoiced(pcm_internal, enc),
    }
}
/// Encodes one frame on the unvoiced path.
///
/// The LPC filter comes from a fixed NLSF template (stage-1 index
/// `NLSF_STAGE1_IDX`, all-zero stage-2 residuals) and the gain from the fixed
/// index `GAIN_INDEX_UNVOICED`. The excitation is found in two passes: a
/// first analysis-by-synthesis pass produces integer excitation samples, these
/// go through `shell::quantize_to_shell`, and a second pass re-synthesises the
/// frame from the values returned by that call so that the stored history
/// reflects what is actually written to the stream.
///
/// On return `prev_synth`, `ltp_history` and `prev_pitch_lag` are updated.
fn encode_frame_body_unvoiced(
&mut self,
pcm_internal: &[f32],
enc: &mut RangeEncoder,
) -> Result<()> {
debug_assert_eq!(pcm_internal.len(), self.frame_len());
let order = self.params.lpc_order;
let frame_len = self.frame_len();
let subframe_len = self.params.subframe_len;
// Frame-type symbol for this path.
enc.encode_icdf(2, &tables::FRAME_TYPE_ACTIVE_ICDF, 8);
let signal_type: u8 = 1;
// Fixed spectral envelope: template NLSFs -> stabilised -> LPC coefficients.
let residuals = vec![0i32; order];
let nlsf_q15 = synthesize_nlsf_like_decoder(NLSF_STAGE1_IDX, false, order, &residuals);
let nlsf_q15 = lsf::stabilize(&nlsf_q15, order == 16);
let lpc = lsf::nlsf_to_lpc(&nlsf_q15, self.params.bandwidth);
let gain_index: i32 = GAIN_INDEX_UNVOICED;
let gain_q16 = super::gain_index_to_q16(gain_index);
// Linear gain; `max(1)` keeps the division below away from zero.
let g = gain_q16.max(1) as f32 / 65536.0;
// Maps a residual in sample units to the integer excitation domain.
let scale = 32768.0 / g;
let st = (signal_type as usize).min(2);
let offset_q23 = super::shell::QUANT_OFFSET_Q23[st][0];
let inv_q23 = 1.0_f32 / 8_388_608.0;
let synth_hist = self.prev_synth.clone();
let mut out = vec![0f32; frame_len];
let mut signed_mags = vec![0i32; frame_len];
let mut lcg_state = ENCODER_LCG_SEED;
// Pass 1: sample-by-sample analysis-by-synthesis to pick the excitation.
for n in 0..frame_len {
// LPC prediction from already reconstructed samples, falling back to
// the previous frame's tail for negative indices.
let mut pred = 0f32;
for k in 1..=order {
let idx = n as i32 - k as i32;
let past = if idx >= 0 {
out[idx as usize]
} else {
synth_hist[(synth_hist.len() as i32 + idx) as usize]
};
pred += lpc[k - 1] * past;
}
let e_desired = pcm_internal[n] - pred;
// The LCG step may negate the sample; pre-negate the target so the
// value after the flip has the wanted sign.
let flip = lcg_pre_flip(lcg_state);
let target = if flip { -e_desired } else { e_desired };
let signed_mag_f = (target * scale).round();
let mag_i = signed_mag_f.abs().clamp(0.0, CARRIER_FULL_SCALE) as i32;
let neg = signed_mag_f < 0.0;
let signed = if neg { -mag_i } else { mag_i };
signed_mags[n] = signed;
let (e_q23, next_state) = lcg_step_q23(lcg_state, signed, offset_q23);
lcg_state = next_state;
let e_quant = e_q23 as f32 * inv_q23 * g;
out[n] = (e_quant + pred).clamp(-1.0, 1.0);
}
// Pad to a whole number of 16-sample blocks before shell quantisation.
let aligned = signed_mags.len().div_ceil(16) * 16;
signed_mags.resize(aligned, 0);
let recon = super::shell::quantize_to_shell(&signed_mags);
// Pass 2: redo the synthesis from scratch with the values returned by
// `quantize_to_shell`, restarting the LCG from the same seed.
out.fill(0.0);
let mut lcg_state = ENCODER_LCG_SEED;
for n in 0..frame_len {
let mut pred = 0f32;
for k in 1..=order {
let idx = n as i32 - k as i32;
let past = if idx >= 0 {
out[idx as usize]
} else {
synth_hist[(synth_hist.len() as i32 + idx) as usize]
};
pred += lpc[k - 1] * past;
}
let (e_q23, next_state) = lcg_step_q23(lcg_state, recon[n], offset_q23);
lcg_state = next_state;
let e_quant = e_q23 as f32 * inv_q23 * g;
out[n] = (e_quant + pred).clamp(-1.0, 1.0);
}
signed_mags = recon;
// Gain: 3-bit MSB and 3-bit LSB for the first sub-frame, then delta
// symbol 4 for each remaining sub-frame.
let msb = ((gain_index >> 3) & 0x7) as usize;
let lsb = (gain_index & 0x7) as usize;
let msb_icdf = match signal_type {
0 => &tables::GAIN_MSB_INACTIVE_ICDF,
1 => &tables::GAIN_MSB_UNVOICED_ICDF,
_ => &tables::GAIN_MSB_VOICED_ICDF,
};
enc.encode_icdf(msb, msb_icdf, 8);
enc.encode_icdf(lsb, &tables::GAIN_LSB_ICDF, 8);
for _ in 1..self.n_subframes {
enc.encode_icdf(4, &tables::GAIN_DELTA_ICDF, 8);
}
// NLSF stage 1 and stage 2; every non-wideband bandwidth uses the NB table.
let stage1_icdf: &[u8] = match self.params.bandwidth {
OpusBandwidth::Wideband => &tables::NLSF_WB_STAGE1_UNVOICED_ICDF,
_ => &tables::NLSF_NB_STAGE1_UNVOICED_ICDF,
};
enc.encode_icdf(NLSF_STAGE1_IDX, stage1_icdf, 8);
encode_nlsf_stage2(enc, NLSF_STAGE1_IDX, &residuals, order == 16);
// The interpolation symbol is only written for 4-sub-frame frames.
if self.n_subframes == 4 {
enc.encode_icdf(4, &tables::NLSF_INTERP_ICDF, 8);
}
// LCG seed symbol 0, paired with ENCODER_LCG_SEED used above.
enc.encode_icdf(0, &tables::LCG_SEED_ICDF, 8);
// `subframe_len` is not needed on this path; silence the unused warning.
let _ = subframe_len;
super::shell::encode_excitation(enc, &signed_mags, signal_type, 0);
// Carry the last `order` reconstructed samples into the next frame.
let start = out.len().saturating_sub(order);
self.prev_synth.clear();
self.prev_synth.extend_from_slice(&out[start..]);
shift_ltp_history(&mut self.ltp_history, &out);
self.prev_pitch_lag = 0;
Ok(())
}
/// Encodes one frame on the voiced path.
///
/// Works like the unvoiced path (fixed NLSF template, fixed gain index
/// `GAIN_INDEX_VOICED`, two-pass excitation search around
/// `shell::quantize_to_shell`), with a 5-tap long-term predictor added. The
/// predictor is centred on `pitch.lag_internal` and reads from the
/// LPC-residual-domain signal `res_enc` and, for negative indices, from
/// `ltp_history`. The same lag and LTP filter index are used for every
/// sub-frame.
///
/// On return `prev_synth`, `ltp_history` (fed with `res_enc`) and
/// `prev_pitch_lag` are updated.
fn encode_frame_body_voiced(
&mut self,
pcm_internal: &[f32],
enc: &mut RangeEncoder,
pitch: PitchEstimate,
) -> Result<()> {
debug_assert_eq!(pcm_internal.len(), self.frame_len());
let order = self.params.lpc_order;
let frame_len = self.frame_len();
let subframe_len = self.params.subframe_len;
// Frame-type symbol for this path.
enc.encode_icdf(4, &tables::FRAME_TYPE_ACTIVE_ICDF, 8);
let signal_type: u8 = 2;
// Fixed spectral envelope: template NLSFs -> stabilised -> LPC coefficients.
let residuals = vec![0i32; order];
let nlsf_q15 = synthesize_nlsf_like_decoder(NLSF_STAGE1_IDX, true, order, &residuals);
let nlsf_q15 = lsf::stabilize(&nlsf_q15, order == 16);
let lpc = lsf::nlsf_to_lpc(&nlsf_q15, self.params.bandwidth);
let gain_index: i32 = GAIN_INDEX_VOICED;
let gain_q16 = super::gain_index_to_q16(gain_index);
// Linear gain; `max(1)` keeps the division in the loop away from zero.
let g = gain_q16.max(1) as f32 / 65536.0;
let st = (signal_type as usize).min(2);
let offset_q23 = super::shell::QUANT_OFFSET_Q23[st][0];
let inv_q23 = 1.0_f32 / 8_388_608.0;
let periodicity = LTP_PERIODICITY_VOICED;
let primary_lag = pitch.lag_internal;
// For lags above 2 the filter is picked from the signal and the stored
// history; otherwise from the pitch correlation alone.
let ltp_filter_idx = if primary_lag > 2 {
ltp::pick_ltp_filter_from_history(
pcm_internal,
&self.ltp_history,
primary_lag,
periodicity,
)
} else {
ltp::pick_ltp_filter_index(pitch.correlation, periodicity)
};
let ltp_taps = ltp::ltp_filter_from_index(ltp_filter_idx, periodicity);
let pitch_lags = vec![primary_lag; self.n_subframes];
let ltp_scale_q14 = LTP_SCALE_Q14_VOICED;
let ltp_scale = ltp_scale_q14 as f32 / 16384.0;
let synth_hist = self.prev_synth.clone();
let ltp_hist_len = self.ltp_history.len();
let mut out = vec![0f32; frame_len];
// `res_enc` holds excitation + LTP contribution (the LPC filter input).
let mut res_enc = vec![0f32; frame_len]; let mut signed_mags = vec![0i32; frame_len];
let mut lcg_state = ENCODER_LCG_SEED;
// Pass 1: sample-by-sample analysis-by-synthesis to pick the excitation.
for n in 0..frame_len {
// Short-term (LPC) prediction from reconstructed output samples.
let mut lpc_pred = 0f32;
for k in 1..=order {
let idx = n as i32 - k as i32;
let past = if idx >= 0 {
out[idx as usize]
} else {
synth_hist[(synth_hist.len() as i32 + idx) as usize]
};
lpc_pred += lpc[k - 1] * past;
}
// Long-term prediction: 5 taps spanning lag-2 ..= lag+2 samples back.
// NOTE(review): if `primary_lag` were below 2, `idx` could exceed `n`
// and index a not-yet-written or out-of-range `res_enc` entry —
// confirm `analyze_pitch` never reports such a lag.
let mut ltp_sum = 0f32;
for k in 0..5 {
let idx = n as i32 - primary_lag + 2 - k as i32;
let past = if idx >= 0 {
res_enc[idx as usize]
} else {
// Indices before the frame start come from the stored history;
// anything outside it reads as 0.
let hi = (ltp_hist_len as i32 + idx) as usize;
self.ltp_history.get(hi).copied().unwrap_or(0.0)
};
ltp_sum += ltp_taps[k] * ltp_scale * past;
}
let e_desired_q0 = (pcm_internal[n] - lpc_pred - ltp_sum) / g;
// The LCG step may negate the sample; pre-negate the target so the
// value after the flip has the wanted sign.
let flip = lcg_pre_flip(lcg_state);
let target = if flip { -e_desired_q0 } else { e_desired_q0 };
let signed_mag_f = (target * 32768.0).round();
let mag_i = signed_mag_f.abs().clamp(0.0, CARRIER_FULL_SCALE) as i32;
let neg = signed_mag_f < 0.0;
let signed = if neg { -mag_i } else { mag_i };
signed_mags[n] = signed;
let (e_q23, next_state) = lcg_step_q23(lcg_state, signed, offset_q23);
lcg_state = next_state;
let e_quant = g * (e_q23 as f32 * inv_q23) + ltp_sum;
res_enc[n] = e_quant; out[n] = e_quant + lpc_pred; }
// Pad to a whole number of 16-sample blocks before shell quantisation.
let aligned = signed_mags.len().div_ceil(16) * 16;
signed_mags.resize(aligned, 0);
let recon = super::shell::quantize_to_shell(&signed_mags);
// Pass 2: redo the synthesis from scratch with the values returned by
// `quantize_to_shell`, restarting the LCG from the same seed.
out.fill(0.0);
res_enc.fill(0.0);
let mut lcg_state = ENCODER_LCG_SEED;
for n in 0..frame_len {
let mut lpc_pred = 0f32;
for k in 1..=order {
let idx = n as i32 - k as i32;
let past = if idx >= 0 {
out[idx as usize]
} else {
synth_hist[(synth_hist.len() as i32 + idx) as usize]
};
lpc_pred += lpc[k - 1] * past;
}
let mut ltp_sum = 0f32;
for k in 0..5 {
let idx = n as i32 - primary_lag + 2 - k as i32;
let past = if idx >= 0 {
res_enc[idx as usize]
} else {
let hi = (ltp_hist_len as i32 + idx) as usize;
self.ltp_history.get(hi).copied().unwrap_or(0.0)
};
ltp_sum += ltp_taps[k] * ltp_scale * past;
}
let (e_q23, next_state) = lcg_step_q23(lcg_state, recon[n], offset_q23);
lcg_state = next_state;
let e_quant = g * (e_q23 as f32 * inv_q23) + ltp_sum;
res_enc[n] = e_quant; out[n] = e_quant + lpc_pred; }
signed_mags = recon;
// Gain: 3-bit MSB and 3-bit LSB for the first sub-frame, then delta
// symbol 4 for each remaining sub-frame.
let msb = ((gain_index >> 3) & 0x7) as usize;
let lsb = (gain_index & 0x7) as usize;
enc.encode_icdf(msb, &tables::GAIN_MSB_VOICED_ICDF, 8);
enc.encode_icdf(lsb, &tables::GAIN_LSB_ICDF, 8);
for _ in 1..self.n_subframes {
enc.encode_icdf(4, &tables::GAIN_DELTA_ICDF, 8);
}
// NLSF stage 1 and stage 2; every non-wideband bandwidth uses the NB table.
let stage1_icdf: &[u8] = match self.params.bandwidth {
OpusBandwidth::Wideband => &tables::NLSF_WB_STAGE1_VOICED_ICDF,
_ => &tables::NLSF_NB_STAGE1_VOICED_ICDF,
};
enc.encode_icdf(NLSF_STAGE1_IDX, stage1_icdf, 8);
encode_nlsf_stage2(enc, NLSF_STAGE1_IDX, &residuals, order == 16);
// The interpolation symbol is only written for 4-sub-frame frames.
if self.n_subframes == 4 {
enc.encode_icdf(4, &tables::NLSF_INTERP_ICDF, 8);
}
// Pitch and LTP parameters: lag, contour, periodicity, one filter index
// per sub-frame, then the LTP scaling.
ltp::encode_primary_pitch_lag(enc, self.params.bandwidth, primary_lag, self.prev_pitch_lag);
ltp::encode_pitch_contour(enc, self.params.bandwidth);
ltp::encode_ltp_periodicity(enc, periodicity);
for _ in 0..self.n_subframes {
ltp::encode_ltp_filter_index(enc, periodicity, ltp_filter_idx);
}
ltp::encode_ltp_scaling(enc, ltp_scale_q14);
// LCG seed symbol 0, paired with ENCODER_LCG_SEED used above.
enc.encode_icdf(0, &tables::LCG_SEED_ICDF, 8);
// `subframe_len` and `pitch_lags` are not otherwise used; silence the warnings.
let _ = subframe_len;
let _ = pitch_lags; super::shell::encode_excitation(enc, &signed_mags, signal_type, 0);
// Carry the last `order` reconstructed samples into the next frame.
let start = out.len().saturating_sub(order);
self.prev_synth.clear();
self.prev_synth.extend_from_slice(&out[start..]);
shift_ltp_history(&mut self.ltp_history, &res_enc);
self.prev_pitch_lag = primary_lag;
Ok(())
}
}
/// Slides `new_samples` into the fixed-length `history` buffer.
///
/// The length of `history` never changes. The oldest samples are dropped and
/// the newest end up at the back; when `new_samples` is at least as long as
/// the buffer, only its final `history.len()` samples are kept.
///
/// The shift is done in place, so no temporary vector is allocated.
fn shift_ltp_history(history: &mut Vec<f32>, new_samples: &[f32]) {
    let hist_len = history.len();
    let incoming = new_samples.len();
    if incoming >= hist_len {
        // The new data covers the whole buffer: keep only its tail.
        history.copy_from_slice(&new_samples[incoming - hist_len..]);
    } else {
        // Move the surviving old samples to the front, then append the new ones.
        history.copy_within(incoming.., 0);
        history[hist_len - incoming..].copy_from_slice(new_samples);
    }
}
/// Converts a left/right pair into `(mid, side)` with `mid = l + r` and
/// `side = l - r` per sample (no halving is applied here).
///
/// Both inputs are expected to have the same length; this is checked with a
/// debug assertion, and `r` being shorter than `l` panics.
pub fn stereo_mid_side(l: &[f32], r: &[f32]) -> (Vec<f32>, Vec<f32>) {
    debug_assert_eq!(l.len(), r.len());
    // Slicing keeps the "panic if `r` is too short" behaviour of plain indexing.
    let r = &r[..l.len()];
    l.iter()
        .zip(r)
        .map(|(&left, &right)| (left + right, left - right))
        .unzip()
}
/// Finds the quantiser indices whose reconstruction level is closest to
/// `weight_q13`.
///
/// The search covers the 15 cells between consecutive entries of
/// `STEREO_PRED_QUANT_Q13`, each split into 5 sub-steps. The result is
/// `[cell % 3, sub_step, cell / 3]`; on ties the first candidate wins.
fn quantise_pred_weight_q13(weight_q13: i32) -> [i32; 3] {
    let table = &tables::STEREO_PRED_QUANT_Q13;
    let mut smallest_err = i32::MAX;
    let mut winner = [0i32; 3];
    for cell in 0..15usize {
        let floor_q13 = table[cell] as i32;
        let ceil_q13 = table[cell + 1] as i32;
        // One tenth of the cell width (6554 / 65536 ≈ 0.1).
        let tenth_q13 = ((ceil_q13 - floor_q13) * 6554) >> 16;
        let coarse = (cell / 3) as i32;
        let fine = (cell % 3) as i32;
        for sub in 0..5i32 {
            let candidate = floor_q13 + tenth_q13 * (2 * sub + 1);
            let err = (candidate - weight_q13).abs();
            if err < smallest_err {
                smallest_err = err;
                winner = [fine, sub, coarse];
            }
        }
    }
    winner
}
/// Quantises and writes the two stereo prediction weights.
///
/// The first coded weight is `pred_q13[0] + pred_q13[1]`, the second is
/// `pred_q13[1]`. The joint symbol `5 * coarse0 + coarse1` is written first,
/// followed for each weight by its 3-way and its 5-way index.
pub fn encode_stereo_pred_weights(enc: &mut RangeEncoder, pred_q13: [i32; 2]) {
    let [w0, w1] = pred_q13;
    let first = quantise_pred_weight_q13(w0 + w1);
    let second = quantise_pred_weight_q13(w1);
    let joint = (5 * first[2] + second[2]) as usize;
    enc.encode_icdf(joint, &tables::STEREO_PRED_JOINT_ICDF, 8);
    for indices in &[first, second] {
        enc.encode_icdf(indices[0] as usize, &tables::STEREO_UNIFORM3_ICDF, 8);
        enc.encode_icdf(indices[1] as usize, &tables::STEREO_UNIFORM5_ICDF, 8);
    }
}
/// Estimates two least-squares prediction weights such that
/// `side[i] ≈ w0 * mid[i] + w1 * mid[i - 1]`, returned in Q13.
///
/// Each weight is scaled by 8192, rounded and clamped to `[-13500, 13500]`.
/// Returns `[0, 0]` for fewer than two samples or when the 2x2 normal
/// equations are (near-)singular.
pub fn stereo_predict_weights_q13(mid: &[f32], side: &[f32]) -> [i32; 2] {
    debug_assert_eq!(mid.len(), side.len());
    let n = mid.len();
    if n < 2 {
        return [0, 0];
    }
    // Correlation sums over i = 1..n, accumulated in f64.
    let (mut mm, mut mm1, mut m1m1, mut sm, mut sm1) = (0f64, 0f64, 0f64, 0f64, 0f64);
    for (pair, &side_now) in mid.windows(2).zip(&side[1..n]) {
        let prev = pair[0] as f64;
        let cur = pair[1] as f64;
        let s = side_now as f64;
        mm += cur * cur;
        mm1 += cur * prev;
        m1m1 += prev * prev;
        sm += s * cur;
        sm1 += s * prev;
    }
    let det = mm * m1m1 - mm1 * mm1;
    if det.abs() < 1e-12 {
        return [0, 0];
    }
    let to_q13 = |w: f64| -> i32 { (w * 8192.0).round().clamp(-13500.0, 13500.0) as i32 };
    let w0 = (sm * m1m1 - sm1 * mm1) / det;
    let w1 = (mm * sm1 - mm1 * sm) / det;
    [to_q13(w0), to_q13(w1)]
}
/// Reconstructs the Q15 NLSF vector from a stage-1 index and stage-2
/// residual indices using fixed-point arithmetic. Going by its name and the
/// unit test, it is meant to mirror the decoder's reconstruction.
///
/// `order == 16` selects the wideband tables and step size; any other order
/// uses the NB/MB ones. `voiced` is accepted but not used. Residuals are
/// clamped to `[-10, 10]`, and each output value is clamped to `[1, 32767]`.
fn synthesize_nlsf_like_decoder(
    stage1: usize,
    voiced: bool,
    order: usize,
    residuals: &[i32],
) -> Vec<i16> {
    let _ = voiced;
    let wideband = order == 16;
    let qstep: i32 = if wideband { 9830 } else { 11796 };

    // Backward-predicted residual, computed from the last coefficient down.
    let mut res_q10 = vec![0i32; order];
    for k in (0..order).rev() {
        let mut predicted = 0i32;
        if k + 1 < order {
            let weight: u8 = if wideband {
                let sel = tables::NLSF_WB_PRED_SELECT[stage1][k] as usize;
                tables::NLSF_PRED_WEIGHTS[2 + sel][k]
            } else {
                let sel = tables::NLSF_NBMB_PRED_SELECT[stage1][k] as usize;
                tables::NLSF_PRED_WEIGHTS[sel][k]
            };
            predicted = (res_q10[k + 1] * weight as i32) >> 8;
        }
        let index = residuals[k].clamp(-10, 10);
        let dequantised = (index << 10) - index.signum() * 102;
        res_q10[k] = predicted + ((dequantised * qstep) >> 16);
    }

    // Stage-1 codebook vector widened to i32.
    let cb1_q8: Vec<i32> = if wideband {
        tables::NLSF_WB_CB1_Q8[stage1]
            .iter()
            .map(|&v| v as i32)
            .collect()
    } else {
        tables::NLSF_NBMB_CB1_Q8[stage1]
            .iter()
            .map(|&v| v as i32)
            .collect()
    };
    let w_q9 = ihmw_weights_local(&cb1_q8);

    (0..order)
        .map(|k| {
            let base_q15 = cb1_q8[k] << 7;
            let correction = (res_q10[k] << 14) / w_q9[k] as i32;
            (base_q15 + correction).clamp(1, 32767) as i16
        })
        .collect()
}
/// Computes one Q9 weight per stage-1 codebook entry from the spacing to its
/// neighbours, using an integer square-root approximation.
///
/// The neighbour below the first entry is taken as 0 and the one above the
/// last entry as 256. Each spacing is floored at 1. A weight is 1 when the
/// squared weight evaluates to zero and is otherwise clamped to `[1, 65535]`.
fn ihmw_weights_local(cb1_q8: &[i32]) -> Vec<u16> {
    (0..cb1_q8.len())
        .map(|k| {
            let centre = cb1_q8[k];
            let below = if k == 0 { 0 } else { cb1_q8[k - 1] };
            let above = cb1_q8.get(k + 1).copied().unwrap_or(256);
            let gap_lo = (centre - below).max(1);
            let gap_hi = (above - centre).max(1);
            let w2 = ((1024 / gap_lo + 1024 / gap_hi) << 16) as u32;
            if w2 == 0 {
                return 1;
            }
            // Integer log2 (position of the highest set bit, 1-based).
            let ilog = 32 - w2.leading_zeros() as i32;
            let frac = ((w2 >> (ilog - 8).max(0)) & 127) as i32;
            let seed: i32 = if ilog % 2 == 1 { 32768 } else { 46214 };
            let root = seed >> ((32 - ilog) >> 1).max(0);
            let refined = root + ((213 * frac * root) >> 16);
            refined.clamp(1, u16::MAX as i32) as u16
        })
        .collect()
}
#[cfg(test)]
mod tests {
use super::*;
/// With all-zero stage-2 residuals the synthesised NB NLSFs must equal the
/// stage-1 codebook row shifted from Q8 to Q15, and stabilising them must
/// give a non-decreasing sequence.
#[test]
fn nlsf_template_mirrors_decoder() {
let nlsf = synthesize_nlsf_like_decoder(0, false, 10, &[0; 10]);
assert_eq!(nlsf.len(), 10);
for k in 0..10 {
let expected = (tables::NLSF_NBMB_CB1_Q8[0][k] as i32) << 7;
assert_eq!(
nlsf[k] as i32, expected,
"encoder's NLSF mirror diverges from cb1_Q8 << 7 at k={k}"
);
}
let stable = crate::silk::lsf::stabilize(&nlsf, false);
for w in stable.windows(2) {
assert!(
w[1] >= w[0],
"stabilised NLSF should be non-decreasing ({} → {})",
w[0],
w[1]
);
}
}
/// The 20 ms wideband encoder reports order 16, 80-sample sub-frames,
/// 320-sample frames and a 16 kHz internal rate.
#[test]
fn wb_frame_params_match_expectations() {
let wb = SilkFrameEncoder::new_wb_20ms();
assert_eq!(wb.lpc_order(), 16);
assert_eq!(wb.subframe_len(), 80);
assert_eq!(wb.frame_len(), 320);
assert_eq!(wb.internal_rate_hz(), 16_000);
}
/// The 20 ms mediumband encoder reports order 10, 60-sample sub-frames,
/// 240-sample frames and a 12 kHz internal rate.
#[test]
fn mb_frame_params_match_expectations() {
let mb = SilkFrameEncoder::new_mb_20ms();
assert_eq!(mb.lpc_order(), 10);
assert_eq!(mb.subframe_len(), 60);
assert_eq!(mb.frame_len(), 240);
assert_eq!(mb.internal_rate_hz(), 12_000);
}
/// Round-trips an all-zero NB 20 ms frame through the encoder and the
/// crate's decoder and checks that the decoded peak stays below 0.01.
#[test]
fn encode_decode_zero_frame_matches() {
use oxideav_celt::range_decoder::RangeDecoder;
let mut enc = SilkFrameEncoder::new_nb_20ms();
let pcm = vec![0.0f32; 160];
let mut re = RangeEncoder::new(512);
// Two header bits precede the frame body; the decoder side reads them
// back as `_vad` and `_lbrr`.
re.encode_bit_logp(true, 1);
re.encode_bit_logp(false, 1);
enc.encode_frame_body(&pcm, &mut re).unwrap();
let buf = re.done().expect("done");
let mut rc = RangeDecoder::new(&buf);
let _vad = rc.decode_bit_logp(1);
let _lbrr = rc.decode_bit_logp(1);
let mut s = crate::silk::SilkChannelState::new();
let decoded = crate::silk::decode_frame_body_pub(
&mut rc,
true,
OpusBandwidth::Narrowband,
10,
40,
4,
&mut s,
)
.expect("decode");
let peak = decoded.iter().copied().fold(0f32, |a, b| a.max(b.abs()));
println!("zero-frame roundtrip peak = {peak:.6}");
assert!(
peak < 0.01,
"zero-frame decode should be quiet (under §4.2.7.8.6 dither floor), got peak {peak}"
);
}
/// Encodes an all-zero NB frame (without the two header bits) and checks the
/// finished buffer is non-empty and has length 512, the size passed to
/// `RangeEncoder::new`.
// NOTE(review): despite the name, nothing is decoded here and no finiteness
// check is performed — confirm whether that is intended.
#[test]
fn encode_decode_zero_frame_produces_finite_output() {
let mut enc = SilkFrameEncoder::new_nb_20ms();
let pcm = vec![0.0f32; 160];
let mut re = RangeEncoder::new(512);
enc.encode_frame_body(&pcm, &mut re).expect("encode");
let buf = re.done().expect("done");
assert!(!buf.is_empty());
assert_eq!(buf.len(), 512);
}
/// NB 20 ms single-frame round trip must exceed 25 dB SNR at 8 kHz.
#[test]
fn encode_decode_nb_one_frame_internal_rate_snr() {
run_internal_rate_roundtrip(BandwidthParams::nb(), 8_000, 25.0);
}
/// NB 10 ms single-frame round trip must exceed 20 dB SNR at 8 kHz.
#[test]
fn encode_decode_nb_10ms_internal_rate_snr() {
run_internal_rate_roundtrip_10ms(BandwidthParams::nb(), 8_000, 20.0);
}
/// MB 10 ms single-frame round trip must exceed 20 dB SNR at 12 kHz.
#[test]
fn encode_decode_mb_10ms_internal_rate_snr() {
run_internal_rate_roundtrip_10ms(BandwidthParams::mb(), 12_000, 20.0);
}
/// WB 10 ms single-frame round trip must exceed 20 dB SNR at 16 kHz.
#[test]
fn encode_decode_wb_10ms_internal_rate_snr() {
run_internal_rate_roundtrip_10ms(BandwidthParams::wb(), 16_000, 20.0);
}
/// Encodes one 2-sub-frame (10 ms) frame of a 300 Hz sine with amplitude 0.3
/// sampled at `rate`, decodes it with the crate's decoder and asserts the
/// SNR against the original is above `snr_bar` dB.
fn run_internal_rate_roundtrip_10ms(params: BandwidthParams, rate: u32, snr_bar: f64) {
use oxideav_celt::range_decoder::RangeDecoder;
let mut enc = SilkFrameEncoder::new_with_subframes(params, 2);
let frame_len = enc.frame_len();
let freq = 300.0f32;
let pcm: Vec<f32> = (0..frame_len)
.map(|i| (2.0 * std::f32::consts::PI * freq * i as f32 / rate as f32).sin() * 0.3)
.collect();
let mut re = RangeEncoder::new(1024);
// Two header bits precede the frame body; read back below as `_vad`/`_lbrr`.
re.encode_bit_logp(true, 1);
re.encode_bit_logp(false, 1);
enc.encode_frame_body(&pcm, &mut re).expect("encode");
let buf = re.done().expect("done");
let mut dec_state = crate::silk::SilkChannelState::new();
let mut rc = RangeDecoder::new(&buf);
let _vad = rc.decode_bit_logp(1);
let _lbrr = rc.decode_bit_logp(1);
let decoded = crate::silk::decode_frame_body_pub(
&mut rc,
true,
params.bandwidth,
params.lpc_order,
params.subframe_len,
2,
&mut dec_state,
)
.expect("decode");
assert_eq!(decoded.len(), frame_len);
// SNR = 10 * log10(signal energy / error energy); the error energy is
// floored at 1e-30 to avoid dividing by zero.
let sig: f64 = pcm.iter().map(|v| (*v as f64) * (*v as f64)).sum();
let err: f64 = pcm
.iter()
.zip(decoded.iter())
.map(|(a, b)| {
let e = (*a - *b) as f64;
e * e
})
.sum();
let snr = 10.0 * (sig / err.max(1e-30)).log10();
println!(
"{:?} 10 ms internal-rate SNR: {snr:.2} dB (bar {snr_bar})",
params.bandwidth
);
assert!(
snr > snr_bar,
"10 ms internal-rate SNR {snr:.2} dB below {snr_bar} dB bar"
);
}
/// MB 20 ms single-frame round trip must exceed 25 dB SNR at 12 kHz.
#[test]
fn encode_decode_mb_one_frame_internal_rate_snr() {
run_internal_rate_roundtrip(BandwidthParams::mb(), 12_000, 25.0);
}
/// WB 20 ms single-frame round trip must exceed 25 dB SNR at 16 kHz.
#[test]
fn encode_decode_wb_one_frame_internal_rate_snr() {
run_internal_rate_roundtrip(BandwidthParams::wb(), 16_000, 25.0);
}
/// A zero side signal (what identical left/right channels produce) must give
/// zero prediction weights.
#[test]
fn stereo_pred_weights_zero_for_identical_channels() {
let m: Vec<f32> = (0..100)
.map(|i| (2.0 * std::f32::consts::PI * 300.0 * i as f32 / 8_000.0).sin() * 0.3)
.collect();
let s = vec![0.0f32; m.len()];
let w = stereo_predict_weights_q13(&m, &s);
assert_eq!(w, [0, 0]);
}
/// Mid/side must be invertible: `(m + s) / 2` recovers left and
/// `(m - s) / 2` recovers right to within 1e-6.
#[test]
fn stereo_mid_side_reconstructs_lr() {
let l = vec![0.1f32, 0.2, 0.3, 0.4];
let r = vec![0.0f32, 0.1, 0.2, 0.3];
let (m, s) = stereo_mid_side(&l, &r);
for i in 0..l.len() {
let rec_l = (m[i] + s[i]) * 0.5;
let rec_r = (m[i] - s[i]) * 0.5;
assert!((rec_l - l[i]).abs() < 1e-6);
assert!((rec_r - r[i]).abs() < 1e-6);
}
}
/// Encodes five WB frames of a 150 Hz four-harmonic signal twice — once with
/// normal path selection and once forced unvoiced — and compares the
/// round-trip SNR, skipping the first frame. The normal run must be no more
/// than 1 dB worse than the forced-unvoiced run and above 15 dB.
#[test]
fn voiced_path_beats_unvoiced_on_speech_like_input() {
use oxideav_celt::range_decoder::RangeDecoder;
let params = BandwidthParams::wb();
let rate = 16_000u32;
let n_frames = 5;
let frame_len = params.subframe_len * 4; let total = frame_len * n_frames;
let f0 = 150.0f32;
let pcm: Vec<f32> = (0..total)
.map(|i| {
let t = i as f32 / rate as f32;
((2.0 * std::f32::consts::PI * f0 * t).sin()
+ 0.6 * (2.0 * std::f32::consts::PI * 2.0 * f0 * t).sin()
+ 0.3 * (2.0 * std::f32::consts::PI * 3.0 * f0 * t).sin()
+ 0.15 * (2.0 * std::f32::consts::PI * 4.0 * f0 * t).sin())
* 0.25
})
.collect();
// Encodes and decodes all frames with one persistent encoder and one
// persistent decoder state, returning the concatenated decoded signal.
fn encode_decode_all(
params: BandwidthParams,
pcm: &[f32],
n_frames: usize,
frame_len: usize,
force_unvoiced: bool,
) -> Vec<f32> {
let mut enc = SilkFrameEncoder::new(params);
enc.set_force_unvoiced(force_unvoiced);
let mut dec_state = crate::silk::SilkChannelState::new();
let mut decoded_all = Vec::with_capacity(pcm.len());
for i in 0..n_frames {
let slice = &pcm[i * frame_len..(i + 1) * frame_len];
let mut re = RangeEncoder::new(2048);
re.encode_bit_logp(true, 1);
re.encode_bit_logp(false, 1);
enc.encode_frame_body(slice, &mut re).expect("encode");
let buf = re.done().expect("done");
let mut rc = RangeDecoder::new(&buf);
let _vad = rc.decode_bit_logp(1);
let _lbrr = rc.decode_bit_logp(1);
let frame = crate::silk::decode_frame_body_pub(
&mut rc,
true,
params.bandwidth,
params.lpc_order,
params.subframe_len,
4,
&mut dec_state,
)
.expect("decode");
decoded_all.extend_from_slice(&frame);
}
decoded_all
}
let dec_voiced = encode_decode_all(params, &pcm, n_frames, frame_len, false);
let dec_unvoiced = encode_decode_all(params, &pcm, n_frames, frame_len, true);
// The first frame is excluded from the SNR measurement.
let skip = frame_len;
let snr_voiced = snr_db_range(&pcm, &dec_voiced, skip);
let snr_unvoiced = snr_db_range(&pcm, &dec_unvoiced, skip);
println!(
"voiced_vs_unvoiced WB harmonic: voiced={:.2} dB, unvoiced={:.2} dB, delta={:.2} dB",
snr_voiced,
snr_unvoiced,
snr_voiced - snr_unvoiced
);
assert!(
snr_voiced > snr_unvoiced - 1.0,
"voiced SNR {snr_voiced:.2} dB should be within 1 dB of unvoiced {snr_unvoiced:.2} dB"
);
assert!(snr_voiced > 15.0, "voiced SNR {snr_voiced:.2} dB too low");
}
/// Checks pitch analysis and the LTP tap table on a 180 Hz three-harmonic
/// signal. The second frame must be classified voiced, and applying the
/// selected 5-tap filter at the detected lag directly to the input must give
/// an RMS of more than half the signal RMS.
#[test]
fn ltp_raw_sum_captures_periodicity() {
let params = BandwidthParams::wb();
let rate = 16_000u32;
let frame_len = params.subframe_len * 4;
let f0 = 180.0f32;
let pcm: Vec<f32> = (0..frame_len * 2)
.map(|i| {
let t = i as f32 / rate as f32;
((2.0 * std::f32::consts::PI * f0 * t).sin()
+ 0.6 * (2.0 * std::f32::consts::PI * 2.0 * f0 * t).sin()
+ 0.3 * (2.0 * std::f32::consts::PI * 3.0 * f0 * t).sin())
* 0.25
})
.collect();
let pitch = analyze_pitch(&pcm[frame_len..frame_len * 2], OpusBandwidth::Wideband);
assert!(pitch.voiced, "harmonic signal should be voiced");
let lag = pitch.lag_internal;
let periodicity = LTP_PERIODICITY_VOICED;
let idx = ltp::pick_ltp_filter_index(pitch.correlation, periodicity);
let taps = ltp::ltp_filter_from_index(idx, periodicity);
let start = frame_len;
let end = start + frame_len;
let mut ltp_energy = 0f64;
let mut sig_energy = 0f64;
for n in start..end {
// Tap k looks back lag + (k - 2) samples; indices before the start
// of the buffer read as 0.
let mut s = 0f32;
for k in 0..5 {
let lag_k = lag + (k as i32 - 2);
let j = n as i32 - lag_k;
let past = if j >= 0 { pcm[j as usize] } else { 0.0 };
s += taps[k] * past;
}
ltp_energy += (s as f64) * (s as f64);
let v = pcm[n] as f64;
sig_energy += v * v;
}
let ratio = (ltp_energy / sig_energy.max(1e-30)).sqrt();
println!(
"LTP raw-sum RMS / signal RMS on voiced frame: {ratio:.3} \
(lag={lag}, corr={:.3})",
pitch.correlation
);
assert!(
ratio > 0.5,
"LTP sum RMS ratio {ratio:.3} too small — pitch or taps wrong"
);
}
/// Signal-to-noise ratio in dB between `ref_pcm` and `dec`, ignoring the
/// first `skip` samples and anything beyond the shorter of the two inputs.
///
/// The error energy is floored at 1e-30, so identical inputs give a large
/// finite value instead of infinity. Panics if `skip` exceeds the length of
/// either slice.
fn snr_db_range(ref_pcm: &[f32], dec: &[f32], skip: usize) -> f64 {
    let compared = ref_pcm.len().min(dec.len()).saturating_sub(skip);
    let reference = &ref_pcm[skip..skip + compared];
    let decoded = &dec[skip..skip + compared];
    let signal_energy: f64 = reference
        .iter()
        .map(|&x| {
            let x = x as f64;
            x * x
        })
        .sum();
    let error_energy: f64 = reference
        .iter()
        .zip(decoded)
        .map(|(&want, &got)| {
            let diff = (want - got) as f64;
            diff * diff
        })
        .sum();
    10.0 * (signal_energy / error_energy.max(1e-30)).log10()
}
/// Encodes one 4-sub-frame (20 ms) frame of a 300 Hz sine with amplitude 0.3
/// sampled at `rate`, decodes it with the crate's decoder and asserts the
/// SNR against the original is above `snr_bar` dB.
fn run_internal_rate_roundtrip(params: BandwidthParams, rate: u32, snr_bar: f64) {
use oxideav_celt::range_decoder::RangeDecoder;
let mut enc = SilkFrameEncoder::new(params);
let frame_len = enc.frame_len();
let freq = 300.0f32;
let pcm: Vec<f32> = (0..frame_len)
.map(|i| (2.0 * std::f32::consts::PI * freq * i as f32 / rate as f32).sin() * 0.3)
.collect();
let mut re = RangeEncoder::new(1024);
// Two header bits precede the frame body; read back below as `_vad`/`_lbrr`.
re.encode_bit_logp(true, 1);
re.encode_bit_logp(false, 1);
enc.encode_frame_body(&pcm, &mut re).expect("encode");
let buf = re.done().expect("done");
let mut dec_state = crate::silk::SilkChannelState::new();
let mut rc = RangeDecoder::new(&buf);
let _vad = rc.decode_bit_logp(1);
let _lbrr = rc.decode_bit_logp(1);
let decoded = crate::silk::decode_frame_body_pub(
&mut rc,
true,
params.bandwidth,
params.lpc_order,
params.subframe_len,
4,
&mut dec_state,
)
.expect("decode");
assert_eq!(decoded.len(), frame_len);
// SNR = 10 * log10(signal energy / error energy); the error energy is
// floored at 1e-30 to avoid dividing by zero.
let sig: f64 = pcm.iter().map(|v| (*v as f64) * (*v as f64)).sum();
let err: f64 = pcm
.iter()
.zip(decoded.iter())
.map(|(a, b)| {
let e = (*a - *b) as f64;
e * e
})
.sum();
let snr = 10.0 * (sig / err.max(1e-30)).log10();
println!(
"{:?} internal-rate SNR: {snr:.2} dB (bar {snr_bar})",
params.bandwidth
);
assert!(
snr > snr_bar,
"internal-rate SNR {snr:.2} dB below {snr_bar} dB bar"
);
}
/// Compares the bit cost of the shell excitation coder with a naive
/// two-nibble-per-sample scheme on a sine-derived excitation, and checks
/// that a live NB frame round trip stays above 25 dB SNR.
///
/// A second, forced-unvoiced encode/decode is run as well; it only has to
/// succeed, its output is not inspected.
#[test]
fn shell_coder_beats_mvp_on_sine_bitrate() {
    use crate::silk::shell;
    use crate::silk::tables;
    use oxideav_celt::range_decoder::RangeDecoder;
    let params = BandwidthParams::nb();
    let rate = 8_000u32;
    let mut enc = SilkFrameEncoder::new(params);
    let frame_len = enc.frame_len();
    let freq = 300.0f32;
    let pcm: Vec<f32> = (0..frame_len)
        .map(|i| (2.0 * std::f32::consts::PI * freq * i as f32 / rate as f32).sin() * 0.3)
        .collect();

    // Live frame: measure the bits used by the frame body and the round-trip SNR.
    let mut re = RangeEncoder::new(2048);
    re.encode_bit_logp(true, 1);
    re.encode_bit_logp(false, 1);
    let tell_before = re.tell();
    enc.encode_frame_body(&pcm, &mut re).expect("encode");
    let tell_after = re.tell();
    let live_frame_bits = tell_after - tell_before;
    let buf = re.done().expect("done");
    let mut dec_state = crate::silk::SilkChannelState::new();
    let mut rc = RangeDecoder::new(&buf);
    let _vad = rc.decode_bit_logp(1);
    let _lbrr = rc.decode_bit_logp(1);
    let decoded = crate::silk::decode_frame_body_pub(
        &mut rc,
        true,
        params.bandwidth,
        params.lpc_order,
        params.subframe_len,
        4,
        &mut dec_state,
    )
    .expect("decode");
    let snr = snr_db_range(&pcm, &decoded, 0);
    use crate::silk::excitation::MAG_NIBBLE_ICDF;
    enc.reset();

    // Forced-unvoiced round trip on a fresh encoder; must encode and decode cleanly.
    let mut enc2 = SilkFrameEncoder::new(params);
    enc2.set_force_unvoiced(true);
    let mut re_unv = RangeEncoder::new(2048);
    re_unv.encode_bit_logp(true, 1);
    re_unv.encode_bit_logp(false, 1);
    enc2.encode_frame_body(&pcm, &mut re_unv).expect("encode");
    let buf_unv = re_unv.done().expect("done");
    let mut dec_state2 = crate::silk::SilkChannelState::new();
    let mut rc2 = RangeDecoder::new(&buf_unv);
    let _v = rc2.decode_bit_logp(1);
    let _l = rc2.decode_bit_logp(1);
    let _decoded2 = crate::silk::decode_frame_body_pub(
        &mut rc2,
        true,
        params.bandwidth,
        params.lpc_order,
        params.subframe_len,
        4,
        &mut dec_state2,
    )
    .expect("decode2");

    // Synthetic excitation: scaled first difference of the sine, padded to a
    // multiple of 16 samples and clamped to +/-120.
    let mut signed_mags: Vec<i32> = pcm
        .windows(2)
        .map(|w| ((w[1] - w[0]) * 120.0).round() as i32)
        .collect();
    signed_mags.push(0);
    let aligned = signed_mags.len().div_ceil(16) * 16;
    signed_mags.resize(aligned, 0);
    for v in signed_mags.iter_mut() {
        *v = (*v).clamp(-120, 120);
    }

    // Cost with the shell coder.
    let mut re_shell = RangeEncoder::new(2048);
    let t0 = re_shell.tell();
    shell::encode_excitation(&mut re_shell, &signed_mags, 1, 0);
    let shell_bits = re_shell.tell() - t0;

    // Cost with the naive scheme: two magnitude nibbles plus a sign bit for
    // every non-zero sample.
    let mut re_mvp = RangeEncoder::new(2048);
    let t0 = re_mvp.tell();
    re_mvp.encode_icdf(0, &tables::RATE_LEVEL_INACTIVE_ICDF, 8);
    let n_shells = signed_mags.len() / 16;
    for _ in 0..n_shells {
        re_mvp.encode_icdf(0, &tables::PULSE_COUNT_ICDF[0], 8);
    }
    for &s in &signed_mags {
        let m = s.unsigned_abs() as i32;
        let hi = ((m >> 4) & 0xf) as usize;
        let lo = (m & 0xf) as usize;
        re_mvp.encode_icdf(hi, &MAG_NIBBLE_ICDF, 8);
        re_mvp.encode_icdf(lo, &MAG_NIBBLE_ICDF, 8);
        if m != 0 {
            re_mvp.encode_bit_logp(s < 0, 1);
        }
    }
    let mvp_bits = re_mvp.tell() - t0;

    // The difference is taken in f32: with an unsigned bit counter,
    // `mvp_bits - shell_bits` would overflow when the shell coder loses and
    // abort here, before the assertion below can report the failure.
    println!(
        "sine bitrate — shell={shell_bits} bits mvp={mvp_bits} bits \
savings={:.1}% live_frame={live_frame_bits} bits snr={snr:.2} dB",
        100.0 * (mvp_bits as f32 - shell_bits as f32) / mvp_bits as f32
    );
    assert!(
        shell_bits < mvp_bits,
        "shell coder did not save bits on sine: shell={shell_bits} mvp={mvp_bits}"
    );
    assert!(snr > 25.0, "round-trip SNR dropped: {snr:.2} dB");
}
}