#![allow(clippy::many_single_char_names)]
pub mod encoder;
pub mod excitation;
pub mod lsf;
pub mod ltp;
pub mod pitch_analysis;
pub mod range_dec;
pub mod shell;
pub mod synth;
pub mod tables;
use oxideav_celt::range_decoder::RangeDecoder;
use oxideav_core::{Error, Result};
use crate::toc::{OpusBandwidth, Toc};
/// Returns the SILK internal sampling rate, in Hz, used for bandwidth `bw`.
///
/// Narrowband runs at 8 kHz and mediumband at 12 kHz; wideband and every
/// other bandwidth value map to 16 kHz.
pub fn internal_rate_hz(bw: OpusBandwidth) -> u32 {
    let rate_khz: u32 = match bw {
        OpusBandwidth::Narrowband => 8,
        OpusBandwidth::Mediumband => 12,
        OpusBandwidth::Wideband => 16,
        // Any wider bandwidth still runs SILK at its top internal rate.
        _ => 16,
    };
    rate_khz * 1_000
}
/// Number of subframes in a 20 ms SILK frame (960 samples at 48 kHz).
pub const SUBFRAMES_20MS: usize = 4;
/// Number of subframes in a 10 ms SILK frame (480 samples at 48 kHz).
pub const SUBFRAMES_10MS: usize = 2;
/// Per-channel SILK decoder memory carried from one frame to the next.
#[derive(Debug, Clone)]
pub struct SilkChannelState {
    /// Starts empty. Not written in this module; presumably maintained by the
    /// synthesis stage — confirm in `synth`.
    pub prev_lpc: Vec<f32>,
    /// Primary pitch lag of the most recent voiced frame; `0` means no lag is
    /// available yet, which forces absolute lag coding on the next voiced frame.
    pub prev_pitch_lag: i32,
    /// NLSF vector of the most recently decoded frame (empty before the first frame).
    pub prev_nlsf_q15: Vec<i16>,
    /// Starts empty. Not written in this module; presumably the LPC synthesis
    /// filter memory — confirm in `synth`.
    pub lpc_history: Vec<f32>,
    /// Starts as 480 zero samples. Not written in this module; presumably the
    /// long-term-prediction history buffer — confirm in `synth`.
    pub ltp_history: Vec<f32>,
    /// Starts at zero. Not written in this module — confirm its use in `synth`.
    pub prev_gain_q16: i32,
    /// `true` until the first frame has been decoded with this state.
    pub first_frame: bool,
}

impl SilkChannelState {
    /// Creates the state of a channel that has not decoded anything yet.
    pub fn new() -> Self {
        Self::default()
    }

    /// Discards all history, returning the state to its freshly created form.
    pub fn reset(&mut self) {
        *self = Self::default();
    }
}

impl Default for SilkChannelState {
    fn default() -> Self {
        Self {
            prev_lpc: Vec::new(),
            prev_pitch_lag: 0,
            prev_nlsf_q15: Vec::new(),
            lpc_history: Vec::new(),
            ltp_history: vec![0.0; 480],
            prev_gain_q16: 0,
            first_frame: true,
        }
    }
}
/// Stereo unmixing memory shared by the mid and side channels.
#[derive(Debug, Clone, Default)]
pub struct SilkStereoState {
    /// Prediction weights (Q13) of the previous frame; the start point of the
    /// weight interpolation at the beginning of the next frame.
    pub pred_prev_q13: [i32; 2],
    /// Last two mid samples of the previous frame, stored in Q15.
    pub s_mid: [i16; 2],
    /// Last two side samples of the previous frame, stored in Q15.
    pub s_side: [i16; 2],
    /// Whether the previous frame carried only the mid channel.
    pub prev_decode_only_mid: bool,
}

impl SilkStereoState {
    /// Creates an all-zero stereo state (no history, previous frame not mid-only).
    pub fn new() -> Self {
        Self {
            pred_prev_q13: [0; 2],
            s_mid: [0; 2],
            s_side: [0; 2],
            prev_decode_only_mid: false,
        }
    }
}
/// SILK-layer decoder for one Opus stream (mono, or stereo coded as mid/side).
pub struct SilkDecoder {
/// Channel state for the mono channel, or the mid channel of a stereo stream.
pub state: SilkChannelState,
/// Channel state for the side channel of a stereo stream.
pub side_state: SilkChannelState,
/// Memory used when unmixing mid/side into left/right.
pub stereo_state: SilkStereoState,
/// Audio bandwidth the decoder was created for.
pub bandwidth: OpusBandwidth,
/// LPC filter order selected from the bandwidth in `new` (10 or 16).
pub lpc_order: usize,
/// Samples per subframe at the internal rate (40, 60 or 80), selected in `new`.
pub subframe_len: usize,
/// Samples per 20 ms frame at the internal rate (`subframe_len * SUBFRAMES_20MS`).
pub frame_len: usize,
}
impl SilkDecoder {
pub fn new(bandwidth: OpusBandwidth) -> Self {
let (order, sub_len) = match bandwidth {
OpusBandwidth::Narrowband => (10, 40), OpusBandwidth::Mediumband => (10, 60), OpusBandwidth::Wideband => (16, 80), _ => (16, 80),
};
let frame_len = sub_len * SUBFRAMES_20MS;
Self {
state: SilkChannelState::new(),
side_state: SilkChannelState::new(),
stereo_state: SilkStereoState::new(),
bandwidth,
lpc_order: order,
subframe_len: sub_len,
frame_len,
}
}
pub fn decode_frame_to_48k(
&mut self,
rc: &mut RangeDecoder<'_>,
toc: &Toc,
) -> Result<Vec<f32>> {
let (n_frames_per_packet, n_subframes_per_frame) = match toc.frame_samples_48k {
480 => (1, SUBFRAMES_10MS),
960 => (1, SUBFRAMES_20MS),
1920 => (2, SUBFRAMES_20MS),
2880 => (3, SUBFRAMES_20MS),
_ => {
return Err(Error::unsupported("SILK: unsupported frame size"));
}
};
let n_internal_channels = if toc.stereo { 2 } else { 1 };
let mut vad_flags = [[false; 3]; 2]; let mut lbrr_channel = [false; 2];
for n in 0..n_internal_channels {
for i in 0..n_frames_per_packet {
vad_flags[n][i] = rc.decode_bit_logp(1);
}
lbrr_channel[n] = rc.decode_bit_logp(1);
}
let mut lbrr_flags = [[false; 3]; 2];
for n in 0..n_internal_channels {
if lbrr_channel[n] {
if n_frames_per_packet == 1 {
lbrr_flags[n][0] = true;
} else {
let icdf: &[u8] = if n_frames_per_packet == 2 {
&tables::LBRR_FLAGS_2_ICDF
} else {
&tables::LBRR_FLAGS_3_ICDF
};
let sym = rc.decode_icdf(icdf, 8) as u32 + 1;
for i in 0..n_frames_per_packet {
lbrr_flags[n][i] = ((sym >> i) & 1) != 0;
}
}
}
}
let any_lbrr =
(0..n_internal_channels).any(|n| (0..n_frames_per_packet).any(|i| lbrr_flags[n][i]));
if any_lbrr {
let mut lbrr_mid_state = SilkChannelState::new();
let mut lbrr_side_state = SilkChannelState::new();
for i in 0..n_frames_per_packet {
for n in 0..n_internal_channels {
if !lbrr_flags[n][i] {
continue;
}
let state_ref = if n == 0 {
&mut lbrr_mid_state
} else {
&mut lbrr_side_state
};
let _ = decode_frame_body(
rc,
vad_flags[n][i],
self.bandwidth,
self.lpc_order,
self.subframe_len,
n_subframes_per_frame,
state_ref,
)?;
}
}
}
let internal_rate = internal_rate_hz(self.bandwidth);
let fs_khz = (internal_rate / 1000) as i32;
let frame_len_internal = self.subframe_len * n_subframes_per_frame;
let mut out_per_packet_frame_interleaved: Vec<Vec<f32>> =
Vec::with_capacity(n_frames_per_packet);
for i in 0..n_frames_per_packet {
let mut ms_pred_q13 = [0i32; 2];
let mut decode_only_mid = false;
if n_internal_channels == 2 {
ms_pred_q13 = stereo_decode_pred(rc);
if !vad_flags[1][i] {
decode_only_mid = rc.decode_icdf(&tables::STEREO_ONLY_CODE_MID_ICDF, 8) != 0;
}
}
if n_internal_channels == 2
&& !decode_only_mid
&& self.stereo_state.prev_decode_only_mid
{
self.side_state.reset();
}
let mid_internal = decode_frame_body(
rc,
vad_flags[0][i],
self.bandwidth,
self.lpc_order,
self.subframe_len,
n_subframes_per_frame,
&mut self.state,
)?;
let side_internal = if n_internal_channels == 2 && !decode_only_mid {
decode_frame_body(
rc,
vad_flags[1][i],
self.bandwidth,
self.lpc_order,
self.subframe_len,
n_subframes_per_frame,
&mut self.side_state,
)?
} else {
vec![0.0f32; frame_len_internal]
};
let mid_48k = synth::upsample_to_48k(&mid_internal, internal_rate);
let side_48k = if n_internal_channels == 2 && !decode_only_mid {
synth::upsample_to_48k(&side_internal, internal_rate)
} else {
Vec::new()
};
if n_internal_channels == 1 {
out_per_packet_frame_interleaved.push(mid_48k);
} else {
let lr_48k = stereo_unmix_48k(
&mid_48k,
&side_48k,
&ms_pred_q13,
decode_only_mid,
fs_khz,
&mut self.stereo_state,
);
out_per_packet_frame_interleaved.push(lr_48k);
}
self.stereo_state.prev_decode_only_mid = decode_only_mid;
}
let total = out_per_packet_frame_interleaved
.iter()
.map(|v| v.len())
.sum();
let mut out = Vec::with_capacity(total);
for chunk in out_per_packet_frame_interleaved {
out.extend_from_slice(&chunk);
}
Ok(out)
}
}
/// Public entry point that forwards unchanged to the private
/// `decode_frame_body`, making single-frame decoding reachable from outside
/// this module. Hidden from the generated documentation.
///
/// All arguments and the returned value are passed through as-is; see
/// `decode_frame_body` for their meaning.
#[doc(hidden)]
pub fn decode_frame_body_pub(
rc: &mut oxideav_celt::range_decoder::RangeDecoder<'_>,
vad_flag: bool,
bandwidth: OpusBandwidth,
lpc_order: usize,
subframe_len: usize,
n_subframes: usize,
state: &mut SilkChannelState,
) -> Result<Vec<f32>> {
decode_frame_body(
rc,
vad_flag,
bandwidth,
lpc_order,
subframe_len,
n_subframes,
state,
)
}
fn decode_frame_body(
rc: &mut RangeDecoder<'_>,
vad_flag: bool,
bandwidth: OpusBandwidth,
lpc_order: usize,
subframe_len: usize,
n_subframes: usize,
state: &mut SilkChannelState,
) -> Result<Vec<f32>> {
debug_assert!(n_subframes == SUBFRAMES_10MS || n_subframes == SUBFRAMES_20MS);
let frame_len = subframe_len * n_subframes;
let frame_type_sym = if vad_flag {
rc.decode_icdf(&tables::FRAME_TYPE_ACTIVE_ICDF, 8)
} else {
rc.decode_icdf(&tables::FRAME_TYPE_INACTIVE_ICDF, 8)
};
let (signal_type, quant_offset_type) = match frame_type_sym {
0 => (0u8, 0u8),
1 => (0, 1),
2 => (1, 0),
3 => (1, 1),
4 => (2, 0),
5 => (2, 1),
_ => (1, 0),
};
let voiced = signal_type == 2;
let mut gains_q16 = vec![0i32; n_subframes];
{
let msb_icdf: &[u8] = match signal_type {
0 => &tables::GAIN_MSB_INACTIVE_ICDF,
1 => &tables::GAIN_MSB_UNVOICED_ICDF,
_ => &tables::GAIN_MSB_VOICED_ICDF,
};
let msb = rc.decode_icdf(msb_icdf, 8) as i32;
let lsb = rc.decode_icdf(&tables::GAIN_LSB_ICDF, 8) as i32;
let idx = (msb << 3) | lsb;
gains_q16[0] = gain_index_to_q16(idx.clamp(0, 63));
let mut prev_log_gain = gain_index_of_q16(gains_q16[0]);
for sf in 1..n_subframes {
let delta = rc.decode_icdf(&tables::GAIN_DELTA_ICDF, 8) as i32;
let step = delta - 4;
let new_log = (prev_log_gain + step).clamp(0, 63);
gains_q16[sf] = gain_index_to_q16(new_log);
prev_log_gain = new_log;
}
}
let nlsf_q15 = lsf::decode_nlsf(rc, bandwidth, signal_type)?;
let lpc = lsf::nlsf_to_lpc(&nlsf_q15, bandwidth);
let mut pitch_lags = vec![0i32; n_subframes];
let mut ltp_filter = vec![[0f32; 5]; n_subframes];
let mut ltp_scale_q14 = 15565i32;
if voiced {
let abs_flag = rc.decode_bit_logp(1);
let primary_lag = if abs_flag || state.prev_pitch_lag == 0 {
ltp::decode_absolute_pitch_lag(rc, bandwidth)?
} else {
let delta = ltp::decode_delta_pitch_lag(rc)?;
state.prev_pitch_lag + delta
};
let contour_idx = ltp::decode_pitch_contour(rc, bandwidth)?;
ltp::expand_pitch_contour(primary_lag, contour_idx, bandwidth, &mut pitch_lags);
state.prev_pitch_lag = primary_lag;
let periodicity = rc.decode_icdf(&tables::LTP_PERIODICITY_ICDF, 8);
for sf in 0..n_subframes {
let tap = ltp::decode_ltp_filter(rc, periodicity);
ltp_filter[sf][..5].copy_from_slice(&tap[..5]);
}
let ltp_scale_idx = rc.decode_icdf(&tables::LTP_SCALING_ICDF, 8);
ltp_scale_q14 = match ltp_scale_idx {
0 => 15565,
1 => 12288,
_ => 8192,
};
}
let seed = rc.decode_icdf(&tables::LCG_SEED_ICDF, 8) as u32;
let excitation = excitation::decode_excitation(
rc,
frame_len,
subframe_len,
signal_type,
quant_offset_type,
seed,
)?;
let output = synth::synthesize(
&excitation,
&lpc,
&gains_q16,
&pitch_lags,
<p_filter,
ltp_scale_q14,
subframe_len,
n_subframes,
lpc_order,
voiced,
state,
);
state.first_frame = false;
state.prev_nlsf_q15 = nlsf_q15;
Ok(output)
}
/// Decodes the pair of stereo prediction weights (Q13) for one frame.
///
/// A joint symbol carries the coarse index of both weights (`n / 5` and
/// `n % 5`); each weight then reads a 3-level and a 5-level refinement. The
/// weight is interpolated between neighbouring entries of
/// `tables::STEREO_PRED_QUANT_Q13`, and the second weight is finally
/// subtracted from the first.
fn stereo_decode_pred(rc: &mut RangeDecoder<'_>) -> [i32; 2] {
    let joint = rc.decode_icdf(&tables::STEREO_PRED_JOINT_ICDF, 8) as i32;
    let coarse = [joint / 5, joint % 5];

    // Refinements are stored in bitstream order: for each weight the 3-level
    // symbol comes first, then the 5-level one.
    let mut fine = [[0i32; 2]; 2];
    for pair in fine.iter_mut() {
        pair[0] = rc.decode_icdf(&tables::STEREO_UNIFORM3_ICDF, 8) as i32;
        pair[1] = rc.decode_icdf(&tables::STEREO_UNIFORM5_ICDF, 8) as i32;
    }

    let mut pred_q13 = [0i32; 2];
    for (k, pred) in pred_q13.iter_mut().enumerate() {
        let table_pos = fine[k][0] + 3 * coarse[k];
        // Keep both lookups inside the 16-entry table.
        let lo = (table_pos as usize).min(15);
        let hi = (lo + 1).min(15);
        let low_q13 = tables::STEREO_PRED_QUANT_Q13[lo] as i32;
        let high_q13 = tables::STEREO_PRED_QUANT_Q13[hi] as i32;
        // 6554 is 0.1 in Q16: one tenth of the distance between the entries.
        let step_q13 = ((high_q13 - low_q13) * 6554) >> 16;
        *pred = low_q13 + step_q13 * (2 * fine[k][1] + 1);
    }
    pred_q13[0] -= pred_q13[1];
    pred_q13
}
/// Converts a mid/side frame into interleaved left/right samples.
///
/// * `mid` / `side` hold the frame's samples; an empty (or short) `side` is
///   treated as silence.
/// * `pred_q13` are the frame's two prediction weights in Q13. They are
///   linearly interpolated from the previous frame's weights over the first
///   `8 * 48` samples (fewer if the frame is shorter).
/// * `decode_only_mid` drops the coded side signal, so the side channel is
///   built from the prediction terms only.
/// * `_fs_khz` is accepted for interface compatibility and not used.
/// * `state` supplies the two-sample mid/side history and the previous
///   weights, and is updated for the next frame.
///
/// The output is delayed by one sample relative to the input because the
/// predictor uses a 3-tap low-pass centred on the previous sample. Left is
/// `mid + side` and right is `mid - side`, each clamped to `[-1.0, 1.0]`.
///
/// An empty `mid` returns an empty vector and leaves `state` untouched.
fn stereo_unmix_48k(
    mid: &[f32],
    side: &[f32],
    pred_q13: &[i32; 2],
    decode_only_mid: bool,
    _fs_khz: i32,
    state: &mut SilkStereoState,
) -> Vec<f32> {
    let n = mid.len();
    let mut out = vec![0.0f32; 2 * n];
    if n == 0 {
        return out;
    }

    // Prefix each signal with the last two samples of the previous frame.
    let mut x1 = Vec::with_capacity(n + 2);
    x1.push(f32::from(state.s_mid[0]) / 32768.0);
    x1.push(f32::from(state.s_mid[1]) / 32768.0);
    x1.extend_from_slice(mid);

    let mut x2 = Vec::with_capacity(n + 2);
    x2.push(f32::from(state.s_side[0]) / 32768.0);
    x2.push(f32::from(state.s_side[1]) / 32768.0);
    x2.extend((0..n).map(|i| side.get(i).copied().unwrap_or(0.0)));

    // Save the tail of this frame as history for the next call.
    state.s_mid = [f32_to_q15_clamp(x1[n]), f32_to_q15_clamp(x1[n + 1])];
    state.s_side = [f32_to_q15_clamp(x2[n]), f32_to_q15_clamp(x2[n + 1])];

    let interp_len = (8 * 48).min(n);
    let prev0 = state.pred_prev_q13[0] as f32;
    let prev1 = state.pred_prev_q13[1] as f32;
    let curr0 = pred_q13[0] as f32;
    let curr1 = pred_q13[1] as f32;
    let q13_scale = 1.0 / 8192.0;

    for (idx, lr) in out.chunks_exact_mut(2).enumerate() {
        // Interpolation weight: ramps to 1.0 over `interp_len` samples.
        let t = if idx < interp_len {
            (idx + 1) as f32 / interp_len as f32
        } else {
            1.0
        };
        let p0 = (prev0 + (curr0 - prev0) * t) * q13_scale;
        let p1 = (prev1 + (curr1 - prev1) * t) * q13_scale;

        let mid_v = x1[idx + 1];
        // [1 2 1] / 4 low-pass of the mid signal.
        let mid_lp = (x1[idx] + 2.0 * mid_v + x1[idx + 2]) * 0.25;
        let predicted = mid_lp * p0 + mid_v * p1;
        let side_v = if decode_only_mid {
            predicted
        } else {
            x2[idx + 1] + predicted
        };

        // The encoder halves when forming mid/side, so the decoder adds and
        // subtracts without rescaling (RFC 6716 §4.2.8).
        lr[0] = (mid_v + side_v).clamp(-1.0, 1.0);
        lr[1] = (mid_v - side_v).clamp(-1.0, 1.0);
    }

    state.pred_prev_q13 = *pred_q13;
    out
}
/// Converts a floating-point sample to Q15, rounding to the nearest integer
/// and saturating at the `i16` limits. NaN converts to 0.
fn f32_to_q15_clamp(x: f32) -> i16 {
    const Q15_SCALE: f32 = 32768.0;
    let scaled = (x * Q15_SCALE).round();
    scaled.clamp(f32::from(i16::MIN), f32::from(i16::MAX)) as i16
}
/// Converts a quantised log-gain index (0..=63) to a linear gain in Q16.
///
/// Follows RFC 6716 §4.2.7.4: the index is mapped to a base-2 logarithm in Q7,
/// `(0x1D1C71 * idx >> 16) + 2090`, and the gain is `2^(log2_q7 / 128)`.
/// The result is already in Q16, so no further scaling is applied. Indices
/// outside 0..=63 are clamped to that range.
pub(crate) fn gain_index_to_q16(idx: i32) -> i32 {
    // Q7 log2 increment per index step, and the Q7 log2 of the smallest gain.
    const STEP_Q7: f32 = 0x1D1C71 as f32 / 65536.0;
    const OFFSET_Q7: f32 = 2090.0;

    let idx = idx.clamp(0, 63) as f32;
    let log2_q7 = STEP_Q7 * idx + OFFSET_Q7;
    2f32.powf(log2_q7 / 128.0).round() as i32
}

/// Returns the log-gain index (0..=63) whose Q16 gain is closest, in the log
/// domain, to `gain`. This is the inverse of [`gain_index_to_q16`].
///
/// Non-positive gains are treated as the smallest positive gain, and the
/// result is clamped to the valid index range.
pub(crate) fn gain_index_of_q16(gain: i32) -> i32 {
    const STEP_Q7: f32 = 0x1D1C71 as f32 / 65536.0;
    const OFFSET_Q7: f32 = 2090.0;

    let log2_q7 = (gain.max(1) as f32).log2() * 128.0;
    let idx = ((log2_q7 - OFFSET_Q7) / STEP_Q7).round() as i32;
    idx.clamp(0, 63)
}