extern crate alloc;
use super::super::indices::{MAX_NB_SUBFR, TYPE_VOICED};
use super::dsp::{apply_sine_window, autocorrelation, bwexpander, energy, k2a, schur};
/// Upper bound on the shaping LPC order; also the per-subframe stride of the
/// flattened AR coefficient arrays (`ar_q13`, and the float scratch in
/// `noise_shape_analysis`).
pub(crate) const MAX_SHAPE_LPC_ORDER: usize = 24;
/// Capacity (in samples) of the windowed-input scratch buffer used by
/// `noise_shape_analysis`; `shape_win_length` must not exceed it.
const SHAPE_LPC_WIN_MAX: usize = 15 * 16;
/// Base bandwidth-expansion factor for the shaping AR coefficients; it is
/// divided by `1 + strength^2` before use.
const BANDWIDTH_EXPANSION: f32 = 0.94;
/// Scales `pred_gain` into the `strength` term that reduces the
/// bandwidth-expansion factor.
const FIND_PITCH_WHITE_NOISE_FRACTION: f32 = 1e-3;
/// Fraction of the zero-lag autocorrelation added back onto itself before the
/// Schur recursion.
const SHAPE_WHITE_NOISE_FRACTION: f32 = 3e-5;
/// Scale (dB) of the SNR reduction applied at low speech activity when CBR is
/// not in use.
const BG_SNR_DECR_DB: f32 = 2.0;
/// SNR increase (dB) per unit of `ltp_corr` for voiced frames.
const HARM_SNR_INCR_DB: f32 = 2.0;
/// Threshold, per segment transition, on the accumulated log2-energy variation
/// used to choose `quant_offset_type` for non-voiced frames.
const ENERGY_VARIATION_THRESHOLD_QNT_OFFSET: f32 = 0.6;
/// Base low-frequency shaping strength.
const LOW_FREQ_SHAPING: f32 = 4.0;
/// Weight with which a first-band input quality below 1.0 reduces the
/// low-frequency shaping strength.
const LOW_QUALITY_LOW_FREQ_SHAPING_DECR: f32 = 0.5;
/// Base noise tilt target (used negated).
const HP_NOISE_COEF: f32 = 0.25;
/// Extra tilt for voiced frames, scaled by speech activity and by
/// `1 - HP_NOISE_COEF`.
const HARM_HP_NOISE_COEF: f32 = 0.35;
/// Base harmonic shaping gain for voiced frames.
const HARMONIC_SHAPING: f32 = 0.3;
/// Additional harmonic shaping gain, weighted by
/// `1 - (1 - coding_quality) * input_quality`.
const HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING: f32 = 0.2;
/// Per-subframe smoothing coefficient for the harmonic gain and tilt state.
const SUBFR_SMTH_COEF: f32 = 0.4;
/// Determines the additive gain term `2^(0.16 * MIN_QGAIN_DB)`.
const MIN_QGAIN_DB: f32 = 2.0;
/// Multiplied by `nb_subfr` and halved to obtain the number of energy
/// segments in the sparseness measure (presumably the subframe duration in
/// milliseconds, per the name).
const SUB_FRAME_LENGTH_MS: i32 = 5;
/// Smoothing state that `noise_shape_analysis` reads and updates once per
/// subframe; the caller keeps it between calls so the smoothing carries over.
#[derive(Clone, Copy, Default)]
pub(crate) struct ShapeState {
/// Smoothed harmonic shaping gain (float domain, before Q14 conversion).
pub harm_shape_gain_smth: f32,
/// Smoothed noise tilt (float domain, before Q14 conversion).
pub tilt_smth: f32,
}
/// Per-frame inputs to `noise_shape_analysis`.
///
/// Fields with a `_qN` suffix are fixed-point values that the analysis converts
/// to float by multiplying with `2^-N`.
#[derive(Clone, Copy)]
pub(crate) struct NoiseShapeConfig {
/// Sampling-rate figure (presumably kHz, per the name); sets the energy
/// segment length (`2 * fs_khz`) and the flat window part (`3 * fs_khz`).
pub fs_khz: i32,
/// Number of subframes to analyse; must not exceed `MAX_NB_SUBFR`.
pub nb_subfr: usize,
/// Samples per subframe; also the hop between successive analysis windows.
pub subfr_length: usize,
/// Look-ahead in samples; the input buffer is expected to hold
/// `nb_subfr * subfr_length + 2 * la_shape` samples.
pub la_shape: usize,
/// Length in samples of each analysis window taken from the input buffer.
pub shape_win_length: usize,
/// Order of the shaping AR model (at most `MAX_SHAPE_LPC_ORDER`; must be
/// even when warping is enabled).
pub shaping_lpc_order: usize,
/// Warping coefficient in Q16; a value greater than zero selects the warped
/// analysis path.
pub warping_q16: i32,
/// Signal classification; compared against `TYPE_VOICED`.
pub signal_type: i32,
/// Target SNR in dB, Q7.
pub snr_db_q7: i32,
/// Speech activity level, Q8.
pub speech_activity_q8: i32,
/// Input quality of two bands, Q15; their mean becomes `input_quality`.
pub input_quality_bands_q15: [i32; 2],
/// When true, the low-speech-activity SNR reduction is skipped.
pub use_cbr: bool,
/// Long-term prediction correlation; raises the SNR target and scales the
/// harmonic shaping gain for voiced frames.
pub ltp_corr: f32,
/// Prediction gain; larger values reduce the bandwidth-expansion factor.
pub pred_gain: f32,
/// Per-subframe pitch lag; used for low-frequency shaping of voiced frames.
pub pitch_l: [i32; MAX_NB_SUBFR],
}
/// Output of `noise_shape_analysis`. Entries for subframes at or beyond
/// `nb_subfr` (and AR taps at or beyond the shaping order) are left at zero.
pub(crate) struct NoiseShapeResult {
/// Shaping AR coefficients in Q13, `MAX_SHAPE_LPC_ORDER` slots per subframe.
pub ar_q13: [i16; MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER],
/// Smoothed noise tilt per subframe, Q14.
pub tilt_q14: [i32; MAX_NB_SUBFR],
/// Packed low-frequency shaping per subframe: the Q14 AR term in the upper
/// 16 bits and the Q14 MA term in the lower 16 bits.
pub lf_shp_q14: [i32; MAX_NB_SUBFR],
/// Smoothed harmonic shaping gain per subframe, Q14.
pub harm_shape_gain_q14: [i32; MAX_NB_SUBFR],
/// Per-subframe gains (float) after SNR-dependent scaling and the additive
/// minimum-gain term.
pub gains: [f32; MAX_NB_SUBFR],
/// Quantisation offset selector: always 0 for voiced frames, otherwise 1
/// when the energy variation is at or below the threshold and 0 when above.
pub quant_offset_type: i32,
/// Mean of the two input quality bands converted from Q15 to float.
pub input_quality: f32,
/// Sigmoid-mapped coding quality derived from the SNR target.
pub coding_quality: f32,
}
/// Logistic function `1 / (1 + e^-x)`.
///
/// Returns 0.5 at `x == 0` and saturates to 0.0 / 1.0 in `f32` for large
/// negative / positive inputs.
fn sigmoid(x: f32) -> f32 {
    let decay = (-x).exp();
    let denominator = 1.0 + decay;
    1.0 / denominator
}
/// Converts a float to the nearest `i32`, resolving exact ties to the even
/// neighbour.
///
/// The reference float-to-int conversion rounds with the default IEEE mode
/// (ties-to-even, as `lrintf` does), whereas `f32::round` breaks ties away
/// from zero. Using `round_ties_even` keeps values that land exactly on `.5`
/// after Q-format scaling identical to the reference.
///
/// The `as` cast saturates at the `i32` range and maps NaN to 0.
fn float2int(x: f32) -> i32 {
    x.round_ties_even() as i32
}
/// Computes a warped autocorrelation of `input` for lags `0..=order`.
///
/// Each sample is pushed through a chain of first-order all-pass sections
/// (two per inner iteration) with coefficient `warping`; the product of the
/// newest chain input with every section output is accumulated in `f64` and
/// written to `corr[0..=order]` as `f32`. With `warping == 0.0` the chain is a
/// plain delay line and the result is the ordinary autocorrelation.
///
/// `order` must be even (checked in debug builds) and at most
/// `MAX_SHAPE_LPC_ORDER`; `corr` must hold at least `order + 1` values.
fn warped_autocorrelation(corr: &mut [f32], input: &[f32], warping: f32, order: usize) {
    debug_assert!(order & 1 == 0);
    let lambda = f64::from(warping);
    let mut delay = [0.0f64; MAX_SHAPE_LPC_ORDER + 1];
    let mut acc = [0.0f64; MAX_SHAPE_LPC_ORDER + 1];

    for &sample in input {
        let mut even_in = f64::from(sample);
        for tap in (0..order).step_by(2) {
            // First all-pass section of the pair.
            let odd_in = delay[tap] + lambda * delay[tap + 1] - lambda * even_in;
            delay[tap] = even_in;
            // `delay[0]` already holds the current sample at this point.
            acc[tap] += delay[0] * even_in;
            // Second all-pass section of the pair.
            even_in = delay[tap + 1] + lambda * delay[tap + 2] - lambda * odd_in;
            delay[tap + 1] = odd_in;
            acc[tap + 1] += delay[0] * odd_in;
        }
        delay[order] = even_in;
        acc[order] += delay[0] * even_in;
    }

    for (dst, &sum) in corr[..=order].iter_mut().zip(&acc[..=order]) {
        *dst = sum as f32;
    }
}
/// Gain correction for a warped filter with the first `order` entries of
/// `coefs`.
///
/// Evaluates the coefficient polynomial at `-lambda` by Horner's scheme,
/// starting from the highest tap, and returns `1 / (1 + lambda * value)`.
/// Panics if `order` is zero or exceeds `coefs.len()`.
fn warped_gain(coefs: &[f32], lambda: f32, order: usize) -> f32 {
    let neg_lambda = -lambda;
    let highest = coefs[order - 1];
    let folded = coefs[..order - 1]
        .iter()
        .rev()
        .fold(highest, |acc, &c| neg_lambda * acc + c);
    1.0 / (1.0 - neg_lambda * folded)
}
/// Shrinks the first `order` coefficients until none exceeds `limit` in
/// magnitude, giving up silently after ten passes.
///
/// Each pass locates the largest magnitude (first occurrence wins) and applies
/// `bwexpander` with a chirp factor that is smaller the further the peak
/// overshoots `limit`, the later the pass, and the lower the peak's index.
fn limit_coefs(coefs: &mut [f32], limit: f32, order: usize) {
    for pass in 0..10 {
        // Strict `>` keeps the earliest peak on ties and skips NaN entries.
        let (peak, peak_idx) = coefs
            .iter()
            .take(order)
            .enumerate()
            .fold((-1.0f32, 0usize), |(best, at), (i, c)| {
                let mag = c.abs();
                if mag > best {
                    (mag, i)
                } else {
                    (best, at)
                }
            });
        if peak <= limit {
            return;
        }
        let overshoot = (0.8 + 0.1 * pass as f32) * (peak - limit);
        let chirp = 0.99 - overshoot / (peak * (peak_idx as f32 + 1.0));
        bwexpander(coefs, order, chirp);
    }
}
/// Converts the first `order` true warped coefficients to monic warped form in
/// place, then limits their magnitude to `limit`.
///
/// If a monic coefficient exceeds `limit`, the coefficients are converted back
/// to true form, bandwidth-expanded with a chirp derived from the overshoot,
/// and converted to monic form again. At most ten such passes are made; the
/// function returns as soon as every magnitude is within `limit`.
/// Panics if `coefs` is empty.
fn warped_true2monic_coefs(coefs: &mut [f32], lambda: f32, limit: f32, order: usize) {
    // True -> monic conversion; returns the gain that was applied so the
    // caller can undo it later.
    fn to_monic(coefs: &mut [f32], lambda: f32, order: usize) -> f32 {
        for i in (1..order).rev() {
            coefs[i - 1] -= lambda * coefs[i];
        }
        let gain = (1.0 - lambda * lambda) / (1.0 + lambda * coefs[0]);
        coefs.iter_mut().take(order).for_each(|c| *c *= gain);
        gain
    }

    let mut monic_gain = to_monic(coefs, lambda, order);

    for pass in 0..10 {
        // Strict `>` keeps the earliest peak on ties and skips NaN entries.
        let (peak, peak_idx) = coefs
            .iter()
            .take(order)
            .enumerate()
            .fold((-1.0f32, 0usize), |(best, at), (i, c)| {
                let mag = c.abs();
                if mag > best {
                    (mag, i)
                } else {
                    (best, at)
                }
            });
        if peak <= limit {
            return;
        }

        // Monic -> true: undo the recursion (ascending order) and the gain.
        for i in 1..order {
            coefs[i - 1] += lambda * coefs[i];
        }
        let inverse_gain = 1.0 / monic_gain;
        coefs.iter_mut().take(order).for_each(|c| *c *= inverse_gain);

        let overshoot = (0.8 + 0.1 * pass as f32) * (peak - limit);
        let chirp = 0.99 - overshoot / (peak * (peak_idx as f32 + 1.0));
        bwexpander(coefs, order, chirp);

        monic_gain = to_monic(coefs, lambda, order);
    }
}
/// Runs the noise-shaping analysis for one frame.
///
/// From the configuration in `cfg`, the residual `pitch_res` and the input
/// buffer `x_buf`, this computes per-subframe shaping AR coefficients (Q13),
/// gains, tilt, low-frequency shaping and harmonic shaping gain (Q14), plus the
/// frame's `quant_offset_type`, `input_quality` and `coding_quality`.
///
/// `state` carries the smoothed harmonic gain and tilt; it is updated once per
/// subframe and should be passed back in on the next call.
///
/// `x_buf` is expected to hold `nb_subfr * subfr_length + 2 * la_shape`
/// samples (checked in debug builds). `pitch_res` is only read for non-voiced
/// frames.
///
/// # Panics
///
/// Panics if `x_buf` is shorter than the last analysis window requires, if
/// `pitch_res` is shorter than the energy segments read from it, or if the
/// configured sizes exceed the fixed scratch buffers.
#[allow(clippy::needless_range_loop, reason = "computed index ranges mirror the reference")]
pub(crate) fn noise_shape_analysis(
state: &mut ShapeState,
cfg: &NoiseShapeConfig,
pitch_res: &[f32],
x_buf: &[f32],
) -> NoiseShapeResult {
let order = cfg.shaping_lpc_order;
let frame_length = cfg.nb_subfr * cfg.subfr_length;
debug_assert_eq!(x_buf.len(), frame_length + 2 * cfg.la_shape);
let mut res = NoiseShapeResult {
ar_q13: [0; MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER],
tilt_q14: [0; MAX_NB_SUBFR],
lf_shp_q14: [0; MAX_NB_SUBFR],
harm_shape_gain_q14: [0; MAX_NB_SUBFR],
gains: [0.0; MAX_NB_SUBFR],
quant_offset_type: 0,
input_quality: 0.0,
coding_quality: 0.0,
};
// --- Gain control: derive the adjusted SNR target ---
// Q7 -> dB.
let mut snr_adj_db = cfg.snr_db_q7 as f32 * (1.0 / 128.0);
// Mean of the two quality bands, Q15 -> float.
let input_quality =
0.5 * (cfg.input_quality_bands_q15[0] + cfg.input_quality_bands_q15[1]) as f32 * (1.0 / 32768.0);
// Sigmoid centred on 20 dB.
let coding_quality = sigmoid(0.25 * (snr_adj_db - 20.0));
res.input_quality = input_quality;
res.coding_quality = coding_quality;
if !cfg.use_cbr {
// `b` is the speech inactivity (1 - activity); the SNR target is lowered
// in proportion to its square.
let b = 1.0 - cfg.speech_activity_q8 as f32 * (1.0 / 256.0);
snr_adj_db -= BG_SNR_DECR_DB * coding_quality * (0.5 + 0.5 * input_quality) * b * b;
}
if cfg.signal_type == TYPE_VOICED {
// Voiced: raise the SNR target in proportion to the LTP correlation.
snr_adj_db += HARM_SNR_INCR_DB * cfg.ltp_corr;
} else {
// Non-voiced: add a term that depends on the unadjusted SNR and vanishes
// as input quality approaches 1.
snr_adj_db += (-0.4 * cfg.snr_db_q7 as f32 * (1.0 / 128.0) + 6.0) * (1.0 - input_quality);
}
// --- Sparseness measure: choose the quantisation offset type ---
if cfg.signal_type == TYPE_VOICED {
res.quant_offset_type = 0;
} else {
// Split `pitch_res` into segments of `2 * fs_khz` samples and accumulate
// the absolute change in log2 energy between consecutive segments.
let n_samples = 2 * cfg.fs_khz as usize;
let n_segs = (SUB_FRAME_LENGTH_MS * cfg.nb_subfr as i32) as usize / 2;
let mut energy_variation = 0.0f32;
let mut log_energy_prev = 0.0f32;
for k in 0..n_segs {
let seg = &pitch_res[k * n_samples..(k + 1) * n_samples];
// The segment length is added to the energy before taking the log.
let nrg = n_samples as f32 + energy(seg) as f32;
let log_energy = nrg.log2();
if k > 0 {
energy_variation += (log_energy - log_energy_prev).abs();
}
log_energy_prev = log_energy;
}
// 1 when the variation is at or below the threshold, 0 otherwise.
res.quant_offset_type =
i32::from(energy_variation <= ENERGY_VARIATION_THRESHOLD_QNT_OFFSET * (n_segs as f32 - 1.0));
}
// --- Shaping AR coefficients and raw gains, one window per subframe ---
// Bandwidth expansion shrinks as the prediction gain grows.
let strength = FIND_PITCH_WHITE_NOISE_FRACTION * cfg.pred_gain;
let bw_exp = BANDWIDTH_EXPANSION / (1.0 + strength * strength);
// Q16 -> float, plus a small coding-quality-dependent increase.
let warping = cfg.warping_q16 as f32 / 65536.0 + 0.01 * coding_quality;
// Window layout: slope, flat part of `3 * fs_khz` samples, slope.
let flat_part = cfg.fs_khz as usize * 3;
let slope_part = (cfg.shape_win_length - flat_part) / 2;
let mut x_windowed = [0.0f32; SHAPE_LPC_WIN_MAX];
let mut auto_corr = [0.0f32; MAX_SHAPE_LPC_ORDER + 1];
let mut rc = [0.0f32; MAX_SHAPE_LPC_ORDER + 1];
let mut ar_f = [0.0f32; MAX_NB_SUBFR * MAX_SHAPE_LPC_ORDER];
for k in 0..cfg.nb_subfr {
// Successive windows start one subframe apart.
let x0 = k * cfg.subfr_length;
let x_ptr = &x_buf[x0..x0 + cfg.shape_win_length];
// Window type 1 on the leading slope, type 2 on the trailing slope
// (presumably rising and falling halves of a sine window — see
// `apply_sine_window`); the flat middle is copied unchanged.
apply_sine_window(&mut x_windowed[..slope_part], &x_ptr[..slope_part], 1, slope_part);
x_windowed[slope_part..slope_part + flat_part].copy_from_slice(&x_ptr[slope_part..slope_part + flat_part]);
let s = slope_part + flat_part;
apply_sine_window(
&mut x_windowed[s..s + slope_part],
&x_ptr[s..s + slope_part],
2,
slope_part,
);
// Warped analysis only when a positive warping coefficient is configured.
if cfg.warping_q16 > 0 {
warped_autocorrelation(&mut auto_corr, &x_windowed[..cfg.shape_win_length], warping, order);
} else {
autocorrelation(&mut auto_corr, &x_windowed[..cfg.shape_win_length], order + 1);
}
// Add a fraction of the zero-lag value plus 1.0 to the zero-lag term.
auto_corr[0] += auto_corr[0] * SHAPE_WHITE_NOISE_FRACTION + 1.0;
// `schur` fills `rc` and returns `nrg`; `k2a` turns `rc` into this
// subframe's AR coefficients (presumably reflection -> prediction
// coefficients with `nrg` the residual energy — confirm in `dsp`).
let nrg = schur(&mut rc, &auto_corr, order);
let ar = &mut ar_f[k * MAX_SHAPE_LPC_ORDER..k * MAX_SHAPE_LPC_ORDER + order];
k2a(ar, &rc, order);
// Clamp at zero so a negative `nrg` cannot yield NaN.
let mut gain = nrg.max(0.0).sqrt();
if cfg.warping_q16 > 0 {
// Gain correction must use the coefficients before bandwidth expansion.
gain *= warped_gain(ar, warping, order);
}
bwexpander(ar, order, bw_exp);
// Keep every coefficient below 4.0 in magnitude so it fits i16 in Q13.
if cfg.warping_q16 > 0 {
warped_true2monic_coefs(ar, warping, 3.999, order);
} else {
limit_coefs(ar, 3.999, order);
}
res.gains[k] = gain;
}
// --- Gain tweaking: scale by the adjusted SNR and add the minimum-gain term ---
let gain_mult = 2.0f32.powf(-0.16 * snr_adj_db);
let gain_add = 2.0f32.powf(0.16 * MIN_QGAIN_DB);
for k in 0..cfg.nb_subfr {
res.gains[k] = res.gains[k] * gain_mult + gain_add;
}
// --- Low-frequency shaping and tilt ---
// Strength falls with low first-band input quality and with low speech activity.
let mut strength = LOW_FREQ_SHAPING
* (1.0 + LOW_QUALITY_LOW_FREQ_SHAPING_DECR * (cfg.input_quality_bands_q15[0] as f32 * (1.0 / 32768.0) - 1.0));
strength *= cfg.speech_activity_q8 as f32 * (1.0 / 256.0);
let (mut lf_ma, mut lf_ar) = ([0.0f32; MAX_NB_SUBFR], [0.0f32; MAX_NB_SUBFR]);
let tilt;
if cfg.signal_type == TYPE_VOICED {
// Voiced: the MA/AR terms depend on each subframe's pitch lag.
for k in 0..cfg.nb_subfr {
let b = 0.2 / cfg.fs_khz as f32 + 3.0 / cfg.pitch_l[k] as f32;
lf_ma[k] = -1.0 + b;
lf_ar[k] = 1.0 - b - b * strength;
}
tilt =
-HP_NOISE_COEF - (1.0 - HP_NOISE_COEF) * HARM_HP_NOISE_COEF * cfg.speech_activity_q8 as f32 * (1.0 / 256.0);
} else {
// Non-voiced: one MA/AR pair shared by all subframes.
let b = 1.3 / cfg.fs_khz as f32;
lf_ma[0] = -1.0 + b;
lf_ar[0] = 1.0 - b - b * strength * 0.6;
for k in 1..cfg.nb_subfr {
lf_ma[k] = lf_ma[0];
lf_ar[k] = lf_ar[0];
}
tilt = -HP_NOISE_COEF;
}
// --- Harmonic shaping gain (voiced frames only) ---
let harm_shape_gain = if cfg.signal_type == TYPE_VOICED {
let mut g = HARMONIC_SHAPING;
g += HIGH_RATE_OR_LOW_QUALITY_HARMONIC_SHAPING * (1.0 - (1.0 - coding_quality) * input_quality);
// Scale by sqrt of the LTP correlation, clamped at zero to avoid NaN.
g * cfg.ltp_corr.max(0.0).sqrt()
} else {
0.0
};
// --- Smooth over subframes and convert to fixed point ---
for k in 0..cfg.nb_subfr {
state.harm_shape_gain_smth += SUBFR_SMTH_COEF * (harm_shape_gain - state.harm_shape_gain_smth);
state.tilt_smth += SUBFR_SMTH_COEF * (tilt - state.tilt_smth);
res.harm_shape_gain_q14[k] = float2int(state.harm_shape_gain_smth * 16384.0);
res.tilt_q14[k] = float2int(state.tilt_smth * 16384.0);
// Pack: Q14 AR term in the upper 16 bits, Q14 MA term in the lower 16 bits.
res.lf_shp_q14[k] = (float2int(lf_ar[k] * 16384.0) << 16) | (float2int(lf_ma[k] * 16384.0) & 0xffff);
for j in 0..order {
res.ar_q13[k * MAX_SHAPE_LPC_ORDER + j] = float2int(ar_f[k * MAX_SHAPE_LPC_ORDER + j] * 8192.0) as i16;
}
}
res
}