mousiki 0.2.0 - Docs.rs

#![allow(clippy::needless_range_loop)]
#![allow(clippy::too_many_arguments)]
#![allow(dead_code)]

#[cfg(test)]
extern crate std;

use core::cmp::{max, min};
use core::f32::consts::{LOG2_E, PI};

use libm::{floorf, fmaf, log, log10f, logf, sqrt, sqrtf};

use crate::celt::{
    AnalysisInfo, CELT_SIG_SCALE, KissFftCpx, KissFftState, OpusCustomMode, celt_maxabs32,
    fast_atan2f, float2int, opus_fft, opus_select_arch,
};
use crate::mlp::{
    LAYER0, LAYER1, LAYER2, MAX_NEURONS, analysis_compute_dense, analysis_compute_gru,
};

const NB_FRAMES: usize = 8;
const NB_TBANDS: usize = 18;
const ANALYSIS_BUF_SIZE: usize = 720;
const ANALYSIS_COUNT_MAX: i32 = 10_000;
const DETECT_SIZE: usize = 100;
const NB_TONAL_SKIP_BANDS: usize = 9;
const TRANSITION_PENALTY: f32 = 10.0;
const LEAK_BANDS: usize = 19;
const SCALE_ENER: f32 = 1.0 / (32_768.0 * 32_768.0);
const INITIAL_MEM_FILL: usize = 240;

#[inline]
fn mul_add_f32(a: f32, b: f32, c: f32) -> f32 {
    fmaf(a, b, c)
}

#[inline]
fn bin_energy_sum(r: f32, i: f32, mirror_r: f32, mirror_i: f32) -> f32 {
    let mr2 = mirror_r * mirror_r;
    let sum_r = mul_add_f32(r, r, mr2);
    let sum_i = mul_add_f32(i, i, sum_r);
    mul_add_f32(mirror_i, mirror_i, sum_i)
}

const DCT_TABLE: [f32; 128] = [
    0.250_000, 0.250_000, 0.250_000, 0.250_000, 0.250_000, 0.250_000, 0.250_000, 0.250_000,
    0.250_000, 0.250_000, 0.250_000, 0.250_000, 0.250_000, 0.250_000, 0.250_000, 0.250_000,
    0.351_851, 0.338_330, 0.311_806, 0.273_300, 0.224_292, 0.166_664, 0.102_631, 0.034_654,
    -0.034_654, -0.102_631, -0.166_664, -0.224_292, -0.273_300, -0.311_806, -0.338_330, -0.351_851,
    0.346_760, 0.293_969, 0.196_424, 0.068_975, -0.068_975, -0.196_424, -0.293_969, -0.346_760,
    -0.346_760, -0.293_969, -0.196_424, -0.068_975, 0.068_975, 0.196_424, 0.293_969, 0.346_760,
    0.338_330, 0.224_292, 0.034_654, -0.166_664, -0.311_806, -0.351_851, -0.273_300, -0.102_631,
    0.102_631, 0.273_300, 0.351_851, 0.311_806, 0.166_664, -0.034_654, -0.224_292, -0.338_330,
    0.326_641, 0.135_299, -0.135_299, -0.326_641, -0.326_641, -0.135_299, 0.135_299, 0.326_641,
    0.326_641, 0.135_299, -0.135_299, -0.326_641, -0.326_641, -0.135_299, 0.135_299, 0.326_641,
    0.311_806, 0.034_654, -0.273_300, -0.338_330, -0.102_631, 0.224_292, 0.351_851, 0.166_664,
    -0.166_664, -0.351_851, -0.224_292, 0.102_631, 0.338_330, 0.273_300, -0.034_654, -0.311_806,
    0.293_969, -0.068_975, -0.346_760, -0.196_424, 0.196_424, 0.346_760, 0.068_975, -0.293_969,
    -0.293_969, 0.068_975, 0.346_760, 0.196_424, -0.196_424, -0.346_760, -0.068_975, 0.293_969,
    0.273_300, -0.166_664, -0.338_330, 0.034_654, 0.351_851, 0.102_631, -0.311_806, -0.224_292,
    0.224_292, 0.311_806, -0.102_631, -0.351_851, -0.034_654, 0.338_330, 0.166_664, -0.273_300,
];

const ANALYSIS_WINDOW: [f32; 240] = [
    0.000_043, 0.000_171, 0.000_385, 0.000_685, 0.001_071, 0.001_541, 0.002_098, 0.002_739,
    0.003_466, 0.004_278, 0.005_174, 0.006_156, 0.007_222, 0.008_373, 0.009_607, 0.010_926,
    0.012_329, 0.013_815, 0.015_385, 0.017_037, 0.018_772, 0.020_590, 0.022_490, 0.024_472,
    0.026_535, 0.028_679, 0.030_904, 0.033_210, 0.035_595, 0.038_060, 0.040_604, 0.043_227,
    0.045_928, 0.048_707, 0.051_564, 0.054_497, 0.057_506, 0.060_591, 0.063_752, 0.066_987,
    0.070_297, 0.073_680, 0.077_136, 0.080_665, 0.084_265, 0.087_937, 0.091_679, 0.095_492,
    0.099_373, 0.103_323, 0.107_342, 0.111_427, 0.115_579, 0.119_797, 0.124_080, 0.128_428,
    0.132_839, 0.137_313, 0.141_849, 0.146_447, 0.151_105, 0.155_823, 0.160_600, 0.165_435,
    0.170_327, 0.175_276, 0.180_280, 0.185_340, 0.190_453, 0.195_619, 0.200_838, 0.206_107,
    0.211_427, 0.216_797, 0.222_215, 0.227_680, 0.233_193, 0.238_751, 0.244_353, 0.250_000,
    0.255_689, 0.261_421, 0.267_193, 0.273_005, 0.278_856, 0.284_744, 0.290_670, 0.296_632,
    0.302_628, 0.308_658, 0.314_721, 0.320_816, 0.326_941, 0.333_097, 0.339_280, 0.345_492,
    0.351_729, 0.357_992, 0.364_280, 0.370_590, 0.376_923, 0.383_277, 0.389_651, 0.396_044,
    0.402_455, 0.408_882, 0.415_325, 0.421_783, 0.428_254, 0.434_737, 0.441_231, 0.447_736,
    0.454_249, 0.460_770, 0.467_298, 0.473_832, 0.480_370, 0.486_912, 0.493_455, 0.500_000,
    0.506_545, 0.513_088, 0.519_630, 0.526_168, 0.532_702, 0.539_230, 0.545_751, 0.552_264,
    0.558_769, 0.565_263, 0.571_746, 0.578_217, 0.584_675, 0.591_118, 0.597_545, 0.603_956,
    0.610_349, 0.616_723, 0.623_077, 0.629_410, 0.635_720, 0.642_008, 0.648_271, 0.654_508,
    0.660_720, 0.666_903, 0.673_059, 0.679_184, 0.685_279, 0.691_342, 0.697_372, 0.703_368,
    0.709_330, 0.715_256, 0.721_144, 0.726_995, 0.732_807, 0.738_579, 0.744_311, 0.750_000,
    0.755_647, 0.761_249, 0.766_807, 0.772_320, 0.777_785, 0.783_203, 0.788_573, 0.793_893,
    0.799_162, 0.804_381, 0.809_547, 0.814_660, 0.819_720, 0.824_724, 0.829_673, 0.834_565,
    0.839_400, 0.844_177, 0.848_895, 0.853_553, 0.858_151, 0.862_687, 0.867_161, 0.871_572,
    0.875_920, 0.880_203, 0.884_421, 0.888_573, 0.892_658, 0.896_677, 0.900_627, 0.904_508,
    0.908_321, 0.912_063, 0.915_735, 0.919_335, 0.922_864, 0.926_320, 0.929_703, 0.933_013,
    0.936_248, 0.939_409, 0.942_494, 0.945_503, 0.948_436, 0.951_293, 0.954_072, 0.956_773,
    0.959_396, 0.961_940, 0.964_405, 0.966_790, 0.969_096, 0.971_321, 0.973_465, 0.975_528,
    0.977_510, 0.979_410, 0.981_228, 0.982_963, 0.984_615, 0.986_185, 0.987_671, 0.989_074,
    0.990_393, 0.991_627, 0.992_778, 0.993_844, 0.994_826, 0.995_722, 0.996_534, 0.997_261,
    0.997_902, 0.998_459, 0.998_929, 0.999_315, 0.999_615, 0.999_829, 0.999_957, 1.000_000,
];

const TBANDS: [usize; NB_TBANDS + 1] = [
    4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 136, 160, 192, 240,
];

// Keep the literal to match the C M_PI value used in tonality analysis (do not replace).
#[allow(clippy::approx_constant)]
const M_PI_F64: f64 = 3.141592653;
const LEAKAGE_OFFSET: f32 = 2.5;
const LEAKAGE_SLOPE: f32 = 2.0;
const STD_FEATURE_BIAS: [f32; 9] = [
    5.684_947, 3.475_288, 1.770_634, 1.599_784, 3.773_215, 2.163_313, 1.260_756, 1.116_868,
    1.918_795,
];

// Use f64 intermediates to mirror the C sqrt(double) path; keep the cast/lint for parity.
#[allow(clippy::cast_lossless)]
fn stationarity_denom(l2: f32) -> f32 {
    sqrt(1e-15f64 + (NB_FRAMES as f64) * (l2 as f64)) as f32
}

// Compute pi^4 with the C literal and f64 order, then downcast to match reference bits.
// Keep this helper to preserve C bit-exactness (do not rewrite with core::f32::consts::PI).
fn pi4_f32() -> f32 {
    (M_PI_F64 * M_PI_F64 * M_PI_F64 * M_PI_F64) as f32
}

#[inline]
fn accumulate_t_e(acc: f32, bin_e: f32, tonality_clamped: f32) -> f32 {
    // Use FMA to mirror the C compiler's contraction for tE accumulation.
    fmaf(bin_e, tonality_clamped, acc)
}

pub(crate) trait DownmixInput {
    fn downmix(
        &self,
        output: &mut [f32],
        subframe: usize,
        offset: usize,
        c1: i32,
        c2: i32,
        channels: i32,
    );
}

impl DownmixInput for [f32] {
    fn downmix(
        &self,
        output: &mut [f32],
        subframe: usize,
        offset: usize,
        c1: i32,
        c2: i32,
        channels: i32,
    ) {
        let channels = channels as usize;
        debug_assert!(channels > 0);
        debug_assert!(c1 >= 0 && (c1 as usize) < channels);
        debug_assert!(output.len() >= subframe);
        let base = offset * channels;
        debug_assert!(base + subframe * channels <= self.len());

        for j in 0..subframe {
            let idx = base + j * channels + c1 as usize;
            let mut sample = self[idx] * CELT_SIG_SCALE;
            if c2 > -1 {
                let idx2 = base + j * channels + c2 as usize;
                sample += self[idx2] * CELT_SIG_SCALE;
            } else if c2 == -2 {
                for c in 1..channels {
                    sample += self[base + j * channels + c] * CELT_SIG_SCALE;
                }
            }
            output[j] = sample;
        }
    }
}

impl DownmixInput for [i16] {
    fn downmix(
        &self,
        output: &mut [f32],
        subframe: usize,
        offset: usize,
        c1: i32,
        c2: i32,
        channels: i32,
    ) {
        let channels = channels as usize;
        debug_assert!(channels > 0);
        debug_assert!(c1 >= 0 && (c1 as usize) < channels);
        debug_assert!(output.len() >= subframe);
        let base = offset * channels;
        debug_assert!(base + subframe * channels <= self.len());

        for j in 0..subframe {
            let idx = base + j * channels + c1 as usize;
            let mut sample = f32::from(self[idx]);
            if c2 > -1 {
                let idx2 = base + j * channels + c2 as usize;
                sample += f32::from(self[idx2]);
            } else if c2 == -2 {
                for c in 1..channels {
                    sample += f32::from(self[base + j * channels + c]);
                }
            }
            output[j] = sample;
        }
    }
}

impl DownmixInput for [i32] {
    fn downmix(
        &self,
        output: &mut [f32],
        subframe: usize,
        offset: usize,
        c1: i32,
        c2: i32,
        channels: i32,
    ) {
        let channels = channels as usize;
        debug_assert!(channels > 0);
        debug_assert!(c1 >= 0 && (c1 as usize) < channels);
        debug_assert!(output.len() >= subframe);
        let base = offset * channels;
        debug_assert!(base + subframe * channels <= self.len());

        for j in 0..subframe {
            let idx = base + j * channels + c1 as usize;
            let mut sample = self[idx] as f32 * (1.0 / 256.0);
            if c2 > -1 {
                let idx2 = base + j * channels + c2 as usize;
                sample += self[idx2] as f32 * (1.0 / 256.0);
            } else if c2 == -2 {
                for c in 1..channels {
                    sample += self[base + j * channels + c] as f32 * (1.0 / 256.0);
                }
            }
            output[j] = sample;
        }
    }
}

#[derive(Debug)]
pub(crate) struct TonalityAnalysisState {
    pub arch: i32,
    pub application: i32,
    pub fs: i32,
    angle: [f32; 240],
    d_angle: [f32; 240],
    d2_angle: [f32; 240],
    inmem: [f32; ANALYSIS_BUF_SIZE],
    mem_fill: usize,
    prev_band_tonality: [f32; NB_TBANDS],
    prev_tonality: f32,
    prev_bandwidth: i32,
    e: [[f32; NB_TBANDS]; NB_FRAMES],
    log_e: [[f32; NB_TBANDS]; NB_FRAMES],
    low_e: [f32; NB_TBANDS],
    high_e: [f32; NB_TBANDS],
    mean_e: [f32; NB_TBANDS + 1],
    mem: [f32; 32],
    cmean: [f32; 8],
    std: [f32; 9],
    e_tracker: f32,
    low_e_count: f32,
    e_count: usize,
    count: i32,
    analysis_offset: i32,
    write_pos: usize,
    read_pos: usize,
    read_subframe: i32,
    hp_ener_accum: f32,
    initialized: bool,
    rnn_state: [f32; MAX_NEURONS],
    downmix_state: [f32; 3],
    info: [AnalysisInfo; DETECT_SIZE],
    kfft: KissFftState,
}

impl TonalityAnalysisState {
    #[must_use]
    pub(crate) fn new(fs: i32) -> Self {
        let mut state = Self {
            arch: opus_select_arch(),
            application: 0,
            fs,
            angle: [0.0; 240],
            d_angle: [0.0; 240],
            d2_angle: [0.0; 240],
            inmem: [0.0; ANALYSIS_BUF_SIZE],
            mem_fill: 0,
            prev_band_tonality: [0.0; NB_TBANDS],
            prev_tonality: 0.0,
            prev_bandwidth: 0,
            e: [[0.0; NB_TBANDS]; NB_FRAMES],
            log_e: [[0.0; NB_TBANDS]; NB_FRAMES],
            low_e: [0.0; NB_TBANDS],
            high_e: [0.0; NB_TBANDS],
            mean_e: [0.0; NB_TBANDS + 1],
            mem: [0.0; 32],
            cmean: [0.0; 8],
            std: [0.0; 9],
            e_tracker: 0.0,
            low_e_count: 0.0,
            e_count: 0,
            count: 0,
            analysis_offset: 0,
            write_pos: 0,
            read_pos: 0,
            read_subframe: 0,
            hp_ener_accum: 0.0,
            initialized: false,
            rnn_state: [0.0; MAX_NEURONS],
            downmix_state: [0.0; 3],
            info: core::array::from_fn(|_| AnalysisInfo::default()),
            kfft: KissFftState::new(480),
        };
        state.reset();
        state
    }

    pub(crate) fn reset(&mut self) {
        self.angle.fill(0.0);
        self.d_angle.fill(0.0);
        self.d2_angle.fill(0.0);
        self.inmem.fill(0.0);
        self.mem_fill = 0;
        self.prev_band_tonality.fill(0.0);
        self.prev_tonality = 0.0;
        self.prev_bandwidth = 0;
        self.e = [[0.0; NB_TBANDS]; NB_FRAMES];
        self.log_e = [[0.0; NB_TBANDS]; NB_FRAMES];
        self.low_e.fill(0.0);
        self.high_e.fill(0.0);
        self.mean_e.fill(0.0);
        self.mem.fill(0.0);
        self.cmean.fill(0.0);
        self.std.fill(0.0);
        self.e_tracker = 0.0;
        self.low_e_count = 0.0;
        self.e_count = 0;
        self.count = 0;
        self.analysis_offset = 0;
        self.write_pos = 0;
        self.read_pos = 0;
        self.read_subframe = 0;
        self.hp_ener_accum = 0.0;
        self.initialized = false;
        self.rnn_state.fill(0.0);
        self.downmix_state.fill(0.0);
        self.info = core::array::from_fn(|_| AnalysisInfo::default());
    }

    #[inline]
    #[must_use]
    pub(crate) fn snapshot_read_state(&self) -> (usize, i32) {
        (self.read_pos, self.read_subframe)
    }

    #[inline]
    pub(crate) fn restore_read_state(&mut self, state: (usize, i32)) {
        self.read_pos = state.0;
        self.read_subframe = state.1;
    }
}

pub(crate) fn tonality_analysis_init(state: &mut TonalityAnalysisState, fs: i32) {
    *state = TonalityAnalysisState::new(fs);
}

pub(crate) fn tonality_analysis_reset(state: &mut TonalityAnalysisState) {
    state.reset();
}

fn is_digital_silence32(pcm: &[f32], frame_size: usize, channels: usize, lsb_depth: i32) -> bool {
    let total = frame_size * channels;
    if pcm.len() < total {
        return false;
    }
    let sample_max = celt_maxabs32(&pcm[..total]);
    sample_max <= 1.0 / (1_i32 << lsb_depth) as f32
}

fn silk_resampler_down2_hp(s: &mut [f32; 3], out: &mut [f32], input: &[f32]) -> f32 {
    let len2 = input.len() / 2;
    debug_assert!(out.len() >= len2);
    let mut hp_ener = 0.0f32;
    for k in 0..len2 {
        let in_even = input[2 * k];
        let y = in_even - s[0];
        let x = 0.607_437_1 * y;
        let mut out32 = s[0] + x;
        s[0] = in_even + x;
        let mut out32_hp = out32;

        let in_odd = input[2 * k + 1];
        let y = in_odd - s[1];
        let x = 0.150_63 * y;
        out32 += s[1];
        out32 += x;
        s[1] = in_odd + x;

        let y = -in_odd - s[2];
        let x = 0.150_63 * y;
        out32_hp += s[2];
        out32_hp += x;
        s[2] = -in_odd + x;

        hp_ener = mul_add_f32(out32_hp, out32_hp, hp_ener);
        out[k] = 0.5 * out32;
    }

    hp_ener * SCALE_ENER
}

fn downmix_and_resample<PCM: DownmixInput + ?Sized>(
    pcm: &PCM,
    y: &mut [f32],
    s: &mut [f32; 3],
    mut subframe: usize,
    mut offset: i32,
    c1: i32,
    c2: i32,
    channels: i32,
    fs: i32,
) -> f32 {
    if subframe == 0 {
        return 0.0;
    }

    debug_assert!(offset >= 0, "downmix offset must be non-negative");
    if fs == 48_000 {
        subframe *= 2;
        offset *= 2;
    } else if fs == 16_000 {
        subframe = subframe * 2 / 3;
        offset = offset * 2 / 3;
    }

    let mut tmp = [0.0f32; 960];
    debug_assert!(subframe <= tmp.len());
    pcm.downmix(
        &mut tmp[..subframe],
        subframe,
        offset as usize,
        c1,
        c2,
        channels,
    );

    if (c2 == -2 && channels == 2) || c2 > -1 {
        for value in tmp.iter_mut().take(subframe) {
            *value *= 0.5;
        }
    }

    if fs == 48_000 {
        silk_resampler_down2_hp(s, y, &tmp[..subframe])
    } else if fs == 24_000 {
        y[..subframe].copy_from_slice(&tmp[..subframe]);
        0.0
    } else {
        let mut tmp3x = [0.0f32; 1440];
        debug_assert!(3 * subframe <= tmp3x.len());
        for j in 0..subframe {
            let value = tmp[j];
            tmp3x[3 * j] = value;
            tmp3x[3 * j + 1] = value;
            tmp3x[3 * j + 2] = value;
        }
        silk_resampler_down2_hp(s, y, &tmp3x[..3 * subframe])
    }
}

pub(crate) fn tonality_get_info(
    tonal: &mut TonalityAnalysisState,
    info_out: &mut AnalysisInfo,
    len: usize,
) {
    let mut pos = tonal.read_pos as i32;
    let mut curr_lookahead = tonal.write_pos as i32 - tonal.read_pos as i32;
    if curr_lookahead < 0 {
        curr_lookahead += DETECT_SIZE as i32;
    }

    tonal.read_subframe += (len as i32) / (tonal.fs / 400);
    while tonal.read_subframe >= 8 {
        tonal.read_subframe -= 8;
        tonal.read_pos = (tonal.read_pos + 1) % DETECT_SIZE;
    }

    if len as i32 > tonal.fs / 50 && pos as usize != tonal.write_pos {
        pos += 1;
        if pos == DETECT_SIZE as i32 {
            pos = 0;
        }
    }
    if pos as usize == tonal.write_pos {
        pos -= 1;
    }
    if pos < 0 {
        pos = DETECT_SIZE as i32 - 1;
    }
    let pos0 = pos as usize;
    *info_out = tonal.info[pos0].clone();
    if !info_out.valid {
        return;
    }

    let mut tonality_max = info_out.tonality;
    let mut tonality_avg = info_out.tonality;
    let mut tonality_count = 1;
    let mut bandwidth_span = 6;

    let mut cursor = pos0;
    for _ in 0..3 {
        cursor = (cursor + 1) % DETECT_SIZE;
        if cursor == tonal.write_pos {
            break;
        }
        tonality_max = tonality_max.max(tonal.info[cursor].tonality);
        tonality_avg += tonal.info[cursor].tonality;
        tonality_count += 1;
        info_out.bandwidth = max(info_out.bandwidth, tonal.info[cursor].bandwidth);
        bandwidth_span -= 1;
    }

    cursor = pos0;
    for _ in 0..bandwidth_span {
        if cursor == 0 {
            cursor = DETECT_SIZE - 1;
        } else {
            cursor -= 1;
        }
        if cursor == tonal.write_pos {
            break;
        }
        info_out.bandwidth = max(info_out.bandwidth, tonal.info[cursor].bandwidth);
    }
    info_out.tonality = (tonality_avg / tonality_count as f32).max(tonality_max - 0.2);

    let mut mpos = pos0;
    let mut vpos = pos0;
    if curr_lookahead > 15 {
        mpos = (mpos + 5) % DETECT_SIZE;
        vpos = (vpos + 1) % DETECT_SIZE;
    }

    let mut prob_min = 1.0f32;
    let mut prob_max = 0.0f32;
    let vad_prob = tonal.info[vpos].activity_probability;
    let mut prob_count = vad_prob.max(0.1);
    let mut prob_avg = prob_count * tonal.info[mpos].music_prob;

    loop {
        mpos = (mpos + 1) % DETECT_SIZE;
        if mpos == tonal.write_pos {
            break;
        }
        vpos = (vpos + 1) % DETECT_SIZE;
        if vpos == tonal.write_pos {
            break;
        }
        let pos_vad = tonal.info[vpos].activity_probability;
        let delta = vad_prob - pos_vad;
        let min_term = mul_add_f32(-TRANSITION_PENALTY, delta, prob_avg) / prob_count;
        let max_term = mul_add_f32(TRANSITION_PENALTY, delta, prob_avg) / prob_count;
        prob_min = prob_min.min(min_term);
        prob_max = prob_max.max(max_term);
        prob_count += pos_vad.max(0.1);
        prob_avg = mul_add_f32(pos_vad.max(0.1), tonal.info[mpos].music_prob, prob_avg);
    }

    info_out.music_prob = prob_avg / prob_count;
    prob_min = prob_min.min(prob_avg / prob_count);
    prob_max = prob_max.max(prob_avg / prob_count);
    prob_min = prob_min.max(0.0);
    prob_max = prob_max.min(1.0);

    if curr_lookahead < 10 {
        let mut pmin = prob_min;
        let mut pmax = prob_max;
        cursor = pos0;
        for _ in 0..min(tonal.count.saturating_sub(1), 15) {
            if cursor == 0 {
                cursor = DETECT_SIZE - 1;
            } else {
                cursor -= 1;
            }
            if cursor == tonal.write_pos {
                break;
            }
            pmin = pmin.min(tonal.info[cursor].music_prob);
            pmax = pmax.max(tonal.info[cursor].music_prob);
        }
        pmin = (pmin - 0.1 * vad_prob).max(0.0);
        pmax = (pmax + 0.1 * vad_prob).min(1.0);
        let weight = 1.0 - 0.1 * curr_lookahead as f32;
        prob_min = mul_add_f32(weight, pmin - prob_min, prob_min);
        prob_max = mul_add_f32(weight, pmax - prob_max, prob_max);
    }

    info_out.music_prob_min = prob_min;
    info_out.music_prob_max = prob_max;
}

fn tonality_analysis<PCM: DownmixInput + ?Sized>(
    tonal: &mut TonalityAnalysisState,
    _celt_mode: &OpusCustomMode<'_>,
    x: &PCM,
    len: usize,
    mut offset: i32,
    c1: i32,
    c2: i32,
    channels: i32,
    lsb_depth: i32,
) {
    let mut len = len as i32;
    if !tonal.initialized {
        tonal.mem_fill = INITIAL_MEM_FILL;
        tonal.initialized = true;
    }
    let alpha = 1.0 / min(10, 1 + tonal.count) as f32;
    let alpha_e = 1.0 / min(25, 1 + tonal.count) as f32;
    let mut alpha_e2 = 1.0 / min(100, 1 + tonal.count) as f32;
    if tonal.count <= 1 {
        alpha_e2 = 1.0;
    }

    if tonal.fs == 48_000 {
        len /= 2;
        offset /= 2;
    } else if tonal.fs == 16_000 {
        len = 3 * len / 2;
        offset = 3 * offset / 2;
    }

    // Capture the accumulated high-pass energy for this analysis window (matches C).
    let hp_ener = {
        let avail = min(len as usize, ANALYSIS_BUF_SIZE - tonal.mem_fill);
        let ret = downmix_and_resample(
            x,
            &mut tonal.inmem[tonal.mem_fill..],
            &mut tonal.downmix_state,
            avail,
            offset,
            c1,
            c2,
            channels,
            tonal.fs,
        );
        tonal.hp_ener_accum += ret;
        tonal.hp_ener_accum
    };

    if tonal.mem_fill + (len as usize) < ANALYSIS_BUF_SIZE {
        tonal.mem_fill += len as usize;
        return;
    }

    let info_slot = tonal.write_pos;
    tonal.write_pos = (tonal.write_pos + 1) % DETECT_SIZE;

    let is_silence = is_digital_silence32(&tonal.inmem, ANALYSIS_BUF_SIZE, 1, lsb_depth);

    let mut input_fft = [KissFftCpx::default(); 480];
    let mut output_fft = [KissFftCpx::default(); 480];
    let mut tonality = [0.0f32; 240];
    let mut noisiness = [0.0f32; 240];

    for i in 0..240 {
        let w = ANALYSIS_WINDOW[i];
        input_fft[i].r = w * tonal.inmem[i];
        input_fft[i].i = w * tonal.inmem[240 + i];
        input_fft[479 - i].r = w * tonal.inmem[479 - i];
        input_fft[479 - i].i = w * tonal.inmem[720 - i - 1];
    }

    tonal
        .inmem
        .copy_within(ANALYSIS_BUF_SIZE - 240..ANALYSIS_BUF_SIZE, 0);
    let remaining = len as usize - (ANALYSIS_BUF_SIZE - tonal.mem_fill);
    tonal.hp_ener_accum = downmix_and_resample(
        x,
        &mut tonal.inmem[240..],
        &mut tonal.downmix_state,
        remaining,
        offset + (ANALYSIS_BUF_SIZE - tonal.mem_fill) as i32,
        c1,
        c2,
        channels,
        tonal.fs,
    );
    tonal.mem_fill = 240 + remaining;

    if is_silence {
        let prev_pos = (tonal.write_pos + DETECT_SIZE - 2) % DETECT_SIZE;
        let prev_info = tonal.info[prev_pos].clone();
        tonal.info[info_slot] = prev_info;
        return;
    }

    let info = &mut tonal.info[info_slot];
    opus_fft(&tonal.kfft, &input_fft, &mut output_fft);
    if output_fft[0].r.is_nan() {
        info.valid = false;
        return;
    }

    let mut tonality2 = [0.0f32; 240];
    let mut band_tonality = [0.0f32; NB_TBANDS];
    let mut log_e = [0.0f32; NB_TBANDS];
    let mut bfcc = [0.0f32; 8];
    let mut features = [0.0f32; 25];
    let mut frame_noisiness = 0.0f32;
    let mut frame_stationarity = 0.0f32;
    let mut frame_tonality = 0.0f32;
    let mut max_frame_tonality = 0.0f32;
    let mut slope = 0.0f32;
    let mut relative_e = 0.0f32;
    let mut frame_loudness = 0.0f32;
    let mut noise_floor;
    let mut mid_e = [0.0f32; 8];
    let mut spec_variability = 0.0f32;
    let mut band_log2 = [0.0f32; NB_TBANDS + 1];
    let mut leakage_from = [0.0f32; NB_TBANDS + 1];
    let mut leakage_to = [0.0f32; NB_TBANDS + 1];
    let mut layer_out = [0.0f32; MAX_NEURONS];
    let mut is_masked = [false; NB_TBANDS + 1];
    let mut below_max_pitch;
    let mut above_max_pitch;

    let pi4 = pi4_f32();
    for i in 1..240 {
        let x1r = output_fft[i].r + output_fft[480 - i].r;
        let x1i = output_fft[i].i - output_fft[480 - i].i;
        let x2r = output_fft[i].i + output_fft[480 - i].i;
        let x2i = output_fft[480 - i].r - output_fft[i].r;

        let atan1 = fast_atan2f(x1i, x1r);
        let angle = 0.5 / PI * atan1;
        let d_angle = angle - tonal.angle[i];
        let d2_angle = d_angle - tonal.d_angle[i];

        let atan2 = fast_atan2f(x2i, x2r);
        let angle2 = 0.5 / PI * atan2;
        let d_angle2 = angle2 - angle;
        let d2_angle2 = d_angle2 - d_angle;

        let d2_angle_int = float2int(d2_angle);
        let mod1_pre = d2_angle - d2_angle_int as f32;
        noisiness[i] = mod1_pre.abs();
        let mut mod1 = mod1_pre;
        mod1 *= mod1;
        mod1 *= mod1;

        let d2_angle2_int = float2int(d2_angle2);
        let mod2_pre = d2_angle2 - d2_angle2_int as f32;
        noisiness[i] += mod2_pre.abs();
        let mut mod2 = mod2_pre;
        mod2 *= mod2;
        mod2 *= mod2;

        let avg_mod = 0.25 * (tonal.d2_angle[i] + mod1 + 2.0 * mod2);
        let scale = (40.0_f32 * 16.0_f32) * pi4;
        let denom = mul_add_f32(scale, avg_mod, 1.0);
        let denom2 = mul_add_f32(scale, mod2, 1.0);
        tonality[i] = 1.0 / denom - 0.015;
        tonality2[i] = 1.0 / denom2 - 0.015;

        tonal.angle[i] = angle2;
        tonal.d_angle[i] = d_angle2;
        tonal.d2_angle[i] = mod2;
    }

    for i in 2..239 {
        let tt = tonality2[i].min(tonality2[i - 1].max(tonality2[i + 1]));
        tonality[i] = 0.9 * tonality[i].max(tt - 0.1);
    }

    info.activity = 0.0;
    if tonal.count == 0 {
        tonal.low_e.fill(1e10);
        tonal.high_e.fill(-1e10);
    }

    // First band (DC).
    let e = {
        let x1r = 2.0 * output_fft[0].r;
        let x2r = 2.0 * output_fft[0].i;
        let mut sum = mul_add_f32(x1r, x1r, x2r * x2r);
        for i in 1..4 {
            let bin_e = bin_energy_sum(
                output_fft[i].r,
                output_fft[i].i,
                output_fft[480 - i].r,
                output_fft[480 - i].i,
            );
            sum += bin_e;
        }
        sum * SCALE_ENER
    };
    band_log2[0] = 0.5 * LOG2_E * logf(e + 1e-10);

    for b in 0..NB_TBANDS {
        let mut band_e = 0.0f32;
        let mut t_e = 0.0f32;
        let mut n_e = 0.0f32;
        let band_start = TBANDS[b];
        let band_end = TBANDS[b + 1];
        for i in band_start..band_end {
            let bin_e = bin_energy_sum(
                output_fft[i].r,
                output_fft[i].i,
                output_fft[480 - i].r,
                output_fft[480 - i].i,
            ) * SCALE_ENER;
            let tonality_val = tonality[i];
            let tonality_clamped = tonality_val.max(0.0);
            band_e += bin_e;
            t_e = accumulate_t_e(t_e, bin_e, tonality_clamped);
            let noisiness_term = 0.5 - noisiness[i];
            n_e = mul_add_f32(bin_e * 2.0, noisiness_term, n_e);
        }

        tonal.e[tonal.e_count][b] = band_e;
        let band_noisiness = n_e / (1e-15 + band_e);
        frame_noisiness += band_noisiness;
        frame_loudness += sqrtf(band_e + 1e-10);
        let band_e_eps = band_e + 1e-10;
        let log_e_val = log(band_e_eps as f64) as f32;
        log_e[b] = log_e_val;
        band_log2[b + 1] = 0.5 * LOG2_E * log_e_val;
        tonal.log_e[tonal.e_count][b] = log_e[b];

        if tonal.count == 0 {
            tonal.high_e[b] = log_e[b];
            tonal.low_e[b] = log_e[b];
        }
        if tonal.high_e[b] > tonal.low_e[b] + 7.5 {
            if tonal.high_e[b] - log_e[b] > log_e[b] - tonal.low_e[b] {
                tonal.high_e[b] -= 0.01;
            } else {
                tonal.low_e[b] += 0.01;
            }
        }
        if log_e[b] > tonal.high_e[b] {
            tonal.high_e[b] = log_e[b];
            tonal.low_e[b] = (tonal.high_e[b] - 15.0).max(tonal.low_e[b]);
        } else if log_e[b] < tonal.low_e[b] {
            tonal.low_e[b] = log_e[b];
            tonal.high_e[b] = (tonal.low_e[b] + 15.0).min(tonal.high_e[b]);
        }
        let denom = 1e-5 + (tonal.high_e[b] - tonal.low_e[b]);
        relative_e += (log_e[b] - tonal.low_e[b]) / denom;

        let mut l1 = 0.0f32;
        let mut l2 = 0.0f32;
        for i in 0..NB_FRAMES {
            let band_e = tonal.e[i][b];
            // Use f64 sqrt to mirror the C double path; keep the cast for parity.
            #[allow(clippy::cast_lossless)]
            let sqrt_e = sqrt(band_e as f64) as f32;
            l1 += sqrt_e;
            l2 += band_e;
        }
        let denom = stationarity_denom(l2);
        let mut stationarity = (l1 / denom).min(0.99);
        stationarity = stationarity * stationarity;
        stationarity *= stationarity;
        frame_stationarity += stationarity;

        let energy_ratio_num = t_e.max(0.0);
        let energy_ratio_denom = 1e-15 + band_e;
        let energy_ratio = energy_ratio_num / energy_ratio_denom;
        let stationarity_term = stationarity * tonal.prev_band_tonality[b];
        band_tonality[b] = energy_ratio.max(stationarity_term);
        frame_tonality += band_tonality[b];
        if b >= NB_TBANDS - NB_TONAL_SKIP_BANDS {
            let idx = b + NB_TONAL_SKIP_BANDS - NB_TBANDS;
            frame_tonality -= band_tonality[idx];
        }
        let weight = (1.0_f64 + 0.03_f64 * (b as f64 - NB_TBANDS as f64)) as f32;
        let weighted = (weight as f64 * frame_tonality as f64) as f32;
        max_frame_tonality = max_frame_tonality.max(weighted);
        let slope_delta = (b as i32 - 8) as f32;
        let slope_term = band_tonality[b] * slope_delta;
        slope = slope + slope_term;
        tonal.prev_band_tonality[b] = band_tonality[b];
    }

    leakage_from[0] = band_log2[0];
    leakage_to[0] = band_log2[0] - LEAKAGE_OFFSET;
    for b in 1..=NB_TBANDS {
        let leak_slope = LEAKAGE_SLOPE * (TBANDS[b] - TBANDS[b - 1]) as f32 / 4.0;
        leakage_from[b] = (leakage_from[b - 1] + leak_slope).min(band_log2[b]);
        leakage_to[b] = (leakage_to[b - 1] - leak_slope).max(band_log2[b] - LEAKAGE_OFFSET);
    }
    for b in (0..NB_TBANDS).rev() {
        let leak_slope = LEAKAGE_SLOPE * (TBANDS[b + 1] - TBANDS[b]) as f32 / 4.0;
        leakage_from[b] = (leakage_from[b + 1] + leak_slope).min(leakage_from[b]);
        leakage_to[b] = (leakage_to[b + 1] - leak_slope).max(leakage_to[b]);
    }
    #[allow(clippy::assertions_on_constants, clippy::int_plus_one)]
    {
        debug_assert!(NB_TBANDS + 1 <= LEAK_BANDS);
    }
    for b in 0..=NB_TBANDS {
        let boost = (leakage_to[b] - band_log2[b]).max(0.0)
            + (band_log2[b] - (leakage_from[b] + LEAKAGE_OFFSET)).max(0.0);
        info.leak_boost[b] = floorf(boost * 64.0 + 0.5).min(255.0) as u8;
    }
    info.leak_boost
        .iter_mut()
        .skip(NB_TBANDS + 1)
        .for_each(|value| *value = 0);

    for i in 0..NB_FRAMES {
        let mut mindist = f32::MAX;
        for j in 0..NB_FRAMES {
            if i == j {
                continue;
            }
            let mut dist = 0.0f32;
            for k in 0..NB_TBANDS {
                let tmp = tonal.log_e[i][k] - tonal.log_e[j][k];
                dist = fmaf(tmp, tmp, dist);
            }
            mindist = mindist.min(dist);
        }
        spec_variability += mindist;
    }
    spec_variability = sqrtf(spec_variability / (NB_FRAMES * NB_TBANDS) as f32);
    let mut bandwidth_mask = 0.0f32;
    let mut bandwidth = 0usize;
    let mut max_e = 0.0f32;
    let depth = (lsb_depth - 8).max(0);
    noise_floor = 5.7e-4 / (1 << depth) as f32;
    noise_floor *= noise_floor;
    below_max_pitch = 0.0;
    above_max_pitch = 0.0;
    for b in 0..NB_TBANDS {
        let mut band_e = 0.0f32;
        let band_start = TBANDS[b];
        let band_end = TBANDS[b + 1];
        for i in band_start..band_end {
            let bin_e = bin_energy_sum(
                output_fft[i].r,
                output_fft[i].i,
                output_fft[480 - i].r,
                output_fft[480 - i].i,
            );
            band_e += bin_e;
        }
        band_e *= SCALE_ENER;
        max_e = max_e.max(band_e);
        if band_start < 64 {
            below_max_pitch += band_e;
        } else {
            above_max_pitch += band_e;
        }
        tonal.mean_e[b] = ((1.0 - alpha_e2) * tonal.mean_e[b]).max(band_e);
        let em = tonal.mean_e[b].max(band_e);
        if band_e * 1e9 > max_e
            && (em > 3.0 * noise_floor * (band_end - band_start) as f32
                || band_e > noise_floor * (band_end - band_start) as f32)
        {
            bandwidth = b + 1;
        }
        let threshold = if tonal.prev_bandwidth >= (b + 1) as i32 {
            0.01
        } else {
            0.05
        } * bandwidth_mask;
        is_masked[b] = band_e < threshold;
        bandwidth_mask = (0.05 * bandwidth_mask).max(band_e);
    }

    if tonal.fs == 48_000 {
        let mut e_high = hp_ener * (1.0 / (60.0 * 60.0));
        if e_high < 0.0 {
            e_high = 0.0;
        }
        let noise_ratio = if tonal.prev_bandwidth == 20 {
            10.0
        } else {
            30.0
        };
        above_max_pitch += e_high;
        tonal.mean_e[NB_TBANDS] = ((1.0 - alpha_e2) * tonal.mean_e[NB_TBANDS]).max(e_high);
        let em = tonal.mean_e[NB_TBANDS].max(e_high);
        if em > 3.0 * noise_ratio * noise_floor * 160.0
            || e_high > noise_ratio * noise_floor * 160.0
        {
            bandwidth = 20;
        }
        let threshold = if tonal.prev_bandwidth == 20 {
            0.01
        } else {
            0.05
        } * bandwidth_mask;
        is_masked[NB_TBANDS] = e_high < threshold;
    }

    info.max_pitch_ratio = if above_max_pitch > below_max_pitch {
        below_max_pitch / above_max_pitch
    } else {
        1.0
    };
    if bandwidth == 20 && is_masked[NB_TBANDS] {
        bandwidth -= 2;
    } else if bandwidth > 0 && bandwidth <= NB_TBANDS && is_masked[bandwidth - 1] {
        bandwidth -= 1;
    }
    if tonal.count <= 2 {
        bandwidth = 20;
    }
    frame_loudness = 20.0 * log10f(frame_loudness);
    tonal.e_tracker = (tonal.e_tracker - 0.003).max(frame_loudness);
    tonal.low_e_count *= 1.0 - alpha_e;
    if frame_loudness < tonal.e_tracker - 30.0 {
        tonal.low_e_count += alpha_e;
    }

    for i in 0..8 {
        let mut sum = 0.0f32;
        for b in 0..16 {
            sum = fmaf(DCT_TABLE[i * 16 + b], log_e[b], sum);
        }
        bfcc[i] = sum;
    }
    for i in 0..8 {
        let mut sum = 0.0f32;
        for b in 0..16 {
            let mid = 0.5 * (tonal.high_e[b] + tonal.low_e[b]);
            sum = fmaf(DCT_TABLE[i * 16 + b], mid, sum);
        }
        mid_e[i] = sum;
    }

    frame_stationarity /= NB_TBANDS as f32;
    relative_e /= NB_TBANDS as f32;
    if tonal.count < 10 {
        relative_e = 0.5;
    }
    frame_noisiness /= NB_TBANDS as f32;
    info.activity = mul_add_f32(1.0 - frame_noisiness, relative_e, frame_noisiness);
    frame_tonality = max_frame_tonality / (NB_TBANDS - NB_TONAL_SKIP_BANDS) as f32;
    let prev_term = tonal.prev_tonality * 0.8;
    frame_tonality = frame_tonality.max(prev_term);
    tonal.prev_tonality = frame_tonality;

    slope /= 64.0;
    info.tonality_slope = slope;

    tonal.e_count = (tonal.e_count + 1) % NB_FRAMES;
    tonal.count = min(tonal.count + 1, ANALYSIS_COUNT_MAX);
    info.tonality = frame_tonality;

    for i in 0..4 {
        let bfcc_mem = bfcc[i] + tonal.mem[i + 24];
        let mem_sum = tonal.mem[i] + tonal.mem[i + 16];
        let mut sum = mul_add_f32(-0.122_99, bfcc_mem, 0.491_95 * mem_sum);
        sum = mul_add_f32(0.696_93, tonal.mem[i + 8], sum);
        sum = mul_add_f32(-1.4349, tonal.cmean[i], sum);
        features[i] = sum;
    }

    for i in 0..4 {
        let update = alpha * bfcc[i];
        tonal.cmean[i] = mul_add_f32(1.0 - alpha, tonal.cmean[i], update);
    }

    for i in 0..4 {
        let bfcc_delta = bfcc[i] - tonal.mem[i + 24];
        let mem_delta = tonal.mem[i] - tonal.mem[i + 16];
        let tail = 0.316_23 * mem_delta;
        features[4 + i] = mul_add_f32(0.632_46, bfcc_delta, tail);
    }
    for i in 0..3 {
        let bfcc_sum = bfcc[i] + tonal.mem[i + 24];
        let mem_sum = tonal.mem[i] + tonal.mem[i + 16];
        let part = mul_add_f32(0.534_52, bfcc_sum, -0.267_26 * mem_sum);
        features[8 + i] = mul_add_f32(-0.534_52, tonal.mem[i + 8], part);
    }

    if tonal.count > 5 {
        for i in 0..9 {
            let update = alpha * features[i] * features[i];
            tonal.std[i] = mul_add_f32(1.0 - alpha, tonal.std[i], update);
        }
    }
    for i in 0..4 {
        features[i] = bfcc[i] - mid_e[i];
    }

    for i in 0..8 {
        tonal.mem[i + 24] = tonal.mem[i + 16];
        tonal.mem[i + 16] = tonal.mem[i + 8];
        tonal.mem[i + 8] = tonal.mem[i];
        tonal.mem[i] = bfcc[i];
    }

    for i in 0..9 {
        let std_sqrt = sqrt(tonal.std[i] as f64) as f32;
        features[11 + i] = std_sqrt - STD_FEATURE_BIAS[i];
    }
    features[18] = spec_variability - 0.78;
    features[20] = info.tonality - 0.154_723;
    features[21] = info.activity - 0.724_643;
    features[22] = frame_stationarity - 0.743_717;
    features[23] = info.tonality_slope + 0.069_216;
    features[24] = tonal.low_e_count - 0.067_930;

    analysis_compute_dense(&LAYER0, &mut layer_out, &features);
    analysis_compute_gru(&LAYER1, &mut tonal.rnn_state, &layer_out);
    let mut frame_probs = [0.0f32; 2];
    analysis_compute_dense(&LAYER2, &mut frame_probs, &tonal.rnn_state);
    info.activity_probability = frame_probs[1];
    info.music_prob = frame_probs[0];
    info.bandwidth = bandwidth as i32;
    tonal.prev_bandwidth = bandwidth as i32;
    info.noisiness = frame_noisiness;
    info.valid = true;
}

pub(crate) fn run_analysis<PCM: DownmixInput + ?Sized>(
    analysis: &mut TonalityAnalysisState,
    celt_mode: &OpusCustomMode<'_>,
    analysis_pcm: Option<&PCM>,
    mut analysis_frame_size: usize,
    frame_size: usize,
    c1: i32,
    c2: i32,
    channels: i32,
    fs: i32,
    lsb_depth: i32,
    analysis_info: &mut AnalysisInfo,
) {
    analysis_frame_size &= !1;
    if let Some(pcm) = analysis_pcm {
        let max_analysis = min(((DETECT_SIZE - 5) * fs as usize) / 50, analysis_frame_size);
        let mut pcm_len = max_analysis as i32 - analysis.analysis_offset;
        let mut offset = analysis.analysis_offset;
        while pcm_len > 0 {
            let chunk = min(fs / 50, pcm_len);
            tonality_analysis(
                analysis,
                celt_mode,
                pcm,
                chunk as usize,
                offset,
                c1,
                c2,
                channels,
                lsb_depth,
            );
            offset += fs / 50;
            pcm_len -= fs / 50;
        }
        analysis.analysis_offset = max_analysis as i32;
        analysis.analysis_offset -= frame_size as i32;
    }

    tonality_get_info(analysis, analysis_info, frame_size);
}