mousiki 0.2.0

Pure Rust Opus codec.
Documentation
//! Ports the fixed-point warped autocorrelation helper from
//! `silk/fixed/warped_autocorrelation_FIX.c`.
//!
//! The routine computes autocorrelation values on a warped frequency axis and
//! returns a scaling factor so the results fit into 32-bit accumulators. It is
//! used by the SILK encoder's noise-shaping analysis when deriving warped LPC
//! coefficients.

use cfg_if::cfg_if;

/// Maximum LPC order supported by the warped autocorrelation helper.
pub const MAX_SHAPE_LPC_ORDER: usize = 24;

pub(super) const QC: i32 = 10;
pub(super) const QS: i32 = 13;
const CORR_SHIFT_QC: i32 = 2 * QS - QC; // equals 16 in the reference code

/// Computes warped autocorrelations for a Q0 input vector.
///
/// The `order` must be even and no larger than [`MAX_SHAPE_LPC_ORDER`]. The
/// `corr` slice must provide room for `order + 1` values. The function returns
/// the scaling factor that the C version writes to `scale`.
#[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
pub fn warped_autocorrelation(
    corr: &mut [i32],
    input: &[i16],
    warping_q16: i32,
    order: usize,
) -> i32 {
    assert!(
        order <= MAX_SHAPE_LPC_ORDER,
        "order must be <= {MAX_SHAPE_LPC_ORDER}"
    );
    assert!(order.is_multiple_of(2), "order must be even");
    assert!(
        corr.len() > order,
        "corr must expose at least {}",
        order + 1
    );

    warped_autocorrelation_impl(corr, input, warping_q16, order)
}

#[inline]
pub(super) fn clz64(value: i64) -> i32 {
    if value == 0 {
        64
    } else {
        (value as u64).leading_zeros() as i32
    }
}

#[inline]
fn smlawb(acc: i32, b: i32, c: i32) -> i32 {
    let c16 = i32::from(c as i16);
    let product = (i64::from(b) * i64::from(c16)) >> 16;
    acc.wrapping_add(product as i32)
}

#[inline]
fn mul_qc(a: i32, b: i32) -> i64 {
    (i64::from(a) * i64::from(b)) >> CORR_SHIFT_QC
}

#[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
pub(super) fn warped_autocorrelation_scalar(
    corr: &mut [i32],
    input: &[i16],
    warping_q16: i32,
    order: usize,
) -> i32 {
    let mut state_qs = [0i32; MAX_SHAPE_LPC_ORDER + 1];
    let mut corr_qc = [0i64; MAX_SHAPE_LPC_ORDER + 1];

    for &sample in input {
        let mut tmp1_qs = (i32::from(sample)) << QS;

        let mut section = 0;
        while section < order {
            let next = section + 1;
            let tail = section + 2;

            let diff = state_qs[next].wrapping_sub(tmp1_qs);
            let tmp2_qs = smlawb(state_qs[section], diff, warping_q16);
            state_qs[section] = tmp1_qs;
            corr_qc[section] += mul_qc(tmp1_qs, state_qs[0]);

            let diff2 = state_qs[tail].wrapping_sub(tmp2_qs);
            tmp1_qs = smlawb(state_qs[next], diff2, warping_q16);
            state_qs[next] = tmp2_qs;
            corr_qc[next] += mul_qc(tmp2_qs, state_qs[0]);

            section += 2;
        }

        state_qs[order] = tmp1_qs;
        corr_qc[order] += mul_qc(tmp1_qs, state_qs[0]);
    }

    debug_assert!(
        corr_qc[0] >= 0,
        "corr_qc[0] should stay non-negative after accumulation"
    );

    let mut lsh = clz64(corr_qc[0]) - 35;
    lsh = lsh.clamp(-12 - QC, 30 - QC);

    let scale = -(QC + lsh);
    debug_assert!((-30..=12).contains(&scale));

    if lsh >= 0 {
        let shift = lsh as u32;
        for (dst, src) in corr.iter_mut().take(order + 1).zip(&corr_qc) {
            let value = src.wrapping_shl(shift);
            debug_assert!(
                value <= i64::from(i32::MAX) && value >= i64::from(i32::MIN),
                "scaled correlation overflows 32-bit range"
            );
            *dst = value as i32;
        }
    } else {
        let shift = (-lsh) as u32;
        for (dst, src) in corr.iter_mut().take(order + 1).zip(&corr_qc) {
            let value = src >> shift;
            debug_assert!(
                value <= i64::from(i32::MAX) && value >= i64::from(i32::MIN),
                "scaled correlation overflows 32-bit range"
            );
            *dst = value as i32;
        }
    }

    debug_assert!(
        corr_qc[0] >= 0,
        "corr_qc[0] should stay non-negative after scaling"
    );

    scale
}

cfg_if! {
    if #[cfg(all(target_arch = "aarch64", not(feature = "force-scalar")))] {
        mod aarch64_neon;

        #[inline]
        fn warped_autocorrelation_impl(
            corr: &mut [i32],
            input: &[i16],
            warping_q16: i32,
            order: usize,
        ) -> i32 {
            aarch64_neon::warped_autocorrelation(corr, input, warping_q16, order)
        }
    } else {
        #[inline]
        fn warped_autocorrelation_impl(
            corr: &mut [i32],
            input: &[i16],
            warping_q16: i32,
            order: usize,
        ) -> i32 {
            warped_autocorrelation_scalar(corr, input, warping_q16, order)
        }
    }
}

#[cfg(test)]
mod tests {
    use super::{MAX_SHAPE_LPC_ORDER, warped_autocorrelation, warped_autocorrelation_scalar};

    #[test]
    fn matches_reference_values() {
        let input = [3276, -1638, 819, -410, 205, -102, 51, -25];
        let mut corr = [0i32; MAX_SHAPE_LPC_ORDER + 1];
        let scale = warped_autocorrelation(&mut corr, &input, 29_491, 4);

        assert_eq!(scale, -5);
        assert_eq!(
            &corr[..5],
            &[
                457_911_552,
                -355_118_103,
                275_370_536,
                -213_637_770,
                165_476_668
            ]
        );
    }

    #[test]
    fn matches_reference_values_higher_order() {
        let input = [
            1234, 2345, -3456, 4567, -5678, 6789, -7890, 3210, -210, 1111, -999, 555,
        ];
        let mut corr = [0i32; MAX_SHAPE_LPC_ORDER + 1];
        let scale = warped_autocorrelation(&mut corr, &input, 16_384, 6);

        assert_eq!(scale, -1);
        assert_eq!(
            &corr[..7],
            &[
                386_588_116,
                -358_683_256,
                315_108_010,
                -274_226_201,
                232_894_203,
                -201_623_736,
                166_257_710
            ]
        );
    }

    #[test]
    fn zero_input_produces_min_scale() {
        let input = [0i16; 8];
        let mut corr = [0i32; MAX_SHAPE_LPC_ORDER + 1];
        let scale = warped_autocorrelation(&mut corr, &input, 1_000, 4);

        assert_eq!(scale, -30);
        assert!(corr.iter().take(5).all(|&v| v == 0));
    }

    #[test]
    fn dispatch_matches_scalar_across_orders() {
        let mut input = [0i16; 240];
        for (idx, sample) in input.iter_mut().enumerate() {
            let val = ((idx as i32 * 73) % 32_767) - 16_000;
            *sample = val as i16;
        }

        for &order in &[4usize, 6, 8, 10, 12, 16, 20, 24] {
            let mut corr_dispatch = [0i32; MAX_SHAPE_LPC_ORDER + 1];
            let mut corr_scalar = [0i32; MAX_SHAPE_LPC_ORDER + 1];
            let scale_dispatch = warped_autocorrelation(&mut corr_dispatch, &input, 20_000, order);
            let scale_scalar =
                warped_autocorrelation_scalar(&mut corr_scalar, &input, 20_000, order);

            assert_eq!(scale_dispatch, scale_scalar, "order={order}");
            assert_eq!(
                &corr_dispatch[..=order],
                &corr_scalar[..=order],
                "order={order}"
            );
        }
    }
}