audiobook-creation-exchange 0.1.0

ACX-compliant audio post-processing: normalisation, limiting, gating, LUFS measurement, and spectral analysis for AI-generated speech audio.
Documentation
/// Result of one measurement pass over a PCM buffer.
///
/// Every level is expressed in dBFS. Instances are produced by [`analyse()`].
#[derive(Clone, Debug)]
pub struct AcxReport {
    /// RMS level of the whole buffer, in dBFS (see [`rms_db()`]).
    pub rms_db: f32,
    /// Peak level in dBFS as measured by [`peak_db()`], which scans the
    /// signal at four times the original sample rate.
    pub peak_db: f32,
    /// Noise-floor estimate in dBFS: the average level of the quietest 10 % of
    /// 50 ms windows (see [`noise_floor_db()`]).
    pub noise_floor_db: f32,
    /// `true` when RMS, peak and noise floor all satisfy the limits of the
    /// configuration that was passed to [`analyse()`].
    pub compliant: bool,
}

/// Level reported in place of negative infinity: returned for empty buffers and
/// for non-positive linear amplitudes, where a decibel value is undefined.
// NOTE(review): -144 dB is presumably chosen as roughly the dynamic range of
// 24-bit PCM — confirm.
const NEG_INF_DB: f32 = -144.0;
/// Length, in milliseconds, of the analysis windows used by `noise_floor_db`.
const WINDOW_GATE_MS: u32 = 50;

/// Measure the root-mean-square level of `samples`, expressed in dBFS.
///
/// Full scale is `i16::MAX`. An empty slice — or one that contains only
/// zeros — yields the `NEG_INF_DB` floor instead of negative infinity.
pub fn rms_db(samples: &[i16]) -> f32 {
    let count = samples.len();
    if count == 0 {
        return NEG_INF_DB;
    }

    // Accumulate in f64: squared 16-bit samples summed over a long track
    // would quickly exhaust f32 precision.
    let mut energy = 0.0_f64;
    for &sample in samples {
        let value = f64::from(sample);
        energy += value * value;
    }

    let mean_square = energy / count as f64;
    let amplitude = mean_square.sqrt() as f32;
    linear_to_db(amplitude / f32::from(i16::MAX))
}

/// Compute the true-peak level in dBFS using 4× band-limited oversampling.
///
/// Sample-by-sample inspection misses peaks that fall *between* samples once
/// the signal is reconstructed — especially relevant for hard consonants in
/// speech audio. Straight-line interpolation cannot reveal them either,
/// because a point on the segment between two samples never exceeds the larger
/// of the two. This function therefore evaluates three extra points between
/// every pair of neighbouring samples with a Hann-windowed sinc interpolator
/// (12 taps per phase) and reports the largest magnitude found among the
/// original and the interpolated points.
///
/// Samples outside the buffer are treated as silence. Full scale is
/// `i16::MAX`; an empty slice yields `NEG_INF_DB`. The result is never lower
/// than the plain sample peak.
pub fn peak_db(samples: &[i16]) -> f32 {
    // Oversampling factor: `OVERSAMPLE - 1` points are inserted per sample pair.
    const OVERSAMPLE: usize = 4;
    // Number of input samples used on each side of an interpolated point.
    const HALF_TAPS: usize = 6;
    const TAPS: usize = 2 * HALF_TAPS;

    if samples.is_empty() {
        return NEG_INF_DB;
    }

    // One set of interpolation weights per fractional position (1/4, 2/4, 3/4).
    let mut kernel = [[0.0_f32; TAPS]; OVERSAMPLE - 1];
    for (phase, weights) in kernel.iter_mut().enumerate() {
        let frac = (phase + 1) as f32 / OVERSAMPLE as f32;
        for (tap, weight) in weights.iter_mut().enumerate() {
            // Tap `tap` reads the sample at offset `tap - (HALF_TAPS - 1)` from
            // the left neighbour; `distance` is measured from the interpolated
            // point and is never zero because `frac` is strictly fractional.
            let distance = tap as f32 - (HALF_TAPS - 1) as f32 - frac;
            let x = std::f32::consts::PI * distance;
            let sinc = x.sin() / x;
            let hann = 0.5 * (1.0 + (x / HALF_TAPS as f32).cos());
            *weight = sinc * hann;
        }
        // Normalise to unity DC gain so truncating the sinc does not bias levels.
        let dc_gain: f32 = weights.iter().sum();
        for weight in weights.iter_mut() {
            *weight /= dc_gain;
        }
    }

    // Start from the plain sample peak; interpolation can only raise it.
    let mut peak = samples
        .iter()
        .map(|&s| f32::from(s).abs())
        .fold(0.0_f32, f32::max);

    let last = samples.len() - 1;
    for left in 0..last {
        // Away from the edges every tap lands inside the buffer, so a plain
        // sub-slice can be used without per-tap bounds handling.
        let interior = left + 1 >= HALF_TAPS && left + HALF_TAPS <= last;
        for weights in &kernel {
            let value: f32 = if interior {
                samples[left + 1 - HALF_TAPS..=left + HALF_TAPS]
                    .iter()
                    .zip(weights)
                    .map(|(&s, &w)| w * f32::from(s))
                    .sum()
            } else {
                // Near the edges, taps that fall outside the buffer read silence.
                weights
                    .iter()
                    .enumerate()
                    .filter_map(|(tap, &w)| {
                        let index = (left + tap).checked_sub(HALF_TAPS - 1)?;
                        samples.get(index).map(|&s| w * f32::from(s))
                    })
                    .sum()
            };
            peak = peak.max(value.abs());
        }
    }

    linear_to_db(peak / f32::from(i16::MAX))
}

/// Compute the noise floor in dBFS.
///
/// Splits the buffer into 50 ms non-overlapping windows, computes RMS per window,
/// sorts them, and returns the median of the quietest 10 % — this approximates
/// the ambient noise the listener hears between words.
pub fn noise_floor_db(samples: &[i16], sample_rate: u32) -> f32 {
    let window_size = (sample_rate as usize * WINDOW_GATE_MS as usize) / 1000;
    if window_size == 0 || samples.is_empty() {
        return NEG_INF_DB;
    }

    let mut window_rms: Vec<f32> = samples
        .chunks(window_size)
        .filter(|w| w.len() == window_size)
        .map(|w| rms_db(w))
        .collect();

    if window_rms.is_empty() {
        return NEG_INF_DB;
    }

    window_rms.sort_by(|a, b| a.partial_cmp(b).unwrap());

    let quietest_count = ((window_rms.len() as f32 * 0.10) as usize).max(1);
    let sum: f32 = window_rms.iter().take(quietest_count).sum();
    sum / quietest_count as f32
}

/// Run every measurement on `samples` and bundle the results into an
/// [`AcxReport`].
///
/// The report is flagged compliant only when the RMS level lies inside
/// `[cfg.rms_min_db, cfg.rms_max_db]`, the peak does not exceed
/// `cfg.peak_ceiling_db`, and the noise floor does not exceed
/// `cfg.noise_floor_max_db`. All bounds are inclusive.
pub fn analyse(samples: &[i16], sample_rate: u32, cfg: &crate::AcxConfig) -> AcxReport {
    let rms_level = rms_db(samples);
    let peak_level = peak_db(samples);
    let floor_level = noise_floor_db(samples, sample_rate);

    let rms_ok = rms_level >= cfg.rms_min_db && rms_level <= cfg.rms_max_db;
    let peak_ok = peak_level <= cfg.peak_ceiling_db;
    let floor_ok = floor_level <= cfg.noise_floor_max_db;

    AcxReport {
        rms_db: rms_level,
        peak_db: peak_level,
        noise_floor_db: floor_level,
        compliant: rms_ok && peak_ok && floor_ok,
    }
}

/// Convert a linear amplitude ratio to decibels (`1.0` maps to `0 dB`).
///
/// Zero and negative inputs have no logarithm; they map to the `NEG_INF_DB`
/// floor instead.
pub fn linear_to_db(linear: f32) -> f32 {
    // The test is deliberately `<=`: a NaN input fails it and propagates
    // through the logarithm rather than being reported as silence.
    if linear <= 0.0 {
        NEG_INF_DB
    } else {
        linear.log10() * 20.0
    }
}

/// Convert a level in decibels to a linear amplitude ratio (`0 dB` maps to `1.0`).
///
/// Computes `10^(db / 20)`.
pub fn db_to_linear(db: f32) -> f32 {
    let exponent = db / 20.0;
    f32::powf(10.0, exponent)
}