// audiobook-creation-exchange 0.1.0
//
// ACX-compliant audio post-processing: normalisation, limiting, gating, LUFS measurement, and spectral analysis for AI-generated speech audio.
// Documentation
//! LUFS (Loudness Units relative to Full Scale) measurement per ITU-R BS.1770-4.
//!
//! LUFS is the broadcast and streaming standard that accounts for how human hearing
//! actually perceives different frequencies — unlike RMS which is purely electrical.
//!
//! # Targets
//! - Spotify: −14 LUFS integrated
//! - Apple Podcasts: −16 LUFS integrated
//! - ACX: −18 to −23 dB RMS (approximately −18 to −20 LUFS integrated)

/// Spotify's integrated LUFS target.
///
/// [`report`] flags audio as Spotify-compliant when its integrated loudness is
/// at or above this value.
pub const SPOTIFY_TARGET_LUFS: f32 = -14.0;
/// Apple Podcasts' integrated LUFS target.
///
/// [`report`] flags audio as Apple-compliant when its integrated loudness is
/// at or above this value.
pub const APPLE_TARGET_LUFS: f32 = -16.0;
/// Maximum allowed Loudness Range for consistent listening, in LU.
///
/// [`report`] sets `lra_ok` when the measured loudness range is at or below
/// this value.
pub const MAX_LOUDNESS_RANGE: f32 = 15.0;

/// A second-order IIR filter section (biquad) with its running state.
///
/// `b0..b2` are the feed-forward coefficients and `a1`, `a2` the feedback
/// coefficients, with the leading feedback coefficient taken as 1. `z1` and
/// `z2` are the two state registers carried from one sample to the next; the
/// recurrence in [`Biquad::process`] is the transposed Direct Form II layout.
struct Biquad {
    b0: f64,
    b1: f64,
    b2: f64,
    a1: f64,
    a2: f64,
    z1: f64,
    z2: f64,
}

impl Biquad {
    /// Feed one input sample `x` through the section and return its output.
    ///
    /// Updates both state registers, so consecutive calls must be made in
    /// signal order.
    #[inline]
    fn process(&mut self, x: f64) -> f64 {
        // Snapshot coefficients and the previous state before overwriting it.
        let Self {
            b0,
            b1,
            b2,
            a1,
            a2,
            z1,
            z2,
        } = *self;

        let out = b0 * x + z1;
        self.z1 = b1 * x - a1 * out + z2;
        self.z2 = b2 * x - a2 * out;
        out
    }
}

/// Compute the K-weighting filter coefficients for `sample_rate` (in Hz).
///
/// K-weighting = stage 1 (high-shelf pre-filter at ~1682 Hz, +4 dB)
///             + stage 2 (high-pass at ~38 Hz, Q≈0.5)
///
/// Both stages are designed via bilinear transform from the analog prototype
/// behind the ITU-R BS.1770-4 filters, so the result adapts to the sample
/// rate. At 48 kHz the stage 1 coefficients reproduce the values tabulated in
/// the recommendation.
///
/// Returns `(stage1, stage2)` with zeroed filter state.
///
/// `sample_rate` must be positive and high enough that its Nyquist frequency
/// lies above the shelf frequency (i.e. well above ~3.4 kHz); otherwise the
/// `tan` pre-warping yields meaningless (possibly NaN) coefficients.
fn k_weighting_filters(sample_rate: u32) -> (Biquad, Biquad) {
    let fs = sample_rate as f64;

    // --- Stage 1: high-shelf pre-filter ---
    let f0 = 1681.974_450_955_533_f64;
    let q1 = 0.707_175_236_955_419_6_f64;
    let vh = 10f64.powf(3.999_843_853_973_347_f64 / 20.0); // ≈ +4 dB
    // The mid-band gain of the reference shelf is Vh^0.49966…, not exactly
    // sqrt(Vh); using the exact exponent is what makes the 48 kHz
    // coefficients agree with the BS.1770 table.
    let vb = vh.powf(0.499_666_774_154_541_6_f64);
    let k1 = (std::f64::consts::PI * f0 / fs).tan();
    let norm1 = 1.0 / (1.0 + k1 / q1 + k1 * k1);

    let stage1 = Biquad {
        b0: (vh + vb * k1 / q1 + k1 * k1) * norm1,
        b1: 2.0 * (k1 * k1 - vh) * norm1,
        b2: (vh - vb * k1 / q1 + k1 * k1) * norm1,
        a1: 2.0 * (k1 * k1 - 1.0) * norm1,
        a2: (1.0 - k1 / q1 + k1 * k1) * norm1,
        z1: 0.0,
        z2: 0.0,
    };

    // --- Stage 2: high-pass RLB filter ---
    let fc = 38.135_470_876_024_44_f64;
    let q2 = 0.500_327_037_323_877_3_f64;
    let k2 = (std::f64::consts::PI * fc / fs).tan();
    let norm2 = 1.0 / (1.0 + k2 / q2 + k2 * k2);

    // NOTE(review): the numerator is scaled by `norm2`, giving unity gain at
    // Nyquist. The BS.1770 table lists an unscaled (1, −2, 1) numerator at
    // 48 kHz, which is a few hundredths of a dB louder — confirm which
    // convention downstream targets were calibrated against.
    let stage2 = Biquad {
        b0: norm2,
        b1: -2.0 * norm2,
        b2: norm2,
        a1: 2.0 * (k2 * k2 - 1.0) * norm2,
        a2: (1.0 - k2 / q2 + k2 * k2) * norm2,
        z1: 0.0,
        z2: 0.0,
    };

    (stage1, stage2)
}

/// Run `samples` through the two-stage K-weighting cascade.
///
/// Each 16-bit sample is scaled by `i16::MAX` into floating point, passed
/// through the first filter stage and then the second. The output has one
/// value per input sample, in the same order.
fn k_weight(samples: &[i16], sample_rate: u32) -> Vec<f64> {
    let (mut shelf, mut high_pass) = k_weighting_filters(sample_rate);
    let full_scale = f64::from(i16::MAX);

    let mut weighted = Vec::with_capacity(samples.len());
    for &sample in samples {
        let normalised = f64::from(sample) / full_scale;
        let shelved = shelf.process(normalised);
        weighted.push(high_pass.process(shelved));
    }
    weighted
}

/// Average of the squared values in `samples`.
///
/// An empty slice yields `0.0` rather than dividing by zero.
fn mean_square(samples: &[f64]) -> f64 {
    match samples.len() {
        0 => 0.0,
        count => {
            let mut energy = 0.0_f64;
            for &value in samples {
                energy += value * value;
            }
            energy / count as f64
        }
    }
}

/// Constant term added to `10·log10(mean square)` when converting to LUFS.
const LUFS_OFFSET: f64 = -0.691;

/// Express a mean-square value on the LUFS scale.
///
/// Zero or negative input has no logarithm, so it maps to the fixed floor of
/// −144.0 instead.
fn ms_to_lufs(ms: f64) -> f32 {
    if ms <= 0.0 {
        -144.0
    } else {
        let decibels = 10.0 * ms.log10();
        (decibels + LUFS_OFFSET) as f32
    }
}

/// Compute Integrated LUFS for `samples` per ITU-R BS.1770-4.
///
/// Uses 400 ms blocks with 75 % overlap (100 ms step), an absolute gate at
/// −70 LUFS, and a relative gate 10 LU below the loudness of the blocks that
/// passed the absolute gate.
///
/// `samples` is treated as a single channel of 16-bit PCM at `sample_rate` Hz.
///
/// Returns −144.0 as a "no measurable loudness" sentinel when:
/// - `sample_rate` is too low to form a 100 ms step (below 10 Hz, including 0),
/// - the input is shorter than one 400 ms block, or
/// - no block survives gating (e.g. digital silence).
pub fn integrated_lufs(samples: &[i16], sample_rate: u32) -> f32 {
    // Sentinel shared by every "nothing to measure" exit below.
    const SILENCE_LUFS: f32 = -144.0;

    let rate = sample_rate as usize;
    let block_size = rate * 400 / 1000; // 400 ms gating block
    let step = rate * 100 / 1000; // 100 ms hop => 75 % overlap

    // A zero step would never advance through the signal, so bail out before
    // building the block list.
    if step == 0 {
        return SILENCE_LUFS;
    }

    let weighted = k_weight(samples, sample_rate);
    if weighted.len() < block_size {
        return SILENCE_LUFS;
    }

    // Absolute gate: −70 LUFS expressed as a mean-square threshold.
    let abs_gate_ms = 10f64.powf((-70.0 - LUFS_OFFSET) / 10.0);
    let mut gated: Vec<f64> = (0..=weighted.len() - block_size)
        .step_by(step)
        .map(|start| mean_square(&weighted[start..start + block_size]))
        .filter(|&ms| ms > abs_gate_ms)
        .collect();

    if gated.is_empty() {
        return SILENCE_LUFS;
    }

    // Relative gate: 10 LU below the absolute-gated loudness. In the power
    // domain −10 LU is a factor of 10, so no trip through LUFS (and f32) is
    // needed to find the threshold.
    let rel_gate_ms = gated.iter().sum::<f64>() / gated.len() as f64 / 10.0;
    gated.retain(|&ms| ms > rel_gate_ms);

    if gated.is_empty() {
        return SILENCE_LUFS;
    }

    ms_to_lufs(gated.iter().sum::<f64>() / gated.len() as f64)
}

/// Compute Loudness Range (LRA) in LU, following EBU Tech 3342 (the LRA
/// definition referenced by EBU R 128).
///
/// Uses 3 s short-term blocks with 1 s steps and cascaded gating:
/// 1. absolute gate at −70 LUFS,
/// 2. relative gate 20 LU below the mean loudness of the blocks that passed
///    the absolute gate, so pauses and low-level background do not inflate
///    the range.
///
/// LRA = 95th percentile − 10th percentile of the surviving block loudness
/// values.
///
/// Returns 0.0 when `sample_rate` is 0, when the input is shorter than one
/// 3 s block, or when fewer than two blocks survive gating.
pub fn loudness_range(samples: &[i16], sample_rate: u32) -> f32 {
    let block_size = sample_rate as usize * 3; // 3 s
    let step = sample_rate as usize; // 1 s step

    // A zero step would never advance through the signal.
    if step == 0 {
        return 0.0;
    }

    let weighted = k_weight(samples, sample_rate);
    if weighted.len() < block_size {
        return 0.0;
    }

    // Gating and sorting are done on mean-square values; the LUFS mapping is
    // monotonic, so ordering and percentile positions are unaffected.
    let abs_gate_ms = 10f64.powf((-70.0 - LUFS_OFFSET) / 10.0);
    let mut block_ms: Vec<f64> = (0..=weighted.len() - block_size)
        .step_by(step)
        .map(|start| mean_square(&weighted[start..start + block_size]))
        .filter(|&ms| ms > abs_gate_ms)
        .collect();

    if block_ms.is_empty() {
        return 0.0;
    }

    // Relative gate: −20 LU is a factor of 100 in the power domain.
    let rel_gate_ms = block_ms.iter().sum::<f64>() / block_ms.len() as f64 / 100.0;
    block_ms.retain(|&ms| ms > rel_gate_ms);

    if block_ms.len() < 2 {
        return 0.0;
    }

    // `total_cmp` gives a total order, so sorting cannot panic.
    block_ms.sort_unstable_by(f64::total_cmp);

    let count = block_ms.len();
    let lo_idx = count / 10; // 10th percentile
    let hi_idx = (count * 95 / 100).min(count - 1); // 95th percentile

    (ms_to_lufs(block_ms[hi_idx]) - ms_to_lufs(block_ms[lo_idx])).max(0.0)
}

/// Full loudness report, as produced by [`report`].
///
/// Bundles the two raw measurements with the pass/fail flags derived from
/// them by comparison against the module's target constants.
#[derive(Debug, Clone)]
pub struct LufsReport {
    /// Integrated LUFS per ITU-R BS.1770-4, as returned by [`integrated_lufs`]
    /// (−144.0 when nothing measurable was found).
    pub integrated_lufs: f32,
    /// Loudness range in LU per EBU R 128, as returned by [`loudness_range`].
    pub loudness_range: f32,
    /// Whether integrated LUFS reaches the Spotify target
    /// (`integrated_lufs` ≥ [`SPOTIFY_TARGET_LUFS`], i.e. ≥ −14 LUFS). Louder
    /// material also counts as compliant; silence never does.
    pub spotify_compliant: bool,
    /// Whether integrated LUFS reaches the Apple Podcasts target
    /// (`integrated_lufs` ≥ [`APPLE_TARGET_LUFS`], i.e. ≥ −16 LUFS).
    pub apple_compliant: bool,
    /// Whether LRA is within the acceptable range
    /// (`loudness_range` ≤ [`MAX_LOUDNESS_RANGE`], i.e. ≤ 15 LU).
    pub lra_ok: bool,
}

/// Measure `samples` and summarise the result as a [`LufsReport`].
///
/// Runs the integrated-loudness and loudness-range measurements on the same
/// input, then derives each compliance flag by comparing against the
/// corresponding module constant.
pub fn report(samples: &[i16], sample_rate: u32) -> LufsReport {
    let integrated = integrated_lufs(samples, sample_rate);
    let range = loudness_range(samples, sample_rate);

    let spotify_compliant = integrated >= SPOTIFY_TARGET_LUFS;
    let apple_compliant = integrated >= APPLE_TARGET_LUFS;
    let lra_ok = range <= MAX_LOUDNESS_RANGE;

    LufsReport {
        integrated_lufs: integrated,
        loudness_range: range,
        spotify_compliant,
        apple_compliant,
        lra_ok,
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::f64::consts::PI;

    /// Sample rate used by most tests.
    const SR: u32 = 24_000;

    /// Generate a 1 kHz sine whose unweighted level corresponds to `target_lufs`.
    ///
    /// A sine at amplitude A has mean-square A²/2; LUFS ≈ -0.691 + 10*log10(A²/2).
    /// The K-weighting gain at 1 kHz is not compensated, so measurements are
    /// only expected to land near the target.
    fn sine_at_lufs(target_lufs: f32, secs: f32, sr: u32) -> Vec<i16> {
        let ms_target = 10f64.powf((target_lufs as f64 - LUFS_OFFSET) / 10.0);
        let amplitude = (ms_target * 2.0).sqrt() * i16::MAX as f64;
        let n = (sr as f32 * secs) as usize;
        (0..n)
            .map(|i| {
                let v = amplitude * (2.0 * PI * 1000.0 * i as f64 / sr as f64).sin();
                v.clamp(i16::MIN as f64, i16::MAX as f64) as i16
            })
            .collect()
    }

    #[test]
    fn integrated_lufs_approximately_correct() {
        // A 1 kHz sine at −20 LUFS should measure close to −20 LUFS
        let samples = sine_at_lufs(-20.0, 5.0, SR);
        let measured = integrated_lufs(&samples, SR);
        assert!(
            (measured - (-20.0)).abs() < 2.0,
            "Expected ~−20 LUFS, got {:.1}",
            measured
        );
    }

    #[test]
    fn silence_returns_very_low_lufs() {
        let samples = vec![0i16; SR as usize * 5];
        let measured = integrated_lufs(&samples, SR);
        assert!(
            measured < -70.0,
            "Expected very low LUFS for silence, got {:.1}",
            measured
        );
    }

    #[test]
    fn louder_signal_has_higher_lufs() {
        let quiet = sine_at_lufs(-25.0, 5.0, SR);
        let loud = sine_at_lufs(-15.0, 5.0, SR);
        let lufs_quiet = integrated_lufs(&quiet, SR);
        let lufs_loud = integrated_lufs(&loud, SR);
        assert!(
            lufs_loud > lufs_quiet,
            "Louder signal ({:.1}) should have higher LUFS than quiet ({:.1})",
            lufs_loud,
            lufs_quiet
        );
    }

    #[test]
    fn constant_signal_has_zero_lra() {
        // A constant-amplitude signal has no dynamic variation
        let samples = sine_at_lufs(-20.0, 10.0, SR);
        let lra = loudness_range(&samples, SR);
        assert!(
            lra < 3.0,
            "Constant signal should have near-zero LRA, got {:.2}",
            lra
        );
    }

    #[test]
    fn dynamic_signal_has_nonzero_lra() {
        // Alternate between loud and quiet segments
        let mut samples = sine_at_lufs(-15.0, 5.0, SR);
        samples.extend(sine_at_lufs(-30.0, 5.0, SR));
        samples.extend(sine_at_lufs(-15.0, 5.0, SR));
        let lra = loudness_range(&samples, SR);
        assert!(
            lra > 1.0,
            "Dynamic signal should have noticeable LRA, got {:.2}",
            lra
        );
    }

    #[test]
    fn lufs_report_fields_are_consistent() {
        let samples = sine_at_lufs(-16.0, 5.0, SR);
        let r = report(&samples, SR);
        assert!(r.apple_compliant, "−16 LUFS should be Apple compliant");
        // −16 is below Spotify's −14 target
        assert!(
            !r.spotify_compliant,
            "−16 LUFS should not be Spotify compliant"
        );
        assert!(r.lra_ok, "Constant sine should have acceptable LRA");
    }

    #[test]
    fn k_weighting_filter_does_not_panic_on_any_sample_rate() {
        for &sr in &[8000u32, 16000, 22050, 24000, 44100, 48000] {
            let samples = sine_at_lufs(-20.0, 2.0, sr);
            let measured = integrated_lufs(&samples, sr);
            // Besides not panicking, the filters must stay numerically sane.
            assert!(
                measured.is_finite(),
                "Non-finite LUFS {} at sample rate {}",
                measured,
                sr
            );
        }
    }

    #[test]
    fn short_clip_below_block_size_returns_low_lufs() {
        // 100 ms clip — shorter than the 400 ms block, so no block can be
        // formed and the −144 sentinel is the only valid answer.
        let samples = sine_at_lufs(-18.0, 0.1, SR);
        let measured = integrated_lufs(&samples, SR);
        assert_eq!(
            measured, -144.0,
            "Clip shorter than one block should return the −144 sentinel, got {:.1}",
            measured
        );
    }
}